diff --git a/mossutils/aggr.py b/mossutils/aggr.py index 69ff7f06488dafafec8d473172f4904d1dd5f956..8ca8f9db1a2a8b0e3ac7eedce06297f3d648e290 100644 --- a/mossutils/aggr.py +++ b/mossutils/aggr.py @@ -5,22 +5,26 @@ import jinja2 import pandas as pd from .utils import logging as logutils -DEFAULT_CLUSTERS_SUMMARY_CSV_FILE="clusters.csv" +DEFAULT_CLUSTER_FILES_DIR="_clusters" +DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv" +DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv" DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv" def main( - clusters_summary_csv_file=DEFAULT_CLUSTERS_SUMMARY_CSV_FILE, + cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR, + clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE, + cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN, cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE): logutils.configure_level_and_format() - if not os.path.isfile(clusters_summary_csv_file): - raise RuntimeError("Cluster summary CSV file {} doesn't exist. Should have been created by mu-cluster.".format(clusters_summary_csv_file)) + if not os.path.isfile(clusters_matches_csv_file): + raise RuntimeError("CSV file {} with matches per clusters doesn't exist. Should have been created by mu-cluster.".format(clusters_matches_csv_file)) if not os.path.isfile(cx_course_students_csv_file): raise RuntimeError("Code Expert course data CSV file {} doesn't exist. Download it from Code Expert as follows: My Courses -> Students -> Export to CSV.".format(cx_course_students_csv_file)) - clusters_csv: pd.DataFrame = pd.read_csv(clusters_summary_csv_file) + clusters_csv: pd.DataFrame = pd.read_csv(clusters_matches_csv_file) # Read CX course data, reduce to relevant columns, truncate TotalScore (which are floats), set index column relevant_course_columns = ["Legi", "Lastname", "Firstname", "Email", "Gender", "TotalScore"] @@ -86,7 +90,7 @@ def main( jinja2_rows = [] cluster_groups: pd.DataFrameGroupBy = clusters_csv.groupby("cluster_id") - for _, cluster in cluster_groups: # cluster: pd.DataFrame + for cluster_id, cluster in cluster_groups: # cluster: pd.DataFrame # print("-"*60) # Get all ids (= legis) participating in a cluster ids_values: numpy.ndarray = pd.concat([cluster["id1"], cluster["id2"]]).unique() @@ -115,9 +119,17 @@ def main( cluster_rows: pd.DataFrame = cluster_course_rows.join(edoz_csv) + students_per_clusters_file = os.path.join( + cluster_files_dir, + cluster_students_csv_file_pattern.format(cluster_id)) + + logging.info("Writing students per clusters to file {}".format(students_per_clusters_file)) + cluster_rows.to_csv(students_per_clusters_file) + # print("========== cluster_rows") # print(cluster_rows.shape) # print(cluster_rows) + # print(name) # print(cluster) # print(cluster["svg_file"].iat[0]) diff --git a/mossutils/cluster.py b/mossutils/cluster.py index b9f48ac65e97caa3c6cd004e404efc1c6e651f89..19a544909cf13ad83334d0c2432739192315a2ee 100644 --- a/mossutils/cluster.py +++ b/mossutils/cluster.py @@ -27,7 +27,7 @@ DEFAULT_CLUSTER_FILE_PATTERN="cluster-{}.{}" DEFAULT_THRESHOLD_PERCENTAGE=90 DEFAULT_THRESHOLD_LINES=50 DEFAULT_CREATE_SVG_FILES=True -DEFAULT_SUMMARY_CSV_FILE="clusters.csv" +DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv" @dataclasses.dataclass class MossResult: @@ -184,11 +184,11 @@ def create_clusters(graph, cluster_file_pattern, create_svg_files): return cluster_entries -def create_summary_csv_file(cluster_entries, summary_csv_file): - logging.info("Writing summary file {}".format(summary_csv_file)) +def create_clusters_matches_csv_file(cluster_entries, clusters_matches_csv_file): + logging.info("Writing file with matches per clusters {}".format(clusters_matches_csv_file)) if cluster_entries: - with open(summary_csv_file, "w", newline="") as csv_fh: + with open(clusters_matches_csv_file, "w", newline="") as csv_fh: csv_writer = csv.writer(csv_fh) csv_writer.writerow(cluster_entries[0].fields_flattened()) @@ -204,7 +204,7 @@ def main( percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE, lines_threshold=DEFAULT_THRESHOLD_LINES, create_svg_files=DEFAULT_CREATE_SVG_FILES, - summary_csv_file=DEFAULT_SUMMARY_CSV_FILE): + clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE): logutils.configure_level_and_format() @@ -222,7 +222,7 @@ def main( os.path.join(cluster_files_dir, cluster_file_pattern), create_svg_files) - create_summary_csv_file(cluster_entries, summary_csv_file) + create_clusters_matches_csv_file(cluster_entries, clusters_matches_csv_file) if __name__ == "__main__":