diff --git a/mossutils/aggr.py b/mossutils/aggr.py index 20df5e83934c9be345e5879422de02e802b4732b..854ab63d0b9c836ddd947ca045a8d58689f891d7 100644 --- a/mossutils/aggr.py +++ b/mossutils/aggr.py @@ -7,6 +7,7 @@ from .utils import logging as logutils DEFAULT_CLUSTER_FILES_DIR="_clusters" DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv" +DEFAULT_CLUSTERS_STUDENTS_CSV_FILE="clusters-students.csv" DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv" DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv" DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja" @@ -14,6 +15,7 @@ DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja" def main( cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR, clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE, + clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE, cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN, cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE, jinja_cluster_template_file=DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE): @@ -96,6 +98,7 @@ def main( jinja2_rows = [] cluster_groups: pd.DataFrameGroupBy = clusters_csv.groupby("cluster_id") + for cluster_id, cluster in cluster_groups: # cluster: pd.DataFrame # print("-"*60) # Get all ids (= legis) participating in a cluster @@ -129,7 +132,7 @@ def main( cluster_files_dir, cluster_students_csv_file_pattern.format(cluster_id)) - logging.info("Writing students per clusters to file {}".format(students_per_clusters_file)) + logging.info("Writing students from cluster {} to file {}".format(cluster_id, students_per_clusters_file)) cluster_rows.to_csv(students_per_clusters_file) # print("========== cluster_rows") @@ -140,19 +143,30 @@ def main( # print(cluster) # print(cluster["svg_file"].iat[0]) - jinja2_rows.append((cluster, cluster_rows)) + jinja2_rows.append((cluster_id, cluster_rows.shape[0], cluster, cluster_rows)) + + logging.info("Writing all clusters to file 
{}".format(clusters_students_csv_file)) + write_header = True + write_mode = "w" + for cluster_id, _, _, cluster_rows in jinja2_rows: + cluster_rows["Cluster-ID"] = cluster_id ## Inserts column at end + # cluster_rows.insert(0, "Cluster-ID", cluster_id) ## Inserts column after index (Legi) + cluster_rows.to_csv(clusters_students_csv_file, mode=write_mode, header=write_header) + write_header = False + write_mode = "a" + ## TODO: Support sorting clusters by max (or average) involved percentage plagiarist_count = 0 - for (_, cluster_rows) in jinja2_rows: - plagiarist_count += cluster_rows.shape[0] + for _, size, _, cluster_rows in jinja2_rows: + plagiarist_count += size # cluster_rows.shape[0] department_counts = {} - for (cluster, cluster_rows) in jinja2_rows: + for _, _, _, cluster_rows in jinja2_rows: for index, value in cluster_rows["Departement"].value_counts().iteritems(): if index in department_counts: department_counts[index] += value @@ -169,7 +183,7 @@ def main( gender_counts = {} - for (cluster, cluster_rows) in jinja2_rows: + for _, _, _, cluster_rows in jinja2_rows: for index, value in cluster_rows["Gender"].value_counts().iteritems(): if index in gender_counts: gender_counts[index] += value diff --git a/mossutils/data/clusters.html.jinja b/mossutils/data/clusters.html.jinja index ac95d19532be012e87dc6bd83dd71564c4e40032..ccdfbe7e2b97279348b2242d30f8eb4dd0fe25e9 100644 --- a/mossutils/data/clusters.html.jinja +++ b/mossutils/data/clusters.html.jinja @@ -26,10 +26,10 @@ - {% for (cluster, cluster_rows) in clusters %} + {% for (cluster_id, cluster_size, cluster, cluster_rows) in clusters %} - Size: {{ cluster_rows.shape[0] }} + Id: {{cluster_id}} | Size: {{ cluster_size }} {{ cluster_rows.to_html(classes="cluster", header=False, index_names=False) }} {# {% for row in cluster_rows %}