Commit 89930c09 authored by scmalte

aggr.py: added more info to clusters.html

parent a8c8da88
......@@ -7,6 +7,7 @@ from .utils import logging as logutils
DEFAULT_CLUSTER_FILES_DIR="_clusters"
DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv"
DEFAULT_CLUSTERS_STUDENTS_CSV_FILE="clusters-students.csv"
DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv"
DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja"
......@@ -14,6 +15,7 @@ DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja"
def main(
cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE,
cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN,
cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE,
jinja_cluster_template_file=DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE):
......@@ -96,6 +98,7 @@ def main(
jinja2_rows = []
cluster_groups: pd.DataFrameGroupBy = clusters_csv.groupby("cluster_id")
for cluster_id, cluster in cluster_groups: # cluster: pd.DataFrame
# print("-"*60)
# Get all ids (= legis) participating in a cluster
......@@ -129,7 +132,7 @@ def main(
cluster_files_dir,
cluster_students_csv_file_pattern.format(cluster_id))
logging.info("Writing students per clusters to file {}".format(students_per_clusters_file))
logging.info("Writing students from cluster {} to file {}".format(cluster_id, students_per_clusters_file))
cluster_rows.to_csv(students_per_clusters_file)
# print("========== cluster_rows")
......@@ -140,19 +143,30 @@ def main(
# print(cluster)
# print(cluster["svg_file"].iat[0])
jinja2_rows.append((cluster, cluster_rows))
jinja2_rows.append((cluster_id, cluster_rows.shape[0], cluster, cluster_rows))
logging.info("Writing all clusters to file {}".format(clusters_students_csv_file))
write_header = True
write_mode = "w"
for cluster_id, _, _, cluster_rows in jinja2_rows:
cluster_rows["Cluster-ID"] = cluster_id ## Inserts column at the end
# cluster_rows.insert(0, "Cluster-ID", cluster_id) ## Inserts column after index (Legi)
cluster_rows.to_csv(clusters_students_csv_file, mode=write_mode, header=write_header)
write_header = False
write_mode = "a"
## TODO: Support sorting clusters by max (or average) involved percentage
plagiarist_count = 0
for (_, cluster_rows) in jinja2_rows:
plagiarist_count += cluster_rows.shape[0]
for _, size, _, cluster_rows in jinja2_rows:
plagiarist_count += size # cluster_rows.shape[0]
department_counts = {}
for (cluster, cluster_rows) in jinja2_rows:
for _, _, _, cluster_rows in jinja2_rows:
for index, value in cluster_rows["Departement"].value_counts().iteritems():
if index in department_counts:
department_counts[index] += value
......@@ -169,7 +183,7 @@ def main(
gender_counts = {}
for (cluster, cluster_rows) in jinja2_rows:
for _, _, _, cluster_rows in jinja2_rows:
for index, value in cluster_rows["Gender"].value_counts().iteritems():
if index in gender_counts:
gender_counts[index] += value
......
......@@ -26,10 +26,10 @@
</tr>
</thead>
<tbody>
{% for (cluster, cluster_rows) in clusters %}
{% for (cluster_id, cluster_size, cluster, cluster_rows) in clusters %}
<tr>
<td>
Size: {{ cluster_rows.shape[0] }}
Id: {{cluster_id}} | Size: {{ cluster_size }}
{{ cluster_rows.to_html(classes="cluster", header=False, index_names=False) }}
{# <table>
{% for row in cluster_rows %}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment