To receive notifications about scheduled maintenance, please subscribe to the mailing list gitlab-operations@sympa.ethz.ch. You can subscribe at https://sympa.ethz.ch

Commit bd514b9e authored by scmalte
Browse files

cluster.py, aggr.py: write separate cluster CSV files: one file with the matches per cluster, and one file per cluster with its students

parent e75780eb
......@@ -5,22 +5,26 @@ import jinja2
import pandas as pd
from .utils import logging as logutils
DEFAULT_CLUSTERS_SUMMARY_CSV_FILE="clusters.csv"
DEFAULT_CLUSTER_FILES_DIR="_clusters"
DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv"
DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv"
def main(
clusters_summary_csv_file=DEFAULT_CLUSTERS_SUMMARY_CSV_FILE,
cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN,
cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE):
logutils.configure_level_and_format()
if not os.path.isfile(clusters_summary_csv_file):
raise RuntimeError("Cluster summary CSV file {} doesn't exist. Should have been created by mu-cluster.".format(clusters_summary_csv_file))
if not os.path.isfile(clusters_matches_csv_file):
raise RuntimeError("CSV file {} with matches per clusters doesn't exist. Should have been created by mu-cluster.".format(clusters_matches_csv_file))
if not os.path.isfile(cx_course_students_csv_file):
raise RuntimeError("Code Expert course data CSV file {} doesn't exist. Download it from Code Expert as follows: My Courses -> Students -> Export to CSV.".format(cx_course_students_csv_file))
clusters_csv: pd.DataFrame = pd.read_csv(clusters_summary_csv_file)
clusters_csv: pd.DataFrame = pd.read_csv(clusters_matches_csv_file)
# Read CX course data, reduce to relevant columns, truncate TotalScore (which are floats), set index column
relevant_course_columns = ["Legi", "Lastname", "Firstname", "Email", "Gender", "TotalScore"]
......@@ -86,7 +90,7 @@ def main(
jinja2_rows = []
cluster_groups: pd.DataFrameGroupBy = clusters_csv.groupby("cluster_id")
for _, cluster in cluster_groups: # cluster: pd.DataFrame
for cluster_id, cluster in cluster_groups: # cluster: pd.DataFrame
# print("-"*60)
# Get all ids (= legis) participating in a cluster
ids_values: numpy.ndarray = pd.concat([cluster["id1"], cluster["id2"]]).unique()
......@@ -115,9 +119,17 @@ def main(
cluster_rows: pd.DataFrame = cluster_course_rows.join(edoz_csv)
students_per_clusters_file = os.path.join(
cluster_files_dir,
cluster_students_csv_file_pattern.format(cluster_id))
logging.info("Writing students per clusters to file {}".format(students_per_clusters_file))
cluster_rows.to_csv(students_per_clusters_file)
# print("========== cluster_rows")
# print(cluster_rows.shape)
# print(cluster_rows)
# print(name)
# print(cluster)
# print(cluster["svg_file"].iat[0])
......
......@@ -27,7 +27,7 @@ DEFAULT_CLUSTER_FILE_PATTERN="cluster-{}.{}"
DEFAULT_THRESHOLD_PERCENTAGE=90
DEFAULT_THRESHOLD_LINES=50
DEFAULT_CREATE_SVG_FILES=True
DEFAULT_SUMMARY_CSV_FILE="clusters.csv"
DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv"
@dataclasses.dataclass
class MossResult:
......@@ -184,11 +184,11 @@ def create_clusters(graph, cluster_file_pattern, create_svg_files):
return cluster_entries
def create_summary_csv_file(cluster_entries, summary_csv_file):
logging.info("Writing summary file {}".format(summary_csv_file))
def create_clusters_matches_csv_file(cluster_entries, clusters_matches_csv_file):
logging.info("Writing file with matches per clusters {}".format(clusters_matches_csv_file))
if cluster_entries:
with open(summary_csv_file, "w", newline="") as csv_fh:
with open(clusters_matches_csv_file, "w", newline="") as csv_fh:
csv_writer = csv.writer(csv_fh)
csv_writer.writerow(cluster_entries[0].fields_flattened())
......@@ -204,7 +204,7 @@ def main(
percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE,
lines_threshold=DEFAULT_THRESHOLD_LINES,
create_svg_files=DEFAULT_CREATE_SVG_FILES,
summary_csv_file=DEFAULT_SUMMARY_CSV_FILE):
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE):
logutils.configure_level_and_format()
......@@ -222,7 +222,7 @@ def main(
os.path.join(cluster_files_dir, cluster_file_pattern),
create_svg_files)
create_summary_csv_file(cluster_entries, summary_csv_file)
create_clusters_matches_csv_file(cluster_entries, clusters_matches_csv_file)
if __name__ == "__main__":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment