To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit bd514b9e authored by scmalte
Browse files

cluster.py, aggr.py: different cluster CSV files, one with matches, individual ones with students

parent e75780eb
...@@ -5,22 +5,26 @@ import jinja2 ...@@ -5,22 +5,26 @@ import jinja2
import pandas as pd import pandas as pd
from .utils import logging as logutils from .utils import logging as logutils
DEFAULT_CLUSTERS_SUMMARY_CSV_FILE="clusters.csv" DEFAULT_CLUSTER_FILES_DIR="_clusters"
DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv"
DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv" DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv"
def main( def main(
clusters_summary_csv_file=DEFAULT_CLUSTERS_SUMMARY_CSV_FILE, cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN,
cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE): cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE):
logutils.configure_level_and_format() logutils.configure_level_and_format()
if not os.path.isfile(clusters_summary_csv_file): if not os.path.isfile(clusters_matches_csv_file):
raise RuntimeError("Cluster summary CSV file {} doesn't exist. Should have been created by mu-cluster.".format(clusters_summary_csv_file)) raise RuntimeError("CSV file {} with matches per clusters doesn't exist. Should have been created by mu-cluster.".format(clusters_matches_csv_file))
if not os.path.isfile(cx_course_students_csv_file): if not os.path.isfile(cx_course_students_csv_file):
raise RuntimeError("Code Expert course data CSV file {} doesn't exist. Download it from Code Expert as follows: My Courses -> Students -> Export to CSV.".format(cx_course_students_csv_file)) raise RuntimeError("Code Expert course data CSV file {} doesn't exist. Download it from Code Expert as follows: My Courses -> Students -> Export to CSV.".format(cx_course_students_csv_file))
clusters_csv: pd.DataFrame = pd.read_csv(clusters_summary_csv_file) clusters_csv: pd.DataFrame = pd.read_csv(clusters_matches_csv_file)
# Read CX course data, reduce to relevant columns, truncate TotalScore (which are floats), set index column # Read CX course data, reduce to relevant columns, truncate TotalScore (which are floats), set index column
relevant_course_columns = ["Legi", "Lastname", "Firstname", "Email", "Gender", "TotalScore"] relevant_course_columns = ["Legi", "Lastname", "Firstname", "Email", "Gender", "TotalScore"]
...@@ -86,7 +90,7 @@ def main( ...@@ -86,7 +90,7 @@ def main(
jinja2_rows = [] jinja2_rows = []
cluster_groups: pd.DataFrameGroupBy = clusters_csv.groupby("cluster_id") cluster_groups: pd.DataFrameGroupBy = clusters_csv.groupby("cluster_id")
for _, cluster in cluster_groups: # cluster: pd.DataFrame for cluster_id, cluster in cluster_groups: # cluster: pd.DataFrame
# print("-"*60) # print("-"*60)
# Get all ids (= legis) participating in a cluster # Get all ids (= legis) participating in a cluster
ids_values: numpy.ndarray = pd.concat([cluster["id1"], cluster["id2"]]).unique() ids_values: numpy.ndarray = pd.concat([cluster["id1"], cluster["id2"]]).unique()
...@@ -115,9 +119,17 @@ def main( ...@@ -115,9 +119,17 @@ def main(
cluster_rows: pd.DataFrame = cluster_course_rows.join(edoz_csv) cluster_rows: pd.DataFrame = cluster_course_rows.join(edoz_csv)
students_per_clusters_file = os.path.join(
cluster_files_dir,
cluster_students_csv_file_pattern.format(cluster_id))
logging.info("Writing students per clusters to file {}".format(students_per_clusters_file))
cluster_rows.to_csv(students_per_clusters_file)
# print("========== cluster_rows") # print("========== cluster_rows")
# print(cluster_rows.shape) # print(cluster_rows.shape)
# print(cluster_rows) # print(cluster_rows)
# print(name)
# print(cluster) # print(cluster)
# print(cluster["svg_file"].iat[0]) # print(cluster["svg_file"].iat[0])
......
...@@ -27,7 +27,7 @@ DEFAULT_CLUSTER_FILE_PATTERN="cluster-{}.{}" ...@@ -27,7 +27,7 @@ DEFAULT_CLUSTER_FILE_PATTERN="cluster-{}.{}"
DEFAULT_THRESHOLD_PERCENTAGE=90 DEFAULT_THRESHOLD_PERCENTAGE=90
DEFAULT_THRESHOLD_LINES=50 DEFAULT_THRESHOLD_LINES=50
DEFAULT_CREATE_SVG_FILES=True DEFAULT_CREATE_SVG_FILES=True
DEFAULT_SUMMARY_CSV_FILE="clusters.csv" DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv"
@dataclasses.dataclass @dataclasses.dataclass
class MossResult: class MossResult:
...@@ -184,11 +184,11 @@ def create_clusters(graph, cluster_file_pattern, create_svg_files): ...@@ -184,11 +184,11 @@ def create_clusters(graph, cluster_file_pattern, create_svg_files):
return cluster_entries return cluster_entries
def create_summary_csv_file(cluster_entries, summary_csv_file): def create_clusters_matches_csv_file(cluster_entries, clusters_matches_csv_file):
logging.info("Writing summary file {}".format(summary_csv_file)) logging.info("Writing file with matches per clusters {}".format(clusters_matches_csv_file))
if cluster_entries: if cluster_entries:
with open(summary_csv_file, "w", newline="") as csv_fh: with open(clusters_matches_csv_file, "w", newline="") as csv_fh:
csv_writer = csv.writer(csv_fh) csv_writer = csv.writer(csv_fh)
csv_writer.writerow(cluster_entries[0].fields_flattened()) csv_writer.writerow(cluster_entries[0].fields_flattened())
...@@ -204,7 +204,7 @@ def main( ...@@ -204,7 +204,7 @@ def main(
percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE, percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE,
lines_threshold=DEFAULT_THRESHOLD_LINES, lines_threshold=DEFAULT_THRESHOLD_LINES,
create_svg_files=DEFAULT_CREATE_SVG_FILES, create_svg_files=DEFAULT_CREATE_SVG_FILES,
summary_csv_file=DEFAULT_SUMMARY_CSV_FILE): clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE):
logutils.configure_level_and_format() logutils.configure_level_and_format()
...@@ -222,7 +222,7 @@ def main( ...@@ -222,7 +222,7 @@ def main(
os.path.join(cluster_files_dir, cluster_file_pattern), os.path.join(cluster_files_dir, cluster_file_pattern),
create_svg_files) create_svg_files)
create_summary_csv_file(cluster_entries, summary_csv_file) create_clusters_matches_csv_file(cluster_entries, clusters_matches_csv_file)
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment