Commit 2fd3d283 authored by scmalte's avatar scmalte

aggr.py, cluster.py: added assertions and todos

parent ab6a6bd9
import os
import logging
import csv
import jinja2
......@@ -13,6 +14,12 @@ def main(
logutils.configure_level_and_format()
if not os.path.isfile(clusters_summary_csv_file):
raise RuntimeError("Cluster summary CSV file {} doesn't exist. Should have been created by mu-cluster.".format(clusters_summary_csv_file))
if not os.path.isfile(cx_course_students_csv_file):
raise RuntimeError("Code Expert course data CSV file {} doesn't exist. Download it from Code Expert as follows: My Courses -> Students -> Export to CSV.".format(cx_course_students_csv_file))
clusters_csv: pd.DataFrame = pd.read_csv(clusters_summary_csv_file)
# Read CX course data, reduce it to the relevant columns, truncate the TotalScore values (which are floats), and set the index column
......@@ -23,6 +30,11 @@ def main(
course_csv.set_index("Legi", inplace=True)
## TODO: Remove staff from course_csv
## TODO: Make eDoz files configurable
## TODO: Make eDoz files optional
## TODO: Could integrate eDoz data "Leistungskontrollen" to determine whether
## or not a student is a repeater
# Analogous for eDoz course data
relevant_edoz_columns = ["Nummer", "Departement"]
edoz1_csv: pd.DataFrame = pd.read_csv("edoz-252083200L.csv", sep="\t")
......@@ -39,11 +51,6 @@ def main(
# print(edoz2_csv.index)
# print("edoz2_csv.index.is_unique = {}".format(edoz2_csv.index.is_unique))
## TODO: Could integrate eDoz data "Leistungskontrollen" to determine whether
## or not a student is a repeater
# Vertically concatenate the eDoz data. Since students may be enrolled in multiple
# courses, duplicate rows are dropped afterwards.
edoz_csv: pd.DataFrame = pd.concat([edoz1_csv, edoz2_csv])
......
......@@ -10,6 +10,13 @@ import networkx as nx
from dataclass_csv import DataclassReader
from .utils import logging as logutils
## TODO: cluster.py could create a first, less detailed version of the
## clusters.html report, by extracting the strictly necessary information
## (student name and e-mail address) from the details.json file located
## in the CX export. This information would already be enough to generate
## e-mails afterwards.
## aggr.py would then be optional, needed only if a more detailed cluster report is desired.
# Default file names for the results CSV and the two DOT graph files.
DEFAULT_RESULTS_CSV_FILE = "moss-report.csv"
DEFAULT_TOTAL_GRAPH_DOT_FILE = "moss-report.dot"
DEFAULT_CLUSTERS_DOT_FILE = "clusters.dot"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment