From e3e13b4b56bcce9f40ab7ff8d65327d89427fa99 Mon Sep 17 00:00:00 2001 From: Malte Schwerhoff Date: Sun, 8 Nov 2020 11:38:22 +0100 Subject: [PATCH] aggr.py: added CLI option for specifying eDoz CSV files --- mossutils/aggr.py | 67 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/mossutils/aggr.py b/mossutils/aggr.py index 854ab63..2fda8a9 100644 --- a/mossutils/aggr.py +++ b/mossutils/aggr.py @@ -2,6 +2,7 @@ import os import logging import csv import jinja2 +import argparse import pandas as pd from .utils import logging as logutils @@ -12,7 +13,8 @@ DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv" DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv" DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja" -def main( +def aggregate( + edoz_exports, # List of argparse.FileType objects cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR, clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE, clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE, @@ -43,25 +45,21 @@ def main( ## TODO: Could integrate eDoz data "Leistungskontrollen" to get information whether ## or not a student is a repeater - # Analogous for eDoz course data - relevant_edoz_columns = ["Nummer", "Departement"] - edoz1_csv: pd.DataFrame = pd.read_csv("edoz-252083200L.csv", sep="\t") - edoz1_csv = edoz1_csv[relevant_edoz_columns] - edoz1_csv.rename(columns={"Nummer": "Legi"}, inplace=True) - edoz1_csv.set_index("Legi", inplace=True) - # print(edoz1_csv) - # print("edoz1_csv.index.is_unique = {}".format(edoz1_csv.index.is_unique)) - - edoz2_csv: pd.DataFrame = pd.read_csv("edoz-252084800L.csv", sep="\t") - edoz2_csv = edoz2_csv[relevant_edoz_columns] - edoz2_csv.rename(columns={"Nummer": "Legi"}, inplace=True) - edoz2_csv.set_index("Legi", inplace=True) - # print(edoz2_csv.index) - # print("edoz2_csv.index.is_unique = {}".format(edoz2_csv.index.is_unique)) + individual_edoz_csv_frames = [] + for csvfile in 
edoz_exports: + relevant_edoz_columns = ["Nummer", "Departement"] + edoz_csv: pd.DataFrame = pd.read_csv(csvfile, sep="\t") + edoz_csv = edoz_csv[relevant_edoz_columns] + edoz_csv.rename(columns={"Nummer": "Legi"}, inplace=True) + edoz_csv.set_index("Legi", inplace=True) + # print(edoz_csv) + # print("edoz_csv.index.is_unique = {}".format(edoz_csv.index.is_unique)) + + individual_edoz_csv_frames.append(edoz_csv) # Vertically concat eDoz data. Since students may be enrolled into multiple # courses, duplicated rows are afterwards dropped. - edoz_csv: pd.DataFrame = pd.concat([edoz1_csv, edoz2_csv]) + edoz_csv: pd.DataFrame = pd.concat(individual_edoz_csv_frames) # print("========== edoz_csv [initial]") # print(edoz_csv.shape) # print(edoz_csv) @@ -215,5 +213,40 @@ def main( ).dump("clusters.html") +def configure_cli_parser(parser): + parser.add_argument( + "-e", "--edoz-exports", + type=argparse.FileType('r'), + nargs="+", + help="eDoz student list exports (CSV)", + required=True) + + logutils.add_loglevel_argument(parser) + + +def main( + cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR, + clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE, + clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE, + cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN, + cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE, + jinja_cluster_template_file=DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE): + + parser = argparse.ArgumentParser() + configure_cli_parser(parser) + args = parser.parse_args() + + logutils.configure_level_and_format(args.log_level) + + aggregate( + args.edoz_exports, + cluster_files_dir, + clusters_matches_csv_file, + clusters_students_csv_file, + cluster_students_csv_file_pattern, + cx_course_students_csv_file, + jinja_cluster_template_file) + + if __name__ == "__main__": main() -- GitLab