Commit e3e13b4b authored by scmalte
Browse files

aggr.py: added CLI option for specifying eDoz CSV files

parent 296f27f2
......@@ -2,6 +2,7 @@ import os
import logging
import csv
import jinja2
import argparse
import pandas as pd
from .utils import logging as logutils
......@@ -12,7 +13,8 @@ DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv"
DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja"
def main(
def aggregate(
edoz_exports, # List of argparse.FileType objects
cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE,
......@@ -43,25 +45,21 @@ def main(
## TODO: Could integrate eDoz data "Leistungskontrollen" to get information whether
## or not a student is a repeater
# Analogous for eDoz course data
relevant_edoz_columns = ["Nummer", "Departement"]
edoz1_csv: pd.DataFrame = pd.read_csv("edoz-252083200L.csv", sep="\t")
edoz1_csv = edoz1_csv[relevant_edoz_columns]
edoz1_csv.rename(columns={"Nummer": "Legi"}, inplace=True)
edoz1_csv.set_index("Legi", inplace=True)
# print(edoz1_csv)
# print("edoz1_csv.index.is_unique = {}".format(edoz1_csv.index.is_unique))
edoz2_csv: pd.DataFrame = pd.read_csv("edoz-252084800L.csv", sep="\t")
edoz2_csv = edoz2_csv[relevant_edoz_columns]
edoz2_csv.rename(columns={"Nummer": "Legi"}, inplace=True)
edoz2_csv.set_index("Legi", inplace=True)
# print(edoz2_csv.index)
# print("edoz2_csv.index.is_unique = {}".format(edoz2_csv.index.is_unique))
individual_edoz_csv_frames = []
for csvfile in edoz_exports:
relevant_edoz_columns = ["Nummer", "Departement"]
edoz_csv: pd.DataFrame = pd.read_csv(csvfile, sep="\t")
edoz_csv = edoz_csv[relevant_edoz_columns]
edoz_csv.rename(columns={"Nummer": "Legi"}, inplace=True)
edoz_csv.set_index("Legi", inplace=True)
# print(edoz_csv)
# print("edoz_csv.index.is_unique = {}".format(edoz_csv.index.is_unique))
individual_edoz_csv_frames.append(edoz_csv)
# Vertically concat eDoz data. Since students may be enrolled into multiple
# courses, duplicated rows are afterwards dropped.
edoz_csv: pd.DataFrame = pd.concat([edoz1_csv, edoz2_csv])
edoz_csv: pd.DataFrame = pd.concat(individual_edoz_csv_frames)
# print("========== edoz_csv [initial]")
# print(edoz_csv.shape)
# print(edoz_csv)
......@@ -215,5 +213,40 @@ def main(
).dump("clusters.html")
def configure_cli_parser(parser):
    """Register this module's command-line options on ``parser``.

    Adds the required ``-e``/``--edoz-exports`` option, which accepts one or
    more files (opened for reading by argparse), and then hands the parser to
    ``logutils.add_loglevel_argument`` so it can register its own option(s).
    """
    # Settings for the eDoz export option, gathered in one place.
    edoz_option = {
        "type": argparse.FileType('r'),
        "nargs": "+",
        "help": "eDoz student list exports (CSV)",
        "required": True,
    }
    parser.add_argument("-e", "--edoz-exports", **edoz_option)

    logutils.add_loglevel_argument(parser)
def main(
        cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
        clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
        clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE,
        cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN,
        cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE,
        jinja_cluster_template_file=DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE):
    """Command-line entry point: parse arguments, set up logging, aggregate.

    The keyword parameters are forwarded unchanged to ``aggregate``; the eDoz
    export files come from the ``-e``/``--edoz-exports`` command-line option.
    """
    cli = argparse.ArgumentParser()
    configure_cli_parser(cli)
    options = cli.parse_args()

    # NOTE(review): ``log_level`` is presumably registered by
    # logutils.add_loglevel_argument -- confirm against that helper.
    logutils.configure_level_and_format(options.log_level)

    # Forwarded positionally, in the same order as before.
    aggregate_args = (
        options.edoz_exports,
        cluster_files_dir,
        clusters_matches_csv_file,
        clusters_students_csv_file,
        cluster_students_csv_file_pattern,
        cx_course_students_csv_file,
        jinja_cluster_template_file,
    )
    aggregate(*aggregate_args)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment