To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit e3e13b4b authored by scmalte
Browse files

aggr.py: added CLI option for specifying eDoz CSV files

parent 296f27f2
...@@ -2,6 +2,7 @@ import os ...@@ -2,6 +2,7 @@ import os
import logging import logging
import csv import csv
import jinja2 import jinja2
import argparse
import pandas as pd import pandas as pd
from .utils import logging as logutils from .utils import logging as logutils
...@@ -12,7 +13,8 @@ DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv" ...@@ -12,7 +13,8 @@ DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv" DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv"
DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja" DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja"
def main( def aggregate(
edoz_exports, # List of argparse.FileType objects
cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR, cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE, clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE, clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE,
...@@ -43,25 +45,21 @@ def main( ...@@ -43,25 +45,21 @@ def main(
## TODO: Could integrate eDoz data "Leistungskontrollen" to get information whether ## TODO: Could integrate eDoz data "Leistungskontrollen" to get information whether
## or not a student is a repeater ## or not a student is a repeater
# Analogous for eDoz course data individual_edoz_csv_frames = []
relevant_edoz_columns = ["Nummer", "Departement"] for csvfile in edoz_exports:
edoz1_csv: pd.DataFrame = pd.read_csv("edoz-252083200L.csv", sep="\t") relevant_edoz_columns = ["Nummer", "Departement"]
edoz1_csv = edoz1_csv[relevant_edoz_columns] edoz_csv: pd.DataFrame = pd.read_csv(csvfile, sep="\t")
edoz1_csv.rename(columns={"Nummer": "Legi"}, inplace=True) edoz_csv = edoz_csv[relevant_edoz_columns]
edoz1_csv.set_index("Legi", inplace=True) edoz_csv.rename(columns={"Nummer": "Legi"}, inplace=True)
# print(edoz1_csv) edoz_csv.set_index("Legi", inplace=True)
# print("edoz1_csv.index.is_unique = {}".format(edoz1_csv.index.is_unique)) # print(edoz1_csv)
# print("edoz1_csv.index.is_unique = {}".format(edoz1_csv.index.is_unique))
edoz2_csv: pd.DataFrame = pd.read_csv("edoz-252084800L.csv", sep="\t")
edoz2_csv = edoz2_csv[relevant_edoz_columns] individual_edoz_csv_frames.append(edoz_csv)
edoz2_csv.rename(columns={"Nummer": "Legi"}, inplace=True)
edoz2_csv.set_index("Legi", inplace=True)
# print(edoz2_csv.index)
# print("edoz2_csv.index.is_unique = {}".format(edoz2_csv.index.is_unique))
# Vertically concat eDoz data. Since students may be enrolled into multiple # Vertically concat eDoz data. Since students may be enrolled into multiple
# courses, duplicated rows are afterwards dropped. # courses, duplicated rows are afterwards dropped.
edoz_csv: pd.DataFrame = pd.concat([edoz1_csv, edoz2_csv]) edoz_csv: pd.DataFrame = pd.concat(individual_edoz_csv_frames)
# print("========== edoz_csv [initial]") # print("========== edoz_csv [initial]")
# print(edoz_csv.shape) # print(edoz_csv.shape)
# print(edoz_csv) # print(edoz_csv)
...@@ -215,5 +213,40 @@ def main( ...@@ -215,5 +213,40 @@ def main(
).dump("clusters.html") ).dump("clusters.html")
def configure_cli_parser(parser):
    """Register this tool's command-line options on ``parser``.

    Adds the mandatory ``-e``/``--edoz-exports`` option, which accepts one or
    more files that argparse opens for reading, and then hands the parser to
    ``logutils.add_loglevel_argument`` so it can add its own argument(s).
    """
    edoz_option = {
        "type": argparse.FileType('r'),
        "nargs": "+",
        "help": "eDoz student list exports (CSV)",
        "required": True,
    }
    parser.add_argument("-e", "--edoz-exports", **edoz_option)

    logutils.add_loglevel_argument(parser)
def main(
        cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
        clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
        clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE,
        cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN,
        cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE,
        jinja_cluster_template_file=DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE):
    """Command-line entry point: parse the CLI and run ``aggregate``.

    Builds an argument parser via ``configure_cli_parser``, parses the
    process arguments, configures logging from the parsed ``log_level``
    (presumably registered by ``logutils.add_loglevel_argument`` -- confirm
    against that helper) and then calls ``aggregate`` with the parsed eDoz
    exports followed by this function's own parameters, in declaration order.
    """
    cli = argparse.ArgumentParser()
    configure_cli_parser(cli)
    options = cli.parse_args()

    logutils.configure_level_and_format(options.log_level)

    # Forwarded positionally, in the same order as they are declared above.
    forwarded = (
        cluster_files_dir,
        clusters_matches_csv_file,
        clusters_students_csv_file,
        cluster_students_csv_file_pattern,
        cx_course_students_csv_file,
        jinja_cluster_template_file,
    )
    aggregate(options.edoz_exports, *forwarded)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment