To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 32a3fd7b authored by scmalte's avatar scmalte
Browse files

aggr.py: added CLI option for specifying Code Export CSV file

parent e3e13b4b
......@@ -40,13 +40,13 @@ Argument `--upgrade` (re)installs the package, even if the version number hasn't
1. Run `mu-aggr`.
**NOTE:** `mu-aggr` is not yet configurable and expects `mu-cluster` to have used its default file and directory names!
**NOTE:** `mu-aggr` is not yet *fully* configurable and expects `mu-cluster` to have used its default file and directory names! Run `mu-aggr --help` for options that can already be configured.
`mu-aggr` combines the cluster data with exports from eDoz and Code Expert, and generates a cluster report as an HTML file.
* Code Expert export: `My Courses -> Students (Student Overview) -> Export to CSV` and save `./cx_students.csv`
* Code Expert export: *My Courses* → *Students (Student Overview)* → *Export to CSV*. Pass the file to `mu-aggr` via the mandatory `--code-expert-export` option.
* eDoz exports: **TODO:** `mu-aggr` is currently not reusable, since eDoz export files are hardcoded. Must make them configurable!
* eDoz exports: *Communication/List* → *Course units* → *<Your course>* → *Export data (Zip/Text)*, then extract the CSV (`.txt`) file from the downloaded ZIP file. Pass the files to `mu-aggr` via the mandatory `--edoz-exports` option.
## Python Package Tutorials
......
......@@ -10,16 +10,16 @@ DEFAULT_CLUSTER_FILES_DIR="_clusters"
DEFAULT_CLUSTERS_MATCHES_CSV_FILE="clusters-matches.csv"
DEFAULT_CLUSTERS_STUDENTS_CSV_FILE="clusters-students.csv"
DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_CX_COURSE_STUDENTS_CSV_FILE="cx_students.csv"
DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja"
## TODO: Refactor function into separate ones
def aggregate(
edoz_exports, # List of argparse.FileType objects
cx_course_students_csv_file, # Single argparse.FileType object
cluster_files_dir=DEFAULT_CLUSTER_FILES_DIR,
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE,
cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN,
cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE,
jinja_cluster_template_file=DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE):
logutils.configure_level_and_format()
......@@ -27,9 +27,6 @@ def aggregate(
if not os.path.isfile(clusters_matches_csv_file):
raise RuntimeError("CSV file {} with matches per clusters doesn't exist. Should have been created by mu-cluster.".format(clusters_matches_csv_file))
if not os.path.isfile(cx_course_students_csv_file):
raise RuntimeError("Code Expert course data CSV file {} doesn't exist. Download it from Code Expert as follows: My Courses -> Students -> Export to CSV.".format(cx_course_students_csv_file))
clusters_csv: pd.DataFrame = pd.read_csv(clusters_matches_csv_file)
# Read CX course data, reduce to relevant columns, truncate TotalScore (which are floats), set index column
......@@ -40,8 +37,6 @@ def aggregate(
course_csv.set_index("Legi", inplace=True)
## TODO: Remove staff from course_csv
## TODO: Make eDoz files configurable
## TODO: Make eDoz files optional
## TODO: Could integrate eDoz data "Leistungskontrollen" to get information whether
## or not a student is a repeater
......@@ -107,7 +102,13 @@ def aggregate(
# # https://pandas.pydata.org/pandas-docs/stable/getting_started/comparison/comparison_with_sql.html#compare-with-sql-join
# join = pd.merge(ids, course_csv, left_index=True, right_index=True)
cluster_course_rows: pd.DataFrame = course_csv.loc[ids_values]
# Select rows for list of indices ids_values.
# If there is no row for a given index — e.g. when a master solution was sent
# to MOSS as an additional submission — a row with all NaNs is returned.
# See also https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike.
cluster_course_rows: pd.DataFrame = course_csv.reindex(ids_values)
# Preceding row selection is equivalent to the following, iff all indices exist:
# cluster_course_rows: pd.DataFrame = course_csv.loc[ids_values]
# print("========== cluster ")
# print(cluster.shape)
......@@ -215,12 +216,18 @@ def aggregate(
def configure_cli_parser(parser):
parser.add_argument(
"-e", "--edoz-exports",
"-ee", "--edoz-exports",
type=argparse.FileType('r'),
nargs="+",
help="eDoz student list exports (CSV)",
required=True)
parser.add_argument(
"-ce", "--code-expert-export",
type=argparse.FileType('r'),
help="Code Expert student data export (CSV)",
required=True)
logutils.add_loglevel_argument(parser)
......@@ -229,7 +236,7 @@ def main(
clusters_matches_csv_file=DEFAULT_CLUSTERS_MATCHES_CSV_FILE,
clusters_students_csv_file=DEFAULT_CLUSTERS_STUDENTS_CSV_FILE,
cluster_students_csv_file_pattern=DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN,
cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE,
# cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE,
jinja_cluster_template_file=DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE):
parser = argparse.ArgumentParser()
......@@ -240,11 +247,11 @@ def main(
aggregate(
args.edoz_exports,
args.code_expert_export,
cluster_files_dir,
clusters_matches_csv_file,
clusters_students_csv_file,
cluster_students_csv_file_pattern,
cx_course_students_csv_file,
jinja_cluster_template_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment