To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 180074ef authored by scmalte's avatar scmalte
Browse files

aggr.py: include more detailed statistics in generated clusters.html

parent 32a3fd7b
......@@ -4,6 +4,7 @@ import csv
import jinja2
import argparse
import pandas as pd
from dataclasses import dataclass
from .utils import logging as logutils
DEFAULT_CLUSTER_FILES_DIR="_clusters"
......@@ -12,6 +13,16 @@ DEFAULT_CLUSTERS_STUDENTS_CSV_FILE="clusters-students.csv"
DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja"
@dataclass
class RelativeDataPoint:
    """A named count expressed relative to a base count.

    Holds a numerator (``part``) and a denominator (``base``) under a
    display name, and exposes their ratio as a rounded percentage.
    """

    name: str  # Data point's name
    part: float  # Relevant elements (i.e. numerator)
    base: float  # All elements (i.e. denominator)

    @property
    def percentage(self, round_to_digits=1):
        """Return ``part`` as a percentage of ``base``, rounded.

        Because this is a property, normal attribute access
        (``data.percentage``) always uses the default of one decimal
        digit. ``round_to_digits`` is honoured only when the getter is
        invoked explicitly, e.g.
        ``RelativeDataPoint.percentage.fget(data, 2)``.

        Returns 0.0 when ``base`` is zero instead of raising
        ``ZeroDivisionError``, so an empty base does not abort the caller.
        """
        if self.base == 0:
            return 0.0
        return round(self.part * 100 / self.base, round_to_digits)
## TODO: Refactor function into separate ones
def aggregate(
edoz_exports, # List of argparse.FileType objects
......@@ -174,11 +185,16 @@ def aggregate(
# print(department_counts)
department_percentage = {}
department_data = []
for dep in department_counts:
department_percentage[dep] = department_counts[dep] / edoz_departements[dep] * 100
data = RelativeDataPoint(
dep,
department_counts[dep],
edoz_departements[dep])
department_data.append(data)
# print(department_percentage)
# print(department_data)
gender_counts = {}
......@@ -191,17 +207,19 @@ def aggregate(
# print(gender_counts)
gender_percentage = {}
gender_data = []
for dep in gender_counts:
gender_percentage[dep] = gender_counts[dep] / course_genders[dep] * 100
data = RelativeDataPoint(
dep,
gender_counts[dep],
course_genders[dep])
# print(gender_percentage)
gender_data.append(data)
percentages = {**department_percentage, **gender_percentage}
for key, value in percentages.items():
percentages[key] = round(value, 1)
# print(gender_data)
# print(percentages)
datapoints = department_data + gender_data
# print(datapoints)
template.stream(
......@@ -210,21 +228,21 @@ def aggregate(
edoz_count=edoz_csv.shape[0],
course_count=course_csv.shape[0],
plagiarist_count=plagiarist_count,
percentages=percentages
datapoints=datapoints
).dump("clusters.html")
def configure_cli_parser(parser):
parser.add_argument(
"-ee", "--edoz-exports",
type=argparse.FileType('r'),
type=argparse.FileType("r", encoding="utf-8"),
nargs="+",
help="eDoz student list exports (CSV)",
required=True)
parser.add_argument(
"-ce", "--code-expert-export",
type=argparse.FileType('r'),
type=argparse.FileType("r", encoding="utf-8"),
help="Code Expert student data export (CSV)",
required=True)
......
......@@ -15,8 +15,8 @@
No. of eDoz students: {{ edoz_count }} <br>
No. of CX students: {{ course_count }} <span style="color: #999999">(may include staff)</span><br>
No. of plagiarists: {{ plagiarist_count }} <br>
{% for key, value in percentages.items() %}
{{ key }}: {{ value }}% <br>
{% for data in datapoints %}
{{ data.name }}: {{ data.percentage }}% ({{ data.part }}/{{ data.base }}) <br>
{% endfor %}
<table style="border-spacing: 1em 2em">
<thead>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment