Commit 180074ef authored by scmalte

aggr.py: include more detailed statistics in generated clusters.html

parent 32a3fd7b
...@@ -4,6 +4,7 @@ import csv ...@@ -4,6 +4,7 @@ import csv
import jinja2 import jinja2
import argparse import argparse
import pandas as pd import pandas as pd
from dataclasses import dataclass
from .utils import logging as logutils from .utils import logging as logutils
DEFAULT_CLUSTER_FILES_DIR="_clusters" DEFAULT_CLUSTER_FILES_DIR="_clusters"
...@@ -12,6 +13,16 @@ DEFAULT_CLUSTERS_STUDENTS_CSV_FILE="clusters-students.csv" ...@@ -12,6 +13,16 @@ DEFAULT_CLUSTERS_STUDENTS_CSV_FILE="clusters-students.csv"
DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv" DEFAULT_CLUSTER_STUDENTS_CSV_FILE_PATTERN="cluster-students-{}.csv"
DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja" DEFAULT_JINJA_CLUSTER_TEMPLATE_FILE="./_static/clusters.html.jinja"
@dataclass
class RelativeDataPoint:
    """A named count expressed relative to a total.

    Stores the raw numerator and denominator so that both the percentage
    and the underlying absolute numbers can be displayed together.
    """

    name: str  # Data point's name
    part: float  # Relevant elements (i.e. numerator)
    base: float  # All elements (i.e. denominator)

    @property
    def percentage(self, round_to_digits=1):
        """Return ``part`` as a percentage of ``base``, rounded.

        Accessed as a property, the result is always rounded to one decimal
        place, because the property protocol passes only ``self`` and
        ``round_to_digits`` therefore keeps its default. The parameter is
        honoured when the getter is invoked explicitly, e.g.
        ``RelativeDataPoint.percentage.fget(point, 2)``.

        Returns 0.0 when ``base`` is zero instead of raising
        ``ZeroDivisionError``, so that one empty category does not abort
        the caller.
        """
        if not self.base:
            return 0.0
        return round(self.part * 100 / self.base, round_to_digits)
## TODO: Refactor function into separate ones ## TODO: Refactor function into separate ones
def aggregate( def aggregate(
edoz_exports, # List of argparse.FileType objects edoz_exports, # List of argparse.FileType objects
...@@ -174,11 +185,16 @@ def aggregate( ...@@ -174,11 +185,16 @@ def aggregate(
# print(department_counts) # print(department_counts)
department_percentage = {} department_data = []
for dep in department_counts: for dep in department_counts:
department_percentage[dep] = department_counts[dep] / edoz_departements[dep] * 100 data = RelativeDataPoint(
dep,
department_counts[dep],
edoz_departements[dep])
department_data.append(data)
# print(department_percentage) # print(department_data)
gender_counts = {} gender_counts = {}
...@@ -191,17 +207,19 @@ def aggregate( ...@@ -191,17 +207,19 @@ def aggregate(
# print(gender_counts) # print(gender_counts)
gender_percentage = {} gender_data = []
for dep in gender_counts: for dep in gender_counts:
gender_percentage[dep] = gender_counts[dep] / course_genders[dep] * 100 data = RelativeDataPoint(
dep,
gender_counts[dep],
course_genders[dep])
gender_data.append(data)
# print(gender_percentage) # print(gender_data)
percentages = {**department_percentage, **gender_percentage}
for key, value in percentages.items():
percentages[key] = round(value, 1)
# print(percentages) datapoints = department_data + gender_data
# print(datapoints)
template.stream( template.stream(
...@@ -210,21 +228,21 @@ def aggregate( ...@@ -210,21 +228,21 @@ def aggregate(
edoz_count=edoz_csv.shape[0], edoz_count=edoz_csv.shape[0],
course_count=course_csv.shape[0], course_count=course_csv.shape[0],
plagiarist_count=plagiarist_count, plagiarist_count=plagiarist_count,
percentages=percentages datapoints=datapoints
).dump("clusters.html") ).dump("clusters.html")
def configure_cli_parser(parser): def configure_cli_parser(parser):
parser.add_argument( parser.add_argument(
"-ee", "--edoz-exports", "-ee", "--edoz-exports",
type=argparse.FileType('r'), type=argparse.FileType("r", encoding="utf-8"),
nargs="+", nargs="+",
help="eDoz student list exports (CSV)", help="eDoz student list exports (CSV)",
required=True) required=True)
parser.add_argument( parser.add_argument(
"-ce", "--code-expert-export", "-ce", "--code-expert-export",
type=argparse.FileType('r'), type=argparse.FileType("r", encoding="utf-8"),
help="Code Expert student data export (CSV)", help="Code Expert student data export (CSV)",
required=True) required=True)
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
No. of eDoz students: {{ edoz_count }} <br> No. of eDoz students: {{ edoz_count }} <br>
No. of CX students: {{ course_count }} <span style="color: #999999">(may include staff)</span><br> No. of CX students: {{ course_count }} <span style="color: #999999">(may include staff)</span><br>
No. of plagiarists: {{ plagiarist_count }} <br> No. of plagiarists: {{ plagiarist_count }} <br>
{% for key, value in percentages.items() %} {% for data in datapoints %}
{{ key }}: {{ value }}% <br> {{ data.name }}: {{ data.percentage }}% ({{ data.part }}/{{ data.base }}) <br>
{% endfor %} {% endfor %}
<table style="border-spacing: 1em 2em"> <table style="border-spacing: 1em 2em">
<thead> <thead>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment