diff --git a/README.md b/README.md index 0d1955fb6998de108b4f0385ed133584ca32756c..05b260369b4caa228ee0f4c9c2b694164bdbec03 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,10 @@ A collection for useful scripts for working with Moss in the context of ETH (eDo ## Installation -``` -$ pip install --upgrade git+https://gitlab.ethz.ch/scmalte/mossutils.git` +```shell +$ pip install --upgrade git+https://gitlab.ethz.ch/scmalte/mossutils.git +... +Successfully installed [...] mossutils [...] ``` Argument `--upgrade` (re)installs the package, even if the version number hasn't changed. @@ -14,11 +16,11 @@ Argument `--upgrade` (re)installs the package, even if the version number hasn't **TODO:** Add instructions for how to obtain and prepare the involved data. See also `preprocessing/README.md`. -1. Obtain and prepare data +1. Obtain and prepare data, see `preprocessing/README.md`. -1. Run `mu-moss --help` for arguments that can/must be configured. Afterwards, run `mu-moss` as desired. +1. Run `mu-moss --help` for arguments that can/must be configured. Afterwards, run `mu-moss` as desired, e.g. `mu-moss -u 1234 -n 300 "./ex1/*/main.cpp"`. - `mu-moss` connects to the Moss service, uploads submissions and downloads the generated report. + `mu-moss` connects to the Moss service, uploads submissions and downloads the generated report. This may take a while, and will probably not work for large `-n` values (3000 worked for me, 10,000 didn't). 1. Run `mu-revise`. diff --git a/mossutils/aggr.py b/mossutils/aggr.py index 9488d37e08eda262f0d3357ff52213e90434e098..69ff7f06488dafafec8d473172f4904d1dd5f956 100644 --- a/mossutils/aggr.py +++ b/mossutils/aggr.py @@ -1,3 +1,4 @@ +import os import logging import csv import jinja2 @@ -13,6 +14,12 @@ def main( logutils.configure_level_and_format() + if not os.path.isfile(clusters_summary_csv_file): + raise RuntimeError("Cluster summary CSV file {} doesn't exist. 
Should have been created by mu-cluster.".format(clusters_summary_csv_file)) + + if not os.path.isfile(cx_course_students_csv_file): + raise RuntimeError("Code Expert course data CSV file {} doesn't exist. Download it from Code Expert as follows: My Courses -> Students -> Export to CSV.".format(cx_course_students_csv_file)) + clusters_csv: pd.DataFrame = pd.read_csv(clusters_summary_csv_file) # Read CX course data, reduce to relevant columns, truncate TotalScore (which are floats), set index column @@ -23,6 +30,11 @@ def main( course_csv.set_index("Legi", inplace=True) ## TODO: Remove staff from course_csv + ## TODO: Make eDoz files configurable + ## TODO: Make eDoz files optional + ## TODO: Could integrate eDoz data "Leistungskontrollen" to get information whether + ## or not a student is a repeater + # Analogous for eDoz course data relevant_edoz_columns = ["Nummer", "Departement"] edoz1_csv: pd.DataFrame = pd.read_csv("edoz-252083200L.csv", sep="\t") @@ -39,11 +51,6 @@ def main( # print(edoz2_csv.index) # print("edoz2_csv.index.is_unique = {}".format(edoz2_csv.index.is_unique)) - - ## TODO: Could integrate eDoz data "Leistungskontrollen" to get information whether - ## or not a student is a repeater - - # Vertically concat eDoz data. Since students may be enrolled into multiple # courses, duplicated rows are afterwards dropped. edoz_csv: pd.DataFrame = pd.concat([edoz1_csv, edoz2_csv]) diff --git a/mossutils/cluster.py b/mossutils/cluster.py index d3e0f0aed648fa6a3bc94e9d0674864bfb90061e..90ddb90fe3abf693686a10b7c6c9a7a7380ea532 100644 --- a/mossutils/cluster.py +++ b/mossutils/cluster.py @@ -10,6 +10,15 @@ import networkx as nx from dataclass_csv import DataclassReader from .utils import logging as logutils +## TODO: cluster.py could create a first, less detailed version of the +## clusters.html report, by extracting the strictly necessary information +## (student name and e-mail address) from the details.json file located +## in the CX export. 
This information would already be enough to generate +## e-mails afterwards. +## aggr.py would then be optional, if a more detailed cluster report is desired. +## +## TODO: Generate DOT, SVG and CSV files in a subdirectory, e.g. "_clusters" + DEFAULT_RESULTS_CSV_FILE="moss-report.csv" DEFAULT_TOTAL_GRAPH_DOT_FILE="moss-report.dot" DEFAULT_CLUSTERS_DOT_FILE="clusters.dot" @@ -123,7 +132,7 @@ def get_results_graph(results, percentage_threshold, lines_threshold): return graph def create_cluster_dot_and_svg_files(subgraph, index, cluster_dot_file, cluster_svg_file=None): - logging.debug( + logging.debug( "Writing cluster {} with {}/{} nodes/edge to file {}".format( index, subgraph.number_of_nodes(), @@ -133,8 +142,12 @@ def create_cluster_dot_and_svg_files(subgraph, index, cluster_dot_file, cluster_ nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file) if cluster_svg_file: + dot_command = ["dot", "-Tsvg", "-o{}".format(cluster_svg_file), cluster_dot_file] + logging.debug("Calling dot to create SVG {} file from {}".format(cluster_svg_file, cluster_dot_file)) - subprocess.run(["dot", "-Tsvg", "-o{}".format(cluster_svg_file), cluster_dot_file]) + logging.debug("Command: {}".format(" ".join(dot_command))) + + subprocess.run(dot_command) def create_clusters(graph, cluster_file_pattern, create_svg_files): logging.info("Computing connected component (CC) clusters") diff --git a/mossutils/moss.py b/mossutils/moss.py index 4fd6d21b2b82f3aad4dee7caca41ba5314c52d8f..d28842b68ac883d1af8ea0061b2df273b10d6bc2 100644 --- a/mossutils/moss.py +++ b/mossutils/moss.py @@ -62,8 +62,6 @@ def run_moss( moss.addFilesByWildcard(file_pattern) - exit(0) - logging.info("Sending files to Moss") url = moss.send() # Submission Report URL @@ -128,7 +126,7 @@ def configure_cli_parser(parser): "pattern", type=str, default=DEFAULT_FILE_PATTERN, - help="Pattern for files to send to Moss (e.g.: {})".format(DEFAULT_FILE_PATTERN)) + help="Pattern for files to send to Moss (e.g.: '{}'). 
Must be in quotes!".format(DEFAULT_FILE_PATTERN)) def main(): parser = argparse.ArgumentParser() diff --git a/mossutils/revise.py b/mossutils/revise.py index f77181c4eb4e93c467b93c3319d00f5001e37322..c6c2d6ff8b7f13691d8dc5564a7fb64745019161 100644 --- a/mossutils/revise.py +++ b/mossutils/revise.py @@ -81,7 +81,8 @@ def localize_match_links(doc, input_report_subdir): url_pattern = r"http://moss\.stanford\.edu/results/\d+/\d+/(match.*\.html)" # E.g. ./12-345-678/main.cpp (77%) - text_pattern = r"\./([\d-]+)/.* (\(\d+%\))" + # ./some/dir/12-345-678/main.cpp (77%) + text_pattern = r".*?/([\d-]+)/.* (\(\d+%\))" logging.info("Localising links to match files") @@ -92,14 +93,22 @@ def localize_match_links(doc, input_report_subdir): for a in row.find_all("a"): # Change remote URLs to local ones url_match = re.search(url_pattern, a["href"]) + + if not url_match: + raise RuntimeError("Failure while localising match links in the Moss report. Failed to match link '{}' against regex '{}'".format(a["href"], url_pattern)) + a["href"] = "./{}/{}".format(input_report_subdir, url_match.group(1)) # Open links in a new tab/window a["target"] = "_blank" # Strip away unnecessary link text - # print(a.get_text().strip()) - text_match = re.search(text_pattern, a.get_text().strip())#.group(1) + link_text = a.get_text().strip() + text_match = re.search(text_pattern, link_text) + + if not text_match: + raise RuntimeError("Failure while localising match links in the Moss report. 
Failed to match link text '{}' against regex '{}'".format(link_text, text_pattern)) + a.string = "{} {}".format(text_match.group(1), text_match.group(2)) def get_match_percentage(match_text): diff --git a/preprocessing/README.md b/preprocessing/README.md index 6978b47ee69def10181e659a376d69f4e8c53305..25d124f53a1c55322229913fc5f95273bdd17640 100644 --- a/preprocessing/README.md +++ b/preprocessing/README.md @@ -2,25 +2,16 @@ ## Prerequisites -* Tested with Python 3.7 -* Script `run_-_moss.py` - * https://pypi.org/project/mosspy/ - * `pip install mosspy` * Script `rename_to_legi.sh`: * https://stedolan.github.io/jq/ * Download `jq` and add to path -* Script `visualize.py` - * https://github.com/hjalti/mossum - * `pip3 install git+https://github.com/hjalti/mossum@master` - * Replace `/Lib/site-packages/mossum/mossum.py` with `MODIFIED-mossum.py` - * http://networkx.github.io/ - * `pip install networkx` + * TODO: Re-implement shell script in Python ## Tidying up files and directories * `cx-dump_bonus-exercise-1_2020-04-17.zip` contains the latest submission from each user -* Execute next commands on the level of the user directories, e.g. in `./bonus_exercise_1`, so that, e.g. `./bonus_exercise_1/` are the individual user directories +* Execute commands on the level of the user directories, e.g. in `./bonus_exercise_1`, so that, e.g. `./bonus_exercise_1/` are the individual user directories * Assumption: relevant files per submission are `/details.json` and `/files/main.cpp`, whereas all other files and directories can be deleted: @@ -29,9 +20,9 @@ ```plain $ cd ./bonus_exercise_1 - $ find -type f ! \( -iname details.json -or -iname main.cpp \) -delete + $ find -type f ! \( -iname details.json -or -iname main.cpp \) -delete -print - $ find . -type d -iname cx_data -delete + $ find . 
-type d -iname cx_data -delete -print ``` * Move `<user>/files/main.cpp` to `<user>/main.cpp` and delete the (now empty) directory `<user>/files`: @@ -39,11 +30,13 @@ ```plain $ cd ./bonus_exercise_1 - $ find . -type d -iname files -execdir mv ./files/main.cpp . \; + $ find . -type d -iname files -execdir mv ./files/main.cpp . \; -print - $ find . -type d -iname files -delete + $ find . -type d -iname files -delete -print ``` +* Now, each `<user>` directory should only have two files in it: `<user>/main.cpp` and `<user>/details.json` + ## Renaming user directories * Rename directories from user names to Legi numbers before submitting data to Moss, e.g. rename `scmalte` to `01-234-567`. The file `<user>/details.json` provides the Legi number. @@ -53,37 +46,7 @@ ```plain $ cd ./bonus_exercise_1 - $ ../rename_to_legi.sh + $ <path-to>/rename_to_legi.sh ``` The script prompts for confirmation before the first renaming is executed. - -## Moss - -### moss.py - -* Edit `moss.py` and check configuration - -* Execute `moss.py` from e.g. `./bonus_exercise_1/`: - - ```plain - $ cd ./bonus_exercise_1/ - - $ python ../moss.py - ``` - -* If not configured otherwise, open `./bonus_exercise_1/moss-report.html` in your browser - -### clusters.py - -* Edit `clusters.py` and check configuration - -* Execute `clusters.py` from e.g. `./bonus_exercise_1/`: - - ```plain - $ cd ./bonus_exercise_1/ - - $ python ../clusters.py - ``` - -* If not configured otherwise, open `./bonus_exercise_1/clusters.html` in your browser