To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 6a4e6679 authored by scmalte's avatar scmalte
Browse files

revise.py: added lots of transformations of the HTML report, extract data into CSV file

parent 747eec33
import re
import logging
import argparse
# import argparse
import pkgutil
import os
from bs4 import BeautifulSoup
from .utils import logging as logutils
# Default file name of the original HTML report that main() reads
DEFAULT_INPUT_REPORT_FILE="moss-report.html"
# Default directory used as the prefix of the localized match links
# (see localize_match_links)
DEFAULT_INPUT_REPORT_SUBDIR="_moss-report"
# Default file name of the revised HTML report that main() writes
DEFAULT_OUTPUT_REPORT_FILE="moss-report-revised.html"
# File name pattern handed to prettyprint_html_to_versioned_file;
# presumably "{}" is filled with a running version number -- TODO confirm
DEFAULT_OUTPUT_REPORT_VERSIONED_FILE_PATTERN="moss-report-revised.v{}.html"
# Default directory for the static web files (style.css etc.) used by the report
DEFAULT_STATIC_DIR="_static"
# Default file name of the CSV export of the result table
DEFAULT_RESULTS_CSV_FILE="moss-report.csv"
# NOTE(review): looks like the next version number for versioned output
# files; its use is not visible in this excerpt -- confirm
output_file_version_counter = 1
......@@ -26,13 +28,6 @@ def copy_static_files_to_working_directory(output_dir):
with open(dest_file, "wb") as dest_fh:
dest_fh.write(data)
def get_match_percentage(match_text):
    """Extract the integer percentage from the end of a match label.

    Args:
        match_text: Label text such as ``"./12-345-678/main.cpp (77%)"``.
            The percentage must be the last thing in the text, wrapped in
            parentheses. Leading/trailing whitespace is ignored.

    Returns:
        The percentage as an ``int`` (e.g. ``77``).

    Raises:
        ValueError: If ``match_text`` does not end in ``(<digits>%)``.
    """
    percentage_pattern = r"\((\d+)%\)$"

    # Strip first: the pattern is anchored at the end of the text, so
    # trailing whitespace would otherwise prevent a match
    percentage_match = re.search(percentage_pattern, match_text.strip())

    if percentage_match is None:
        # Fail with a descriptive error instead of an AttributeError on None
        raise ValueError(
            "No trailing percentage of the form '(NN%)' found in {!r}".format(match_text))

    return int(percentage_match.group(1))
def parse_original_html_report(input_report_file):
with open(input_report_file) as input_fh:
# We use BeautifulSoup because it allows easy DOM manipulation and has
......@@ -71,6 +66,34 @@ def make_report_table_sortable(doc, static_dir):
link["rel"] = "stylesheet"
doc.html.head.append(link) # Add to head
def localize_match_links(doc, input_report_subdir):
    """Rewrite remote MOSS match links in the report so they point to local files.

    For every anchor inside a table row whose ``href`` is a remote MOSS match
    URL, the link is redirected to ``./<input_report_subdir>/<match file>``,
    set to open in a new tab/window, and its text is shortened from
    ``./<id>/<path> (NN%)`` to ``<id> (NN%)``.

    Anchors without an ``href``, or whose ``href`` is not a MOSS match URL,
    are left untouched. If a link's text does not have the expected shape,
    the text is kept as is (the link itself is still localized).

    Args:
        doc: Parsed HTML document (BeautifulSoup); modified in place.
        input_report_subdir: Directory, relative to the report, that contains
            the downloaded match pages.
    """
    # E.g. http://moss.stanford.edu/results/8/7282327060561/match0.html
    url_pattern = re.compile(
        r"http://moss\.stanford\.edu/results/\d+/\d+/(match.*\.html)")
    # E.g. ./12-345-678/main.cpp (77%)
    text_pattern = re.compile(r"\./([\d-]+)/.* (\(\d+%\))")

    for row in doc.find_all("tr"):
        for a in row.find_all("a"):
            # Anchors without href (or with a non-MOSS href) are not match
            # links; skipping them avoids a KeyError/AttributeError
            url_match = url_pattern.search(a.get("href") or "")
            if url_match is None:
                continue

            # Change remote URLs to local ones
            a["href"] = "./{}/{}".format(input_report_subdir, url_match.group(1))

            # Open links in a new tab/window
            a["target"] = "_blank"

            # Strip away unnecessary link text, but only if the text has the
            # expected shape; otherwise keep the original text
            text_match = text_pattern.search(a.get_text().strip())
            if text_match is not None:
                a.string = "{} {}".format(text_match.group(1), text_match.group(2))
def get_match_percentage(match_text):
    """Extract the integer percentage from the end of a match label.

    Args:
        match_text: Label text such as ``"12-345-678 (77%)"``. The percentage
            must be the last thing in the text, wrapped in parentheses.
            Leading/trailing whitespace is ignored.

    Returns:
        The percentage as an ``int`` (e.g. ``77``).

    Raises:
        ValueError: If ``match_text`` does not end in ``(<digits>%)``.
    """
    percentage_pattern = r"\((\d+)%\)$"

    # Strip first: the pattern is anchored at the end of the text, so
    # trailing whitespace would otherwise prevent a match
    percentage_match = re.search(percentage_pattern, match_text.strip())

    if percentage_match is None:
        # Fail with a descriptive error instead of an AttributeError on None
        raise ValueError(
            "No trailing percentage of the form '(NN%)' found in {!r}".format(match_text))

    return int(percentage_match.group(1))
def add_average_percentage_column(doc):
ths = doc.find_all("th")
th = doc.new_tag("th")
......@@ -78,13 +101,10 @@ def add_average_percentage_column(doc):
ths[1].insert_after(th)
for row in doc.find_all("tr")[1:]: # Skip first TR, since table head
for a in row.find_all("a"):
a["target"] = "_blank"
cols = row.find_all("td")
first_match_text = cols[0].a.string
second_match_text = cols[1].a.string
first_match_text = cols[0].get_text().strip()
second_match_text = cols[1].get_text().strip()
first_percentage = get_match_percentage(first_match_text)
second_percentage = get_match_percentage(second_match_text)
......@@ -94,10 +114,26 @@ def add_average_percentage_column(doc):
td.string = str(avg_percentage)
row.insert(2, td)
def write_result_table_to_csv_file(doc, csv_file):
    """Export the report's result table to a CSV file.

    The text of the ``th`` cells of the first table row becomes the CSV
    header; every following row contributes one CSV line built from the text
    of its ``td`` cells. Cell text is stripped of surrounding whitespace.
    Cells containing commas, quotes or line breaks are quoted according to
    the usual CSV rules, so the file stays parseable.

    If the document contains no table rows, an empty file is written.

    Args:
        doc: Parsed HTML document (BeautifulSoup) containing the result table.
        csv_file: Path of the CSV file to create or overwrite.
    """
    # Imported locally so that this function is self-contained
    import csv

    rows = doc.find_all("tr")

    # newline="" hands control over line endings to the csv module
    with open(csv_file, "w", newline="") as csv_fh:
        writer = csv.writer(csv_fh, lineterminator="\n")

        if not rows:
            return

        writer.writerow([th.get_text().strip() for th in rows[0].find_all("th")])

        # Skip first TR, since table head; each data row is written exactly once
        for row in rows[1:]:
            writer.writerow([td.get_text().strip() for td in row.find_all("td")])
def main(
input_report_file=DEFAULT_INPUT_REPORT_FILE,
input_report_subdir=DEFAULT_INPUT_REPORT_SUBDIR,
output_report_file=DEFAULT_OUTPUT_REPORT_FILE,
static_dir=DEFAULT_STATIC_DIR):
static_dir=DEFAULT_STATIC_DIR,
results_csv_file=DEFAULT_RESULTS_CSV_FILE):
# Copy static web files (style.css etc.), shipped with this package,
# to the current working directory
......@@ -110,8 +146,11 @@ def main(
prettyprint_html_to_versioned_file(doc, DEFAULT_OUTPUT_REPORT_VERSIONED_FILE_PATTERN)
make_report_table_sortable(doc, static_dir)
localize_match_links(doc, input_report_subdir)
add_average_percentage_column(doc)
write_result_table_to_csv_file(doc, results_csv_file)
prettyprint_html_to_file(doc, output_report_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment