To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 6a4e6679 authored by scmalte's avatar scmalte
Browse files

revise.py: added lots of transformations of the HTML report, extract data into CSV file

parent 747eec33
import re import re
import logging import logging
import argparse # import argparse
import pkgutil import pkgutil
import os import os
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from .utils import logging as logutils from .utils import logging as logutils
DEFAULT_INPUT_REPORT_FILE="moss-report.html" DEFAULT_INPUT_REPORT_FILE="moss-report.html"
DEFAULT_INPUT_REPORT_SUBDIR="_moss-report"
DEFAULT_OUTPUT_REPORT_FILE="moss-report-revised.html" DEFAULT_OUTPUT_REPORT_FILE="moss-report-revised.html"
DEFAULT_OUTPUT_REPORT_VERSIONED_FILE_PATTERN="moss-report-revised.v{}.html" DEFAULT_OUTPUT_REPORT_VERSIONED_FILE_PATTERN="moss-report-revised.v{}.html"
DEFAULT_STATIC_DIR="_static" DEFAULT_STATIC_DIR="_static"
DEFAULT_RESULTS_CSV_FILE="moss-report.csv"
output_file_version_counter = 1 output_file_version_counter = 1
...@@ -26,13 +28,6 @@ def copy_static_files_to_working_directory(output_dir): ...@@ -26,13 +28,6 @@ def copy_static_files_to_working_directory(output_dir):
with open(dest_file, "wb") as dest_fh: with open(dest_file, "wb") as dest_fh:
dest_fh.write(data) dest_fh.write(data)
def get_match_percentage(match_text):
    """Return the integer percentage found at the very end of ``match_text``.

    The text is expected to end in a parenthesised percentage such as
    ``"(77%)"``; the digits between ``(`` and ``%)`` are returned as an int.
    If the text does not end that way, the failed regex search yields ``None``
    and the attribute access on it raises ``AttributeError``.
    """
    found = re.search(r"\((\d+)%\)$", match_text)
    digits = found.group(1)
    return int(digits)
def parse_original_html_report(input_report_file): def parse_original_html_report(input_report_file):
with open(input_report_file) as input_fh: with open(input_report_file) as input_fh:
# We use BeautifulSoup because it allows easy DOM manipulation and has # We use BeautifulSoup because it allows easy DOM manipulation and has
...@@ -71,6 +66,34 @@ def make_report_table_sortable(doc, static_dir): ...@@ -71,6 +66,34 @@ def make_report_table_sortable(doc, static_dir):
link["rel"] = "stylesheet" link["rel"] = "stylesheet"
doc.html.head.append(link) # Add to head doc.html.head.append(link) # Add to head
def localize_match_links(doc, input_report_subdir):
    """Point the report's match links at local files and shorten their text.

    For every ``<a>`` found inside a ``<tr>`` of ``doc``:

    * a remote MOSS match URL is replaced by the relative path
      ``./<input_report_subdir>/<matchN.html>``,
    * ``target="_blank"`` is set so the link opens in a new tab/window,
    * link text of the form ``./<id>/<path> (<NN>%)`` is shortened to
      ``<id> (<NN>%)``.

    A link that has no ``href``, whose URL is not a MOSS match URL, or whose
    text has a different shape keeps its original URL/text instead of
    aborting the whole run with an exception.

    Args:
        doc: Parsed report document; presumably a BeautifulSoup object (only
            ``find_all`` and per-tag attribute/text access are used here).
        input_report_subdir: Name of the directory the rewritten links should
            point into (presumably where the match pages are stored locally).
    """
    # E.g. http://moss.stanford.edu/results/8/7282327060561/match0.html
    url_pattern = re.compile(
        r"http://moss\.stanford\.edu/results/\d+/\d+/(match.*\.html)")
    # E.g. ./12-345-678/main.cpp (77%)
    text_pattern = re.compile(r"\./([\d-]+)/.* (\(\d+%\))")

    for row in doc.find_all("tr"):
        for a in row.find_all("a"):
            # Change remote URLs to local ones; tolerate anchors without an
            # href and URLs that do not look like MOSS match pages.
            url_match = url_pattern.search(a.get("href") or "")
            if url_match is not None:
                a["href"] = "./{}/{}".format(
                    input_report_subdir, url_match.group(1))

            # Open links in a new tab/window
            a["target"] = "_blank"

            # Strip away unnecessary link text, but only if it has the
            # expected shape; otherwise leave it as it is.
            text_match = text_pattern.search(a.get_text().strip())
            if text_match is not None:
                a.string = "{} {}".format(
                    text_match.group(1), text_match.group(2))
def get_match_percentage(match_text):
    """Extract the trailing ``(NN%)`` value of ``match_text`` as an int.

    Only a percentage at the end of the string is recognised (the pattern is
    anchored with ``$``). When nothing matches, ``re.search`` returns ``None``
    and calling ``.group`` on it raises ``AttributeError``.
    """
    trailing_percentage = re.compile(r"\((\d+)%\)$")
    return int(trailing_percentage.search(match_text).group(1))
def add_average_percentage_column(doc): def add_average_percentage_column(doc):
ths = doc.find_all("th") ths = doc.find_all("th")
th = doc.new_tag("th") th = doc.new_tag("th")
...@@ -78,13 +101,10 @@ def add_average_percentage_column(doc): ...@@ -78,13 +101,10 @@ def add_average_percentage_column(doc):
ths[1].insert_after(th) ths[1].insert_after(th)
for row in doc.find_all("tr")[1:]: # Skip first TR, since table head for row in doc.find_all("tr")[1:]: # Skip first TR, since table head
for a in row.find_all("a"):
a["target"] = "_blank"
cols = row.find_all("td") cols = row.find_all("td")
first_match_text = cols[0].a.string first_match_text = cols[0].get_text().strip()
second_match_text = cols[1].a.string second_match_text = cols[1].get_text().strip()
first_percentage = get_match_percentage(first_match_text) first_percentage = get_match_percentage(first_match_text)
second_percentage = get_match_percentage(second_match_text) second_percentage = get_match_percentage(second_match_text)
...@@ -94,10 +114,26 @@ def add_average_percentage_column(doc): ...@@ -94,10 +114,26 @@ def add_average_percentage_column(doc):
td.string = str(avg_percentage) td.string = str(avg_percentage)
row.insert(2, td) row.insert(2, td)
def write_result_table_to_csv_file(doc, csv_file):
    """Write the report's table rows to ``csv_file`` as CSV.

    The first ``<tr>`` of ``doc`` supplies the header (the stripped text of
    its ``<th>`` cells); every following ``<tr>`` contributes one line built
    from the stripped text of its ``<td>`` cells. Each row is written exactly
    once. Fields containing commas, quotes or line breaks are quoted by the
    ``csv`` module, so such cell text can no longer corrupt the file.

    If ``doc`` contains no ``<tr>`` at all, an empty file is written.

    Args:
        doc: Parsed report document; presumably a BeautifulSoup object (only
            ``find_all`` and ``get_text`` are used here).
        csv_file: Path of the CSV file to create or overwrite.
    """
    import csv  # Function-scope import: the module is only needed here.

    rows = doc.find_all("tr")

    # newline="" lets the csv writer control line endings itself (as the csv
    # documentation requires); "\n" keeps the line ending written previously.
    with open(csv_file, "w", newline="") as csv_fh:
        writer = csv.writer(csv_fh, lineterminator="\n")

        for index, row in enumerate(rows):
            # The first row is the table head (<th>), all others hold data (<td>).
            cell_tag = "th" if index == 0 else "td"
            writer.writerow(
                [cell.get_text().strip() for cell in row.find_all(cell_tag)])
def main( def main(
input_report_file=DEFAULT_INPUT_REPORT_FILE, input_report_file=DEFAULT_INPUT_REPORT_FILE,
input_report_subdir=DEFAULT_INPUT_REPORT_SUBDIR,
output_report_file=DEFAULT_OUTPUT_REPORT_FILE, output_report_file=DEFAULT_OUTPUT_REPORT_FILE,
static_dir=DEFAULT_STATIC_DIR): static_dir=DEFAULT_STATIC_DIR,
results_csv_file=DEFAULT_RESULTS_CSV_FILE):
# Copy static web files (style.css etc.), shipped with this package, # Copy static web files (style.css etc.), shipped with this package,
# to the current working directory # to the current working directory
...@@ -110,8 +146,11 @@ def main( ...@@ -110,8 +146,11 @@ def main(
prettyprint_html_to_versioned_file(doc, DEFAULT_OUTPUT_REPORT_VERSIONED_FILE_PATTERN) prettyprint_html_to_versioned_file(doc, DEFAULT_OUTPUT_REPORT_VERSIONED_FILE_PATTERN)
make_report_table_sortable(doc, static_dir) make_report_table_sortable(doc, static_dir)
localize_match_links(doc, input_report_subdir)
add_average_percentage_column(doc) add_average_percentage_column(doc)
write_result_table_to_csv_file(doc, results_csv_file)
prettyprint_html_to_file(doc, output_report_file) prettyprint_html_to_file(doc, output_report_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment