Commit 8758dce7 authored by scmalte

revise.py: fixed two bugs (CSV file lacked first non-header row, contained duplicated rows) and added log output
parent 49f918b9
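
The duplicated-rows bug named in the commit message is easiest to see in isolation: in the old write_result_table_to_csv_file (last hunk below), the per-row CSV write was nested inside a loop over the row's cells, so each row was emitted once per cell. A minimal sketch of the before/after behaviour, using hypothetical cell data rather than the actual report table:

# Hypothetical row data; the real code extracts <td> cells via BeautifulSoup.
cells = ["12-345-678 (77%)", "98-765-432 (75%)", "76", "123"]

# Before: the write sat inside the loop over the cells,
# so this one row was written len(cells) == 4 times.
for cell in cells:
    print(",".join(cells))

# After: one write per row.
print(",".join(cells))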
@@ -17,10 +17,12 @@ output_file_version_counter = 1
 def copy_static_files_to_working_directory(output_dir):
     os.makedirs(output_dir, exist_ok=True)
 
     files = ["style.css", "script.js", "sorttable.js"]
 
     for file in files:
+        logging.debug("Copying {} to current working directory".format(file))
+
         src_file = os.path.join("data", file)
         dest_file = os.path.join(output_dir, file)
 
         data = pkgutil.get_data(__name__, src_file)
@@ -29,25 +31,33 @@ def copy_static_files_to_working_directory(output_dir):
         dest_fh.write(data)
 
 def parse_original_html_report(input_report_file):
+    logging.info("Reading Moss report file {}".format(input_report_file))
+
     with open(input_report_file) as input_fh:
         # We use BeautifulSoup because it allows easy DOM manipulation and has
         # a nice pretty-printing feature. The lxml parser is used because it is able
-        # to parse the totally broken HTML report file that Moss generates.
+        # to parse the somewhat broken HTML report file that Moss generates.
         doc = BeautifulSoup(input_fh, features="lxml")
 
     return doc
 
 def prettyprint_html_to_file(doc, file):
+    logging.debug("Writing report to {}".format(file))
+
     with open(file, "w") as fh:
         fh.write(doc.prettify()) # "utf-8"
 
 def prettyprint_html_to_versioned_file(doc, filename_pattern):
+    global output_file_version_counter
+
     file = filename_pattern.format(output_file_version_counter)
-    # output_file_version_counter += 1
+    output_file_version_counter += 1
 
     prettyprint_html_to_file(doc, file)
 
 def make_report_table_sortable(doc, static_dir):
+    logging.info("Making report table sortable")
+
     doc.body.table["class"] = "sortable"
 
     # <script src="<static_dir>/sorttable.js"></script>
@@ -69,11 +79,16 @@ def make_report_table_sortable(doc, static_dir):
 def localize_match_links(doc, input_report_subdir):
     # E.g. http://moss.stanford.edu/results/8/7282327060561/match0.html
     url_pattern = r"http://moss\.stanford\.edu/results/\d+/\d+/(match.*\.html)"
 
     # E.g. ./12-345-678/main.cpp (77%)
     text_pattern = r"\./([\d-]+)/.* (\(\d+%\))"
 
-    for row in doc.find_all("tr"):
+    logging.info("Localising links to match files")
+
+    rows = doc.find_all("tr")
+    logging.debug("Considering {} rows, including table header".format(len(rows)))
+
+    for row in rows:
         for a in row.find_all("a"):
             # Change remote URLs to local ones
             url_match = re.search(url_pattern, a["href"])
@@ -91,18 +106,23 @@ def get_match_percentage(match_text):
     percentage_pattern = r"\((\d+)%\)$"
     percentage_string = re.search(percentage_pattern, match_text).group(1)
 
     return int(percentage_string)
 
 def add_average_percentage_column(doc):
+    logging.info("Adding average percentage column")
+
     ths = doc.find_all("th")
     th = doc.new_tag("th")
     th.string = "Avg. %"
     ths[1].insert_after(th)
 
-    for row in doc.find_all("tr")[1:]: # Skip first TR, since table head
+    rows = doc.find_all("tr")[1:] # Skip first TR, since table head
+    logging.debug("Considering {} rows, excluding table header".format(len(rows)))
+
+    for row in rows:
         cols = row.find_all("td")
 
         first_match_text = cols[0].get_text().strip()
         second_match_text = cols[1].get_text().strip()
@@ -112,16 +132,18 @@ def add_average_percentage_column(doc):
         td = doc.new_tag("td")
         td.string = str(avg_percentage)
         row.insert(2, td)
 
 def write_result_table_to_csv_file(doc, csv_file):
+    logging.info("Writing report data to CSV file {}".format(csv_file))
+
     # E.g. 12-345-678 (77%)
     text_pattern = r"([\d-]+) \((\d+)%\)"
 
     with open(csv_file, "w") as csv_fh:
-        rows = doc.find_all("tr")
+        rows = doc.find_all("tr")[1:] # Skip first TR, since table head
+        logging.debug("Considering {} rows, excluding table header".format(len(rows)))
 
-        # column_heads = [th.get_text().strip() for th in rows[0].find_all("th")]
         column_heads = [
             "id1", "percentage1",
             "id2", "percentage2",
@@ -133,26 +155,25 @@ def write_result_table_to_csv_file(doc, csv_file):
         csv_fh.write(",".join(column_heads))
         csv_fh.write("\n")
 
-        for row in rows[1:]:
-            for td in row.find_all("td"):
-                tds = [td for td in row.find_all("td")]
-                tds_text = [td.get_text().strip() for td in tds]
+        for row in rows:
+            tds = [td for td in row.find_all("td")]
+            tds_text = [td.get_text().strip() for td in tds]
 
-                file1_match = re.search(text_pattern, tds_text[0])
-                file2_match = re.search(text_pattern, tds_text[1])
+            file1_match = re.search(text_pattern, tds_text[0])
+            file2_match = re.search(text_pattern, tds_text[1])
 
-                col_data = [
-                    file1_match.group(1),
-                    file1_match.group(2),
-                    file2_match.group(1),
-                    file2_match.group(2),
-                    tds_text[2],
-                    tds_text[3],
-                    tds[0].a["href"]
-                ]
+            col_data = [
+                file1_match.group(1),
+                file1_match.group(2),
+                file2_match.group(1),
+                file2_match.group(2),
+                tds_text[2],
+                tds_text[3],
+                tds[0].a["href"]
+            ]
 
-                csv_fh.write(",".join(col_data))
-                csv_fh.write("\n")
+            csv_fh.write(",".join(col_data))
+            csv_fh.write("\n")
 
 def main(
     input_report_file=DEFAULT_INPUT_REPORT_FILE,
@@ -161,16 +182,21 @@ def main(
     static_dir=DEFAULT_STATIC_DIR,
     results_csv_file=DEFAULT_RESULTS_CSV_FILE):
 
+    logutils.configure_level_and_format()
+
     # Copy static web files (style.css etc.), shipped with this package,
     # to the current working directory
     copy_static_files_to_working_directory(static_dir)
 
+    ## TODO: Insert thead and tbody tags. Makes subsequent steps safer, since table
+    ## rows inside the body can be selected explicitly.
+
     # Parse original Moss report. Should fix the broken HTML that Moss generates.
     doc = parse_original_html_report(input_report_file)
 
     # Save fixed (but otherwise unchanged) report in a file
     prettyprint_html_to_versioned_file(doc, DEFAULT_OUTPUT_REPORT_VERSIONED_FILE_PATTERN)
 
     make_report_table_sortable(doc, static_dir)
     localize_match_links(doc, input_report_subdir)
     add_average_percentage_column(doc)
@@ -181,4 +207,4 @@ def main(
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
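
One detail of the prettyprint_html_to_versioned_file fix is worth spelling out: uncommenting the increment alone would not have worked, because assigning to a module-level name inside a function makes Python treat that name as local, and the function would then fail with UnboundLocalError; hence the added global declaration. A standalone sketch of the pattern, with simplified names rather than the module's actual code:

# Simplified sketch; the real module uses output_file_version_counter.
counter = 1

def next_versioned_name(pattern):
    global counter  # without this, Python treats "counter" as local and the read below raises UnboundLocalError
    name = pattern.format(counter)
    counter += 1
    return name

print(next_versioned_name("report-{}.html"))  # report-1.html
print(next_versioned_name("report-{}.html"))  # report-2.html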
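
For reference, the parse/pretty-print round trip the script builds on can be reproduced standalone. A minimal sketch, assuming bs4 and lxml are installed and using hypothetical file names (the script takes the real ones from its DEFAULT_* constants):

from bs4 import BeautifulSoup

# Hypothetical input/output file names.
with open("report.html") as fh:
    # The lxml parser tolerates the malformed HTML that Moss emits.
    doc = BeautifulSoup(fh, features="lxml")

with open("report-fixed.html", "w") as fh:
    fh.write(doc.prettify())  # re-serialized, cleaned-up HTML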