Commit 8758dce7 authored by scmalte's avatar scmalte

revise.py: fixed two bugs (CSV file lacked first non-header row, contained...

revise.py: fixed two bugs (CSV file lacked first non-header row, contained duplicated rows) and added log output
parent 49f918b9
...@@ -21,6 +21,8 @@ def copy_static_files_to_working_directory(output_dir): ...@@ -21,6 +21,8 @@ def copy_static_files_to_working_directory(output_dir):
files = ["style.css", "script.js", "sorttable.js"] files = ["style.css", "script.js", "sorttable.js"]
for file in files: for file in files:
logging.debug("Copying {} to current working directory".format(file))
src_file = os.path.join("data", file) src_file = os.path.join("data", file)
dest_file = os.path.join(output_dir, file) dest_file = os.path.join(output_dir, file)
data = pkgutil.get_data(__name__, src_file) data = pkgutil.get_data(__name__, src_file)
...@@ -29,25 +31,33 @@ def copy_static_files_to_working_directory(output_dir): ...@@ -29,25 +31,33 @@ def copy_static_files_to_working_directory(output_dir):
dest_fh.write(data) dest_fh.write(data)
def parse_original_html_report(input_report_file):
    """Parse the HTML report that Moss generated and return it as a DOM.

    We use BeautifulSoup because it allows easy DOM manipulation and has
    a nice pretty-printing feature. The lxml parser is used because it is
    able to parse the somewhat broken HTML report file that Moss generates.

    :param input_report_file: path to the Moss HTML report file
    :return: the parsed document as a ``BeautifulSoup`` object
    """
    logging.info("Reading Moss report file {}".format(input_report_file))

    with open(input_report_file) as input_fh:
        doc = BeautifulSoup(input_fh, features="lxml")

    return doc
def prettyprint_html_to_file(doc, file):
    """Pretty-print the DOM ``doc`` to the given file.

    The file is written with an explicit UTF-8 encoding: the original code
    relied on the platform default encoding (with a stray ``# "utf-8"``
    comment hinting at the intent), which raises ``UnicodeEncodeError`` for
    non-ASCII report content on platforms whose default is not UTF-8.

    :param doc: a BeautifulSoup document (anything with a ``prettify()`` method)
    :param file: destination file path
    """
    logging.debug("Writing report to {}".format(file))

    with open(file, "w", encoding="utf-8") as fh:
        fh.write(doc.prettify())
def prettyprint_html_to_versioned_file(doc, filename_pattern):
    """Pretty-print the DOM ``doc`` to a numbered output file.

    ``filename_pattern`` must contain one ``{}`` placeholder, which is
    filled with the module-level ``output_file_version_counter``. The
    counter is incremented after each call, so successive invocations
    write to distinct, consecutively numbered files.

    :param doc: a BeautifulSoup document
    :param filename_pattern: filename template with a ``{}`` placeholder
    """
    global output_file_version_counter

    versioned_filename = filename_pattern.format(output_file_version_counter)
    output_file_version_counter += 1

    prettyprint_html_to_file(doc, versioned_filename)
def make_report_table_sortable(doc, static_dir): def make_report_table_sortable(doc, static_dir):
logging.info("Making report table sortable")
doc.body.table["class"] = "sortable" doc.body.table["class"] = "sortable"
# <script src="<static_dir>/sorttable.js"></script> # <script src="<static_dir>/sorttable.js"></script>
...@@ -73,7 +83,12 @@ def localize_match_links(doc, input_report_subdir): ...@@ -73,7 +83,12 @@ def localize_match_links(doc, input_report_subdir):
# E.g. ./12-345-678/main.cpp (77%) # E.g. ./12-345-678/main.cpp (77%)
text_pattern = r"\./([\d-]+)/.* (\(\d+%\))" text_pattern = r"\./([\d-]+)/.* (\(\d+%\))"
for row in doc.find_all("tr"): logging.info("Localising links to match files")
rows = doc.find_all("tr")
logging.debug("Considering {} rows, including table header".format(len(rows)))
for row in rows:
for a in row.find_all("a"): for a in row.find_all("a"):
# Change remote URLs to local ones # Change remote URLs to local ones
url_match = re.search(url_pattern, a["href"]) url_match = re.search(url_pattern, a["href"])
...@@ -95,12 +110,17 @@ def get_match_percentage(match_text): ...@@ -95,12 +110,17 @@ def get_match_percentage(match_text):
return int(percentage_string) return int(percentage_string)
def add_average_percentage_column(doc): def add_average_percentage_column(doc):
logging.info("Adding average percentage column")
ths = doc.find_all("th") ths = doc.find_all("th")
th = doc.new_tag("th") th = doc.new_tag("th")
th.string = "Avg. %" th.string = "Avg. %"
ths[1].insert_after(th) ths[1].insert_after(th)
for row in doc.find_all("tr")[1:]: # Skip first TR, since table head rows = doc.find_all("tr")[1:] # Skip first TR, since table head
logging.debug("Considering {} rows, excluding table header".format(len(rows)))
for row in rows:
cols = row.find_all("td") cols = row.find_all("td")
first_match_text = cols[0].get_text().strip() first_match_text = cols[0].get_text().strip()
...@@ -115,13 +135,15 @@ def add_average_percentage_column(doc): ...@@ -115,13 +135,15 @@ def add_average_percentage_column(doc):
row.insert(2, td) row.insert(2, td)
def write_result_table_to_csv_file(doc, csv_file): def write_result_table_to_csv_file(doc, csv_file):
logging.info("Writing report data to CSV file {}".format(csv_file))
# E.g. 12-345-678 (77%) # E.g. 12-345-678 (77%)
text_pattern = r"([\d-]+) \((\d+)%\)" text_pattern = r"([\d-]+) \((\d+)%\)"
with open(csv_file, "w") as csv_fh: with open(csv_file, "w") as csv_fh:
rows = doc.find_all("tr") rows = doc.find_all("tr")[1:] # Skip first TR, since table head
logging.debug("Considering {} rows, excluding table header".format(len(rows)))
# column_heads = [th.get_text().strip() for th in rows[0].find_all("th")]
column_heads = [ column_heads = [
"id1", "percentage1", "id1", "percentage1",
"id2", "percentage2", "id2", "percentage2",
...@@ -133,8 +155,7 @@ def write_result_table_to_csv_file(doc, csv_file): ...@@ -133,8 +155,7 @@ def write_result_table_to_csv_file(doc, csv_file):
csv_fh.write(",".join(column_heads)) csv_fh.write(",".join(column_heads))
csv_fh.write("\n") csv_fh.write("\n")
for row in rows[1:]: for row in rows:
for td in row.find_all("td"):
tds = [td for td in row.find_all("td")] tds = [td for td in row.find_all("td")]
tds_text = [td.get_text().strip() for td in tds] tds_text = [td.get_text().strip() for td in tds]
...@@ -161,10 +182,15 @@ def main( ...@@ -161,10 +182,15 @@ def main(
static_dir=DEFAULT_STATIC_DIR, static_dir=DEFAULT_STATIC_DIR,
results_csv_file=DEFAULT_RESULTS_CSV_FILE): results_csv_file=DEFAULT_RESULTS_CSV_FILE):
logutils.configure_level_and_format()
# Copy static web files (style.css etc.), shipped with this package, # Copy static web files (style.css etc.), shipped with this package,
# to the current working directory # to the current working directory
copy_static_files_to_working_directory(static_dir) copy_static_files_to_working_directory(static_dir)
## TODO: Insert thead and tbody tags. Makes subsequent steps safer, since table
## rows inside the body can be selected explicitly.
# Parse original Moss report. Should fix the broken HTML that Moss generates. # Parse original Moss report. Should fix the broken HTML that Moss generates.
doc = parse_original_html_report(input_report_file) doc = parse_original_html_report(input_report_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment