Commit ece4aadb authored by scmalte's avatar scmalte started implementation

parent ec53ee16
......@@ -2,37 +2,115 @@ import re
# import argparse
import logging
import csv
import subprocess
import pydot
import networkx as nx
from dataclasses import dataclass
from .utils import logging as logutils
class MossMatch:
legi1: str
@dataclass  # USE AS IF frozen=True
class MossResult:
    """A single pairwise match, built from one row of a results CSV file.

    Instances are created positionally from CSV rows, so every argument
    may arrive as a string; ``__post_init__`` converts the numeric fields.

    The class is not declared ``frozen=True`` because ``__post_init__``
    reassigns fields, which a frozen dataclass would reject. Treat
    instances as immutable after construction.
    """

    id1: str
    percentage1: int
    id2: str
    percentage2: int
    # Annotated as float (not int) to agree with the float() conversion
    # performed in __post_init__.
    avg_percentage: float
    lines: int
    match_file: str

    def __post_init__(self):
        """Coerce the numeric fields to their annotated types.

        Raises:
            ValueError: if a numeric field cannot be parsed.
        """
        # Despite the (mandatory) type annotations above, there is no
        # guarantee that the field values have the expected type, hence
        # the explicit conversions.
        self.percentage1 = int(self.percentage1)
        self.percentage2 = int(self.percentage2)
        self.avg_percentage = float(self.avg_percentage)
        self.lines = int(self.lines)
with open(results_csv_file, newline="") as csv_fh:
csv_reader = csv.DictReader(csv_fh, delimiter=",", quotechar='"')
for row in csv_reader:
match1 =, row["File 1"])
match2 =, row["File 2"])
def read_results_from_csv_file(csv_file):
    """Read all match results from a CSV file.

    The first line of the file is treated as a header and skipped. Every
    remaining row is passed positionally to ``MossResult``, so the column
    order must match the order of the ``MossResult`` fields.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list with one ``MossResult`` per data row (empty if the file
        contains no data rows).
    """
    # NOTE(review): the log level of this message could not be confirmed
    # from the reviewed text; info is assumed -- please verify.
    logging.info("Reading results from {}".format(csv_file))

    # newline="" lets the csv module handle line endings itself.
    with open(csv_file, newline="") as csv_fh:
        csv_reader = csv.reader(csv_fh, delimiter=",", quotechar='"')
        next(csv_reader, None)  # Skip CSV header line
        results = [MossResult(*row) for row in csv_reader]

    logging.debug("Read {} results".format(len(results)))

    return results
def get_weight(result):
    """Return the smaller of the two match percentages of ``result``.

    Args:
        result: Object exposing ``percentage1`` and ``percentage2``.

    Returns:
        The minimum of ``result.percentage1`` and ``result.percentage2``.
    """
    first, second = result.percentage1, result.percentage2
    return min(first, second)
def get_color(percentage):
    """Map a percentage to the hex color used for drawing it.

    Args:
        percentage: Numeric percentage value.

    Returns:
        A ``#RRGGBB`` string: red from 90, orange from 80, purple from 70,
        and black for anything lower.
    """
    # Ordered from the highest threshold down; the first one reached wins.
    thresholds = (
        (90, "#D83018"),  # Red
        (80, "#F07241"),  # Orange
        (70, "#601848"),  # Purple
    )
    for lower_bound, hex_color in thresholds:
        if percentage >= lower_bound:
            return hex_color
    return "#000000"  # Black
def create_results_graph(results):
    """Build an undirected graph with one weighted edge per result.

    Each result contributes an edge between ``result.id1`` and
    ``result.id2``. The edge weight is ``get_weight(result)``, and the
    same value selects the edge and label color via ``get_color``.

    Args:
        results: Iterable (with a length) of result objects exposing
            ``id1``, ``id2``, ``lines`` and ``match_file``, and accepted
            by ``get_weight``.

    Returns:
        The populated ``networkx.Graph``.
    """
    graph = nx.Graph()

    logging.debug("Creating total graph from {} results".format(len(results)))

    for result in results:
        weight = get_weight(result)
        edge = (result.id1, result.id2, weight)
        color = get_color(weight)

        # Extra edge attributes stored alongside the weight; the graph is
        # later written out in DOT format.
        attributes = {
            "color": color,
            "penwidth": 2,
            "label": "{0}% ({1})".format(weight, result.lines),
            "labelURL": result.match_file,
            "URL": result.match_file,
            "target": "match",
            "fontcolor": color,
        }

        graph.add_weighted_edges_from([edge], **attributes)

    return graph
def main(
results = read_results_from_csv_file(results_csv_file)
graph = create_results_graph(results)"Writing total graph to {}".format(total_graph_dot_file))
nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)"Computing clusters")
clusters = sorted(nx.connected_components(graph), key=len, reverse=True)
"Found {} clusters, will write them to files {}".format(
for index, cluster in enumerate(clusters):
subgraph = graph.subgraph(cluster).copy()
cluster_dot_file = cluster_dot_file_pattern.format(index)
logging.debug("Writing cluster {} to file {}".format(index, cluster_dot_file))
nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)
logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))["dot", "-Tsvg", "-O", cluster_dot_file])
if __name__ == "__main__":
......@@ -17,8 +17,10 @@ setup(
# scripts=['bin/mossutils-moss'],
entry_points = {
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment