To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit ece4aadb authored by scmalte's avatar scmalte
Browse files

cluster.py: started implementation

parent ec53ee16
...@@ -2,37 +2,115 @@ import re ...@@ -2,37 +2,115 @@ import re
# import argparse # import argparse
import logging import logging
import csv import csv
import subprocess
import pydot
import networkx as nx
from dataclasses import dataclass from dataclasses import dataclass
from .utils import logging as logutils
DEFAULT_RESULTS_CSV_FILE="moss-report.csv" DEFAULT_RESULTS_CSV_FILE="moss-report.csv"
DEFAULT_TOTAL_GRAPH_DOT_FILE="moss-report.dot"
DEFAULT_CLUSTERS_DOT_FILE="clusters.dot"
DEFAULT_CLUSTER_DOT_FILE_PATTERN="cluster-{}.dot"
DEFAULT_THRESHOLD
@dataclass  # USE AS IF frozen=True
class MossResult:
    """One row of a Moss report: a pairwise match between two submissions.

    Instances are meant to be treated as immutable after construction. The
    class is not declared ``frozen=True`` because ``__post_init__`` has to
    assign the normalised field values.
    """

    id1: str  # Identifier of the first submission
    percentage1: int  # Match percentage reported for the first submission
    id2: str  # Identifier of the second submission
    percentage2: int  # Match percentage reported for the second submission
    avg_percentage: float  # Converted with float() in __post_init__
    lines: int  # Number of matched lines
    match_file: str  # Used as the edge URL when the graph is built

    def __post_init__(self):
        """Coerce the numeric fields to their declared types.

        Raises:
            ValueError: If a numeric field cannot be converted.
        """
        # Despite the (mandatory) type annotations above, there is no guarantee
        # that the field values have the expected type (e.g. when the instance
        # is built from CSV cells, which are strings), hence the explicit
        # conversions.
        self.percentage1 = int(self.percentage1)
        self.percentage2 = int(self.percentage2)
        self.avg_percentage = float(self.avg_percentage)
        self.lines = int(self.lines)
def read_results_from_csv_file(csv_file):
    """Read Moss results from a CSV file.

    The first line is treated as a header and skipped. Every remaining
    non-empty row is unpacked positionally into a ``MossResult``, so the
    column order must match the field order of that class.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list of ``MossResult`` objects, one per data row.

    Raises:
        OSError: If the file cannot be opened.
        TypeError: If a row does not have the number of columns that
            ``MossResult`` expects.
        ValueError: If a numeric column cannot be converted.
    """
    logging.info("Reading results from {}".format(csv_file))

    with open(csv_file, newline="") as csv_fh:
        csv_reader = csv.reader(csv_fh, delimiter=",", quotechar='"')
        next(csv_reader, None)  # Skip CSV header line
        # csv.reader yields an empty list for blank lines; skip those instead
        # of failing on MossResult() with no arguments.
        results = [MossResult(*row) for row in csv_reader if row]

    logging.debug("Read {} results".format(len(results)))

    return results
def get_weight(result):
    """Return the edge weight for a result.

    The weight is the smaller of the two per-submission percentages.

    Args:
        result: Object exposing ``percentage1`` and ``percentage2``.

    Returns:
        ``min(result.percentage1, result.percentage2)``.
    """
    return min(result.percentage1, result.percentage2)
def get_color(percentage):
    """Return the hex colour used for a match of the given strength.

    Args:
        percentage: Match percentage (the edge weight).

    Returns:
        ``"#D83018"`` (red) for at least 90, ``"#F07241"`` (orange) for at
        least 80, ``"#601848"`` (purple) for at least 70, and ``"#000000"``
        (black) for anything lower.
    """
    # Thresholds are checked from highest to lowest, so the first hit wins.
    if percentage >= 90:
        return "#D83018"  # Red
    if percentage >= 80:
        return "#F07241"  # Orange
    if percentage >= 70:
        return "#601848"  # Purple
    return "#000000"  # Black
def create_results_graph(results):
    """Build an undirected graph with one weighted edge per result.

    Nodes are the submission identifiers (``id1``/``id2``). Each edge is
    weighted with ``get_weight(result)`` and coloured with ``get_color``.

    Args:
        results: Sized collection of result objects exposing ``id1``,
            ``id2``, ``lines`` and ``match_file`` (plus whatever
            ``get_weight`` reads). ``len()`` is called on it for logging.

    Returns:
        The populated ``networkx.Graph``.
    """
    graph = nx.Graph()

    logging.debug("Creating total graph from {} results".format(len(results)))

    for result in results:
        weight = get_weight(result)
        color = get_color(weight)
        edge = (result.id1, result.id2, weight)

        # Extra edge attributes stored alongside the weight. The names look
        # like Graphviz edge attributes -- presumably they are carried into
        # the DOT output when the graph is written; confirm against the writer.
        attributes = {
            "color": color,
            "penwidth": 2,
            "label": "{0}% ({1})".format(weight, result.lines),
            "labelURL": result.match_file,
            "URL": result.match_file,
            "target": "match",
            "fontcolor": color,
        }

        graph.add_weighted_edges_from([edge], **attributes)

    return graph
def main(
        results_csv_file=DEFAULT_RESULTS_CSV_FILE,
        total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE,
        cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN):
    """Cluster the Moss results and write DOT/SVG files for each cluster.

    Steps: read the results CSV, build the total graph and write it as a DOT
    file, split the graph into connected components (largest first), then
    write one DOT file per component and run ``dot`` on it to produce an SVG.

    Args:
        results_csv_file: Path of the CSV file with the Moss results.
        total_graph_dot_file: Path of the DOT file for the complete graph.
        cluster_dot_file_pattern: ``str.format`` pattern with one placeholder
            that receives the cluster index.

    Raises:
        FileNotFoundError: If the ``dot`` executable cannot be found (raised
            by ``subprocess.run``).
    """
    logutils.configure_level_and_format()

    results = read_results_from_csv_file(results_csv_file)
    graph = create_results_graph(results)

    logging.info("Writing total graph to {}".format(total_graph_dot_file))
    nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)

    logging.info("Computing clusters")
    # Largest cluster first, so that index 0 is the biggest component.
    clusters = sorted(nx.connected_components(graph), key=len, reverse=True)

    logging.info(
        "Found {} clusters, will write them to files {}".format(
            len(clusters),
            cluster_dot_file_pattern.format("#")))

    for index, cluster in enumerate(clusters):
        subgraph = graph.subgraph(cluster).copy()
        cluster_dot_file = cluster_dot_file_pattern.format(index)

        logging.debug(
            "Writing cluster {} to file {}".format(index, cluster_dot_file))
        nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)

        logging.debug(
            "Calling dot to create SVG file from {}".format(cluster_dot_file))
        completed = subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])

        # Do not abort the remaining clusters on a rendering failure, but do
        # not hide it either.
        if completed.returncode != 0:
            logging.warning(
                "dot exited with status {} for {}".format(
                    completed.returncode, cluster_dot_file))
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -17,8 +17,10 @@ setup( ...@@ -17,8 +17,10 @@ setup(
}, },
install_requires=[ install_requires=[
'mosspy', 'mosspy',
'lxml',
'bs4', 'bs4',
'lxml' 'pydot',
'networkx'
], ],
# scripts=['bin/mossutils-moss'], # scripts=['bin/mossutils-moss'],
entry_points = { entry_points = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment