To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit ece4aadb authored by scmalte's avatar scmalte
Browse files

cluster.py: started implementation

parent ec53ee16
......@@ -2,37 +2,115 @@ import re
# import argparse
import logging
import csv
import subprocess
import pydot
import networkx as nx
from dataclasses import dataclass
from .utils import logging as logutils
# Default path of the MOSS results CSV file that main() reads.
DEFAULT_RESULTS_CSV_FILE="moss-report.csv"
# Default path of the DOT file to which main() writes the complete results graph.
DEFAULT_TOTAL_GRAPH_DOT_FILE="moss-report.dot"
# NOTE(review): not referenced by any code visible in this commit view — confirm whether it is still needed.
DEFAULT_CLUSTERS_DOT_FILE="clusters.dot"
# Default file-name pattern for the per-cluster DOT files; main() fills "{}" with the cluster index.
DEFAULT_CLUSTER_DOT_FILE_PATTERN="cluster-{}.dot"
DEFAULT_THRESHOLD
@dataclass
class MossMatch:
legi1: str
@dataclass # USE AS IF frozen=True
class MossResult:
id1: str
percentage1: int
legi2: str
id2: str
percentage2: int
avg_percentage: int,
avg_percentage: int
lines: int
match_file: str
def main(results_csv_file=DEFAULT_RESULTS_CSV_FILE):
candidate_percentage_pattern = r"([\d-]+) \((\d+)%\)"
# Dataclass hook that runs after the generated __init__: coerces the numeric
# fields in place. Instances are built from raw CSV rows (MossResult(*row)),
# so every field initially holds whatever the CSV reader produced.
def __post_init__(self):
# Despite the (mandatory) type annotations above, there is no guarantee that
# the field values have the expected type, hence the explicit conversions.
self.percentage1 = int(self.percentage1)
self.percentage2 = int(self.percentage2)
# NOTE(review): converted with float() although the field is annotated as int — confirm which type is intended.
self.avg_percentage = float(self.avg_percentage)
self.lines = int(self.lines)
with open(results_csv_file, newline="") as csv_fh:
csv_reader = csv.DictReader(csv_fh, delimiter=",", quotechar='"')
for row in csv_reader:
match1 = re.search(candidate_percentage_pattern, row["File 1"])
match2 = re.search(candidate_percentage_pattern, row["File 2"])
def read_results_from_csv_file(csv_file):
results = []
moss_match = MossMatch(
match1.group(1),
match1.group(2),
match2.group(1),
match2.group(2),
row["Avg. %"],
row["Lines Matched"])
logging.info("Reading results from {}".format(csv_file))
print(moss_match)
with open(csv_file, newline="") as csv_fh:
csv_reader = csv.reader(csv_fh, delimiter=",", quotechar='"')
next(csv_reader, None) # Skip CSV header line
results = [MossResult(*row) for row in csv_reader]
logging.debug("Read {} results".format(len(results)))
return results
def get_weight(result):
    """Return the edge weight of a MOSS result.

    The weight is the smaller of the two match percentages stored on
    ``result`` (its ``percentage1`` and ``percentage2`` attributes).
    """
    first, second = result.percentage1, result.percentage2
    return second if second < first else first
def get_color(percentage):
    """Map a match percentage to the hex colour used to draw its edge.

    Returns red from 90 upwards, orange from 80, purple from 70, and
    black for everything below 70.
    """
    # Ordered from the highest threshold down; the first one reached wins.
    palette = (
        (90, "#D83018"),  # Red
        (80, "#F07241"),  # Orange
        (70, "#601848"),  # Purple
    )
    for lower_bound, hex_code in palette:
        if percentage >= lower_bound:
            return hex_code
    return "#000000"  # Black
def create_results_graph(results):
    """Build an undirected graph containing one styled edge per MOSS result.

    Each result contributes an edge between ``id1`` and ``id2``. The edge
    weight comes from ``get_weight`` and its colour from ``get_color``; the
    remaining attributes (label, URLs, target, pen width) are attached so
    that they end up in the DOT output.

    Returns the populated ``networkx.Graph``.
    """
    logging.debug("Creating total graph from {} results".format(len(results)))

    total_graph = nx.Graph()
    for entry in results:
        similarity = get_weight(entry)
        shade = get_color(similarity)
        match_url = entry.match_file
        # "weight" is passed last so the attribute order on the edge matches
        # what add_weighted_edges_from() would have produced.
        total_graph.add_edge(
            entry.id1,
            entry.id2,
            color=shade,
            penwidth=2,
            label="{0}% ({1})".format(similarity, entry.lines),
            labelURL=match_url,
            URL=match_url,
            target="match",
            fontcolor=shade,
            weight=similarity,
        )

    return total_graph
def main(
        results_csv_file=DEFAULT_RESULTS_CSV_FILE,
        total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE,
        cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN):
    """Turn a MOSS results CSV into DOT graphs and per-cluster SVG files.

    Steps:
      1. Read the results from ``results_csv_file``.
      2. Build the complete results graph and write it to
         ``total_graph_dot_file``.
      3. Split the graph into connected components ("clusters"), largest
         first, and write each one to a file named by filling
         ``cluster_dot_file_pattern`` with the cluster index.
      4. Run Graphviz ``dot`` on every cluster file to render an SVG.

    Args:
        results_csv_file: Path of the CSV file holding the MOSS results.
        total_graph_dot_file: Path of the DOT file for the complete graph.
        cluster_dot_file_pattern: ``str.format`` pattern with one ``{}``
            placeholder, filled with the zero-based cluster index.

    A non-zero exit status of ``dot`` is logged as a warning and processing
    continues with the next cluster. A missing ``dot`` executable still
    raises ``FileNotFoundError`` from ``subprocess.run``.
    """
    logutils.configure_level_and_format()

    results = read_results_from_csv_file(results_csv_file)
    graph = create_results_graph(results)

    logging.info("Writing total graph to {}".format(total_graph_dot_file))
    nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)

    logging.info("Computing clusters")
    # Largest clusters first, so low indices refer to the biggest groups.
    clusters = sorted(nx.connected_components(graph), key=len, reverse=True)

    logging.info(
        "Found {} clusters, will write them to files {}".format(
            len(clusters),
            cluster_dot_file_pattern.format("#")))

    for index, cluster in enumerate(clusters):
        # Copy so that the cluster graph is independent of the total graph.
        subgraph = graph.subgraph(cluster).copy()
        cluster_dot_file = cluster_dot_file_pattern.format(index)

        logging.debug("Writing cluster {} to file {}".format(index, cluster_dot_file))
        nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)

        logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))
        # -O lets dot derive the output file name from the input file name.
        completed = subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])
        if completed.returncode != 0:
            # Previously the exit status was dropped silently; report it, but
            # keep going so one bad cluster does not abort the remaining ones.
            logging.warning(
                "dot exited with status {} while processing {}".format(
                    completed.returncode, cluster_dot_file))
if __name__ == "__main__":
......
......@@ -17,8 +17,10 @@ setup(
},
install_requires=[
'mosspy',
'lxml',
'bs4',
'lxml'
'pydot',
'networkx'
],
# scripts=['bin/mossutils-moss'],
entry_points = {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment