To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit a0d02765 authored by scmalte's avatar scmalte
Browse files

cluster.py: compute clusters and generate DOT files

parent ece4aadb
...@@ -11,8 +11,10 @@ from .utils import logging as logutils ...@@ -11,8 +11,10 @@ from .utils import logging as logutils
DEFAULT_RESULTS_CSV_FILE="moss-report.csv" DEFAULT_RESULTS_CSV_FILE="moss-report.csv"
DEFAULT_TOTAL_GRAPH_DOT_FILE="moss-report.dot" DEFAULT_TOTAL_GRAPH_DOT_FILE="moss-report.dot"
DEFAULT_CLUSTERS_DOT_FILE="clusters.dot" DEFAULT_CLUSTERS_DOT_FILE="clusters.dot"
DEFAULT_CLUSTER_DOT_FILE_PATTERN="cluster-{}.dot" DEFAULT_CLUSTER_DOT_FILE_PATTERN="cluster-{}-{}.dot"
DEFAULT_THRESHOLD DEFAULT_THRESHOLD_PERCENTAGE=90
DEFAULT_THRESHOLD_LINES=50
DEFAULT_CREATE_SVG_FILES=True
@dataclass # USE AS IF frozen=True @dataclass # USE AS IF frozen=True
class MossResult: class MossResult:
...@@ -49,7 +51,7 @@ def read_results_from_csv_file(csv_file): ...@@ -49,7 +51,7 @@ def read_results_from_csv_file(csv_file):
return results return results
def get_weight(result): def get_weight(result):
return min(result.percentage1, result.percentage2) return max(result.percentage1, result.percentage2)
def get_color(percentage): def get_color(percentage):
if (percentage >= 90): return "#D83018" # Red if (percentage >= 90): return "#D83018" # Red
...@@ -57,12 +59,21 @@ def get_color(percentage): ...@@ -57,12 +59,21 @@ def get_color(percentage):
elif (percentage >= 70): return "#601848" # Purple elif (percentage >= 70): return "#601848" # Purple
else: return "#000000" # Black else: return "#000000" # Black
def create_results_graph(results): def include(result, percentage_threshold, lines_threshold):
return (
percentage_threshold <= get_weight(result) and
lines_threshold <= result.lines)
def create_results_graph(results, percentage_threshold, lines_threshold):
graph = nx.Graph() graph = nx.Graph()
logging.debug("Creating total graph from {} results".format(len(results))) logging.debug("Creating graph from {} initial results".format(len(results)))
logging.debug("Thresholds percentages/lines: ".format(percentage_threshold, lines_threshold))
for result in results: for result in results:
if not include(result, percentage_threshold, lines_threshold):
continue
weight = get_weight(result) weight = get_weight(result)
edge = (result.id1, result.id2, weight) edge = (result.id1, result.id2, weight)
color = get_color(weight) color = get_color(weight)
...@@ -79,38 +90,57 @@ def create_results_graph(results): ...@@ -79,38 +90,57 @@ def create_results_graph(results):
graph.add_weighted_edges_from([edge], **attributes) graph.add_weighted_edges_from([edge], **attributes)
logging.debug(
"Graph contains {} nodes and {} edged".format(
graph.number_of_nodes(),
graph.number_of_edges()))
return graph return graph
def write_cluster_files(subgraph, index, cluster_dot_file, create_svg_files):
    """Write one cluster to a DOT file and optionally render it with ``dot``.

    Args:
        subgraph: Graph holding the cluster; it is handed to networkx's
            pydot writer and queried for its node and edge counts.
        index: Cluster number, used only in the log message.
        cluster_dot_file: Path of the DOT file to write.
        create_svg_files: If true, invoke the external ``dot`` program on
            the written DOT file to produce an SVG.
    """
    logging.debug(
        "Writing cluster {} with {}/{} nodes/edge to file {}".format(
            index,
            subgraph.number_of_nodes(),
            subgraph.number_of_edges(),
            cluster_dot_file))

    nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)

    if not create_svg_files:
        return

    logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))
    completed = subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])

    # Rendering stays best-effort: a failing dot run must not abort the
    # remaining clusters, but it should not pass unnoticed either.
    if completed.returncode != 0:
        logging.warning(
            "dot exited with status {} for {}".format(
                completed.returncode,
                cluster_dot_file))
def main( def main(
results_csv_file=DEFAULT_RESULTS_CSV_FILE, results_csv_file=DEFAULT_RESULTS_CSV_FILE,
total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE, total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE,
cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN): cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN,
percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE,
lines_threshold=DEFAULT_THRESHOLD_LINES,
create_svg_files=DEFAULT_CREATE_SVG_FILES):
logutils.configure_level_and_format() logutils.configure_level_and_format()
results = read_results_from_csv_file(results_csv_file) results = read_results_from_csv_file(results_csv_file)
graph = create_results_graph(results) graph = create_results_graph(results, percentage_threshold, lines_threshold)
logging.info("Writing total graph to {}".format(total_graph_dot_file)) logging.info("Writing total graph to {}".format(total_graph_dot_file))
nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file) nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)
logging.info("Computing clusters") logging.info("Computing connected component (CC) clusters")
clusters = sorted(nx.connected_components(graph), key=len, reverse=True) clusters = sorted(nx.connected_components(graph), key=len, reverse=True)
cluster_dot_file_pattern = cluster_dot_file_pattern.format("cc", "{}")
logging.info( logging.info(
"Found {} clusters, will write them to files {}".format( "Found {} CC clusters, will write them to files {}".format(
len(clusters), len(clusters),
cluster_dot_file_pattern.format("#"))) cluster_dot_file_pattern.format("#")))
for index, cluster in enumerate(clusters): for index, cluster in enumerate(clusters):
subgraph = graph.subgraph(cluster).copy() subgraph = graph.subgraph(cluster).copy()
dot_file = cluster_dot_file_pattern.format(index)
cluster_dot_file = cluster_dot_file_pattern.format(index) write_cluster_files(subgraph, index, dot_file, create_svg_files)
logging.debug("Writing cluster {} to file {}".format(index, cluster_dot_file))
nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)
logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))
subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment