To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit a0d02765 authored by scmalte's avatar scmalte
Browse files

cluster.py: compute clusters and generate DOT files

parent ece4aadb
......@@ -11,8 +11,10 @@ from .utils import logging as logutils
DEFAULT_RESULTS_CSV_FILE="moss-report.csv"
DEFAULT_TOTAL_GRAPH_DOT_FILE="moss-report.dot"
DEFAULT_CLUSTERS_DOT_FILE="clusters.dot"
DEFAULT_CLUSTER_DOT_FILE_PATTERN="cluster-{}.dot"
DEFAULT_THRESHOLD
DEFAULT_CLUSTER_DOT_FILE_PATTERN="cluster-{}-{}.dot"
DEFAULT_THRESHOLD_PERCENTAGE=90
DEFAULT_THRESHOLD_LINES=50
DEFAULT_CREATE_SVG_FILES=True
@dataclass # USE AS IF frozen=True
class MossResult:
......@@ -49,7 +51,7 @@ def read_results_from_csv_file(csv_file):
return results
def get_weight(result):
return min(result.percentage1, result.percentage2)
return max(result.percentage1, result.percentage2)
def get_color(percentage):
if (percentage >= 90): return "#D83018" # Red
......@@ -57,12 +59,21 @@ def get_color(percentage):
elif (percentage >= 70): return "#601848" # Purple
else: return "#000000" # Black
def create_results_graph(results):
def include(result, percentage_threshold, lines_threshold):
    """Tell whether a result passes both the percentage and the lines threshold.

    The result is kept only when its weight (as computed by get_weight) is at
    least percentage_threshold and its lines attribute is at least
    lines_threshold.
    """
    meets_percentage = percentage_threshold <= get_weight(result)

    # Guard clause: the lines check is only evaluated once the weight passed.
    if not meets_percentage:
        return False

    return lines_threshold <= result.lines
def create_results_graph(results, percentage_threshold, lines_threshold):
graph = nx.Graph()
logging.debug("Creating total graph from {} results".format(len(results)))
logging.debug("Creating graph from {} initial results".format(len(results)))
# Placeholders are required here: without them str.format() silently drops both values.
logging.debug("Thresholds percentages/lines: {}/{}".format(percentage_threshold, lines_threshold))
for result in results:
if not include(result, percentage_threshold, lines_threshold):
continue
weight = get_weight(result)
edge = (result.id1, result.id2, weight)
color = get_color(weight)
......@@ -79,38 +90,57 @@ def create_results_graph(results):
graph.add_weighted_edges_from([edge], **attributes)
logging.debug(
"Graph contains {} nodes and {} edged".format(
graph.number_of_nodes(),
graph.number_of_edges()))
return graph
def write_cluster_files(subgraph, index, cluster_dot_file, create_svg_files):
    """Write one cluster graph to a DOT file and optionally render it with dot.

    Args:
        subgraph: Graph of the cluster; must provide number_of_nodes() and
            number_of_edges() and be writable by nx_pydot.write_dot.
        index: Identifier of the cluster, used only in log messages.
        cluster_dot_file: Path of the DOT file to write.
        create_svg_files: If true, the external "dot" program is invoked on
            the written DOT file to produce an SVG.

    A non-zero exit status of "dot" is logged as a warning instead of being
    silently ignored. A missing "dot" executable still raises from
    subprocess.run, as before.
    """
    logging.debug(
        "Writing cluster {} with {}/{} nodes/edge to file {}".format(
            index,
            subgraph.number_of_nodes(),
            subgraph.number_of_edges(),
            cluster_dot_file))

    nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)

    if not create_svg_files:
        return

    logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))

    # "-O" lets dot derive the output file name from the input file name.
    completed = subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])

    if completed.returncode != 0:
        # Best effort: keep processing, but make the failed rendering visible.
        logging.warning(
            "dot exited with status {} while rendering {}".format(
                completed.returncode,
                cluster_dot_file))
def main(
results_csv_file=DEFAULT_RESULTS_CSV_FILE,
total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE,
cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN):
cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN,
percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE,
lines_threshold=DEFAULT_THRESHOLD_LINES,
create_svg_files=DEFAULT_CREATE_SVG_FILES):
logutils.configure_level_and_format()
results = read_results_from_csv_file(results_csv_file)
graph = create_results_graph(results)
graph = create_results_graph(results, percentage_threshold, lines_threshold)
logging.info("Writing total graph to {}".format(total_graph_dot_file))
nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)
logging.info("Computing clusters")
logging.info("Computing connected component (CC) clusters")
clusters = sorted(nx.connected_components(graph), key=len, reverse=True)
cluster_dot_file_pattern = cluster_dot_file_pattern.format("cc", "{}")
logging.info(
"Found {} clusters, will write them to files {}".format(
"Found {} CC clusters, will write them to files {}".format(
len(clusters),
cluster_dot_file_pattern.format("#")))
for index, cluster in enumerate(clusters):
subgraph = graph.subgraph(cluster).copy()
cluster_dot_file = cluster_dot_file_pattern.format(index)
logging.debug("Writing cluster {} to file {}".format(index, cluster_dot_file))
nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)
logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))
subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])
dot_file = cluster_dot_file_pattern.format(index)
write_cluster_files(subgraph, index, dot_file, create_svg_files)
if __name__ == "__main__":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment