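# cluster.py
#
# NOTE: this listing was captured from a GitLab web view. The banner shown with
# it read: "To receive notifications about scheduled maintenance, please
# subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can
# subscribe to the mailing-list at https://sympa.ethz.ch". The banner, the
# file-size caption and the line-number gutter are not part of the module.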
import re
# import argparse
import logging
import csv
import subprocess
import pydot
import networkx as nx
from dataclasses import dataclass
from .utils import logging as logutils

# Default CSV input path for main().
DEFAULT_RESULTS_CSV_FILE = "moss-report.csv"
# Default DOT output path for the graph built from all results.
DEFAULT_TOTAL_GRAPH_DOT_FILE = "moss-report.dot"
# Not referenced by the code visible in this module.
DEFAULT_CLUSTERS_DOT_FILE = "clusters.dot"
# Per-cluster DOT file name; main() fills "{}" with the cluster index via str.format.
DEFAULT_CLUSTER_DOT_FILE_PATTERN = "cluster-{}.dot"
# TODO(review): this was a bare `DEFAULT_THRESHOLD` expression, which raises
# NameError when the module is imported. The intended value is not visible here
# and nothing in this module reads it, so it is bound to None until confirmed.
DEFAULT_THRESHOLD = None
@dataclass # USE AS IF frozen=True
class MossResult:
  """One row of the results CSV.

  Instances are built positionally from a CSV row (``MossResult(*row)``), so
  the field order below must match the column order of the CSV file. Values
  arriving as strings are converted to numbers in ``__post_init__``.

  The class is not declared ``frozen=True`` because ``__post_init__`` assigns
  to the fields; treat instances as read-only after construction.
  """
  id1: str
  percentage1: int
  id2: str
  percentage2: int
  # Converted with float() below, so the annotation is float rather than int.
  avg_percentage: float
  lines: int
  match_file: str

  def __post_init__(self):
    """Coerce the numeric fields to their annotated types.

    Raises:
      ValueError: if a numeric field cannot be parsed by int()/float().
    """
    # Despite the (mandatory) type annotations above, there is no guarantee that
    # the field values have the expected type, hence the explicit conversions.
    self.percentage1 = int(self.percentage1)
    self.percentage2 = int(self.percentage2)
    self.avg_percentage = float(self.avg_percentage)
    self.lines = int(self.lines)
def read_results_from_csv_file(csv_file):
  """Read the results CSV and return its rows as a list of MossResult.

  The first line of the file is treated as a header and skipped. Every other
  non-empty row is passed positionally to ``MossResult``, so the column order
  must match the field order of that class.

  Args:
    csv_file: path of the comma-separated file to read.

  Returns:
    A list with one MossResult per data row (empty if there are none).

  Raises:
    OSError: if the file cannot be opened.
    TypeError: if a row has a different number of columns than MossResult
      has fields.
    ValueError: if a numeric column cannot be converted by MossResult.
  """
  logging.info("Reading results from {}".format(csv_file))

  # newline="" lets the csv module do its own newline handling.
  with open(csv_file, newline="") as csv_fh:
    csv_reader = csv.reader(csv_fh, delimiter=",", quotechar='"')

    next(csv_reader, None) # Skip CSV header line

    # csv.reader yields an empty list for a blank line (e.g. a trailing one);
    # skip those instead of failing on MossResult() with no arguments.
    results = [MossResult(*row) for row in csv_reader if row]

  logging.debug("Read {} results".format(len(results)))

  return results

def get_weight(result):
  """Return the smaller of the two percentages of a result.

  create_results_graph() uses this value as the edge weight.
  """
  first, second = result.percentage1, result.percentage2
  return min(first, second)

def get_color(percentage):
  """Map a percentage to a hex color string.

  Returns red for >= 90, orange for >= 80, purple for >= 70 and black for
  anything lower.
  """
  # Ordered from the highest lower bound down; the first band that fits wins.
  bands = (
    (90, "#D83018"), # Red
    (80, "#F07241"), # Orange
    (70, "#601848"), # Purple
  )
  for lower_bound, hex_color in bands:
    if percentage >= lower_bound:
      return hex_color
  return "#000000" # Black

def create_results_graph(results):
  """Build an undirected graph with one weighted edge per result.

  Each result connects ``id1`` and ``id2``. The edge weight comes from
  get_weight() and its color from get_color(); the remaining attributes are
  stored on the edge as given below.

  Args:
    results: sequence of result objects exposing id1, id2, percentage1,
      percentage2, lines and match_file.

  Returns:
    The populated networkx Graph.
  """
  logging.debug("Creating total graph from {} results".format(len(results)))

  total_graph = nx.Graph()

  for entry in results:
    similarity = get_weight(entry)
    shade = get_color(similarity)
    caption = "{0}% ({1})".format(similarity, entry.lines)

    # Edge attributes; label and font share the edge color, and both URL
    # attributes point at the result's match file.
    edge_style = dict(
      color=shade,
      penwidth=2,
      label=caption,
      labelURL=entry.match_file,
      URL=entry.match_file,
      target="match",
      fontcolor=shade,
    )

    total_graph.add_weighted_edges_from(
      [(entry.id1, entry.id2, similarity)], **edge_style)

  return total_graph

def main(
    results_csv_file=DEFAULT_RESULTS_CSV_FILE,
    total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE,
    cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN):
  """Read the results CSV, write the total graph and one graph per cluster.

  Steps:
    1. Read the results and build the graph over all of them.
    2. Write that graph to ``total_graph_dot_file``.
    3. Split the graph into connected components, largest first.
    4. Write each component to its own DOT file and call ``dot`` to render it
       as SVG.

  Args:
    results_csv_file: path of the CSV file to read.
    total_graph_dot_file: path of the DOT file for the complete graph.
    cluster_dot_file_pattern: str.format pattern with one ``{}`` placeholder
      that receives the zero-based cluster index.

  Raises:
    FileNotFoundError: if the ``dot`` executable cannot be found.
  """
  logutils.configure_level_and_format()

  results = read_results_from_csv_file(results_csv_file)
  graph = create_results_graph(results)

  logging.info("Writing total graph to {}".format(total_graph_dot_file))
  nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)

  logging.info("Computing clusters")
  # Largest cluster first, so index 0 is the biggest connected component.
  clusters = sorted(nx.connected_components(graph), key=len, reverse=True)

  logging.info(
    "Found {} clusters, will write them to files {}".format(
      len(clusters),
      cluster_dot_file_pattern.format("#")))

  for index, cluster in enumerate(clusters):
    # copy() detaches the subgraph from the view onto the total graph.
    subgraph = graph.subgraph(cluster).copy()

    cluster_dot_file = cluster_dot_file_pattern.format(index)
    logging.debug("Writing cluster {} to file {}".format(index, cluster_dot_file))
    nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)

    logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))
    # -O makes dot derive the output file name from the input file name.
    completed = subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])
    if completed.returncode != 0:
      # Rendering is best-effort: report the failure, keep processing the
      # remaining clusters.
      logging.warning(
        "dot exited with status {} for {}".format(
          completed.returncode, cluster_dot_file))


# Script entry point: run main() with its default file names.
# NOTE(review): the module uses a relative import (from .utils ...), so it
# presumably has to be started as part of its package (python -m ...) rather
# than as a standalone script path - confirm.
if __name__ == "__main__":
  main()