To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

cluster.py 4.44 KB
Newer Older
1
2
3
4
import csv
import logging
import re
import subprocess
# import argparse
from dataclasses import dataclass

import networkx as nx
import pydot

from .utils import logging as logutils
10
11

# Default file names used by main() when the caller does not override them.
DEFAULT_RESULTS_CSV_FILE = "moss-report.csv"
DEFAULT_TOTAL_GRAPH_DOT_FILE = "moss-report.dot"
# NOTE(review): not referenced by any code visible in this file -- confirm it is still needed.
DEFAULT_CLUSTERS_DOT_FILE = "clusters.dot"
# Two positional placeholders: main() fills the first with "cc" and the
# second with the index of the cluster being written.
DEFAULT_CLUSTER_DOT_FILE_PATTERN = "cluster-{}-{}.dot"
# A result is kept only if its weight (see get_weight) reaches the percentage
# threshold and its line count reaches the lines threshold (see include).
DEFAULT_THRESHOLD_PERCENTAGE = 90
DEFAULT_THRESHOLD_LINES = 50
# Whether main() asks for an SVG rendering next to each cluster dot file.
DEFAULT_CREATE_SVG_FILES = True
18

scmalte's avatar
scmalte committed
19
20
21
@dataclass # USE AS IF frozen=True
class MossResult:
  """One result row, as read from the results CSV file.

  The fields are declared in the order in which they are passed positionally
  (see read_results_from_csv_file, which builds instances via MossResult(*row)).

  The class is not declared frozen=True because __post_init__ assigns to the
  fields in order to normalise their types; after construction, instances
  should be treated as immutable.
  """
  # Identifier of the first compared item; used as a graph node name.
  id1: str
  # Percentage associated with id1.
  percentage1: int
  # Identifier of the second compared item; used as a graph node name.
  id2: str
  # Percentage associated with id2.
  percentage2: int
  # Stored as float (see __post_init__); presumably the mean of the two
  # percentages -- confirm against the producer of the CSV file.
  avg_percentage: float
  # Line count of the result; compared against the lines threshold.
  lines: int
  # Location of the match details; used as link target on graph edges.
  match_file: str

  def __post_init__(self):
    """Coerce the numeric fields to their declared types."""
    # Despite the (mandatory) type annotations above, there is no guarantee that
    # the field values have the expected type, hence the explicit conversions.
    # Rows coming from csv.reader, for instance, contain only strings.
    self.percentage1 = int(self.percentage1)
    self.percentage2 = int(self.percentage2)
    self.avg_percentage = float(self.avg_percentage)
    self.lines = int(self.lines)
36

scmalte's avatar
scmalte committed
37
38
def read_results_from_csv_file(csv_file):
  """Read all results from a CSV file.

  The first line of the file is treated as a header and skipped. Every
  following non-blank row is passed positionally to MossResult, so its
  columns must appear in the order of the MossResult fields.

  Args:
    csv_file: Path of the CSV file to read.

  Returns:
    A list of MossResult objects, in file order.

  Raises:
    OSError: If the file cannot be opened.
    TypeError: If a row does not have the number of columns MossResult expects.
    ValueError: If a numeric column cannot be converted.
  """
  logging.info("Reading results from {}".format(csv_file))

  # newline="" is what the csv module expects of file objects it reads from.
  with open(csv_file, newline="") as csv_fh:
    csv_reader = csv.reader(csv_fh, delimiter=",", quotechar='"')

    next(csv_reader, None) # Skip CSV header line

    # csv.reader yields an empty list for a blank line; skip those instead
    # of failing in MossResult(*row) with a missing-arguments TypeError.
    results = [MossResult(*row) for row in csv_reader if row]

  logging.debug("Read {} results".format(len(results)))

  return results

def get_weight(result):
  """Return the weight of a result: the larger of its two percentages.

  Args:
    result: Object with percentage1 and percentage2 attributes.

  Returns:
    max(result.percentage1, result.percentage2).
  """
  return max(result.percentage1, result.percentage2)
scmalte's avatar
scmalte committed
55
56
57
58
59
60
61

def get_color(percentage):
  """Return the hex color string assigned to a percentage.

  The bands are checked from the highest lower bound downwards, and the first
  band whose lower bound the percentage reaches wins. Anything below the
  lowest band is black.
  """
  bands = (
    (90, "#D83018"), # Red
    (80, "#F07241"), # Orange
    (70, "#601848"), # Purple
  )

  for lower_bound, hex_color in bands:
    if percentage >= lower_bound:
      return hex_color

  return "#000000" # Black

62
63
64
65
66
67
def include(result, percentage_threshold, lines_threshold):
  """Decide whether a result passes both thresholds.

  A result is included when its weight (see get_weight) is at least
  percentage_threshold and its lines value is at least lines_threshold.
  The percentage is checked first; the line count is only looked at if the
  percentage check succeeds.
  """
  if not (get_weight(result) >= percentage_threshold):
    return False

  return result.lines >= lines_threshold

def create_results_graph(results, percentage_threshold, lines_threshold):
  """Build an undirected graph from the results that pass both thresholds.

  Each included result becomes one edge between result.id1 and result.id2.
  The edge weight is the result's weight (see get_weight); the remaining
  edge attributes (color, label, link target, ...) are presentation
  attributes that end up in the dot output written from this graph.

  Args:
    results: Iterable of result objects (with a len(), which is logged).
    percentage_threshold: Minimum weight for a result to be included.
    lines_threshold: Minimum lines value for a result to be included.

  Returns:
    A networkx.Graph holding one edge per included result.
  """
  graph = nx.Graph()

  logging.debug("Creating graph from {} initial results".format(len(results)))
  # The message needs placeholders, otherwise format() silently drops the
  # threshold values and only the bare prefix is logged.
  logging.debug(
    "Thresholds percentages/lines: {}/{}".format(
      percentage_threshold,
      lines_threshold))

  for result in results:
    if not include(result, percentage_threshold, lines_threshold):
      continue

    weight = get_weight(result)
    edge = (result.id1, result.id2, weight)
    color = get_color(weight)

    attributes = {
      "color": color,
      "penwidth": 2,
      "label": "{0}% ({1})".format(weight, result.lines),
      # Both the label and the edge itself link to the match details.
      "labelURL": result.match_file,
      "URL": result.match_file,
      "target": "match",
      "fontcolor": color
    }

    graph.add_weighted_edges_from([edge], **attributes)

  logging.debug(
    "Graph contains {} nodes and {} edges".format(
      graph.number_of_nodes(),
      graph.number_of_edges()))

  return graph

100
101
102
103
104
105
106
107
108
109
110
111
112
113
def write_cluster_files(subgraph, index, cluster_dot_file, create_svg_files):
  """Write one cluster to a dot file and optionally render it as SVG.

  Args:
    subgraph: The networkx graph of the cluster to write.
    index: Index of the cluster; only used in log messages.
    cluster_dot_file: Path of the dot file to write.
    create_svg_files: If true, the external "dot" program is invoked on the
      written file to produce an SVG file.

  Raises:
    OSError: If the "dot" executable cannot be started (for example because
      it is not installed). A non-zero exit status of "dot" does not raise;
      it is logged as a warning.
  """
  logging.debug(
    "Writing cluster {} with {}/{} nodes/edge to file {}".format(
      index,
      subgraph.number_of_nodes(),
      subgraph.number_of_edges(),
      cluster_dot_file))

  nx.drawing.nx_pydot.write_dot(subgraph, cluster_dot_file)

  if not create_svg_files:
    return

  logging.debug("Calling dot to create SVG file from {}".format(cluster_dot_file))
  # -O lets dot derive the output file name from the input file name and
  # the requested output format.
  completed = subprocess.run(["dot", "-Tsvg", "-O", cluster_dot_file])

  # Rendering is best effort, but a failure should not pass unnoticed.
  if completed.returncode != 0:
    logging.warning(
      "dot exited with status {} for {}".format(
        completed.returncode,
        cluster_dot_file))

scmalte's avatar
scmalte committed
114
115
116
def main(
    results_csv_file=DEFAULT_RESULTS_CSV_FILE,
    total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE,
    cluster_dot_file_pattern=DEFAULT_CLUSTER_DOT_FILE_PATTERN,
    percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE,
    lines_threshold=DEFAULT_THRESHOLD_LINES,
    create_svg_files=DEFAULT_CREATE_SVG_FILES):
  """Read results, build the results graph and write it out per cluster.

  Steps: configure logging, read the results CSV file, build the graph of
  results that pass the thresholds, write the whole graph to one dot file,
  then write every connected component of the graph to its own dot file
  (largest component first).

  Args:
    results_csv_file: Path of the CSV file with the results to read.
    total_graph_dot_file: Path of the dot file for the complete graph.
    cluster_dot_file_pattern: Format string with two positional
      placeholders; the first receives the cluster kind ("cc"), the second
      the cluster index.
    percentage_threshold: Minimum weight for a result to enter the graph.
    lines_threshold: Minimum lines value for a result to enter the graph.
    create_svg_files: Whether to also render each cluster file as SVG.
  """
  logutils.configure_level_and_format()

  results = read_results_from_csv_file(results_csv_file)
  graph = create_results_graph(results, percentage_threshold, lines_threshold)

  logging.info("Writing total graph to {}".format(total_graph_dot_file))
  nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)

  logging.info("Computing connected component (CC) clusters")
  # Largest clusters first, so that low indices denote the biggest clusters.
  clusters = sorted(nx.connected_components(graph), key=len, reverse=True)

  # Fix the cluster kind now and keep one "{}" placeholder for the index.
  cluster_dot_file_pattern = cluster_dot_file_pattern.format("cc", "{}")

  logging.info(
    "Found {} CC clusters, will write them to files {}".format(
      len(clusters),
      cluster_dot_file_pattern.format("#")))

  for index, cluster in enumerate(clusters):
    # Copy so that each cluster is an independent graph rather than a view
    # onto the total graph.
    subgraph = graph.subgraph(cluster).copy()
    dot_file = cluster_dot_file_pattern.format(index)
    write_cluster_files(subgraph, index, dot_file, create_svg_files)
144
145
146
147


# Script entry point: call main() with all of its default arguments.
if __name__ == "__main__":
  main()