Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
dinfk-lecturers
mossutils
Commits
76420566
Commit
76420566
authored
May 01, 2020
by
scmalte
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cluster.py: added several features, including generation of a summary CSV
parent
0091a1ae
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
97 additions
and
37 deletions
+97
-37
mossutils/cluster.py
mossutils/cluster.py
+94
-36
setup.py
setup.py
+3
-1
No files found.
mossutils/cluster.py
View file @
76420566
...
...
@@ -4,35 +4,55 @@ import logging
import
csv
import
subprocess
import
pydot
import
dataclasses
import
itertools
import
networkx
as
nx
from
dataclass
es
import
d
ataclass
from
dataclass
_csv
import
D
ataclass
Reader
from
.utils
import
logging
as
logutils
# Default input/output locations and thresholds used by main().
DEFAULT_RESULTS_CSV_FILE = "moss-report.csv"
DEFAULT_TOTAL_GRAPH_DOT_FILE = "moss-report.dot"
DEFAULT_CLUSTERS_DOT_FILE = "clusters.dot"

# Superseded by DEFAULT_CLUSTER_FILE_PATTERN below; kept only so that any
# remaining reference to the old name keeps resolving.
DEFAULT_CLUSTER_DOT_FILE_PATTERN = "cluster-{}-{}.dot"

# Two placeholders: the cluster index and the file extension,
# e.g. DEFAULT_CLUSTER_FILE_PATTERN.format(0, "dot") == "cluster-0.dot".
DEFAULT_CLUSTER_FILE_PATTERN = "cluster-{}.{}"

DEFAULT_THRESHOLD_PERCENTAGE = 90
DEFAULT_THRESHOLD_LINES = 50
DEFAULT_CREATE_SVG_FILES = True
DEFAULT_SUMMARY_CSV_FILE = "clusters.csv"
@dataclasses.dataclass
class MossResult:
    """One row of a Moss report: a pairwise match between two submissions.

    The dataclass is not declared ``frozen`` because ``__post_init__``
    assigns to its own fields in order to normalise their types.
    """

    id1: str
    percentage1: int
    id2: str
    percentage2: int
    percentage_avg: float
    lines: int
    match_file: str

    def __post_init__(self):
        """Coerce the numeric fields to their annotated types.

        Raises:
            ValueError: if a numeric field cannot be converted.
        """
        # Despite the (mandatory) type annotations above, there is no guarantee that
        # the field values have the expected type, hence the explicit conversions.
        self.percentage1 = int(self.percentage1)
        self.percentage2 = int(self.percentage2)
        # The field is declared as `percentage_avg`; converting the former name
        # `avg_percentage` would raise AttributeError on every construction.
        self.percentage_avg = float(self.percentage_avg)
        self.lines = int(self.lines)

    def flat_headers(self):
        """Return the field names, in declaration order, as a list."""
        return [f.name for f in dataclasses.fields(self)]

    def flat_data(self):
        """Return the field values, in declaration order, as a tuple."""
        return dataclasses.astuple(self)
@dataclasses.dataclass
class ClusterEntry:
    """A single match result together with the cluster it was assigned to.

    ``flat_headers`` / ``flat_data`` present the entry as one flat row in
    which the nested ``result`` is replaced, in place, by its own columns.
    """

    cluster_id: int
    result: MossResult
    dot_file: str
    svg_file: str

    def flat_headers(self):
        """Return the column names of the flattened row as a list."""
        own_names = [f.name for f in dataclasses.fields(self)]
        # Position 1 is the `result` field; its own headers are spliced in there.
        leading, trailing = own_names[:1], own_names[2:]
        return leading + self.result.flat_headers() + trailing

    def flat_data(self):
        """Return the values of the flattened row as a tuple."""
        own_values = dataclasses.astuple(self)
        # Position 1 is the `result` field; its own values are spliced in there.
        leading, trailing = own_values[:1], own_values[2:]
        return leading + self.result.flat_data() + trailing
def
read_results_from_csv_file
(
csv_file
):
results
=
[]
...
...
@@ -40,11 +60,13 @@ def read_results_from_csv_file(csv_file):
logging
.
info
(
"Reading results from {}"
.
format
(
csv_file
))
with
open
(
csv_file
,
newline
=
""
)
as
csv_fh
:
csv_reader
=
csv
.
r
eader
(
csv_fh
,
delimiter
=
","
,
quotechar
=
'"'
)
csv_reader
=
DataclassR
eader
(
csv_fh
,
MossResult
,
delimiter
=
","
,
quotechar
=
'"'
)
next
(
csv_reader
,
None
)
# Skip CSV header line
# csv_reader = csv.reader(csv_fh, delimiter=",", quotechar='"')
# next(csv_reader, None) # Skip CSV header line
# results = [MossResult(*row) for row in csv_reader]
results
=
[
MossResult
(
*
row
)
for
row
in
csv_reader
]
results
=
list
(
csv_reader
)
logging
.
debug
(
"Read {} results"
.
format
(
len
(
results
)))
...
...
@@ -64,7 +86,7 @@ def include(result, percentage_threshold, lines_threshold):
percentage_threshold
<=
get_weight
(
result
)
and
lines_threshold
<=
result
.
lines
)
def
create
_results_graph
(
results
,
percentage_threshold
,
lines_threshold
):
def
get
_results_graph
(
results
,
percentage_threshold
,
lines_threshold
):
graph
=
nx
.
Graph
()
logging
.
debug
(
"Creating graph from {} initial results"
.
format
(
len
(
results
)))
...
...
@@ -79,13 +101,16 @@ def create_results_graph(results, percentage_threshold, lines_threshold):
color
=
get_color
(
weight
)
attributes
=
{
# Attributes for GraphViz
"color"
:
color
,
"penwidth"
:
2
,
"label"
:
"{0}% ({1})"
.
format
(
weight
,
result
.
lines
),
"labelURL"
:
result
.
match_file
,
"URL"
:
result
.
match_file
,
"target"
:
"match"
,
"fontcolor"
:
color
"fontcolor"
:
color
,
# Attributes for internal bookkeeping
"_result"
:
result
}
graph
.
add_weighted_edges_from
([
edge
],
**
attributes
)
...
...
@@ -97,7 +122,7 @@ def create_results_graph(results, percentage_threshold, lines_threshold):
return
graph
def
wri
te_cluster_files
(
subgraph
,
index
,
cluster_dot_file
,
c
rea
te_svg_file
s
):
def
crea
te_cluster_
dot_and_svg_
files
(
subgraph
,
index
,
cluster_dot_file
,
c
lus
te
r
_svg_file
=
None
):
logging
.
debug
(
"Writing cluster {} with {}/{} nodes/edge to file {}"
.
format
(
index
,
...
...
@@ -107,40 +132,73 @@ def write_cluster_files(subgraph, index, cluster_dot_file, create_svg_files):
nx
.
drawing
.
nx_pydot
.
write_dot
(
subgraph
,
cluster_dot_file
)
if
create_svg_files
:
logging
.
debug
(
"Calling dot to create SVG file from {}"
.
format
(
cluster_dot_file
))
subprocess
.
run
([
"dot"
,
"-Tsvg"
,
"-O"
,
cluster_dot_file
])
if
cluster_svg_file
:
logging
.
debug
(
"Calling dot to create SVG {} file from {}"
.
format
(
cluster_svg_file
,
cluster_dot_file
))
subprocess
.
run
([
"dot"
,
"-Tsvg"
,
"-o{}"
.
format
(
cluster_svg_file
),
cluster_dot_file
])
def create_clusters(graph, cluster_file_pattern, create_svg_files):
    """Split ``graph`` into connected components and write one file set per cluster.

    Args:
        graph: graph whose edges carry a ``"_result"`` attribute.
        cluster_file_pattern: format string taking the cluster index and a
            file extension (``"dot"`` or ``"svg"``).
        create_svg_files: when truthy, an SVG file name is also derived and
            passed on to ``create_cluster_dot_and_svg_files``.

    Returns:
        A list with one ``ClusterEntry`` per edge of every cluster. Clusters
        are numbered by descending node count.
    """
    logging.info("Computing connected component (CC) clusters")
    components = nx.connected_components(graph)
    clusters = sorted(components, key=len, reverse=True)

    example_name = cluster_file_pattern.format("#", "dot")
    logging.info("Found {} CC clusters, will write them to files {}".format(len(clusters), example_name))

    cluster_entries = []

    for index, cluster in enumerate(clusters):
        # Copy so the cluster graph is independent of the total graph.
        subgraph = graph.subgraph(cluster).copy()

        dot_file = cluster_file_pattern.format(index, "dot")
        svg_file = cluster_file_pattern.format(index, "svg") if create_svg_files else None

        create_cluster_dot_and_svg_files(subgraph, index, dot_file, svg_file)

        cluster_entries.extend(
            ClusterEntry(index, data["_result"], dot_file, svg_file)
            for (_, _, data) in subgraph.edges(data=True)
        )

    return cluster_entries
def create_summary_csv_file(cluster_entries, summary_csv_file):
    """Write all cluster entries as rows of a single CSV file.

    The header row is taken from the first entry's ``flat_headers()``; each
    entry contributes one row via ``flat_data()``. If ``cluster_entries`` is
    empty, the message is still logged but no file is created.

    Args:
        cluster_entries: sequence of objects providing ``flat_headers()`` and
            ``flat_data()``.
        summary_csv_file: path of the CSV file to write.
    """
    logging.info("Writing summary file {}".format(summary_csv_file))

    if not cluster_entries:
        return

    with open(summary_csv_file, "w", newline="") as csv_fh:
        csv_writer = csv.writer(csv_fh)
        header_row = cluster_entries[0].flat_headers()
        csv_writer.writerow(header_row)

        for entry in cluster_entries:
            data_row = entry.flat_data()
            csv_writer.writerow(data_row)
def main(
        results_csv_file=DEFAULT_RESULTS_CSV_FILE,
        total_graph_dot_file=DEFAULT_TOTAL_GRAPH_DOT_FILE,
        cluster_file_pattern=DEFAULT_CLUSTER_FILE_PATTERN,
        percentage_threshold=DEFAULT_THRESHOLD_PERCENTAGE,
        lines_threshold=DEFAULT_THRESHOLD_LINES,
        create_svg_files=DEFAULT_CREATE_SVG_FILES,
        summary_csv_file=DEFAULT_SUMMARY_CSV_FILE):
    """Run the clustering pipeline end to end.

    Steps, in order: configure logging, read the results CSV, build the
    results graph with the given thresholds, write the total graph as a dot
    file, create the per-cluster files, and write the summary CSV.

    Args:
        results_csv_file: path of the results CSV to read.
        total_graph_dot_file: path of the dot file for the whole graph.
        cluster_file_pattern: format string taking a cluster index and a file
            extension, used to name the per-cluster files.
        percentage_threshold: passed to ``get_results_graph``.
        lines_threshold: passed to ``get_results_graph``.
        create_svg_files: whether SVG files are created for the clusters.
        summary_csv_file: path of the summary CSV to write.
    """
    logutils.configure_level_and_format()

    results = read_results_from_csv_file(results_csv_file)
    graph = get_results_graph(results, percentage_threshold, lines_threshold)

    logging.info("Writing total graph to {}".format(total_graph_dot_file))
    nx.drawing.nx_pydot.write_dot(graph, total_graph_dot_file)

    # Cluster detection and per-cluster file output are delegated to
    # create_clusters(), which also returns the rows for the summary file.
    cluster_entries = create_clusters(graph, cluster_file_pattern, create_svg_files)

    create_summary_csv_file(cluster_entries, summary_csv_file)
if
__name__
==
"__main__"
:
...
...
setup.py
View file @
76420566
...
...
@@ -19,8 +19,10 @@ setup(
'mosspy'
,
'lxml'
,
'bs4'
,
'dataclass-csv'
,
'pydot'
,
'networkx'
'networkx'
,
'Jinja2'
],
# scripts=['bin/mossutils-moss'],
entry_points
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment