
Commit 0b5bd718 authored by felikskiszkurno

Implemented Stochastic Gradient Descent and Gradient Boosting classifiers.

Added a plot summarizing all ML methods and added helper functions to avoid repeating code within the slopestabilityML module.
Fixed multiple bugs.
All ML methods now train on and predict the same datasets thanks to a shared random seed (see the sketch below).
parent 44d9eb9f
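The reproducible split comes from the new split_dataset helper, which reseeds Python's random module with the passed seed before drawing the prediction subset, so SVM, GBC and SGD all receive the same train/prediction partition. Below is a minimal standalone sketch of that behaviour, assuming hypothetical test names and using a plain set difference in place of slopestabilitytools.set_diff:

import math
import random

def split_sketch(test_names, random_seed):
    # Reseeding before each call makes random.choices deterministic, so every
    # classifier run with the same seed gets the identical train/prediction split.
    random.seed(random_seed)
    test_prediction = random.choices(list(test_names), k=math.ceil(len(test_names) * 0.1))
    test_training = sorted(set(test_names) - set(test_prediction))
    return test_training, test_prediction

names = ['slope01', 'slope02', 'slope03', 'slope04', 'slope05']  # hypothetical test names
assert split_sketch(names, 999) == split_sketch(names, 999)  # same seed, same split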
@@ -61,4 +61,6 @@ else:
for test_name in test_results.keys():
slopestabilitytools.plot_and_save(test_name, test_results[test_name], 'Test: ' + test_name)
svm_accuracy_score, svm_accuracy_labels = slopestabilityML.svm_run(test_results)
ml_results = slopestabilityML.run_all_tests(test_results)
#svm_accuracy_score, svm_accuracy_labels = slopestabilityML.svm_run(test_results)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
from .gbc_run import gbc_run
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
from sklearn import ensemble
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classification
# TODO: as in svm_run, allow the training tests to be defined externally
def gbc_run(test_results, random_seed):
test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)
# Create classifier
clf = ensemble.GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
# Train classifier
accuracy_labels, accuracy_score = slopestabilityML.run_classification(test_training, test_prediction, test_results, clf)
# Plot
slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'GBC')
return accuracy_score, accuracy_labels
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
from .sgd_run import sgd_run
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
from sklearn.linear_model import SGDClassifier
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classification
def sgd_run(test_results, random_seed):
test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)
# Create classifier
clf = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
# Train classifier
accuracy_labels, accuracy_score = slopestabilityML.run_classification(test_training, test_prediction, test_results,
clf)
# Plot
slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'SGD')
return accuracy_score, accuracy_labels
@@ -7,56 +7,50 @@ Created on 16.01.2021
"""
from sklearn import svm
import slopestabilitytools
import random
import math
import numpy as np
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import random
# TODO: for comparability with other ML methods, add option to define which test should be used for training externally
def svm_run(test_results):
def svm_run(test_results, random_seed):
# https://stackabuse.com/implementing-svm-and-kernel-svm-with-pythons-scikit-learn/
test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)
accuracy_score = []
accuracy_labels = []
test_number = len(test_results.keys())
test_prediction = random.choices(list(test_results.keys()),
k=math.ceil(test_number * 0.1))
test_training = slopestabilitytools.set_diff(list(test_results.keys()), set(test_prediction))
print(test_prediction)
# Create classifier
clf = svm.SVC(gamma=0.001, C=100, kernel='linear')
# Train classifier
for test_name in test_training:
# Prepare data
data_set = test_results[test_name]
x_train = data_set.drop(['Z', 'INM', 'CLASS'], axis='columns')
y_train = data_set['CLASS']
# Train classifier
clf.fit(x_train, y_train)
# Predict with classifier
for test_name_pred in test_prediction:
# Prepare data
data_set_pred = test_results[test_name_pred]
print(data_set_pred)
x_question = data_set_pred.drop(['Z', 'INM', 'CLASS'], axis='columns')
y_answer = data_set_pred['CLASS']
y_pred = clf.predict(x_question)
# Evaluate result
accuracy_score.append(len(np.where(y_pred == y_answer)) / len(y_answer) * 100)
accuracy_labels.append(test_name_pred)
accuracy_labels, accuracy_score = slopestabilityML.run_classification(test_training, test_prediction, test_results, clf)
# for test_name in test_training:
#
# # Prepare data
# x_train, y_train = slopestabilityML.preprocess_data(test_results[test_name])
#
# # Train classifier
# clf.fit(x_train, y_train)
#
# # Predict with classifier
# for test_name_pred in test_prediction:
#
# # Prepare data
# x_question, y_answer = slopestabilityML.preprocess_data(test_results[test_name_pred])
#
# y_pred = clf.predict(x_question)
#
# # Evaluate result
# accuracy_score.append(len(np.where(y_pred == y_answer)) / len(y_answer) * 100)
# accuracy_labels.append(test_name_pred)
# Plot
slopestabilityML.plot_results(accuracy_labels, accuracy_score)
slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'SVM')
return accuracy_score, accuracy_labels
@@ -9,6 +9,12 @@ Created on 16.01.2021
from .combine_results import combine_results
from .plot_results import plot_results
from .run_all_tests import run_all_tests
from .split_dataset import split_dataset
from .run_classification import run_classification
from .preprocess_data import preprocess_data
from .SVM.svm_run import svm_run
from .GBC.gbc_run import gbc_run
from .SGD.sgd_run import sgd_run
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
import matplotlib.pyplot as plt
def combine_results():
return
def combine_results(ml_results):
fig = plt.figure()
#ax = fig.subplots(1)
fig.suptitle('Accuracy of different ML methods')
for method_name in ml_results.keys():
plt.scatter(ml_results[method_name]['labels'], ml_results[method_name]['score'], label=method_name)
plt.legend(loc='best')
fig.savefig('results/figures/ML_summary.eps')
fig.savefig('results/figures/ML_summary.png')
fig.savefig('results/figures/ML_summary.pdf')
\ No newline at end of file
@@ -9,17 +9,17 @@ Created on 17.01.2021
import matplotlib.pyplot as plt
def plot_results(accuracy_labels, accuracy_score):
def plot_results(accuracy_labels, accuracy_score, clf_name):
fig = plt.figure()
ax = plt.subplot(111)
ax.scatter(accuracy_labels, accuracy_score)
plt.xlabel('Test name')
plt.ylabel('Correct points [%]')
plt.title('SVM classification accuracy')
plt.title(clf_name+' classification accuracy')
print('plot script is executed')
fig.savefig('results/figures/SVM.eps')
fig.savefig('results/figures/SVM.pdf')
fig.savefig('results/figures/SVM.png')
fig.savefig('results/figures/'+clf_name+'.eps')
fig.savefig('results/figures/'+clf_name+'.pdf')
fig.savefig('results/figures/'+clf_name+'.png')
return
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
def preprocess_data(data_set):
x_train = data_set.drop(['Z', 'INM', 'CLASS'], axis='columns')
y_train = data_set['CLASS']
return x_train, y_train
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
def run_all_tests():
import slopestabilityML
#import slopestabilityML.SVM.svm_run
#import slopestabilityML.GBC.gbc_run
return
def run_all_tests(test_results):
random_seed = 999
ml_results = {}
print('Running SVM...')
svm_accuracy_score, svm_accuracy_labels = slopestabilityML.SVM.svm_run(test_results, random_seed)
ml_results['svm'] = {'score': svm_accuracy_score, 'labels': svm_accuracy_labels}
print('Running GBC...')
gbc_accuracy_score, gbc_accuracy_labels = slopestabilityML.GBC.gbc_run(test_results, random_seed)
ml_results['gbc'] = {'score': gbc_accuracy_score, 'labels': gbc_accuracy_labels}
print('Running SGD...')
sgd_accuracy_score, sgd_accuracy_labels = slopestabilityML.SGD.sgd_run(test_results, random_seed)
ml_results['sgd'] = {'score': sgd_accuracy_score, 'labels': sgd_accuracy_labels}
slopestabilityML.combine_results(ml_results)
print('end')
return ml_results
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
import slopestabilityML
import numpy as np
def run_classification(test_training, test_prediction, test_results, clf):
accuracy_score = []
accuracy_labels = []
for test_name in test_training:
# Prepare data
x_train, y_train = slopestabilityML.preprocess_data(test_results[test_name])
# Train classifier
clf.fit(x_train, y_train)
# Predict with classifier
for test_name_pred in test_prediction:
# Prepare data
x_question, y_answer = slopestabilityML.preprocess_data(test_results[test_name_pred])
y_pred = clf.predict(x_question)
# Evaluate result
# np.where returns a tuple, so len() of it is not the number of matches; count the matching points directly
accuracy_score.append(np.count_nonzero(y_pred == np.array(y_answer)) / len(y_answer) * 100)
accuracy_labels.append(test_name_pred)
return accuracy_labels, accuracy_score
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
import slopestabilitytools
import random
import math
def split_dataset(test_names, random_seed):
random.seed(random_seed)
test_number = len(test_names)
test_prediction = random.choices(list(test_names),
k=math.ceil(test_number * 0.1))
test_training = slopestabilitytools.set_diff(list(test_names), set(test_prediction))
return test_training, test_prediction
@@ -21,12 +21,7 @@ def test_list(extension):
for file in file_list:
test_names.append(file[:file.find(extension)])
print(file[:file.find(extension)])
print('start')
print(test_names)
print('end')
test_names = sorted(test_names)
print(test_names)
return test_names
@@ -14,7 +14,6 @@ def check_create_folder(folder_path):
if os.path.isdir(folder_path):
print("Folder for figures exists!")
else:
print(folder_path)
os.mkdir(folder_path)
print("Created folder for figures!")
@@ -103,7 +103,6 @@ def model_params(n_of_tests, rho_spread, rho_max, layers_n_min, layers_n_max, de
layer_id = 1
for rho in rho_temp:
print(rho)
rho_final.append([layer_id, rho])
layer_id += 1
@@ -15,20 +15,17 @@ import slopestabilitytools
def plot_and_save(test_name, test_result, plot_title):
x = test_result['X']
print(x)
y = test_result['Y']
inm = test_result['INM']
res = test_result['RES']
x_vec = np.unique(np.array(x))
y_vec = np.unique(np.array(y))
print(y_vec)
X, Y = np.meshgrid(x_vec, y_vec)
[m, n] = X.shape
inm_plot = np.array(inm).reshape((m, n))
res_plot = np.array(res).reshape((m, n))
print('plot_and_save')
print(X)
fig, ax = plt.subplots(3)