Commit 4bcf8bc4 authored by Feliks Kiszkurno

Fixing the mess after the Disaster

parent 55acef4b
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 8 10:29:00 2021
@author: Feliks Kiszkurno
"""
import slopestabilitytools.datamanagement
import slopestabilitytools
import slopestabilityML
import slostabcreatedata
import numpy as np
import settings
settings.init()
# Config
create_new_data = False  # set to True if you need to reassign the classes
create_new_data_only = False  # set to False in order to run ML classifications
reassign_classes = False
class_type = 'norm'

# Load existing data instead of creating new one.
if not create_new_data:

    test_results = slopestabilitytools.datamanagement.import_tests()

    # if reassign_classes is True:
    #     test_results = slopestabilitytools.reassign_classes(test_results, class_type)

    # Check if folder structure for figures exists and create it if not
    is_success = slopestabilitytools.folder_structure.create_folder_structure()

# Create new data
else:

    # Prepare folder structure for output
    is_success = slopestabilitytools.folder_structure.create_folder_structure()

    # TODO: Put this part into a function
    # Settings
    number_of_tests = 5
    rho_spread_factor = 1.5
    rho_max = 20
    layers_min = 1
    layers_max = 2
    min_depth = 4
    max_depth = 8

    # Generate parameters for tests
    # tests_horizontal = slopestabilitytools.model_params(number_of_tests,
    #                                                     rho_spread_factor, rho_max,
    #                                                     layers_min, layers_max,
    #                                                     min_depth, max_depth)
    tests_horizontal = {'hor_01': {'layer_n': 1, 'rho_values': [[1, 5], [2, 15]], 'layers_pos': np.array([-5])},
                        'hor_02': {'layer_n': 1, 'rho_values': [[1, 5], [2, 50]], 'layers_pos': np.array([-5])},
                        'hor_03': {'layer_n': 1, 'rho_values': [[1, 15], [2, 20]], 'layers_pos': np.array([-8])},
                        'hor_04': {'layer_n': 1, 'rho_values': [[1, 5], [2, 10]], 'layers_pos': np.array([-3])},
                        'hor_05': {'layer_n': 1, 'rho_values': [[1, 5], [2, 25]], 'layers_pos': np.array([-3])},
                        'hor_06': {'layer_n': 1, 'rho_values': [[1, 2], [2, 10]], 'layers_pos': np.array([-4])},
                        'hor_07': {'layer_n': 1, 'rho_values': [[1, 10], [2, 20]], 'layers_pos': np.array([-6])},
                        'hor_08': {'layer_n': 1, 'rho_values': [[1, 5], [2, 25]], 'layers_pos': np.array([-3])},
                        'hor_09': {'layer_n': 1, 'rho_values': [[1, 3], [2, 25]], 'layers_pos': np.array([-3])},
                        'hor_10': {'layer_n': 1, 'rho_values': [[1, 5], [2, 25]], 'layers_pos': np.array([-7])},
                        'hor_11': {'layer_n': 1, 'rho_values': [[1, 10], [2, 12]], 'layers_pos': np.array([-4])},
                        'hor_12': {'layer_n': 1, 'rho_values': [[1, 15], [2, 50]], 'layers_pos': np.array([-5])},
                        'hor_13': {'layer_n': 2, 'rho_values': [[1, 3], [2, 5], [3, 15]],
                                   'layers_pos': np.array([-3, -6])},
                        'hor_14': {'layer_n': 2, 'rho_values': [[1, 2], [2, 4], [3, 8]],
                                   'layers_pos': np.array([-4, -8])},
                        'hor_15': {'layer_n': 2, 'rho_values': [[1, 4], [2, 15], [3, 25]],
                                   'layers_pos': np.array([-4, -8])},
                        'hor_16': {'layer_n': 2, 'rho_values': [[1, 5], [2, 20], [3, 50]],
                                   'layers_pos': np.array([-4, -8])}
                        }
    # tests_horizontal = {'hor_11': {'layer_n': 1, 'rho_values': [[1, 10], [2, 12]], 'layers_pos': np.array([-4])}}

    # Create models and invert them
    test_results = {}

    for test_name in tests_horizontal.keys():
        test_result_curr, test_rho_max, test_rho_min = slostabcreatedata.create_data(test_name,
                                                                                     tests_horizontal[test_name],
                                                                                     max_depth)
        test_results.update({test_name: test_result_curr})
        del test_result_curr

        # Plot and save figures
        slopestabilitytools.plot_and_save(test_name, test_results[test_name], 'Test: ' + test_name, test_rho_max,
                                          test_rho_min)

# Evaluate data with ML techniques
if not create_new_data_only:
    print('Running ML stuff...')
    ml_results = slopestabilityML.run_all_tests(test_results)

# Finish the script if ML classification was not executed
else:
    print('Done')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
from sklearn import ensemble
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
# TODO: as in svm_run
def gbc_run(test_results, random_seed):

    test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)

    # Create classifier
    clf = ensemble.GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)

    # Train classifier
    result_class, accuracy_labels, accuracy_score, accuracy_labels_training, accuracy_score_training = \
        slopestabilityML.run_classification(test_training, test_prediction, test_results, clf, 'GBC')

    # Plot
    slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'GBC_prediction')
    slopestabilityML.plot_results(accuracy_labels_training, accuracy_score_training, 'GBC_training')

    return result_class, accuracy_score, accuracy_labels, accuracy_score_training, accuracy_labels_training
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 26.03.2021
@author: Feliks Kiszkurno
"""
# https://letsfigureout.com/2020/03/08/nearest-neighbor-algorithm-with-python-and-numpy/
from .knn_run import knn_run
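# Note: knn_run itself is not included in this commit. A minimal sketch of what
# KNN/knn_run.py could look like, assuming it mirrors svm_run/gbc_run/sgd_run and uses
# scikit-learn's KNeighborsClassifier (the classifier choice and n_neighbors=3 are
# assumptions, not the confirmed implementation):

from sklearn import neighbors

import slopestabilityML


def knn_run(test_results, random_seed):

    test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)

    # Create classifier
    clf = neighbors.KNeighborsClassifier(n_neighbors=3)

    # Train classifier
    result_class, accuracy_labels, accuracy_score, accuracy_labels_training, accuracy_score_training = \
        slopestabilityML.run_classification(test_training, test_prediction, test_results, clf, 'KNN')

    # Plot
    slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'KNN_prediction')
    slopestabilityML.plot_results(accuracy_labels_training, accuracy_score_training, 'KNN_training')

    return result_class, accuracy_score, accuracy_labels, accuracy_score_training, accuracy_labels_training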
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
from sklearn.linear_model import SGDClassifier
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
def sgd_run(test_results, random_seed):

    test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)

    # Create classifier
    clf = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)

    # Train classifier
    result_class, accuracy_labels, accuracy_score, accuracy_labels_training, accuracy_score_training = \
        slopestabilityML.run_classification(test_training, test_prediction, test_results, clf, 'SGD')

    # Plot
    slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'SGD_prediction')
    slopestabilityML.plot_results(accuracy_labels_training, accuracy_score_training, 'SGD_training')

    return result_class, accuracy_score, accuracy_labels, accuracy_score_training, accuracy_labels_training
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 16.01.2021
@author: Feliks Kiszkurno
"""
from sklearn import svm
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
# TODO: for comparability with other ML methods, add an option to externally define which test should be used for training
def svm_run(test_results, random_seed):

    # https://stackabuse.com/implementing-svm-and-kernel-svm-with-pythons-scikit-learn/
    test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)

    # Create classifier
    clf = svm.SVC(gamma=0.001, C=100, kernel='linear')

    # Train classifier
    result_class, accuracy_labels, accuracy_score, accuracy_labels_training, accuracy_score_training = \
        slopestabilityML.run_classification(test_training, test_prediction, test_results, clf, 'SVM')

    # Plot
    slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'SVM_prediction')
    slopestabilityML.plot_results(accuracy_labels_training, accuracy_score_training, 'SVM_training')

    return result_class, accuracy_score, accuracy_labels, accuracy_score_training, accuracy_labels_training
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 16.01.2021
@author: Feliks Kiszkurno
"""
from .combine_results import combine_results
from .plot_results import plot_results
from .run_every import run_all_tests
from .split_dataset import split_dataset
from .run_classi import run_classification
from .preprocess_data import preprocess_data
from .plot_class_res import plot_class_res
from .ask_committee import ask_committee
from .SVM.svm_run import svm_run
from .GBC.gbc_run import gbc_run
from .SGD.sgd_run import sgd_run
from .KNN.knn_run import knn_run
from .ADABOOST.adaboost_run import adaboost_run
from .RVM.rvm_run import rvm_run
from .MGC.max_grad_classi import max_grad_classi
from .MGC.mgc_run import mgc_run
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
import matplotlib.pyplot as plt
from pathlib import Path
import slopestabilitytools
def combine_results(ml_results):

    # Predictions
    fig = plt.figure()
    ax = fig.subplots(1)
    fig.suptitle('Accuracy of different ML methods: predictions')

    for method_name in sorted(ml_results.keys()):
        plt.plot(ml_results[method_name]['labels'], ml_results[method_name]['score'], marker='x',
                 label=method_name)

    plt.xlabel('Test name')
    plt.setp(ax.get_xticklabels(), rotation=45)
    plt.ylabel('Correct points [%]')
    plt.legend(loc='lower right')

    slopestabilitytools.save_plot(fig, '', 'ML_summary_prediction', skip_fileformat=True)

    # Training
    fig = plt.figure()
    ax = fig.subplots(1)
    fig.suptitle('Accuracy of different ML methods: training')

    for method_name in sorted(ml_results.keys()):
        plt.plot(ml_results[method_name]['labels_training'], ml_results[method_name]['score_training'], marker='x',
                 label=method_name)

    plt.xlabel('Test name')
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.ylabel('Correct points [%]')
    plt.legend(loc='lower right')
    fig.tight_layout()

    slopestabilitytools.save_plot(fig, '', 'ML_summary_training', skip_fileformat=True)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 17.01.2021
@author: Feliks Kiszkurno
"""
import matplotlib.pyplot as plt
from pathlib import Path
import slopestabilitytools
def plot_results(accuracy_labels, accuracy_score, clf_name):

    clf_name_title = clf_name.replace("_", " ")

    fig = plt.figure()
    ax = plt.subplot(111)
    ax.plot(accuracy_labels, accuracy_score, marker='x')
    plt.setp(ax.get_xticklabels(), rotation=90)
    plt.xlabel('Test name')
    plt.ylabel('Correct points [%]')
    plt.title(clf_name_title + ' accuracy score')
    print('plot script is executed')
    fig.tight_layout()

    slopestabilitytools.save_plot(fig, clf_name, '_accuracy', subfolder='ML/')

    return
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
import settings
import pandas as pd
def preprocess_data(data_set):

    if settings.settings['norm'] is True:
        x_train = data_set.drop(['X', 'Z', 'INM', 'INMN', 'RES', 'CLASS', 'CLASSN'], axis='columns')
    else:
        x_train = data_set.drop(['X', 'Z', 'INM', 'INMN', 'RESN', 'CLASS', 'CLASSN'], axis='columns')

    if settings.settings['sen'] is False:
        x_train = x_train.drop(['SEN'], axis='columns')

    if settings.settings['norm_class'] is True:
        y_train = pd.DataFrame(data_set['CLASSN'])
    else:
        y_train = pd.DataFrame(data_set['CLASS'])

    if settings.settings['depth'] is False:
        x_train = x_train.drop(['Y'], axis='columns')

    return x_train, y_train
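# Worked example (illustrative; the exact column set is an assumption based on the drops above):
# if data_set has columns ['X', 'Y', 'Z', 'INM', 'INMN', 'RES', 'RESN', 'SEN', 'CLASS', 'CLASSN']
# and settings.settings = {'norm': True, 'sen': True, 'norm_class': False, 'depth': True},
# then x_train keeps ['Y', 'RESN', 'SEN'] and y_train is data_set['CLASS'] wrapped in a DataFrame.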
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
import slopestabilityML
# import slopestabilityML.SVM.svm_run
# import slopestabilityML.GBC.gbc_run
def run_all_tests(test_results):

    random_seed = 999

    ml_results = {}

    print('Running SVM...')
    svm_result_class, svm_accuracy_score, svm_accuracy_labels, svm_accuracy_score_training, svm_accuracy_labels_training = \
        slopestabilityML.SVM.svm_run(test_results, random_seed)

    ml_results['svm'] = {'score': svm_accuracy_score, 'labels': svm_accuracy_labels,
                         'score_training': svm_accuracy_score_training,
                         'labels_training': svm_accuracy_labels_training}

    print('Running GBC...')
    gbc_result_class, gbc_accuracy_score, gbc_accuracy_labels, gbc_accuracy_score_training, gbc_accuracy_labels_training = \
        slopestabilityML.GBC.gbc_run(test_results, random_seed)

    ml_results['gbc'] = {'score': gbc_accuracy_score, 'labels': gbc_accuracy_labels,
                         'score_training': gbc_accuracy_score_training,
                         'labels_training': gbc_accuracy_labels_training}

    print('Running SGD...')
    sgd_result_class, sgd_accuracy_score, sgd_accuracy_labels, sgd_accuracy_score_training, sgd_accuracy_labels_training = \
        slopestabilityML.SGD.sgd_run(test_results, random_seed)

    ml_results['sgd'] = {'score': sgd_accuracy_score, 'labels': sgd_accuracy_labels,
                         'score_training': sgd_accuracy_score_training,
                         'labels_training': sgd_accuracy_labels_training}

    slopestabilityML.combine_results(ml_results)

    print('end')

    return ml_results
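# The remaining runners exposed in __init__ (KNN, ADABOOST, RVM, MGC) are not wired in here yet.
# A minimal sketch of how one of them could be added, assuming it keeps the shared return
# signature (result_class first, then scores and labels):
#
#     print('Running KNN...')
#     knn_result_class, knn_accuracy_score, knn_accuracy_labels, knn_accuracy_score_training, knn_accuracy_labels_training = \
#         slopestabilityML.KNN.knn_run(test_results, random_seed)
#     ml_results['knn'] = {'score': knn_accuracy_score, 'labels': knn_accuracy_labels,
#                          'score_training': knn_accuracy_score_training,
#                          'labels_training': knn_accuracy_labels_training}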
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 19.01.2021
@author: Feliks Kiszkurno
"""
import slopestabilityML
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
from matplotlib import ticker
from scipy import interpolate
import slopestabilitytools
from pathlib import Path
def run_classification(test_training, test_prediction, test_results, clf, clf_name):

    accuracy_score = []
    accuracy_labels = []
    accuracy_score_training = []
    accuracy_labels_training = []
    result_class = {}  # predicted classes per prediction test

    num_feat = ['RES', 'SEN']
    # cat_feat = ['CLASS']
    cat_lab = [0, 1]

    num_trans = StandardScaler()
    # cat_trans = OneHotEncoder(categories=[cat_lab])

    preprocessor = ColumnTransformer(transformers=[('num', num_trans, num_feat),])
    #                                              ('cat', cat_trans, cat_feat)])

    clf_pipeline_UM = make_pipeline(preprocessor, clf)

    for test_name in test_training:
        # Prepare data
        x_train, y_train = slopestabilityML.preprocess_data(test_results[test_name])

        # Train classifier
        # print(type(x_train))
        # print(type(y_train))
        clf_pipeline_UM.fit(x_train, y_train)
        score_training = clf_pipeline_UM.score(x_train, y_train)
        accuracy_score_training.append(score_training * 100)
        accuracy_labels_training.append(test_name)

    # Predict with classifier
    for test_name_pred in test_prediction:
        # Prepare data
        x_question, y_answer = slopestabilityML.preprocess_data(test_results[test_name_pred])
        # y_pred = clf_pipeline_UM.score(x_question, y_answer)
        y_pred = clf_pipeline_UM.predict(x_question)
        # print(y_pred)
        score = clf_pipeline_UM.score(x_question, y_answer)
        print('score: ' + str(score))
        result_class.update({test_name_pred: y_pred})

        # TODO: Move plotting to a function for plotting a, b and a-b
        x = test_results[test_name_pred]['X']
        y = test_results[test_name_pred]['Y']
        class_in = test_results[test_name_pred]['CLASS']
        class_out = y_pred

        x_min = np.min(x)
        x_max = np.max(x)
        x_n = len(x)

        y_min = np.min(y)
        y_max = np.max(y)
        y_start = y_max
        y_end = y_min
        y_n = len(y)

        xi = np.linspace(x_min, x_max, x_n)
        yi = np.linspace(y_start, y_end, y_n)
        xx, yy = np.meshgrid(xi, yi)

        class_in_i = interpolate.griddata((x, y), class_in, (xx, yy), method='nearest')
        class_out_i = interpolate.griddata((x, y), class_out, (xx, yy), method='nearest')

        class_diff = np.zeros_like(class_out_i)
        class_diff[np.where(class_in_i == class_out_i)] = 1

        cb = []

        fig, _ax = plt.subplots(nrows=3, ncols=1)
        ax = _ax.flatten()
        fig.suptitle(test_name_pred + ' ' + clf_name)
        fig.subplots_adjust(hspace=0.8)

        im0 = ax[0].contourf(xi, yi, class_in_i)
        ax[0].set_title('Input classes')
        ax[0] = slopestabilitytools.set_labels(ax[0])
        cb.append(plt.colorbar(im0, ax=ax[0], label='Class'))  # , shrink=0.9)
        tick_locator = ticker.MaxNLocator(nbins=4)
        cb[0].locator = tick_locator
        cb[0].update_ticks()

        im1 = ax[1].contourf(xi, yi, class_out_i)
        ax[1].set_title('Result of classification')
        ax[1] = slopestabilitytools.set_labels(ax[1])
        cb.append(plt.colorbar(im1, ax=ax[1], label='Class'))  # , shrink=0.9)
        tick_locator = ticker.MaxNLocator(nbins=4)
        cb[1].locator = tick_locator
        cb[1].update_ticks()

        im2 = ax[2].contourf(xi, yi, class_diff)
        ax[2].set_title('Difference')
        ax[2] = slopestabilitytools.set_labels(ax[2])
        cb.append(plt.colorbar(im2, ax=ax[2], label='Is class correct?'))  # , shrink=0.9)
        tick_locator = ticker.MaxNLocator(nbins=4)
        cb[2].locator = tick_locator
        cb[2].update_ticks()

        fig.tight_layout()
        fig.savefig(Path('results/figures/ML/prediction/eps/{}_ML_{}_class_res.eps'.format(test_name_pred, clf_name)),
                    bbox_inches="tight")
        fig.savefig(Path('results/figures/ML/prediction/png/{}_ML_{}_class_res.png'.format(test_name_pred, clf_name)),
                    bbox_inches="tight")
        fig.savefig(Path('results/figures/ML/prediction/pdf/{}_ML_{}_class_res.pdf'.format(test_name_pred, clf_name)),
                    bbox_inches="tight")

        # Evaluate result
        # accuracy_score.append(len(np.where(y_pred == y_answer.to_numpy())) / len(y_answer.to_numpy()) * 100)
        accuracy_score.append(score * 100)
        accuracy_labels.append(test_name_pred)

    return result_class, accuracy_labels, accuracy_score, accuracy_labels_training, accuracy_score_training
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 15.01.2021
@author: Feliks Kiszkurno
"""
""" This module is created to contain all tools, that are not directly related to inversion or processing
List:
model_params: generates parameters used to develop models
directory_structure: create directory structure to contain figures and other files
"""
from .model_params import model_params
from .plot_and_save import plot_and_save
from .set_labels import set_labels
from .set_diff import set_diff
from .normalize import normalize
from .assign_classes import assign_classes
from .def_classes import def_classes
from .save_plot import save_plot
from .reassign_classes import reassign_classes
from .grid_data import grid_data
from .mov_avg import mov_avg
from .folder_structure.create_folder_structure import create_folder_structure
from .folder_structure.create_folder_for_test import create_folder_for_test
from .folder_structure.check_create_folder import check_create_folder
from .datamanagement.import_tests import import_tests
from .datamanagement.read_to_pandas import read_to_pandas
from .datamanagement.test_list import test_list
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 17.01.2021
@author: Feliks Kiszkurno
"""
import slopestabilitytools.datamanagement.test_list
import settings
import pandas as pd
def import_tests():

    test_results = {}

    test_names = slopestabilitytools.datamanagement.test_list('.csv')
    # print('test')
    # print(test_names)

    for test_name in test_names:
        test_result_curr = pd.read_csv(settings.settings['data_folder'] + '/' + test_name + '.csv', index_col=0)
        test_results.update({test_name: test_result_curr})

    return test_results
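# The test_list helper called above is not part of this commit. A minimal sketch, assuming it
# only collects the base names of files with the requested extension from the configured data
# folder (the directory layout and naming are assumptions):

import os


def test_list(extension):

    # Return file names (without extension) found in the data folder, e.g. 'hor_01' for 'hor_01.csv'
    return [file_name[:-len(extension)]
            for file_name in sorted(os.listdir(settings.settings['data_folder']))
            if file_name.endswith(extension)]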