To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit da64d5d6 authored by Feliks Kiszkurno's avatar Feliks Kiszkurno
Browse files

Fixed problem with overwriting feature importance when using GridSearch

Added two unsupervised methods
Some bug fixes
Changed settings
parent d4d60e7b
......@@ -21,9 +21,9 @@ from datetime import datetime
settings.init()
# Config
create_new_data = True # set to True if you need to reassign the classes
create_new_data = False # set to True if you need to reassign the classes
invert_existing_data = False # invert existing measurements
create_new_data_only = True # set to False in order to run ML classifications
create_new_data_only = False # set to False in order to run ML classifications
reassign_classes = False; class_type = 'norm'
param_path = os.path.abspath(os.path.join(os.getcwd()) + '/' + 'TestDefinitions/hor1_final_5case.csv')
test_definitions.init(path=param_path)
......
......@@ -35,3 +35,4 @@ from .RVM.rvm_run import rvm_run
from .MGC.max_grad_classi import max_grad_classi
from .MGC.mgc_run import mgc_run
from .DNN.dnn_run import dnn_run
from .clusters.kmeans_run import kmeans_run
......@@ -21,6 +21,7 @@ from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.inspection import permutation_importance
from sklearn.base import is_classifier
import slopestabilitytools
import test_definitions
......@@ -101,31 +102,36 @@ def classification_predict(test_prediction, test_results, clf_name, num_feat, *,
slopestabilityML.plot_sen_corr(y_pred, y_answer.to_numpy().reshape(y_answer.size), weights_np,
clf_name, test_name_pred, batch_name,
training=False)
if not is_classifier(clf_pipeline):
print('Skipping confusion matrix as clssifier {} doesnt support it...'.format(clf_name))
else:
conf_matr_temp = slopestabilityML.plot_confusion(clf_name, clf_pipeline, y_pred=x_question,
y_true=y_answer['CLASSN'].to_numpy().reshape(-1).astype('int'),
test_name=test_name_pred, training=False,
batch_name=batch_name)
confusion_matrix_sum = confusion_matrix_sum + conf_matr_temp
conf_matr_temp = slopestabilityML.plot_confusion(clf_name, clf_pipeline, y_pred=x_question,
y_true=y_answer['CLASSN'].to_numpy().reshape(-1).astype('int'),
test_name=test_name_pred, training=False,
batch_name=batch_name)
confusion_matrix_sum = confusion_matrix_sum + conf_matr_temp
importance = permutation_importance(clf_pipeline, x_question, y_pred)
slopestabilityML.plot_feature_importance(clf_name, importance, x_question, test_name_pred,
batch_name=batch_name)
log_file_name = settings.settings['log_file_name']
log_file = open(os.path.join(settings.settings['results_folder'], log_file_name), 'a')
log_file.write('\n')
log_file.write('{bn}, {tn} score: {score:.2f} %'.format(bn=batch_name, tn=test_name_pred, score=score * 100))
log_file.write('\n')
log_file.write('{bn}, {tn} feature list: {fl}'.format(bn=batch_name, tn=test_name_pred,
fl=x_question.columns.values.tolist()))
log_file.write('\n')
log_file.write('{bn}, {tn} feature importance: {fi}'.format(bn=batch_name, tn=test_name_pred,
fi=importance.importances_mean))
log_file.write('\n')
log_file.close()
if not is_classifier(clf_pipeline):
print('Skipping confusion matrix as clssifier {} doesnt support it...'.format(clf_name))
else:
importance = permutation_importance(clf_pipeline, x_question, y_pred)
slopestabilityML.plot_feature_importance(clf_name, importance, x_question, test_name_pred,
batch_name=batch_name)
# log_file_name = settings.settings['log_file_name']
# log_file = open(os.path.join(settings.settings['results_folder'], log_file_name), 'a')
# log_file.write('\n')
# log_file.write('{bn}, {tn} score: {score:.2f} %'.format(bn=batch_name, tn=test_name_pred, score=score * 100))
# log_file.write('\n')
# log_file.write('{bn}, {tn} feature list: {fl}'.format(bn=batch_name, tn=test_name_pred,
# fl=x_question.columns.values.tolist()))
# log_file.write('\n')
# #log_file.write('{bn}, {tn} feature importance: {fi}'.format(bn=batch_name, tn=test_name_pred,
# # fi=importance.importances_mean))
# log_file.write('\n')
# log_file.close()
if settings.settings['norm_class'] is True:
class_in = test_results_temp['CLASSN']
......
......@@ -21,6 +21,7 @@ from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.inspection import permutation_importance
from sklearn.base import is_classifier
import slopestabilitytools
import test_definitions
......@@ -161,13 +162,17 @@ def classification_train(test_training, test_results, clf, clf_name):
# weights = x_train['SEN']
# x_train.pop('SEN')
try:
clf_pipeline.fit(x_train, y_train, **{clf_pipeline.steps[1][0] + '__sample_weight': weights})
clf_pipeline.fit(x_train, y_train, **{clf_pipeline.steps[-1][-1].estimator + '__sample_weight': weights})
except TypeError:
clf_pipeline.fit(x_train, y_train)
try:
clf_pipeline.fit(x_train, y_train)
except TypeError:
clf_pipeline.fit(x_train, y_train, scoring="accuracy")
else:
clf_pipeline.fit(x_train, y_train)
clf_name_ext = clf_name + '.sav'
clf_file_name = os.path.join(settings.settings['clf_folder'], clf_name_ext)
joblib.dump(clf_pipeline, clf_file_name)
......@@ -189,36 +194,42 @@ def classification_train(test_training, test_results, clf, clf_name):
y_pred = clf_pipeline.predict(x_train_temp)
slopestabilityML.plot_sen_corr(y_pred, class_correct, test_results_combined['SEN'].loc[index], clf_name, name,
'training', training=True)
conf_matr_temp = slopestabilityML.plot_confusion(clf_name, clf, y_pred=x_train_temp,
y_true=class_correct,
test_name=name, training=True)
if not is_classifier(clf):
print('Skipping confusion matrix as clssifier {} doesnt support it...'.format(clf_name))
else:
conf_matr_temp = slopestabilityML.plot_confusion(clf_name, clf, y_pred=x_train_temp,
y_true=class_correct,
test_name=name, training=True)
confusion_matrix_sum = confusion_matrix_sum + conf_matr_temp
confusion_matrix_sum = confusion_matrix_sum + conf_matr_temp
result_class_training[name] = y_pred
score_training = accuracy_score(class_correct, y_pred)
accuracy_result_training.append(score_training * 100)
accuracy_labels_training.append(name)
importance = permutation_importance(clf_pipeline, x_train_temp, y_pred)
slopestabilityML.plot_feature_importance(clf_name, importance, x_train_temp, name)
log_file_name = settings.settings['log_file_name']
log_file = open(os.path.join(settings.settings['results_folder'], log_file_name), 'a')
log_file.write('\n')
log_file.write('Starting training on profile: {tn}'.format(tn=name))
log_file.write('\n')
log_file.write('{tn} score: {score:.2f} %'.format(tn=name, score=score_training * 100))
log_file.write('\n')
log_file.write('{tn} feature list: {fl}'.format(tn=name,
fl=x_train_temp.columns.values.tolist()))
log_file.write('\n')
log_file.write('{tn} feature importance: {fi}'.format(tn=name,
fi=importance.importances_mean))
log_file.write('\n')
log_file.close()
slopestabilityML.plot_feature_importance(clf_pipeline, importance, x_train_temp, name)
if not is_classifier(clf):
print('Skipping feature importance as clssifier {} doesnt support it...'.format(clf_name))
else:
importance = permutation_importance(clf_pipeline, x_train_temp, y_pred)
slopestabilityML.plot_feature_importance(clf_name, importance, x_train_temp, name)
slopestabilityML.plot_feature_importance(clf_name, importance, x_train_temp, name)
# log_file_name = settings.settings['log_file_name']
# log_file = open(os.path.join(settings.settings['results_folder'], log_file_name), 'a')
# log_file.write('\n')
# log_file.write('Starting training on profile: {tn}'.format(tn=name))
# log_file.write('\n')
# log_file.write('{tn} score: {score:.2f} %'.format(tn=name, score=score_training * 100))
# log_file.write('\n')
# log_file.write('{tn} feature list: {fl}'.format(tn=name,
# fl=x_train_temp.columns.values.tolist()))
# log_file.write('\n')
# #log_file.write('{tn} feature importance: {fi}'.format(tn=name,
# # fi=importance.importances_mean))
# log_file.write('\n')
# log_file.close()
# Evaluate the accuracy of interface depth detection
x_temp = x_position.loc[index].to_numpy()
......
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 03.07.2021
@author: Feliks Kiszkurno
"""
from .kmeans_run import kmeans_run
from .meanshift_run import meanshift_run
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 03.07.2021
@author: Feliks Kiszkurno
"""
from sklearn.cluster import KMeans
import numpy as np
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classification
import settings
def kmeans_run(test_results, random_seed):
    """Cluster the test data with KMeans and evaluate it via run_classification.

    Parameters
    ----------
    test_results : project data structure holding all test measurements.
    random_seed : seed forwarded to the train/prediction split.

    Returns
    -------
    tuple
        (results, result_class) as produced by slopestabilityML.run_classification.
    """
    # Split the data set into training and prediction subsets.
    test_results, test_training, test_prediction = \
        slopestabilityML.select_split_type(test_results, random_seed)

    cluster_count = settings.settings['norm_class_num']

    if settings.settings['optimize_ml'] is True:
        # Hyperparameter search space; n_clusters stays fixed to the
        # configured class count (commented candidate kept for reference).
        search_space = {  # 'n_clusters': list(np.arange(1, 10, 1)),
                        'algorithm': ['full', 'elkan'],
                        'init': ['k-means++', 'random']}
        base_estimator = KMeans(n_clusters=cluster_count)
        clf = slopestabilityML.select_search_type(base_estimator, search_space)
    else:
        # Create classifier with fixed, hand-picked settings.
        clf = KMeans(n_clusters=cluster_count, init='k-means++', algorithm='elkan')

    # Train classifier and collect results.
    results, result_class = slopestabilityML.run_classification(
        test_training, test_prediction, test_results, clf, 'KMeans')

    # Plot
    # slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'GBC_prediction')
    # slopestabilityML.plot_results(accuracy_labels_training, accuracy_score_training, 'GBC_training')

    return results, result_class
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 03.07.2021
@author: Feliks Kiszkurno
"""
from sklearn.cluster import MeanShift
import numpy as np
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classification
import settings
def meanshift_run(test_results, random_seed):
    """Cluster the test data with MeanShift and evaluate it via run_classification.

    Parameters
    ----------
    test_results : project data structure holding all test measurements.
    random_seed : seed forwarded to the train/prediction split.

    Returns
    -------
    tuple
        (results, result_class) as produced by slopestabilityML.run_classification.
    """
    # Split the data set into training and prediction subsets.
    test_results, test_training, test_prediction = \
        slopestabilityML.select_split_type(test_results, random_seed)

    if settings.settings['optimize_ml'] is True:
        hyperparameters = {'cluster_all': [True, False],
                           'bin_seeding': [True, False]}
        clf_base = MeanShift()
        clf = slopestabilityML.select_search_type(clf_base, hyperparameters)
        # BUG FIX: the original code reassigned clf to a plain MeanShift right
        # after building the search object, which silently disabled the
        # hyperparameter optimization. The overwrite has been removed so the
        # search estimator is actually used.
    else:
        # Create classifier with fixed, hand-picked settings.
        clf = MeanShift(cluster_all=False, bin_seeding=True)

    # Train classifier and collect results.
    results, result_class = slopestabilityML.run_classification(
        test_training, test_prediction, test_results, clf, 'MeanShift')

    # Plot
    # slopestabilityML.plot_results(accuracy_labels, accuracy_score, 'GBC_prediction')
    # slopestabilityML.plot_results(accuracy_labels_training, accuracy_score_training, 'GBC_training')

    return results, result_class
......@@ -13,6 +13,7 @@ import slopestabilityML.SGD.sgd_run
import slopestabilityML.KNN.knn_run
import slopestabilityML.ADABOOST.adaboost_run
import slopestabilityML.DNN.dnn_run
import slopestabilityML.clusters.kmeans_run
import slopestabilityML
import os
import settings
......@@ -29,51 +30,67 @@ def run_all_tests(test_results):
ml_results = {}
ml_results_class = {}
print('Running SVM...')
svm_results, svm_result_class = slopestabilityML.SVM.svm_run(test_results, random_seed)
ml_results['svm'] = svm_results
ml_results_class['svm'] = svm_result_class
gc.collect()
print('Running GBC...')
gbc_results, gbc_result_class = slopestabilityML.GBC.gbc_run(test_results, random_seed)
ml_results['gbc'] = gbc_results
ml_results_class['gbc'] = gbc_result_class
gc.collect()
print('Running SGD...')
sgd_results, sgd_result_class = slopestabilityML.SGD.sgd_run(test_results, random_seed)
ml_results['sgd'] = sgd_results
ml_results_class['sgd'] = sgd_result_class
gc.collect()
print('Running KNN...')
knn_results, knn_result_class = slopestabilityML.KNN.knn_run(test_results, random_seed)
ml_results['KNN'] = knn_results
ml_results_class['knn'] = knn_result_class
gc.collect()
# print('Running SVM...')
# svm_results, svm_result_class = slopestabilityML.SVM.svm_run(test_results, random_seed)
# ml_results['svm'] = svm_results
#
# ml_results_class['svm'] = svm_result_class
#
# gc.collect()
#
# print('Running GBC...')
# gbc_results, gbc_result_class = slopestabilityML.GBC.gbc_run(test_results, random_seed)
# ml_results['gbc'] = gbc_results
#
# ml_results_class['gbc'] = gbc_result_class
#
# gc.collect()
#
# print('Running SGD...')
# sgd_results, sgd_result_class = slopestabilityML.SGD.sgd_run(test_results, random_seed)
# ml_results['sgd'] = sgd_results
#
# ml_results_class['sgd'] = sgd_result_class
#
# gc.collect()
#
# print('Running KNN...')
# knn_results, knn_result_class = slopestabilityML.KNN.knn_run(test_results, random_seed)
# ml_results['KNN'] = knn_results
#
# ml_results_class['knn'] = knn_result_class
#
# gc.collect()
#
# print('Running ADABOOST...')
# ada_results, ada_result_class = slopestabilityML.ADABOOST.adaboost_run(test_results, random_seed)
# ml_results['ADA'] = ada_results
#
# ml_results_class['ada'] = ada_result_class
#
# gc.collect()
#
# print('Running DNN...')
# dnn_results, dnn_result_class = slopestabilityML.DNN.dnn_run(test_results, random_seed)
# ml_results['DNN'] = dnn_results
#
# ml_results_class['dnn'] = dnn_result_class
#
# gc.collect()
print('Running ADABOOST...')
ada_results, ada_result_class = slopestabilityML.ADABOOST.adaboost_run(test_results, random_seed)
ml_results['ADA'] = ada_results
print('Running KMeans...')
kmeans_results, kmeans_result_class = slopestabilityML.clusters.kmeans_run(test_results, random_seed)
ml_results['KMeans'] = kmeans_results
ml_results_class['ada'] = ada_result_class
ml_results_class['KMeans'] = kmeans_result_class
gc.collect()
print('Running DNN...')
ada_results, ada_result_class = slopestabilityML.DNN.dnn_run(test_results, random_seed)
ml_results['DNN'] = ada_results
print('Running MeanShift...')
meanshift_results, meanshift_result_class = slopestabilityML.clusters.meanshift_run(test_results, random_seed)
ml_results['MeanShift'] = meanshift_results
ml_results_class['dnn'] = ada_result_class
ml_results_class['MeanShift'] = meanshift_result_class
gc.collect()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment