Commit db3ee5b0 authored by Feliks Kiszkurno
Browse files

Training is now implemented correctly and works. All side functions have been...

Training is now implemented correctly and works. All side functions have been adjusted and should work as well.
parent d7a62abf
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import slopestabilitytools\n",
"import slopestabilityML\n",
"import settings\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 24,
"outputs": [],
"source": [
"settings.init()\n",
"test_results = slopestabilitytools.datamanagement.import_tests(abs_path=os.path.abspath(os.path.join(os.getcwd(), os.pardir))+'/')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 25,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of tests used for prediction: 5\n",
"Number of tests used for training: 14\n"
]
}
],
"source": [
"random_seed = 999\n",
"test_training, test_prediction = slopestabilityML.split_dataset(test_results.keys(), random_seed)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 26,
"outputs": [],
"source": [
"x_train = pd.DataFrame()\n",
"y_train = pd.DataFrame()\n",
"for name in test_training:\n",
" x_train_temp, y_train_temp = slopestabilityML.preprocess_data(test_results[name])\n",
" x_train = x_train.append(x_train_temp)\n",
" y_train = y_train.append(y_train_temp)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 29,
"outputs": [],
"source": [
"test_results_combined = pd.DataFrame()\n",
"for name in test_training:\n",
" test_results_combined = test_results_combined.append(test_results[name])\n",
"x_train_temp, y_train_temp = slopestabilityML.preprocess_data(test_results_combined)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 30,
"outputs": [
{
"ename": "ValueError",
"evalue": "Cannot index with multidimensional key",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)",
"\u001B[0;32m<ipython-input-30-a5b140cf1ae7>\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0mname\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mtest_training\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 5\u001B[0m \u001B[0mindex\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mtest_results_combined\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mtest_results_combined\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'NAME'\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m==\u001B[0m \u001B[0mname\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 6\u001B[0;31m \u001B[0mclass_correct\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mtest_results_combined\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'CLASSN'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mloc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mindex\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 7\u001B[0m \u001B[0mclass_pred\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0my_pred\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mloc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mindex\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 8\u001B[0m \u001B[0mscore_training\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0maccuracy_score\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0my_train\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0my_pred\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/anaconda3/envs/SlopeStability/lib/python3.7/site-packages/pandas/core/indexing.py\u001B[0m in \u001B[0;36m__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 877\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 878\u001B[0m \u001B[0mmaybe_callable\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mcom\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mapply_if_callable\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mobj\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 879\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_getitem_axis\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmaybe_callable\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0maxis\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0maxis\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 880\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 881\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m_is_scalar_access\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mkey\u001B[0m\u001B[0;34m:\u001B[0m \u001B[0mTuple\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/anaconda3/envs/SlopeStability/lib/python3.7/site-packages/pandas/core/indexing.py\u001B[0m in \u001B[0;36m_getitem_axis\u001B[0;34m(self, key, axis)\u001B[0m\n\u001B[1;32m 1095\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1096\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mhasattr\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m\"ndim\"\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mand\u001B[0m \u001B[0mkey\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mndim\u001B[0m \u001B[0;34m>\u001B[0m \u001B[0;36m1\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m-> 1097\u001B[0;31m \u001B[0;32mraise\u001B[0m \u001B[0mValueError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"Cannot index with multidimensional key\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 1098\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1099\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_getitem_iterable\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0maxis\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0maxis\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;31mValueError\u001B[0m: Cannot index with multidimensional key"
]
}
],
"source": [
"score_training = []\n",
"label_training = []\n",
"y_pred = y_train_temp\n",
"for name in test_training:\n",
" index = test_results_combined[test_results_combined['NAME'] == name]\n",
" class_correct = test_results_combined['CLASSN'].loc[index]\n",
" class_pred = y_pred.loc[index]\n",
" score_training = accuracy_score(class_correct, y_pred)\n",
" score_training.append(score_training * 100)\n",
" label_training.append(name)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
......@@ -18,7 +18,7 @@ def init():
# Normalization and classes
settings['norm_class'] = True # True to use normalized classes, False to use class_ids
settings['norm_class_num'] = 5 # Number of classes for normalized data
settings['norm'] = False # True to use normalized data, False to use raw data
settings['norm'] = True # True to use normalized data, False to use raw data
settings['use_labels'] = False # True to use labels instead of classes
# Include sensitivity
......
......@@ -8,7 +8,7 @@ Created on 26.03.2021
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
import slopestabilityML.run_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
......
......@@ -9,7 +9,7 @@ Created on 19.01.2021
from sklearn import ensemble
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
import slopestabilityML.run_classification
# TODO: as in svm_run
......
......@@ -9,7 +9,7 @@ Created on 26.01.2021
from sklearn.neighbors import KNeighborsClassifier
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
import slopestabilityML.run_classification
def knn_run(test_results, random_seed):
......
......@@ -10,7 +10,7 @@ from sklearn_rvm import EMRVC
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
import slopestabilityML.run_classification
def rvm_run(test_results, random_seed):
......
......@@ -10,7 +10,7 @@ from sklearn.linear_model import SGDClassifier
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
import slopestabilityML.run_classification
def sgd_run(test_results, random_seed):
......
......@@ -10,7 +10,7 @@ from sklearn import svm
import slopestabilityML.plot_results
import slopestabilityML.split_dataset
import slopestabilityML.run_classi
import slopestabilityML.run_classification
# TODO: for comparability with other ML methods, add an option to define externally which tests should be used for training
......
......@@ -14,9 +14,9 @@ import numpy as np
def preprocess_data(data_set):
if settings.settings['norm'] is True:
x_train = data_set.drop(['X', 'Z', 'INM', 'INMN', 'RES', 'CLASS', 'CLASSN', 'LABELS'], axis='columns')
x_train = data_set.drop(['NAME', 'X', 'Z', 'INM', 'INMN', 'RES', 'CLASS', 'CLASSN', 'LABELS'], axis='columns')
else:
x_train = data_set.drop(['X', 'Z', 'INM', 'INMN', 'RESN', 'CLASS', 'CLASSN', 'LABELS'], axis='columns')
x_train = data_set.drop(['NAME', 'X', 'Z', 'INM', 'INMN', 'RESN', 'CLASS', 'CLASSN', 'LABELS'], axis='columns')
if settings.settings['sen'] is False:
x_train = x_train.drop(['SEN'], axis='columns')
......
......@@ -7,7 +7,7 @@ Created on 19.01.2021
"""
import settings
import slopestabilityML
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
......@@ -38,26 +38,6 @@ def run_classification(test_training, test_prediction, test_results, clf, clf_na
if settings.settings['depth'] is True:
num_feat.append('Y')
# if settings.settings['norm'] is True and settings.settings['sen'] is True:
# num_feat = ['RESN', 'SEN']
# elif settings.settings['norm'] is False and settings.settings['sen'] is True:
# num_feat = ['RES', 'SEN']
# elif settings.settings['norm'] is False and settings.settings['sen'] is False:
# num_feat = ['RES']
# elif settings.settings['norm'] is True and settings.settings['sen'] is False:
# num_feat = ['RESN']
#cat_feat = ['CLASS']
#cat_lab = [0, 1]
if settings.settings['norm_class'] is True:
#cat_feat = ['CLASSN']
cat_lab = np.linspace(0, settings.settings['norm_class_num'] - 1, settings.settings['norm_class_num'])
elif settings.settings['norm_class'] is False:
#cat_feat = ['CLASS']
cat_lab = [0, 1]
num_trans = StandardScaler()
if settings.settings['use_labels'] is True:
......@@ -72,27 +52,35 @@ def run_classification(test_training, test_prediction, test_results, clf, clf_na
clf_pipeline = make_pipeline(preprocessor, clf)
# This part is wrong
for test_name in test_training:
# Prepare data
print(test_name)
x_train, y_train = slopestabilityML.preprocess_data(test_results[test_name])
# Train classifier
clf_pipeline.fit(x_train, y_train)
y_pred = clf_pipeline.predict(x_train)
score_training1 = clf_pipeline.score(x_train, y_train)
score_training = accuracy_score(y_train, y_pred)
test_results_combined = pd.DataFrame()
for name in test_training:
test_results_combined = test_results_combined.append(test_results[name])
test_results_combined = test_results_combined.reset_index()
test_results_combined = test_results_combined.drop(['index'], axis='columns')
x_train, y_train = slopestabilityML.preprocess_data(test_results_combined)
accuracy_result_training.append(score_training * 100)
accuracy_labels_training.append(test_name)
clf_pipeline.fit(x_train, y_train)
slopestabilityML.plot_class_overview(test_results[test_name], test_name, y_train, y_pred, clf_name, training=True)
for name in test_training:
print(name)
index = test_results_combined.index[test_results_combined['NAME'] == name]
if settings.settings['norm_class'] is True:
class_correct = test_results_combined['CLASSN'].loc[index]
else:
class_correct = test_results_combined['CLASS'].loc[index]
y_pred = clf_pipeline.predict(x_train.loc[index])
score_training = accuracy_score(class_correct, y_pred)
accuracy_result_training.append(score_training * 100)
accuracy_labels_training.append(name)
#print(y_train.loc[index])
slopestabilityML.plot_class_overview(test_results_combined.loc[index], name, y_train.loc[index], y_pred, clf_name, training=True)
result_class = {}
# Predict with classifier
for test_name_pred in test_prediction:
# Prepare data
print(test_name_pred)
x_question, y_answer = slopestabilityML.preprocess_data(test_results[test_name_pred])
y_pred = clf_pipeline.predict(x_question)
result_class[test_name_pred] = y_pred
......
......@@ -119,10 +119,8 @@ def create_data(test_name, test_config, max_depth):
classesn = slopestabilitytools.assign_classes(slopestabilitytools.normalize(input_model2_array))
# Create sensitivity values
jac = ert_manager.fop.jacobian() #
# Normalization only for visualization!
# Coverage = cumulative sensitivity = all measurements
cov = ert_manager.coverage()
......@@ -135,10 +133,6 @@ def create_data(test_name, test_config, max_depth):
rho_max = np.max(rho_arr)
rho_min = np.min(rho_arr)
# TODO: this assumes only two resistivities, extend it to consider more
# result_array[np.where(result_array < rho_min)] = rho_min
# result_array[np.where(result_array > rho_max)] = rho_max
result_array_norm = np.log(result_array)
# result_array_norm = slopestabilitytools.normalize(result_array)
......@@ -154,7 +148,10 @@ def create_data(test_name, test_config, max_depth):
for idx in id_new:
labels[idx] = labels_translator[key]
experiment_results = pd.DataFrame(data={'X': ert_manager.paraDomain.cellCenters().array()[:, 0],
test_name_column = [test_name] * len(input_model2_array)
experiment_results = pd.DataFrame(data={'NAME': test_name_column,
'X': ert_manager.paraDomain.cellCenters().array()[:, 0],
'Y': ert_manager.paraDomain.cellCenters().array()[:, 1],
'Z': ert_manager.paraDomain.cellCenters().array()[:, 2],
'INM': input_model2_array,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment