Commit 2d653089 authored by Feliks Kiszkurno

Added proportion keyword argument to split_dataset

Estimator works, but has 50% accuracy.
parent 3c28e20b
%% Cell type:code id: tags:
``` python
import slopestabilitytools
import slopestabilityML
import settings
import os
import numpy as np
import pandas as pd
import test_definitions
from sklearn import svm
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
settings.init()
test_definitions.init()
```
%% Cell type:code id: tags:
``` python
print(os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + '/')
test_results = slopestabilitytools.datamanagement.import_tests(abs_path=os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + '/')

# Build a histogram of the resistivity values ('RESN') of each test; the bin
# counts serve as the feature vector and the number of layers as the class label.
test_hist = {}
for test_name in test_results.keys():
    hist, bins = np.histogram(test_results[test_name]['RESN'])
    # hist = hist / np.sum(hist)
    test_hist[test_name] = {'hist': hist,
                            'bins': bins,
                            'n_class': test_definitions.test_definitions[test_name]['layer_n']}

test_names_training, test_names_prediction = slopestabilityML.split_dataset(test_hist.keys(), 999)
```
%%%% Output: stream
/home/felikskrno/Documents/Studies/MasterThesis/PythonProjects/SlopeStability/
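Side note: `np.histogram` with its default settings returns 10 bin counts and 11 bin edges, so every test contributes a fixed-length, 10-element feature vector regardless of how many resistivity samples it contains. A minimal sketch with hypothetical resistivity values standing in for `test_results[test_name]['RESN']`:

``` python
import numpy as np

# Hypothetical resistivity values; the real ones come from the imported test results.
resn = np.random.default_rng(0).uniform(10, 1000, size=500)

hist, bins = np.histogram(resn)  # default: 10 equal-width bins over the data range
print(hist.shape, bins.shape)    # (10,) (11,)
print(hist.sum())                # 500 - every sample falls into exactly one bin
```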
%% Cell type:code id: tags:
``` python
# Linear SVM on the 10-bin resistivity histograms; the commented-out
# StandardScaler/ColumnTransformer pipeline is kept for later experiments.
clf = svm.SVC(gamma=0.001, C=100, kernel='linear')
#num_feat = ['hist']
#num_trans = StandardScaler()
#preprocessor = ColumnTransformer(transformers=[('num', num_trans, num_feat)])#,
#                                               #('cat', cat_trans, cat_feat)])
#clf_pipeline = make_pipeline(preprocessor, clf)

# Stack the histogram of each training test into one row of the feature matrix.
x_train = np.zeros((len(test_names_training), len(test_hist['hor_01']['hist'])))
y_train = np.zeros(len(test_names_training))
for idx in range(len(test_names_training)):
    x_train[idx, :] = test_hist[test_names_training[idx]]['hist']
    y_train[idx] = test_hist[test_names_training[idx]]['n_class']

clf.fit(x_train, y_train)
y_pred = clf.predict(x_train)
training_accuracy = len(np.where(y_train == y_pred)[0]) / len(y_train) * 100
print('Trained with accuracy: {accuracy:.2f}%'.format(accuracy=training_accuracy))
```
%%%% Output: stream
Trained with accuracy: 100.00%
%% Cell type:code id: tags:
``` python
# Evaluate on the held-out tests selected by split_dataset.
x_pred = np.zeros((len(test_names_prediction), len(test_hist['hor_01']['hist'])))
y_corr = np.zeros(len(test_names_prediction))
for idx in range(len(test_names_prediction)):
    x_pred[idx, :] = test_hist[test_names_prediction[idx]]['hist']
    y_corr[idx] = test_hist[test_names_prediction[idx]]['n_class']

y_pred = clf.predict(x_pred)
prediction_accuracy = len(np.where(y_corr == y_pred)[0]) / len(y_pred) * 100
print('Predicted with accuracy: {accuracy:.2f}%'.format(accuracy=prediction_accuracy))
```
%%%% Output: stream
Predicted with accuracy: 50.00%
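The manual accuracy computation above (counting matching entries with `np.where`) is equivalent to `sklearn.metrics.accuracy_score`, which is already imported in the first cell; a small sketch with hypothetical label arrays:

``` python
import numpy as np
from sklearn.metrics import accuracy_score

# Hypothetical correct and predicted class labels.
y_corr = np.array([2.0, 3.0, 2.0, 3.0])
y_pred = np.array([2.0, 2.0, 3.0, 3.0])

manual = len(np.where(y_corr == y_pred)[0]) / len(y_pred) * 100
via_sklearn = accuracy_score(y_corr, y_pred) * 100

print(manual, via_sklearn)  # both 50.0 for these labels
assert manual == via_sklearn
```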
``` diff
@@ -11,13 +11,13 @@ import random
 import math

-def split_dataset(test_names, random_seed):
+def split_dataset(test_names, random_seed, *, proportion=0.25):

     random.seed(random_seed)

     test_number = len(test_names)

     test_prediction = random.choices(list(test_names),
-                                     k=math.ceil(test_number * 0.25))
+                                     k=math.ceil(test_number * proportion))

     test_training = slopestabilitytools.set_diff(list(test_names), set(test_prediction))
```
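For reference, a self-contained sketch of how the new `proportion` keyword is expected to behave. The function body is a simplified stand-in for the real `slopestabilityML.split_dataset` (which uses `slopestabilitytools.set_diff`), the return order follows the unpacking in the notebook above, and the test names are made up:

``` python
import math
import random

def split_dataset(test_names, random_seed, *, proportion=0.25):
    # Simplified stand-in: draw roughly `proportion` of the names for prediction
    # (random.choices samples with replacement, as in the original) and keep the
    # remaining names for training.
    random.seed(random_seed)
    names = list(test_names)
    prediction = random.choices(names, k=math.ceil(len(names) * proportion))
    training = [name for name in names if name not in set(prediction)]
    return training, prediction

# Hypothetical test names; proportion=0.5 reserves about half of them for prediction.
training, prediction = split_dataset(['hor_01', 'hor_02', 'ver_01', 'ver_02'], 999, proportion=0.5)
print('training:', training)
print('prediction:', prediction)
```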