Commit 2d653089 authored by Feliks Kiszkurno
Browse files

Added proportion keyword argument to split_dataset

Estimator works, but has only 50% prediction accuracy (100% on the training set).
parent 3c28e20b
......@@ -2,21 +2,33 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import slopestabilitytools\n",
"import slopestabilityML\n",
"import settings\n",
"import os\n",
"settings.init()"
"import numpy as np\n",
"import pandas as pd\n",
"import test_definitions\n",
"\n",
"from sklearn import svm\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"settings.init()\n",
"test_definitions.init()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 48,
"outputs": [
{
"name": "stdout",
......@@ -29,7 +41,79 @@
"source": [
"print(os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + '/')\n",
"test_results = slopestabilitytools.datamanagement.import_tests(abs_path=os.path.abspath(os.path.join(os.getcwd(), os.pardir))+'/')\n",
"#for test_name in test_results.keys():\n"
"test_hist = {}\n",
"for test_name in test_results.keys():\n",
" hist, bins = np.histogram(test_results[test_name]['RESN'])\n",
" #hist = hist/np.sum(hist)\n",
" test_hist[test_name] = {'hist': hist,\n",
" 'bins': bins,\n",
" 'n_class': test_definitions.test_definitions[test_name]['layer_n']}\n",
"test_names_training, test_names_prediction = slopestabilityML.split_dataset(test_hist.keys(), 999)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 49,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Trained with accuracy: 100.00%\n"
]
}
],
"source": [
"clf = svm.SVC(gamma=0.001, C=100, kernel='linear')\n",
"#num_feat = ['hist']\n",
"#num_trans = StandardScaler()\n",
"#preprocessor = ColumnTransformer(transformers=[('num', num_trans, num_feat)])#,\n",
"# #('cat', cat_trans, cat_feat)])\n",
"#clf_pipeline = make_pipeline(preprocessor, clf)\n",
"x_train = np.zeros((len(test_names_training), len(test_hist['hor_01']['hist'])))\n",
"y_train = np.zeros((len(test_names_training)))\n",
"for idx in range(len(test_names_training)):\n",
" x_train[idx,:] = test_hist[test_names_training[idx]]['hist']\n",
" y_train[idx] = test_hist[test_names_training[idx]]['n_class']\n",
"clf.fit(x_train, y_train)\n",
"y_pred = clf.predict(x_train)\n",
"training_accuray = len(np.where(y_train == y_pred)[0])/len(y_train)*100\n",
"print('Trained with accuracy: {accuracy:.2f}%'.format(accuracy=training_accuray))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 50,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted with accuracy: 50.00%\n"
]
}
],
"source": [
"x_pred = np.zeros((len(test_names_prediction), len(test_hist['hor_01']['hist'])))\n",
"y_corr = np.zeros((len(test_names_prediction)))\n",
"for idx in range(len(test_names_prediction)):\n",
" x_pred[idx,:] = test_hist[test_names_prediction[idx]]['hist']\n",
" y_corr[idx] = test_hist[test_names_prediction[idx]]['n_class']\n",
"y_pred = clf.predict(x_pred)\n",
"prediction_accuray = len(np.where(y_corr == y_pred)[0])/len(y_pred)*100\n",
"print('Predicted with accuracy: {accuracy:.2f}%'.format(accuracy=prediction_accuray))"
],
"metadata": {
"collapsed": false,
......
......@@ -11,13 +11,13 @@ import random
import math
def split_dataset(test_names, random_seed):
def split_dataset(test_names, random_seed, *, proportion=0.25):
random.seed(random_seed)
test_number = len(test_names)
test_prediction = random.choices(list(test_names),
k=math.ceil(test_number * 0.25))
k=math.ceil(test_number * proportion))
test_training = slopestabilitytools.set_diff(list(test_names), set(test_prediction))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment