To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit a2863cd7 authored by Martyna Plomecka
Browse files

Merged conflicts

parents 83e6e3e2 9e41be14
...@@ -39,12 +39,12 @@ def cross_validate_SVC(X, y): ...@@ -39,12 +39,12 @@ def cross_validate_SVC(X, y):
def cross_validate_RFC(X, y): def cross_validate_RFC(X, y):
logging.info("Cross-validation RFC...") logging.info("Cross-validation RFC...")
classifier = RandomForestClassifier(max_features='auto', random_state=42, n_jobs=-1) classifier = RandomForestClassifier(max_features='auto', random_state=42, n_jobs=-1)
parameters_RFC = {'n_estimators': [10, 100, 1000, 5000], 'max_depth': [5, 10, 50, 100, 500]} parameters_RFC = {'n_estimators': [10, 50, 100, 1000], 'max_depth': [5, 10, 50, 100, 500], 'min_samples_split' : [0.1, 0.4, 0.7, 1.0], 'min_samples_leaf' : [0.1, 0.5]}
cross_validate(classifier=classifier, parameters=parameters_RFC, X=X, y=y) cross_validate(classifier=classifier, parameters=parameters_RFC, X=X, y=y)
def cross_validate(classifier, parameters, X, y): def cross_validate(classifier, parameters, X, y):
X = X.reshape((36223, 500 * 129)) X = X.reshape((36223, 500 * 129))
clf = GridSearchCV(classifier, parameters, scoring='accuracy', n_jobs=-1, verbose=3) clf = GridSearchCV(classifier, parameters, scoring='accuracy', n_jobs=-1, verbose=3, cv=2)
clf.fit(X, y.ravel()) clf.fit(X, y.ravel())
export_dict(clf.cv_results_['mean_fit_time'], clf.cv_results_['std_fit_time'], clf.cv_results_['mean_score_time'], export_dict(clf.cv_results_['mean_fit_time'], clf.cv_results_['std_fit_time'], clf.cv_results_['mean_score_time'],
...@@ -58,17 +58,19 @@ def cross_validate(classifier, parameters, X, y): ...@@ -58,17 +58,19 @@ def cross_validate(classifier, parameters, X, y):
def try_sklearn_classifiers(X, y): def try_sklearn_classifiers(X, y):
logging.info("Training the simple classifiers: kNN, Linear SVM, Random Forest and Naive Bayes.") logging.info("Training the simple classifiers: kNN, Linear SVM, Random Forest and Naive Bayes.")
names = ["Nearest Neighbors", names = [# "Nearest Neighbors",
"Linear SVM", # "Linear SVM",
"Random Forest", "Random Forest",
"Naive Bayes", "Naive Bayes",
"Linear SVM"
] ]
classifiers = [ classifiers = [
KNeighborsClassifier(n_neighbors=100, weights='uniform', algorithm='auto', leaf_size=100, n_jobs=-1), # KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, n_jobs=-1),
LinearSVC(tol=1e-5, C=1, random_state=42, max_iter=10000), # LinearSVC(tol=1e-5, C=1, random_state=42, max_iter=1000),
RandomForestClassifier(n_estimators=1000, max_depth=50, max_features='auto', random_state=42, n_jobs=-1), RandomForestClassifier(n_estimators=30, max_depth=20, max_features='auto', random_state=42, n_jobs=-1),
GaussianNB() GaussianNB(),
LinearSVC(tol=1e-3, C=5, random_state=42, max_iter=500)
] ]
X = X.reshape((36223, 500 * 129)) X = X.reshape((36223, 500 * 129))
......
...@@ -51,7 +51,7 @@ def main(): ...@@ -51,7 +51,7 @@ def main():
try_sklearn_classifiers(trainX, trainY) try_sklearn_classifiers(trainX, trainY)
# cross_validate_kNN(trainX, trainY) # cross_validate_kNN(trainX, trainY)
# cross_validate_SVC(trainX, trainY) # cross_validate_SVC(trainX, trainY)
# cross_validate_RFC(trainX, trainY) cross_validate_RFC(trainX, trainY)
else: else:
# tune(trainX,trainY) # tune(trainX,trainY)
run(trainX,trainY) run(trainX,trainY)
......
BEST ESTIMATOR,BEST SCORE,BEST PARAMS
"RandomForestClassifier(max_depth=5, min_samples_leaf=0.1, min_samples_split=0.1,
n_estimators=1000, n_jobs=-1, random_state=42)",0.6598846369794981,"{'max_depth': 5, 'min_samples_leaf': 0.1, 'min_samples_split': 0.1, 'n_estimators': 1000}"
(36223, 500, 129)
(36223, 1)
Fitting 2 folds for each of 160 candidates, totalling 320 fits
INFO:root:Started the Logging
INFO:root:X training loaded.
INFO:root:(129, 500, 36223)
INFO:root:y training loaded.
INFO:root:(1, 36223)
INFO:root:Setting the shapes
INFO:root:(36223, 500, 129)
INFO:root:(36223, 1)
INFO:root:Cross-validation RFC...
INFO:root:--- Runtime: 6324.672133684158 seconds ---
INFO:root:Finished Logging
This diff is collapsed.
Model,Score,Runtime
Random Forest,0.6449965493443754,18.374440908432007
Naive Bayes,0.5421670117322291,54.8563334941864
Linear SVM,0.7054520358868185,2287.276668548584
RandomForestClassifier(n_estimators=30, max_depth=20, max_features='auto', random_state=42, n_jobs=-1),
GaussianNB(),
LinearSVC(tol=1e-3, C=5, random_state=42, max_iter=500)
INFO:root:Started the Logging
INFO:root:X training loaded.
INFO:root:(129, 500, 36223)
INFO:root:y training loaded.
INFO:root:(1, 36223)
INFO:root:Setting the shapes
INFO:root:(36223, 500, 129)
INFO:root:(36223, 1)
INFO:root:Training the simple classifiers: kNN, Linear SVM, Random Forest and Naive Bayes.
INFO:root:Random Forest
INFO:root:--- Score: 0.6449965493443754
INFO:root:--- Runtime: 18.374440908432007 for seconds ---
INFO:root:Naive Bayes
INFO:root:--- Score: 0.5421670117322291
INFO:root:--- Runtime: 54.8563334941864 for seconds ---
INFO:root:Linear SVM
INFO:root:--- Score: 0.7054520358868185
INFO:root:--- Runtime: 2287.276668548584 for seconds ---
INFO:root:--- Runtime: 2564.1652948856354 seconds ---
INFO:root:Finished Logging
Sender: LSF System <lsfadmin@eu-a6-008-18>
Subject: Job 166812903: <python /cluster/home/kard/dl-project/main.py> in cluster <euler> Done
Job <python /cluster/home/kard/dl-project/main.py> was submitted from host <eu-login-28> by user <kard> in cluster <euler> at Tue Mar 23 21:37:20 2021
Job was executed on host(s) <15*eu-a6-008-18>, in queue <bigmem.24h>, as user <kard> in cluster <euler> at Tue Mar 23 21:40:02 2021
</cluster/home/kard> was used as the home directory.
</cluster/home/kard/dl-project> was used as the working directory.
Started at Tue Mar 23 21:40:02 2021
Terminated at Tue Mar 23 22:22:58 2021
Results reported at Tue Mar 23 22:22:58 2021
Your job looked like:
------------------------------------------------------------
# LSBATCH: User input
python /cluster/home/kard/dl-project/main.py
------------------------------------------------------------
Successfully completed.
Resource usage summary:
CPU time : 2859.81 sec.
Max Memory : 95877 MB
Average Memory : 89903.56 MB
Total Requested Memory : 120000.00 MB
Delta Memory : 24123.00 MB
Max Swap : -
Max Processes : 3
Max Threads : 37
Run time : 2576 sec.
Turnaround time : 2738 sec.
The output (if any) follows:
2021-03-23 21:40:04.922001: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
(36223, 500, 129)
(36223, 1)
/cluster/apps/nss/gcc-6.3.0/python/3.8.5/x86_64/lib64/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn("Liblinear failed to converge, increase "
...@@ -3,7 +3,6 @@ matplotlib.use('Agg') ...@@ -3,7 +3,6 @@ matplotlib.use('Agg')
import pandas as pd import pandas as pd
from config import config from config import config
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np import numpy as np
import torch import torch
import pandas as pd import pandas as pd
...@@ -13,7 +12,6 @@ from subprocess import call ...@@ -13,7 +12,6 @@ from subprocess import call
import operator import operator
import shutil import shutil
sns.set_style('darkgrid')
import logging import logging
def plot_acc(hist, output_directory, model, val=False): def plot_acc(hist, output_directory, model, val=False):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment