Commit ed5e98f1 authored by slavenc's avatar slavenc
Browse files

removed unnecessary comments

parent 160d393f
......@@ -561,9 +561,7 @@
"def AR(signal, order=6):\n",
" ar, _, _ = arburg(signal, order) # only save AR coefs\n",
" return ar\n",
"\n",
"# Wavelets analysis\n",
"# import pywt\n"
"\n"
]
},
{
......@@ -1041,7 +1039,9 @@
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
......@@ -1154,7 +1154,9 @@
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
......
%% Cell type:markdown id: tags:
# Project 4
%% Cell type:markdown id: tags:
### Dependencies and Constants
%% Cell type:code id: tags:
``` python
import time
import numpy as np
from numpy.fft import fft # to get amplitudes
import pandas as pd
import scipy.signal as ss # for psd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedShuffleSplit
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
from biosppy.signals import eeg # signal processing
from biosppy.signals import emg # signal processing
from spectrum import arburg
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
PROTOTYPING = False
```
%%%% Output: stream
C:\Users\made_\Anaconda3\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Using TensorFlow backend.
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint8 = np.dtype([("qint8", np.int8, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint8 = np.dtype([("quint8", np.uint8, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint16 = np.dtype([("qint16", np.int16, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint16 = np.dtype([("quint16", np.uint16, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint32 = np.dtype([("qint32", np.int32, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
np_resource = np.dtype([("resource", np.ubyte, 1)])
%% Cell type:markdown id: tags:
### Read data
%% Cell type:code id: tags:
``` python
# Load every cached dataset from disk; .values strips each DataFrame down to a
# plain numpy array. The 'Id' column is dropped where present.
start = time.time()
# import train sets
train_eeg1_raw = pd.read_csv('files/train_eeg1.csv').drop('Id', axis=1).values
train_eeg2_raw = pd.read_csv('files/train_eeg2.csv').drop('Id', axis=1).values
train_emg_raw = pd.read_csv('files/train_emg.csv').drop('Id', axis=1).values
# import test sets
test_eeg1_raw = pd.read_csv('files/test_eeg1.csv').drop('Id', axis=1).values
test_eeg2_raw = pd.read_csv('files/test_eeg2.csv').drop('Id', axis=1).values
test_emg_raw = pd.read_csv('files/test_emg.csv').drop('Id', axis=1).values
# import eeg features directly (precomputed by the extraction cells below)
eeg_train = pd.read_csv('files/eeg_feats_train.csv').values
eeg_test = pd.read_csv('files/eeg_feats_test.csv').values
# import emg features directly
emg_feats_train = pd.read_csv('files/emg_feats_train.csv').values
emg_feats_test = pd.read_csv('files/emg_feats_test.csv').values
# import reduced eeg features by pca (to 45 components - already scaled)
eeg_train_red = pd.read_csv('files/eeg_train_pca45.csv').values
eeg_test_red = pd.read_csv('files/eeg_test_pca45.csv').values
# import labels
train_labels_raw = pd.read_csv('files/train_labels.csv').drop('Id', axis=1).values
print(train_eeg1_raw.shape, train_eeg2_raw.shape, train_emg_raw.shape)
print(test_eeg1_raw.shape, test_eeg2_raw.shape, test_emg_raw.shape)
print(train_labels_raw.shape)
print(eeg_train.shape, eeg_test.shape)
print("Time: ", time.time() - start)
```
%%%% Output: stream
(64800, 512) (64800, 512) (64800, 512)
%%%% Output: error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-2-60963c3fdbae> in <module>
27
28 print(train_eeg1_raw.shape, train_eeg2_raw.shape, train_emg_raw.shape)
---> 29 print(test_eeg1_raw.shape, test_eeg2_raw.shape, test_emg_raw.shape)
30 print(train_labels_raw.shape)
31 print(eeg_train.shape, eeg_test.shape)
NameError: name 'test_emg_raw' is not defined
%% Cell type:markdown id: tags:
### Feature extraction for EEG signals
%% Cell type:code id: tags:
``` python
start = time.time()
def calculate_statistics(list_values):
    """Return [n5, n25, n75, n95, median, mean, std, var, rms] of a signal.

    All statistics are NaN-aware so missing samples do not poison the result.
    """
    n5 = np.nanpercentile(list_values, 5)
    n25 = np.nanpercentile(list_values, 25)
    n75 = np.nanpercentile(list_values, 75)
    n95 = np.nanpercentile(list_values, 95)
    median = np.nanpercentile(list_values, 50)
    mean = np.nanmean(list_values)
    std = np.nanstd(list_values)
    var = np.nanvar(list_values)
    # BUG FIX: the original np.nanmean(np.sqrt(x**2)) equals mean(|x|) -- the
    # mean absolute value, not the root-mean-square. RMS is sqrt(mean(x**2)).
    rms = np.sqrt(np.nanmean(np.square(list_values)))
    return [n5, n25, n75, n95, median, mean, std, var, rms]
def calculate_crossings(list_values):
    """Count zero-crossings and mean-crossings of the signal."""
    values = np.array(list_values)
    # A crossing occurs wherever consecutive samples sit on opposite sides of
    # the threshold, i.e. the boolean mask changes between neighbours.
    zero_count = int(np.count_nonzero(np.diff(values > 0)))
    mean_count = int(np.count_nonzero(np.diff(values > np.nanmean(values))))
    return [zero_count, mean_count]
def get_features(list_values):
    """Concatenate crossing counts and summary statistics into one feature list."""
    return calculate_crossings(list_values) + calculate_statistics(list_values)
def extract_features(eeg1, eeg2, emg):
    """Extract per-band EEG features for every epoch of a two-channel recording.

    Parameters
    ----------
    eeg1, eeg2 : 2-D arrays with one epoch per row, sampled at 128 Hz.
    emg : unused; kept so existing call sites remain valid.

    Returns
    -------
    2-D array with one feature row per epoch (None if the input is empty).
    """
    rows = []
    for i in range(eeg1.shape[0]):
        if i % 1000 == 0:
            print(i, "/", eeg1.shape[0])
        # stack the two channels as columns, as biosppy expects
        signal = np.array([eeg1[i], eeg2[i]]).T
        analysis = eeg.eeg(signal=signal, sampling_rate=128, show=False)
        row = np.array([])
        # band-power features: theta, alpha low, alpha high, beta, gamma
        row = np.append(row, get_features(analysis["theta"]))
        row = np.append(row, get_features(analysis["alpha_low"]))
        row = np.append(row, get_features(analysis["alpha_high"]))
        row = np.append(row, get_features(analysis["beta"]))
        row = np.append(row, get_features(analysis["gamma"][:, 0]))
        rows.append(row.reshape((1, -1)))
    # FIX: the original re-concatenated the accumulator after every epoch
    # (quadratic copying over 64800 epochs); collecting the rows and stacking
    # once is linear and produces the same array.
    return np.concatenate(rows, axis=0) if rows else None
# Run the EEG feature extraction; test features are skipped while prototyping.
X_train = extract_features(train_eeg1_raw, train_eeg2_raw, train_emg_raw)
if not PROTOTYPING:
    X_test = extract_features(test_eeg1_raw, test_eeg2_raw, test_emg_raw)
    print("X_test", X_test.shape)
print("X_train", X_train.shape)
print("Time: ", time.time() - start)
```
%%%% Output: stream
0 / 64800
1000 / 64800
2000 / 64800
3000 / 64800
4000 / 64800
5000 / 64800
6000 / 64800
7000 / 64800
8000 / 64800
9000 / 64800
10000 / 64800
11000 / 64800
12000 / 64800
13000 / 64800
14000 / 64800
15000 / 64800
16000 / 64800
17000 / 64800
18000 / 64800
19000 / 64800
20000 / 64800
21000 / 64800
22000 / 64800
23000 / 64800
24000 / 64800
25000 / 64800
26000 / 64800
27000 / 64800
28000 / 64800
29000 / 64800
30000 / 64800
31000 / 64800
32000 / 64800
33000 / 64800
34000 / 64800
35000 / 64800
36000 / 64800
37000 / 64800
38000 / 64800
39000 / 64800
40000 / 64800
41000 / 64800
42000 / 64800
43000 / 64800
44000 / 64800
45000 / 64800
46000 / 64800
47000 / 64800
48000 / 64800
49000 / 64800
50000 / 64800
51000 / 64800
52000 / 64800
53000 / 64800
54000 / 64800
55000 / 64800
56000 / 64800
57000 / 64800
58000 / 64800
59000 / 64800
60000 / 64800
61000 / 64800
62000 / 64800
63000 / 64800
64000 / 64800
0 / 43200
1000 / 43200
2000 / 43200
3000 / 43200
4000 / 43200
5000 / 43200
6000 / 43200
7000 / 43200
8000 / 43200
9000 / 43200
10000 / 43200
11000 / 43200
12000 / 43200
13000 / 43200
14000 / 43200
15000 / 43200
16000 / 43200
17000 / 43200
18000 / 43200
19000 / 43200
20000 / 43200
21000 / 43200
22000 / 43200
23000 / 43200
24000 / 43200
25000 / 43200
26000 / 43200
27000 / 43200
28000 / 43200
29000 / 43200
30000 / 43200
31000 / 43200
32000 / 43200
33000 / 43200
34000 / 43200
35000 / 43200
36000 / 43200
37000 / 43200
38000 / 43200
39000 / 43200
40000 / 43200
41000 / 43200
42000 / 43200
43000 / 43200
%%%% Output: error
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-3-2aadf25068c8> in <module>
72 X_test = extract_features(test_eeg1_raw, test_eeg2_raw, test_emg_raw)
73 # save features for future imports
---> 74 pd.DataFrame.to_csv(X_train, 'files/eeg_feats_train.csv')
75 pd.DataFrame.to_csv(X_tests, 'files/eeg_feats_test.csv')
76 print("X_test", X_test.shape)
~\Anaconda3\lib\site-packages\pandas\core\generic.py in to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal)
3200 """
3201
-> 3202 df = self if isinstance(self, ABCDataFrame) else self.to_frame()
3203
3204 from pandas.io.formats.csvs import CSVFormatter
AttributeError: 'numpy.ndarray' object has no attribute 'to_frame'
%% Cell type:code id: tags:
``` python
# save features for future imports
# wrap the numpy arrays in DataFrames so to_csv works (the previous cell's
# attempt passed raw ndarrays and failed)
pd.DataFrame.to_csv(pd.DataFrame(X_train), 'files/eeg_feats_train.csv', index=False)
pd.DataFrame.to_csv(pd.DataFrame(X_test), 'files/eeg_feats_test.csv', index=False)
```
%% Cell type:code id: tags:
``` python
# obtain features by simply doing a FFT on the data
# probably more suitable for a neural network approach
# NOTE(review): np.real(fft(...)) keeps only the real part of each FFT bin,
# not its magnitude; np.abs(fft(...)) would give the amplitudes -- confirm intent.
eeg1_freqs_train = []
eeg2_freqs_train = []
eeg1_freqs_test = []
eeg2_freqs_test = []
for i in range(train_eeg1_raw.shape[0]):
    eeg1_freqs_train.append(np.real(fft(train_eeg1_raw[i])))
    eeg2_freqs_train.append(np.real(fft(train_eeg2_raw[i])))
for i in range(test_eeg1_raw.shape[0]):
    eeg1_freqs_test.append(np.real(fft(test_eeg1_raw[i])))
    eeg2_freqs_test.append(np.real(fft(test_eeg2_raw[i])))
```
%% Cell type:code id: tags:
``` python
# concatenate frequency features from fft
start = time.time()
# column_stack puts each epoch's eeg1 spectrum and eeg2 spectrum side by side
eeg_freqs_train = np.array(np.column_stack((eeg1_freqs_train, eeg2_freqs_train)))
eeg_freqs_test = np.array(np.column_stack((eeg1_freqs_test, eeg2_freqs_test)))
print("Time: ", time.time() - start)
# save features for future imports
#pd.DataFrame.to_csv(pd.DataFrame(eeg_freqs_train), 'files/eeg_freqs_train.csv', index=False)
#pd.DataFrame.to_csv(pd.DataFrame(eeg_freqs_test), 'files/eeg_freqs_test.csv', index=False)
```
%%%% Output: stream
Time: 2.0603151321411133
%% Cell type:markdown id: tags:
### Homemade feature extraction for EMG signals
%% Cell type:code id: tags:
``` python
# functions are implemented from this paper:
# https://www.researchgate.net/publication/323587464_A_Comprehensive_Study_on_EMG_Feature_Extraction_and_Classifiers
# https://www.researchgate.net/publication/224148281_Evaluation_of_EMG_Feature_Extraction_for_Hand_Movement_Recognition_Based_on_Euclidean_Distance_and_Standard_Deviation
# Functions for the TIME Domain
# integrated EMG is the area under the rectified EMG signal
def IEMG(signal):
    """Integrated EMG: area under the rectified signal."""
    return np.abs(signal).sum()
# Mean Absolute Value
# PRE : Requires rectified signal
def MAV(signal, N):
    """Mean Absolute Value: rectified signal summed and normalised by N."""
    return np.abs(signal).sum() / N
# Mean Absolute Value Slope (potentially computationally very expensive)
def MAVS(signal, N):
    """Mean Absolute Value Slope: summed absolute first differences over N."""
    # np.diff computes the same pairwise differences the original loop did
    return np.sum(np.abs(np.diff(signal))) / N
# modified mean absolute value type 1
def MAV1(signal, N):
    """Modified MAV type 1: full weight inside the middle 50% of sample
    indices [0.25*N, 0.75*N], half weight outside."""
    lower = 0.25 * N
    upper = 0.75 * N
    idx = np.arange(signal.shape[0])
    weights = np.where((idx >= lower) & (idx <= upper), 1.0, 0.5)
    return np.sum(weights * np.abs(signal)) / N
# modified mean absolute value type 2
def MAV2(signal, N):
    """Modified MAV type 2: weight ramps linearly up to the middle 50% of
    sample indices and back down after it."""
    lower = 0.25 * N
    upper = 0.75 * N
    total = 0.0
    for i, value in enumerate(signal):
        if lower <= i <= upper:
            weight = 1.0
        elif i < lower:
            weight = 4.0 * i / N
        else:  # i > upper
            weight = 4.0 * (i - N) / N
        total += weight * np.abs(value)
    return total / N
# Simple Square Integral (SSI) expresses the energy of the EMG signal
# PRE : Requires rectified signal
def SSI(signal, N):
    """Simple Square Integral: signal energy normalised by N."""
    return np.square(np.abs(signal)).sum() / N
# The variance of EMG signal
# PRE : Requires rectified signal
def VAREMG(signal, N):
    """EMG variance: sum of squares over N - 1 (assumes a zero-mean signal)."""
    return np.square(signal).sum() / (N - 1)
# Root Mean Square
# PRE : Requires rectified signal
def RMS(signal, N):
    """Root mean square of the signal, with explicit normaliser N."""
    return np.sqrt(np.square(np.abs(signal)).sum() / N)
# the 3rd temporal moment
def TM3(signal, N):
    """Third temporal moment (absolute value stops odd powers cancelling)."""
    return np.abs(signal ** 3).sum() / N
# the 4th temporal moment
def TM4(signal, N):
    """Fourth temporal moment."""
    return np.abs(signal ** 4).sum() / N
# the 5th temporal moment
def TM5(signal, N):
    """Fifth temporal moment (absolute value stops odd powers cancelling)."""
    return np.abs(signal ** 5).sum() / N
# Waveform Length
def WL(signal, N):
    """Waveform length: summed absolute first differences of the signal.

    N is unused; the parameter is kept so the helper API stays uniform.
    """
    return np.sum(np.abs(np.diff(signal)))
```
%% Cell type:code id: tags:
``` python
# https://www.researchgate.net/publication/51997893_Techniques_for_Feature_Extraction_from_EMG_Signal
# Functions for the FREQUENCY Domain
# frequency median : requires the power spectrum density
def FMD(psd):
    """Frequency median proxy: half the total power of the spectrum."""
    return 0.5 * np.sum(psd)
# frequency mean : requires psd, freqs and frequency median for faster computation
def FMN(psd, freqs, fmd):
    """Mean frequency: power-weighted frequency sum over total power.

    `fmd` is half the summed PSD (see FMD), so 2*fmd restores the total.
    """
    total_power = 2 * fmd
    return np.sum(psd * freqs) / total_power
# same as FMD(), but based on amplitudes
def MMFD(amplitudes):
    """Modified median frequency: half the summed FFT amplitudes."""
    return 0.5 * np.sum(amplitudes)
# same as FMD(), but based on amplitudes
def MMNF(signal, amplitudes, mmfd):
    """Modified mean frequency from FFT amplitudes.

    `signal` is unused; the parameter is kept for API symmetry with FMN.
    """
    freqs = np.fft.fftfreq(amplitudes.size)  # bin frequencies of the FFT
    return np.sum(amplitudes * freqs) / mmfd
# estimate the AR coefficients of k-th order (k=6 based on literature research)
def AR(signal, order=6):
    """Return the Burg autoregressive coefficients of `signal` (order 6 by default)."""
    # arburg also returns the noise variance and reflection coefficients;
    # only the AR coefficients are kept
    ar, _, _ = arburg(signal, order) # only save AR coefs
    return ar
# Wavelets analysis
# import pywt
```
%% Cell type:code id: tags:
``` python
# PRE : raw emg signal
# POST: returns the extracted features
def extract_features_emg(data):
    """Compute 17 EMG feature series, one entry per epoch (row) of `data`.

    Returns a list of 17 lists in this order: MAV, SSI, VAREMG, RMS, WL,
    IEMG, MAVS, MAV1, MAV2, TM3, TM4, TM5, FMD, FMN, MMFD, MMNF, AR coefs.
    """
    # NOTE(review): N is the number of epochs (rows), yet it is passed to the
    # time-domain helpers as the normalising length; the per-epoch sample
    # count would be data.shape[1] -- confirm this is intentional.
    N = data.shape[0]
    #onsets_list = [] # save onsets of EMG signals
    #filtered_list = []
    # generate more features
    mav_list = []
    ssi_list = []
    vemg_list= []
    rms_list = []
    wl_list = []
    iemg_list= []
    mavs_list= []
    mav1_list= []
    mav2_list= []
    tm3_list = []
    tm4_list = []
    tm5_list = []
    fmd_list = []
    fmn_list = []
    mmfd_list= []
    mmnf_list= []
    ar_list = []
    start = time.time()
    for i in range(data.shape[0]):
        _, filt_emg, _ = emg.emg(signal=data[i].T, sampling_rate=512, show=False) # obtain only filtered signal
        freqs, psd = ss.welch(data[i], fs=512) # get the PSD of the signal for the frequencies and amplitudes
        amplitudes = np.abs(fft(data[i]))
        #filtered_list.append(filt_emg)
        #onsets_list.append(onsets_emg)
        # compute features (time-domain ones use the filtered signal)
        mav_list.append(MAV(filt_emg, N))
        ssi_list.append(SSI(filt_emg, N))
        vemg_list.append(VAREMG(filt_emg, N))
        rms_list.append(RMS(filt_emg, N))
        wl_list.append(WL(filt_emg, N))
        iemg_list.append(IEMG(filt_emg))
        mavs_list.append(MAVS(filt_emg, N))
        mav1_list.append(MAV1(filt_emg, N))
        mav2_list.append(MAV2(filt_emg, N))
        tm3_list.append(TM3(filt_emg, N))
        tm4_list.append(TM4(filt_emg, N))
        tm5_list.append(TM5(filt_emg, N))
        # frequency-domain features reuse intermediate results to avoid recomputation
        fmd_res = FMD(psd)
        fmd_list.append(fmd_res)
        fmn_list.append(FMN(psd, freqs, fmd_res))
        mmfd_res = MMFD(amplitudes)
        mmfd_list.append(mmfd_res)
        mmnf_list.append(MMNF(data[i], amplitudes, mmfd_res))
        ar_list.append(AR(filt_emg))
    print("Time: ", time.time() - start)
    emg_features = [mav_list,ssi_list,vemg_list,rms_list,wl_list,iemg_list,mavs_list,mav1_list,mav2_list,
                    tm3_list,tm4_list,tm5_list,fmd_list,fmn_list,mmfd_list,mmnf_list,ar_list]
    return emg_features
```
%% Cell type:code id: tags:
``` python
# get emg features for X_train and X_test
# (roughly 23 min for train / 14 min for test per the recorded cell output)
emg_feats_train = extract_features_emg(train_emg_raw)
emg_feats_test = extract_features_emg(test_emg_raw)
```
%%%% Output: stream
Time: 1361.9812316894531
Time: 859.1871929168701
%% Cell type:code id: tags:
``` python
# extract the coefs and save them in separate lists
def extract_ar_coefs(features):
    """Split the per-epoch AR coefficient vectors into six flat lists.

    `features[16]` holds one order-6 Burg AR coefficient vector per epoch
    (see extract_features_emg). Only the real part is kept, since the
    imaginary part is 0j for real-valued input signals.

    Returns a tuple of six lists (one per coefficient index), matching the
    original hand-unrolled ar_feats_0 ... ar_feats_5.
    """
    # FIX: replaces six copy-pasted accumulator lists with a single loop.
    coef_lists = [[] for _ in range(6)]
    for coefs in features[16]:
        for k in range(6):
            coef_lists[k].append(np.real(coefs[k]))
    return tuple(coef_lists)
```
%% Cell type:code id: tags:
``` python
# remove the AR features list and substitute them with the individual data lists
# else, scaling will not work properly
start = time.time()
ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5 = extract_ar_coefs(emg_feats_train)
# transpose turns the 16 per-feature lists into one row per epoch; the six
# AR coefficient columns are then appended on the right
emg_feats_train_mod = np.column_stack((np.transpose(emg_feats_train[0:16]),ar_feats_0,ar_feats_1,ar_feats_2,
                                       ar_feats_3,ar_feats_4,ar_feats_5))
ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5 = extract_ar_coefs(emg_feats_test)
emg_feats_test_mod = np.column_stack((np.transpose(emg_feats_test[0:16]),ar_feats_0,ar_feats_1,ar_feats_2,
                                      ar_feats_3,ar_feats_4,ar_feats_5))
print("Time: ", time.time() - start)
```
%%%% Output: error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-10-13d1648b70fd> in <module>
2 # else, scaling will not work properly
3 start = time.time()
----> 4 ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5 = extract_ar_coefs(emg_feats_train)
5 emg_feats_train_mod = np.column_stack((np.transpose(emg_feats_train[0:16]),ar_feats_0,ar_feats_1,ar_feats_2,
6 ar_feats_3,ar_feats_4,ar_feats_5))
NameError: name 'emg_feats_train' is not defined
%% Cell type:code id: tags:
``` python
# remove certain features (which are believed to be "bad")
# keep columns 0-5 (MAV, SSI, VAREMG, RMS, WL, IEMG) and 16+ (AR coefs);
# drop columns 6-15 (MAVS, MAV1, MAV2, TM3-TM5, FMD, FMN, MMFD, MMNF)
temp = emg_feats_train_mod[:,0:6]
temp2= emg_feats_train_mod[:,16:]
emg_feats_train_mod2 = np.array(np.column_stack((temp, temp2)))
temp_ = emg_feats_test_mod[:,0:6]
temp2_= emg_feats_test_mod[:,16:]
emg_feats_test_mod2 = np.array(np.column_stack((temp_, temp2_)))
```
%% Cell type:code id: tags:
``` python
# save emg features for future imports
# NOTE(review): this saves the raw 17-element feature list (one ROW per
# feature, AR vectors unsplit), not the per-epoch matrix emg_feats_train_mod
# built above -- confirm which layout downstream readers expect.
pd.DataFrame.to_csv(pd.DataFrame(emg_feats_train), 'files/emg_feats_train.csv', index=False)
pd.DataFrame.to_csv(pd.DataFrame(emg_feats_test), 'files/emg_feats_test.csv', index=False)
```
%% Cell type:code id: tags:
``` python
# create full train and testsets
# final design matrices: precomputed EEG features + pruned EMG features
X_train_ = np.array(np.column_stack((eeg_train, emg_feats_train_mod2)))
X_test_ = np.array(np.column_stack((eeg_test, emg_feats_test_mod2)))
```
%% Cell type:markdown id: tags:
### Splitting
%% Cell type:code id: tags:
``` python
start = time.time()
def split(X_train, y_train):
    """90/10 holdout split; shuffle=False keeps chronological order, so
    random_state has no effect here."""
    return train_test_split(
        X_train,
        y_train,
        test_size=0.1,
        shuffle=False,
        random_state=0)
print(X_train_.shape, train_labels_raw.shape)
if PROTOTYPING:
    # NOTE(review): this splits X_train, not the full matrix X_train_ built
    # above -- confirm which feature set prototyping runs should use.
    X_train, X_test, y_train, y_test = split(X_train, train_labels_raw)
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
else:
    y_train = train_labels_raw
print("Time: ", time.time() - start)
```
%%%% Output: error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-7-15af0b5a8527> in <module>
9 random_state=0)
10
---> 11 print(X_train_.shape, train_labels_raw.shape)
12 if PROTOTYPING:
13 X_train, X_test, y_train, y_test = split(X_train, train_labels_raw)
NameError: name 'X_train_' is not defined
%% Cell type:markdown id: tags:
### Feature scaling
%% Cell type:code id: tags:
``` python
# trick(?): scale eeg feats first, then add emg feats and scale again...
start = time.time()
def scale(X_train, X_test):
    """Standardize both sets with statistics fitted on the training set only."""
    scaler = StandardScaler().fit(X_train)
    # scale
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled
X_train_s, X_test_s = scale(X_train_, X_test_)
print("Time: ", time.time() - start)
```
%%%% Output: error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-5-646202010b2f> in <module>
10 return X_train_scaled, X_test_scaled
11
---> 12 X_train_s, X_test_s = scale(X_train_, X_test_)
13
14 print("Time: ", time.time() - start)
NameError: name 'X_train_' is not defined
%% Cell type:markdown id: tags:
### Training
%% Cell type:code id: tags:
``` python
# GridSearch for SVC
start = time.time()
kernels = ("rbf", "sigmoid")
C_values = np.logspace(0, 1.5, num=2)  # only two C values: 1 and ~31.6
param_grid = {"kernel" : kernels,
              "C" : C_values}
scoring_method = "balanced_accuracy"
# a single stratified 90/10 split (n_splits=1), not full cross-validation
cv = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
classifier = SVC(kernel="rbf", class_weight="balanced", gamma="auto", decision_function_shape="ovo")
grid = GridSearchCV(estimator=classifier, param_grid=param_grid, scoring=scoring_method, cv=cv, verbose=11)
grid.fit(X_train_s, np.ravel(y_train))
best = grid.best_params_
print("Grid with best params: %s and score %f" % (grid.best_params_, grid.best_score_))
print("Time: ", time.time() - start)
```
%%%% Output: stream
Fitting 1 folds for each of 4 candidates, totalling 4 fits
[CV] C=1.0, kernel=rbf ...............................................
%%%% Output: stream
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
%%%% Output: stream
[CV] ................... C=1.0, kernel=rbf, score=0.926, total= 1.9min
[CV] C=1.0, kernel=sigmoid ...........................................
%%%% Output: stream
[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 1.9min remaining: 0.0s
%%%% Output: stream
[CV] ............... C=1.0, kernel=sigmoid, score=0.753, total= 2.3min
[CV] C=31.622776601683793, kernel=rbf ................................
%%%% Output: stream
[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 4.1min remaining: 0.0s
%%%% Output: error
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-259-7d3d89b86907> in <module>
11 classifier = SVC(kernel="rbf", class_weight="balanced", gamma="auto", decision_function_shape="ovo")
12 grid = GridSearchCV(estimator=classifier, param_grid=param_grid, scoring=scoring_method, cv=cv, verbose=11)
---> 13 grid.fit(X_train_s, np.ravel(y_train))
14 best = grid.best_params_
15 print("Grid with best params: %s and score %f" % (grid.best_params_, grid.best_score_))
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
686 return results
687
--> 688 self._run_search(evaluate_candidates)
689
690 # For multi-metric evaluation, store the best_index_, best_params_ and
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1147 def _run_search(self, evaluate_candidates):
1148 """Search all candidates in param_grid"""
-> 1149 evaluate_candidates(ParameterGrid(self.param_grid))
1150
1151
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
665 for parameters, (train, test)
666 in product(candidate_params,
--> 667 cv.split(X, y, groups)))
668
669 if len(out) < 1:
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1004 self._iterating = self._original_iterator is not None
1005
-> 1006 while self.dispatch_one_batch(iterator):
1007 pass
1008
~\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
832 return False
833 else:
--> 834 self._dispatch(tasks)
835 return True
836
~\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
751 with self._lock:
752 job_idx = len(self._jobs)
--> 753 job = self._backend.apply_async(batch, callback=cb)
754 # A job can complete so quickly than its callback is
755 # called before we get here, causing self._jobs to
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
199 def apply_async(self, func, callback=None):
200 """Schedule a func to be run"""
--> 201 result = ImmediateResult(func)
202 if callback:
203 callback(result)
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
580 # Don't delay the application, to avoid keeping the input
581 # arguments in memory
--> 582 self.results = batch()
583
584 def get(self):
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
257
258 def __len__(self):
~\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
257
258 def __len__(self):
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
514 estimator.fit(X_train, **fit_params)
515 else:
--> 516 estimator.fit(X_train, y_train, **fit_params)
517
518 except Exception as e:
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
207
208 seed = rnd.randint(np.iinfo('i').max)
--> 209 fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
210 # see comment on the other call to np.iinfo in this file
211
~\Anaconda3\lib\site-packages\sklearn\svm\base.py in _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
266 cache_size=self.cache_size, coef0=self.coef0,
267 gamma=self._gamma, epsilon=self.epsilon,
--> 268 max_iter=self.max_iter, random_seed=random_seed)
269
270 self._warn_from_fit_status()
KeyboardInterrupt:
%% Cell type:code id: tags:
``` python
# testing logspace for more refined C estimates
np.logspace(0.32,0.45,num=10)
```
%%%% Output: execute_result
array([2.08929613, 2.15995371, 2.23300085, 2.30851836, 2.38658979,
2.4673015 , 2.55074278, 2.63700596, 2.72618645, 2.81838293])
%% Cell type:code id: tags:
``` python
# SVM approach
start = time.time()
# one-vs-one RBF SVM; class_weight="balanced" counters class imbalance
classifier = SVC(C=1, class_weight="balanced", gamma="auto", decision_function_shape="ovo")
classifier.fit(X_train_s, np.ravel(y_train))
y_predict = classifier.predict(X_test_s)
if PROTOTYPING:
    # y_test only exists when the prototyping split cell has been run
    print(balanced_accuracy_score(y_test, y_predict))
print("Time: ", time.time() - start)
```
%%%% Output: stream
C:\Users\made_\Anaconda3\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
%%%% Output: stream
Time: 165.16580438613892
%% Cell type:code id: tags:
``` python
# bagging
y_train = train_labels_raw
start = time.time()
# ensemble of 100 bootstrap-sampled SVMs; per the recorded output this cell
# takes over two hours to run
classifier = BaggingClassifier(SVC(C=1, class_weight="balanced", gamma="auto", decision_function_shape="ovo"),
                               n_estimators=100, random_state=0)
classifier.fit(X_train_s, np.ravel(y_train))
y_predict = classifier.predict(X_test_s)
if PROTOTYPING:
    print(balanced_accuracy_score(y_test, y_predict))
print("Time: ", time.time() - start)
```
%%%% Output: stream
Time: 8023.956521987915
%% Cell type:code id: tags:
``` python
# scale freqs from fft alone
# reuses scale(): StandardScaler fit on the train spectra, applied to both
eeg_freqs_train_s, eeg_freqs_test_s = scale(eeg_freqs_train, eeg_freqs_test)
```
%% Cell type:code id: tags:
``` python
y_train = train_labels_raw
z,s = np.shape(eeg_freqs_train_s)
# define the model architecture
lahead = s  # timesteps = number of FFT bins per epoch
ann = Sequential()
ann.add(LSTM(200, input_shape = (lahead,1), return_sequences = True))
#ann.add(Dropout(0.2))
ann.add(LSTM(100))
#ann.add(Dropout(0.25))
#ann.add(LSTM(512, activation = 'relu'))
#ann.add(Dropout(0.25))
#
#ann.add(LSTM(256, activation = 'relu'))
#ann.add(Dropout(0.25))
ann.add(Dense(50, activation = 'relu'))
#ann.add(Dropout(0.25))
ann.add(Dense(4, activation='softmax'))
opt = keras.optimizers.Adam(lr = 1e-3, decay = 1e-5)
ann.compile(loss = 'sparse_categorical_crossentropy',
            optimizer = opt,
            metrics = ['accuracy'])
# reshape to fulfill LSTM shape requirements
reshape_1 = lambda x: x.reshape((x.shape[0], x.shape[1], 1)) # reshape x trainset
reshape_2 = lambda y: y.reshape((y.shape[0], 1)) # reshape y
X_reshaped = reshape_1(eeg_freqs_train_s)
X_test_reshaped = reshape_1(eeg_freqs_test_s)
y_reshaped = reshape_2(y_train)
# fit and predict
# NOTE(review): Keras' class_weight expects a dict {class_index: weight};
# the string 'balanced' is a scikit-learn convention -- verify it is honored.
# NOTE(review): batch_size=10000 caused the OOM recorded in the cell output.
ann.fit(X_reshaped, y_reshaped, epochs = 15, batch_size = 10000, class_weight = 'balanced')
y_predict = ann.predict_classes(X_test_reshaped)
```
%%%% Output: stream
Epoch 1/15
%%%% Output: error
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\client\session.py in __call__(self, *args)
1450 return tf_session.TF_SessionRunCallable(
-> 1451 self._session._session, self._handle, args, status, None)
1452 else:
KeyboardInterrupt:
During handling of the above exception, another exception occurred:
ResourceExhaustedError Traceback (most recent call last)
<ipython-input-8-8bb9b645a9ff> in <module>
35
36 # fit and predict (later add class weights and also optimize them)
---> 37 ann.fit(X_reshaped, y_reshaped, epochs = 15, batch_size = 10000, class_weight = 'balanced')
38 y_predict = ann.predict_classes(X_test_reshaped)
~\Anaconda3\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1037 initial_epoch=initial_epoch,
1038 steps_per_epoch=steps_per_epoch,
-> 1039 validation_steps=validation_steps)
1040
1041 def evaluate(self, x=None, y=None,
~\Anaconda3\lib\site-packages\keras\engine\training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
197 ins_batch[i] = ins_batch[i].toarray()
198
--> 199 outs = f(ins_batch)
200 outs = to_list(outs)
201 for l, o in zip(out_labels, outs):
~\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in __call__(self, inputs)
2713 return self._legacy_call(inputs)
2714
-> 2715 return self._call(inputs)
2716 else:
2717 if py_any(is_tensor(x) for x in inputs):
~\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in _call(self, inputs)
2673 fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
2674 else:
-> 2675 fetched = self._callable_fn(*array_vals)
2676 return fetched[:len(self.outputs)]
2677
~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\client\session.py in __call__(self, *args)
1452 else:
1453 return tf_session.TF_DeprecatedSessionRunCallable(
-> 1454 self._session._session, self._handle, args, status, None)
1455
1456 def __del__(self):
~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
517 None, None,
518 compat.as_text(c_api.TF_Message(self.status.status)),
--> 519 c_api.TF_GetCode(self.status.status))
520 # Delete the underlying status object from memory otherwise it stays alive
521 # as there is a reference to status from this from the traceback due to
ResourceExhaustedError: OOM when allocating tensor with shape[10000,200] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
[[Node: lstm_3/while/MatMul_1 = MatMul[T=DT_FLOAT, _class=["loc:@training_1/Adam/gradients/lstm_3/while/MatMul_1_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](lstm_3/while/TensorArrayReadV3, lstm_3/while/MatMul_1/Enter)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
%% Cell type:markdown id: tags:
### Write result
%% Cell type:code id: tags:
``` python
# fill the sample submission template with the predictions and save to CSV
start = time.time()
output = pd.read_csv('files/sample.csv')
# overwrite the label column (column index 1) row by row
for i in range(output.shape[0]):
    output.iat[i, 1] = y_predict[i]
output.to_csv("files/SVM_OvO_3_otherfeats_bagged.csv", index=False)
print("Time: ", time.time() - start)
```
%%%% Output: stream
Time: 0.5066447257995605
%% Cell type:markdown id: tags:
### Experimental Testing
%% Cell type:code id: tags:
``` python
best
```
%%%% Output: execute_result
{'C': 2.154434690031882, 'kernel': 'rbf'}
%% Cell type:code id: tags:
``` python
grid.best_estimator_
```
%%%% Output: execute_result
SVC(C=2.154434690031882, cache_size=200, class_weight='balanced', coef0=0.0,
decision_function_shape='ovo', degree=3, gamma='auto', kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment