Commit ed5e98f1 authored by slavenc's avatar slavenc
Browse files

removed unnecessary comments

parent 160d393f
%% Cell type:markdown id: tags:
# Project 4
%% Cell type:markdown id: tags:
### Dependencies and Constants
%% Cell type:code id: tags:
``` python
import time
import numpy as np
from numpy.fft import fft # to get amplitudes
import pandas as pd
import scipy.signal as ss # for psd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedShuffleSplit
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
from biosppy.signals import eeg # signal processing
from biosppy.signals import emg # signal processing
from spectrum import arburg
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
PROTOTYPING = False
```
%%%% Output: stream
C:\Users\made_\Anaconda3\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Using TensorFlow backend.
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint8 = np.dtype([("qint8", np.int8, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint8 = np.dtype([("quint8", np.uint8, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint16 = np.dtype([("qint16", np.int16, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint16 = np.dtype([("quint16", np.uint16, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint32 = np.dtype([("qint32", np.int32, 1)])
C:\Users\made_\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
np_resource = np.dtype([("resource", np.ubyte, 1)])
%% Cell type:markdown id: tags:
### Read data
%% Cell type:code id: tags:
``` python
start = time.time()
# import train sets
train_eeg1_raw = pd.read_csv('files/train_eeg1.csv').drop('Id', axis=1).values
train_eeg2_raw = pd.read_csv('files/train_eeg2.csv').drop('Id', axis=1).values
train_emg_raw = pd.read_csv('files/train_emg.csv').drop('Id', axis=1).values
# import test sets
test_eeg1_raw = pd.read_csv('files/test_eeg1.csv').drop('Id', axis=1).values
test_eeg2_raw = pd.read_csv('files/test_eeg2.csv').drop('Id', axis=1).values
test_emg_raw = pd.read_csv('files/test_emg.csv').drop('Id', axis=1).values
# import eeg features directly
eeg_train = pd.read_csv('files/eeg_feats_train.csv').values
eeg_test = pd.read_csv('files/eeg_feats_test.csv').values
# import emg features directly
emg_feats_train = pd.read_csv('files/emg_feats_train.csv').values
emg_feats_test = pd.read_csv('files/emg_feats_test.csv').values
# import reduced eeg features by pca (to 45 components - already scaled)
eeg_train_red = pd.read_csv('files/eeg_train_pca45.csv').values
eeg_test_red = pd.read_csv('files/eeg_test_pca45.csv').values
# import labels
train_labels_raw = pd.read_csv('files/train_labels.csv').drop('Id', axis=1).values
print(train_eeg1_raw.shape, train_eeg2_raw.shape, train_emg_raw.shape)
print(test_eeg1_raw.shape, test_eeg2_raw.shape, test_emg_raw.shape)
print(train_labels_raw.shape)
print(eeg_train.shape, eeg_test.shape)
print("Time: ", time.time() - start)
```
%%%% Output: stream
(64800, 512) (64800, 512) (64800, 512)
%%%% Output: error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-2-60963c3fdbae> in <module>
27
28 print(train_eeg1_raw.shape, train_eeg2_raw.shape, train_emg_raw.shape)
---> 29 print(test_eeg1_raw.shape, test_eeg2_raw.shape, test_emg_raw.shape)
30 print(train_labels_raw.shape)
31 print(eeg_train.shape, eeg_test.shape)
NameError: name 'test_emg_raw' is not defined
%% Cell type:markdown id: tags:
### Feature extraction for EEG signals
%% Cell type:code id: tags:
``` python
start = time.time()
def calculate_statistics(list_values):
n5 = np.nanpercentile(list_values, 5)
n25 = np.nanpercentile(list_values, 25)
n75 = np.nanpercentile(list_values, 75)
n95 = np.nanpercentile(list_values, 95)
median = np.nanpercentile(list_values, 50)
mean = np.nanmean(list_values)
std = np.nanstd(list_values)
var = np.nanvar(list_values)
rms = np.nanmean(np.sqrt(list_values**2))
return [n5, n25, n75, n95, median, mean, std, var, rms]
def calculate_crossings(list_values):
zero_crossing_indices = np.nonzero(np.diff(np.array(list_values) > 0))[0]
no_zero_crossings = len(zero_crossing_indices)
mean_crossing_indices = np.nonzero(np.diff(np.array(list_values) > np.nanmean(list_values)))[0]
no_mean_crossings = len(mean_crossing_indices)
return [no_zero_crossings, no_mean_crossings]
def get_features(list_values):
crossings = calculate_crossings(list_values)
statistics = calculate_statistics(list_values)
return crossings + statistics
def extract_features(eeg1, eeg2, emg):
features = None
for i in range(eeg1.shape[0]):
if i % 1000 == 0:
print(i, "/", eeg1.shape[0])
row = np.array([])
signal = np.array([eeg1[i], eeg2[i]]).T
analysis = eeg.eeg(signal=signal, sampling_rate=128, show=False)
# theta
row = np.append(row, get_features(analysis["theta"]))
# row = np.append(row, get_features(analysis["theta"][:, 1]))
# alpha low
row = np.append(row, get_features(analysis["alpha_low"]))
# row = np.append(row, get_features(analysis["alpha_low"][:, 1]))
# alpha low
row = np.append(row, get_features(analysis["alpha_high"]))
# row = np.append(row, get_features(analysis["alpha_high"][:, 1]))
# beta
row = np.append(row, get_features(analysis["beta"]))
# row = np.append(row, get_features(analysis["beta"][:, 1]))
# gamma
row = np.append(row, get_features(analysis["gamma"][:, 0]))
# row = np.append(row, get_features(analysis["gamma"]))
# format
row = row.reshape((1, -1))
# concatenate
if features is None:
features = row
else:
features = np.concatenate((features, row), axis=0)
return features
X_train = extract_features(train_eeg1_raw, train_eeg2_raw, train_emg_raw)
if not PROTOTYPING:
X_test = extract_features(test_eeg1_raw, test_eeg2_raw, test_emg_raw)
print("X_test", X_test.shape)
print("X_train", X_train.shape)
print("Time: ", time.time() - start)
```
%%%% Output: stream
0 / 64800
1000 / 64800
2000 / 64800
3000 / 64800
4000 / 64800
5000 / 64800
6000 / 64800
7000 / 64800
8000 / 64800
9000 / 64800
10000 / 64800
11000 / 64800
12000 / 64800
13000 / 64800
14000 / 64800
15000 / 64800
16000 / 64800
17000 / 64800
18000 / 64800
19000 / 64800
20000 / 64800
21000 / 64800
22000 / 64800
23000 / 64800
24000 / 64800
25000 / 64800
26000 / 64800
27000 / 64800
28000 / 64800
29000 / 64800
30000 / 64800
31000 / 64800
32000 / 64800
33000 / 64800
34000 / 64800
35000 / 64800
36000 / 64800
37000 / 64800
38000 / 64800
39000 / 64800
40000 / 64800
41000 / 64800
42000 / 64800
43000 / 64800
44000 / 64800
45000 / 64800
46000 / 64800
47000 / 64800
48000 / 64800
49000 / 64800
50000 / 64800
51000 / 64800
52000 / 64800
53000 / 64800
54000 / 64800
55000 / 64800
56000 / 64800
57000 / 64800
58000 / 64800
59000 / 64800
60000 / 64800
61000 / 64800
62000 / 64800
63000 / 64800
64000 / 64800
0 / 43200
1000 / 43200
2000 / 43200
3000 / 43200
4000 / 43200
5000 / 43200
6000 / 43200
7000 / 43200
8000 / 43200
9000 / 43200
10000 / 43200
11000 / 43200
12000 / 43200
13000 / 43200
14000 / 43200
15000 / 43200
16000 / 43200
17000 / 43200
18000 / 43200
19000 / 43200
20000 / 43200
21000 / 43200
22000 / 43200
23000 / 43200
24000 / 43200
25000 / 43200
26000 / 43200
27000 / 43200
28000 / 43200
29000 / 43200
30000 / 43200
31000 / 43200
32000 / 43200
33000 / 43200
34000 / 43200
35000 / 43200
36000 / 43200
37000 / 43200
38000 / 43200
39000 / 43200
40000 / 43200
41000 / 43200
42000 / 43200
43000 / 43200
%%%% Output: error
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-3-2aadf25068c8> in <module>
72 X_test = extract_features(test_eeg1_raw, test_eeg2_raw, test_emg_raw)
73 # save features for future imports
---> 74 pd.DataFrame.to_csv(X_train, 'files/eeg_feats_train.csv')
75 pd.DataFrame.to_csv(X_tests, 'files/eeg_feats_test.csv')
76 print("X_test", X_test.shape)
~\Anaconda3\lib\site-packages\pandas\core\generic.py in to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal)
3200 """
3201
-> 3202 df = self if isinstance(self, ABCDataFrame) else self.to_frame()
3203
3204 from pandas.io.formats.csvs import CSVFormatter
AttributeError: 'numpy.ndarray' object has no attribute 'to_frame'
%% Cell type:code id: tags:
``` python
# save features for future imports
pd.DataFrame.to_csv(pd.DataFrame(X_train), 'files/eeg_feats_train.csv', index=False)
pd.DataFrame.to_csv(pd.DataFrame(X_test), 'files/eeg_feats_test.csv', index=False)
```
%% Cell type:code id: tags:
``` python
# obtain features by simply doing a FFT on the data
# probably more suitable for a neural network approach
eeg1_freqs_train = []
eeg2_freqs_train = []
eeg1_freqs_test = []
eeg2_freqs_test = []
for i in range(train_eeg1_raw.shape[0]):
eeg1_freqs_train.append(np.real(fft(train_eeg1_raw[i])))
eeg2_freqs_train.append(np.real(fft(train_eeg2_raw[i])))
for i in range(test_eeg1_raw.shape[0]):
eeg1_freqs_test.append(np.real(fft(test_eeg1_raw[i])))
eeg2_freqs_test.append(np.real(fft(test_eeg2_raw[i])))
```
%% Cell type:code id: tags:
``` python
# concatenate frequency features from fft
start = time.time()
eeg_freqs_train = np.array(np.column_stack((eeg1_freqs_train, eeg2_freqs_train)))
eeg_freqs_test = np.array(np.column_stack((eeg1_freqs_test, eeg2_freqs_test)))
print("Time: ", time.time() - start)
# save features for future imports
#pd.DataFrame.to_csv(pd.DataFrame(eeg_freqs_train), 'files/eeg_freqs_train.csv', index=False)
#pd.DataFrame.to_csv(pd.DataFrame(eeg_freqs_test), 'files/eeg_freqs_test.csv', index=False)
```
%%%% Output: stream
Time: 2.0603151321411133
%% Cell type:markdown id: tags:
### homemade Feature Extraction for EMG signals
%% Cell type:code id: tags:
``` python
# functions are implemented from this paper:
# https://www.researchgate.net/publication/323587464_A_Comprehensive_Study_on_EMG_Feature_Extraction_and_Classifiers
# https://www.researchgate.net/publication/224148281_Evaluation_of_EMG_Feature_Extraction_for_Hand_Movement_Recognition_Based_on_Euclidean_Distance_and_Standard_Deviation
# Functions for the TIME Domain
# integrated EMG is the area under the rectified EMG signal
def IEMG(signal):
iemg = np.sum(np.abs(signal))
return iemg
# Mean Absolute Value
# PRE : Requires rectified signal
def MAV(signal, N):
mav = np.sum(np.abs(signal))/N
return mav
# Mean Absolute Value Slope (potentially computationally very expensive)
def MAVS(signal, N):
temp = 0
for i in range(signal.shape[0]-1):
temp += np.abs(signal[i+1] - signal[i])
mavs = temp/N
return mavs
# modified mean absolute value type 1
def MAV1(signal, N):
# interval borders
lower = 0.25 * N
upper = 0.75 * N
temp = 0
for i in range(signal.shape[0]):
if i >= lower and i <= upper:
temp += 1 * np.abs(signal[i])
else:
temp += 0.5 * np.abs(signal[i])
mav1 = temp/N
return mav1
# modified mean absolute value type 2
def MAV2(signal, N):
# interval borders
lower = 0.25 * N
upper = 0.75 * N
temp = 0
for i in range(signal.shape[0]):
if i >= lower and i <= upper:
temp += 1 * np.abs(signal[i])
elif i < lower:
temp += (4*i/N) * np.abs(signal[i])
elif i > upper:
temp += (4*(i-N)/N) * np.abs(signal[i])
mav2 = temp/N
return mav2
# Simple Square Integral (SSI) expresses the energy of the EMG signal
# PRE : Requires rectified signal
def SSI(signal, N):
ssi = np.sum(np.abs(signal)**2)/N # should square every value in signal element-wise
return ssi
# The variance of EMG signal
# PRE : Requires rectified signal
def VAREMG(signal, N):
varemg = np.sum(signal**2)/(N-1) # should square every value in signal element-wise
return varemg
# Root Mean Square
# PRE : Requires rectified signal
def RMS(signal, N):
rms = np.sqrt(np.sum(np.abs(signal)**2)/N) # should square every value in signal element-wise
return rms
# the 3rd temporal moment
def TM3(signal, N):
tm3 = np.sum(np.abs(signal**3))/N
return tm3
# the 4th temporal moment
def TM4(signal, N):
tm4 = np.sum(np.abs(signal**4))/N
return tm4
# the 5th temporal moment
def TM5(signal, N):
tm5 = np.sum(np.abs(signal**5))/N
return tm5
# Waveform Length
def WL(signal, N):
wl = 0
temp = 0
for j in range(signal.shape[0]-1):
temp = np.abs(signal[j+1] - signal[j])
wl += temp
return wl
```
%% Cell type:code id: tags:
``` python
# https://www.researchgate.net/publication/51997893_Techniques_for_Feature_Extraction_from_EMG_Signal
# Functions for the FREQUENCY Domain
# frequency median : requires the power spectrum density
def FMD(psd):
fmd = 0.5 * np.sum(psd)
return fmd
# frequency mean : requires psd, freqs and frequency median for faster computation
def FMN(psd, freqs, fmd):
fmd = fmd * 2 # simply sum of all psd elements
fmn = np.sum(np.multiply(psd, freqs))/fmd
return fmn
# same as FMD(), but based on amplitudes
def MMFD(amplitudes):
mmfd = 0.5 * np.sum(amplitudes)
return mmfd
# same as FMD(), but based on amplitudes
def MMNF(signal, amplitudes, mmfd):
freqs = np.fft.fftfreq(amplitudes.size) # freqs based on fourier transform
mmnf = np.sum(np.multiply(amplitudes, freqs))/mmfd
return mmnf
# estimate the AR coefficients of k-th order (k=6 based on literature research)
def AR(signal, order=6):
ar, _, _ = arburg(signal, order) # only save AR coefs
return ar
# Wavelets analysis
# import pywt
```
%% Cell type:code id: tags:
``` python
# PRE : raw emg signal
# POST: returns the extracted features
def extract_features_emg(data):
N = data.shape[0]
#onsets_list = [] # save onsets of EMG signals
#filtered_list = []
# generate more features
mav_list = []
ssi_list = []
vemg_list= []
rms_list = []
wl_list = []
iemg_list= []
mavs_list= []
mav1_list= []
mav2_list= []
tm3_list = []
tm4_list = []
tm5_list = []
fmd_list = []
fmn_list = []
mmfd_list= []
mmnf_list= []
ar_list = []
start = time.time()
for i in range(data.shape[0]):
_, filt_emg, _ = emg.emg(signal=data[i].T, sampling_rate=512, show=False) # obtain only filtered signal
freqs, psd = ss.welch(data[i], fs=512) # get the PSD of the signal for the frequencies and amplitudes
amplitudes = np.abs(fft(data[i]))
#filtered_list.append(filt_emg)
#onsets_list.append(onsets_emg)
# compute features
mav_list.append(MAV(filt_emg, N))
ssi_list.append(SSI(filt_emg, N))
vemg_list.append(VAREMG(filt_emg, N))
rms_list.append(RMS(filt_emg, N))
wl_list.append(WL(filt_emg, N))
iemg_list.append(IEMG(filt_emg))
mavs_list.append(MAVS(filt_emg, N))
mav1_list.append(MAV1(filt_emg, N))
mav2_list.append(MAV2(filt_emg, N))
tm3_list.append(TM3(filt_emg, N))
tm4_list.append(TM4(filt_emg, N))
tm5_list.append(TM5(filt_emg, N))
fmd_res = FMD(psd)
fmd_list.append(fmd_res)
fmn_list.append(FMN(psd, freqs, fmd_res))
mmfd_res = MMFD(amplitudes)
mmfd_list.append(mmfd_res)
mmnf_list.append(MMNF(data[i], amplitudes, mmfd_res))
ar_list.append(AR(filt_emg))
print("Time: ", time.time() - start)
emg_features = [mav_list,ssi_list,vemg_list,rms_list,wl_list,iemg_list,mavs_list,mav1_list,mav2_list,
tm3_list,tm4_list,tm5_list,fmd_list,fmn_list,mmfd_list,mmnf_list,ar_list]
return emg_features
```
%% Cell type:code id: tags:
``` python
# get emg features for X_train and X_test
emg_feats_train = extract_features_emg(train_emg_raw)
emg_feats_test = extract_features_emg(test_emg_raw)
```
%%%% Output: stream
Time: 1361.9812316894531
Time: 859.1871929168701
%% Cell type:code id: tags:
``` python
# extract the coefs and save them in separate lists
def extract_ar_coefs(features):
ar_feats_0 = []
ar_feats_1 = []
ar_feats_2 = []
ar_feats_3 = []
ar_feats_4 = []
ar_feats_5 = []
# 17th idx is where the AR coefs list is in
# we only care for the real part. the complex part is 0j anyway
for i in range(len(features[16])):
ar_feats_0.append(np.real(features[16][i][0]))
ar_feats_1.append(np.real(features[16][i][1]))
ar_feats_2.append(np.real(features[16][i][2]))
ar_feats_3.append(np.real(features[16][i][3]))
ar_feats_4.append(np.real(features[16][i][4]))
ar_feats_5.append(np.real(features[16][i][5]))
return ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5
```
%% Cell type:code id: tags:
``` python
# remove the AR features list and substitute them with the individual data lists
# else, scaling will not work properly
start = time.time()
ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5 = extract_ar_coefs(emg_feats_train)
emg_feats_train_mod = np.column_stack((np.transpose(emg_feats_train[0:16]),ar_feats_0,ar_feats_1,ar_feats_2,
ar_feats_3,ar_feats_4,ar_feats_5))
ar_feats_0, ar_feats_1, ar_feats_2, ar_feats_3, ar_feats_4, ar_feats_5 = extract_ar_coefs(emg_feats_test)
emg_feats_test_mod = np.column_stack((np.transpose(emg_feats_test[0:16]),ar_feats_0,ar_feats_1,ar_feats_2,
ar_feats_3,ar_feats_4,ar_feats_5))
print("Time: ", time.time() - start)
```
%%%% Output: error