Commit 160d393f authored by slavenc

added project 4 files

parent 731286fc
files/*
!files/.gitkeep
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Project 4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dependencies and Constants"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\made_\\Anaconda3\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n",
"C:\\Users\\made_\\AppData\\Roaming\\Python\\Python36\\site-packages\\tensorflow\\python\\framework\\dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
"C:\\Users\\made_\\AppData\\Roaming\\Python\\Python36\\site-packages\\tensorflow\\python\\framework\\dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
"C:\\Users\\made_\\AppData\\Roaming\\Python\\Python36\\site-packages\\tensorflow\\python\\framework\\dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
"C:\\Users\\made_\\AppData\\Roaming\\Python\\Python36\\site-packages\\tensorflow\\python\\framework\\dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
"C:\\Users\\made_\\AppData\\Roaming\\Python\\Python36\\site-packages\\tensorflow\\python\\framework\\dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
"C:\\Users\\made_\\AppData\\Roaming\\Python\\Python36\\site-packages\\tensorflow\\python\\framework\\dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
]
}
],
"source": [
"import time\n",
"import numpy as np\n",
"from numpy.fft import fft # to get amplitudes\n",
"import pandas as pd\n",
"import scipy.signal as ss # for psd\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import balanced_accuracy_score\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedShuffleSplit\n",
"from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier\n",
"from biosppy.signals import eeg # signal processing\n",
"from biosppy.signals import emg # signal processing\n",
"from spectrum import arburg\n",
"import keras\n",
"from keras.models import Sequential\n",
"from keras.layers import LSTM, Dense, Dropout, BatchNormalization\n",
"\n",
"\n",
"PROTOTYPING = False"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(64800, 512) (64800, 512) (64800, 512)\n"
]
},
{
"ename": "NameError",
"evalue": "name 'test_emg_raw' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-2-60963c3fdbae>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_eeg1_raw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_eeg2_raw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_emg_raw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 29\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_eeg1_raw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_eeg2_raw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_emg_raw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 30\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_labels_raw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0meeg_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0meeg_test\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'test_emg_raw' is not defined"
]
}
],
"source": [
"start = time.time()\n",
"\n",
"# import train sets\n",
"train_eeg1_raw = pd.read_csv('files/train_eeg1.csv').drop('Id', axis=1).values\n",
"train_eeg2_raw = pd.read_csv('files/train_eeg2.csv').drop('Id', axis=1).values\n",
"train_emg_raw = pd.read_csv('files/train_emg.csv').drop('Id', axis=1).values\n",
"\n",
"# import test sets\n",
"test_eeg1_raw = pd.read_csv('files/test_eeg1.csv').drop('Id', axis=1).values\n",
"test_eeg2_raw = pd.read_csv('files/test_eeg2.csv').drop('Id', axis=1).values\n",
"test_emg_raw = pd.read_csv('files/test_emg.csv').drop('Id', axis=1).values\n",
"\n",
"# import eeg features directly\n",
"eeg_train = pd.read_csv('files/eeg_feats_train.csv').values\n",
"eeg_test = pd.read_csv('files/eeg_feats_test.csv').values\n",
"\n",
"# import emg features directly\n",
"emg_feats_train = pd.read_csv('files/emg_feats_train.csv').values\n",
"emg_feats_test = pd.read_csv('files/emg_feats_test.csv').values\n",
"\n",
"# import reduced eeg features by pca (to 45 components - already scaled)\n",
"eeg_train_red = pd.read_csv('files/eeg_train_pca45.csv').values\n",
"eeg_test_red = pd.read_csv('files/eeg_test_pca45.csv').values\n",
"\n",
"# import labels\n",
"train_labels_raw = pd.read_csv('files/train_labels.csv').drop('Id', axis=1).values\n",
"\n",
"print(train_eeg1_raw.shape, train_eeg2_raw.shape, train_emg_raw.shape)\n",
"print(test_eeg1_raw.shape, test_eeg2_raw.shape, test_emg_raw.shape)\n",
"print(train_labels_raw.shape)\n",
"print(eeg_train.shape, eeg_test.shape)\n",
"\n",
"print(\"Time: \", time.time() - start)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Feature extraction for EEG signals"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 / 64800\n",
"1000 / 64800\n",
"2000 / 64800\n",
"3000 / 64800\n",
"4000 / 64800\n",
"5000 / 64800\n",
"6000 / 64800\n",
"7000 / 64800\n",
"8000 / 64800\n",
"9000 / 64800\n",
"10000 / 64800\n",
"11000 / 64800\n",
"12000 / 64800\n",
"13000 / 64800\n",
"14000 / 64800\n",
"15000 / 64800\n",
"16000 / 64800\n",
"17000 / 64800\n",
"18000 / 64800\n",
"19000 / 64800\n",
"20000 / 64800\n",
"21000 / 64800\n",
"22000 / 64800\n",
"23000 / 64800\n",
"24000 / 64800\n",
"25000 / 64800\n",
"26000 / 64800\n",
"27000 / 64800\n",
"28000 / 64800\n",
"29000 / 64800\n",
"30000 / 64800\n",
"31000 / 64800\n",
"32000 / 64800\n",
"33000 / 64800\n",
"34000 / 64800\n",
"35000 / 64800\n",
"36000 / 64800\n",
"37000 / 64800\n",
"38000 / 64800\n",
"39000 / 64800\n",
"40000 / 64800\n",
"41000 / 64800\n",
"42000 / 64800\n",
"43000 / 64800\n",
"44000 / 64800\n",
"45000 / 64800\n",
"46000 / 64800\n",
"47000 / 64800\n",
"48000 / 64800\n",
"49000 / 64800\n",
"50000 / 64800\n",
"51000 / 64800\n",
"52000 / 64800\n",
"53000 / 64800\n",
"54000 / 64800\n",
"55000 / 64800\n",
"56000 / 64800\n",
"57000 / 64800\n",
"58000 / 64800\n",
"59000 / 64800\n",
"60000 / 64800\n",
"61000 / 64800\n",
"62000 / 64800\n",
"63000 / 64800\n",
"64000 / 64800\n",
"0 / 43200\n",
"1000 / 43200\n",
"2000 / 43200\n",
"3000 / 43200\n",
"4000 / 43200\n",
"5000 / 43200\n",
"6000 / 43200\n",
"7000 / 43200\n",
"8000 / 43200\n",
"9000 / 43200\n",
"10000 / 43200\n",
"11000 / 43200\n",
"12000 / 43200\n",
"13000 / 43200\n",
"14000 / 43200\n",
"15000 / 43200\n",
"16000 / 43200\n",
"17000 / 43200\n",
"18000 / 43200\n",
"19000 / 43200\n",
"20000 / 43200\n",
"21000 / 43200\n",
"22000 / 43200\n",
"23000 / 43200\n",
"24000 / 43200\n",
"25000 / 43200\n",
"26000 / 43200\n",
"27000 / 43200\n",
"28000 / 43200\n",
"29000 / 43200\n",
"30000 / 43200\n",
"31000 / 43200\n",
"32000 / 43200\n",
"33000 / 43200\n",
"34000 / 43200\n",
"35000 / 43200\n",
"36000 / 43200\n",
"37000 / 43200\n",
"38000 / 43200\n",
"39000 / 43200\n",
"40000 / 43200\n",
"41000 / 43200\n",
"42000 / 43200\n",
"43000 / 43200\n"
]
},
{
"ename": "AttributeError",
"evalue": "'numpy.ndarray' object has no attribute 'to_frame'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-3-2aadf25068c8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[0mX_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mextract_features\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_eeg1_raw\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_eeg2_raw\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtest_emg_raw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[1;31m# save features for future imports\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'files/eeg_feats_train.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_tests\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'files/eeg_feats_test.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"X_test\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mto_csv\u001b[1;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal)\u001b[0m\n\u001b[0;32m 3200\u001b[0m \"\"\"\n\u001b[0;32m 3201\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3202\u001b[1;33m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mABCDataFrame\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_frame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3203\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3204\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcsvs\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mCSVFormatter\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'to_frame'"
]
}
],
"source": [
"start = time.time()\n",
"\n",
"def calculate_statistics(list_values):\n",
" n5 = np.nanpercentile(list_values, 5)\n",
" n25 = np.nanpercentile(list_values, 25)\n",
" n75 = np.nanpercentile(list_values, 75)\n",
" n95 = np.nanpercentile(list_values, 95)\n",
" median = np.nanpercentile(list_values, 50)\n",
" mean = np.nanmean(list_values)\n",
" std = np.nanstd(list_values)\n",
" var = np.nanvar(list_values)\n",
" rms = np.nanmean(np.sqrt(list_values**2))\n",
" return [n5, n25, n75, n95, median, mean, std, var, rms]\n",
" \n",
"def calculate_crossings(list_values):\n",
" zero_crossing_indices = np.nonzero(np.diff(np.array(list_values) > 0))[0]\n",
" no_zero_crossings = len(zero_crossing_indices)\n",
" mean_crossing_indices = np.nonzero(np.diff(np.array(list_values) > np.nanmean(list_values)))[0]\n",
" no_mean_crossings = len(mean_crossing_indices)\n",
" return [no_zero_crossings, no_mean_crossings]\n",
" \n",
"def get_features(list_values):\n",
" crossings = calculate_crossings(list_values)\n",
" statistics = calculate_statistics(list_values)\n",
" return crossings + statistics\n",
"\n",
"def extract_features(eeg1, eeg2, emg):\n",
" features = None\n",
" \n",
" for i in range(eeg1.shape[0]):\n",
" if i % 1000 == 0:\n",
" print(i, \"/\", eeg1.shape[0])\n",
" row = np.array([])\n",
"\n",
" signal = np.array([eeg1[i], eeg2[i]]).T\n",
" analysis = eeg.eeg(signal=signal, sampling_rate=128, show=False) \n",
"\n",
" # theta\n",
" row = np.append(row, get_features(analysis[\"theta\"]))\n",
" # row = np.append(row, get_features(analysis[\"theta\"][:, 1]))\n",
"\n",
" # alpha low\n",
" row = np.append(row, get_features(analysis[\"alpha_low\"]))\n",
" # row = np.append(row, get_features(analysis[\"alpha_low\"][:, 1]))\n",
"\n",
" # alpha low\n",
" row = np.append(row, get_features(analysis[\"alpha_high\"]))\n",
" # row = np.append(row, get_features(analysis[\"alpha_high\"][:, 1]))\n",
"\n",
" # beta\n",
" row = np.append(row, get_features(analysis[\"beta\"]))\n",
" # row = np.append(row, get_features(analysis[\"beta\"][:, 1]))\n",
"\n",
" # gamma\n",
" row = np.append(row, get_features(analysis[\"gamma\"][:, 0]))\n",
" # row = np.append(row, get_features(analysis[\"gamma\"]))\n",
"\n",
" # format\n",
" row = row.reshape((1, -1))\n",
"\n",
" # concatenate\n",
" if features is None:\n",
" features = row\n",
" else:\n",
" features = np.concatenate((features, row), axis=0)\n",
" return features\n",
"\n",
"X_train = extract_features(train_eeg1_raw, train_eeg2_raw, train_emg_raw)\n",
"\n",
"\n",
"if not PROTOTYPING:\n",
" X_test = extract_features(test_eeg1_raw, test_eeg2_raw, test_emg_raw)\n",
" print(\"X_test\", X_test.shape)\n",
"print(\"X_train\", X_train.shape)\n",
"\n",
"print(\"Time: \", time.time() - start)"
]
},
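{
"cell_type": "markdown",
"metadata": {},
"source": [
"A side note on the loop above (not part of the original pipeline): appending each row to a Python list and stacking once with np.vstack avoids the repeated np.concatenate calls, which copy the growing features array on every iteration. The helper name stack_rows_example below is a hypothetical illustration of that accumulation step only."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hypothetical variant of the accumulation step in extract_features:\n",
"# collect per-epoch feature vectors in a list, then stack them once at the end\n",
"def stack_rows_example(rows):\n",
"    # rows: list of 1-D feature vectors, one per epoch\n",
"    return np.vstack([np.asarray(r).reshape(1, -1) for r in rows])\n",
"\n",
"# usage sketch: rows.append(row) inside the loop, then features = stack_rows_example(rows)"
]
},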
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# save features for future imports\n",
"pd.DataFrame.to_csv(pd.DataFrame(X_train), 'files/eeg_feats_train.csv', index=False)\n",
"pd.DataFrame.to_csv(pd.DataFrame(X_test), 'files/eeg_feats_test.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# obtain features by simply doing a FFT on the data\n",
"# probably more suitable for a neural network approach\n",
"eeg1_freqs_train = []\n",
"eeg2_freqs_train = []\n",
"eeg1_freqs_test = []\n",
"eeg2_freqs_test = []\n",
"for i in range(train_eeg1_raw.shape[0]):\n",
" eeg1_freqs_train.append(np.real(fft(train_eeg1_raw[i])))\n",
" eeg2_freqs_train.append(np.real(fft(train_eeg2_raw[i])))\n",
" \n",
"for i in range(test_eeg1_raw.shape[0]):\n",
" eeg1_freqs_test.append(np.real(fft(test_eeg1_raw[i])))\n",
" eeg2_freqs_test.append(np.real(fft(test_eeg2_raw[i])))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time: 2.0603151321411133\n"
]
}
],
"source": [
"# concatenate frequency features from fft\n",
"start = time.time()\n",
"eeg_freqs_train = np.array(np.column_stack((eeg1_freqs_train, eeg2_freqs_train)))\n",
"eeg_freqs_test = np.array(np.column_stack((eeg1_freqs_test, eeg2_freqs_test)))\n",
"print(\"Time: \", time.time() - start)\n",
"\n",
"# save features for future imports\n",
"#pd.DataFrame.to_csv(pd.DataFrame(eeg_freqs_train), 'files/eeg_freqs_train.csv', index=False)\n",
"#pd.DataFrame.to_csv(pd.DataFrame(eeg_freqs_test), 'files/eeg_freqs_test.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### homemade Feature Extraction for EMG signals"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# functions are implemented from this paper:\n",
"# https://www.researchgate.net/publication/323587464_A_Comprehensive_Study_on_EMG_Feature_Extraction_and_Classifiers\n",
"# https://www.researchgate.net/publication/224148281_Evaluation_of_EMG_Feature_Extraction_for_Hand_Movement_Recognition_Based_on_Euclidean_Distance_and_Standard_Deviation\n",
"\n",
"# Functions for the TIME Domain\n",
"\n",
"# integrated EMG is the area under the rectified EMG signal \n",
"def IEMG(signal):\n",
" iemg = np.sum(np.abs(signal))\n",
" return iemg\n",
"\n",
"# Mean Absolute Value\n",
"# PRE : Requires rectified signal\n",
"def MAV(signal, N):\n",
" mav = np.sum(np.abs(signal))/N\n",
" return mav\n",
"\n",
"\n",
"# Mean Absolute Value Slope (potentially computationally very expensive)\n",
"def MAVS(signal, N):\n",
" temp = 0\n",
" for i in range(signal.shape[0]-1):\n",
" temp += np.abs(signal[i+1] - signal[i])\n",
" mavs = temp/N\n",
" return mavs\n",
"\n",
"\n",
"# modified mean absolute value type 1\n",
"def MAV1(signal, N):\n",
" # interval borders\n",
" lower = 0.25 * N\n",
" upper = 0.75 * N \n",
" temp = 0\n",
" for i in range(signal.shape[0]):\n",
" if i >= lower and i <= upper:\n",
" temp += 1 * np.abs(signal[i])\n",
" else:\n",
" temp += 0.5 * np.abs(signal[i])\n",
" mav1 = temp/N\n",
" return mav1\n",
"\n",
"\n",
"# modified mean absolute value type 2\n",
"def MAV2(signal, N):\n",
" # interval borders\n",
" lower = 0.25 * N\n",
" upper = 0.75 * N \n",
" temp = 0\n",
" for i in range(signal.shape[0]):\n",
" if i >= lower and i <= upper:\n",
" temp += 1 * np.abs(signal[i])\n",
" elif i < lower:\n",
" temp += (4*i/N) * np.abs(signal[i])\n",
" elif i > upper:\n",
" temp += (4*(i-N)/N) * np.abs(signal[i])\n",
" \n",
" mav2 = temp/N\n",
" return mav2\n",
"\n",
"\n",
"# Simple Square Integral (SSI) expresses the energy of the EMG signal\n",
"# PRE : Requires rectified signal\n",
"def SSI(signal, N):\n",
" ssi = np.sum(np.abs(signal)**2)/N # should square every value in signal element-wise\n",
" return ssi\n",
"\n",
"# The variance of EMG signal\n",
"# PRE : Requires rectified signal\n",
"def VAREMG(signal, N):\n",
" varemg = np.sum(signal**2)/(N-1) # should square every value in signal element-wise\n",
" return varemg\n",
"\n",
"# Root Mean Square\n",
"# PRE : Requires rectified signal\n",
"def RMS(signal, N):\n",
" rms = np.sqrt(np.sum(np.abs(signal)**2)/N) # should square every value in signal element-wise\n",
" return rms\n",
"\n",
"# the 3rd temporal moment\n",
"def TM3(signal, N):\n",
" tm3 = np.sum(np.abs(signal**3))/N\n",
" return tm3\n",
"\n",
"# the 4th temporal moment\n",
"def TM4(signal, N):\n",
" tm4 = np.sum(np.abs(signal**4))/N\n",
" return tm4\n",
"\n",
"# the 5th temporal moment\n",
"def TM5(signal, N):\n",
" tm5 = np.sum(np.abs(signal**5))/N\n",
" return tm5\n",
"\n",
"# Waveform Length\n",
"def WL(signal, N):\n",
" wl = 0\n",
" temp = 0\n",
" for j in range(signal.shape[0]-1):\n",
" temp = np.abs(signal[j+1] - signal[j])\n",
" wl += temp\n",
" return wl\n"
]
},
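{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sanity check of a few time-domain features, not part of the original pipeline: it evaluates MAV, RMS and WL on a short synthetic signal so their scales can be eyeballed. The synthetic demo_signal and its length of 512 samples are illustrative assumptions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hypothetical sanity check on a synthetic signal (not used for training)\n",
"demo_signal = np.sin(np.linspace(0, 4 * np.pi, 512)) + 0.1 * np.random.randn(512)\n",
"demo_N = demo_signal.shape[0]\n",
"print(\"MAV:\", MAV(demo_signal, demo_N))\n",
"print(\"RMS:\", RMS(demo_signal, demo_N))\n",
"print(\"WL :\", WL(demo_signal, demo_N))"
]
},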
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# https://www.researchgate.net/publication/51997893_Techniques_for_Feature_Extraction_from_EMG_Signal\n",
"# Functions for the FREQUENCY Domain\n",
"\n",
"# frequency median : requires the power spectrum density\n",
"def FMD(psd):\n",
" fmd = 0.5 * np.sum(psd)\n",
" return fmd\n",
"\n",
"# frequency mean : requires psd, freqs and frequency median for faster computation\n",
"def FMN(psd, freqs, fmd):\n",
" fmd = fmd * 2 # simply sum of all psd elements\n",
" fmn = np.sum(np.multiply(psd, freqs))/fmd\n",
" return fmn\n",
"\n",
"# same as FMD(), but based on amplitudes\n",
"def MMFD(amplitudes):\n",
" mmfd = 0.5 * np.sum(amplitudes)\n",
" return mmfd\n",
"\n",
"# same as FMD(), but based on amplitudes\n",
"def MMNF(signal, amplitudes, mmfd):\n",
" freqs = np.fft.fftfreq(amplitudes.size) # freqs based on fourier transform\n",
" mmnf = np.sum(np.multiply(amplitudes, freqs))/mmfd\n",
" return mmnf\n",
" \n",
"\n",
"# estimate the AR coefficients of k-th order (k=6 based on literature research)\n",
"def AR(signal, order=6):\n",
" ar, _, _ = arburg(signal, order) # only save AR coefs\n",
" return ar\n",
"\n",
"# Wavelets analysis\n",
"# import pywt\n"
]
},
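{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal usage sketch for the frequency-domain features, not part of the original pipeline: FMD/FMN are applied to a Welch PSD and MMFD/MMNF to FFT amplitudes of a synthetic signal, mirroring how they are called later. The synthetic signal and the sampling rate of 128 Hz are illustrative assumptions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hypothetical usage sketch on a synthetic signal (not used for training)\n",
"demo_sig = np.random.randn(512)\n",
"demo_freqs, demo_psd = ss.welch(demo_sig, fs=128)\n",
"demo_fmd = FMD(demo_psd)\n",
"print(\"FMD:\", demo_fmd, \"FMN:\", FMN(demo_psd, demo_freqs, demo_fmd))\n",
"demo_amps = np.abs(fft(demo_sig))\n",
"demo_mmfd = MMFD(demo_amps)\n",
"print(\"MMFD:\", demo_mmfd, \"MMNF:\", MMNF(demo_sig, demo_amps, demo_mmfd))"
]
},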
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# PRE : raw emg signal\n",
"# POST: returns the extracted features\n",
"def extract_features_emg(data):\n",
" N = data.shape[0]\n",
" #onsets_list = [] # save onsets of EMG signals\n",
" #filtered_list = []\n",
" # generate more features\n",
" mav_list = []\n",
" ssi_list = []\n",
" vemg_list= []\n",
" rms_list = []\n",
" wl_list = []\n",
" iemg_list= []\n",
" mavs_list= []\n",
" mav1_list= []\n",
" mav2_list= []\n",
" tm3_list = []\n",
" tm4_list = []\n",
" tm5_list = []\n",
" fmd_list = []\n",
" fmn_list = []\n",
" mmfd_list= []\n",
" mmnf_list= []\n",
" ar_list = []\n",
"\n",
" start = time.time()\n",
" for i in range(data.shape[0]):\n",
" _, filt_emg, _ = emg.emg(signal=data[i].T, sampling_rate=512, show=False) # obtain only filtered signal\n",
" freqs, psd = ss.welch(data[i], fs=512) # get the PSD of the signal for the frequencies and amplitudes\n",
" amplitudes = np.abs(fft(data[i]))\n",
" #filtered_list.append(filt_emg)\n",
" #onsets_list.append(onsets_emg)\n",
" # compute features\n",
" mav_list.append(MAV(filt_emg, N))\n",
" ssi_list.append(SSI(filt_emg, N))\n",
" vemg_list.append(VAREMG(filt_emg, N))\n",
" rms_list.append(RMS(filt_emg, N))\n",
" wl_list.append(WL(filt_emg, N))\n",
" iemg_list.append(IEMG(filt_emg))\n",
" mavs_list.append(MAVS(filt_emg, N))\n",
" mav1_list.append(MAV1(filt_emg, N))\n",
" mav2_list.append(MAV2(filt_emg, N))\n",
" tm3_list.append(TM3(filt_emg, N))\n",
" tm4_list.append(TM4(filt_emg, N))\n",
" tm5_list.append(TM5(filt_emg, N))\n",
" fmd_res = FMD(psd)\n",
" fmd_list.append(fmd_res)\n",
" fmn_list.append(FMN(psd, freqs, fmd_res))\n",
" mmfd_res = MMFD(amplitudes)\n",
" mmfd_list.append(mmfd_res)\n",
" mmnf_list.append(MMNF(data[i], amplitudes, mmfd_res))\n",
" ar_list.append(AR(filt_emg))\n",
"\n",
" print(\"Time: \", time.time() - start)\n",
" emg_features = [mav_list,ssi_list,vemg_list,rms_list,wl_list,iemg_list,mavs_list,mav1_list,mav2_list,\n",
" tm3_list,tm4_list,tm5_list,fmd_list,fmn_list,mmfd_list,mmnf_list,ar_list]\n",
" \n",
" return emg_features"
]
},
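{
"cell_type": "markdown",
"metadata": {},
"source": [
"extract_features_emg returns one Python list per feature (with ar_list holding six AR coefficients per epoch), so the lists still need to be stacked column-wise before they can be used for training. The sketch below is a hypothetical assembly step; the helper name stack_emg_features and the use of only the real part of the AR coefficients are assumptions, not part of the original code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hypothetical helper: stack the per-feature lists into an (epochs x features) matrix\n",
"def stack_emg_features(emg_features):\n",
"    scalar_cols = [np.asarray(col, dtype=float).reshape(-1, 1) for col in emg_features[:-1]]\n",
"    ar_cols = np.real(np.asarray(emg_features[-1]))  # AR coefficients: (epochs, 6), real part only\n",
"    return np.hstack(scalar_cols + [ar_cols])\n",
"\n",
"# usage sketch (assuming emg_features was produced by extract_features_emg):\n",
"# emg_feature_matrix = stack_emg_features(emg_features)"
]
},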
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time: 1361.9812316894531\n",
"Time: 859.1871929168701\n"
]
}
],