Commit 14344e29 authored by Lukas's avatar Lukas
Browse files

changes for new experiments

parent 38fe49e8
......@@ -49,14 +49,19 @@ Cluster can be set to clustering(), clustering2() or clustering3(), where differ
config['gaze-reg'] = True # Set to False if you want to run the saccade classification task
config['data-fraction'] = 1.0 # Set to 1.0 if you want to use the whole dataset, experimental feature only for regression task \
#config['data_mode'] = 'fix_only'
config['data_mode'] = 'sacc_only'
#config['data_mode'] = 'sacc_fix'
#config['data_mode'] = 'fix_sacc_fix'
# Choose to either run the kerastuner on the model or train an ensemble of models
#config['run'] = 'kerastuner'
config['run'] = 'ensemble'
config['ensemble'] = 1 #number of models in the ensemble method
# Choosing model
#config['model'] = 'cnn'
config['model'] = 'inception'
config['model'] = 'cnn'
#config['model'] = 'inception'
#config['model'] = 'eegnet'
#config['model'] = 'deepeye'
#config['model'] = 'xception'
......@@ -104,7 +109,9 @@ config['deepeye-rnn'] = {}
if config['gaze-reg']:
# Compute the maximum fixation duration, which is used for preprocessing, e.g. zero-padding
config['cnn']['input_shape'] = (int(config['max_duration']), 129) # e.g. for max_duration 300 we have shape (150,129)
#TODO: automatically set the input shapes depending on the dataset to run, i.e. fix only, sacc only, etc.
#config['cnn']['input_shape'] = (int(config['max_duration']), 129) # e.g. for max_duration 300 we have shape (150,129)
config['cnn']['input_shape'] = (100, 129)
config['pyramidal_cnn']['input_shape'] = (int(config['max_duration']), 129)
config['inception']['input_shape'] = (int(config['max_duration']), 129)
config['deepeye']['input_shape'] = (int(config['max_duration']), 129)
......
......@@ -10,17 +10,13 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13-final"
"version": "3.8.8-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
"display_name": "Python 3.6.13 64-bit ('thesis': conda)",
"metadata": {
"interpreter": {
"hash": "b0868c9c87b4d20466c93a6c5980a4012ae8c057ee998d93bb8733e0fdff3d8b"
}
}
"display_name": "Python 3",
"language": "python"
}
},
"nbformat": 4,
......@@ -28,7 +24,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 89,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -44,6 +40,32 @@
"from tqdm import tqdm "
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['/device:GPU:0']"
]
},
"metadata": {},
"execution_count": 40
}
],
"source": [
"from tensorflow.python.client import device_lib\n",
"\n",
"def get_available_gpus():\n",
" local_device_protos = device_lib.list_local_devices()\n",
" return [x.name for x in local_device_protos if x.device_type == 'GPU']\n",
"\n",
"get_available_gpus()"
]
},
{
"source": [
"## Define the functions to load the data from a single mat file of a single participant"
......@@ -53,7 +75,7 @@
},
{
"cell_type": "code",
"execution_count": 109,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -73,7 +95,7 @@
},
{
"cell_type": "code",
"execution_count": 110,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
......@@ -103,18 +125,9 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 28,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Events shape: (953,)\n",
"EEG data shape: (68229, 133)\n"
]
}
],
"outputs": [],
"source": [
"_events = load_sEEG_events('./data/full_data/AA7/AA7_WI1_EEG.mat') # access event i via events[i]\n",
"_data = load_sEEG_data('./data/full_data/AA7/AA7_WI1_EEG.mat')\n",
......@@ -123,7 +136,7 @@
},
{
"cell_type": "code",
"execution_count": 82,
"execution_count": 29,
"metadata": {},
"outputs": [
{
......@@ -134,7 +147,7 @@
]
},
"metadata": {},
"execution_count": 82
"execution_count": 29
}
],
"source": [
......@@ -144,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": 117,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
......@@ -153,7 +166,7 @@
},
{
"cell_type": "code",
"execution_count": 95,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
......@@ -180,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 119,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
......@@ -234,25 +247,11 @@
" y_list.append([y_datapoint])\n",
"\n",
"X = np.asarray(x_list)\n",
"y = np.asarray(y_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y.shape"
"y = np.asarray(y_list)\n",
"\n",
"X_reshaped = np.transpose(X, (0,2,3,1))\n",
"X_reshaped = X_reshaped[:,:,:129,:]\n",
"y_reshaped = np.transpose(y, (0,2,1,3,4))"
]
},
{
......
%% Cell type:code id: tags:
```
``` python
import numpy as np
import h5py
import logging
import scipy.io
from config import config
from tqdm import tqdm
from sklearn import preprocessing
import pandas as pd
import os
from tqdm import tqdm
```
%% Cell type:code id: tags:
``` python
from tensorflow.python.client import device_lib
def get_available_gpus():
    """
    Return the names of all local devices whose device type is 'GPU'.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names
get_available_gpus()
```
%%%% Output: execute_result
['/device:GPU:0']
%% Cell type:markdown id: tags:
## Define the functions to load the data from a single mat file of a single participant
%% Cell type:code id: tags:
```
``` python
def load_sEEG_events(abs_dir_path):
    """
    Load the sEEG.event section of a single participant's mat file.

    The file is read with scipy.io.loadmat and its 'sEEG' entry is wrapped in
    a DataFrame so the 'event' field can be addressed by name.
    The events are returned unfiltered, in file order: the i-th event is
    events[i] (fixations, saccades, blinks, ...).
    """
    mat_contents = scipy.io.loadmat(abs_dir_path)
    participant = pd.DataFrame(mat_contents['sEEG'][0])
    event_column = participant['event']
    # The first entry of the column still carries an outer wrapper dimension;
    # the second [0] dereferences it so that indexing yields single events.
    return event_column[0][0]
```
%% Cell type:code id: tags:
```
``` python
def load_sEEG_data(abs_dir_path):
    """
    Load the sEEG.data matrix of a single participant's mat file.

    Per the original notes the recording has 133 channels: 129 EEG channels
    plus 4 more (time, x, y and pupil size).
    The stored matrix is transposed before it is returned, so the first index
    selects a recorded sample: data[i] is the i-th sample
    (the original author notes recordings are at 2ms intervals).
    """
    participant = pd.DataFrame(scipy.io.loadmat(abs_dir_path)['sEEG'][0])
    stored_matrix = participant['data'][0]
    # Transpose to access the recording as a time series
    return stored_matrix.T
```
%% Cell type:markdown id: tags:
## Read out the data from the files
### We get the starttime, endtime, and other statistics from sEEG.event
### We get the EEG data from sEEG.data
### We have to synchronize via sEEG.event.latency, which is the starttime as sample number, and sEEG.data.endtime as endtime sample number
%% Cell type:code id: tags:
```
``` python
_events = load_sEEG_events('./data/full_data/AA7/AA7_WI1_EEG.mat') # access event i via events[i]
_data = load_sEEG_data('./data/full_data/AA7/AA7_WI1_EEG.mat')
```
%%%% Output: stream
Events shape: (953,)
EEG data shape: (68229, 133)
%% Cell type:code id: tags:
```
``` python
_event = _events[1]
_event[6]
```
%%%% Output: execute_result
array([[390.2]])
%% Cell type:code id: tags:
```
``` python
x, y = _data[4927:4937].shape
```
%% Cell type:code id: tags:
```
``` python
# Keywords of the events we look for, one set per eye
L_saccade, L_fixation, L_blink = 'L_saccade', 'L_fixation', 'L_blink'
R_saccade, R_fixation, R_blink = 'R_saccade', 'R_fixation', 'R_blink'

# Select what to extract from the data (keep exactly one line active)
mode = 'sacc_only'
#mode = 'sacc_fix'
#mode = 'fix_sacc_fix'

if mode != 'sacc_only':
    # for now every other mode also includes the blinks in the data
    event_names = [L_saccade, L_fixation, L_blink, R_saccade, R_fixation, R_blink]
else:
    event_names = [L_saccade, R_saccade]
%% Cell type:code id: tags:
```
``` python
# Loop over all directories in /data/full_data and extract and concat the events from all people
rootdir = './data/full_data' # modify it if necessary
x_list = []
y_list = []
for subdir, dirs, files in os.walk(rootdir):
for file in files:
# Get the correct path to the current file
path = os.path.join(subdir, file)
events = load_sEEG_events(path) # access event i via events[i]
data = load_sEEG_data(path)
# Now, depending on the mode, extract the data and create the EEG data matrix and labels
for i in range(len(events)):
event = events[i]
if event[0][0] not in event_names: # dereference the event name, e.g. 'L_saccade'
continue
start_time = int(event[1])
end_time = int(event[4])
sac_x_end = event[6]
sac_y_end = event[7]
# extract optional information from sEEG.event
# extract the EEG data from sEEG.data
x_datapoint = np.array(data[start_time:end_time])
x_len, y_len = x_datapoint.shape
# Pad the saccade only data, currently pad all to length 100
if x_len < 20 or x_len > 100:
continue
padding_size = 100 - x_len
if config['padding'] == 'zero':
x_datapoint = np.pad(x_datapoint, pad_width=((0,padding_size),(0,0)))
elif config['padding'] == 'repeat':
x_datapoint = np.pad(x_datapoint, pad_width=((0,padding_size),(0,0)), mode='reflect')
else:
raise Exception("Choose a valid padding scheme in config.py")
y_datapoint = np.array([sac_x_end, sac_y_end])
# Append to X and y
x_list.append([x_datapoint])
y_list.append([y_datapoint])
X = np.asarray(x_list)
y = np.asarray(y_list)
```
%% Cell type:code id: tags:
```
X.shape
```
%% Cell type:code id: tags:
```
y.shape
X_reshaped = np.transpose(X, (0,2,3,1))
X_reshaped = X_reshaped[:,:,:129,:]
y_reshaped = np.transpose(y, (0,2,1,3,4))
```
%% Cell type:code id: tags:
```
``` python
```
......
......@@ -36,30 +36,30 @@ def run(trainX, trainY):
print('Beginning model number {}/{} ...'.format(i+1, config['ensemble']))
if config['model'] == 'cnn':
reg = Regression_CNN(input_shape=config['cnn']['input_shape'], kernel_size=64, epochs = 50,
reg = Regression_CNN(input_shape=config['cnn']['input_shape'], kernel_size=64, epochs = 10,
nb_filters=16, verbose=True, batch_size=64, use_residual=True, depth=12,
learning_rate=0.001, regularization=0.03)
learning_rate=0.001)#, regularization=0.03)
elif config['model'] == 'inception':
reg = Regression_INCEPTION(input_shape=config['inception']['input_shape'], use_residual=True,
kernel_size=64, nb_filters=32, depth=16, bottleneck_size=32, epochs=50,
learning_rate=0.001, regularization=0.03)
learning_rate=0.001)#, regularization=0.03)
elif config['model'] == 'xception':
reg = Regression_XCEPTION(input_shape=config['inception']['input_shape'], use_residual=True,
kernel_size=64, nb_filters=32, depth=24, epochs=50,
learning_rate=0.001, regularization=0.03)
learning_rate=0.001)#, regularization=0.03)
elif config['model'] == 'deepeye':
reg = Regression_DEEPEYE(input_shape=config['deepeye']['input_shape'], use_residual=True,
kernel_size=64, nb_filters=32, depth=10, epochs=50, preprocessing=False,
use_separable_convolution=True, use_simple_convolution=True,
bottleneck_size=32,
learning_rate=0.001, regularization=0.03)
learning_rate=0.001)#, regularization=0.03)
elif config['model'] == 'pyramidal_cnn':
reg = Regression_PyramidalCNN(input_shape=config['cnn']['input_shape'], epochs=50,
learning_rate=0.001, regularization=0.03)
learning_rate=0.001)#, regularization=0.03)
else:
logging.info('Cannot start the program. Please choose one model in the config.py file')
......
......@@ -11,7 +11,7 @@ def get_mat_data(data_dir, verbose=True):
if config['gaze-reg']:
# call the regression task data loader
return get_regression_data(verbose= verbose)
return get_regression_data(verbose=verbose)
with h5py.File(data_dir + config['trainX_file'], 'r') as f:
X = f[config['trainX_variable']][:]
......
......@@ -9,6 +9,8 @@ import scipy.io
from config import config
from tqdm import tqdm
from sklearn import preprocessing
import os
import pandas as pd
def max_fixation_duration():
......@@ -41,6 +43,10 @@ def load_regression_data(verbose=True):
X, y, the data matrix and the labels
"""
if config['data_mode'] == 'sacc_only':
logging.info("Using saccade only dataset")
return get_sacc_data(verbose=verbose)
# Load the labels
y = scipy.io.loadmat(config['data_dir'] + config['trainY_variable'])
labels = y['label'] # shape (85413, 1) for label.mat
......@@ -109,3 +115,105 @@ def load_regression_data(verbose=True):
return X_reshaped, y_reshaped
def get_sacc_data(verbose=True):
    """
    Build the saccade-only regression dataset from the participants' mat files.

    Walks ./data/full_data, loads the events and the recording of every .mat
    file and keeps the 'L_saccade' / 'R_saccade' events whose window
    (event field 1 up to event field 4, used as sample indices) spans between
    20 and 100 samples. Every kept window is padded at its end to 100 samples
    according to config['padding']:
        'zero'   -> pad with zeros
        'repeat' -> pad with np.pad's 'reflect' mode
    The label of a window is built from event fields 6 and 7
    (called sac_x_end / sac_y_end here).

    Parameters
    ----------
    verbose : bool
        If True, log the shapes of the arrays before and after reshaping.

    Returns
    -------
    X_reshaped, y_reshaped
        X: the stacked windows with the singleton axis moved to the end and
        only the first 129 columns kept (the last 4 columns are time, x, y and
        pupil size according to the data loader's description).
        y: the stacked labels with axes 1 and 2 swapped.

    Raises
    ------
    ValueError
        If config['padding'] is neither 'zero' nor 'repeat' (checked before any
        file is read), or if no usable saccade event was found.
    """
    event_names = ('L_saccade', 'R_saccade')
    rootdir = './data/full_data'  # modify it if necessary
    min_len, max_len = 20, 100  # accepted window lengths; everything is padded to max_len

    # Validate the padding scheme once, up front, instead of after the first
    # (slow) file has already been loaded.
    padding = config['padding']
    if padding == 'zero':
        pad_kwargs = {'mode': 'constant'}  # np.pad's default: constant zeros
    elif padding == 'repeat':
        pad_kwargs = {'mode': 'reflect'}
    else:
        raise ValueError("Choose a valid padding scheme in config.py")

    x_list = []
    y_list = []
    # Loop over all directories below rootdir and concat the events from all people
    for subdir, dirs, files in os.walk(rootdir):
        # os.walk yields entries in arbitrary order; sorting in place makes the
        # order of the samples reproducible across machines and runs.
        dirs.sort()
        for file in sorted(files):
            # Anything that is not a mat file (e.g. hidden OS files) cannot be
            # parsed by the loaders and would abort the whole run.
            if not file.lower().endswith('.mat'):
                continue

            path = os.path.join(subdir, file)
            # NOTE: each loader parses the mat file on its own, so the file is read twice.
            events = load_sEEG_events(path)  # access event i via events[i]
            data = load_sEEG_data(path)

            for event in events:
                if event[0][0] not in event_names:  # dereference the event name, e.g. 'L_saccade'
                    continue

                # Event fields arrive wrapped in small arrays (assumed to hold a
                # single value -- TODO confirm); .item() unwraps them without
                # relying on implicit array-to-scalar conversion.
                start_time = int(np.asarray(event[1]).item())
                end_time = int(np.asarray(event[4]).item())
                sac_x_end = event[6]
                sac_y_end = event[7]

                # extract the EEG data of this event from sEEG.data
                x_datapoint = np.array(data[start_time:end_time])
                x_len = x_datapoint.shape[0]
                if x_len < min_len or x_len > max_len:
                    continue

                # Pad only along the time axis, at the end of the window
                x_datapoint = np.pad(
                    x_datapoint,
                    pad_width=((0, max_len - x_len), (0, 0)),
                    **pad_kwargs
                )
                y_datapoint = np.array([sac_x_end, sac_y_end])

                # Append to X and y; the extra list adds the singleton axis the
                # transposes below expect.
                x_list.append([x_datapoint])
                y_list.append([y_datapoint])

    if not x_list:
        # Without this check the transposes below fail with an opaque axis error.
        raise ValueError(
            "No saccade events with {} to {} samples found under {}".format(min_len, max_len, rootdir)
        )

    X = np.asarray(x_list)
    y = np.asarray(y_list)

    if verbose:
        logging.info("y training loaded.")
        logging.info(y.shape)
        logging.info("X training loaded.")
        logging.info(X.shape)

    X_reshaped = np.transpose(X, (0, 2, 3, 1))
    # Cut off the last 4 columns (time, x, y, pupil size)
    X_reshaped = X_reshaped[:, :, :129, :]
    y_reshaped = np.transpose(y, (0, 2, 1, 3, 4))

    if verbose:
        logging.info("Settings the shapes")
        logging.info(X_reshaped.shape)
        logging.info(y_reshaped.shape)

    return X_reshaped, y_reshaped
def _load_sEEG_frame(abs_dir_path):
    """
    Read a participant's mat file and return its 'sEEG' struct as a DataFrame
    with one column per struct field.
    """
    mat_contents = scipy.io.loadmat(abs_dir_path)
    return pd.DataFrame(mat_contents['sEEG'][0])


def load_sEEG_events(abs_dir_path):
    """
    Extracts the sEEG.event section of a participant's mat file.

    Returns the events as a numpy array, accessible event after event
    (time series): the i-th event is events[i].
    The events are returned as stored in the file; no event type is filtered
    out here, so callers have to select the event names they need.
    """
    frame = _load_sEEG_frame(abs_dir_path)
    # The second [0] dereferences the outer wrapper dimension to obtain the
    # fixation, saccade, blink, ... events themselves.
    return frame['event'][0][0]


def load_sEEG_data(abs_dir_path):
    """
    Returns the recorded channels of a participant.

    Per the original notes these are 133 channels:
    129 EEG channels plus 4 (time, x, y and pupil size).
    Returns the data as a numpy array with time as the first index: data[i]
    is the i-th recorded sample (the original author notes recordings are at
    2ms intervals).
    """
    frame = _load_sEEG_frame(abs_dir_path)
    # transpose to access the recording as a time series
    return frame['data'][0].T
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment