Commit 38fe49e8 authored by Lukas's avatar Lukas
Browse files

prep for new tasks

parent ec019c53
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
"display_name": "Python 3.6.13 64-bit ('thesis': conda)",
"metadata": {
"interpreter": {
"hash": "b0868c9c87b4d20466c93a6c5980a4012ae8c057ee998d93bb8733e0fdff3d8b"
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import h5py\n",
"import logging\n",
"import scipy.io\n",
"from config import config \n",
"from tqdm import tqdm \n",
"from sklearn import preprocessing\n",
"import pandas as pd\n",
"import os\n",
"from tqdm import tqdm "
]
},
{
"source": [
"## Define the functions to load the data from a single mat file of a single participant"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"def load_sEEG_events(abs_dir_path):\n",
" \"\"\"\n",
" Extracts the sEEG.event section of a participants mat file \n",
" Returns the events as a numpy array, accessible event after event (time series)\n",
" Filters out everything else, like participants pushing buttons \n",
" \"\"\"\n",
" f = scipy.io.loadmat(abs_dir_path)\n",
" sEEG = f['sEEG']\n",
" df = pd.DataFrame(sEEG[0])\n",
" events = df['event'][0][0] # dereferenced to obtain the fixation, saccade, blinks, ... \n",
" #print(\"Events shape: {}\".format(events.shape))\n",
" return events # access the i-th event via events[i]"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"def load_sEEG_data(abs_dir_path):\n",
" \"\"\"\n",
" Returns the 133 channels of a participant\n",
" 129 EEG channels plus 4 (time, x, y and pupil size)\n",
" Returns the data as a numpy array, accessible via time as first coefficient \n",
" \"\"\"\n",
" f = scipy.io.loadmat(abs_dir_path)\n",
" sEEG = f['sEEG']\n",
" df = pd.DataFrame(sEEG[0])\n",
" data = df['data'][0].T # transpose to access time series \n",
" #print(\"EEG data shape: {}\".format(data.shape))\n",
" return data # access the i-th recorded sample via data[i], recordings at 2ms intervals "
]
},
{
"source": [
"## Read out the data from the files\n",
"### We get the starttime, endtime, and other statistics from sEEG.event\n",
"### We get the EEG data from sEEG.data\n",
"### We have to synchronize the two via sample numbers: sEEG.event.latency is the start sample number and sEEG.event.endtime is the end sample number, both indexing into sEEG.data"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Events shape: (953,)\n",
"EEG data shape: (68229, 133)\n"
]
}
],
"source": [
"_events = load_sEEG_events('./data/full_data/AA7/AA7_WI1_EEG.mat') # access event i via events[i]\n",
"_data = load_sEEG_data('./data/full_data/AA7/AA7_WI1_EEG.mat')\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[390.2]])"
]
},
"metadata": {},
"execution_count": 82
}
],
"source": [
"_event = _events[1]\n",
"_event[6]"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"x, y = _data[4927:4937].shape"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"# Define the keywords for the events that we look for \n",
"L_saccade = 'L_saccade'\n",
"L_fixation = 'L_fixation'\n",
"L_blink = 'L_blink'\n",
"R_saccade = 'R_saccade'\n",
"R_fixation = 'R_fixation'\n",
"R_blink = 'R_blink'\n",
"\n",
"# Define what to extract from the data \n",
"mode = 'sacc_only'\n",
"#mode = 'sacc_fix'\n",
"#mode = 'fix_sacc_fix'\n",
"\n",
"event_names = None\n",
"if mode == 'sacc_only':\n",
" event_names = [L_saccade, R_saccade]\n",
"else:\n",
" # for now I will incude the blinks in the data\n",
" event_names = [L_saccade, L_fixation, L_blink, R_saccade, R_fixation, R_blink]"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"# Loop over all directories in /data/full_data and extract and concat the events from all people\n",
"rootdir = './data/full_data' # modify it if necessary \n",
"\n",
"x_list = []\n",
"y_list = []\n",
"\n",
"for subdir, dirs, files in os.walk(rootdir):\n",
" for file in files:\n",
" # Get the correct path to the current file\n",
" path = os.path.join(subdir, file)\n",
" events = load_sEEG_events(path) # access event i via events[i]\n",
" data = load_sEEG_data(path)\n",
" \n",
" # Now, depending on the mode, extract the data and create the EEG data matrix and labels\n",
" for i in range(len(events)):\n",
" event = events[i]\n",
" if event[0][0] not in event_names: # dereference the event name, e.g. 'L_saccade'\n",
" continue\n",
" \n",
" start_time = int(event[1])\n",
" end_time = int(event[4])\n",
"\n",
" sac_x_end = event[6]\n",
" sac_y_end = event[7]\n",
"\n",
" # extract optional information from sEEG.event\n",
"\n",
" # extract the EEG data from sEEG.data\n",
" x_datapoint = np.array(data[start_time:end_time])\n",
" x_len, y_len = x_datapoint.shape\n",
" \n",
" # Pad the saccade only data, currently pad all to length 100\n",
" if x_len < 20 or x_len > 100:\n",
" continue \n",
" \n",
" padding_size = 100 - x_len\n",
" if config['padding'] == 'zero':\n",
" x_datapoint = np.pad(x_datapoint, pad_width=((0,padding_size),(0,0)))\n",
" elif config['padding'] == 'repeat':\n",
" x_datapoint = np.pad(x_datapoint, pad_width=((0,padding_size),(0,0)), mode='reflect')\n",
" else:\n",
" raise Exception(\"Choose a valid padding scheme in config.py\")\n",
"\n",
" y_datapoint = np.array([sac_x_end, sac_y_end])\n",
"\n",
" # Append to X and y \n",
" x_list.append([x_datapoint])\n",
" y_list.append([y_datapoint])\n",
"\n",
"X = np.asarray(x_list)\n",
"y = np.asarray(y_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}
\ No newline at end of file
%% Cell type:code id: tags:
```
import numpy as np
import h5py
import logging
import scipy.io
from config import config
from tqdm import tqdm
from sklearn import preprocessing
import pandas as pd
import os
from tqdm import tqdm
```
%% Cell type:markdown id: tags:
## Define the functions to load the data from a single mat file of a single participant
%% Cell type:code id: tags:
```
def load_sEEG_events(abs_dir_path):
    """
    Load the ``sEEG.event`` records from one participant's .mat file.

    Parameters
    ----------
    abs_dir_path : str
        Path to the .mat file. Despite the name, this is the path of the
        file itself (it is handed straight to ``scipy.io.loadmat``), not of
        a directory.

    Returns
    -------
    numpy.ndarray
        The stored event records in file order; the i-th event is
        ``events[i]``. No filtering by event type happens here: every
        record in the file is returned (fixations, saccades, blinks and
        anything else), so callers must select the event types they need.

    Raises
    ------
    KeyError
        If the file has no ``sEEG`` variable (propagated from the lookup).
    """
    mat_contents = scipy.io.loadmat(abs_dir_path)
    # First row of the loaded struct array; indexing it by field name yields
    # one entry per struct element.
    seeg_row = mat_contents['sEEG'][0]
    # [0] picks the first struct element's 'event' field, the second [0]
    # takes the first row of the stored event array so that events are
    # addressable one by one.
    events = seeg_row['event'][0][0]
    return events  # access the i-th event via events[i]
```
%% Cell type:code id: tags:
```
def load_sEEG_data(abs_dir_path):
    """
    Load the ``sEEG.data`` matrix from one participant's .mat file, time first.

    Parameters
    ----------
    abs_dir_path : str
        Path to the .mat file. Despite the name, this is the path of the
        file itself (it is handed straight to ``scipy.io.loadmat``), not of
        a directory.

    Returns
    -------
    numpy.ndarray
        The stored matrix transposed, so that the first index selects a
        recorded sample and ``data[i]`` is the i-th sample across all
        channels. Per the original author's notes the recordings have 133
        channels (129 EEG channels plus time, x, y and pupil size) sampled
        at 2 ms intervals; nothing in this function checks that.

    Raises
    ------
    KeyError
        If the file has no ``sEEG`` variable (propagated from the lookup).
    """
    mat_contents = scipy.io.loadmat(abs_dir_path)
    # First row of the loaded struct array; indexing it by field name yields
    # one entry per struct element.
    seeg_row = mat_contents['sEEG'][0]
    # Take the first struct element's 'data' field and transpose it so that
    # rows are samples (time) and columns are channels.
    data = seeg_row['data'][0].T
    return data  # access the i-th recorded sample via data[i]
```
%% Cell type:markdown id: tags:
## Read out the data from the files
### We get the starttime, endtime, and other statistics from sEEG.event
### We get the EEG data from sEEG.data
### We have to synchronize the two via sample numbers: sEEG.event.latency is the start sample number and sEEG.event.endtime is the end sample number, both indexing into sEEG.data
%% Cell type:code id: tags:
```
# Load a single sample recording so the structure of the events and of the
# data matrix can be inspected in the cells that follow.
_events = load_sEEG_events('./data/full_data/AA7/AA7_WI1_EEG.mat') # access event i via _events[i]
_data = load_sEEG_data('./data/full_data/AA7/AA7_WI1_EEG.mat')
```
%%%% Output: stream
Events shape: (953,)
EEG data shape: (68229, 133)
%% Cell type:code id: tags:
```
# Pick one event record from the sample recording for inspection.
_event = _events[1]
# Field 6 is the value the extraction loop reads as sac_x_end; it is shown
# here as a nested array rather than a plain scalar.
_event[6]
```
%%%% Output: execute_result
array([[390.2]])
%% Cell type:code id: tags:
```
# Unpack the 2-D shape of a 10-sample slice of the sample recording.
# NOTE(review): this binds module-level x and y; y is reassigned later by the
# extraction cell (y = np.asarray(y_list)), so do not rely on this value.
x, y = _data[4927:4937].shape
```
%% Cell type:code id: tags:
```
# Labels of the eye-tracking event types we look for in the event records
L_saccade, L_fixation, L_blink = 'L_saccade', 'L_fixation', 'L_blink'
R_saccade, R_fixation, R_blink = 'R_saccade', 'R_fixation', 'R_blink'

# Select what to extract from the data (leave exactly one option active)
mode = 'sacc_only'
#mode = 'sacc_fix'
#mode = 'fix_sacc_fix'

# 'sacc_only' keeps saccades only; every other mode keeps all six event
# types (for now the blinks are included in the data as well).
event_names = (
    [L_saccade, R_saccade]
    if mode == 'sacc_only'
    else [L_saccade, L_fixation, L_blink, R_saccade, R_fixation, R_blink]
)
```
%% Cell type:code id: tags:
```
# Loop over all directories in /data/full_data and extract and concat the events from all people
rootdir = './data/full_data'  # modify it if necessary

# Segments shorter than MIN_SEGMENT_LEN or longer than TARGET_SEGMENT_LEN
# samples are dropped; every kept segment is padded to TARGET_SEGMENT_LEN.
MIN_SEGMENT_LEN = 20
TARGET_SEGMENT_LEN = 100

# Resolve the padding scheme once, before any file is read, so that a bad
# config value fails immediately instead of after loading recordings.
if config['padding'] == 'zero':
    pad_mode = 'constant'  # np.pad's default mode: fill with zeros
elif config['padding'] == 'repeat':
    pad_mode = 'reflect'  # the 'repeat' setting maps to numpy's 'reflect' mode
else:
    raise Exception("Choose a valid padding scheme in config.py")

x_list = []
y_list = []

for subdir, dirs, files in os.walk(rootdir):
    # Sorting in place makes os.walk descend in a fixed order, so the row
    # order of X and y does not depend on the filesystem.
    dirs.sort()
    for file in sorted(files):
        # os.walk yields every file in the tree; only .mat recordings can be
        # parsed by the loaders, anything else would make loadmat fail.
        if not file.lower().endswith('.mat'):
            continue

        # Get the correct path to the current file
        path = os.path.join(subdir, file)
        events = load_sEEG_events(path)  # access event i via events[i]
        data = load_sEEG_data(path)

        # Now, depending on the mode, extract the data and create the EEG data matrix and labels
        for event in events:
            if event[0][0] not in event_names:  # dereference the event name, e.g. 'L_saccade'
                continue

            # Fields 1 and 4 are the start and end sample numbers, stored as
            # nested single-element arrays; .item() unwraps them explicitly
            # (int() on a multi-dimensional array is deprecated in recent NumPy).
            start_time = int(np.asarray(event[1]).item())
            end_time = int(np.asarray(event[4]).item())

            # Fields 6 and 7 are used as the regression targets.
            sac_x_end = event[6]
            sac_y_end = event[7]

            # extract the EEG data from sEEG.data
            x_datapoint = np.array(data[start_time:end_time])
            x_len, y_len = x_datapoint.shape

            # Keep only segments that fit the fixed window
            if x_len < MIN_SEGMENT_LEN or x_len > TARGET_SEGMENT_LEN:
                continue

            # Pad along the time axis only, at the end of the segment
            padding_size = TARGET_SEGMENT_LEN - x_len
            x_datapoint = np.pad(
                x_datapoint,
                pad_width=((0, padding_size), (0, 0)),
                mode=pad_mode,
            )

            # NOTE(review): the event fields are nested arrays (a sample
            # record shows array([[390.2]])), so each label carries trailing
            # singleton axes. Kept as-is for downstream compatibility;
            # confirm whether a flat (x, y) pair is wanted.
            y_datapoint = np.array([sac_x_end, sac_y_end])

            # Append to X and y; the extra list level adds a leading axis of
            # length 1 to every sample.
            x_list.append([x_datapoint])
            y_list.append([y_datapoint])

X = np.asarray(x_list)
y = np.asarray(y_list)
```
%% Cell type:code id: tags:
```
# Show the shape of the stacked EEG segment array built by the extraction cell.
X.shape
```
%% Cell type:code id: tags:
```
# Show the shape of the stacked label array built by the extraction cell.
y.shape
```
%% Cell type:code id: tags:
```
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment