To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 5ede05a3 authored by Lukas Wolf's avatar Lukas Wolf
Browse files

added ard's data preparation with minor changes to run it from the project directory

parent e2f10638
% Downsample every pooled trial from 500 Hz to 125 Hz with EEGLAB.
%
% Input : all_EEGprocuesan.mat in the current folder, indexed below as
%         (trial, sample, channel).
% Output: downsamplEEG.mat (v7.3) holding the struct array downsamplEEG with
%         one resampled EEGLAB structure per trial.
load('all_EEGprocuesan.mat');

% Swap the last two dimensions: (trial, sample, channel) -> (trial, channel, sample).
% Same result as copying element by element, without the double loop.
final = permute(all_EEGprocuesan, [1 3 2]);

% NOTE(review): this cd also decides where downsamplEEG.mat is written below
% (the EEGLAB code folder) - confirm that this is intended.
cd('\\130.60.169.45\methlab\Neurometric\Antisaccades\code\eeglab14_1_2b')
eeglab;
close all

% Minimal EEGLAB-style structure that is reused for every trial. The channel
% and sample counts are read from the data instead of being fixed to 129/500.
X.srate = 500;
X.nbchan = size(final, 2);
X.pnts = size(final, 3);
X.trials = 1;
X.xmin = 0;
X.event = [];
X.setname = [];

for trial = 1:size(final, 1)
    % Drop the leading singleton dimension so the data is channels x samples;
    % the element order is unchanged.
    X.data = reshape(final(trial, :, :), X.nbchan, X.pnts);
    downsamplEEG(trial) = pop_resample(X, 125);
end

save('downsamplEEG', 'downsamplEEG', '-v7.3')
% Pool the cue-locked epochs of the first subjects into one array.
%
% Every subject folder is expected to hold EEGprocuesan.mat with the structure
% EEGprocuesan, whose data field is channels x samples x epochs
% (e.g. 129 x 500 x 73). Each subject is stored as epochs x samples x channels
% (e.g. 73 x 500 x 129) and the subjects are stacked along the first dimension.
clc
clear
x = dir('\\130.60.169.45\methlab\ETH_AS\preprocessed2');
subjects = {x.name};
% NOTE(review): drops the first three and the last three directory entries -
% presumably '.', '..' and non-subject items; confirm against the folder.
subjects = {subjects{4:end-3}}';
clear x
cd('\\130.60.169.45\methlab\ETH_AS')
%%
maxSubjects = 100; %186 - BA5 didnt work, 346- BY2
nSubjects = min(maxSubjects, numel(subjects)); % never index past the subject list
perSubject = cell(nSubjects, 1);
for subj = 1:nSubjects
    datapath = strcat('\\130.60.169.45\methlab\ETH_AS\preprocessed2\',subjects{subj});
    cd (datapath)
    % Skip subjects without an epoch file. Without this guard the variable
    % loaded for the previous subject would silently be appended a second time.
    % NOTE(review): the label collection must skip the same subjects, otherwise
    % data and labels no longer line up.
    if exist('EEGprocuesan.mat', 'file') ~= 2
        warning('prepare:missingFile', 'No EEGprocuesan.mat for subject %s - skipped.', subjects{subj});
        continue
    end
    loaded = load('EEGprocuesan.mat', 'EEGprocuesan');
    % channels x samples x epochs -> epochs x samples x channels
    perSubject{subj} = permute(loaded.EEGprocuesan.data, [3 2 1]);
end
% Concatenate once at the end instead of growing the array in every iteration;
% cells of skipped subjects are empty and contribute nothing.
all_EEGprocuesan = vertcat(perSubject{:});
% NOTE(review): written to the current folder, i.e. the folder of the last
% subject visited by the loop - confirm that this is the intended location.
save('all_EEGprocuesan', 'all_EEGprocuesan', '-v7.3')
\ No newline at end of file
% Pool the cue labels of the first subjects into one column vector.
%
% Every subject folder is expected to hold trialinfoprosan.mat with the
% structure trialinfoprosan; its field cues is appended row-wise, subject
% after subject, in the order of the subject list.
clc
clear
x = dir('\\130.60.169.45\methlab\ETH_AS\preprocessed2');
subjects = {x.name};
% NOTE(review): drops the first three and the last three directory entries -
% presumably '.', '..' and non-subject items; confirm against the folder.
subjects = {subjects{4:end-3}}';
clear x
cd('\\130.60.169.45\methlab\ETH_AS')
%%
maxSubjects = 100; %= 5 %186 - BA5 didnt work, 346- BY2
nSubjects = min(maxSubjects, numel(subjects)); % never index past the subject list
perSubject = cell(nSubjects, 1);
nCollected = 0;
for subj = 1:nSubjects
    datapath = strcat('\\130.60.169.45\methlab\ETH_AS\preprocessed2\',subjects{subj});
    cd (datapath)
    % Skip subjects without a label file. Without this guard the labels loaded
    % for the previous subject would silently be appended a second time.
    % NOTE(review): the epoch collection must skip the same subjects, otherwise
    % data and labels no longer line up.
    if exist('trialinfoprosan.mat', 'file') ~= 2
        warning('prepare:missingFile', 'No trialinfoprosan.mat for subject %s - skipped.', subjects{subj});
        continue
    end
    loaded = load('trialinfoprosan.mat', 'trialinfoprosan');
    perSubject{subj} = loaded.trialinfoprosan.cues;
    % Progress report: running number of collected labels.
    nCollected = nCollected + size(perSubject{subj}, 1);
    fprintf('%s: %d labels collected so far\n', subjects{subj}, nCollected);
end
% Concatenate once at the end; cells of skipped subjects are empty.
all_trialinfoprosan = vertcat(perSubject{:});
% NOTE(review): written to the current folder, i.e. the folder of the last
% subject visited by the loop - confirm that this is the intended location.
save('all_trialinfoprosan', 'all_trialinfoprosan', '-v7.3')
\ No newline at end of file
% Per-subject preprocessing of the antisaccade recordings.
% For every subject folder the script loads the synchronized EEG file,
% re-references it, renames the cue triggers of the antisaccade blocks, labels
% each saccade with condition and direction, removes erroneous trials, epochs
% the data and saves EEGprocuesan.mat and trialinfoprosan.mat into the
% subject folder (the current folder after cd(datapath)).
clc
clear
cd('\\130.60.169.45\methlab\Neurometric\Antisaccades\code\eeglab14_1_2b')
eeglab;
close all
x = dir('\\130.60.169.45\methlab\ETH_AS\preprocessed2')
subjects = {x.name};
% NOTE(review): drops the first three and the last three directory entries -
% presumably '.', '..' and non-subject items; confirm against the folder.
subjects = {subjects{4:end-3}}';
clear x
cd('\\130.60.169.45\methlab\ETH_AS')
%%
for subj = 1:length(subjects) %186 - BA5 didnt work, 346- BY2
datapath = strcat('\\130.60.169.45\methlab\ETH_AS\preprocessed2\',subjects{subj});
cd (datapath)
% Load the file prefixed 'gip_' if present, otherwise the one prefixed 'oip_'.
% NOTE(review): if neither file exists nothing is loaded, so EEG still holds
% the previous subject's data (or is undefined for the first subject).
if exist(strcat('gip_',subjects{subj},'_AS_EEG.mat')) > 0
datafile= strcat('gip_',subjects{subj},'_AS_EEG.mat');
load (datafile)
elseif exist(strcat('oip_',subjects{subj},'_AS_EEG.mat')) > 0
datafile= strcat('oip_',subjects{subj},'_AS_EEG.mat');
load (datafile)
end
%% Re-reference to average reference
EEG = pop_reref(EEG,[]);
%% triggers renaming
% The block number is derived by counting 'boundary' events; cue and '40 '
% triggers inside blocks 2-4 get their own codes so that pro- and antisaccade
% trials can be told apart later.
countblocks = 1;
for e = 1:length(EEG.event)
if strcmp(EEG.event(e).type, 'boundary')
countblocks = countblocks + 1;
continue;
end
if countblocks == 2 || countblocks == 3 || countblocks == 4 % antisaccade blocks
if strcmp(EEG.event(e).type,'10 ') % change 10 to 12 for AS
EEG.event(e).type = '12 ';
elseif strcmp(EEG.event(e).type,'11 ')
EEG.event(e).type = '13 '; % change 11 to 13 for AS
end
if strcmp(EEG.event(e).type,'40 ')
EEG.event(e).type = '41 ';
end
end
end
% Drop the boundary markers and all left-eye fixation events.
EEG.event(strcmp('boundary',{EEG.event.type})) = [];
rmEventsIx = strcmp('L_fixation',{EEG.event.type});
% rmEv keeps a copy of the removed fixation events; it is not read again in this script.
rmEv = EEG.event(rmEventsIx);
EEG.event(rmEventsIx) = [];
% Add the fields dir and cond to the event structure (empty for every event at first).
EEG.event(1).dir = []; %left or right
EEG.event(1).cond = [];%pro or anti
%% rename EEG.event.type
% 'previous' holds the type of the most recent event that is neither a
% fixation nor a blink. A saccade directly following a cue is renamed after
% that cue; any other saccade (including a second saccade after the same cue,
% because 'previous' is then the renamed saccade) becomes 'invalid'.
previous = '';
for e = 1:length(EEG.event)
if strcmp(EEG.event(e).type, 'L_saccade')
if strcmp(previous, '10 ')
EEG.event(e).type = 'saccade_pro_left'
EEG.event(e).cond = 'pro';
EEG.event(e).dir = 'left';
%pro left
elseif strcmp(previous, '11 ')
EEG.event(e).type = 'saccade_pro_right'
EEG.event(e).cond = 'pro';
EEG.event(e).dir = 'right';
elseif strcmp(previous, '12 ')
EEG.event(e).type = 'saccade_anti_left'
EEG.event(e).cond = 'anti';
EEG.event(e).dir = 'left';
elseif strcmp(previous, '13 ')
EEG.event(e).type = 'saccade_anti_right'
EEG.event(e).cond = 'anti';
EEG.event(e).dir = 'right';
else
EEG.event(e).type = 'invalid';
end
end
if ~strcmp(EEG.event(e).type, 'L_fixation') ...
&& ~strcmp(EEG.event(e).type, 'L_blink')
previous = EEG.event(e).type;
end
end
%% remove everything from EEG.event which is not saccade or trigger
tmpinv=find(strcmp({EEG.event.type}, 'invalid') | strcmp({EEG.event.type}, 'L_blink'))
EEG.event(tmpinv)=[]
%% removing errors
% if 10 and the sub didn't look left then error
% pro left sac_start_x > sac_endpos_x --> correct condition
% Each of the four checks removes the offending saccade together with the
% event directly before it (index - 1), which is presumably its cue.
% NOTE(review): if an offending saccade is the very first event, index - 1 is 0
% and the deletion errors. The comparisons also assume that every remaining
% event has a scalar sac_startpos_x / sac_endpos_x; empty fields would shorten
% the concatenated vectors - TODO confirm.
tmperrsacc1=find(strcmp({EEG.event.type}, 'saccade_pro_left') & [EEG.event.sac_startpos_x]< [EEG.event.sac_endpos_x]);
tmperr1=[tmperrsacc1 (tmperrsacc1-1)];
EEG.event(tmperr1)=[];
tmperrsacc2=find(strcmp({EEG.event.type}, 'saccade_anti_left') & [EEG.event.sac_startpos_x]> [EEG.event.sac_endpos_x]);
tmperr2=[tmperrsacc2 (tmperrsacc2-1)];
EEG.event(tmperr2)=[];
tmperrsacc3=find(strcmp({EEG.event.type}, 'saccade_pro_right') & [EEG.event.sac_startpos_x]> [EEG.event.sac_endpos_x]);
tmperr3=[tmperrsacc3 (tmperrsacc3-1)]
EEG.event(tmperr3)=[];
tmperrsacc4=find(strcmp({EEG.event.type}, 'saccade_anti_right') & [EEG.event.sac_startpos_x]< [EEG.event.sac_endpos_x]);
tmperr4=[tmperrsacc4 (tmperrsacc4-1)];
EEG.event(tmperr4)=[];
%% amplitude too small
% Saccades with sac_amplitude below 1.5 are removed together with the event
% directly before them. All indices are collected against the same event list
% and deleted in a single step, so they do not shift between the four searches.
tmperrsacc6=find(strcmp({EEG.event.type}, 'saccade_pro_right') ...
& [EEG.event.sac_amplitude]<1.5)
tmperrsacc7=find(strcmp({EEG.event.type}, 'saccade_pro_left') ...
& [EEG.event.sac_amplitude]<1.5)
tmperrsacc8=find(strcmp({EEG.event.type}, 'saccade_anti_left') ...
& [EEG.event.sac_amplitude]<1.5)
tmperrsacc9=find(strcmp({EEG.event.type}, 'saccade_anti_right') ...
& [EEG.event.sac_amplitude]<1.5)
tmperr69=[tmperrsacc6 (tmperrsacc6-1) tmperrsacc7 (tmperrsacc7-1) tmperrsacc8 (tmperrsacc8-1) tmperrsacc9 (tmperrsacc9-1)]
EEG.event(tmperr69)=[];
clear tmperrsacc1 tmperrsacc2 tmperrsacc3 tmperrsacc4 tmperrsacc6 tmperrsacc7 tmperrsacc8 tmperrsacc9
%% delete cues where there was no saccade afterwards
% tmperrcue10 = []
% tmperrcue11 = []
%
% Only the prosaccade cues '10 ' and '11 ' are checked; a cue whose next event
% is not the matching saccade is marked 'missingsacc' and removed below.
% NOTE(review): pos+1 exceeds the event list if a cue is the last event.
%start with pro left cue 10
tmperrcue10= find(strcmp({EEG.event.type}, '10 ')) ;
for iii=1:length(tmperrcue10)
pos = tmperrcue10(iii)
if ~ (strcmp(EEG.event(pos+1).type , 'saccade_pro_left'))
EEG.event(pos).type='missingsacc'; %cue
end
end
%%11
tmperrcue11 = find(strcmp({EEG.event.type}, '11 ')) ;
for iii=1:length(tmperrcue11)
pos = tmperrcue11(iii)
if ~ (strcmp(EEG.event(pos+1).type , 'saccade_pro_right'))
EEG.event(pos).type='missingsacc'; %cue
end
end
tmpinv=find(strcmp({EEG.event.type}, 'missingsacc')) ;
EEG.event(tmpinv)=[];
%% delete saccades and cues when the saccade comes faster than 100ms after cue
% The latency difference between a saccade and the event before it is compared
% with 50 (samples); this equals 100 ms only at a 500 Hz sampling rate -
% presumably the rate of these recordings, TODO confirm.
% tmpevent is not read again in this script.
tmpevent=length(EEG.event)
saccpro=find(strcmp({EEG.event.type},'saccade_pro_right')==1 | strcmp({EEG.event.type},'saccade_pro_left')==1)% find rows where there is a saccade
saccanti=find(strcmp({EEG.event.type},'saccade_anti_right')==1 | strcmp({EEG.event.type},'saccade_anti_left')==1);%find rows where there is a saccade
for b=1:size(saccpro,2)
if (EEG.event(saccpro(1,b)).latency-EEG.event(saccpro(1,b)-1).latency)<50 %50 because 100ms
EEG.event(saccpro(b)).type='micro'; %saccade
EEG.event(saccpro(b)-1).type = 'micro'; %cue
end
end
for b=1:size(saccanti,2)
if (EEG.event(saccanti(b)).latency-EEG.event(saccanti(1,b)-1).latency)<50;
EEG.event(saccanti(b)-1).type ='micro';
EEG.event(saccanti(b)).type ='micro';
end
end
tmpinv=find(strcmp({EEG.event.type}, 'micro')) ;
EEG.event(tmpinv)=[];
%% epoching
% Cut epochs around the prosaccade cues with the window [0, 1].
% NOTE(review): the event types compared elsewhere in this script carry a
% trailing space ('10 ', '11 ') while the types passed here do not - confirm
% that pop_epoch still matches them.
EEGprocuesan= pop_epoch(EEG, {'10','11'}, [0, 1]);
%how many epochs
trialinfoprosan.epochs=size(EEGprocuesan.data, 3);
%% important
% Build the label vector from the cue events of the epoched set:
% 0 for a left cue ('10 '), 1 for a right cue ('11 ').
tmp=find(strcmp({EEGprocuesan.event.type}, '11 ') | strcmp({EEGprocuesan.event.type}, '10 '))
right= find(strcmp({EEGprocuesan.event(tmp).type},'11 ')==1);
left= find(strcmp({EEGprocuesan.event(tmp).type},'10 ')==1);
trialinfoprosan.cues = nan(length(tmp),1);
trialinfoprosan.cues(left)= 0;
trialinfoprosan.cues(right)= 1;
%% save epoched data
% Abort if the number of epochs and the number of labels disagree.
if size(EEGprocuesan.data,3) ~= size(trialinfoprosan.cues,1)
error('this is bad')
end
% Both files are written to the current folder, i.e. the subject folder.
save EEGprocuesan EEGprocuesan
save trialinfoprosan trialinfoprosan
end
##################################################################
# Data preparation configurations
import time
import os
import numpy as np
preparation_config = dict()
# The task for which we want to prepare the data. Possible choices that are implemented so far are:
# 'LR_task' (dataset: 'antisaccade'):
# 'Direction_task' (dataset: 'dots' or 'processing_speed'):
# 'Position_task' (dataset: 'dots'):
# 'Segmentation_task' (dataset: 'antisaccade', 'dots', or 'processing_speed'):
preparation_config['task'] = 'Position_task'
preparation_config['dataset'] = 'dots'
# We provide two types of preprocessing on the dataset (minimal preprocessing and maximal preprocessing). Choices are
# 'max'
# 'min'
preparation_config['preprocessing'] = 'max' # or min
preparation_config['preprocessing_path'] = 'synchronized_' + preparation_config['preprocessing']
# We provide also dataset where features are extracted
# (typically used for training with standard machine learning methods).
# The feature extraction that we have implemented is hilbert transformed data for phase and amplitude.
preparation_config['feature_extraction'] = False
# Maybe for later we can also include the bandpassed data on
# top of the feature extracted data (this is not implemented yet).
preparation_config['including_bandpass_data'] = False # or True (for later)
#The directory of output file and the name
preparation_config['SAVE_PATH'] = './data/prepared/'
preparation_config['output_name'] = preparation_config['task'] + '_with_' + preparation_config['dataset']
preparation_config['output_name'] = preparation_config['output_name'] + '_' + preparation_config['preprocessing_path']
preparation_config['output_name'] = preparation_config['output_name'] + ('_hilbert.npz' if preparation_config['feature_extraction'] else '.npz')
##################################################################################
# We prepare some helper variables to locate the correct datasets and the files that we need and to use them.
preparation_config['LOAD_ANTISACCADE_PATH'] = './data/measured/antisaccade_task_data/' + preparation_config['preprocessing_path'] + '/'
preparation_config['ANTISACCADE_FILE_PATTERN'] = '[go]ip_..._AS_EEG.mat'
preparation_config['ANTISACCADE_HILBERT_FILE_PATTERN'] = '[go]ip_..._AS_EEG.mat'
preparation_config['LOAD_DOTS_PATH'] = './data/measured/dots_data/' + preparation_config['preprocessing_path'] + '/'
preparation_config['DOTS_FILE_PATTERN'] = '(ep|EP).._DOTS._EEG.mat'
preparation_config['DOTS_HILBERT_FILE_PATTERN'] = '(ep|EP).._DOTS._EEG.mat'
preparation_config['LOAD_PROCESSING_SPEED_PATH'] = './data/measured/processing_speed_data/' + preparation_config['preprocessing_path'] + '/'
preparation_config['PROCESSING_SPEED_FILE_PATTERN'] = '..._WI2_EEG.mat'
preparation_config['PROCESSING_SPEED_HILBERT_FILE_PATTERN'] = '..._WI2_EEG.mat'
##################################################################################
##################################################################################
##################################################################################
##################################################################################
##################################################################################
##################################################################################
##################################################################################
# Internal information about each dataset (antisaccade, dots, processing_speeed)
preparation_config['saccade_trigger'] = ['L_saccade', 'R_saccade']
preparation_config['fixation_trigger'] = ['L_fixation', 'R_fixation']
preparation_config['blink_trigger'] = ['L_blink', 'R_blink']
# Anti-saccade dataset
preparation_config['antisaccade'] = dict()
preparation_config['antisaccade']['cue_trigger'] = ['10', '11']
preparation_config['antisaccade']['matlab_struct'] = 'EEG'
#Dots dataset
preparation_config['dots'] = dict()
preparation_config['dots']['cue_trigger'] = list(map(str, range(1, 28))) + list(map(str, range(101, 128)))
preparation_config['dots']['end_cue_trigger'] =['41']
preparation_config['dots']['matlab_struct'] = 'sEEG'
preparation_config['dots']['tar_pos'] = np.array([
[400, 300], [650, 500], [400, 100], [100, 450], [700, 450], [100, 500],
[200, 350], [300, 400], [100, 150], [150, 500], [150, 100], [700, 100],
[300, 200], [100, 100], [700, 500], [500, 400], [600, 250], [650, 100],
[400, 300], [200, 250], [400, 500], [700, 150], [500, 200], [100, 300],
[700, 300], [600, 350], [400, 300]
])
# Processing speed dataset
preparation_config['processing_speed'] = dict()
preparation_config['processing_speed']['matlab_struct'] = 'sEEG'
#Maybe we should do logging here as well ...
\ No newline at end of file
import os
import numpy as np
import h5py
import scipy.io
import pandas as pd
import re
from tqdm import tqdm
class Preparator:
def __init__(self, load_directory='./', save_directory='./', load_file_pattern='*', save_file_name='all.npz',verbose=False):
self.load_directory = load_directory
self.save_directory = save_directory
self.load_file_pattern = re.compile(load_file_pattern)
self.save_file_name = save_file_name
self.extract_pattern = None
self.extract_pattern = None
self.start_time = None
self.length_time = None
self.start_channel = None
self.end_channel = None
self.on_blocks = None
self.off_blocks = None
self.filters = []
self.ignore_events = []
self.labels = []
self.verbose = verbose
self.padding = True
print("Preparator is initialized with: ")
print("Directory to load data: " + self.load_directory)
print("Directory to save data: " + self.save_directory)
print("Looking for file that match: " + load_file_pattern)
print("Will store the merged file with name: " + self.save_file_name)
def extract_data_at_events(self, extract_pattern, name_start_time, start_time, name_length_time, length_time, start_channel, end_channel, padding=True):
self.extract_pattern = extract_pattern
self.start_time = start_time
self.length_time = length_time
self.start_channel = start_channel
self.end_channel = end_channel
self.padding = padding
print("Preparator is instructed to look for events that match structure: " + str(self.extract_pattern))
print("Time dimension -- Cut start info: " + name_start_time)
print("Time dimension -- Cut length info: " + name_length_time)
print("Channel dimension -- Cut start info: " + str(start_channel))
print("Channel dimension -- Cut end info: " + str(end_channel))
def blocks(self, on_blocks, off_blocks):
self.on_blocks = on_blocks
self.off_blocks = off_blocks
print("Blocks to be used are: " + str(on_blocks))
print("Blocks to be ignored are: " + str(off_blocks))
def addFilter(self, name, f):
self.filters.append((name, f))
print('Preparator is instructed to use filter: ' + name)
def addLabel(self, name, f):
self.labels.append((name, f))
print('Preparator is instructed to use label: ' + name)
def ignoreEvent(self, name, f):
self.ignore_events.append((name, f))
print('Preparator is instructed to ignore the event: ' + name)
def run(self):
    """Collect the trials and labels of all matching recordings and save them.

    Every sub-folder of ``self.load_directory`` is treated as one subject.
    Each file in it whose name matches ``self.load_file_pattern`` is loaded
    (HDF5 based .mat files through h5py, all others through
    ``scipy.io.loadmat``), its events are filtered, and the extracted trials
    and labels are appended. The result is written with ``np.savez`` as the
    arrays ``EEG`` and ``labels`` to ``save_directory``/``save_file_name``.

    Raises:
        ValueError: If no matching recording was found.
    """
    print("Starting collecting data.")
    all_EEG = []
    all_labels = []
    subj_counter = 1

    progress = tqdm(sorted(os.listdir(self.load_directory)))
    for subject in progress:
        # os.path.join works with and without a trailing separator on the directory.
        cur_dir = os.path.join(self.load_directory, subject)
        if not os.path.isdir(cur_dir):
            continue

        for f in sorted(os.listdir(cur_dir)):
            if not self.load_file_pattern.match(f):
                continue
            progress.set_description('Loading ' + f)
            file_path = os.path.join(cur_dir, f)

            # load the mat file
            if h5py.is_hdf5(file_path):
                # Events and data are copied into memory inside the block, so
                # the file handle can be closed as soon as both are read.
                with h5py.File(file_path, 'r') as hdf5file:
                    # NOTE(review): the struct is picked by key position;
                    # presumably the second key is the EEG struct - confirm.
                    EEG = hdf5file[list(hdf5file.keys())[1]]
                    events = self._load_hdf5_events(EEG)
                    data = np.array(EEG['data'], dtype='float')
            else:
                matfile = scipy.io.loadmat(file_path)
                # NOTE(review): picked by key position as well; presumably the
                # first three keys are loadmat's header entries - confirm.
                EEG = matfile[list(matfile.keys())[3]][0, 0]
                events = self._load_v5_events(EEG)
                # Transposed, presumably to match the axis order of the HDF5 branch.
                data = np.array(EEG['data'], dtype='float').T

            events = self._ignore_events(events)
            if self.verbose:
                print(events)
            select = self._filter_blocks(events)
            select &= self._filter_events(events)
            trials = self._extract_events(data, events, select)
            labels = self._extract_labels(events, select, subj_counter)
            all_EEG.append(trials)
            all_labels.append(labels)

        # NOTE(review): the counter advances once per subject folder; the
        # intended nesting could not be confirmed from the source - verify.
        subj_counter += 1

    if not all_EEG:
        # np.concatenate would fail with a far less helpful message.
        raise ValueError(
            'No file matching "' + self.load_file_pattern.pattern
            + '" was found in the sub-folders of ' + str(self.load_directory)
        )

    # save the concatenated arrays
    print('Saving data...')
    EEG = np.concatenate(all_EEG, axis=0)
    labels = np.concatenate(all_labels, axis=0)
    print("Shapes of EEG are: ")
    print(EEG.shape)
    print("Shapes of labels are: ")
    print(labels.shape)
    np.savez(os.path.join(self.save_directory, self.save_file_name), EEG=EEG, labels=labels)
def _load_v5_events(self, EEG):
if self.verbose: print("Loading the events from the subject. ")
# extract the useful event data
events = pd.DataFrame()
events['type'] = [el[0].strip() for el in EEG['event'][0]['type']]
# if self.verbose: print(events)
events['latency'] = [el[0, 0] for el in EEG['event'][0]['latency']]
# if self.verbose: print(events)
events['amplitude'] = [el[0, 0] for el in EEG['event'][0]['sac_amplitude']]
# if self.verbose: print(events)
events['start_x'] = [el[0, 0] for el in EEG['event'][0]['sac_startpos_x']]
# if self.verbose: print(events)
events['end_x'] = [el[0, 0] for el in EEG['event'][0]['sac_endpos_x']]
# if self.verbose: print(events)
events['start_y'] = [el[0, 0] for el in EEG['event'][0]['sac_startpos_y']]
# if self.verbose: print(events)
events['end_y'] = [el[0, 0] for el in EEG['event'][0]['sac_endpos_y']]
# if self.verbose: print(events)
events['duration'] = [el[0, 0] for el in EEG['event'][0]['duration']]
# if self.verbose: print(events)
events['avgpos_x'] = [el[0, 0] for el in EEG['event'][0]['fix_avgpos_x']]
# if self.verbose: print(events)
events['avgpos_y'] = [el[0, 0] for el in EEG['event'][0]['fix_avgpos_y']]
# if self.verbose: print(events)
events['endtime'] = [el[0, 0] for el in EEG['event'][0]['endtime']]
if self.verbose:
print("Events loaded are: ")
print(events)
return events
def _load_hdf5_events(self, EEG):
if self.verbose: print("Loading the events from the subject. ")
# extract the useful event data
events = pd.DataFrame()
events['type'] = [''.join(map(chr, EEG[ref][:, 0])).strip() for ref in EEG['event']['type'][:, 0]]
#if self.verbose: print(events)
events['latency'] = [EEG[ref][0, 0] for ref in EEG['event']['latency'][:, 0]]
# if self.verbose: print(events)
events['amplitude'] = [EEG[ref][0, 0] for ref in EEG['event']['sac_amplitude'][:, 0]]
#if self.verbose: print(events)