Commit 0e2a26c5 authored by Ard Kastrati

Made some small changes

parent 6ef639cc
@@ -5,7 +5,6 @@ import scipy.io
import pandas as pd
import re
from tqdm import tqdm
from preparation_config import preparation_config as config
class Preparator:
@@ -28,12 +27,11 @@ class Preparator:
self.labels = []
self.verbose = verbose
self.padding = True
if self.verbose:
print("Preparator is initialized with: ")
print("Directory to load data: " + self.load_directory)
print("Directory to save data: " + self.save_directory)
print("Looking for file that match: " + load_file_pattern)
print("Will store the merged file with name: " + self.save_file_name)
print("Preparator is initialized with: ")
print("Directory to load data: " + self.load_directory)
print("Directory to save data: " + self.save_directory)
print("Looking for file that match: " + load_file_pattern)
print("Will store the merged file with name: " + self.save_file_name)
def extract_data_at_events(self, extract_pattern, name_start_time, start_time, name_length_time, length_time, start_channel, end_channel, padding=True):
@@ -44,34 +42,32 @@ class Preparator:
self.end_channel = end_channel
self.padding = padding
if self.verbose:
print("Preparator is instructed to look for events that match structure: " + str(self.extract_pattern))
print("Time dimension -- Cut start info: " + name_start_time)
print("Time dimension -- Cut length info: " + name_length_time)
print("Channel dimension -- Cut start info: " + str(start_channel))
print("Channel dimension -- Cut end info: " + str(end_channel))
print("Preparator is instructed to look for events that match structure: " + str(self.extract_pattern))
print("Time dimension -- Cut start info: " + name_start_time)
print("Time dimension -- Cut length info: " + name_length_time)
print("Channel dimension -- Cut start info: " + str(start_channel))
print("Channel dimension -- Cut end info: " + str(end_channel))
def blocks(self, on_blocks, off_blocks):
self.on_blocks = on_blocks
self.off_blocks = off_blocks
if self.verbose:
print("Blocks to be used are: " + str(on_blocks))
print("Blocks to be ignored are: " + str(off_blocks))
print("Blocks to be used are: " + str(on_blocks))
print("Blocks to be ignored are: " + str(off_blocks))
def addFilter(self, name, f):
self.filters.append((name, f))
if self.verbose: print('Preparator is instructed to use filter: ' + name)
print('Preparator is instructed to use filter: ' + name)
def addLabel(self, name, f):
self.labels.append((name, f))
if self.verbose: print('Preparator is instructed to use label: ' + name)
print('Preparator is instructed to use label: ' + name)
def ignoreEvent(self, name, f):
self.ignore_events.append((name, f))
if self.verbose: print('Preparator is instructed to ignore the event: ' + name)
print('Preparator is instructed to ignore the event: ' + name)
def run(self):
if self.verbose: print("Starting collecting data.")
print("Starting collecting data.")
all_EEG = []
all_labels = []
subj_counter = 1
@@ -88,27 +84,22 @@ class Preparator:
# break
cur_dir = self.load_directory + subject + '/'
debug = 0
for f in sorted(os.listdir(cur_dir)):
# skip the non-matching files
print(f)
if not self.load_file_pattern.match(f):
continue
print(subject)
if debug == 1: # debug guard: only the first matching file per subject is processed
break
debug = 1 - debug
progress.set_description('Loading ' + f)
# load the mat file
events = None
if h5py.is_hdf5(cur_dir + f):
if self.verbose: print("It is a HDF5 file. All is fine.")
hdf5file = h5py.File(cur_dir + f, 'r')
EEG = hdf5file[list(hdf5file.keys())[1]]
events = self._load_hdf5_events(EEG)
else:
EEG = scipy.io.loadmat(cur_dir + f)['sEEG'][0]
events = self._load_v5_events(EEG)
# EEG = scipy.io.loadmat(cur_dir + f)['sEEG'][0]
# events = self._load_v5_events(EEG)
raise NotImplementedError("Matlab v5 files cannot be loaded. I still have to implement this.")
events = self._ignore_events(events)
if self.verbose: print(events)
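As a side note, a minimal sketch of the two loading paths that run() dispatches between (h5py for MATLAB v7.3 files, scipy.io.loadmat for older ones); the 'sEEG' key is taken from the commented-out call above, while the function and variable names are illustrative only:

# Hedged sketch, not the project's actual loader: dispatch on the MAT file version.
import h5py
import scipy.io

def load_eeg_struct(path):
    if h5py.is_hdf5(path):
        # MATLAB v7.3 files are HDF5 containers; reuse the key-index heuristic from run()
        hdf5file = h5py.File(path, 'r')
        return hdf5file[list(hdf5file.keys())[1]]
    # pre-v7.3 files go through scipy.io; field access then differs from the h5py case
    return scipy.io.loadmat(path)['sEEG'][0]
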
@@ -125,11 +116,13 @@ class Preparator:
print('Saving data...')
EEG = np.concatenate(all_EEG, axis=0)
labels = np.concatenate(all_labels, axis=0)
print("Shapes of EEG are: ")
print(EEG.shape)
print("Shapes of labels are: ")
print(labels.shape)
np.savez(self.save_directory + self.save_file_name, EEG=EEG, labels=labels)
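The merged file written above can be read back with numpy; a small sketch assuming the same 'EEG' and 'labels' keys passed to np.savez (the file name here is hypothetical):

import numpy as np

with np.load('prepared_data.npz') as npz:  # hypothetical file name; keys match the np.savez call above
    EEG, labels = npz['EEG'], npz['labels']
    print(EEG.shape, labels.shape)
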
# THIS IS NOT FINISHED
def _load_v5_events(self, EEG):
if self.verbose: print("Loading the events from the subject. ")
# extract the useful event data
@@ -238,25 +231,25 @@ class Preparator:
all_trials = []
# extract the useful data
print(EEG['data'])
if self.verbose: print(EEG['data'])
data = np.array(EEG['data'], dtype='float')
start = self.start_time(events).loc[select]
length = events['type'].apply(lambda x: self.length_time).loc[select]
end_block = events['latency'].shift(-len(self.extract_pattern)).loc[select]
print(start)
print(length)
print(end_block)
if self.verbose: print(start)
if self.verbose: print(length)
if self.verbose: print(end_block)
for s, l, e in zip(start, length, end_block):
if s + l > e and self.padding:
# Need to pad, since the required length is bigger than the end of the last block
print(str(s) + ", " + str(l) + ", " + str(e) + " -> padding is needed")
if self.verbose: print(str(s) + ", " + str(l) + ", " + str(e) + " -> padding is needed")
unpadded_data = data[int(s - 1):int(e - 1), (self.start_channel - 1):self.end_channel]
x_len, y_len = unpadded_data.shape
padding_size = int(s + l - e)
append_data = np.pad(unpadded_data, pad_width=((0, padding_size), (0, 0)), mode='reflect')
print(append_data)
if self.verbose: print(append_data)
else:
append_data = data[int(s - 1):int(s + l - 1), (self.start_channel - 1):self.end_channel]
all_trials.append(append_data)
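To illustrate the reflect padding used in the branch above, a tiny standalone example with made-up numbers:

import numpy as np

block = np.arange(10).reshape(5, 2)  # 5 time samples x 2 channels of made-up data
padded = np.pad(block, pad_width=((0, 3), (0, 0)), mode='reflect')
print(padded.shape)  # (8, 2): three extra rows mirrored from the end, channels untouched
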
@@ -279,71 +272,3 @@ class Preparator:
labels = np.concatenate((labels, np.asarray(f(events).loc[select]).reshape(-1,1)), axis=1)
if self.verbose: print(labels)
return labels
"""
assert((self.fixed_length is not None and self.length is not None) == False)
if self.fixed_length is not None:
if self.verbose: print("Fixed length, setting the duration to this fixed length " + str(self.fixed_length))
start = events['latency']
duration = events['latency'].apply(lambda x: self.fixed_length)
end = events['latency'].apply(lambda x: (x + self.fixed_length))
if self.verbose: print(start)
if self.verbose: print(duration)
if self.verbose: print(end)
elif self.length is not None:
if self.verbose: print("The length to be extracted is: " + str(self.length))
start = events['latency']
duration = events['latency'].apply(lambda x: self.length)
end = events['endtime'].shift(-len(self.extract_pattern) + 1)
if self.verbose: print(start)
if self.verbose: print(duration)
if self.verbose: print(end)
else:
raise ValueError("Please specify either length or fixed length")
# Get the valid trials
start_trial = start.loc[select]
duration_trial = duration.loc[select]
end_trial = end.loc[select]
if self.verbose:
print("Getting the valid trials start and their duration.")
print(start_trial)
print(duration_trial)
print(end_trial)
# Cut the data
if self.verbose: print("Cutting the data.")
all_trials = []
# extract the useful data
data = np.array(EEG['data'], dtype='float')
for start, duration, end in zip(start_trial, duration_trial, end_trial):
if end - start > duration:
# Need to cut
if self.verbose: print("Cutting the data since too long")
start = start + (end - start - duration)/2
print("Starting " + str(start-1) + " and ending in " + str(start + duration - 1))
all_trials.append(data[int(start - 1):int(start + duration - 1), :129])
elif end - start < duration:
if self.verbose: print("Padding the data since too short")
# Need to pad
unpadded_data = data[int(start - 1):int(end - start - 1), :]
x_len, y_len = unpadded_data.shape
padding_size = duration - x_len
padded_data = np.pad(unpadded_data, pad_width=((0, padding_size), (0, 0)))
all_trials.append(padded_data[:,:129])
else: # They are equal
if self.verbose: print("No cutting or padding is needed.")
all_trials.append(data[int(start - 1):int(start + duration - 1), :129])
all_trials = np.array(all_trials)
if self.verbose: print("Extracted all this data from this participant.")
if self.verbose: print(all_trials)
return all_trials
"""
\ No newline at end of file
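Putting the pieces of the class together, a hedged end-to-end sketch of how the Preparator API appears to be driven (paths, the block names, the regex, and the label lambda are illustrative, not taken from the repository; fixation and saccade are assumed to be defined in the preparation config, as in the usage below):

import re

preparator = Preparator(load_directory='./raw/', save_directory='./prepared/',  # illustrative paths
                        load_file_pattern=re.compile(r'.*_EEG\.mat'),           # assumed to be a compiled regex, since run() calls .match()
                        save_file_name='merged_data.npz')
preparator.blocks(on_blocks=['block1'], off_blocks=['block2'])                  # made-up block names
preparator.extract_data_at_events(extract_pattern=[fixation, saccade, fixation],
                                  name_start_time='at saccade onset',
                                  start_time=lambda events: events['latency'].shift(-1),
                                  name_length_time='Size 1', length_time=1,
                                  start_channel=1, end_channel=129, padding=False)
preparator.addLabel(name='Saccade amplitude', f=lambda events: events['amplitude'])
preparator.run()
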
@@ -134,7 +134,10 @@ def direction_with_processing_speed_feature_extraction_dataset():
preparator = Preparator(load_directory=config['LOAD_PROCESSING_SPEED_PATH'], save_directory=config['SAVE_PROCESSING_SPEED_PATH'],
load_file_pattern=config['PROCESSING_SPEED_FILE_PATTERN'], save_file_name='DirectionTask_pretraining_data_feature_extracted.npz')
preparator.extract_data_at_events(extract_pattern=[saccade]) # we are interested only in the saccade onset (where the data is feature-extracted by the Hilbert transform)
preparator.extract_data_at_events(extract_pattern=[fixation, saccade, fixation], name_start_time='at saccade onset',
start_time=lambda events: (events['latency'].shift(-1)),
name_length_time='Size 1', length_time=1,
start_channel=1, end_channel=129, padding=False) # we are interested only in the saccade onset (where the data is feature-extracted by the Hilbert transform)
preparator.addFilter(name='Keep only saccades around fixations', f=lambda events: events['type'].shift(1).isin(fixation) & events['type'].shift(-1).isin(fixation))
preparator.addFilter(name='Keep only long enough fixations1', f=lambda events: events['duration'].shift(1) > 50)
preparator.addFilter(name='Keep only big enough saccade', f=lambda events: events['amplitude'] > 0.5)
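To make the shift-based filters above easier to follow, a toy illustration of how shift(1)/shift(-1) expose each event's neighbours (event names and thresholds are made up):

import pandas as pd

events = pd.DataFrame({'type': ['fixation', 'saccade', 'fixation', 'saccade'],
                       'duration': [60, 5, 40, 7],
                       'amplitude': [0.0, 0.8, 0.0, 0.3]})
prev_is_fixation = events['type'].shift(1) == 'fixation'   # event preceding each row
next_is_fixation = events['type'].shift(-1) == 'fixation'  # event following each row
select = prev_is_fixation & next_is_fixation & (events['amplitude'] > 0.5)
print(events.loc[select])  # keeps only the saccade that sits between two fixations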