Commit 6b5eefdd authored by Lukas Wolf

clean

parent c445dbd1
@@ -26,7 +26,15 @@ class BaseNet:
else:
self.model = self._build_model()
self.model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
# Compile the model depending on the task
if config['task'] == 'prosaccade-clf':
self.model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
elif config['task'] == 'gaze-reg':
self.model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(), metrics=['mean_squared_error'])
elif config['task'] == 'angle-reg':
from utils.losses import angle_loss
self.model.compile(loss=angle_loss, optimizer=keras.optimizers.Adam())
if self.verbose:
self.model.summary()
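The `angle_loss` imported from `utils.losses` is not shown in this diff. A minimal sketch of what such a loss could look like, assuming angles are predicted in radians and wrap-around at ±π should not be penalized:

```python
import tensorflow as tf

def angle_loss(y_true, y_pred):
    # Hypothetical sketch, not the repo's implementation: penalize the
    # smallest angular difference by mapping the raw error into (-pi, pi].
    diff = y_true - y_pred
    wrapped = tf.atan2(tf.sin(diff), tf.cos(diff))
    return tf.reduce_mean(tf.square(wrapped))
```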
@@ -45,7 +53,6 @@ class BaseNet:
csv_logger = CSVLogger(config['batches_log'], append=True, separator=';')
ckpt_dir = config['model_dir'] + '/' + config['model'] + '_' + 'best_model.h5'
ckpt = tf.keras.callbacks.ModelCheckpoint(ckpt_dir, verbose=1, monitor='val_accuracy', save_best_only=True, mode='auto')
X_train, X_val, y_train, y_val = train_val_split(x, y, 0.2, subjectID)
prediction_ensemble = prediction_history((X_val, y_val))
......
@@ -125,7 +125,7 @@ class ConvNet(ABC):
X_train, X_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
prediction_ensemble = prediction_history((X_val,y_val))
# Create a callback for tensorboard
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=config['tensorboard_log_dir'], histogram_freq=1)
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=config['tensorboard_log_dir'], histogram_freq=1)
"""
# Define callbacks for model interpretability experiments
# For integrated gradients, create a validation set with 0 and 1 labels and treat them separately
@@ -142,8 +142,7 @@ class ConvNet(ABC):
# Fit the model on the training data
hist = self.model.fit(X_train, y_train, verbose=2, batch_size=self.batch_size,
validation_data=(X_val,y_val), epochs=self.epochs,
callbacks=[csv_logger, ckpt, prediction_ensemble, early_stop, integrated_grad_one,
integrated_grad_zero, tensorboard_callback])
callbacks=[csv_logger, ckpt, prediction_ensemble, early_stop])
return hist, prediction_ensemble
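`prediction_history` is defined elsewhere in the repository. A minimal sketch of a Keras callback with the behavior the ensemble code appears to rely on (the attribute name `predhis` is an assumption):

```python
import tensorflow as tf

class prediction_history(tf.keras.callbacks.Callback):
    """Hypothetical sketch: record predictions on the held-out split each epoch."""
    def __init__(self, validation_data):
        super().__init__()
        self.validation_data = validation_data
        self.predhis = []

    def on_epoch_end(self, epoch, logs=None):
        X_val, _ = self.validation_data
        self.predhis.append(self.model.predict(X_val, verbose=0))
```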
@@ -160,4 +159,4 @@ def split_valid_data(X, y):
X_one.append(el)
y_one.append(label)
return np.array(X_zero), np.array(y_zero), np.array(X_one), np.array(y_one)
\ No newline at end of file
return np.array(X_zero), np.array(y_zero), np.array(X_one), np.array(y_one)
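Only the tail of `split_valid_data` is visible in this hunk. For context, a sketch of the whole function consistent with these lines, assuming it partitions a validation set by binary label for the integrated-gradients experiments mentioned above:

```python
import numpy as np

def split_valid_data(X, y):
    # Separate validation samples into label-0 and label-1 groups.
    X_zero, y_zero, X_one, y_one = [], [], [], []
    for el, label in zip(X, y):
        if label == 0:
            X_zero.append(el)
            y_zero.append(label)
        else:
            X_one.append(el)
            y_one.append(label)
    return np.array(X_zero), np.array(y_zero), np.array(X_one), np.array(y_one)
```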
@@ -29,9 +29,9 @@ TODO: write a proper description of how to set the fields in the config
"""
# Choose which task to run
#config['task'] = 'prosaccade-clf'
config['task'] = 'prosaccade-clf'
#config['task'] = 'gaze-reg'
config['task'] = 'angle-reg'
#config['task'] = 'angle-reg'
# Choose which experiment's dataset to load. Only selectable for angle-reg and gaze-reg
if config['task'] != 'prosaccade-clf':
@@ -46,9 +46,6 @@ if config['task'] == 'gaze-reg':
elif config['task'] == 'angle-reg':
config['data_mode'] = 'fix_sacc_fix'
# Choose how much data to use for gaze-reg
config['data-fraction'] = 1.0 # Set to 1.0 to use the whole dataset; experimental feature, only for the regression task
"""
Parameters that can be chosen:
cnn: The simple CNN architecture
@@ -72,6 +69,7 @@ config['model'] = 'cnn'
#config['model'] = 'inception'
#config['model'] = 'eegnet'
#config['model'] = 'deepeye'
#config['model'] = 'deepeye-rnn'
#config['model'] = 'xception'
#config['model'] = 'pyramidal_cnn'
#config['model'] = 'siamese' # Note that you have to set data_mode to sacc_fix for this model
@@ -88,6 +86,7 @@ config['run'] = 'ensemble'
config['ensemble'] = 1 # number of models in the ensemble method
# Other functions that can be chosen optionally
config['tensorboard_on'] = False
config['sanity_check'] = False
config['plot_model'] = False
config['plot_filters'] = False # TODO: make this work; TF raises a ValueError because it gets (1,w,h) instead of the expected (None,w,h)
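With `config['run'] = 'ensemble'`, `config['ensemble']` sets how many models are trained. The combination step is not part of this diff; a hedged sketch, assuming the `predhis` attribute from the `prediction_history` sketch above:

```python
import numpy as np

def average_ensemble_predictions(prediction_histories):
    # Hypothetical: average the final-epoch validation predictions
    # collected by each model's prediction_history callback.
    final_preds = [ph.predhis[-1] for ph in prediction_histories]
    return np.mean(final_preds, axis=0)
```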
@@ -160,16 +159,16 @@ if config['task'] != 'prosaccade-clf':
config['inception']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
config['deepeye']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
config['xception']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
config['deepeye-rnn']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
elif config['data_mode'] == 'fix_sacc_fix' and config['dataset'] == 'calibration_task':
config['cnn']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
config['pyramidal_cnn']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
config['inception']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
config['deepeye']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
config['xception']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
# These models are not yet implemented for regression
#config['deepeye-rnn']['input_shape'] = (int(config['max_duration']), 129)
config['deepeye-rnn']['input_shape'] = (config['max_saccade'] + 2 * config['fixation_padlength'], 129)
#config['eegnet']['channels'] = 129
#config['eegnet']['samples'] = config['max_duration'] = 150
#config['eegnet']['samples'] = 150
else:
# Left-right classification (prosaccade) task
config['cnn']['input_shape'] = (125, 129) if config['downsampled'] else (500, 129)
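All regression input shapes above reduce to `(max_saccade + 2 * fixation_padlength, 129)`, i.e. time samples by EEG channels. A worked example with hypothetical values (the real ones are set elsewhere in config.py):

```python
# Hypothetical values, for illustration only:
max_saccade = 60          # time samples of the saccade window
fixation_padlength = 20   # fixation samples kept on each side
input_shape = (max_saccade + 2 * fixation_padlength, 129)
print(input_shape)  # (100, 129) -> (time samples, EEG channels)
```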
@@ -206,9 +205,10 @@ config['info_log'] = config['model_dir'] + '/' + 'info.log'
config['batches_log'] = config['model_dir'] + '/' + 'batches.log'
# Create a directory to store logs for tensorboard
config['tensorboard_log_dir'] = config['model_dir'] + "/logs/fit/" + timestamp
if not os.path.exists(config['tensorboard_log_dir']):
os.makedirs(config['tensorboard_log_dir'])
if config['tensorboard_on']:
config['tensorboard_log_dir'] = config['model_dir'] + "/logs/fit/" + timestamp
if not os.path.exists(config['tensorboard_log_dir']):
os.makedirs(config['tensorboard_log_dir'])
# Save config to model dir
import pickle
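Since the log directory is now only created when `config['tensorboard_on']` is set, the training code should add the TensorBoard callback under the same flag. A self-contained sketch of that pattern (the helper name is hypothetical):

```python
import tensorflow as tf

def build_callbacks(config, base_callbacks):
    # Start from the existing callbacks (csv_logger, ckpt, prediction_ensemble, ...)
    callbacks = list(base_callbacks)
    if config.get('tensorboard_on', False):
        # Only instantiate the callback when the log dir was actually created.
        callbacks.append(tf.keras.callbacks.TensorBoard(
            log_dir=config['tensorboard_log_dir'], histogram_freq=1))
    return callbacks
```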
......
@@ -78,8 +78,6 @@ def run(trainX, trainY):
if config['split']:
config['model'] = config['model'] + '_cluster'
hist.history['val_loss'] = loss
hist.history['val_accuracy'] = accuracy
plot_loss(hist, config['model_dir'], config['model'], val = True)
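`plot_loss` itself is not part of this diff. A minimal sketch consistent with the call signature above, assuming it plots the Keras history and writes a PNG into the model directory:

```python
import matplotlib.pyplot as plt

def plot_loss(hist, model_dir, model_name, val=True):
    # Hypothetical sketch of the plotting helper called above.
    plt.figure()
    plt.plot(hist.history['loss'], label='training loss')
    if val and 'val_loss' in hist.history:
        plt.plot(hist.history['val_loss'], label='validation loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig(f"{model_dir}/{model_name}_loss.png")
    plt.close()
```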
......
@@ -9,13 +9,6 @@ import logging
import time
import os
# Create debugging log for tensorboard
import tensorflow as tf
debug_dir = config['tensorboard_log_dir'] + '/tfdbg'
if not os.path.exists(debug_dir):
os.makedirs(debug_dir)
tf.debugging.experimental.enable_dump_debug_info(debug_dir, tensor_debug_mode="FULL_HEALTH", circular_buffer_size=-1)
# Import the correct functions depending on the task
if config['task'] == 'gaze-reg' or config['task'] == 'angle-reg':
from ensemble_regression import run # Regression tasks
......
@@ -4,7 +4,7 @@
#SBATCH --output=log/%j.out # where to store the output (%j is the JOBID), subdirectory must exist
#SBATCH --error=log/%j.err # where to store error messages
#SBATCH --gres=gpu:1
#SBATCH --mem=80G
#SBATCH --mem=100G
echo "Running on host: $(hostname)"
echo "In directory: $(pwd)"
......
@@ -69,43 +69,34 @@ def load_regression_data(verbose=True):
def get_fix_data(verbose=True):
"""
Returns X, y for the gaze regression task with EEG data X only from fixations
The files containing the data must be properly named and located in the right ./data/ directory
"""
# Define these variables that are needed to load the fixation data (old dataset, processing speed?)
# Define the variables that are needed to load the fixation data (old dataset, processing speed?)
config['trainX_file'] = 'EEGdata-002.mat'
config['trainY_file'] = 'label.mat'
config['trainX_variable'] = 'EEGdata'
config['trainY_variable'] = 'label'
# Load the labels
y = scipy.io.loadmat(config['data_dir'] + config['trainY_variable'])
labels = y['label'] # shape (85413, 1) for label.mat
# Load the EEG data
f = h5py.File(config['data_dir'] + config['trainX_file'], 'r')
X = np.array(f[config['trainX_variable']]).T
# Compute how much of the data we want to preprocess and return
#testsize = int(config['data-fraction'] * len(X))
x_list = []
y_list = []
# Run through the data
#for i in tqdm(range(testsize), desc='Loading regression data'):
for i in range(len(X)):
# Read the datapoint from X and check how long the duration is
ref = X[i][0]
x_datapoint = np.array(f[ref])
x_len, y_len = x_datapoint.shape # x_len = number of time samples (duration in ms is 2*x_len), y_len = 129 channels
# Check whether the point fits the desired range
if x_len < config['min_fixation'] or x_len > config['max_fixation']:
continue
# Create the 2D regression label
label = labels[i]
y_datapoint = np.array([label[0][1][0][0], label[0][2][0][0]])
# Pad the data
padding_size = config['max_fixation'] - x_len
if config['padding'] == 'zero':
@@ -114,26 +105,21 @@ def get_fix_data(verbose=True):
x_datapoint = np.pad(x_datapoint, pad_width=((0,padding_size),(0,0)), mode='reflect')#.flatten()
else:
raise Exception("Choose a valid padding scheme in config.py")
x_list.append(x_datapoint)
y_list.append(y_datapoint)
X = np.asarray(x_list)
# Reshape data and normalize it
norm = np.linalg.norm(X)
X = X / norm
#norm = np.linalg.norm(X)
#X = X / norm
y = np.asarray(y_list)
if verbose:
logging.info("y training loaded.")
logging.info(y.shape)
logging.info("X training loaded.")
logging.info(X.shape)
# Save the precomputed data for future usage
# Save the precomputed data for further usage
#np.save("./data/precomputed/fix_only_X", X)
#np.save("./data/precomputed/fix_only_y", y)
return X, y
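Both loaders pad each datapoint along the time axis with either the 'zero' or the 'reflect' scheme. A small self-contained demonstration of what the two `np.pad` calls do to a toy `(time, channels)` array:

```python
import numpy as np

x = np.array([[1., 10.], [2., 20.], [3., 30.]])  # 3 time samples, 2 channels
pad = 2                                          # pad up to length 5
zero = np.pad(x, pad_width=((0, pad), (0, 0)))                     # trailing zeros
reflect = np.pad(x, pad_width=((0, pad), (0, 0)), mode='reflect')  # mirror the tail
print(zero[:, 0])     # [1. 2. 3. 0. 0.]
print(reflect[:, 0])  # [1. 2. 3. 2. 1.]
```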
@@ -144,42 +130,34 @@ def get_sacc_data(task='processing_speed_task', verbose=True):
L_saccade = 'L_saccade'
R_saccade = 'R_saccade'
event_names = [L_saccade, R_saccade]
# Loop over all directories in /data/full_data and extract and concatenate the events from all people
# Get the correct dataset
if task == 'processing_speed_task':
rootdir = './data/processing_speed_task' # modify it if necessary
elif task == 'calibration_task':
rootdir = './data/calibration_task'
else:
raise Exception("Data mode not valid with current data loader")
# Start extracting the data from the directories
x_list = []
y_list = []
for subdir, dirs, files in os.walk(rootdir):
for file in files:
# Get the correct path to the current file
path = os.path.join(subdir, file)
events = load_sEEG_events(path) # access event i via events[i]
data = load_sEEG_data(path)
# Now, depending on the mode, extract the data and create the EEG data matrix and labels
# Extract from this file
for i in range(len(events)):
event = events[i]
if event[0][0] not in event_names: # dereference the event name, e.g. 'L_saccade'
continue
continue # Not a saccade; we need a saccade as the first event
start_time = int(event[1])
end_time = int(event[4])
# extract the EEG data from sEEG.data
x_datapoint = np.array(data[start_time:end_time])
x_len, y_len = x_datapoint.shape
# Pad the saccade-only data (currently pad all to length 100)
if x_len < config['min_saccade'] or x_len > config['max_saccade']:
continue
padding_size = config['max_saccade'] - x_len
if config['padding'] == 'zero':
x_datapoint = np.pad(x_datapoint, pad_width=((0,padding_size),(0,0)))
@@ -187,22 +165,18 @@ def get_sacc_data(task='processing_speed_task', verbose=True):
x_datapoint = np.pad(x_datapoint, pad_width=((0,padding_size),(0,0)), mode='reflect')
else:
raise Exception("Choose a valid padding scheme in config.py")
# Extract the label as the saccade end position
sac_x_end = float(event[6]) # dereference for the double value of the x coordinate
sac_y_end = float(event[7])
y_datapoint = np.array([sac_x_end, sac_y_end])
# Append to X and y
x_list.append(x_datapoint)
y_list.append(y_datapoint)
X = np.asarray(x_list)
X = X[:,:,:129] # Cut off the last 4 columns (time, x, y, pupil size)
# Normalize the data
norm = np.linalg.norm(X)
X = X / norm
# norm = np.linalg.norm(X)
# X = X / norm
y = np.asarray(y_list)
if verbose:
@@ -210,11 +184,9 @@ def get_sacc_data(task='processing_speed_task', verbose=True):
logging.info(y.shape)
logging.info("X training loaded.")
logging.info(X.shape)
# Save the precomputed data for future usage
#np.save("./data/precomputed/sacc_only_X", X)
#np.save("./data/precomputed/sacc_only_y", y)
return X, y
def get_sacc_fix_data(task='processing_speed_task', verbose=True):
......
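For orientation, a hypothetical call to the saccade loader shown above; the shapes follow from the padding and the channel cut:

```python
# Hypothetical usage (expects the ./data directories set up as in config.py):
X, y = get_sacc_data(task='calibration_task', verbose=True)
# X: (n_samples, max_saccade, 129) padded EEG windows
# y: (n_samples, 2) saccade end positions (x, y)
```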