To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit c8930138 authored by Lukas Wolf
Browse files

update group experiments

parent c6a914d0
from torch._C import Value from torch._C import Value
from config import config #from config import config
import logging import logging
import numpy as np import numpy as np
...@@ -13,7 +13,8 @@ class Trainer: ...@@ -13,7 +13,8 @@ class Trainer:
Can also choose between tensorflow and pytorch implementation of the models Can also choose between tensorflow and pytorch implementation of the models
""" """
def __init__(self): def __init__(self, config):
self.config = config
# self.X = X # self.X = X
# self.y = y # self.y = y
self.X, self.y = self.load_data() self.X, self.y = self.load_data()
...@@ -26,15 +27,15 @@ class Trainer: ...@@ -26,15 +27,15 @@ class Trainer:
# Run the training # Run the training
# For now I will just create an Ensemble and run it # For now I will just create an Ensemble and run it
logging.info("------------------------------------------------------------------------------------") logging.info("------------------------------------------------------------------------------------")
logging.info("Trainer: created a {} trainer".format(config['framework'])) logging.info("Trainer: created a {} trainer".format(self.config['framework']))
if config['framework'] == 'tensorflow': if self.config['framework'] == 'tensorflow':
from tf_models.Ensemble.Ensemble_tf import Ensemble_tf from tf_models.Ensemble.Ensemble_tf import Ensemble_tf
ensemble = Ensemble_tf(nb_models=config['ensemble'], model_type=config['model']) ensemble = Ensemble_tf(nb_models=self.config['ensemble'], model_type=self.config['model'])
ensemble.run(self.X, self.y) ensemble.run(self.X, self.y)
elif config['framework'] == 'pytorch': elif self.config['framework'] == 'pytorch':
from torch_models.Ensemble.Ensemble_torch import Ensemble_torch from torch_models.Ensemble.Ensemble_torch import Ensemble_torch
ensemble = Ensemble_torch(nb_models=config['ensemble'], model_type=config['model']) ensemble = Ensemble_torch(nb_models=self.config['ensemble'], model_type=self.config['model'])
ensemble.run(self.X, self.y) ensemble.run(self.X, self.y)
else: else:
raise ValueError("Choose a valid deep learning framework") raise ValueError("Choose a valid deep learning framework")
...@@ -48,18 +49,18 @@ class Trainer: ...@@ -48,18 +49,18 @@ class Trainer:
Data has to be prepared with the preparator and stored in data/prepared Data has to be prepared with the preparator and stored in data/prepared
""" """
try: try:
if config['task'] == 'prosaccade-clf': if self.config['task'] == 'prosaccade-clf':
logging.info("Loading LR task data") logging.info("Loading LR task data")
data = np.load('./data/prepared/LR_task_with_antisaccade_synchronised_' + config['preprocessing'] + '.npz') data = np.load('./data/prepared/LR_task_with_antisaccade_synchronised_' + self.config['preprocessing'] + '.npz')
elif config['task'] == 'gaze-reg': elif self.config['task'] == 'gaze-reg':
logging.info("Loading coordinate task data") logging.info("Loading coordinate task data")
data = np.load('./data/prepared/Position_task_with_dots_synchronised_' + config['preprocessing'] + '.npz') data = np.load('./data/prepared/Position_task_with_dots_synchronised_' + self.config['preprocessing'] + '.npz')
elif config['task'] == 'angle-reg' or config['task'] == 'amplitude-reg': elif self.config['task'] == 'angle-reg' or self.config['task'] == 'amplitude-reg':
logging.info(f"Loading {config['task']} regression data") logging.info(f"Loading {self.config['task']} regression data")
if config['dataset'] == 'calibration_task': if self.config['dataset'] == 'calibration_task':
data = np.load('./data/prepared/Direction_task_with_dots_synchronised_' + config['preprocessing'] + '.npz') data = np.load('./data/prepared/Direction_task_with_dots_synchronised_' + self.config['preprocessing'] + '.npz')
else: else:
data = np.load('./data/prepared/Direction_task_with_processing_speed_synchronised_' + config['preprocessing'] + '.npz') data = np.load('./data/prepared/Direction_task_with_processing_speed_synchronised_' + self.config['preprocessing'] + '.npz')
else: else:
raise ValueError("Choose a valid task in config.py") raise ValueError("Choose a valid task in config.py")
except: except:
......
...@@ -42,8 +42,8 @@ config['root_dir'] = '.' ...@@ -42,8 +42,8 @@ config['root_dir'] = '.'
config['preprocessing'] = 'min' # options: min and max config['preprocessing'] = 'min' # options: min and max
config['task'] = 'prosaccade-clf' #config['task'] = 'prosaccade-clf'
#config['task'] = 'gaze-reg' config['task'] = 'gaze-reg'
#config['task'] = 'angle-reg' #config['task'] = 'angle-reg'
#config['task'] = 'amplitude-reg' #config['task'] = 'amplitude-reg'
...@@ -62,18 +62,18 @@ config['framework'] = 'pytorch' ...@@ -62,18 +62,18 @@ config['framework'] = 'pytorch'
#config['framework'] = 'tensorflow' #config['framework'] = 'tensorflow'
################################################################## ##################################################################
# Choose model # Choose models
################################################################## ##################################################################
config['ensemble'] = 5 #number of models in the ensemble config['ensemble'] = 3 #number of models in the ensemble
config['pretrained'] = False # We can use a model pretrained on processing speed task config['pretrained'] = False # We can use a model pretrained on processing speed task
# MODELS FOR BENCHMARK # MODELS FOR BENCHMARK
#config['model'] = 'cnn' config['model'] = 'cnn'
#config['model'] = 'inception' #config['model'] = 'inception'
#config['model'] = 'eegnet' #config['model'] = 'eegnet'
#config['model'] = 'xception' #config['model'] = 'xception'
config['model'] = 'pyramidal_cnn' #config['model'] = 'pyramidal_cnn'
# EXPERIMENTAL MODELS # EXPERIMENTAL MODELS
#config['model'] = 'deepeye' #config['model'] = 'deepeye'
...@@ -88,10 +88,10 @@ with open('hyperparams.json', 'r') as file: ...@@ -88,10 +88,10 @@ with open('hyperparams.json', 'r') as file:
params = json.load(file) params = json.load(file)
config['learning_rate'] = params[config['model']][config['task']]['learning_rate'] config['learning_rate'] = params[config['model']][config['task']]['learning_rate']
config['regularization'] = params[config['model']][config['task']]['regularization'] config['regularization'] = params[config['model']][config['task']]['regularization']
config['epochs'] = 50 config['epochs'] = 2
config['batch_size'] = 64 config['batch_size'] = 64
config['early_stopping'] = True config['early_stopping'] = True
config['patience'] = 20 config['patience'] = 10
################################################################## ##################################################################
# Choose between ensemble and kerasTuner # Choose between ensemble and kerasTuner
...@@ -115,52 +115,54 @@ config['split'] = False ...@@ -115,52 +115,54 @@ config['split'] = False
#config['cluster'] = clustering() #config['cluster'] = clustering()
##################################################################
# Manage the model directory and output directory structure def create_folder():
################################################################## ##################################################################
# Create a unique output directory for this experiment. # Manage the model directory and output directory structure
timestamp = str(int(time.time())) ##################################################################
model_folder_name = timestamp # Create a unique output directory for this experiment.
model_folder_name += "_tf" if config['framework']=='tensorflow' else '_pytorch' timestamp = str(int(time.time()))
model_folder_name += "_pretrained_" + config['model'] if config['pretrained'] else "_" + config['model'] model_folder_name = timestamp
# Modify the model folder name depending on which task tuns model_folder_name += "_tf" if config['framework']=='tensorflow' else '_pytorch'
model_folder_name += "_" + config['task'] model_folder_name += "_pretrained_" + config['model'] if config['pretrained'] else "_" + config['model']
model_folder_name += "_prep" + config['preprocessing'] # Modify the model folder name depending on which task tuns
model_folder_name += f"_lr_{str(config['learning_rate'])}" model_folder_name += "_" + config['task']
model_folder_name += "_prep" + config['preprocessing']
if config['split']: model_folder_name += f"_lr_{str(config['learning_rate'])}"
model_folder_name += '_cluster'
if config['downsampled']: if config['split']:
model_folder_name += '_downsampled' model_folder_name += '_cluster'
if config['ensemble']>1: if config['downsampled']:
model_folder_name += '_ensemble' model_folder_name += '_downsampled'
if config['ensemble']>1:
config['model_dir'] = os.path.abspath(os.path.join(config['log_dir'], model_folder_name)) model_folder_name += '_ensemble'
if not os.path.exists(config['model_dir']):
os.makedirs(config['model_dir']) config['model_dir'] = os.path.abspath(os.path.join(config['log_dir'], model_folder_name))
if not os.path.exists(config['model_dir']):
config['info_log'] = config['model_dir'] + '/' + 'info.log' os.makedirs(config['model_dir'])
config['batches_log'] = config['model_dir'] + '/' + 'batches.log'
config['info_log'] = config['model_dir'] + '/' + 'info.log'
# Create a directory to store logs for tensorboard config['batches_log'] = config['model_dir'] + '/' + 'batches.log'
if config['tensorboard_on']:
config['tensorboard_log_dir'] = config['model_dir'] + "/logs/fit/" + timestamp # Create a directory to store logs for tensorboard
if not os.path.exists(config['tensorboard_log_dir']): if config['tensorboard_on']:
os.makedirs(config['tensorboard_log_dir']) config['tensorboard_log_dir'] = config['model_dir'] + "/logs/fit/" + timestamp
if not os.path.exists(config['tensorboard_log_dir']):
if not os.path.exists(config['model_dir'] + "/best_models/"): os.makedirs(config['tensorboard_log_dir'])
os.makedirs(config['model_dir'] + "/best_models/")
if not os.path.exists(config['model_dir'] + "/best_models/"):
if not os.path.exists(config['model_dir'] + "/plots/"): os.makedirs(config['model_dir'] + "/best_models/")
os.makedirs(config['model_dir'] + "/plots/")
if not os.path.exists(config['model_dir'] + "/plots/"):
if not os.path.exists(config['model_dir'] + "/metrics/"): os.makedirs(config['model_dir'] + "/plots/")
os.makedirs(config['model_dir'] + "/metrics/")
if not os.path.exists(config['model_dir'] + "/metrics/"):
# Save config to model dir os.makedirs(config['model_dir'] + "/metrics/")
import pickle
config_path = config['model_dir'] + "/config.p" # Save config to model dir
pickle.dump(config, open(config_path, "wb")) import pickle
config_path = config['model_dir'] + "/config.p"
pickle.dump(config, open(config_path, "wb"))
......
...@@ -5,15 +5,15 @@ ...@@ -5,15 +5,15 @@
"regularization": 0 "regularization": 0
}, },
"gaze-reg": { "gaze-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
}, },
"angle-reg": { "angle-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
}, },
"amplitude-reg": { "amplitude-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
} }
}, },
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
"regularization": 0 "regularization": 0
}, },
"gaze-reg": { "gaze-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
}, },
"angle-reg": { "angle-reg": {
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
"regularization": 0 "regularization": 0
}, },
"gaze-reg": { "gaze-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
}, },
"angle-reg": { "angle-reg": {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
"regularization": 0 "regularization": 0
}, },
"gaze-reg": { "gaze-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
}, },
"angle-reg": { "angle-reg": {
...@@ -77,15 +77,15 @@ ...@@ -77,15 +77,15 @@
"regularization": 0 "regularization": 0
}, },
"gaze-reg": { "gaze-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
}, },
"angle-reg": { "angle-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
}, },
"amplitude-reg": { "amplitude-reg": {
"learning_rate": 1e-3, "learning_rate": 1e-4,
"regularization": 0 "regularization": 0
} }
} }
......
from config import config from config import config, create_folder
import numpy as np import numpy as np
import scipy import scipy
#from utils.utils import select_best_model, comparison_plot_accuracy, comparison_plot_loss #from utils.utils import select_best_model, comparison_plot_accuracy, comparison_plot_loss
...@@ -15,10 +15,7 @@ def main(): ...@@ -15,10 +15,7 @@ def main():
# Set recursion limit higher # Set recursion limit higher
#sys.setrecursionlimit(200000) #sys.setrecursionlimit(200000)
# Start logging # Start logging
logging.basicConfig(filename=config['info_log'], level=logging.INFO)
logging.info('Started the Logging')
log_config()
start_time = time.time()
# Load the data # Load the data
""" """
...@@ -35,13 +32,84 @@ def main(): ...@@ -35,13 +32,84 @@ def main():
""" """
# Create trainer that runs ensemble of models # Create trainer that runs ensemble of models
trainer = Trainer() benchmark_task('prosaccade-clf')
trainer.train() benchmark_task('gaze-reg')
benchmark_task('angle-reg')
benchmark_task('amplitude-reg')
# select_best_model() # select_best_model()
# comparison_plot_loss() # comparison_plot_loss()
#logging.info("--- Runtime: %s seconds ---" % (time.time() - start_time))
#logging.info('Finished Logging')
def benchmark_task(task):
# 1 group: 1 learning rate with 5 models and 4 tasks = 20
##################################################################
# Task 1 Prosaccade
##################################################################
config['task'] = task
# For angle and direction reg we can choose 2 datasets
if task == 'angle-reg' or task == 'amplitude-reg':
config['dataset'] = 'calibration_task'
#config['dataset'] = 'processing_speed_task'
# MODELS FOR BENCHMARK
config['model'] = 'cnn'
create_folder()
logging.basicConfig(filename=config['info_log'], level=logging.INFO)
logging.info('Started the Logging')
log_config()
start_time = time.time()
trainer = Trainer(config)
trainer.train()
logging.info("--- Runtime: %s seconds ---" % (time.time() - start_time)) logging.info("--- Runtime: %s seconds ---" % (time.time() - start_time))
logging.info('Finished Logging') start_time = time.time()
config['model'] = 'inception'
create_folder()
logging.basicConfig(filename=config['info_log'], level=logging.INFO)
logging.info('Started the Logging')
log_config()
start_time = time.time()
trainer = Trainer(config)
trainer.train()
logging.info("--- Runtime: %s seconds ---" % (time.time() - start_time))
start_time = time.time()
config['model'] = 'eegnet'
create_folder()
logging.basicConfig(filename=config['info_log'], level=logging.INFO)
logging.info('Started the Logging')
log_config()
start_time = time.time()
trainer = Trainer(config)
trainer.train()
logging.info("--- Runtime: %s seconds ---" % (time.time() - start_time))
start_time = time.time()
config['model'] = 'xception'
create_folder()
trainer = Trainer(config)
trainer.train()
logging.info("--- Runtime: %s seconds ---" % (time.time() - start_time))
start_time = time.time()
config['model'] = 'pyramidal_cnn'
create_folder()
logging.basicConfig(filename=config['info_log'], level=logging.INFO)
logging.info('Started the Logging')
log_config()
start_time = time.time()
trainer = Trainer(config)
trainer.train()
logging.info("--- Runtime: %s seconds ---" % (time.time() - start_time))
start_time = time.time()
if __name__=='__main__': if __name__=='__main__':
......
...@@ -82,7 +82,7 @@ class BaseNet(nn.Module): ...@@ -82,7 +82,7 @@ class BaseNet(nn.Module):
nn.Linear(in_features=self.get_nb_features_output_layer(), out_features=1) nn.Linear(in_features=self.get_nb_features_output_layer(), out_features=1)
) )
else: # elif config['task'] == 'amplitude-reg': else: # elif config['task'] == 'amplitude-reg':
self.loss_fn = nn.L1Loss() self.loss_fn = nn.MSELoss()
self.output_layer = nn.Sequential( self.output_layer = nn.Sequential(
nn.Linear(in_features=self.get_nb_features_output_layer(), out_features=1) nn.Linear(in_features=self.get_nb_features_output_layer(), out_features=1)
) )
...@@ -121,7 +121,7 @@ class BaseNet(nn.Module): ...@@ -121,7 +121,7 @@ class BaseNet(nn.Module):
# Create the optimizer # Create the optimizer
optimizer = torch.optim.Adam(list(self.parameters()), lr=config['learning_rate']) optimizer = torch.optim.Adam(list(self.parameters()), lr=config['learning_rate'])
# Create a history to track ensemble performance # Create a history to track ensemble performance
#prediction_ensemble = Prediction_history(dataloader=test_dataloader, model=self) prediction_ensemble = Prediction_history(dataloader=test_dataloader, model=self)
# Train the model # Train the model
epochs = config['epochs'] epochs = config['epochs']
metrics = {'train_loss':[], 'val_loss':[], 'train_acc':[], 'val_acc':[]} if config['task'] == 'prosaccade-clf' else {'train_loss':[], 'val_loss':[]} metrics = {'train_loss':[], 'val_loss':[], 'train_acc':[], 'val_acc':[]} if config['task'] == 'prosaccade-clf' else {'train_loss':[], 'val_loss':[]}
...@@ -147,7 +147,7 @@ class BaseNet(nn.Module): ...@@ -147,7 +147,7 @@ class BaseNet(nn.Module):
# print("Free GPU mem after test loop:") # print("Free GPU mem after test loop:")
# print(f"memory {psutil.virtual_memory()}") # print(f"memory {psutil.virtual_memory()}")
# Add the predictions on the validation set, even if model was early stopped # Add the predictions on the validation set, even if model was early stopped
#prediction_ensemble.on_epoch_end() prediction_ensemble.on_epoch_end()
# print("Free GPU mem after prediction hist:") # print("Free GPU mem after prediction hist:")
# print(f"memory {psutil.virtual_memory()}") # print(f"memory {psutil.virtual_memory()}")
# Impementation of early stopping # Impementation of early stopping
...@@ -155,12 +155,11 @@ class BaseNet(nn.Module): ...@@ -155,12 +155,11 @@ class BaseNet(nn.Module):
if patience > config['patience']: if patience > config['patience']:
logging.info(f"Early stopping the model after {t} epochs") logging.info(f"Early stopping the model after {t} epochs")
self.early_stopped = True self.early_stopped = True
break
if val_loss_epoch >= best_val_loss: if val_loss_epoch >= best_val_loss:
logging.info(f"Validation loss did not improve, best was {best_val_loss}") logging.info(f"Validation loss did not improve, best was {best_val_loss}")
patience +=1 patience +=1
else: else:
best_val_loss = val_acc_epoch best_val_loss = val_loss_epoch
logging.info(f"Improved validation loss to: {best_val_loss}") logging.info(f"Improved validation loss to: {best_val_loss}")
patience = 0 patience = 0
...@@ -174,4 +173,4 @@ class BaseNet(nn.Module): ...@@ -174,4 +173,4 @@ class BaseNet(nn.Module):
if config['save_models']: if config['save_models']:
ckpt_dir = config['model_dir'] + '/best_models/' + config['model'] + '_nb_{}_'.format(self.model_number) + 'best_model.pth' ckpt_dir = config['model_dir'] + '/best_models/' + config['model'] + '_nb_{}_'.format(self.model_number) + 'best_model.pth'
torch.save(self.state_dict(), ckpt_dir) torch.save(self.state_dict(), ckpt_dir)
#return prediction_ensemble return prediction_ensemble
\ No newline at end of file \ No newline at end of file
...@@ -42,7 +42,7 @@ class Ensemble_torch: ...@@ -42,7 +42,7 @@ class Ensemble_torch:
elif config['task'] == 'gaze-reg': elif config['task'] == 'gaze-reg':
self.loss_fn = nn.MSELoss() self.loss_fn = nn.MSELoss()
else: # amplitude-task else: # amplitude-task
self.loss_fn = nn.L1Loss() self.loss_fn = nn.MSELoss() # can also try MAE
def run(self, x, y): def run(self, x, y):
...@@ -82,37 +82,35 @@ class Ensemble_torch: ...@@ -82,37 +82,35 @@ class Ensemble_torch:
logging.info("------------------------------------------------------------------------------------") logging.info("------------------------------------------------------------------------------------")
logging.info('Start training model number {}/{} ...'.format(i+1, self.nb_models)) logging.info('Start training model number {}/{} ...'.format(i+1, self.nb_models))
model = create_model(model_type=self.model_type, model_number=i) model = create_model(model_type=self.model_type, model_number=i)
model.fit(train_dataloader, validation_dataloader, test_dataloader) pred_ensemble = model.fit(train_dataloader, validation_dataloader, test_dataloader)
# Collect the prediction on the test set # Collect the prediction on the test set
prediction_list = sum_predictions(test_dataloader, model, model_number=i, prediction_list=prediction_list) #prediction_list = sum_predictions(test_dataloader, model, model_number=i, prediction_list=prediction_list)
"""
Compute ensemble metrics
if i == 0: if i == 0:
pred = pred_ensemble.predhis pred = pred_ensemble.predhis
else: else:
for j, pred_epoch in enumerate(pred_ensemble.predhis): for j, pred_epoch in enumerate(pred_ensemble.predhis):
for batch, predictions in enumerate(pred_epoch): for batch, predictions in enumerate(pred_epoch):
pred[j][batch] = pred[j][batch] + predictions pred[j][batch] = pred[j][batch] + predictions
"""
logging.info('Finished training model number {}/{} ...'.format(i+1, self.nb_models)) logging.info('Finished training model number {}/{} ...'.format(i+1, self.nb_models))
logging.info("------------------------------------------------------------------------------------") logging.info("------------------------------------------------------------------------------------")
"""
#Divide prediction list by number of models and compute the final loss and accuracy of the ensemble #Divide prediction list by number of models and compute the final loss and accuracy of the ensemble
ensemble_loss = compute_loss(loss_fn=self.loss_fn, dataloader=test_dataloader, pred_list=prediction_list, nb_models=self.nb_models) ensemble_loss = compute_loss(loss_fn=self.loss_fn, dataloader=test_dataloader, pred_list=prediction_list, nb_models=self.nb_models)
if config['task'] == 'prosaccade-clf': if config['task'] == 'prosaccade-clf':
ensemble_acc = compute_accuracy(dataloader=test_dataloader, pred_list=prediction_list, nb_models=self.nb_models) ensemble_acc = compute_accuracy(dataloader=test_dataloader, pred_list=prediction_list, nb_models=self.nb_models)
logging.info(f"ENSEMBLE ACCURACY ON TEST SET: {ensemble_acc}") logging.info(f"ENSEMBLE ACCURACY ON TEST SET: {ensemble_acc}")
logging.info(f"ENSEMBLE LOSS ON TEST SET: {ensemble_loss}") logging.info(f"ENSEMBLE LOSS ON TEST SET: {ensemble_loss}")
""" """
# Create the ensemble metrics # Create the ensemble metrics
for j, pred_epoch in enumerate(pred): for j, pred_epoch in enumerate(pred):
loss.append(compute_loss(loss_fn=self.loss_fn, dataloader=test_dataloader, pred_list=pred_epoch, nb_models=config['ensemble'])) loss.append(compute_loss(loss_fn=self.loss_fn, dataloader=test_dataloader, pred_list=pred_epoch, nb_models=config['ensemble']))
if config['task'] == 'prosaccade-clf': if config['task'] == 'prosaccade-clf':
accuracy.append(compute_accuracy(dataloader=test_dataloader, pred_list=pred_epoch, nb_models=config['ensemble'])) accuracy.append(compute_accuracy(dataloader=test_dataloader, pred_list=pred_epoch, nb_models=config['ensemble']))
"""
# Adapt model name if necessary