To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit d679800a authored by Lukas Wolf's avatar Lukas Wolf
Browse files

debug metrics

parent 5b62a48f
......@@ -2,12 +2,11 @@
# let's keep it here to have a clean code on other methods that we try
import time
import os
from Clusters.cluster import clustering as clustering
from Clusters.cluster2 import clustering as clustering2
from Clusters.cluster3 import clustering as clustering3
#from Clusters.cluster import clustering as clustering
#from Clusters.cluster2 import clustering as clustering2
#from Clusters.cluster3 import clustering as clustering3
config = dict()
"""
Parameters that can be chosen:
cnn: The simple CNN architecture
......@@ -40,8 +39,10 @@ config['root_dir'] = '.'
# Choose task and dataset
##################################################################
#config['task'] = 'prosaccade-clf'
config['task'] = 'gaze-reg'
config['preprocessing'] = 'min' # options: min and max
config['task'] = 'prosaccade-clf'
#config['task'] = 'gaze-reg'
#config['task'] = 'angle-reg'
if config['task'] != 'prosaccade-clf':
......@@ -52,32 +53,32 @@ if config['task'] != 'prosaccade-clf':
# Choose framework
##################################################################
config['framework'] = 'torch'
config['framework'] = 'pytorch'
#config['framework'] = 'tensorflow'
##################################################################
# Choose model
##################################################################
config['ensemble'] = 2 #number of models in the ensemble
config['ensemble'] = 3 #number of models in the ensemble
config['pretrained'] = False # We can use a model pretrained on processing speed task
#config['model'] = 'cnn'
#config['model'] = 'inception'
config['model'] = 'eegnet'
#config['model'] = 'xception'
#config['model'] = 'gazenet'
#config['model'] = 'eegnet'
config['model'] = 'xception'
#config['model'] = 'pyramidal_cnn'
#config['model'] = 'deepeye'
#config['model'] = 'deepeye-rnn'
#config['model'] = 'gazenet'
#config['model'] = 'siamese' # Note that you have to set data_mode to sacc_fix for this model
##################################################################
# Hyper-parameters and training configuration.
##################################################################
config['learning_rate'] = 1e-2 # fix only: 1e-2, sac only: 1e-3, sac_fix: 1e-3 , fix_sac_fix: 1e-4, for inception on angle 1e-5
config['learning_rate'] = 1e-3 # fix only: 1e-2, sac only: 1e-3, sac_fix: 1e-3 , fix_sac_fix: 1e-4, for inception on angle 1e-5
config['regularization'] = 0 # fix only: 1e-3, sac only: 1e-2, sac_fix: 1, fix_sac_fix: 5, for inception on angle 0
config['epochs'] = 2
config['epochs'] = 3
config['batch_size'] = 64
##################################################################
......@@ -92,7 +93,7 @@ config['run'] = 'ensemble'
config['tensorboard_on'] = False
config['sanity_check'] = False
config['plot_model'] = True
config['save_models'] = False
config['save_models'] = True
##################################################################
# Options for prosaccade task, currently not used for regression
......
......@@ -9,7 +9,6 @@ import psutil
from torch_models.torch_utils.utils import timing_decorator
from memory_profiler import profile
class Prediction_history:
"""
Collect predictions of the given validation set after each epoch
......@@ -21,7 +20,7 @@ class Prediction_history:
self.model = model
#@timing_decorator
#@profile
@profile
def on_epoch_end(self):
with torch.no_grad():
y_pred = []
......@@ -30,15 +29,8 @@ class Prediction_history:
if torch.cuda.is_available():
x = x.cuda()
y = y.cuda()
pred = self.model(x)
y_pred.append(pred)
if batch==0:
cat = torch.cat((y, pred), dim=1)
print(f"COMPARE Y AND PRED")
print(cat)
# Remove batch from GPU
del x
del y
......@@ -59,9 +51,8 @@ class BaseNet(nn.Module):
self.verbose = verbose
self.model_number = model_number
self.batch_size = batch_size
self.nb_channels = self.input_shape[1]
self.timesamples = self.input_shape[0]
#self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.nb_channels = self.input_shape[1]
# Create output layer depending on task and
if config['task'] == 'prosaccade-clf':
......@@ -116,7 +107,7 @@ class BaseNet(nn.Module):
# Create the optimizer
optimizer = torch.optim.Adam(list(self.parameters()), lr=config['learning_rate'])
# Create a history to track ensemble performance
prediction_ensemble = Prediction_history(dataloader=test_dataloader, device=self.device, model=self)
prediction_ensemble = Prediction_history(dataloader=test_dataloader, model=self)
# Train the model
epochs = config['epochs']
for t in range(epochs):
......@@ -135,7 +126,7 @@ class BaseNet(nn.Module):
# print("Free GPU mem after prediction hist:")
# print(f"memory {psutil.virtual_memory()}")
# Done with training this model
if config['save_models'] and self.model_number==0:
if config['save_models']:
ckpt_dir = config['model_dir'] + '/best_models/' + config['model'] + '_nb_{}_'.format(self.model_number) + 'best_model.pth'
torch.save(self.state_dict(), ckpt_dir)
return prediction_ensemble
\ No newline at end of file
......@@ -61,6 +61,7 @@ class ConvNet(ABC, BaseNet):
# Stack the modules
shortcut_cnt = 0
for d in range(self.depth):
#print(f"x after block {d} {x.size()}")
x = self.conv_blocks[d](x)
if self.use_residual and d % 3 == 2:
res = self.shortcuts[shortcut_cnt](input_res)
......@@ -72,8 +73,12 @@ class ConvNet(ABC, BaseNet):
x = self.gap_layer_pad(x)
x = self.gap_layer(x)
#print(f"x after gap {x.size()}")
x = x.view(self.batch_size, -1)
#print(f"x before output {x.size()}")
output = self.output_layer(x) # Defined in BaseNet
return output
......
......@@ -14,7 +14,7 @@ from torch_models.InceptionTime.InceptionTime import Inception
from torch_models.Xception.Xception import XCEPTION
from torch_models.EEGNet.eegNet import EEGNet
from torch_models.PyramidalCNN.PyramidalCNN import PyramidalCNN
from torch_models.GazeNet.Repo.gazeNet.model import gazeNET
#from torch_models.GazeNet.Repo.gazeNet.model import gazeNET
class Ensemble_torch:
"""
......@@ -110,6 +110,6 @@ def create_model(model_type, model_number):
elif model_type == 'pyramidal_cnn':
model = PyramidalCNN(input_shape=config['cnn']['input_shape'], epochs=config['epochs'],
model_number=model_number, batch_size=config['batch_size'])
elif model_type == 'gazenet':
model = gazeNET(input_shape=config['gazenet']['input_shape'], seed=42, batch_size=config['batch_size'])
# elif model_type == 'gazenet':
# model = gazeNET(input_shape=config['gazenet']['input_shape'], seed=42, batch_size=config['batch_size'])
return model
\ No newline at end of file
......@@ -11,7 +11,7 @@ class XCEPTION(ConvNet):
as separable convolutions and can achieve better accuracy than the Inception architecture. It is made of modules in a specific depth.
Each module, in our implementation, consists of a separable convolution followed by batch normalization and a ReLu activation layer.
"""
def __init__(self, input_shape, kernel_size=40, nb_filters=128, verbose=True, epochs=1, batch_size=64,
def __init__(self, input_shape, kernel_size=40, nb_filters=64, verbose=True, epochs=1, batch_size=64,
use_residual=True, depth=6, model_number=0, regularization=0):
self.regularization = regularization
self.nb_features = nb_filters # Exception passes a tensor of shape (timesamples, nb_filters) through the network
......
import logging
from config import config
import torch
from torch import nn
from torch_models.torch_utils.utils import get_gpu_memory
from torch_models.torch_utils.utils import timing_decorator
from memory_profiler import profile
#import torch.profiler
#@timing_decorator
#@profile
def train_loop(dataloader, model, loss_fn, optimizer, device):
@profile
def train_loop(dataloader, model, loss_fn, optimizer):
"""
Performs one epoch of training the model through the dataset stored in dataloader
Using the given loss_fn and optimizer
Returns training loss of the epoch to be tracked by the caller
"""
size = len(dataloader.dataset)
training_loss, correct = 0, 0
for batch, (X, y) in enumerate(dataloader):
# Move tensors to GPU
# print(f"device type {type(device)}")
# print(f"device {device}")
# X.to(device)
# y.to(device)
if torch.cuda.is_available():
X = X.cuda()
y = y.cuda()
# print(f"X type {X.type()}")
# print(f"X on cuda: {X.is_cuda}")
# Compute prediction and loss
pred = model(X)
loss = loss_fn(pred.float(), y.float())
correct = 0
# Backpropagation
loss = loss_fn(pred, y)
# Backpropagation and optimization
optimizer.zero_grad()
loss.backward()
optimizer.step()
# profiler.step()
# Print metrics every n batches
if batch == size - 1:
loss, current = loss.item(), batch * len(X)
logging.info(f"Avg training loss: {loss:>7f} samples passed [{current:>5d}/{size:>5d}]")
if config['task'] == 'prosaccade-clf':
pred = (pred > 0.5).float()
correct += (pred == y).float().sum()
correct /= config['batch_size']
logging.info(f"Avg training accuracy {correct:>8f}")
# Remove from GPU
#del X
#del y
#torch.cuda.empty_cache()
# Add up metrics
training_loss += loss.item()
pred = (pred > 0.5).float()
correct += (pred == y).float().sum()
logging.info(f"Avg training loss: {training_loss/size:>7f}")
if config['task'] == 'prosaccade-clf':
logging.info(f"Avg training accuracy {correct/size:>8f}")
#@timing_decorator
#@profile
def test_loop(dataloader, model, loss_fn, device):
@profile
def test_loop(dataloader, model, loss_fn):
"""
Performs one prediction run through the test set stored in the dataloader
Prints the loss function computed with the prediction pred and the labels y
"""
size = len(dataloader.dataset)
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
for batch, (X, y) in enumerate(dataloader):
# Move tensors to GPU
if torch.cuda.is_available():
X = X.cuda()
y = y.cuda()
#X.to(device)
#y.to(device)
# Predict
pred = model(X)
# Compute metrics
test_loss += loss_fn(pred.float(), y.float()).item()
test_loss += loss_fn(pred, y).item()
if config['task'] == 'prosaccade-clf':
pred = (pred > 0.5).float()
correct += (pred == y).float().sum()
# Remove from GPU
#del X
#del y
#torch.cuda.empty_cache()
test_loss /= size
logging.info(f"Avg test loss: {test_loss:>8f}")
#print(f"correct {correct}")
#print(f"test loss {test_loss}")
#print(f"size {size}")
#print(f"correct/size {correct / size}")
#print(f"test loss / size {test_loss / size}")
if config['task'] == 'prosaccade-clf':
correct /= size
logging.info(f"Avg test accuracy {correct:>8f}")
......@@ -2,6 +2,8 @@ import torch
import subprocess as sp
import os
import time
from config import config
from memory_profiler import profile
def get_gpu_memory():
"""
......@@ -28,13 +30,14 @@ def timing_decorator(func):
return wrapper
@profile
def compute_loss(loss_fn, dataloader, pred_list, nb_models):
"""
Computes the loss across all batches between the true labels in the dataloader and the batch predictions in pred_list
Computes the loss across all batches between the true labels in the dataloader and the batch predictions in pred_list
------------------
Input:
loss_fn: pytorch loss function
dataloader: contains the validation dataset X,y
dataloader: contains the validation dataset X, y
pred_list: list of tensors (one for each batch, size (batch, y))
nb_models: number of models in the ensemble. divide by it to obtain averaged output
------------------
......@@ -45,9 +48,19 @@ def compute_loss(loss_fn, dataloader, pred_list, nb_models):
for batch, (X, y) in enumerate(dataloader):
if torch.cuda.is_available():
y = y.cuda()
loss.append(loss_fn(y, torch.div(pred_list[batch], nb_models).float()))
# Average the predictions from the ensemble
pred = pred_list[batch]
pred = torch.div(pred, nb_models).float() # is already on gpu
"""
if batch==0:
cat = torch.cat((y, pred), dim=1)
print("Ensemble labels labels vs predictions")
print(cat)
"""
loss.append(loss_fn(y, pred) / config['batch_size'])
return sum(loss) / len(loss)
@profile
def compute_accuracy(dataloader, pred_list, nb_models):
"""
Computes the accuracy across all batches between the true labels in the dataloader and the batch predictions in pred_list
......@@ -64,8 +77,16 @@ def compute_accuracy(dataloader, pred_list, nb_models):
size = len(dataloader.dataset)
for batch, (X, y) in enumerate(dataloader):
if torch.cuda.is_available():
pred = pred_list[batch].cuda()
y = y.cuda()
pred = torch.round(pred)
correct += (pred - y).type(torch.float).sum().item()
# Average the predictions from the ensemble
pred = pred_list[batch]
pred = torch.div(pred, nb_models).float() # is already on gpu pred = torch.round(pred)
"""
if batch==0:
cat = torch.cat((y, pred), dim=1)
print("Ensemble labels labels vs predictions")
print(cat)
"""
pred = (pred > 0.5).float()
correct += (pred == y).float().sum()
return correct / size
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment