Commit 7b735dae authored by Lukas Wolf's avatar Lukas Wolf
Browse files

new torch models

parent 6a9d31a3
......@@ -2,6 +2,7 @@ import tensorflow as tf
from config import config
from utils.utils import *
from tf_models.utils.losses import angle_loss
from tf_models.utils.plot import plot_model
import logging
from tf_models.CNN.CNN import CNN
......@@ -12,9 +13,6 @@ from tf_models.Xception.Xception import XCEPTION
from tf_models.InceptionTime.Inception import INCEPTION
from tf_models.EEGNet.eegNet import EEGNet
#TODO: Create a BaseEnsemble class that both tensorflow and torch ensembles inherit
# The trainer then has the same interface methods (run, predict, load, etc.) to interact with ensembles
class Ensemble_tf:
"""
The Ensemble is a model itself, which contains a number of models that are averaged on prediction.
......@@ -71,6 +69,8 @@ class Ensemble_tf:
print("------------------------------------------------------------------------------------")
print('Start training model number {}/{} ...'.format(i+1, self.nb_models))
model = self.models[i]
if config['plot_model'] and i == 0:
plot_model(model.get_model())
hist, pred_ensemble = model.fit(X,y)
# Collect the predictions on the validation sets
if i == 0:
......
......@@ -29,43 +29,6 @@ def plot_batches_log_loss(model_name):
save_path = '../images/'
plt.savefig(fname=save_path+model_name)
#TODO: create a class FilterPlot for better structuring
def plot_filters(model, model_dir):
    """
    Create a plot for every filter in every convolutional layer of the model
    and save it in the model's directory under filterplots/.

    model:     a keras model exposing input_shape and layers
               (assumes input_shape is (None, height, width) — TODO confirm)
    model_dir: run directory of the model; plots are written to a subdirectory
    """
    path = model_dir + '/filterplots/'
    # Create a dir for the plots; exist_ok avoids crashing when the dir
    # already exists from a previous run
    os.makedirs(path, exist_ok=True)
    print(model.input_shape)
    (none, img_height, img_width) = model.input_shape
    # Run the gradient ascent algorithm for all convolutional layers
    for layer in model.layers:
        # Conv layers are identified duck-typed via their kernel_size attribute
        if hasattr(layer, 'kernel_size'):
            # Set up a model that returns the activation values for our target layer
            feature_extractor = keras.Model(inputs=model.inputs, outputs=layer.output)
            # Run it for all filters in the conv layer; for keras Conv1D the
            # kernel weights have shape (kernel_size, in_channels, nb_filters)
            weights = layer.get_weights()
            nb_filters = weights[0].shape[2]
            for i in range(nb_filters):
                loss, img = visualize_filter(i, img_width, img_height, feature_extractor)
                # Plot and save it. 'Blues' is the valid matplotlib colormap
                # name (the previous 'Blue' raises a ValueError).
                plt.imshow(img.T, interpolation='none', cmap='Blues')
                # layer.name replaces the previously undefined 'layer_name'
                plt.title(config['model'] + ", " + layer.name + ", Filter # {}".format(i))
                plt.ylabel("Channels")
                plt.xlabel("Time samples")
                plt.margins(0, 0)
                plt.axvline(x=64)
                plt.xlim(0, 70)
                plt.xticks(np.arange(0, 65, step=16))
                #plt.gca().xaxis.set_major_locator(plt.NullLocator())
                #plt.gca().yaxis.set_major_locator(plt.NullLocator())
                plt.savefig(path + layer.name + "_filternum_{}".format(i))
                # Clear the figure so titles/lines from this filter do not
                # accumulate into the next filter's plot
                plt.clf()
def plot_model(model, dir=config['model_dir'], show_shapes=True):
"""
Plot the model as graph and save it
......@@ -73,6 +36,14 @@ def plot_model(model, dir=config['model_dir'], show_shapes=True):
pathname = dir + "/model_plot.png"
keras.utils.plot_model(model, to_file=pathname, show_shapes=show_shapes)
"""
Following some functionality for the gradient ascent method to compute inputs that maximize specific kernel activations
"""
def compute_loss(input_image, filter_index, feature_extractor):
"""
Part of the gradient ascent algorithm to maximize filter activation
......
......@@ -3,35 +3,35 @@ from torch import nn
import numpy as np
from config import config
import logging
from sklearn.model_selection import train_test_split
from torch_models.torch_utils.dataloader import create_dataloader
from torch_models.torch_utils.training import train_loop, test_loop
from torch_models.torch_utils.training import train_loop, test_loop
class Prediction_history():
class Prediction_history:
"""
Prediction history for pytorch model ensembles
Collect predictions of the given validation set after each epoch
"""
def __init__(self, X_val, y_val) -> None:
# Create tensor
self.X_val = torch.tensor(X_val)
self.y_val = torch.tensor(y_val)
self.X_val.cuda()
self.y_val.cuda()
def __init__(self, dataloader) -> None:
self.dataloader = dataloader
self.predhis = []
def on_epoch_end(self, model):
y_pred = model(self.X_val.float())
# Transform back to numpy array because ensemble handles it that way
self.predhis.append(y_pred.numpy())
"""
When epoch ends predict the validation set and store it in predhis=[ypred_epoch_1, ypred_epoch_2,...]
"""
y_pred = []
for x, y in self.dataloader:
y_pred.append(model(x.float()))
self.predhis.append(y_pred)
class BaseNet(nn.Module):
"""
BaseNet class for ConvNet and EEGnet to inherit common functionality
"""
def __init__(self, input_shape, epochs=50, verbose=True, model_number=0, batch_size=64):
"""
Initialize common variables of models based on BaseNet
Create the common output layer dependent on the task to run
"""
super().__init__()
self.input_shape = input_shape
self.epochs = epochs
......@@ -41,64 +41,54 @@ class BaseNet(nn.Module):
self.nb_channels = self.input_shape[1]
self.timesamples = self.input_shape[0]
# Set the number of features that are passed throught the internal network (except input layer)
if config['model'] == 'cnn':
self.num_features = 16
elif config['model'] == 'deepeye':
self.num_features = 164
else: # all other current models have tensors of width 64
self.num_features = 64
# Compute the number of features for the output layer
eegNet_out = 4*2*7
convNet_out = self.num_features * self.timesamples
# Create output layer depending on task and
if config['task'] == 'prosaccade_clf':
self.loss_fn = nn.BCELoss()
self.output_layer = nn.Sequential(
nn.Linear(in_features=eegNet_out if config['model'] == 'eegnet' else convNet_out, out_features=1)
nn.Linear(in_features=self.get_nb_features_output_layer(), out_features=1),
nn.Sigmoid()
)
elif config['task'] == 'gaze-reg':
self.loss_fn = nn.MSELoss()
self.output_layer = nn.Sequential(
nn.Linear(in_features=eegNet_out if config['model'] == 'eegnet' else convNet_out, out_features=2)
nn.Linear(in_features=self.get_nb_features_output_layer(), out_features=2)
)
else: #elif config['task'] == 'angle-reg':
from torch_models.torch_utils.custom_losses import angle_loss
self.loss_fn = angle_loss
self.output_layer = nn.Sequential(
nn.Linear(in_features=eegNet_out if config['model'] == 'eegnet' else convNet_out, out_features=1)
nn.Linear(in_features=self.get_nb_features_output_layer(), out_features=1)
)
# abstract method
def forward(self, x):
"""
Implements a forward pass of the network
"""
pass
# abstract method
def get_nb_features_output_layer(self):
"""
Return the number of features that the output layer should take as input
"""
pass
def get_model(self):
return self
# abstract method
def _split_model(self):
pass
# abstract method
def _build_model(self):
pass
def fit(self, x, y, subjectID=None):
logging.info("------------------------------------------------------------------------------------")
logging.info(f"Fitting model number {self.model_number}")
# Create a split
x = np.transpose(x, (0, 2, 1)) # (batch_size, samples, channels) to (bs, ch, samples) as torch conv layers want it
X_train, X_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
# Create dataloaders
train_dataloader = create_dataloader(X_train, y_train, batch_size=config['batch_size'])
test_dataloader = create_dataloader(X_val, y_val, batch_size=config['batch_size'])
def fit(self, train_dataloader, test_dataloader, subjectID=None):
"""
Fit the model on the dataset defined by data x and labels y
"""
print("------------------------------------------------------------------------------------")
print(f"Fitting model number {self.model_number}")
# Create the optimizer
optimizer = torch.optim.Adam(list(self.parameters()), lr=config['learning_rate'])
# Create history and log
prediction_ensemble = Prediction_history(X_val, y_val)
prediction_ensemble = Prediction_history(dataloader=test_dataloader)
# Train the model
epochs = config['epochs']
for t in range(epochs):
......@@ -106,10 +96,8 @@ class BaseNet(nn.Module):
train_loop(train_dataloader, self.float(), self.loss_fn, optimizer)
test_loop(test_dataloader, self.float(), self.loss_fn)
prediction_ensemble.on_epoch_end(model=self)
logging.info(f"Finished model number {self.model_number}")
print(f"Finished model number {self.model_number}")
# Save model
ckpt_dir = config['model_dir'] + '/best_models/' + config['model'] + '_nb_{}_'.format(self.model_number) + 'best_model.h5'
torch.save(self, ckpt_dir)
ckpt_dir = config['model_dir'] + '/best_models/' + config['model'] + '_nb_{}_'.format(self.model_number) + 'best_model.pth'
torch.save(self.state_dict(), ckpt_dir)
return prediction_ensemble
\ No newline at end of file
from torch.nn.modules.batchnorm import BatchNorm1d
from config import config
from torch_models.ConvNetTorch import ConvNet
import torch
import torch.nn as nn
import math
from torch_models.Modules import Shortcut_layer
from torch_models.torch_utils.padding import pad_conv1d, pad_pool1d
from torch_models.Modules import Pad_Conv, Pad_Pool
class CNN(ConvNet):
"""
......@@ -13,21 +8,24 @@ class CNN(ConvNet):
"""
def __init__(self, input_shape, kernel_size=64, epochs = 50, nb_filters=16, verbose=True, batch_size=64,
use_residual=True, depth=12, regularization=0.01, model_number=0):
super().__init__(input_shape, kernel_size=kernel_size, epochs=epochs,
self.regularization = regularization
self.nb_features = nb_filters # For CNN simply the number of filters inside the network
super().__init__(input_shape, kernel_size=kernel_size, epochs=epochs, nb_features=self.nb_features,
nb_filters=nb_filters, verbose=verbose, batch_size=batch_size,
use_residual=use_residual, depth=depth, model_number=model_number)
self.regularization = regularization
self.nb_filters = nb_filters
def _module(self, depth):
"""
The module of CNN is made of a simple convolution with batch normalization and ReLu activation. Finally, MaxPooling is also used.
We use two custom padding modules such that keras-like padding='same' is achieved, i.e. tensor shape stays constant.
"""
return nn.Sequential(
nn.Conv1d(in_channels=self.nb_channels if depth==0 else self.num_features,
out_channels=self.num_features, kernel_size=self.kernel_size),
nn.BatchNorm1d(num_features=self.num_features),
Pad_Conv(kernel_size=self.kernel_size, value=0),
nn.Conv1d(in_channels=self.nb_channels if depth==0 else self.nb_features,
out_channels=self.nb_features, kernel_size=self.kernel_size, bias=False),
nn.BatchNorm1d(num_features=self.nb_features),
nn.ReLU(),
Pad_Pool(left=0, right=1, value=0),
nn.MaxPool1d(kernel_size=2, stride=1)
)
"""
......@@ -37,28 +35,4 @@ class CNN(ConvNet):
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation(activation='relu')(x)
x = tf.keras.layers.MaxPool1D(pool_size=2, strides=1, padding='same')(x)
"""
def _run_conv(self, module, tensor):
"""
Run the tensor x through the CNN module such that shape stays the same
Similar to tf.keras padding=same

module: nn.Sequential assumed to be exactly [Conv1d, BatchNorm1d, ReLU,
        MaxPool1d] in that order — NOTE(review): the index-based unpacking
        below silently breaks if _module() changes its layer order; confirm.
tensor: input tensor — presumably (batch, channels, timesamples); verify
        against the caller's transpose.
Returns the transformed tensor with the time dimension unchanged.
"""
# Dereference the parts of the module by fixed position
conv = module[0]
batchnorm = module[1]
activation = module[2]
maxpool = module[3]
# Pad left/right before the convolution so the time length is preserved
# (keras-style padding='same')
x = pad_conv1d(tensor, kernel_size=self.kernel_size, value=0)
#print(f"x after pad conv {x.size()}")
x = conv(x)
#print(f"x after conv {x.size()}")
x = batchnorm(x)
x = activation(x)
# Pad for maxpooling so the stride-1 pool also keeps the time length
x = pad_pool1d(x, value=0)
#print(f"x after pad pool {x.size()}")
x = maxpool(x)
#print(f"x after maxpool {x.size()}")
return x
\ No newline at end of file
"""
\ No newline at end of file
from __future__ import print_function
import sys
from abc import ABC, abstractmethod
from re import X
......@@ -10,18 +7,14 @@ import torch.nn as nn
import torch.nn.functional as F
from config import config
import logging
from torch_models.torch_utils.padding import pad_conv1d, pad_pool1d
def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
from torch_models.Modules import Pad_Conv, Pad_Pool
class ConvNet(ABC, BaseNet):
"""
This class defines all the common functionality for more complex convolutional nets
"""
def __init__(self, input_shape, kernel_size=32, nb_filters=32, verbose=True, batch_size=64,
def __init__(self, input_shape, nb_features, kernel_size=32, nb_filters=32, verbose=True, batch_size=64,
use_residual=False, depth=6, epochs=2, preprocessing = False, model_number=0):
"""
We define the layers of the network in the __init__ function
......@@ -35,11 +28,13 @@ class ConvNet(ABC, BaseNet):
self.kernel_size = kernel_size
self.nb_filters = nb_filters
self.preprocessing = preprocessing
#self.nb_features = nb_features
# Define all the convolutional and shortcut modules that we will need in the model
self.conv_blocks = nn.ModuleList([self._module(d) for d in range(self.depth)])
self.shortcuts = nn.ModuleList([self._shortcut(d) for d in range(int(self.depth / 3))])
self.gap_layer = nn.AvgPool1d(kernel_size=2, stride=1)
self.gap_layer_pad = Pad_Pool(left=0, right=1, value=0)
#logging.info('Parameters of {}, model number {}: '.format(self, model_number))
logging.info('--------------- use residual : ' + str(self.use_residual))
......@@ -51,68 +46,42 @@ class ConvNet(ABC, BaseNet):
if self.verbose:
print(self)
def forward(self, x):
"""
Implements the forward pass of the network
Note that every layer used here must be defined in __init__ to be persistent
Note that every layer used here must be defined in __init__ to be persistent and able to be trained
"""
if self.preprocessing:
preprocessed = self._preprocessing(x)
x = preprocessed
x = self._preprocessing(x)
input_res = x # residual shortcut connection
# Stack the modules
shortcut_cnt = 0
for d in range(self.depth):
conv_block = self.conv_blocks[d]
#print(f"x before conv at depth {d} {x.size()}")
x = self._run_conv(module=conv_block, tensor=x)
x = self.conv_blocks[d](x)
#print(f"x after conv at depth {d} {x.size()}")
if self.use_residual and d % 3 == 2:
shortcut_block = self.shortcuts[shortcut_cnt]
#print(f"input_res before shortcut at depth {d} {input_res.size()}")
res = self._run_shortcut(module=shortcut_block, tensor=input_res)
res = self.shortcuts[shortcut_cnt](input_res)
#print(f"input_res after shortcut at depth {d} {input_res.size()}")
shortcut_cnt += 1
#print(f" x shape {x.size()} res shape {res.size()}")
x = torch.add(x, res)
x = nn.functional.relu(x)
input_res = x
# Pad for the avgpool1d gap_layer
x = pad_pool1d(x)
x = self.gap_layer_pad(x) # Pad for the avgpool1d gap_layer
x = self.gap_layer(x)
#print(f"x size after gap layer: {x.size()}")
if config['split']:
return x
#print(f"x size after gap layer: {x.size()}")
print(f"x size before view: {x.size()}")
#print(f"x size before view: {x.size()}")
x = x.view(self.batch_size, -1)
print(f"x size before output layer: {x.size()}")
#print(f"x size before output layer: {x.size()}")
output = self.output_layer(x) # Defined in BaseNet
return output
def _run_conv(self, module, x):
# Implemented by each model for its custom convolution module
pass
def _run_shortcut(self, module, tensor):
"""
Run the tensor x through the shortcut such that shape stays the same
Similar to tf.keras padding=same
Equivalent for all models implementing ConvNetTorch
"""
# Dereference the parts of the module
conv = module[0]
batchnorm = module[1]
# Pad for convolution
x = pad_conv1d(tensor, kernel_size=self.kernel_size, value=0)
x = conv(x)
x = batchnorm(x)
return x
def _shortcut(self, depth):
"""
......@@ -120,10 +89,17 @@ class ConvNet(ABC, BaseNet):
This is the same for all our convolutional models
"""
return nn.Sequential(
nn.Conv1d(in_channels=self.nb_channels if depth==0 else self.num_features,
out_channels=self.num_features, kernel_size=self.kernel_size),
nn.BatchNorm1d(num_features=self.num_features)
)
Pad_Conv(kernel_size=self.kernel_size, value=0),
nn.Conv1d(in_channels=self.nb_channels if depth==0 else self.nb_features,
out_channels=self.nb_features, kernel_size=self.kernel_size),
nn.BatchNorm1d(num_features=self.nb_features)
)
def get_nb_features_output_layer(self):
"""
Return number of features passed into the output layer of the network
"""
return self.nb_features * self.timesamples
# abstract method
def _preprocessing(self, input_tensor):
......
......@@ -9,12 +9,11 @@ class Pad_Pool(nn.Module):
def __init__(self, left=0, right=1, value=0):
super().__init__()
self.left = left
self.rigth = right
self.right = right
self.value = value
def forward(self, x):
pad = nn.ConstantPad1d(padding=(self.left, self.right), value=self.value)
return pad(x)
return nn.ConstantPad1d(padding=(self.left, self.right), value=self.value)(x)
class Pad_Conv(nn.Module):
"""
......@@ -23,11 +22,30 @@ class Pad_Conv(nn.Module):
"""
def __init__(self, kernel_size, value=0):
super().__init__()
self.kernel_size = kernel_size
self.value = value
self.left = math.floor(kernel_size/2)-1
self.right = math.floor(kernel_size/2)
def forward(self, x):
pad = nn.ConstantPad1d(padding=(self.left, self.right), value=self.value)
return pad(x)
\ No newline at end of file
return nn.ConstantPad1d(padding=(self.left, self.right), value=self.value)(x)
class TCSConv1d(nn.Module):
    """
    Time-Channel Separable 1D convolution (depthwise followed by pointwise)
    that keeps the time dimension constant, similar to padding='same' in keras.

    mother: the owning model; supplies nb_channels, nb_filters and kernel_size
            (NOTE(review): assumed attributes — confirm against ConvNet)
    depth:  position of this block in the network; at depth 0 the input still
            carries the raw input channels, afterwards nb_filters channels
    """
    def __init__(self, mother, depth):
        super(TCSConv1d, self).__init__()
        in_channels = mother.nb_channels if depth == 0 else mother.nb_filters
        self.pad_depthwise = Pad_Conv(mother.kernel_size)
        # groups must equal in_channels to make the convolution depthwise.
        # The previous code used nb_features for depth > 0 while in_channels
        # used nb_filters — Conv1d raises whenever the two differ, since
        # in_channels must be divisible by groups.
        self.depthwise = nn.Conv1d(in_channels=in_channels,
                                   out_channels=mother.nb_filters,
                                   kernel_size=mother.kernel_size, bias=False,
                                   groups=in_channels)
        # A kernel-size-1 convolution does not shrink the time axis, so no
        # padding is needed before it. Padding with Pad_Conv(kernel_size) here
        # (as before) grew the tensor by kernel_size-1 samples per call,
        # violating the shape-constant contract of this module.
        self.pointwise = nn.Conv1d(mother.nb_filters, mother.nb_filters, kernel_size=1)

    def forward(self, x):
        x = self.pad_depthwise(x)
        x = self.depthwise(x)
        return self.pointwise(x)
......@@ -18,9 +18,8 @@ def train_loop(dataloader, model, loss_fn, optimizer):
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 20 == 0:
if batch % 32 == 0:
loss, current = loss.item(), batch * len(X)
#TODO: Log this to some file to create plots, check tensorflow logs
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
......
import torch
def compute_loss(loss_fn, dataloader, pred_list, nb_models):
    """
    Compute the mean loss across all batches between the true labels in the
    dataloader and the batch predictions in pred_list.
    ------------------
    Input:
     loss_fn: pytorch loss function, called as loss_fn(input, target)
     dataloader: contains the validation dataset X,y
     pred_list: list of tensors (one per batch, size (batch, y)) holding the
                summed ensemble outputs
     nb_models: number of models in the ensemble; predictions are divided by it
                to obtain the averaged ensemble output
    ------------------
    Output:
     Scalar value of the mean loss over all batches of the validation set
     (0.0 for an empty dataloader)
    """
    losses = []
    for batch, (X, y) in enumerate(dataloader):
        # Average the summed ensemble output before comparing to the target
        avg_pred = torch.div(pred_list[batch], nb_models).float()
        # pytorch losses expect (input, target); the previous (target, input)
        # order is wrong for asymmetric losses such as BCELoss
        losses.append(loss_fn(avg_pred, y.float()))
    # Guard against division by zero when the dataloader yields no batches
    if not losses:
        return 0.0
    return sum(losses) / len(losses)
def compute_accuracy(dataloader, pred_list, nb_models):
    """
    Compute the accuracy across all batches between the true labels in the
    dataloader and the batch predictions in pred_list.
    ------------------
    Input:
     dataloader: contains the validation dataset X,y (binary labels assumed —
                 matches the sigmoid output of the prosaccade classifier)
     pred_list: list of tensors (one per batch, size (batch, y)) holding the
                summed ensemble outputs
     nb_models: number of models in the ensemble; predictions are divided by it
                to obtain the averaged ensemble output
    ------------------
    Output:
     Scalar value of the mean accuracy over all batches of the validation set
    """
    correct = 0
    size = len(dataloader.dataset)
    # enumerate was missing before, which made the tuple unpacking crash
    for batch, (X, y) in enumerate(dataloader):
        # Average the summed ensemble output (nb_models was documented but
        # unused before)
        avg_pred = torch.div(pred_list[batch], nb_models)
        # Threshold the averaged sigmoid output at 0.5 and count matches;
        # the previous (pred - y).sum() summed raw differences instead of
        # counting correct predictions
        correct += (torch.round(avg_pred) == y).type(torch.float).sum().item()
    return correct / size
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment