Merge branch 'Livio_net' into 'main'

Livio net See merge request !2

Merge branch 'Livio_net' into 'main'
Livio net See merge request !2
63b8430b · lziltener · f23a3382 · 4f9dfdb6 · 63b8430b
Commit 63b8430b authored 1 year ago by lziltener
--- a/task4/template_solution.py
+++ b/task4/template_solution.py
@@ -11,6 +11,9 @@ from torch.utils.data import DataLoader, TensorDataset
 from sklearn.model_selection import train_test_split
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.linear_model import Ridge
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import WhiteKernel, RBF, Matern, DotProduct, RationalQuadratic
+from sklearn.model_selection import GridSearchCV

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

@@ -44,24 +47,52 @@ class Net(nn.Module):
        super().__init__()
        # T0DO: Define the architecture of the model. It should be able to be trained on pretraing data 
        # and then used to extract features from the training and test data.
-        self.encoder = nn.Sequential(
+        self.linear_embedding = nn.Sequential(
+            # input: [batch, 1000]
            nn.Linear(1000, 250),
+            # -> [batch, 250]
            nn.ReLU(), 
-            nn.Linear(250, 125),
+            nn.Linear(250, 128),
+            # -> [batch, 128]
+            nn.ReLU(),
+        )
+
+        self.encoder = nn.Sequential(
+            # input: [batch, 1, 128]
+            # A batched Conv1d input is 3-D: [batch, channels, length]
+            # (a batched Conv2d input is 4-D: [batch, channels, rows, cols]).
+            # linear_embedding produces a 2-D [batch, 128] tensor, so forward()
+            # inserts a channel axis before calling this encoder:
+            #   x = x.reshape(x.shape[0], 1, x.shape[1])  # -> [batch, 1, 128]
+            # Conv1d output length: floor((L + 2*padding - kernel_size) / stride) + 1
+
+            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=32, stride=2, padding=1), # -> [batch, 8, 50]
+            nn.ReLU(),
+            nn.Conv1d(8, 16, 7, stride=2, padding=1), # -> [batch, 16, 23]
            nn.ReLU(),
-            nn.Linear(125, 64),
+            nn.Conv1d(16, 32, 3, stride=2, padding=1), # -> [batch, 32, 12]
            nn.ReLU(),
-            nn.Linear(64, 32),
+            nn.Conv1d(32, 64, 12), # -> [batch, 64, 1]
        )

        self.decoder = nn.Sequential(
-            nn.Linear(32, 64),
+            # input: [batch, 64, 1]; ConvTranspose1d length: (L - 1)*stride - 2*padding + kernel_size
+            nn.ConvTranspose1d(64, 32, 12), # -> [batch, 32, 12]
            nn.ReLU(),
-            nn.Linear(64, 125),
+            nn.ConvTranspose1d(32, 16, 3, stride=2, padding=1), # -> [batch, 16, 23]
            nn.ReLU(),
-            nn.Linear(125, 250),
+            nn.ConvTranspose1d(16, 8, 7, stride=2, padding=1), # -> [batch, 8, 49]
            nn.ReLU(),
-            nn.Linear(250, 1000),
+            nn.ConvTranspose1d(8, 1, 32, stride=2, padding=0), # -> [batch, 1, 128]
+            nn.ReLU(),
+        )
+
+        # The decoder output is [batch, 1, 128]; forward() squeezes it to
+        # [batch, 128] before applying the linear layers below.
+        self.linear_decode = nn.Sequential(
+            nn.Linear(128, 250), # -> [batch, 250]
+            nn.ReLU(),
+            nn.Linear(250, 1000), # -> [batch, 1000]
            nn.Sigmoid(),
        )

@@ -76,8 +107,24 @@ class Net(nn.Module):
        """
        # T0DO: Implement the forward pass of the model, in accordance with the architecture 
        # defined in the constructor.
+
+        # x arrives as [batch, 1000]
+        x = self.linear_embedding(x)
+        # -> [batch, 128]; add the channel axis that Conv1d needs
+        x = x.reshape(x.shape[0], 1, x.shape[1])
+        # -> [batch, 1, 128]
        
        encoded = self.encoder(x)
+        
+
+        # encoded is [batch, 64, 1]
        decoded = self.decoder(encoded)
+        # decoded is [batch, 1, 128]
+        # Drop only the channel axis: a bare squeeze() would also drop a batch axis of size 1.
+        decoded = decoded.squeeze(1)
+        
+        decoded = self.linear_decode(decoded)
+
        return decoded
    
 def make_feature_extractor(x, y, batch_size=256, eval_size=1000):
@@ -105,10 +152,12 @@ def make_feature_extractor(x, y, batch_size=256, eval_size=1000):
    # T0DO: Implement the training loop. The model should be trained on the pretraining data. Use validation set 
    # to monitor the loss.
    model.to(device)
-    n_epochs = 10
+    n_epochs = 100  # upper bound; the loop below may break out earlier
    loss_fn = nn.MSELoss()
-    val_loss = 0
+    val_loss_prev = 1  # validation loss of the previous epoch; 1 is only the starting value
+    counter = 0  # number of epochs so far whose validation loss exceeded the previous one
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
+    # Alternative optimizer (disabled): torch.optim.SGD(model.parameters(), lr=0.01)

    dataset = TensorDataset(x_tr, y_tr)
    train_loader = DataLoader(dataset=dataset, batch_size=batch_size,shuffle= True, pin_memory=True, num_workers=4)
@@ -117,6 +166,7 @@ def make_feature_extractor(x, y, batch_size=256, eval_size=1000):
    val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size,shuffle= True, pin_memory=True, num_workers=4)

    for epoch in range(n_epochs):
+        model.train(True)  # put the model in training mode at the start of every epoch
        for batch, (X, y) in enumerate(train_loader):
            pred = model(X)
            loss = loss_fn(pred, X) #compute MSE between pred and input, thats the whole point of autoencoders!
@@ -128,12 +178,20 @@ def make_feature_extractor(x, y, batch_size=256, eval_size=1000):
        print("Epoch: ", epoch + 1, "Training-Loss: ", loss.item())

        #compute validation-error
+        model.train(False)  # leave training mode for the validation pass
        for batch, (X, y) in enumerate(val_loader):
            val_pred = model(X)
            val_loss = loss_fn(val_pred, X)
        
        print("Validation-Loss: ", val_loss.item())
-    
+        print("val/train-ratio: ", val_loss.item()/loss.item(), "\n")  # both are last-batch losses; a ratio far from 1 may indicate overfitting
+        if(counter > 0 and  val_loss.item() > val_loss_prev):  # a rise was already recorded and the loss rose again: stop training
+            break
+        # Record that the validation loss rose compared with the previous epoch.
+        if (val_loss.item() > val_loss_prev):
+            counter +=1
+
+        val_loss_prev = val_loss.item()
    


@@ -149,7 +207,11 @@ def make_feature_extractor(x, y, batch_size=256, eval_size=1000):
        """
        model.eval()
        # TODO: Implement the feature extraction, a part of a pretrained model used later in the pipeline.
-        return model.encoder(x) #figure out how to inherit the encoder of our net
+        x_linear = model.linear_embedding(x)  # [n, 1000] -> [n, 128]
+        x_shaped = x_linear.reshape(x_linear.shape[0], 1, x_linear.shape[1])  # add the channel axis Conv1d needs
+        encoded = model.encoder(x_shaped)  # -> [n, 64, 1]
+        squeezed = encoded.squeeze(-1)  # drop only the length axis so a single sample still yields [1, 64]
+        return squeezed

    return make_features

@@ -190,7 +252,17 @@ def get_regression_model():
    """
    # T0DO: Implement the regression model. It should be able to be trained on the features extracted
    # by the feature extractor.
-    model = Ridge(alpha=1)
+    # Previous model: Ridge(alpha=1)
+    gpr = GaussianProcessRegressor(random_state=42, n_restarts_optimizer=5)
+    # Candidate kernels for the grid search: three families, each with additive white noise.
+    params = [RBF(scale)*Matern(nu=0.5) + WhiteKernel(3) for scale in np.linspace(3, 5, 4)]
+    params = params + [RBF(2.5) + WhiteKernel(noise) for noise in np.linspace(2, 5, 6)]
+    params = params + [Matern(nu=nu) + WhiteKernel(2) for nu in [0.5, 1.5, 2.5]]  # was "1,5", which produced nu=1 and nu=5
+    # The grid has a single dimension: which kernel the Gaussian process uses.
+    params_dict = {'kernel': params}
+    # 4-fold cross-validation, candidates ranked by R^2.
+    model = GridSearchCV(gpr, param_grid=params_dict, scoring='r2', cv=4)
+
    return model

 # Main function. You don't have to change this
@@ -214,6 +286,7 @@ if __name__ == '__main__':
    x_train = pretrained_feature_obj.transform(torch.from_numpy(x_train).to(torch.float32))
    x_train = x_train.detach().numpy()
    regression_model.fit(x_train, y_train)
+    print("\n regression-score: ", regression_model.score(x_train, y_train))  # scored on the data just used for fitting, i.e. a training score
    x_test_trans = pretrained_feature_obj.transform(torch.from_numpy(x_test.to_numpy()).to(torch.float32))
    x_test_trans = x_test_trans.detach().numpy()
    y_pred = regression_model.predict(x_test_trans) #call predictor