To receive notifications about scheduled maintenance, please subscribe to the mailing list gitlab-operations@sympa.ethz.ch. You can subscribe at https://sympa.ethz.ch

Commit 8369c01c authored by gizemyuce's avatar gizemyuce
Browse files

Merge branch 'master' of gitlab.ethz.ch:ccarlos/slt-coding-exercises-21

parents bffe550c af9777f8
.python-version
.DS_Store
.ipynb_checkpoints/
import sklearn as skl
from sklearn.utils.validation import check_is_fitted
import pandas as pd
import numpy as np
from treelib import Tree
import matplotlib.pyplot as plt
def read_data_csv(sheet, y_names=None):
    """Parse a column data store into X, y arrays.

    Args:
        sheet (str): Path to csv data sheet (anything accepted by
            ``pandas.read_csv``).
        y_names (list of str, optional): List of column names used as
            labels. ``None`` (the default) means "no label columns": every
            column is returned as a feature and ``y`` is an empty dict.

    Returns:
        X (np.ndarray): Array with feature values from columns that are not
            contained in y_names (n_samples, n_features)
        y (dict of np.ndarray): Dictionary with keys y_names, each key
            contains an array (n_samples, 1) with the label data from the
            corresponding column in sheet.

    Raises:
        KeyError: If a name in y_names is not a column of the sheet.
    """
    # The default used to crash on ``c not in None``; treat it as "no labels".
    label_names = [] if y_names is None else y_names

    data = pd.read_csv(sheet)
    feature_columns = [c for c in data.columns if c not in label_names]
    X = data[feature_columns].values
    # Double brackets keep each label as a 2-D (n_samples, 1) column array.
    y = {name: data[[name]].values for name in label_names}
    return X, y
class DeterministicAnnealingClustering(skl.base.BaseEstimator,
                                       skl.base.TransformerMixin):
    """Template class for deterministic annealing clustering (DAC).

    The annealing itself (``fit``), the distance computation
    (``get_distance``) and the assignment probabilities
    (``_calculate_cluster_probs``) are placeholders marked with ``TODO``;
    the remaining methods are built on top of them.

    Attributes:
        cluster_centers (np.ndarray): Cluster centroids y_i
            (n_clusters, n_features)
        cluster_probs (np.ndarray): Assignment probability vectors
            p(y_i | x) for each sample (n_samples, n_clusters)
        bifurcation_tree (treelib.Tree): Tree object that contains
            information about cluster evolution during annealing.
        T_min: Temperature that ``predict`` passes to
            ``_calculate_cluster_probs``.
        n_eff_clusters (list): Read by the plotting methods together with
            ``temperatures`` and ``distortions``; presumably one entry per
            annealing step -- confirm against your ``fit``.
        temperatures (list): Temperatures read by the plotting methods.
        distortions (list): Distortions read by ``plot_phase_diagram``.

    Parameters:
        n_clusters (int): Maximum number of clusters returned by DAC.
        random_state (int): Random seed.
        metric (str): Stored on the instance; intended to select the
            distance used by ``get_distance``.
    """

    def __init__(self, n_clusters=8, random_state=42, metric="euclidian"):
        self.n_clusters = n_clusters
        self.random_state = random_state
        self.metric = metric
        self.T = None
        self.T_min = None

        self.cluster_centers = None
        self.cluster_probs = None

        self.n_eff_clusters = []
        self.temperatures = []
        self.distortions = []
        self.bifurcation_tree = Tree()

        # Not necessary, depends on your implementation
        self.bifurcation_tree_cut_idx = None

        # Add more parameters, if necessary. You can also modify any other
        # attributes defined above

    def _require_cluster_centers(self):
        """Raise ``NotFittedError`` unless cluster centers are available.

        ``check_is_fitted`` only tests that the attribute exists, and
        ``__init__`` always creates ``cluster_centers`` (as ``None``), so the
        ``None`` case has to be rejected explicitly.
        """
        check_is_fitted(self, ["cluster_centers"])
        if self.cluster_centers is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This %s instance is not fitted yet. Call 'fit' before "
                "using this estimator." % type(self).__name__)

    def fit(self, samples):
        """Compute DAC for input vectors X

        Preferred implementation of DAC as described in reference [1].

        Args:
            samples (np.ndarray): Input array with shape
                (samples, n_features)

        Returns:
            self: The estimator itself, so that the inherited
                ``fit_transform`` can chain ``.transform()`` on the result.
        """
        # TODO:
        return self

    def _calculate_cluster_probs(self, dist_mat, temperature):
        """Predict assignment probability vectors for each sample in X given
            the pairwise distances

        Args:
            dist_mat (np.ndarray): Distances (n_samples, n_centroids)
            temperature (float): Temperature at which probabilities are
                calculated

        Returns:
            probs (np.ndarray): Assignment probability vectors
                (new_samples, n_clusters)
        """
        # TODO:
        # Placeholder return value until the method is implemented.
        return [None]

    def get_distance(self, samples, clusters):
        """Calculate the distance matrix between samples and codevectors
        based on the given metric

        Args:
            samples (np.ndarray): Samples array (n_samples, n_features)
            clusters (np.ndarray): Codebook (n_centroids, n_features)

        Returns:
            D (np.ndarray): Distances (n_samples, n_centroids)
        """
        # TODO:
        # Placeholder return value until the method is implemented.
        return []

    def predict(self, samples):
        """Predict assignment probability vectors for each sample in X.

        Args:
            samples (np.ndarray): Input array with shape
                (new_samples, n_features)

        Returns:
            probs (np.ndarray): Assignment probability vectors
                (new_samples, n_clusters)

        Raises:
            sklearn.exceptions.NotFittedError: If no cluster centers are
                available yet.
        """
        self._require_cluster_centers()
        distance_mat = self.get_distance(samples, self.cluster_centers)
        probs = self._calculate_cluster_probs(distance_mat, self.T_min)
        return probs

    def transform(self, samples):
        """Transform X to a cluster-distance space.

        In the new space, each dimension is the distance to the cluster
        centers

        Args:
            samples (np.ndarray): Input array with shape
                (new_samples, n_features)

        Returns:
            Y (np.ndarray): Cluster-distance vectors
                (new_samples, n_clusters)

        Raises:
            sklearn.exceptions.NotFittedError: If no cluster centers are
                available yet.
        """
        self._require_cluster_centers()
        distance_mat = self.get_distance(samples, self.cluster_centers)
        return distance_mat

    def plot_bifurcation(self):
        """Show the evolution of cluster splitting

        This is a pseudo-code showing how you may be using the tree
        information to make a bifurcation plot. Your implementation may be
        entirely different or based on this code.

        Every tree node is expected to carry a ``data`` dict with the keys
        ``'cluster_id'`` and ``'distance'``.
        """
        check_is_fitted(self, ["bifurcation_tree"])

        clusters = [[] for _ in range(len(np.unique(self.n_eff_clusters)))]
        for node in self.bifurcation_tree.all_nodes_itr():
            c_id = node.data['cluster_id']
            my_dist = node.data['distance']

            # First node of a new cluster: start its trace from a copy of
            # the preceding cluster's trace.
            if c_id > 0 and len(clusters[c_id]) == 0:
                clusters[c_id] = list(np.copy(clusters[c_id - 1]))
            clusters[c_id].append(my_dist)

        # Cut the last iterations, usually it takes too long
        # The cut index is optional; when it was never set, plot everything
        # (a slice bound of None keeps the whole sequence).
        if self.bifurcation_tree_cut_idx is None:
            cut_idx = None
        else:
            cut_idx = self.bifurcation_tree_cut_idx + 20

        beta = [1 / t for t in self.temperatures]
        plt.figure(figsize=(10, 5))
        for c_id, s in enumerate(clusters):
            # Colour is given once via ``color``; the former '-k' format
            # string specified a second, overridden colour.
            plt.plot(s[:cut_idx], beta[:cut_idx], '-',
                     alpha=1, color='C%d' % int(c_id),
                     label='Cluster %d' % int(c_id))
        plt.legend()
        plt.xlabel("distance to parent")
        plt.ylabel(r'$1 / T$')
        plt.title('Bifurcation Plot')
        plt.show()

    def plot_phase_diagram(self):
        """Plot the phase diagram

        This is an example of how to make phase diagram plot. The exact
        implementation may vary entirely based on your self.fit()
        implementation. Feel free to make any modifications.

        Reads ``self.temperatures``, ``self.distortions`` and
        ``self.n_eff_clusters``, which are indexed in parallel.
        """
        t_max = np.log(max(self.temperatures))
        d_min = np.log(min(self.distortions))
        # Both axes are logarithmic and shifted so that they start at 0.
        y_axis = [np.log(i) - d_min for i in self.distortions]
        x_axis = [t_max - np.log(i) for i in self.temperatures]

        plt.figure(figsize=(12, 9))
        plt.plot(x_axis, y_axis)

        # For every effective cluster count, remember the first ('min') and
        # the latest ('max') x position at which it was observed.
        region = {}
        for i, c in enumerate(self.n_eff_clusters):
            if c not in region:
                region[c] = {}
                region[c]['min'] = x_axis[i]
            region[c]['max'] = x_axis[i]
        for c in region:
            if c == 0:
                continue
            plt.text((region[c]['min'] + region[c]['max']) / 2, 0.2,
                     'K={}'.format(c), rotation=90)
            plt.axvspan(region[c]['min'], region[c]['max'],
                        color='C' + str(c), alpha=0.2)
        plt.title('Phases diagram (log)')
        plt.xlabel('Temperature')
        plt.ylabel('Distortion')
        plt.show()
......@@ -68,7 +68,7 @@ The name of the branch should be your-gitlab-username/slt-ce-i, where i denotes
git checkout -b ccarlos/slt-ce-0
```
The instructions for each exercise can be found directly in the notebook.
The instructions for each exercise can be found directly in the notebook. Exercises must be solved individually.
Once you are done, `put your legi at the beginning of the notebook.`
......@@ -125,7 +125,9 @@ Course grade = 0.3 * 5.5 + 0.7 * 5.0 = 5.15 --> 5.25
Hand-Ins are due by **noon** of the respective hand-in day, and the hand-in period typically starts one week earlier.
`Exercises can not be handed in after the deadline, because the server is blocked!`
`Deadlines are strict. Late submissions or submissions done via email will be rejected and the exercise will be graded as failed.`
| | Exercise | Release | Hand-In |
|---|--------------------------|-----------|-------------------|
......
7810;6053
7798;5709
7264;5575
7324;5560
7547;5503
7744;5476
7821;5457
7883;5408
7874;5405
7927;5365
7848;5358
7802;5317
7962;5287
7913;5280
7724;5210
7503;5191
7759;5143
7890;5130
7254;5129
7790;5038
7142;5032
7606;5009
7772;4989
7744;4933
7846;4923
7622;4917
6937;4917
7576;4915
7783;4912
7716;4909
7295;4887
7777;4869
7700;4854
7726;4833
7702;4815
7583;4813
7654;4795
7417;4788
7267;4779
6806;4755
5259;4751
7698;4745
7570;4741
7617;4724
7752;4721
7673;4718
7692;4666
7547;4664
7259;4630
5387;4623
7679;4581
7674;4579
7631;4573
7520;4572
7848;4546
5685;4546
7832;4542
6735;4509
7647;4504
7338;4481
4602;4478
4606;4468
7399;4467
7037;4446
7458;4428
7364;4427
6058;4426
6868;4418
3832;4410
6670;4401
7443;4375
7160;4370
6139;4369
7333;4335
6237;4332
5385;4318
6911;4296
6304;4294
7111;4288
6740;4282
7698;4279
7613;4275
7360;4275
6779;4273
7207;4270
6241;4268
7432;4265
4354;4262
6589;4256
7817;4252
6051;4246
5356;4241
7554;4236
7534;4227
4217;4224
7349;4219
7128;4215
3950;4215
6947;4209
7549;4208
5168;4208
6524;4207
5871;4202
7542;4198
6660;4193
7216;4180
6607;4173
7601;4171
6123;4167
6450;4160
6713;4154
7355;4151
7604;4146
7541;4141
7506;4138
4871;4132
2906;4131
6488;4128
6312;4126
6008;4117
4427;4109
4679;4084
5955;4081
6891;4075
7705;4065
7562;4058
4634;4054
4607;4049
6557;4047
7344;4046
5543;4042
7124;4039
7466;4037
6259;4030
6366;4002
5597;3993
4655;3992
7805;3991
3396;3990
6603;3982
6537;3982
4342;3966
7037;3965
7345;3951
7271;3948
5336;3943
5964;3935
7660;3924
7872;3922
6567;3922
6602;3920
4806;3914
7909;3912
5926;3912
7449;3911
6333;3909
3108;3908
7844;3902
5427;3894
6862;3892
6621;3891
6150;3888
7388;3879
7351;3877
4694;3877
6340;3870
6425;3867
6577;3858
6864;3854
5706;3844
4496;3844
4574;3843
3824;3838
5803;3824
5720;3823
6454;3821
6120;3821
7988;3820
6376;3819
7841;3818
5778;3813
5457;3808
5671;3807
4293;3788
7423;3776
7342;3775
5541;3769
5621;3768
7750;3760
6327;3745
7879;3743
199;3743
6652;3742
5678;3742
5207;3742
7429;3737
7262;3725
6427;3717
1851;3710
6207;3700
6069;3695
4780;3694
7603;3690
5751;3681
6365;3679
6958;3678
6317;3673
5417;3673
6426;3656
7922;3655
7331;3634
5965;3624
4965;3622
6833;3618
6798;3610
7667;3608
1047;3602
7803;3598
7370;3588
952;3583
7906;3580
250;3578
5111;3569
6453;3567
7492;3560
6140;3558
5315;3557
5316;3554
4232;3551
7408;3534
8013;3523
5160;3517
7141;3514
5887;3508
4694;3502
7633;3499
7919;3496
1784;3494
1482;3494
236;3494
6713;3488
7696;3486
536;3481
317;3476
5649;3472
6235;3471
7199;3469
5540;3468
5400;3461
5796;3459
2342;3439
7494;3430
7321;3429
6265;3426
8001;3418
226;3415
6148;3413
5987;3402
7582;3396
7422;3390
6623;3389
7475;3388
7654;3377
7838;3375
6570;3371
4364;3362
7316;3360
4857;3359
7533;3358
5719;3352
7452;3339
7747;3329
5841;3328
3229;3312
7076;3302
7657;3301
6360;3301
525;3297
5619;3291
7989;3271
5697;3269
6050;3242
7082;3235
5539;3235
741;3235
6731;3234
7453;3229
7695;3220
7299;3219
863;3219
7861;3216
5960;3207
4252;3206
6402;3190
5342;3188
6656;3181
7532;3175
7434;3173
5679;3171
6518;3165
4537;3143
806;3123
6113;3101
7440;3100
6204;3099
7715;3086
7503;3086
5821;3086
7131;3081
7909;3080
920;3065
6468;3050
5677;3049
218;3031
6881;3029
5650;3023
197;3021
5531;3011
6387;3008
4458;3007
6190;2985
7055;2981
7238;2957
5930;2948
7543;2929
5291;2929
4196;2929
6617;2928
4831;2917
2835;2912
174;2901
5350;2867
7346;2858
6044;2848
4898;2840
3307;2833
1918;2832
7125;2823
6422;2820
5881;2817
141;2814
7851;2809
4929;2803
5963;2789
5470;2774
7458;2741
1263;2734
6766;2732
4763;2720
3461;2718
7309;2717
6848;2712
178;2702
1882;2684
4584;2643
3174;2627
7049;2570
7753;2564
6597;2563
4476;2555
1575;2555
7304;2550
10;2537
6800;2532
5296;2520
7104;2510
6547;2506
7267;2466
3189;2411
5117;2409
4973;2406
4488;2378
7351;2376
6007;2359
4612;2341
7015;2333
3233;2329
240;2327
6686;2312
6307;2295
7448;2291
7087;2274
2067;2254
5260;2230
4174;2190
36;2185
7856;2181
7315;2181
3319;2151
2126;2150
7418;2139
6885;2138
4959;2123
4996;2115
5681;2109
5277;2078
7643;2048
3390;2043
8080;2039
6139;2032
2694;2026
7152;2000
7822;1992
7416;1953
7352;1952
354;1950
6493;1931
7905;1921
8229;1905
6803;1886
4012;1886
4759;1883
8101;1876
7989;1876
8063;1860
8080;1835
7004;1805
6252;1795
6826;1774
7218;1773
464;1773
809;1766
7240;1762
7046;1757
8098;1746