"""
Stephan Wegner & Vithurjan Visuvalingam
pdz, ETH Zürich
2020
This file contains all functions to predict the eye-hand coordination behaviour patterns of test sets.
"""
import numpy as np
from tensorflow.keras.models import load_model
import pandas as pd
import os
import sys
# move four directories up to the project root and make it importable
i = 0
while i <= 3:
    os.chdir(os.path.dirname(os.getcwd()))
    i += 1
sys.path.append(os.getcwd())
import definitions
from src.ThreeDCNN.models.data_generator.ThreeDimCNN_datagenerator import ThreeDimCNN_datagenerator
from src.ThreeDCNN.models.prediction import utils
if __name__ == '__main__':
    print('start prediction')
    # configuration
    use_activity_to_segment_length_relationship = False
    classification_activities = ['Background', 'Guiding', 'Directing', 'Checking', 'Observing']  # 28/12/2020: 'Checking' added
    # paths to store and load data
    ROOT_DIR = definitions.ROOT_DIR
    path_to_sequences = ROOT_DIR + "\\" + r"data\datasets\filled_values_segments\test"
    path_to_test_images = ROOT_DIR + "\\" + r"data\datasets\extracted_images\test"
    path_to_segment_id_label_mapping = ROOT_DIR + "\\" + r"data\datasets\test_segment_dataset.csv"
    path_to_image_id_label_mapping = ROOT_DIR + "\\" + r"data\datasets\test_filled_values_id_label_map.csv"
    path_to_save_figures = ROOT_DIR + "\\" + r"reports\figures"
    path_to_store_predictions = ROOT_DIR + "\\" + r"reports\predictions"
    ### load annotation data ### TASK: LOAD ONLY ONE DATASET Px at once
    # image-level
    image_id_label_mapping = pd.read_csv(path_to_image_id_label_mapping)  # link between GT-segment label and corresponding image
    image_ids = image_id_label_mapping['ids']  # ids: video-num_frame-time[ms]
    image_labels = image_id_label_mapping['labels']  # labels: 0-4 (Background, Guiding, Directing, Checking, Observing)
    image_ids_split = [ID.split('_') for ID in image_ids]
    # segment-level
    segment_id_label_map = pd.read_csv(path_to_segment_id_label_mapping)
    segment_ids = list(segment_id_label_map['ids'])  # ids: video-num_frame-time[ms]_segment-length
    segment_labels = list(segment_id_label_map['labels'])  # labels: 0-4 (Background, Guiding, Directing, Checking, Observing)
    segment_ids_split = [ID.split('_') for ID in segment_ids]
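    # e.g. a (hypothetical) segment ID '7_1200.0_16' splits into
    # ['7', '1200.0', '16']: video number 7, start time 1200 ms, 16 frames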
    # this loop goes over the single test-set videos and evaluates one after the other
    for i in range(len(definitions.test_nums)):
        video_num = definitions.test_nums[i]
        name = definitions.name_base + str(video_num)
        print('###################################')
        print('video:', name)
        print('###################################')
        # note which positions in the arrays belong to the current video number
        index_segment_id_video_num = [j for j in range(len(segment_ids_split)) if segment_ids_split[j][0] == str(video_num)]
        index_image_id_video_num = [j for j in range(len(image_ids_split)) if image_ids_split[j][0] == str(video_num)]
        segment_ids_num = []
        segment_labels_num = []
        image_ids_num = []
        image_labels_num = []
        # keep only those entries which belong to the current video number
        for j in index_segment_id_video_num:
            segment_ids_num.append(segment_ids[j])
            segment_labels_num.append(segment_labels[j])
        for k in index_image_id_video_num:
            image_ids_num.append(image_ids[k])
            image_labels_num.append(image_labels[k])
        dict_segment_id_label_map = dict(zip(segment_ids_num, segment_labels_num))
        dict_ids = {}
        dict_ids['test'] = segment_ids_num
        # image-level (needed for frame-by-frame comparison)
        test_images = image_ids_num
        test_images_labels = image_labels_num
        num_videos = [str(video_num)]  # only the current video is evaluated in this iteration
        # map each video number to the index of its first image
        start_points_of_videos = utils.get_start_end_list_of_videos(test_images)
        start_indexes_of_videos = dict(zip(num_videos, start_points_of_videos[:len(start_points_of_videos) - 1]))
        ###############################################
        # Classification Network Prediction           #
        ###############################################
        print('##################################')
        print('start prediction classification NN')
        print('##################################')
        # load classification network model
        class_model = load_model(definitions.path_to_load_classification_network)
        # prepare test generator: 16 frames of 128x128 RGB images per segment
        test_params = {'dim_feature_sequence': (16, 128, 128, 3),
                       'batch_size': 1,
                       'n_classes': len(definitions.HEC_classes),
                       'shuffle': False}
        test_generator = ThreeDimCNN_datagenerator(dict_ids['test'], dict_segment_id_label_map, **test_params)
        # predict on the test set
        class_predictions = class_model.predict_generator(test_generator)
        # get class labels
        class_labels = np.array([np.argmax(x) for x in class_predictions])
        # show the performance of the model
        true_class_labels = [dict_segment_id_label_map[ID] for ID in segment_ids_num]
        # take the maximum predicted probability as the score of each segment
        class_scores = []
        for idx in range(len(class_labels)):
            class_scores.append(class_predictions[idx][class_labels[idx]])
        # create a mapping from segments to the class scores
        segment_test_ids_class_score_map = dict(zip(segment_ids_num, class_scores))
        # create a mapping from segments to activity labels
        segment_test_ids_class_activity_map = dict(zip(segment_ids_num, class_labels))
        # print the performance as confusion matrix and classification report
        print('confusion matrix and classification report segments')
        target_names = classification_activities
        utils.print_performance(true_class_labels, class_labels, target_names)
        utils.save_activity_confusion_matrix(t_labels=true_class_labels,
                                             p_labels=class_labels,
                                             activities=classification_activities,
                                             title_name='Classification Network',
                                             path=path_to_save_figures + '\\' + 'ConfusionMat',
                                             plot_name=name + '-Classification_network_segments_3DCNN_confusion_matrix')
        # create a classification report plot and save it
        utils.save_classification_report(t_labels=true_class_labels,
                                         p_labels=class_labels,
                                         activities=classification_activities,
                                         title_name="Classification Network",
                                         path=path_to_save_figures + '\\' + 'ClassificationRep',
                                         plot_name=name + '-Classification_network_segments_3DCNN_report')
        # save all predictions for later analysis
        Y_pred = class_predictions
        y_pred = np.argmax(Y_pred, axis=1)
        # one probability column per class (28/12/2020: class 4 added)
        Ypred_0 = [p[0] for p in Y_pred]
        Ypred_1 = [p[1] for p in Y_pred]
        Ypred_2 = [p[2] for p in Y_pred]
        Ypred_3 = [p[3] for p in Y_pred]
        Ypred_4 = [p[4] for p in Y_pred]
        y_true = []
        for idx in range(len(true_class_labels)):
            # test_generator[idx][1].shape = batch_size x n_classes labels
            y_true = np.append(y_true, int(np.argmax(test_generator[idx][1])))
        t_dict = {'labels pred': y_pred, 'labels true': y_true, 'val id': segment_ids_num[:len(y_pred)],
                  'value pred 0': Ypred_0, 'value pred 1': Ypred_1, 'value pred 2': Ypred_2,
                  'value pred 3': Ypred_3, 'value pred 4': Ypred_4}
        df = pd.DataFrame(t_dict)
        df.to_csv(path_to_save_figures + "\\ComparisonPredTrue\\" + name + '_' + r'classification_predictions-true.csv', sep=',', index=False)
        ### Prediction using only Class Scores ###
        # labels given by the classification network prediction
        candidate_labels = dict(zip(list(segment_ids_num), list(class_labels)))  # class labels are the labels predicted by the NN
        # keep only the non-background segments as activity candidates
        candidate_segments = []
        candidate_scores = []
        for ID in segment_ids_num:
            if candidate_labels[ID] > 0:
                candidate_segments.append(ID)
                candidate_scores.append(segment_test_ids_class_score_map[ID])
        # select the best segments with NMS using the classification score
        classification_score_predictions = utils.non_maximum_suppresion_all_best_segments(segments=candidate_segments, scores=candidate_scores, path_to_sequences=path_to_sequences, start_indexes_of_videos=start_indexes_of_videos)
        # save the prediction obtained using only the classification network
        only_classification_labels = [candidate_labels[ID] for ID in classification_score_predictions]
        t_dict = {'ids': classification_score_predictions, 'labels': only_classification_labels}
        df = pd.DataFrame(t_dict)
        df.to_csv(path_to_store_predictions + "\\" + name + r'only_classification_score.csv', sep=',', index=False)
"""
Utility functions for segment prediction and evaluation
(imported above as src.ThreeDCNN.models.prediction.utils).
"""
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
def index_of_id(id, start_indexes_of_videos):
    # ID format: video-num_time[ms]; one frame every 100/3 ms (i.e. 30 fps)
    num_video = id.split('_')[0]
    start_index = start_indexes_of_videos[num_video]
    time = float(id.split('_')[1])
    offset = round(time / (100 / 3))
    return int(start_index + offset)

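# A worked example with a hypothetical image ID:
#   index_of_id('7_100.0', {'7': 0})  # -> 0 + round(100 / 33.33) = 3
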
def IoU(segment1, segment2, start_indexes_of_videos):
    # temporal intersection over union of two segments, each given as a
    # sequence of image IDs (only the first and last ID are used)
    first_segment = list(range(index_of_id(segment1[0], start_indexes_of_videos=start_indexes_of_videos), index_of_id(segment1[-1], start_indexes_of_videos=start_indexes_of_videos) + 1))
    second_segment = list(range(index_of_id(segment2[0], start_indexes_of_videos=start_indexes_of_videos), index_of_id(segment2[-1], start_indexes_of_videos=start_indexes_of_videos) + 1))
    intersection = np.intersect1d(first_segment, second_segment)
    if len(intersection) == 0:
        return 0
    union = np.union1d(first_segment, second_segment)
    return len(intersection) / len(union)

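# A minimal sketch with hypothetical image IDs: segment a covers frame
# indexes 0-3, segment b covers 2-5, so the intersection is 2 frames and
# the union 6 frames:
#   a = ['7_0.0', '7_100.0']    # first and last image ID of segment a
#   b = ['7_66.7', '7_166.7']   # first and last image ID of segment b
#   IoU(a, b, start_indexes_of_videos={'7': 0})  # -> 2/6 = 0.33
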
def non_maximum_suppresion_all_segments(segments, scores, threshold, path_to_sequences, start_indexes_of_videos):
    # segments = ID: video-num_start-time_segment-length
    # scores = float: max score for the segment, in [0, 1]
    threshold_segments = []
    best_segments = []
    # reduce the segments to those with a score >= threshold
    for i in range(len(segments)):
        if scores[i] >= threshold:
            threshold_segments.append(segments[i])
    if len(threshold_segments) == 0:
        return best_segments
    # add a threshold segment only if it has no intersection with any of the
    # best segments kept so far, starting with the first threshold segment
    best_segments.append(threshold_segments[0])
    for i in range(len(threshold_segments) - 1):
        sequence_current = np.load(path_to_sequences + "\\" + threshold_segments[i + 1] + ".npy", allow_pickle=True)
        overlaps = False
        for j in range(len(best_segments)):
            sequence_best_j = np.load(path_to_sequences + "\\" + best_segments[j] + ".npy", allow_pickle=True)
            if IoU(segment1=sequence_current, segment2=sequence_best_j, start_indexes_of_videos=start_indexes_of_videos) != 0:
                overlaps = True
                break
        if not overlaps:
            best_segments.append(threshold_segments[i + 1])
    print('ratio threshold segments / best segments:', len(threshold_segments) / len(best_segments) * 100, '%')
    return best_segments

def non_maximum_suppresion_all_best_segments(segments, scores, path_to_sequences, start_indexes_of_videos):
    # segments = ID: video-num_start-time_segment-length
    # scores = float: max score for the segment, in [0, 1]
    best_segments = []
    best_scores = []
    if len(segments) == 0:
        return best_segments
    # add a segment only if it has no intersection with any of the best
    # segments kept so far, starting with the first segment
    best_segments.append(segments[0])
    best_scores.append(scores[0])
    for i in range(len(segments) - 1):
        sequence_current = np.load(path_to_sequences + "\\" + segments[i + 1] + ".npy", allow_pickle=True)
        overlaps = False
        for j in range(len(best_segments)):
            sequence_best_j = np.load(path_to_sequences + "\\" + best_segments[j] + ".npy", allow_pickle=True)
            if IoU(segment1=sequence_current, segment2=sequence_best_j, start_indexes_of_videos=start_indexes_of_videos) != 0:
                overlaps = True
                # if a best segment intersects the proposed segment but has a
                # lower score, it is replaced by the proposed segment
                if scores[i + 1] > best_scores[j]:
                    best_scores[j] = scores[i + 1]
                    best_segments[j] = segments[i + 1]
        if not overlaps:
            best_segments.append(segments[i + 1])
            best_scores.append(scores[i + 1])
    print('ratio best segments / all segments:', len(best_segments) / len(segments) * 100, '%')
    return best_segments

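# A minimal usage sketch (hypothetical IDs and scores; assumes matching
# .npy sequence files exist in the path_to_sequences directory):
#   keep = non_maximum_suppresion_all_best_segments(
#       segments=['7_0.0_16', '7_66.7_16'], scores=[0.9, 0.6],
#       path_to_sequences=path_to_sequences,
#       start_indexes_of_videos={'7': 0})
#   # overlapping proposals are suppressed; the higher-scoring one survives
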
def get_image_ids_from_segment(id, path_to_sequences, start_indexes_of_videos):
    segment = np.load(path_to_sequences + "\\" + id + ".npy", allow_pickle=True)
    indexes = list(range(index_of_id(segment[0], start_indexes_of_videos=start_indexes_of_videos), index_of_id(segment[-1], start_indexes_of_videos=start_indexes_of_videos) + 1))
    return indexes

def save_classification_report(t_labels, p_labels, activities, title_name, path, plot_name):
    labels = np.arange(len(activities))  # one label per activity class
    target_names = activities
    clf_report = classification_report(t_labels,
                                       p_labels,
                                       labels=labels,
                                       target_names=target_names,
                                       output_dict=True)
    plt.title(title_name)
    fig = sn.heatmap(pd.DataFrame(clf_report).iloc[:-1, :].T, annot=True, cmap="RdBu", vmin=0, vmax=1)
    plt.xlabel("Metrics")
    plt.savefig(path + "\\" + plot_name + '.png', dpi=300, bbox_inches='tight')
    plt.clf()
    clf_report = classification_report(t_labels,
                                       p_labels,
                                       target_names=target_names)
    path = path + "\\" + plot_name + ".txt"
    with open(path, 'w') as text_file:
        text_file.write(clf_report)

def print_performance(t_labels, p_labels, names):
    print('Confusion Matrix')
    print(confusion_matrix(t_labels, p_labels))
    print('Classification Report')
    print(classification_report(t_labels, p_labels, target_names=names))

def save_activity_confusion_matrix(t_labels, p_labels, activities, title_name, path, plot_name):
    matrix = confusion_matrix(t_labels, p_labels)
    # matrix = matrix / int(len(t_labels))  # 23/11/20: dividing by the total number of predicted segments is not intuitive; better: divide by representations per class, or keep absolute values
    plt.figure(figsize=(4, 4))
    plt.title(title_name)
    df_cm = pd.DataFrame(matrix, index=activities, columns=activities)
    fig = sn.heatmap(df_cm, annot=True, cmap="Blues", vmin=0, vmax=1)
    plt.xlabel("Predicted label")
    plt.xticks(rotation=45)
    plt.ylabel("True Label")
    plt.savefig(path + "\\" + plot_name + '.png', dpi=300,
                bbox_inches='tight')
    plt.clf()
    df_conf = pd.DataFrame(matrix)
    df_conf.to_csv(path + "\\" + plot_name + ".csv")

def get_start_end_list_of_videos(list_of_ids):
    cur_id = '-1'
    indexes = []
    for i in range(len(list_of_ids)):
        id = list_of_ids[i].split('_')[0]
        if id != cur_id:
            indexes.append(i)
            cur_id = id
    indexes.append(len(list_of_ids))  # add the last index as the stop value of the last video
    return indexes

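# A worked example with hypothetical image IDs:
#   get_start_end_list_of_videos(['3_0.0', '3_33.3', '5_0.0'])  # -> [0, 2, 3]
#   # video '3' starts at index 0, video '5' at index 2, and 3 is the stop
#   # index of the last video
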
"""
Binary (Background vs. Action) 3D-CNN model definition; presumably the
proposal network imported as proposal_model by the training script below.
"""
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv3D, MaxPooling3D, SpatialDropout3D, Dropout, BatchNormalization

def directing_model(filter_base, kernel_base, activation, initializer, regularizer, optimizer):
    # 3D-CNN with two convolution stages followed by a dense classifier head;
    # the input is a segment of 16 frames of 128x128 RGB images
    model = Sequential()
    model.add(Conv3D(filter_base, kernel_size=kernel_base, activation=activation, input_shape=(16, 128, 128, 3),
                     kernel_initializer=initializer, kernel_regularizer=regularizer))
    model.add(MaxPooling3D(pool_size=(2, 2, 2)))
    model.add(BatchNormalization())
    # model.add(SpatialDropout3D(0.2))  # not recommended
    model.add(Conv3D(2 * filter_base, kernel_size=kernel_base, padding='same', activation=activation,
                     kernel_initializer=initializer, kernel_regularizer=regularizer))
    model.add(Conv3D(2 * filter_base, kernel_size=kernel_base, padding='same', activation=activation,
                     kernel_initializer=initializer, kernel_regularizer=regularizer))
    model.add(MaxPooling3D(pool_size=(2, 2, 2)))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dense(256, activation=activation, kernel_initializer=initializer, kernel_regularizer=regularizer))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation='softmax', kernel_initializer=initializer))  # two output classes
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['categorical_accuracy'])
    model.summary()
    return model

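# A minimal usage sketch; these hyperparameter values are assumptions for
# illustration, not values prescribed by this repository:
#   from tensorflow.keras import optimizers, regularizers
#   model = directing_model(filter_base=8, kernel_base=(3, 3, 3),
#                           activation='relu', initializer='he_uniform',
#                           regularizer=regularizers.l2(1e-4),
#                           optimizer=optimizers.Adam(1e-4))
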
"""
This code is originally from https://github.com/agoila/lisa-faster-R-CNN
"""
from keras.callbacks import Callback
from keras.callbacks import BaseLogger
import matplotlib.pyplot as plt
import numpy as np
import json
import os
class EpochCheckpoint(Callback):
    def __init__(self, outputPath, every=5, startAt=0):
        # call the parent constructor
        super(EpochCheckpoint, self).__init__()
        # store the base output path for the model, the number of
        # epochs that must pass before the model is serialized to
        # disk and the current epoch value
        self.outputPath = outputPath
        self.every = every
        self.intEpoch = startAt

    def on_train_batch_begin(self, batch, logs=None):
        keys = list(logs.keys())
        # print("...Training: start of batch {}; got log keys: {}".format(batch, keys))

    def on_train_batch_end(self, batch, logs=None):
        keys = list(logs.keys())
        # print("...Training: end of batch {}; got log keys: {}".format(batch, keys))

    def on_test_begin(self, logs=None):
        """Do something"""

    def on_test_end(self, logs=None):
        """Do something"""

    def on_test_batch_begin(self, batch, logs=None):
        """Do something"""

    def on_test_batch_end(self, batch, logs=None):
        """Do something"""

    def on_epoch_end(self, epoch, logs={}):
        # check to see if the model should be serialized to disk
        if (self.intEpoch + 1) % self.every == 0:
            p = os.path.sep.join([self.outputPath,
                                  "epoch_{}.h5".format(self.intEpoch + 1)])
            self.model.save(p, overwrite=True)
        # increment the internal epoch counter
        self.intEpoch += 1

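# A minimal usage sketch (the checkpoint directory is hypothetical):
#   callbacks = [EpochCheckpoint(outputPath="checkpoints", every=5, startAt=0)]
#   model.fit(train_generator, epochs=50, callbacks=callbacks)
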
class TrainingMonitor(BaseLogger):
    def __init__(self, figPath, jsonPath=None, startAt=0):
        # store the output path for the figure, the path to the JSON
        # serialized file, and the starting epoch
        super(TrainingMonitor, self).__init__()
        self.figPath = figPath
        self.jsonPath = jsonPath
        self.startAt = startAt

    def on_train_batch_begin(self, batch, logs=None):
        keys = list(logs.keys())
        # print("...Training: start of batch {}; got log keys: {}".format(batch, keys))

    def on_train_batch_end(self, batch, logs=None):
        keys = list(logs.keys())
        # print("...Training: end of batch {}; got log keys: {}".format(batch, keys))

    def on_test_begin(self, logs=None):
        """Do something"""

    def on_test_end(self, logs=None):
        """Do something"""

    def on_test_batch_begin(self, batch, logs=None):
        """Do something"""

    def on_test_batch_end(self, batch, logs=None):
        """Do something"""

    def on_train_begin(self, logs={}):
        # initialize the history dictionary
        self.H = {}
        # if the JSON history path exists, load the training history
        if self.jsonPath is not None:
            if os.path.exists(self.jsonPath):
                self.H = json.loads(open(self.jsonPath).read())
                # check to see if a starting epoch was supplied
                if self.startAt > 0:
                    # loop over the entries in the history log and
                    # trim any entries that are past the starting epoch
                    for k in self.H.keys():
                        self.H[k] = self.H[k][:self.startAt]

    def on_epoch_end(self, epoch, logs={}):
        # loop over the logs and update the loss, accuracy, etc.
        # for the entire training process
        for (k, v) in logs.items():
            l = self.H.get(k, [])
            l.append(v)
            self.H[k] = l
        # check to see if the training history should be serialized to file
        if self.jsonPath is not None:
            f = open(self.jsonPath, "w")
            f.write(json.dumps(self.H))
            f.close()
        # ensure at least two epochs have passed before plotting
        # (epoch starts at zero)
        if len(self.H["loss"]) > 1:
            # plot the training loss and accuracy
            N = np.arange(0, len(self.H["loss"]))
            plt.style.use("ggplot")
            plt.figure()
            plt.plot(N, self.H["loss"], label="train_loss")
            plt.plot(N, self.H["val_loss"], label="val_loss")
            # plt.plot(N, self.H["categorical_accuracy"], label="train_acc")
            # plt.plot(N, self.H["val_categorical_accuracy"], label="val_acc")
            plt.title("Training Loss and Accuracy [Epoch {}]".format(
                len(self.H["loss"])))
            plt.xlabel("Epoch #")
            plt.ylabel("Loss/Accuracy")
            plt.legend()
            # save the figure
            plt.savefig(self.figPath)
            plt.close()

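# A minimal usage sketch (the report paths are hypothetical):
#   monitor = TrainingMonitor(figPath="reports/loss.png",
#                             jsonPath="reports/history.json", startAt=0)
#   model.fit(train_generator, epochs=50, callbacks=[monitor])
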
"""
Stephan Wegner & Vithurjan Visuvalingam
pdz, ETH Zürich
2020
This file contains all functions to train the proposal NN
"""
print('start train proposal NN')
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers, regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, Conv3D, MaxPooling3D, BatchNormalization
from tensorflow.keras.backend import set_session
from time import gmtime, strftime
import random
import argparse
import os
import sys
import math
import proposal_model as pm
# move four directories up to the project root and make it importable
i = 0
while i <= 3:
    os.chdir(os.path.dirname(os.getcwd()))
    i += 1
sys.path.append(os.getcwd())
from definitions import ROOT_DIR
from definitions import train_val_nums
from src.ThreeDCNN.models.data_generator.ThreeDimCNN_datagenerator import ThreeDimCNN_datagenerator
# set the TensorFlow (1.x) session configuration: cap the GPU memory usage
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
    # device_count={'GPU': 1}
)
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
set_session(session)