To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

create_segments.py 3.96 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
Stephan Wegner & Vithurjan Visuvalingam
pdz, ETH Zürich
2020

This file contains all functions to create segments for the 3DCNN from the extracted features and images
"""

import pandas as pd
import numpy as np
import argparse
import os
import sys

# Move the working directory three levels up and put the resulting directory
# on the import path so the project-level imports that follow can be resolved.
# NOTE(review): this is relative to the *current working directory*, not to
# this file's location - presumably the script is meant to be launched from
# its own folder; confirm.
for _ in range(3):
    parent_dir = os.path.dirname(os.getcwd())
    os.chdir(parent_dir)
sys.path.append(os.getcwd())
import definitions
from src.ThreeDCNN.dataset_creation import utils

if __name__ == '__main__':
    # Script entry point: read the id/label mapping of the train/val or test
    # set, group the frame ids into fixed-size overlapping segments via
    # utils.labels_segments, save every segment as a .npy file and write a CSV
    # listing each segment id together with its label.
    print("start create segments")
    ROOT_DIR = definitions.ROOT_DIR

    # Args
    parser = argparse.ArgumentParser(description='Create segments for training or test set')
    parser.add_argument('--segment_input_dir', default=os.path.join(ROOT_DIR, 'data/datasets'))
    parser.add_argument('--segment_output_dir', default=os.path.join(ROOT_DIR, 'data/datasets/filled_values_segments'))
    # `choices` lets argparse reject any other mode with a usage error right
    # away; previously an unknown mode only printed a hint and the script then
    # crashed with a NameError on the undefined id/label mapping.
    parser.add_argument('--mode', required=True, choices=['train', 'test'])
    args = parser.parse_args()

    # All paths are assembled with os.path.join instead of a hard-coded "\\"
    # so the script is not tied to Windows path separators.
    if args.mode == 'train':
        id_label_mapping = pd.read_csv(
            os.path.join(args.segment_input_dir, "train_val_filled_values_id_label_map.csv"))
        path_to_save_segments = os.path.join(args.segment_output_dir, "train_val")
        dataset_csv_name = "train_val_segment_dataset.csv"
    else:  # args.mode == 'test' (guaranteed by `choices`)
        id_label_mapping = pd.read_csv(
            os.path.join(args.segment_input_dir, "test_filled_values_id_label_map.csv"))
        path_to_save_segments = os.path.join(args.segment_output_dir, "test")
        dataset_csv_name = "test_segment_dataset.csv"

    # np.save does not create missing directories, so make sure the target exists.
    os.makedirs(path_to_save_segments, exist_ok=True)

    ids = id_label_mapping['ids']  # ids = num_video_frame-time[ms]
    labels = id_label_mapping['labels']  # labels = 0,1,2,3 for linked frames

    # get start points of the videos and the video numbers of each video
    # indexes = index in list-of-ids for start and end of video, num_videos = num_video
    indexes, num_videos = utils.get_start_end_list_of_videos(ids)

    # datastructure needed for index retrieval of given ID
    # (the last entry of `indexes` is dropped before pairing with the video numbers)
    start_indexes_of_videos = dict(zip(num_videos, indexes[:-1]))
    segment_size = 16  # number of frames per segment

    # overlap E (0,1)
    overlap = 0.5  # 50% overlap
    length = 16  # timespan of the segments, e.g 16 = 0.5 s

    # one integer class index per entry of definitions.HEC_classes
    classes = list(range(len(definitions.HEC_classes)))

    # it is also possible to create segments of different length, just add more sets of parameters
    parameter_segments = [[length, overlap]]

    # Create set of segments with length 0.5 s
    label_segments = []
    segments = []
    for length, overlap in parameter_segments:
        label, segment = utils.labels_segments(
            indexes=indexes,
            ids=ids,
            length=length,
            segment_size=segment_size,
            overlap=overlap,
            labels_raw=labels,
            classes=classes,
            start_indexes_of_videos=start_indexes_of_videos,
        )
        label_segments.extend(label)
        segments.extend(segment)

    # name each segment according to its video number its starting point and its length
    ids_predict = [
        segment[0] + "_" + str(round(utils.time_stamp(segment[-1]) - utils.time_stamp(segment[0])))
        for segment in segments
    ]

    # save the snapshot ids corresponding to a segment in a directory
    # Each file is named after the segment id that is also written to the CSV
    # below. The previous code used ids[i] (the i-th *frame* id), which does
    # not match the ids listed in the segment dataset.
    # NOTE(review): assumes downstream code loads the files by the CSV id - confirm.
    for segment_id, segment in zip(ids_predict, segments):
        np.save(os.path.join(path_to_save_segments, segment_id), np.array(segment))

    # create a dataframe having all information about the data generated and save it
    segment_dataset = pd.DataFrame(data={"ids": ids_predict, "labels": label_segments})
    segment_dataset.to_csv(
        os.path.join(ROOT_DIR, "data", "datasets", dataset_csv_name), sep=',', index=False)