# NOTE: the wildcard import stays ahead of the explicit imports so that any
# same-named symbol it exports is overridden by the explicit imports below.
import stuett
from tests.stuett.sample_data import *

import base64
import datetime as dt
import os
import shutil
from pathlib import Path

import pandas as pd
import pytest
import zarr

# Fixture data directory, located relative to this test module.
test_data_dir = Path(__file__).absolute().parent / ".." / "data"
# Store the fixture tree's "user_dir/" under stuett's "user_dir" setting.
stuett.global_config.set_setting("user_dir", test_data_dir / "user_dir/")

class TestSeismicSource(object):
    """Tests for ``stuett.data.SeismicSource``.

    ``config``, ``start_time``, ``end_time`` and ``offset`` are not defined in
    this module; presumably they are provided by the wildcard import of
    ``tests.stuett.sample_data`` -- TODO confirm.
    """

    @pytest.mark.slow
    def test_seismic_source(self):
        """Construct the source once without and once with a config."""
        # first test without config
        stuett.data.SeismicSource()

        # with config
        stuett.data.SeismicSource(config)

    @pytest.mark.slow
    def test_call(self):
        """Request data from a source created with ``use_arclink=True``."""
        # first test without config
        seismic_source = stuett.data.SeismicSource(use_arclink=True)

        x = seismic_source(config)
        x = x.compute()

        print(x)
        # NOTE(review): this early return makes every statement below
        # unreachable, so none of the assertions in this test currently run.
        # It is kept to preserve the existing behaviour; remove it once the
        # reference values below have been re-validated.
        return

        assert x.mean() == -55.73599312780376

        # with config
        seismic_source = stuett.data.SeismicSource(config, use_arclink=True)

        request = {"start_time": start_time + offset, "end_time": end_time + offset}
        x = seismic_source(request)
        x = x.compute()
        assert x.mean() == -61.23491399790934

        # TODO: make test to compare start_times


@pytest.mark.slow
def test_gsn():
    """Query ``GSNDataSource`` for one day of data and check the sum."""
    # test_data = pd.read_csv(test_data_dir + 'matterhorn_27_temperature_rock.csv',index_col='time')
    gsn_node = stuett.data.GSNDataSource(
        deployment="matterhorn", position=30, vsensor="temperature_rock"
    )
    x = gsn_node({"start_time": "2017-07-01", "end_time": "2017-07-02"})

    assert x.sum() == 1600959.34
    # TODO: proper testing


# test_gsn()


def test_delayed():
    """Check ``CsvSource`` when called with ``delayed=True`` and ``delayed=False``.

    The delayed call must hand back a dask ``Delayed`` object; after computing
    it, its mean is compared with the same reference value as the mean of the
    non-delayed result.
    """
    from dask.delayed import Delayed

    expected_mean = 0.16201109532441677
    csv_path = Path(test_data_dir) / "timeseries" / "MH30_temperature_rock_2017.csv"
    source = stuett.data.CsvSource(csv_path)

    lazy = source(delayed=True)
    assert isinstance(lazy, Delayed)
    computed = lazy.compute()

    eager = source(delayed=False)

    assert computed.mean() == expected_mean
    assert eager.mean() == expected_mean


def test_freeze():
    """Pass two CSV time ranges through a ``Freezer`` backed by a local zarr store.

    The store directory lives under the configured ``user_dir``. It is removed
    before the run, so stale data from an earlier run cannot interfere, and
    again afterwards -- also when one of the freezing steps raises.
    """
    filename = Path(test_data_dir).joinpath(
        "timeseries", "MH30_temperature_rock_2017.csv"
    )
    node = stuett.data.CsvSource(filename)

    user_dir = stuett.global_config.get_setting("user_dir")
    store_name = user_dir.joinpath("frozen", "test.zarr")

    # Start from a clean slate in case a previous run left the store behind.
    shutil.rmtree(store_name, ignore_errors=True)

    try:
        store = zarr.DirectoryStore(store_name)
        # account_name = stuett.global_config.get_setting('azure')['account_name'] if stuett.global_config.setting_exists('azure') else "storageaccountperma8980"
        # account_key  = stuett.global_config.get_setting('azure')['account_key']  if stuett.global_config.setting_exists('azure')  else None
        # store = zarr.ABSStore(container='hackathon-on-permafrost', prefix='dataset/test.zarr', account_name=account_name, account_key=account_key, blob_service_kwargs={})

        freezer = stuett.data.Freezer(store)

        request = {"start_time": "2017-07-01", "end_time": "2017-08-01"}
        x = node(request=request)
        x = freezer(x)

        request = {"start_time": "2017-09-01", "end_time": "2017-10-01"}
        x = freezer(node(request=request))

        # x = freezer()
        print("final", x)
    finally:
        # Always remove the on-disk store so a failing run leaves nothing behind.
        shutil.rmtree(store_name, ignore_errors=True)


# test_freeze()


def test_image_filenames():
    """Call ``MHDSLRFilenames`` with several request variants.

    Only the calls themselves are exercised; no result is asserted yet.
    ``channels``, ``stations`` and ``config`` are not defined in this module;
    presumably they come from the wildcard import of
    ``tests.stuett.sample_data`` -- TODO confirm.
    """
    # first test without config
    node = stuett.data.MHDSLRFilenames(base_directory=test_data_dir.joinpath("MHDSLR"))

    start_time = dt.datetime(2017, 8, 6, 9, 56, 12, tzinfo=dt.timezone.utc)
    end_time = dt.datetime(2017, 8, 6, 10, 14, 10, tzinfo=dt.timezone.utc)

    config_0 = {
        "channel": channels[0],
        "station": stations[0],
        "start_time": start_time,
        "end_time": end_time,
    }

    data = node(request=config_0)

    config_1 = config.copy()
    # this should return an empty list
    # TODO: assert on `data` once the expected result is confirmed
    data = node(config_1)

    # Test the case where no end_time is provided
    del config_0["end_time"]
    data = node(config_0)

    del config_1["end_time"]
    data = node(config_1)

    config_1["start_time"] = dt.datetime(2018, 8, 6, 20, 0, 0, tzinfo=dt.timezone.utc)
    data = node(config_1)


# test_image_filenames()


def test_mhdslrimage():
    """Compare the first base64 result of ``MHDSLRImages`` with a fixture image.

    The node is called once with only a time range and once more with
    ``output_format="base64"``. The first entry of the second result is
    compared with the base64 encoding of the raw bytes of a fixture JPEG.
    """
    base_dir = Path(test_data_dir).joinpath("MHDSLR")
    node = stuett.data.MHDSLRImages(base_directory=base_dir)

    start_time = dt.datetime(2017, 8, 6, 9, 50, 12, tzinfo=dt.timezone.utc)
    end_time = dt.datetime(2017, 8, 6, 10, 12, 10, tzinfo=dt.timezone.utc)

    # Named `request` so it does not shadow the module-level `config`.
    request = {
        "start_time": start_time,
        "end_time": end_time,
    }

    # Exercise the call without an explicit output format first.
    data = node(request)

    request["output_format"] = "base64"
    data = node(request)

    # TODO: assert data

    from PIL import Image

    # Open the image in a context manager so its file handle is closed.
    with Image.open(base_dir.joinpath("2017-08-06", "20170806_095212.JPG")) as img:
        img_base64 = base64.b64encode(img.tobytes())

    assert data[0].values == img_base64


# test_mhdslrimage()


def test_csv():
    """Call ``CsvSource`` without a request, take its length and print it."""
    csv_path = Path(test_data_dir) / "timeseries" / "MH30_temperature_rock_2017.csv"
    source = stuett.data.CsvSource(csv_path)

    loaded = source()
    n_entries = len(loaded)  # value is unused; the call only checks len() works
    print(loaded)

    # TODO: test with start and end time


def test_annotations():
    """Build ``BoundingBoxAnnotation`` nodes for both annotation CSV files.

    The time-series annotations are only constructed. The image annotations
    are also called, and the result is re-indexed from "index" to "time" and
    sorted by "time".
    """
    annotation_dir = Path(test_data_dir) / "annotations"

    # Construction only for the time-series annotations.
    stuett.data.BoundingBoxAnnotation(annotation_dir / "boundingbox_timeseries.csv")

    image_annotations = stuett.data.BoundingBoxAnnotation(
        annotation_dir / "boundingbox_images.csv"
    )
    targets = image_annotations().swap_dims({"index": "time"}).sortby("time")

    # print(targets.sel(time=slice('2016-01-01','2016-01-04')))
    # print(targets)


# test_annotations()


# def test_datasets():
#     filename = Path(test_data_dir).joinpath("annotations", "boundingbox_timeseries.csv")
#     label = stuett.data.BoundingBoxAnnotation(filename)

#     filename = Path(test_data_dir).joinpath(
#         "timeseries", "MH30_temperature_rock_2017.csv"
#     )
#     data = stuett.data.CsvSource(filename)

#     dataset = stuett.data.LabeledDataset(
#         data,
#         label,
#         dataset_slice={"time": slice("2017-08-01", "2017-08-02")},
#         batch_dims={"time": pd.to_timedelta(10, "s")},
#     )

# test_datasets()