Commit f84be3f4 authored by matthmey's avatar matthmey

fixed bug in spectrogram

parent 15bbcb32
......@@ -346,6 +346,17 @@ optional = false
python-versions = "*"
version = "0.2.0"
[[package]]
category = "main"
description = "Largest-Triangle-Three-Buckets algorithm for downsampling time series–like data"
name = "lttb"
optional = false
python-versions = ">=3.5"
version = "0.2.0"
[package.dependencies]
numpy = "*"
[[package]]
category = "main"
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
......@@ -929,7 +940,7 @@ docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"]
testing = ["pathlib2", "contextlib2", "unittest2"]
[metadata]
content-hash = "9a05d00236e28e47fe7ae6369984909e9cc2ec0373f77da360918d73521793be"
content-hash = "72ed166be84e61ca0d776ce287d31c30ca620314ef75ee02ff984e3ccc35320b"
python-versions = "^3.7" # Compatible python versions must be declared here
[metadata.files]
......@@ -1125,6 +1136,10 @@ llvmlite = [
locket = [
{file = "locket-0.2.0.tar.gz", hash = "sha256:1fee63c1153db602b50154684f5725564e63a0f6d09366a1cb13dffcec179fb4"},
]
lttb = [
{file = "lttb-0.2.0-py3-none-any.whl", hash = "sha256:394d6112c259f87ec78b5ebf93fac31a2c2d430ef76abb1e2b9de2790efeec8b"},
{file = "lttb-0.2.0.tar.gz", hash = "sha256:fd02222d08cce0aba9d91ca3a1c0d506e6037eafca8e8e529a80725e2565a072"},
]
lxml = [
{file = "lxml-4.4.1-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:1f4f214337f6ee5825bf90a65d04d70aab05526c08191ab888cb5149501923c5"},
{file = "lxml-4.4.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e9c028b5897901361d81a4718d1db217b716424a0283afe9d6735fe0caf70f79"},
......
......@@ -32,6 +32,7 @@ zarr = "^2.3.2"
xarray = { git = "https://github.com/niowniow/xarray.git", branch = "strided_rolling" }
pillow = "^6.2.1"
xarray-extras = "^0.4.2"
lttb = "^0.2.0"
# Optional dependencies (extras)
......
......@@ -55,7 +55,7 @@ class Node(object):
def __call__(self, data=None, request=None, delayed=False):
    """Invoke this node, either eagerly or as a dask-delayed task.

    Parameters
    ----------
    data : optional
        Input forwarded unchanged to ``forward``.
    request : optional
        Request/configuration forwarded unchanged to ``forward``.
    delayed : bool
        If True, return a ``dask.delayed`` task that will call
        ``forward`` when computed; if False, call ``forward`` now.
    """
    if delayed:
        # Wrap the *callable*, not the call result:
        # dask.delayed(self.forward(...)) would execute forward eagerly
        # and merely wrap its return value, defeating lazy evaluation.
        return dask.delayed(self.forward)(data=data, request=request)
    else:
        return self.forward(data=data, request=request)
......@@ -137,6 +137,8 @@ def configuration(delayed, request, keys=None, default_merge=None):
if not isinstance(delayed, list):
collections = [delayed]
else:
collections = delayed
# dsk = dask.base.collections_to_dsk(collections)
dsk, dsk_keys = dask.base._extract_graph_and_keys(collections)
......@@ -260,7 +262,7 @@ def configuration(delayed, request, keys=None, default_merge=None):
collection = [Delayed(key=key, dsk=out) for key in in_keys]
else:
collection = Delayed(key=in_keys[0], dsk=out)
if isinstance(collection, list):
if isinstance(delayed, list):
collection = [collection]
return collection
......
......@@ -46,7 +46,7 @@ class DataSource(StuettNode):
config.update(request)
if delayed:
return dask.delayed(self.forward(config))
return dask.delayed(self.forward)(config)
else:
return self.forward(config)
......@@ -149,8 +149,8 @@ class SeismicSource(DataSource):
x = obspy_to_array(x)
# we assume that all starttimes are equal
starttime = x.starttime.values.squeeze()[0]
for s in x.starttime.values.squeeze():
starttime = x.starttime.values.reshape((-1,))[0]
for s in x.starttime.values.reshape((-1,)):
if s != starttime:
raise RuntimeError(
"Please make sure that starttime of each seimsic channel is equal"
......@@ -178,11 +178,11 @@ class SeismicSource(DataSource):
station_inventory=None,
detrend=True,
taper=False,
pre_filt=(0.025, 0.05, 124.0, 125.0),
pre_filt=(0.025, 0.05, 45.0, 49.0),
water_level=60,
apply_filter=True,
freqmin=0.002,
freqmax=125,
freqmax=50,
resample=False,
resample_rate=250,
rotation_angle=None,
......@@ -554,7 +554,6 @@ class MHDSLRFilenames(DataSource):
delta_t = dt.timedelta(seconds=delta_seconds)
num_filename_errors = 0
images_list = []
# p = Path(permasense_vault_dir + 'gsn-binaries/matterhorn/5015/camera1/')
p = Path(base_directory)
if not p.is_dir():
......
......@@ -6,6 +6,7 @@ import numpy as np
import xarray as xr
import scipy.signal
import pandas as pd
import lttb
class MinMaxDownsampling(StuettNode):
......@@ -18,8 +19,8 @@ class MinMaxDownsampling(StuettNode):
def forward(self, data=None, request=None):
rolling = data.rolling(time=self.rate, stride=self.rate)
# x_min = rolling.construct("time", stride=self.rate).min("time").dropna('time')
# x_max = rolling.construct("time", stride=self.rate).max("time").dropna('time')
# x_min = rolling.construct("buckets", stride=self.rate).min("time").dropna('time')
# x_max = rolling.construct("buckets", stride=self.rate).max("time").dropna('time')
x_min = rolling.min().dropna("time")
x_max = rolling.max().dropna("time")
......@@ -32,6 +33,40 @@ class MinMaxDownsampling(StuettNode):
return x_ds
class LTTBDownsampling(StuettNode):
    """Downsample a time series using the Largest-Triangle-Three-Buckets
    (LTTB) algorithm.

    NOTE(review): work-in-progress. ``forward`` returns ``None`` via the
    bare ``return`` below; everything after it is unreachable, and the
    final ``return x_ds`` references a name that is never defined (it
    would raise NameError if the dead code were re-enabled).
    """

    def __init__(self, rate=1):
        # Based on and many thanks to https://github.com/javiljoen/lttb.py
        # rate: bucket size — every `rate` input samples are meant to be
        # reduced to a single output sample.
        self.rate = rate

    def forward(self, data=None, request=None):
        # Bucket the data along `time` with non-overlapping windows of
        # size `rate`.
        rolling = data.rolling(time=self.rate, stride=self.rate) #TODO: make this not dependent on time dimension
        print(rolling)
        # Target number of output samples.
        n_out = data.sizes['time']//self.rate
        # print(data.sizes['time']//self.rate)
        data_bins = rolling.construct("buckets")
        # NOTE(review): early exit — the method currently returns None and
        # everything below is dead code kept from experimentation.
        return
        mean = rolling.mean()
        print(mean)
        out = []
        for i, (index, roll) in enumerate(rolling):
            print(i)
            if i == 0:
                # First output point is always the first input sample,
                # per the LTTB algorithm.
                prev = data[0]
            print(prev)
            print(mean.sel(time=index))
            print(roll)
            prev = roll
        # x_min = rolling.min().dropna("time")
        # NOTE(review): this downsamples the raw array directly instead of
        # using the buckets built above; n_out is hard-coded to 20 here.
        small_data = lttb.downsample(data, n_out=20)
        # NOTE(review): x_ds is undefined — NameError if ever reached.
        return x_ds
class Downsampling(StuettNode):
def __init__(self):
......@@ -75,9 +110,13 @@ class Spectrogram(StuettNode):
config["dim"] = data.dims[-1]
axis = data.get_axis_num(config["dim"])
if config["sampling_rate"] is None:
config["sampling_rate"] = data.attrs["sampling_rate"]
if 'sampling_rate' not in data.attrs:
raise RuntimeError("Please provide a sampling_rate attribute "
"to your config or your input data")
else:
config["sampling_rate"] = data.attrs["sampling_rate"]
samples = data.values
noverlap = config["nfft"] - config["stride"]
......@@ -104,10 +143,13 @@ class Spectrogram(StuettNode):
None
) # TODO: change when xarray #3291 is fixed
# Create a new DataArray for the spectogram
dims = data.dims[:-1] + ("frequency", data.dims[-1])
dims = data.dims[:axis] + ("frequency",) + data.dims[axis+1:] + (config["dim"],)
coords = dict(data.coords)
coords.update({"frequency": ("frequency", freqs), config["dim"]: ds_coords})
coords.update({"frequency": ("frequency", freqs), config["dim"]: ds_coords})
dataarray = xr.DataArray(spectrum, dims=dims, coords=coords)
return dataarray
......@@ -203,21 +203,20 @@ def test_annotations():
# test_annotations()
def test_datasets():
    """Build a LabeledDataset from a CSV time series and bounding-box labels."""
    annotation_path = Path(test_data_dir).joinpath(
        "annotations", "boundingbox_timeseries.csv"
    )
    label = stuett.data.BoundingBoxAnnotation(annotation_path)

    timeseries_path = Path(test_data_dir).joinpath(
        "timeseries", "MH30_temperature_rock_2017.csv"
    )
    data = stuett.data.CsvSource(timeseries_path)

    # One day of data, batched into 10-second windows.
    time_slice = {"time": slice("2017-08-01", "2017-08-02")}
    batch = {"time": pd.to_timedelta(10, "s")}
    dataset = stuett.data.LabeledDataset(
        data, label, dataset_slice=time_slice, batch_dims=batch
    )
# def test_datasets():
# filename = Path(test_data_dir).joinpath("annotations", "boundingbox_timeseries.csv")
# label = stuett.data.BoundingBoxAnnotation(filename)
# filename = Path(test_data_dir).joinpath(
# "timeseries", "MH30_temperature_rock_2017.csv"
# )
# data = stuett.data.CsvSource(filename)
# dataset = stuett.data.LabeledDataset(
# data,
# label,
# dataset_slice={"time": slice("2017-08-01", "2017-08-02")},
# batch_dims={"time": pd.to_timedelta(10, "s")},
# )
# test_datasets()
......@@ -47,15 +47,16 @@ class TestMinMaxDownsampling(object):
def test_spectrogram():
spectrogram = stuett.data.Spectrogram(nfft=2, stride=1)
spectrogram = stuett.data.Spectrogram(nfft=2, stride=1, dim='time')
np.random.seed(123)
da = xr.DataArray(
np.random.rand(9, 2),
np.random.rand(9, 3),
[
("time", pd.date_range("2000-01-01", periods=9)),
("channel", ["EHE", "EHZ"]),
("channel", ["EHE", "EHZ", "EHN"]),
],
attrs={"sampling_rate":0.000011574}
)
x = spectrogram(da)
......@@ -64,3 +65,24 @@ def test_spectrogram():
test_spectrogram()
def test_lttbdownsampling():
    """Smoke-test LTTBDownsampling on a small random 2-channel DataArray."""
    np.random.seed(123)
    times = pd.date_range("2000-01-01", periods=9)
    channels = ["EHE", "EHZ"]
    da = xr.DataArray(
        np.random.rand(9, 2),
        coords=[("time", times), ("channel", channels)],
    )
    downsampler = stuett.data.LTTBDownsampling(rate=2)
    x = downsampler(da)
    print(x)
# test_lttbdownsampling()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment