To receive notifications about scheduled maintenance, please subscribe to the gitlab-operations@sympa.ethz.ch mailing list at https://sympa.ethz.ch.

Commit 2e775481 authored by matthmey's avatar matthmey
Browse files

Initial commit

parents
"""
Copyright (c) 2019, Swiss Federal Institute of Technology (ETH Zurich)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import numpy as np
import pandas as pd

# Working directory and experiment mode for this export script.
tmp_dir = 'tmp/'
mode = 'microseismic_events'

import permasense

# Point the permasense package at the local data vault.
permasense_vault_dir = '/home/perma/permasense_vault/'
permasense.global_settings.get_global_settings()['permasense_vault_dir'] = permasense_vault_dir

# Dataset parameters: the annotated test-events CSV.
test_params = {}
test_params['dataframe'] = 'tmp/microseismic_test_events.csv'
# Class names shared across the project.
labels = list(pd.read_csv('common/microseismic_labels.csv')['microseismic_labels'].values)

from aednn.datasets.MHdataset import microseismic_dataset as dataset_class

# Build the test dataset with the mel-spectrogram transform and load it.
dataset_test = dataset_class('tmp/',name='%s_test'%mode,params=test_params,transform_type='custom_melspec',labels=labels)
X_test, y_test = dataset_test.get_data(reload=False)

from aednn.models.geocnn import geocnn as model

# Model-specific preprocessing of the spectrograms.
# NOTE(review): bypass_transpose=True presumably skips the axis swap the
# model normally applies -- confirm in geocnn.preprocessing.
X_test = model.preprocessing(X_test.astype(np.float32),bypass_transpose=True)
# shuffle test data with a fixed seed so the export is reproducible
indices = np.arange(X_test.shape[0])
np.random.seed(5467)
np.random.shuffle(indices)
X_test = X_test[indices]
y_test = y_test[indices]
# Keep only the first channel and drop singleton axes.
X_test = X_test[...,0]
X_test = np.squeeze(X_test) # shape is (num_image, T, F) --> after flattening, the first 64 values build the first spectrogram column etc.
# Reduce multi-label targets to a single column.
# NOTE(review): column 3 is presumably the 'mountaineer' label -- confirm
# against common/microseismic_labels.csv.
y_test = y_test[:,3]
print(X_test.shape)
# X_test = np.arange(X_test.size).reshape(X_test.shape)
# X_test = np.arange(X_test.size).reshape(X_test.shape)
print(X_test.shape)
# print(X_test)
# Interleave positive/negative samples so that the binary dump starts with
# alternating pos/neg examples; leftover positives follow at the end.
# NOTE(review): unlike the later extract_waveforms() variant, this assumes
# there are at least as many positives as negatives -- confirm for this set
# (otherwise long_mask[:len(short_mask)] is too short and the slice
# assignment raises).
positive_mask = np.where(y_test == 1)[0]
negative_mask = np.where(y_test == 0)[0]
short_mask = negative_mask
long_mask = positive_mask
# BUGFIX: np.int was removed in NumPy 1.24; the builtin int is equivalent.
indices = np.zeros(y_test.shape, dtype=int)
indices[0:2 * len(short_mask):2] = short_mask
indices[1:2 * len(short_mask):2] = long_mask[:len(short_mask)]
indices[2 * len(short_mask):] = long_mask[len(short_mask):]
y_test = y_test[indices]
X_test = X_test[indices]
print(np.sum(y_test))
print(y_test.shape)
print(y_test)
# Dump (label, flattened spectrogram) pairs as raw float32 records.
# BUGFIX: 'with' guarantees the file is closed even if a write fails
# (the previous open()/close() pair leaked the handle on error).
with open('tmp/mh_test_data.bin', 'wb') as f:
    for ii in range(X_test.shape[0]):
        data = np.array(X_test[ii]).flatten().astype(np.float32)
        label = np.array(y_test[ii]).astype(np.float32)
        # Record layout: one float32 label, then the flattened spectrogram.
        label.tofile(f)
        data.tofile(f)
# WRITE to c file: emit the first 8 events as C source/header pairs for the
# STM32 firmware. Output bytes are unchanged from the original version.
for i in range(8):
    event = X_test[i].flatten()
    # Header: declarations for the label and the event buffer.
    # BUGFIX: files are now opened via 'with' so they are flushed and closed
    # even if a write raises (previously fh/fc leaked on error).
    with open('tmp/event{}.h'.format(i), 'w') as fh:
        fh.write('const int8_t label{};'.format(i))
        fh.write('const float event{}[{}];'.format(i, len(event)))
    # C file: the label value and the float initializer list.
    with open('tmp/event{}.c'.format(i), 'w') as fc:
        fc.write('#include "stm32f4xx_hal.h"\n\n')
        fc.write('const int8_t label{} = {};'.format(i, y_test[i].astype(np.int8)))
        fc.write('const float event{}[{}] = '.format(i, len(event)))
        # join() builds the initializer in one pass instead of the previous
        # quadratic string +=; the resulting text is identical.
        body = ','.join('{}'.format(event[j].astype(np.float32))
                        for j in range(event.shape[0]))
        fc.write('{' + body + '};\n')
# test it
from keras.models import model_from_json, load_model
from INQ_layers import INQ_Conv2D, INQ_Dense, Flatten
import INQ_model
from keras.models import Model
import aednn.custom_metrics
from keras.utils.generic_utils import get_custom_objects
# generate a fake custom loss
def binary_crossentropy_weighted(y_true, y_pred):
    """Placeholder loss so load_model can resolve the stored name.

    Always evaluates to zero (with the broadcast shape of the inputs);
    it is never used for training here.
    """
    zeroed = (y_true * y_pred) * 0
    return zeroed
# Register the placeholder loss globally so load_model can deserialize it.
get_custom_objects().update({"binary_crossentropy_weighted": binary_crossentropy_weighted})
model_path = 'results/%s/inq/keras.netbest'%mode
# Load the INQ-quantized model together with its custom layer classes.
net = load_model(model_path,custom_objects={'INQ_Model': INQ_model.INQ_Model, 'INQ_Conv2D': INQ_Conv2D, 'INQ_Dense': INQ_Dense})
print(net.summary())
# First 16 test samples, reshaped to the network's expected input layout
# (add channel axis, then swap the two spectrogram axes).
X_test0 = X_test[:16]
X_test0 = np.expand_dims(X_test0,-1)
X_test0 = X_test0.transpose((0,2, 1, 3))
print(np.sum(X_test0,axis=(1,2,3)))
out = net.predict(X_test0,batch_size=16).squeeze()
print(out)
# Binarize the predictions at 0.45 (in-place thresholding).
out[out>=0.45] = 1
out[out<0.45] = 0
print(out)
# Rebuild the net truncated at an intermediate layer and dump its
# activations, e.g. for comparison with the embedded implementation.
layer_number=-6
print(net.layers[layer_number].name)
net = Model(net.layers[0].input,net.layers[layer_number].output)
out = net.predict(X_test0,batch_size=16).squeeze()
print(out)
np.savetxt('tmp/layer%d.txt'%layer_number,out)
# generate a fake custom loss
# NOTE(review): this redefines the identical placeholder from above; the
# duplication is harmless but redundant.
def binary_crossentropy_weighted(y_true, y_pred):
    return y_true*y_pred*0
from keras.utils.generic_utils import get_custom_objects
get_custom_objects().update({"binary_crossentropy_weighted": binary_crossentropy_weighted})
# Repeat the prediction check with the non-quantized 'test' run model.
model_path = 'results/%s/test/keras.netbest'%mode
net = load_model(model_path,custom_objects={'INQ_Model': INQ_model.INQ_Model, 'INQ_Conv2D': INQ_Conv2D, 'INQ_Dense': INQ_Dense})
out = net.predict(X_test0,batch_size=16).squeeze()
print(out)
# This run uses a different decision threshold (0.65).
out[out>=0.65] = 1
out[out<0.65] = 0
print(out)
"""
Copyright (c) 2019, Swiss Federal Institute of Technology (ETH Zurich)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import numpy as np
import pandas as pd
from plotly.offline import plot
import plotly.graph_objs as go
from aednn.toolbox import threshold, evaluate, check_overlap
from aednn.datasets.MHdataset import microseismic_dataset
from aednn.models.geocnn import geocnn
import pandas as pd
import numpy as np
from tqdm import tqdm
from keras.models import load_model
import permasense

# Point the permasense package at the local data vault.
permasense_vault_dir = '/home/perma/permasense_vault/'
permasense.global_settings.get_global_settings()['permasense_vault_dir'] = permasense_vault_dir
import permasense.microseismic.data_loaders as ms_loader
from INQ_layers import INQ_Conv2D, INQ_Dense, Flatten
import INQ_model
import argparse

# Valid values for the -m and -r command-line options.
modes = ['microseismic','microseismic_events','quakenet','quakenet_events','svm','svm_events','image','ensemble']
runs = ['test','inq']
parser = argparse.ArgumentParser()
parser.add_argument("-g", "--gpu", type=str,
                    help="use the given gpu")
parser.add_argument("-m", "--mode", type=str,
                    help="choose a mode from %s"%str(modes))
parser.add_argument("-r", "--run", type=str, default='inq',
                    help="choose a run from %s"%str(runs))
args = parser.parse_args()
# BUGFIX: the -g option was parsed but previously ignored; honour it the
# same way the sibling classification script in this project does.
if args.gpu:
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
# Fail early with a clear message instead of crashing later on a malformed
# results path (consistent with the validation in the sibling script).
if args.mode not in modes:
    raise ValueError('Please choose an appropriate mode from %s with the -m option'%str(modes))
if args.run not in runs:
    raise ValueError('Please choose an appropriate run from %s with the -r option'%str(runs))
mode = args.mode
run = args.run
# Load dummy dataset for transformation: only its transform() is used.
dataset = microseismic_dataset('tmp/',name='none',params={'dataframe':pd.DataFrame.from_dict({'microseismic_labels':['none'],'start_time':[0]}),'datadir':0},transform_type='custom_melspec')
# Load model (with the project's custom INQ layer classes).
net = load_model('results/%s/%s/keras.netbest'%(mode,run),custom_objects={'INQ_Model': INQ_model.INQ_Model, 'INQ_Conv2D': INQ_Conv2D, 'INQ_Dense': INQ_Dense})
# Decision threshold selected on the validation set.
val_threshold = np.load('results/%s/%s/val_threshold.npy'%(mode,run))
def extract_waveforms(name):
    """Classify the first 200 events of a split and cache the results.

    Reads tmp/microseismic_<name>_events.csv, fetches each event's MH36/EHZ
    waveform, runs the loaded net on its spectrogram, then interleaves
    positive/negative samples and saves:
      - tmp/waveforms.npy : raw waveforms, one row per event
      - tmp/nn_out.npy    : (ground_truth, value, decision) per event

    Parameters
    ----------
    name : str
        Split name, e.g. 'test' (selects which events CSV to load).
    """
    events = pd.read_csv('tmp/microseismic_%s_events.csv'%name)
    events['start_time'] = pd.to_datetime(events['start_time'],utc=True)
    events['end_time'] = pd.to_datetime(events['end_time'],utc=True)
    # Binary label: does the (string-typed) label list mention a mountaineer?
    events['labels'] = events['microseismic_labels'].apply(lambda x: 'mountaineer' in x)
    waveform_list = []
    output_list = []
    # Only the first 200 events are processed here (subset for this dump).
    for i in range(200):
        event = events.iloc[i]
        # Fixed-length buffer of 12800 samples.
        # NOTE(review): the slice assignment requires the loader to return at
        # least 12800 samples; a shorter waveform would raise -- confirm.
        waveform = np.zeros((12800,))
        waveform[:12800] = ms_loader.get_numpy_array(event['start_time'],event['end_time'],station='MH36',channels=['EHZ']).reshape((-1,))[:12800]
        waveform_list.append(waveform)
        # Time-frequency transform + model preprocessing, add batch axis.
        spec = dataset.transform(waveform)
        spec = np.expand_dims(spec,-1)
        spec_pp = geocnn.preprocessing(spec)
        spec_pp = np.expand_dims(spec_pp,0)
        value = net.predict(spec_pp,batch_size=1)[0]
        decision = threshold(value,val_threshold)
        # BUGFIX: np.int was removed in NumPy 1.24; use the builtin int.
        ground_truth = event['labels'].astype(int)
        output_list.append([ground_truth,value,decision])
    nn_out = np.array(output_list).squeeze()
    waveforms = np.array(waveform_list)
    # Interleave samples so the saved arrays begin with alternating pos/neg;
    # the surplus of the majority class is appended at the end.
    positive_mask = np.where(nn_out[:,0]==1)[0]
    negative_mask = np.where(nn_out[:,0]==0)[0]
    if len(positive_mask) > len(negative_mask):
        short_mask = negative_mask
        long_mask = positive_mask
    else:
        short_mask = positive_mask
        long_mask = negative_mask
    # BUGFIX: np.int was removed in NumPy 1.24; use the builtin int.
    indices = np.zeros((len(nn_out),),dtype=int)
    indices[0:2*len(short_mask):2] = short_mask
    indices[1:2*len(short_mask):2] = long_mask[:len(short_mask)]
    indices[2*len(short_mask):] = long_mask[len(short_mask):]
    nn_out = nn_out[indices]
    waveforms = waveforms[indices]
    print('waveforms shape',waveforms.shape)
    np.save('tmp/waveforms.npy',waveforms)
    print('nn_out shape',nn_out.shape)
    np.save('tmp/nn_out.npy',nn_out)
    print(nn_out)
# Entry point: process the test split and cache waveforms + predictions.
extract_waveforms('test')
"""
Copyright (c) 2019, Swiss Federal Institute of Technology (ETH Zurich)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import numpy as np
import pandas as pd
from keras.models import model_from_json, load_model
from INQ_layers import INQ_Conv2D, INQ_Dense, Flatten
import INQ_model
from keras.models import Model
from aednn.models.geocnn import geocnn
# we are going to use two transformations to the input signal
# 1. transform to time-frequency representation with dataset.transform
# 2. log scaling with geocnn.transform
from aednn.datasets.MHdataset import microseismic_dataset
# Parameters
tmp_dir = 'tmp/'
mode = 'microseismic_events'
# NOTE(review): 'zeropadding' is immediately overwritten below -- this looks
# like a manual toggle between two runs; only 'nozeropadding' is active.
run = 'zeropadding'
run = 'nozeropadding'
model_path = 'results/%s/%s/keras.netbest'%(mode,run)
# load the waveforms and the ground truth cached by extract_waveforms()
waveforms = np.load('tmp/waveforms.npy')
ground_truths = np.load('tmp/nn_out.npy')[:,0]
# load the model (with the project's custom INQ layer classes)
net = load_model(model_path,custom_objects={'INQ_Model': INQ_model.INQ_Model, 'INQ_Conv2D': INQ_Conv2D, 'INQ_Dense': INQ_Dense})
net.summary()
# initialize dataset class without parameters. we only want to use the transform() function
dataset = microseismic_dataset('tmp/',name='none',params={'dataframe':pd.DataFrame.from_dict({'microseismic_labels':['none'],'start_time':[0]}),'datadir':0},transform_type='custom_melspec')
# load the threshold
val_threshold = np.load('results/%s/%s/val_threshold.npy'%(mode,run))
# create a model which stops at a given layer
layer_number=-6
print('Short Model last layer name',net.layers[layer_number].name)
short_model = Model(net.layers[0].input,net.layers[layer_number].output)
# loop through the waveforms
for i in range(len(waveforms)):
    waveform = waveforms[i]
    ground_truth = ground_truths[i]
    # time-frequency transform + model preprocessing, then add batch axis
    spec = dataset.transform(waveform)
    spec = np.expand_dims(spec,-1)
    spec_pp = geocnn.preprocessing(spec)
    spec_pp = np.expand_dims(spec_pp,0)
    value = net.predict(spec_pp,batch_size=1)[0]
    # threshold in place -- note 'out' aliases 'value' (same array), so the
    # printed 'value' below is the already-binarized output
    out = value
    out[out>=val_threshold] = 1
    out[out<val_threshold] = 0
    print('Output model',value, 'Decision', out, 'Ground truth', ground_truth)
    # intermediate activations for comparison with the embedded version
    values = short_model.predict(spec_pp,batch_size=1)[0]
    # print('Output short model',values)
    # break # Remove this to test for more waveforms
"""
Copyright (c) 2019, Swiss Federal Institute of Technology (ETH Zurich)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import pandas as pd
import numpy as np
from aednn.datasets.MHdataset import image_dataset, microseismic_dataset
import aednn.models.mhcnn as image_model
from aednn.models.geocnn import geocnn as microseismic_model
from aednn.toolbox import *
import aednn.custom_metrics
import permasense
permasense.global_settings.get_global_settings()['permasense_vault_dir'] = '/home/perma/permasense_vault/'
from toolbox import *
import keras
from keras import backend as K
from keras.models import load_model
import os
import glob
import time
# Supported operating modes for this classification script.
modes = ['microseismic','image','ensemble']
import argparse

# Command-line interface: optional GPU selection plus a mandatory mode.
parser = argparse.ArgumentParser()
parser.add_argument("-g", "--gpu", type=str,
                    help="use the given gpu")
parser.add_argument("-m", "--mode", type=str,
                    help="choose a mode from %s"%str(modes))
args = parser.parse_args()

# Restrict TensorFlow to the requested GPU, if one was given.
if args.gpu:
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    GPU_ID = str(args.gpu)

# Reject a missing or unknown mode up front (None is never in modes, so the
# "no -m given" case raises the same error as an invalid value).
if args.mode not in modes:
    raise ValueError('Please choose an appropriate mode from %s with the -m option'%str(modes))
mode = str(args.mode)
def binary_crossentropy_weighted(y_true, y_pred):
    """Stand-in loss: load_model only needs the name to resolve.

    Always evaluates to zero; it is never used for training here.
    """
    return 0 * y_true * y_pred


# Map the name stored in the saved models to the stand-in above.
custom_objects = {"binary_crossentropy_weighted": binary_crossentropy_weighted}
tmp_dir = get_tmp_dir()
# Label vocabularies shared across the project.
image_labels = list(pd.read_csv('common/image_labels.csv')['image_labels'].values)
microseismic_labels = list(pd.read_csv('common/microseismic_labels.csv')['microseismic_labels'].values)
os.makedirs('tmp/pred_segments/',exist_ok=True)
print(image_labels,microseismic_labels)
# Pick the segment dataframe matching the requested mode.
# NOTE(review): mode == 'ensemble' leaves 'dataframe' and 'labels' undefined
# here and crashes at sort_values below -- confirm which CSV ensemble should
# load.
if mode == 'microseismic':
    dataframe = pd.read_csv('tmp/full_microseismic.csv')
    labels = microseismic_labels
elif mode == 'image':
    dataframe = pd.read_csv('tmp/full_image.csv')
    labels = image_labels
dataframe.sort_values('start_time',inplace=True)
# Process the segments in chunks of 360 rows.
batch_size = 360
# NOTE(review): this yields one extra, empty batch whenever len(dataframe)
# is an exact multiple of batch_size; the usual ceiling is
# (len + batch_size - 1) // batch_size -- confirm before changing.
number_of_batches = int( (len(dataframe) + batch_size) // batch_size )
test_params = {}
print('Loading model')
# Both nets are loaded regardless of mode; 'ensemble' combines the two.
microseismic_net = load_model('results/microseismic/test/keras.netbest',custom_objects=custom_objects)
image_net = load_model('results/image/test/keras.netbest',custom_objects=custom_objects)
# Make sure that normalization and dropout are set to test mode
K.set_learning_phase(0)
# Classify the dataframe batch-by-batch and write one CSV of predictions
# per batch to tmp/pred_segments/.
# BUGFIX: a stray 'continue' at the top of this loop previously skipped the
# entire body, so no predictions were ever produced.
for i in range(number_of_batches):
    # .copy() so the per-batch prediction columns are written to a real
    # frame instead of a slice view of 'dataframe' (pandas SettingWithCopy).
    ts = dataframe.iloc[i*batch_size:(i+1)*batch_size].copy()
    test_params['dataframe'] = ts
    start_time = time.time()
    if mode == 'microseismic' or mode == 'ensemble':
        print(test_params['dataframe'].iloc[0]['start_time'])
        # BUGFIX: 'microseismic_class' was undefined; the imported dataset
        # class is microseismic_dataset.
        dataset_test = microseismic_dataset(tmp_dir,name='microseismic_classify',params=test_params,labels=microseismic_labels, restore_from_disk=False,verbose=False)
        X_class, y_class = dataset_test.get_data()
        # First 512 time steps only, then model-specific preprocessing.
        X_class = microseismic_model.preprocessing(X_class[:,:512,...].astype(K.floatx()))
        y_pred = microseismic_net.predict(X_class,batch_size=120,verbose=0)
        y_pred = threshold(y_pred,0.35)
        y_pred_ms = y_pred
        # Column of stringified label lists, as the downstream CSVs expect.
        ts['microseismic_labels_pred'] = "[]"
        for j in range(y_pred.shape[0]):
            if y_pred[j,0] == 1:
                ts.loc[ts.index[j],'microseismic_labels_pred'] = "['mountaineer']"
    if mode == 'image' or mode == 'ensemble':
        dataset_test = image_dataset(tmp_dir,name='image_classify',params=test_params,labels=image_labels, restore_from_disk=False,verbose=False)
        X_class, y_class = dataset_test.get_data()
        print(test_params['dataframe'].iloc[0]['start_time'], X_class.shape)
        X_class = image_model.preprocessing(X_class.astype(K.floatx()))
        y_pred = image_net.predict(X_class,batch_size=128,verbose=0)
        y_pred = threshold(y_pred,0.5)
        # Column 4 is presumably the 'mountaineer' class -- TODO confirm
        # against the image_labels ordering.
        y_pred = y_pred[:,4:5]
        y_pred_img = y_pred
        ts['image_labels_pred'] = "[]"
        for j in range(y_pred.shape[0]):
            if y_pred[j,0] == 1:
                ts.loc[ts.index[j],'image_labels_pred'] = "['mountaineer']"
    if mode == 'ensemble':
        # Ensemble decision: mountaineer if either modality says so.
        y_pred = np.logical_or(y_pred_img,y_pred_ms)
    print(time.time()-start_time)
    ts['labels_pred'] = "[]"
    for j in range(y_pred.shape[0]):
        if y_pred[j,0] == 1:
            ts.loc[ts.index[j],'labels_pred'] = "['mountaineer']"
    ts.to_csv('tmp/pred_segments/%d.csv'%i)
# This is not the best way to merge the CSVs; the result needs manual
# editing afterwards:
# 1. Open the resulting file
# 2. Copy the first (header) line
# 3. Search and replace that line with nothing, so every duplicate header line is removed
# 4. Reinsert the first line if you deleted it as well