To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 7635cfe3 authored by Roman Trüb's avatar Roman Trüb
Browse files

adapted fetcher to work with updated dwt_parse.py

cleanup
parent 1f78635d
......@@ -430,35 +430,27 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, logqueue
(itemtype, obsid, fdir, f, workerstate) = queueitem
input_filename = "%s/%s" % (fdir, f)
loggername = "(%s.%d) " % (cur_p.name, obsid)
# DEBUG
shutil.copyfile(input_filename, "%s_raw" % resultfile_path)
# parse the file
# first line of the log file contains the variable names
varnames = ""
with open(input_filename, "r") as f:
varnames = f.readline().strip().split()
# parse raw datatrace log
df_parsed = dwt.parse_dwt_output(input_filename)
# apply linear regression to correct the timestamps
try:
df_corrected = dwt.correct_ts_with_regression(df_parsed)
# process raw datatrace log (parse & apply time correction)
dfData, dfLocalTs, dfOverflow = dwt.processDatatraceOutput(input_filename)
except ValueError:
logqueue.put_nowait((loggername, logging.WARNING, "Empty data trace results file."))
else:
df = df_corrected
# remove timestamp rows (which contain nan values) -> drop corresponding lines, note: PC column can contain nan!
df.dropna(subset=['comparator', 'operation'], inplace=True)
# convert columns to int if required; if there were nan values, comparator column was stored as nan but we need int; round is necessary otherwise 0.999999 is converted to 0 which is wrong
if 'float' in str(df_corrected.comparator.dtypes):
df_corrected.comparator = df_corrected.comparator.round().astype(int)
if 'float' in str(df_corrected.data.dtypes):
df_corrected.data = df_corrected.data.round().astype(int)
# add observer and node ID
df['obsid'] = obsid
df['nodeid'] = nodeid
dfData['obsid'] = obsid
dfData['nodeid'] = nodeid
# convert comparator ID to variable name
df['varname'] = df.comparator.apply(lambda x: (varnames[x] if x < len(varnames) else str(x)))
dfData['varname'] = dfData.comparator.apply(lambda x: (varnames[x] if x < len(varnames) else str(x)))
# append datatrace elements from observer to datatrace log file
with open(resultfile_path, "a") as outfile:
df.to_csv(
dfData.to_csv(
outfile,
columns=['global_ts', 'obsid', 'nodeid', 'varname', 'data', 'operation', 'PC'],
index=False,
......@@ -469,9 +461,6 @@ def worker_datatrace(queueitem=None, nodeid=None, resultfile_path=None, logqueue
_errors.append((msg, obsid))
logqueue.put_nowait((loggername, logging.ERROR, msg))
finally:
# debug
# shutil.copyfile(input_filename, "%s_raw" % resultfile_path)
# shutil.copyfile(tmpfile1, "%s_uncorrected.csv" % resultfile_path)
# delete files
os.remove(input_filename)
processeditem = list(queueitem)
......
......@@ -541,26 +541,45 @@ def timeCorrection(dfData, dfLocalTs, dfOverflow):
# calculate linear regression
# FIXME: try more elaborate regressions (piecewise linear, regression splines)
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
# ## DEBUG visualize
# import matplotlib.pyplot as plt
# # plt.close('all')
# # regression
# fig, ax = plt.subplots()
# ax.scatter(x, y, marker='.', label='Data', c='r')
# ax.plot(x, slope*x + intercept, label='Regression', c='b')
# ax.set_title('Regression')
# ax.set_xlabel('LocalTs')
# ax.set_ylabel('GlobalTs')
# ax.legend()
# # residuals
# fig, ax = plt.subplots()
# ax.plot(x, y - slope*x + intercept, label='Residual', c='b')
# ax.set_title('Residuals')
# ax.set_xlabel('LocalTs')
# ax.set_ylabel('Diff')
# ax.legend()
slope_a, intercept_a, r_value_a, p_value_a, std_err_a = stats.linregress(x, y)
# slope_inv, intercept_inv, r_value_inv, p_value_inv, std_err_inv = stats.linregress(y, x)
# slope_b = 1/slope_inv
# intercept_b = -intercept_inv/slope_inv
#
# slope_gmr = 1/2 * (slope_a + slope_b)
# intercept_gmr = 1/2 * (intercept_a + intercept_b)
slope = slope_a
intercept = intercept_a
## DEBUG visualize
import matplotlib.pyplot as plt
plt.close('all')
# regression
fig, ax = plt.subplots()
ax.scatter(x, y, marker='.', label='Data (uncorrected)', c='r')
print('slope_a : {:.20f}; intercept_a: {:.6f}'.format(slope_a, intercept_a))
# print('slope_gmr: {:.20f}; intercept_gmr: {:.6f}'.format(slope_gmr, intercept_gmr))
# print('slope_b : {:.20f}; intercept_b: {:.6f}'.format(slope_b, intercept_b))
ax.plot(x, slope_a*x + intercept_a, label='Regression (x->y)', c='b', marker='.')
# ax.plot(x, slope_b*x + intercept_b, label='Regression (y->x)', c='g', marker='.')
# ax.plot(x, slope_gmr*x + intercept_gmr, label='Regression (GMR)', c='orange', marker='.')
ax.set_title('Regression')
ax.set_xlabel('LocalTs')
ax.set_ylabel('GlobalTs')
ax.legend()
# residuals
res = slope*x + intercept - y
print('mean of residuals (first half): {}'.format(np.mean(res[:int(len(res)/2)])))
print('mean of residuals (second half): {}'.format(np.mean(res[int(len(res)/2):])))
fig, ax = plt.subplots()
ax.plot(x, res, label='Residual', c='b', marker='.')
ax.plot(x, pd.DataFrame(res).rolling(100, center=True, min_periods=1).mean().to_numpy(), label='Residual (moving avg)', c='orange', marker='.')
ax.set_title('Residuals')
ax.set_xlabel('LocalTs')
ax.set_ylabel('Diff')
ax.legend()
# add corrected timestamps to dataframe
dfDataCorr['global_ts'] = dfDataCorr.local_ts * slope + intercept
......
#! /usr/bin/env python3
"""
Copyright (c) 2020, ETH Zurich, Computer Engineering Group
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
Author: Lukas Daschinger
"""
import sys
import pandas as pd
import matplotlib.pyplot as plt
def configure_read_parse(filename='swo_read_log_corrected.csv'):
    """Plot the traced data values of comparators 0 to 3 over time.

    Reads the CSV file ``filename`` into a pandas data frame (the columns
    ``global_ts``, ``data`` and ``comparator`` are used), fills missing data
    values and draws one step curve per comparator ID before showing the
    figure with ``plt.show()``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        None.
    """
    df = pd.read_csv(filename)

    # Rows with a global timestamp but no data would cause problems in the
    # plot, so forward fill them. Series.ffill() is used instead of
    # fillna(method='ffill') because the `method` keyword of fillna is
    # deprecated in recent pandas releases.
    # At the very beginning there is nothing to forward fill from; those
    # values are set to zero.
    df['data'] = df['data'].ffill().fillna(0)

    # Extract timestamps and data values of comp0 .. comp3 separately and
    # draw one step curve for each of them (one plt.step call per ID, also
    # when a comparator has no rows).
    for comparator in range(4):
        selected = df['comparator'] == comparator
        plt.step(
            df.loc[selected, 'global_ts'].to_numpy(),
            df.loc[selected, 'data'].to_numpy(),
        )

    # axis labels
    plt.xlabel('time [s]')
    plt.ylabel('variable')
    plt.show()
if __name__ == '__main__':
    # Script entry point: plot the file given as first command line argument.
    # Without an argument nothing is done.
    cli_args = sys.argv[1:]
    if cli_args:
        configure_read_parse(cli_args[0])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment