To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit b713fcae authored by holukas's avatar holukas
Browse files

preparing v0.16.0

parent e0706cee
import fnmatch
import matplotlib.dates as mdates
import matplotlib.gridspec as gridspec
# matplotlib.use('Qt5Agg')
......@@ -9,8 +11,12 @@ import gui.plotfuncs
import logger
from gui import gui_elements
from gui import tabs
from modboxes.default.Plots.styles.LightTheme import *
from help import infoboxes
from inout.DataFunctions import export_to_main
from inout.VarGroups import *
from modboxes.default.Plots.styles.LightTheme import *
# pd.set_option('display.width', 1000)
# pd.set_option('display.max_columns', 15)
# pd.set_option('display.max_rows', 20)
......@@ -23,8 +29,9 @@ class addContent(tabs.buildTab):
super().__init__(app_obj, title, tab_id, tab_template='SVP')
# Add settings menu contents
self.drp_define_nighttime_data_as, self.lne_z, self.btn_keep_marked, \
self.btn_remove_marked, self.btn_add_as_new_var, self.btn_calc = \
self.drp_define_nighttime_based_on, self.drp_set_nighttime_if, self.lne_nighttime_threshold, \
self.lne_z, self.btn_keep_marked, self.btn_remove_marked, self.btn_add_as_new_var, self.btn_calc, \
self.lne_outlier_timewin, self.drp_repeat = \
self.add_settings_fields()
# Add variables required in settings
......@@ -40,7 +47,9 @@ class addContent(tabs.buildTab):
# self.axes_dict = self.add_axes()
def populate_settings_fields(self):
    """Fill the 'Define Nighttime Based On' dropdown with candidate variables.

    A column of ``self.tab_data_df`` is offered when the first element of its
    column tuple matches at least one of the shell-style patterns listed in
    ``NIGHTTIME_DETECTION``. The entry added to the dropdown is the pretty
    name found at the same position in ``self.col_list_pretty``.
    """
    for ix, colname_tuple in enumerate(self.tab_data_df.columns):
        varname = colname_tuple[0]
        # `pattern` instead of `id`, so the builtin is not shadowed.
        if any(fnmatch.fnmatch(varname, pattern) for pattern in NIGHTTIME_DETECTION):
            self.drp_define_nighttime_based_on.addItem(self.col_list_pretty[ix])
# def add_axes(self):
# gs = gridspec.GridSpec(1, 1) # rows, cols
......@@ -55,46 +64,64 @@ class addContent(tabs.buildTab):
gui.gui_elements.add_header_subheader_to_grid_top(layout=self.sett_layout,
txt=["Double Difference",
"Outlier Detection"])
gui.gui_elements.add_info_hover_to_grid(layout=self.sett_layout, row=0, col=1,
txt_info_hover=infoboxes.double_diff)
gui.gui_elements.add_spacer_item_to_grid(layout=self.sett_layout, row=1, col=0)
gui.gui_elements.add_header_in_grid_row(layout=self.sett_layout, txt="Nighttime", row=2)
drp_define_nighttime_data_as = gui_elements.grd_LabelDropdownPair(txt='Define Nighttime Data As',
css_ids=['', ''],
layout=self.sett_layout,
row=3, col=0,
orientation='horiz')
drp_define_nighttime_data_as.addItem('EddyPro daytime column = 0')
gui.gui_elements.add_spacer_item_to_grid(layout=self.sett_layout, row=4, col=0)
gui.gui_elements.add_header_in_grid_row(layout=self.sett_layout, txt="Outlier Threshold", row=5)
lne_z = gui_elements.add_label_linedit_pair_to_grid(txt='Threshold Value (z)',
css_ids=['', 'cyan'], layout=self.sett_layout,
row=6, col=0, orientation='horiz')
# Day / night
gui.gui_elements.add_header_in_grid_row(
row=2, layout=self.sett_layout, txt='Day / Night')
drp_define_nighttime_based_on = gui_elements.grd_LabelDropdownPair(
txt='Define Nighttime Based On', css_ids=['', 'cyan'], layout=self.sett_layout,
row=3, col=0, orientation='horiz')
drp_set_nighttime_if = gui_elements.grd_LabelDropdownPair(
txt='Set To Nighttime If', css_ids=['', 'cyan'], layout=self.sett_layout,
row=4, col=0, orientation='horiz')
drp_set_nighttime_if.addItems(['Smaller Than Threshold', 'Larger Than Threshold'])
lne_nighttime_threshold = gui_elements.add_label_linedit_pair_to_grid(
txt='Threshold', css_ids=['', 'cyan'], layout=self.sett_layout,
row=5, col=0, orientation='horiz')
lne_nighttime_threshold.setText('20')
gui_elements.add_spacer_item_to_grid(self.sett_layout, 6, 0)
# Outlier settings
gui.gui_elements.add_header_in_grid_row(
row=7, layout=self.sett_layout, txt="Outlier Threshold")
lne_outlier_timewin = gui_elements.add_label_linedit_pair_to_grid(
txt='Time Window (Days)', css_ids=['', 'cyan'], layout=self.sett_layout,
row=8, col=0, orientation='horiz')
lne_outlier_timewin.setText('13')
lne_z = gui_elements.add_label_linedit_pair_to_grid(
txt='Threshold Value (z)', css_ids=['', 'cyan'], layout=self.sett_layout,
row=9, col=0, orientation='horiz')
lne_z.setText('5.5')
gui.gui_elements.add_spacer_item_to_grid(layout=self.sett_layout, row=7, col=0)
btn_calc = gui_elements.add_button_to_grid(grid_layout=self.sett_layout,
txt='Calculate Outliers', css_id='',
row=8, col=0, rowspan=1, colspan=2)
btn_keep_marked = gui_elements.add_button_to_grid(grid_layout=self.sett_layout,
txt='Keep Marked Values', css_id='',
row=9, col=0, rowspan=1, colspan=2)
btn_remove_marked = gui_elements.add_button_to_grid(grid_layout=self.sett_layout,
txt='Remove Marked Values', css_id='',
row=10, col=0, rowspan=1, colspan=2)
gui.gui_elements.add_spacer_item_to_grid(layout=self.sett_layout, row=11, col=0)
btn_add_as_new_var = gui_elements.add_button_to_grid(grid_layout=self.sett_layout,
txt='+ Add As New Var', css_id='btn_add_as_new_var',
row=12, col=0, rowspan=1, colspan=2)
self.sett_layout.setRowStretch(13, 1)
return drp_define_nighttime_data_as, lne_z, btn_keep_marked, \
btn_remove_marked, btn_add_as_new_var, btn_calc
drp_repeat = gui_elements.grd_LabelDropdownPair(
txt='Repeat Until All Outliers Removed', css_ids=['', ''], layout=self.sett_layout,
row=10, col=0, orientation='horiz')
drp_repeat.addItems(['No', 'Yes'])
gui.gui_elements.add_spacer_item_to_grid(
row=11, col=0, layout=self.sett_layout)
# Buttons
btn_calc = gui_elements.add_button_to_grid(
grid_layout=self.sett_layout, txt='Calculate Outliers', css_id='',
row=12, col=0, rowspan=1, colspan=2)
btn_keep_marked = gui_elements.add_button_to_grid(
grid_layout=self.sett_layout, txt='Keep Marked Values', css_id='',
row=13, col=0, rowspan=1, colspan=2)
btn_remove_marked = gui_elements.add_button_to_grid(
grid_layout=self.sett_layout, txt='Remove Marked Values', css_id='',
row=14, col=0, rowspan=1, colspan=2)
gui.gui_elements.add_spacer_item_to_grid(
row=15, col=0, layout=self.sett_layout)
btn_add_as_new_var = gui_elements.add_button_to_grid(
grid_layout=self.sett_layout, txt='+ Add As New Var', css_id='btn_add_as_new_var',
row=16, col=0, rowspan=1, colspan=2)
self.sett_layout.setRowStretch(17, 1)
return drp_define_nighttime_based_on, drp_set_nighttime_if, lne_nighttime_threshold, \
lne_z, btn_keep_marked, btn_remove_marked, btn_add_as_new_var, btn_calc, \
lne_outlier_timewin, drp_repeat
class Run(addContent):
......@@ -102,6 +129,7 @@ class Run(addContent):
marker_isset = False
ready_to_export = False
marker_filter = None # Filter to mark detected outliers
grouping_col = ('_group', '[#]')
sub_outdir = "outlier_removal_double_diff"
def __init__(self, app_obj, title, tab_id):
......@@ -113,6 +141,7 @@ class Run(addContent):
self.set_colnames()
tabs.buildTab.update_btn_status(obj=self)
self.axes_dict = self.make_axes_dict()
self.get_settings_from_fields()
def select_target(self):
"""Select target var from list"""
......@@ -137,37 +166,83 @@ class Run(addContent):
pass
def get_settings_from_fields(self):
    """Read the current values of the settings widgets into attributes.

    Sets:
        self.z_val: float parsed from the 'Threshold Value (z)' line edit.
        self.nighttime_col: column selected in the nighttime dropdown,
            resolved through ``get_col_from_drp_pretty``.
        self.set_nighttime_if: current text of the comparison dropdown.
        self.nighttime_threshold: int parsed from the threshold line edit
            (a fractional entry such as '20.5' raises ValueError).
        self.outlier_timewin: float parsed from the time-window line edit.
        self.repeat: current text of the repeat dropdown.

    Returns:
        The parsed z threshold, so callers that still use the return value
        keep working.

    Raises:
        ValueError: if one of the numeric fields cannot be parsed.
    """
    self.z_val = float(self.lne_z.text())
    self.nighttime_col = self.get_col_from_drp_pretty(drp=self.drp_define_nighttime_based_on)
    self.set_nighttime_if = self.drp_set_nighttime_if.currentText()
    self.nighttime_threshold = int(self.lne_nighttime_threshold.text())
    self.outlier_timewin = float(self.lne_outlier_timewin.text())
    self.repeat = self.drp_repeat.currentText()
    return self.z_val
def get_col_from_drp_pretty(self, drp):
    """Return the column key that belongs to the dropdown's current selection.

    The dropdown shows pretty names; the position of the selected name in
    ``self.col_list_pretty`` is used as key into ``self.col_dict_tuples``.

    Args:
        drp: widget exposing ``currentText()``.

    Returns:
        The entry of ``self.col_dict_tuples`` for the selected variable.

    Raises:
        ValueError: if the current text is not in ``self.col_list_pretty``
            (for example when the dropdown is empty).
    """
    selected_pretty = drp.currentText()
    try:
        position = self.col_list_pretty.index(selected_pretty)
    except ValueError as err:
        # Same exception type as before, but with a message that names the cause.
        raise ValueError(
            f"Dropdown selection {selected_pretty!r} is not a known variable.") from err
    return self.col_dict_tuples[position]
def calc(self):
    """Flag outliers in the target variable, block by block and per group.

    Steps:
        1. Read the settings from the GUI fields.
        2. Reduce ``self.class_df`` to the target column, then add the
           grouping flag and the helper columns.
        3. Split the data into consecutive blocks of ``self.outlier_timewin``
           days, and each block into the groups of ``self.grouping_col``.
        4. For every group compute the differences, MAD, limits and flag,
           and merge the group results back into ``self.class_df``.
        5. Mark the result in the plot. If repeating is set to 'Yes', remove
           the marked values and start over until no value is flagged.
    """
    self.get_settings_from_fields()
    # Copy, so that adding helper columns does not act on a slice of the old frame.
    self.class_df = self.class_df[[self.target_col]].copy()
    self.add_daynight_data()
    self.init_new_cols()

    # Group into e.g. 13-day blocks
    freq = f"{self.outlier_timewin}D"
    num_outliers = 1
    while num_outliers > 0:
        grouped_freq = self.class_df.groupby(pd.Grouper(level=self.class_df.index.name, freq=freq))
        for _key_freq, group_freq_df in grouped_freq:
            # A gap in the data longer than the window yields an empty block.
            if group_freq_df.empty:
                continue
            print(f"FROM {group_freq_df.index[0]}"
                  f" TO {group_freq_df.index[-1]}"
                  f" VALUES {len(group_freq_df)}")

            # Split the block by the grouping flag (day- and night-data)
            grouped_daynight = group_freq_df.groupby(self.grouping_col)
            for _key_daynight, group_daynight_df in grouped_daynight:
                # Following equations in reference
                _group_df = group_daynight_df.copy()
                _group_df = self.calc_di(df=_group_df, shift_by=1)
                _group_df = self.calc_MAD(df=_group_df)
                _group_df = self.calc_upper_lower_lim(df=_group_df, z=self.z_val)
                _group_df = self.generate_flag(df=_group_df)

                # Add results for this group (np.nan in class_df is replaced w/ results)
                self.class_df[self.priority_cols] = \
                    self.class_df[self.priority_cols].combine_first(_group_df[self.priority_cols])

        num_outliers = self.class_df[self.qcflag_col].sum()
        self.mark_in_plot()

        # Single pass unless repeating was explicitly requested.
        if self.repeat != 'Yes':
            break
        # NOTE(review): in 'Yes' mode the loop only ends once the flag sum
        # reaches zero, i.e. it relies on remove_marked() -- confirm.
        self.remove_marked()
def extend_df(self, daynight_df, other_df, which):
    """
    Extend start or end by one value, for the calc of di

    see Papale et al. (2006), Section 2.2, p573

    The first (``which='start'``) or last (any other value) index label of
    ``daynight_df`` is looked up in ``other_df``; the row directly before or
    after that position in ``other_df`` is appended and the result is sorted
    by index.

    Args:
        daynight_df: data of one group; it is not modified.
        other_df: frame the neighbouring row is taken from.
        which: 'start' to prepend the previous row, otherwise the next row
            is appended.

    Returns:
        A copy of ``daynight_df``, extended by one row where possible.
        The unextended copy is returned when ``daynight_df`` is empty, the
        edge label is not found in ``other_df``, or there is no neighbouring
        row (edge of ``other_df``).
    """
    extension_df = daynight_df.copy()
    if extension_df.empty:
        return extension_df

    # 'start' looks at the first row and steps back, 'end' at the last and steps forward
    edge_pos, step = (0, -1) if which == 'start' else (-1, 1)
    edge_label = extension_df.index[edge_pos]

    # Integer position(s) of the edge label in other
    matches = np.where(other_df.index == edge_label)[0]
    if matches.size == 0:
        return extension_df

    # With duplicate labels use the outermost match; int() gives a plain scalar
    neighbour_pos = int(matches[edge_pos]) + step
    if neighbour_pos < 0 or neighbour_pos >= len(other_df):  # Not possible, return
        return extension_df

    # List indexer keeps the row as a one-row DataFrame, as needed for concat
    neighbour_row = other_df.iloc[[neighbour_pos]]
    extension_df = pd.concat([extension_df, neighbour_row], axis=0)
    return extension_df.sort_index(axis=0)
def set_colnames(self):
self.shifted_down_col = ('_shifted_down', '[aux]')
......@@ -187,14 +262,25 @@ class Run(addContent):
self.lower_lim_col]
def add_daynight_data(self):
    """Add flag to indicate group membership, in this case day/night data.

    The values of ``self.nighttime_col`` are taken from ``self.tab_data_df``
    and compared with ``self.nighttime_threshold``. Rows that fulfil the
    comparison chosen in ``self.set_nighttime_if`` get 0 in
    ``self.grouping_col``, all other rows get 1. Since a comparison with
    NaN is False, rows without a value end up in group 1.

    Returns:
        ``self.grouping_col``, the key of the flag column, for callers that
        group by the returned column.

    Raises:
        ValueError: if ``self.set_nighttime_if`` is not one of the two
            supported comparison modes.
    """
    smaller = 'Smaller Than Threshold'
    larger = 'Larger Than Threshold'
    # Validate before touching the data; an unknown mode used to fail later
    # with a TypeError from negating None.
    if self.set_nighttime_if not in (smaller, larger):
        raise ValueError(f"Unknown nighttime condition: {self.set_nighttime_if!r}")

    self.class_df[self.grouping_col] = np.nan

    # The nighttime column is only needed for grouping; remember whether it
    # has to be removed again afterwards.
    drop_nighttime_col = self.nighttime_col not in list(self.class_df.columns)
    self.class_df[self.nighttime_col] = self.tab_data_df[self.nighttime_col]

    # Generate flag
    if self.set_nighttime_if == smaller:
        nighttime_filter = self.class_df[self.nighttime_col] < self.nighttime_threshold
    else:
        nighttime_filter = self.class_df[self.nighttime_col] > self.nighttime_threshold
    self.class_df.loc[nighttime_filter, [self.grouping_col]] = 0
    self.class_df.loc[~nighttime_filter, [self.grouping_col]] = 1

    if drop_nighttime_col:
        self.class_df.drop(self.nighttime_col, axis=1, inplace=True)
    return self.grouping_col
def init_new_cols(self):
self.class_df[self.shifted_down_col] = np.nan
......@@ -209,25 +295,32 @@ class Run(addContent):
self.class_df[self.date_group_to_col] = np.nan
def calc_di(self, df, shift_by):
    """
    Calculate differences di and median of differences Md

    Eq. (1) / di = (NEEi − NEEi−1) − (NEEi+1 − NEEi )

    Args:
        df: data of one group, containing ``self.target_col``.
        shift_by: number of rows the neighbouring values are away.

    Returns:
        Copy of the data in its original index range, with the shifted
        values, di and Md written to ``self.shifted_down_col``,
        ``self.shifted_up_col``, ``self.di_col`` and ``self.Md_col``.
        An empty ``df`` is returned unchanged.
    """
    if df.empty:
        return df

    # Before potential extension, get original start and end index.
    # Since extension is only needed for the calculation of di,
    # the original start and end can be used to restrict df to
    # its original range after di was calculated.
    df_start = df.index[0]
    df_end = df.index[-1]
    df = self.extend_df(daynight_df=df, other_df=self.class_df, which='start')
    df = self.extend_df(daynight_df=df, other_df=self.class_df, which='end')

    target = df[self.target_col]
    df.loc[:, self.shifted_down_col] = target.shift(periods=shift_by)
    df.loc[:, self.shifted_up_col] = target.shift(periods=-shift_by)
    df.loc[:, self.di_col] = \
        target.subtract(df[self.shifted_down_col]) - \
        df[self.shifted_up_col].subtract(target)
    df[self.Md_col] = df[self.di_col].median()

    # Restore original range (unextended); copy so later column assignments
    # do not act on a slice.
    return df.loc[df_start:df_end].copy()
def calc_MAD(self, df):
"""Calculate median of absolute deviation about the median"""
# Eq. (4) / MAD = median (|di−Md|)
df[self.MAD_col] = df[self.di_col] - df[self.Md_col]
df[self.MAD_col] = df[self.MAD_col].abs()
......
......@@ -367,7 +367,8 @@ class Call:
y = plot_df[self.focus_col]
h = sns.distplot(y.dropna(), ax=ax, kde=False, color=COLOR_HISTOGRAM, bins=25)
h = sns.histplot(y.dropna(), ax=ax, kde=False, color=COLOR_HISTOGRAM, bins=25, stat='count')
# h = sns.distplot(y.dropna(), ax=ax, kde=False, color=COLOR_HISTOGRAM, bins=25)
ax.text(0.05, 0.95, "Histogram",
horizontalalignment='left', verticalalignment='top', transform=ax.transAxes,
size=FONTSIZE_HEADER_AXIS, color=COLOR_HISTOGRAM, backgroundcolor='none')
......
This diff is collapsed.
This diff is collapsed.
import main
main.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment