To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit c9612ae7 authored by holukas's avatar holukas
Browse files

Update to v0.18.0-alpha

parent d68ad61a
daytime,sonic_temperature_class,min_temp,max_temp,fraction_median,sum_of_squares_median,num_vals_avg,fraction_Q25,fraction_Q75,bootstrap_runs
1.0,0.0,264.494,275.45,0.002805506628928885,141.7446306091417,201.0,0.0028055066082950234,0.004052685356138176,3.0
1.0,1.0,275.46,277.512,-7.709397436907097e-05,173.35261817595563,201.0,-0.0002024451557203166,-7.709397436501619e-05,3.0
1.0,2.0,277.522,279.57,8.74958662390588e-05,201.4495434719217,201.0,-3.799279022854948e-05,0.0002003377667554113,3.0
1.0,3.0,279.572,281.041,-0.0004498535441611598,302.0861678046415,201.0,-0.0004498535541898632,-0.00040728685980051825,3.0
1.0,4.0,281.05,283.006,-0.0006262224993875907,285.36765129183686,201.0,-0.0006262225052539963,-0.0006115998698696493,3.0
1.0,5.0,283.03,284.665,-0.00027586474252506223,313.3905189277476,201.0,-0.0002758647456567244,-0.0002758647370593891,3.0
1.0,6.0,284.67,287.746,-0.000938897887281777,358.50097855708304,201.0,-0.000992812817108574,-0.0009262518831683151,3.0
0.0,0.0,262.171,273.697,0.0022689461170761244,90.09883415333927,202.0,0.002268946109401249,0.002292061884985271,3.0
0.0,1.0,273.717,275.73,0.00017127050100619698,107.55102311348244,202.0,0.0001712704975648942,0.0007853035740385816,3.0
0.0,2.0,275.73400000000004,277.591,0.0035265680889910553,109.59273864544201,201.0,0.003052291000371939,0.003530792776425204,3.0
0.0,3.0,277.603,279.802,0.003368468964495829,116.47647144086389,202.0,0.003163385035327957,0.004158487705583072,3.0
0.0,4.0,279.823,281.657,0.001001611986981479,116.58860601970267,201.0,0.0009743490596357289,0.001148402758812284,3.0
0.0,5.0,281.658,283.455,0.004134649471288179,116.43255863989909,202.0,0.00395032230761443,0.004134649512433994,3.0
0.0,6.0,283.457,286.005,0.005272149729523773,200.23479857667058,202.0,0.004621142524254208,0.00538804336435207,3.0
daytime,sonic_temperature_group,min_temp,max_temp,fraction_median,sum_of_squares_median,num_vals_avg,fraction_Q25,fraction_Q75,bootstrap_runs
1.0,0.0,264.494,275.45,0.04382996557971396,117.78441418510042,201.0,0.02708661009574173,0.05543290370400679,3.0
1.0,1.0,275.46,277.512,0.08935519480712166,155.51365596889593,201.0,0.08622858043444817,0.09379214118836479,3.0
1.0,2.0,277.522,279.57,0.07914885834611862,179.40765652130204,201.0,0.057057684200668084,0.10494022495894195,3.0
1.0,3.0,279.572,281.041,-0.0060620953659842455,282.8342250930042,201.0,-0.008514892084955775,0.007412946080716316,3.0
1.0,4.0,281.05,283.006,0.018749007376682038,342.7233068587676,201.0,0.00312565212851489,0.02211497874419134,3.0
1.0,5.0,283.03,284.665,0.016673591249483904,292.23266498919367,201.0,0.012159504273495868,0.018558446569775396,3.0
1.0,6.0,284.67,287.746,0.013309477721861945,442.2088850013222,201.0,0.0119791761000955,0.016963877814997537,3.0
0.0,0.0,262.171,273.697,0.042205521584308535,84.60614320147104,202.0,0.041518462170545944,0.04406095330260084,3.0
0.0,1.0,273.717,275.73,0.020145494764050448,108.19844187019825,202.0,0.015542835060850364,0.03596713429582704,3.0
0.0,2.0,275.73400000000004,277.591,0.06532927847337006,117.30121427473344,201.0,0.06476072860338114,0.0755388004035544,3.0
0.0,3.0,277.603,279.802,0.013960330929509598,130.7730281117726,202.0,0.008429386670832484,0.015447498086751374,3.0
0.0,4.0,279.823,281.657,0.014689991931023307,136.19230807722084,201.0,0.009991302426339912,0.02580401149483353,3.0
0.0,5.0,281.658,283.455,0.029826873294374768,155.99768191628078,202.0,0.02596360714670353,0.029826873572898722,3.0
0.0,6.0,283.457,286.005,0.025143974674648174,224.35216809178348,202.0,0.02212089425999626,0.0331581884141821,3.0
"""
(CSV file, uncompressed)
This .csv file is basically an uncompressed version of a single .amp file.
It contains a two-line header and a full timestamp in the first column.
"""
from inout.SettingsOps import update_ampsettings_dict
# from inout.TimestampTypes import C1_FullDateTimeNoSec
from inout.TimestampTypes import C1_FullDateTimeWithSec
class Settings:
    """File-type settings for 'DIIVE_CSV_30T' files (``*.csv``, not zipped)."""

    def __init__(self):
        # Built once at construction; read back via get_file_settings_dict().
        self.file_settings_dict = self.file_settings()

    def file_settings(self):
        """Assemble the file settings that are filled into the GUI.

        The timestamp-related entries are read from the timestamp info of
        ``C1_FullDateTimeWithSec``; every other entry is a fixed string.
        The assembled dict is handed to ``update_ampsettings_dict`` (imported
        from ``inout.SettingsOps``, not shown here) and its result is returned.
        """
        ts_info = C1_FullDateTimeWithSec.Settings().get_timestamp_info_dict()
        raw_settings = dict(
            filetype='DIIVE_CSV_30T',
            file_ext='*.csv',
            parse_date='0',  # 0=True
            index_col='0',
            header_rows='[0, 1]',
            freq='30T',
            na_values='-9999',
            skip_rows='[]',
            timestamp_type=ts_info['format_id'],
            date_and_time_colstr=ts_info['date_and_time_colstr'],
            timestamp_type_drp_ix=ts_info['dropdown_ix'],  # position in the dropdown
            zipped='No',
            delimiter=',',
        )
        return update_ampsettings_dict(settings_dict=raw_settings)

    def get_file_settings_dict(self):
        """Return the settings dict that was built in ``__init__``."""
        return self.file_settings_dict
def post_parse(df):
    """Return ``df`` unchanged (no post-parse modifications are made here)."""
    return df
"""
Amp (compressed)
The .amp file is basically a zip archive with a different file extension.
The purpose of this file is to store multiple files, but it can also store
only one single file.
"""
from inout.SettingsOps import update_ampsettings_dict
from inout.TimestampTypes import C1_FullDateTimeWithSec
from inout import DataFunctions
class Settings:
    """File-type settings for 'Amp_30T' files (``*.amp``, zipped)."""

    def __init__(self):
        # Built once at construction; read back via get_file_settings_dict().
        self.file_settings_dict = self.file_settings()

    def file_settings(self):
        """Assemble the file settings that are filled into the GUI.

        The timestamp-related entries are read from the timestamp info of
        ``C1_FullDateTimeWithSec``; every other entry is a fixed string.
        The assembled dict is handed to ``update_ampsettings_dict`` (imported
        from ``inout.SettingsOps``, not shown here) and its result is returned.
        """
        ts_info = C1_FullDateTimeWithSec.Settings().get_timestamp_info_dict()
        raw_settings = dict(
            filetype='Amp_30T',
            file_ext='*.amp',
            parse_date='0',  # 0=True
            index_col='0',
            header_rows='[0, 1]',
            freq='30T',
            na_values='-9999',
            skip_rows='[]',
            timestamp_type=ts_info['format_id'],
            date_and_time_colstr=ts_info['date_and_time_colstr'],
            timestamp_type_drp_ix=ts_info['dropdown_ix'],  # position in the dropdown
            zipped='Yes',
            delimiter=',',
        )
        return update_ampsettings_dict(settings_dict=raw_settings)

    def get_file_settings_dict(self):
        """Return the settings dict that was built in ``__init__``."""
        return self.file_settings_dict
def post_parse(df):
    """Return ``df`` unchanged (no post-parse modifications are made here)."""
    return df
from inout.SettingsOps import update_ampsettings_dict
from inout.TimestampTypes import C2_DateAndTime
class Settings:
    """File-type settings for 'EddyProFullOutput_30T' files (``*.csv``, not zipped)."""

    def __init__(self):
        # Built once at construction; read back via get_file_settings_dict().
        self.file_settings_dict = self.file_settings()

    def file_settings(self):
        """Assemble the file settings that are filled into the GUI.

        The timestamp-related entries are read from the timestamp info of
        ``C2_DateAndTime``; every other entry is a fixed string. The
        assembled dict is handed to ``update_ampsettings_dict`` (imported
        from ``inout.SettingsOps``, not shown here) and its result is returned.
        """
        ts_info = C2_DateAndTime.Settings().get_timestamp_info_dict()
        raw_settings = dict(
            filetype='EddyProFullOutput_30T',
            file_ext='*.csv',
            parse_date='0',  # True
            index_col='0',
            header_rows='[0, 1]',
            freq='30T',
            na_values='-9999',
            skip_rows='[0]',
            # Original note: "ix 0 in dropdown, 2 cols separate date/time";
            # the actual values are whatever C2_DateAndTime reports.
            timestamp_type=ts_info['format_id'],
            date_and_time_colstr=ts_info['date_and_time_colstr'],
            timestamp_type_drp_ix=ts_info['dropdown_ix'],
            zipped='No',
            delimiter=',',
        )
        return update_ampsettings_dict(settings_dict=raw_settings)

    def get_file_settings_dict(self):
        """Return the settings dict that was built in ``__init__``."""
        return self.file_settings_dict
def post_parse(df):
    """Return ``df`` unchanged (no post-parse modifications are made here)."""
    return df
from inout.SettingsOps import update_ampsettings_dict
from inout.TimestampTypes import C1_FullDateTimeWithSec
class Settings:
    """File-type settings for 'EthMetScr_30T' files (``*.dat``, not zipped)."""

    def __init__(self):
        # Built once at construction; read back via get_file_settings_dict().
        self.file_settings_dict = self.file_settings()

    def file_settings(self):
        """Assemble the file settings that are filled into the GUI.

        The timestamp-related entries are read from the timestamp info of
        ``C1_FullDateTimeWithSec``; every other entry is a fixed string.
        The assembled dict is handed to ``update_ampsettings_dict`` (imported
        from ``inout.SettingsOps``, not shown here) and its result is returned.
        """
        ts_info = C1_FullDateTimeWithSec.Settings().get_timestamp_info_dict()
        raw_settings = dict(
            filetype='EthMetScr_30T',
            file_ext='*.dat',
            parse_date='0',  # True
            index_col='0',
            header_rows='[0, 1]',
            freq='30T',
            na_values='-9999',
            skip_rows='[]',
            timestamp_type=ts_info['format_id'],
            date_and_time_colstr=ts_info['date_and_time_colstr'],
            timestamp_type_drp_ix=ts_info['dropdown_ix'],  # position in the dropdown
            zipped='No',
            delimiter=',',
        )
        return update_ampsettings_dict(settings_dict=raw_settings)

    def get_file_settings_dict(self):
        """Return the settings dict that was built in ``__init__``."""
        return self.file_settings_dict
def post_parse(df):
    """Return ``df`` unchanged (no post-parse modifications are made here)."""
    return df
import pandas as pd
from inout.SettingsOps import update_settings_dict
from inout.TimestampTypes import C1_FullDateTimeWithSec
from inout.TimestampTypes import C1_FullDateNoTime
class Settings:
    """File-type settings for 'Events' files (``*.events``, not zipped)."""

    def __init__(self):
        # Built once at construction; read back via get_file_settings_dict().
        self.file_settings_dict = self.file_settings()

    def file_settings(self):
        """Assemble the file settings that are filled into the GUI.

        The timestamp-related entries are read from the timestamp info of
        ``C1_FullDateNoTime``; every other entry is a fixed string. The
        assembled dict is handed to ``update_settings_dict`` (imported from
        ``inout.SettingsOps``, not shown here) and its result is returned.
        """
        ts_info = C1_FullDateNoTime.Settings().get_timestamp_info_dict()
        raw_settings = dict(
            filetype='Events',
            file_ext='*.events',
            parse_date='1',  # 0=Yes, 1=No
            index_col='0',
            header_rows='[0, 1]',
            freq='-none-',
            na_values='-9999',
            skip_rows='[]',
            timestamp_type=ts_info['format_id'],
            date_and_time_colstr=ts_info['date_and_time_colstr'],
            timestamp_type_drp_ix=ts_info['dropdown_ix'],
            zipped='No',
            delimiter=',',
        )
        return update_settings_dict(settings_dict=raw_settings)

    def get_file_settings_dict(self):
        """Return the settings dict that was built in ``__init__``."""
        return self.file_settings_dict
def post_parse(df):
    """Convert an events table into one flag column per event type.

    In ``df`` the event type is given row-wise in the first data column. The
    result has the same index as ``df`` and one column per distinct event
    type; a cell is set to ``1`` where that event occurred and is left empty
    (NaN) otherwise. Column names get a second level holding the unit string
    ``'[event]'`` so the frame has two-level column names.

    Parameters:
        df: DataFrame whose index labels the events and whose first column
            holds the event descriptions.

    Returns:
        A new DataFrame with two-level columns ``(event, '[event]')``.
    """
    events = df.iloc[:, 0]

    # Distinct event types in order of first appearance. A plain set has no
    # defined order (column order would differ between runs) and newer pandas
    # versions refuse a set as `columns`.
    unique_events = list(dict.fromkeys(events.tolist()))

    flags = pd.DataFrame(index=df.index, columns=unique_events)

    # Mark each event in its own column. Only the event description is used
    # as column label, so additional columns in `df` cannot interfere.
    for ix, event in zip(df.index, events):
        flags.loc[ix, event] = 1

    # Second column level: one '[event]' unit entry per event column.
    flags.columns = pd.MultiIndex.from_arrays(
        [unique_events, ['[event]'] * len(unique_events)])
    return flags
from inout.SettingsOps import update_ampsettings_dict
from inout.TimestampTypes import C1_FullDateTimeWithSec
class Settings:
    """File-type settings for 'TOA5_1T' files (``*.dat``, not zipped)."""

    def __init__(self):
        # Built once at construction; read back via get_file_settings_dict().
        self.file_settings_dict = self.file_settings()

    def file_settings(self):
        """Assemble the file settings that are filled into the GUI.

        The timestamp-related entries are read from the timestamp info of
        ``C1_FullDateTimeWithSec``; every other entry is a fixed string.
        The assembled dict is handed to ``update_ampsettings_dict`` (imported
        from ``inout.SettingsOps``, not shown here) and its result is returned.
        """
        ts_info = C1_FullDateTimeWithSec.Settings().get_timestamp_info_dict()
        raw_settings = dict(
            filetype='TOA5_1T',
            file_ext='*.dat',
            parse_date='0',  # True
            index_col='0',
            header_rows='[0, 1]',
            freq='1T',
            na_values='NAN',
            skip_rows='[0, 3]',
            timestamp_type=ts_info['format_id'],
            date_and_time_colstr=ts_info['date_and_time_colstr'],
            timestamp_type_drp_ix=ts_info['dropdown_ix'],
            zipped='No',
            delimiter=',',
        )
        return update_ampsettings_dict(settings_dict=raw_settings)

    def get_file_settings_dict(self):
        """Return the settings dict that was built in ``__init__``."""
        return self.file_settings_dict
def post_parse(df):
    """Return ``df`` unchanged (no post-parse modifications are made here)."""
    return df
import datetime as dt
from inout import DataFunctions
class Settings:
    """Timestamp type 'C1_FullDateNoTime': full date in one column, no time."""

    def __init__(self):
        # Column name under which the parsed timestamp is inserted.
        self.parsed_index_col = ('index', '[parsed]')

    def timestamp_info(self):
        """Build the description of this timestamp type (dropdown text, id, index)."""
        return dict(
            dropdown_txt='[1 COLUMN] Full Date Without Time, e.g. 2019-06-10',
            format_id='C1_FullDateNoTime',
            dropdown_ix=3,  # index in dropdown menu
            date_and_time_colstr='-not-needed-',
        )

    def get_timestamp_info_dict(self):
        """Return the dict built by :meth:`timestamp_info`."""
        return self.timestamp_info()

    def parsing_args(self):
        """Build the keyword arguments used for parsing the timestamp column.

        Column position 0 is parsed with the format ``%Y-%m-%d`` and stored
        under ``self.parsed_index_col``.
        """
        date_format = '%Y-%m-%d'

        def to_datetime(text):
            return dt.datetime.strptime(text, date_format)

        return {
            'keep_date_col': False,
            'parse_dates': {self.parsed_index_col: [0]},
            'date_parser': to_datetime,
            'index_col': None,
            'dtype': None,
        }

    def get_parsing_args(self):
        """Return the dict built by :meth:`parsing_args`."""
        return self.parsing_args()

    def post_parse_df(self, data_df):
        """Pass ``data_df`` through ``DataFunctions.standardize_index`` and return the result."""
        return DataFunctions.standardize_index(df=data_df)

    def get_post_parse_df(self, df):
        """Return ``df`` after :meth:`post_parse_df` has been applied."""
        return self.post_parse_df(data_df=df)
import datetime as dt
from inout import DataFunctions
class Settings:
    """Timestamp type 'C1_FullDateTimeDayFirstNoSec': one column, day first, no seconds."""

    def __init__(self):
        # Column name under which the parsed timestamp is inserted.
        self.parsed_index_col = ('index', '[parsed]')

    def timestamp_info(self):
        """Build the description of this timestamp type (dropdown text, id, index)."""
        return dict(
            dropdown_txt='[1 COLUMN] Full Datetime, Day First, Without Seconds,'
                         ' e.g. 07.09.2019 23:02',
            format_id='C1_FullDateTimeDayFirstNoSec',
            dropdown_ix=7,  # index in dropdown menu
            date_and_time_colstr='-not-needed-',
        )

    def get_timestamp_info_dict(self):
        """Return the dict built by :meth:`timestamp_info`."""
        return self.timestamp_info()

    def parsing_args(self):
        """Build the keyword arguments used for parsing the timestamp column.

        Column position 0 is parsed with the format ``%d.%m.%Y %H:%M`` and
        stored under ``self.parsed_index_col``.
        """
        date_format = '%d.%m.%Y %H:%M'

        def to_datetime(text):
            return dt.datetime.strptime(text, date_format)

        return {
            'keep_date_col': False,
            'parse_dates': {self.parsed_index_col: [0]},
            'date_parser': to_datetime,
            'index_col': None,
            'dtype': None,
        }

    def get_parsing_args(self):
        """Return the dict built by :meth:`parsing_args`."""
        return self.parsing_args()

    def post_parse_df(self, data_df):
        """Pass ``data_df`` through ``DataFunctions.standardize_index`` and return the result."""
        return DataFunctions.standardize_index(df=data_df)

    def get_post_parse_df(self, df):
        """Return ``df`` after :meth:`post_parse_df` has been applied."""
        return self.post_parse_df(data_df=df)
# import datetime as dt
from inout import DataFunctions
# TODO not working atm
class Settings:
    """Timestamp type 'C1_FullDateTimeNanosec': one column, nanosecond resolution.

    NOTE(review): a TODO next to this class states that it is currently not
    working; the reason is not visible from this code.
    """

    def __init__(self):
        # Column name under which the parsed timestamp is inserted.
        self.parsed_index_col = ('index', '[parsed]')

    def timestamp_info(self):
        """Build the description of this timestamp type (dropdown text, id, index)."""
        return dict(
            dropdown_txt='[1 COLUMN] Full Datetime With Nanoseconds,'
                         ' e.g. 2019-06-10T14:00:00.891867727Z',
            format_id='C1_FullDateTimeNanosec',
            dropdown_ix=4,  # index in dropdown menu
            date_and_time_colstr='-not-needed-',
        )

    def get_timestamp_info_dict(self):
        """Return the dict built by :meth:`timestamp_info`."""
        return self.timestamp_info()

    def parsing_args(self):
        """Build the keyword arguments used for parsing the timestamp column.

        Column position 0 is stored under ``self.parsed_index_col``. No
        explicit parser function is given (``date_parser`` is ``None``).
        """
        return {
            'keep_date_col': False,
            'parse_dates': {self.parsed_index_col: [0]},
            'date_parser': None,
            'index_col': None,
            'dtype': None,
        }

    def get_parsing_args(self):
        """Return the dict built by :meth:`parsing_args`."""
        return self.parsing_args()

    def post_parse_df(self, data_df):
        """Standardize the index, add a units row and a timestamp column.

        After ``DataFunctions.standardize_index`` the column names are turned
        into two-level names by pairing every column with the placeholder
        unit ``'-no-units-'``; then the index is copied into the column
        ``('TIMESTAMP', '[yyyy-mm-dd HH:MM:SS]')``.
        """
        data_df = DataFunctions.standardize_index(df=data_df)

        # Units are not given in the file, so every column gets a placeholder.
        placeholder_units = ['-no-units-'] * len(data_df.columns)
        data_df.columns = [data_df.columns, placeholder_units]

        data_df[('TIMESTAMP', '[yyyy-mm-dd HH:MM:SS]')] = data_df.index
        return data_df

    def get_post_parse_df(self, data_df):
        """Return ``data_df`` after :meth:`post_parse_df` has been applied."""
        return self.post_parse_df(data_df=data_df)
\ No newline at end of file
import datetime as dt
from inout import DataFunctions
class Settings:
    """Timestamp type 'C1_FullDateTimeNoSec': one column, full datetime, no seconds."""

    def __init__(self):
        # Column name under which the parsed timestamp is inserted.
        self.parsed_index_col = ('index', '[parsed]')

    def timestamp_info(self):
        """Build the description of this timestamp type (dropdown text, id, index)."""
        return dict(
            dropdown_txt='[1 COLUMN] Full Datetime Without Seconds,'
                         ' e.g. 2019-06-10 14:00',
            format_id='C1_FullDateTimeNoSec',
            dropdown_ix=2,  # index in dropdown menu
            date_and_time_colstr='-not-needed-',
        )

    def get_timestamp_info_dict(self):
        """Return the dict built by :meth:`timestamp_info`."""
        return self.timestamp_info()

    def parsing_args(self):
        """Build the keyword arguments used for parsing the timestamp column.

        The settings contain safeguards against import problems caused by
        duplicate column names or by inconsistently named timestamp columns,
        in particular inconsistently named timestamp units.

        keep_date_col=False:
            Avoids duplicate column names, e.g. when the parsed column has
            the same name as the default timestamp index name.

        parse_dates={self.parsed_index_col: [0]} and index_col=None:
            The first column is parsed explicitly and inserted as a *data*
            column with a unique name, which is set as the index later on.
            This is necessary because timestamp columns are not named
            consistently across data files. Example: in file [A] the
            timestamp units claimed that seconds were included although
            they were not, while in file [B], which used the same timestamp
            format, the units correctly stated that there were no seconds.
            Only the given units differed; the timestamp format itself was
            the same. Merging [A] and [B] then went wrong because of the
            differing units, and co2_flux was shown for only one of the
            files. The cause is that the original timestamp column is kept
            when the index is set directly via .read_csv: the header then
            contained additional "column names" for the index that could
            not be removed (they were neither index.name nor a multiindex
            level, but still showed up in the df).
        """
        date_format = '%Y-%m-%d %H:%M'

        def to_datetime(text):
            return dt.datetime.strptime(text, date_format)

        return {
            'keep_date_col': False,
            'parse_dates': {self.parsed_index_col: [0]},
            'date_parser': to_datetime,
            'index_col': None,
            'dtype': None,
        }

    def get_parsing_args(self):
        """Return the dict built by :meth:`parsing_args`."""
        return self.parsing_args()

    def post_parse_df(self, data_df):
        """Pass ``data_df`` through ``DataFunctions.standardize_index`` and return the result."""
        return DataFunctions.standardize_index(df=data_df)

    def get_post_parse_df(self, df):
        """Return ``df`` after :meth:`post_parse_df` has been applied."""
        return self.post_parse_df(data_df=df)
import datetime as dt
from inout import DataFunctions
class Settings:
    """Timestamp type 'C1_FullDateTimeOneNumber': full datetime as one number."""

    def __init__(self):
        # Column name under which the parsed timestamp is inserted.
        self.parsed_index_col = ('index', '[parsed]')

    def timestamp_info(self):
        """Build the description of this timestamp type (dropdown text, id, index)."""
        return dict(
            dropdown_txt='[1 COLUMN] Full Datetime In One Number,'
                         ' e.g. 199701011700',
            format_id='C1_FullDateTimeOneNumber',
            dropdown_ix=5,  # index in dropdown menu
            date_and_time_colstr='-not-needed-',
        )

    def get_timestamp_info_dict(self):
        """Return the dict built by :meth:`timestamp_info`."""
        return self.timestamp_info()

    def parsing_args(self):
        """Build the keyword arguments used for parsing the timestamp column.

        Column position 1 is parsed with the format ``%Y%m%d%H%M`` and
        stored under ``self.parsed_index_col``.
        """
        date_format = '%Y%m%d%H%M'

        def to_datetime(text):
            return dt.datetime.strptime(text, date_format)

        return {
            'keep_date_col': False,
            # NOTE(review): position 1 (the second column) is parsed here,
            # not position 0 -- presumably intended for files with two
            # timestamp columns; confirm against the files this is used for.
            'parse_dates': {self.parsed_index_col: [1]},
            'date_parser': to_datetime,
            'index_col': None,
            'dtype': None,
        }

    def get_parsing_args(self):
        """Return the dict built by :meth:`parsing_args`."""
        return self.parsing_args()

    def post_parse_df(self, data_df):
        """Pass ``data_df`` through ``DataFunctions.standardize_index`` and return the result."""
        return DataFunctions.standardize_index(df=data_df)

    def get_post_parse_df(self, df):
        """Return ``df`` after :meth:`post_parse_df` has been applied."""
        return self.post_parse_df(data_df=df)
import datetime as dt
from inout import DataFunctions
class Settings:
    """Timestamp type 'C1_FullDateTimeWithSec': one column, full datetime with seconds."""

    def __init__(self):
        # Column name under which the parsed timestamp is inserted.
        self.parsed_index_col = ('index', '[parsed]')

    def timestamp_info(self):
        """Build the description of this timestamp type (dropdown text, id, index)."""
        return dict(
            dropdown_txt='[1 COLUMN] Full Datetime With Seconds,'
                         ' e.g. 2019-06-10 14:00:00',
            format_id='C1_FullDateTimeWithSec',
            dropdown_ix=1,  # index in dropdown menu
            date_and_time_colstr='-not-needed-',
        )

    def get_timestamp_info_dict(self):
        """Return the dict built by :meth:`timestamp_info`."""
        return self.timestamp_info()

    def _parsing_args_with_dtype(self, dtype):
        """Build the parsing keyword arguments; only ``dtype`` varies between callers."""
        date_format = '%Y-%m-%d %H:%M:%S'

        def to_datetime(text):
            return dt.datetime.strptime(text, date_format)

        return {
            'keep_date_col': False,
            'parse_dates': {self.parsed_index_col: [0]},
            'date_parser': to_datetime,
            'index_col': None,
            'dtype': dtype,
        }

    def parsing_args(self):
        """Build the parsing keyword arguments with ``dtype=None``.

        Column position 0 is parsed with the format ``%Y-%m-%d %H:%M:%S``
        and stored under ``self.parsed_index_col``.
        """
        return self._parsing_args_with_dtype(None)

    def get_parsing_args(self):
        """Return the dict built by :meth:`parsing_args`."""
        return self.parsing_args()

    def parsing_args_events(self):
        """Build the same parsing keyword arguments, but with ``dtype=object``."""
        return self._parsing_args_with_dtype(object)

    def get_parsing_args_events(self):
        """Return the dict built by :meth:`parsing_args_events`."""
        return self.parsing_args_events()

    def post_parse_df(self, data_df):
        """Pass ``data_df`` through ``DataFunctions.standardize_index`` and return the result."""
        return DataFunctions.standardize_index(df=data_df)

    def get_post_parse_df(self, df):
        """Return ``df`` after :meth:`post_parse_df` has been applied."""
        return self.post_parse_df(df)