To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 1f3aaa83 authored by holukas
Browse files

Added option to specify index column for fluxnet file

parent b134c663
......@@ -15,21 +15,25 @@ from matplotlib import gridspec
def please_quality_control(working_directory):
version = 'FQC 2.1.1'
version = 'FQC 2.1.2'
# 0 = original full_output and FLUXNET header (FOF: 3 rows, 1st row is ignored, FLUXNET: 1 row)
# 1 = merged full_output and FLUXNET header (FOF: 2 rows, FLUXNET: 1 row)
header_type = 1
header_type = 0
# if the files were merged, then the merged FLUXNET file might have a different date format
FLXNET_file_dateformat = '%Y-%m-%d %H:%M:%S'
# and index column
FLXNET_file_dateformat = '%Y%m%d%H%M'
# FLXNET_file_dateformat = '%Y-%m-%d %H:%M:%S'
# fxn_index_col=0
fxn_index_col = 1
# VARIABLES -------------------------------------
FOF_qc_variable_list = ['co2_flux', 'H', 'LE', 'h2o_flux', 'ch4_flux',
'n2o_flux'] # searching for these variables in full_output (EddyPro column names)
FLXNT_qc_variable_list = ['FC', 'H', 'LE', '-9999', '-9999', '-9999']
FLXNT_qc_variable_flags = ['FC_SSITC_TEST', 'H_SSITC_TEST', 'LE_SSITC_TEST', '-9999', '-9999',
'-9999']
'-9999']
FOF_qc_variable_position_in_9_digit_flag_number = [5, 4, 6, 6, 7, 8] # code is e.g. 800000099, see full_output file
FOF_qc_variable_position_in_5_digit_flag_number = [1, -9999, 2, 2, 3, 4] # code is e.g. 89999
......@@ -273,8 +277,8 @@ def please_quality_control(working_directory):
# this loses the original timestamp
FLXNET_parse_date = lambda z: dt.datetime.strptime(z, FLXNET_file_dateformat)
FLXNET_contents = pd.read_csv(FLXNET_found_file, skiprows=None,
keep_date_col=True, index_col=0,
date_parser=FLXNET_parse_date)
keep_date_col=True, index_col=fxn_index_col,
date_parser=FLXNET_parse_date)
# FLXNET_units = pd.read_csv(FLXNET_found_file, skiprows=FLXNET_skiprows_units, nrows=1, mangle_dupe_cols=True)
# FLXNET_units = FLXNET_units.columns[
# 1:] # skip the first column (yyyymmddHHMM... we generate our own timestamp column name incl units)
......@@ -693,7 +697,8 @@ def please_quality_control(working_directory):
data_DA = data.replace(-9999, np.nan).resample('D').mean()
data_SD = data.replace(-9999, np.nan).resample('D').std()
ax2.plot_date(data_DA.index, data_DA, color='black', alpha=1, label='daily average', ms=0.5, lw=1)
ax2.plot_date(data_DA.index, data_DA, color='black', alpha=1, label='daily average',
ms=0.5, lw=1)
# ax2.scatter(data_DA.index, data_DA, color='black', s=3, alpha=1, label='daily average')
ax2.set_ylim(data_DA.min(), data_DA.max())
ax2.errorbar(data_DA.index, data_DA, alpha=0.2,
......@@ -797,7 +802,7 @@ def please_quality_control(working_directory):
# we can now output FOF_contents directly:
FLXNET_final_qc_filename = os.path.join(qc_folder,
FLXNET_found_file_no_ending + " " + id_string + ".csv")
FLXNET_found_file_no_ending + " " + id_string + ".csv")
FLXNET_contents.to_csv(FLXNET_final_qc_filename, header=True, index=False, encoding='utf-8')
print("length of " + FLXNET_final_qc_filename + " = " + str(len(FLXNET_contents)))
......
# release notes:
# 2020-06-27: version 2.1.1
# * changed settings for the EddyPro FLUXNET file to better integrate with FIME File Merger
# > After FIME, the FLUXNET file now also has a full timestamp in column 0. Settings during
# .read_csv were adjusted accordingly.
# 2020-05-24: version 2.1.0
# * created conda environment
# * changed .scatter plotting (did not work anymore) to .plot_date
# 2019-06-02: version 2.0.0
# * implemented the new *_fluxnet_* files that replace the *_ghg-europe_* files in EddyPro 7
# * changed output folder to better match the folder naming convention: OUT_QC-YYYYMMDD-HHMMSS
# 2019-03-04: version 1.04
# * signal strength / status byte / window dirtiness is now also recognized by the string "signal_strength"
# 2019-02-25: version 1.03
# * ignoring empty row for merged files is no longer necessary, pandas now outputs without that empty row
# 2019-02-25: version 1.02
# * date format of ghg-europe file is now in separate variable dateformat_ghg_europe_file
# 2019-02-24:
# * now called version 1.01
# * changed output ID to different format, e.g. QC-20190224-1725
# * now_string is now called id_string
# * added id_string to output csv files
# 2017-03-07: changed "mangle_dupe_cols" to "True" in line
# GHG_units = pd.read_csv(GHG_found_file, skiprows=GHG_skiprows_units, nrows=1, mangle_dupe_cols=True)
# 2017-01-31: rewrote the logic behind finding the correct AGC variable
# added error message if no site or IRGA was selected
# 2016-07-13: removed bug that wrote the wrong units in diurnal cycle plots
# 2015-09-22: flag in ghg-europe file is not changed, raw data screening sets flagged values to -9999
# SSITC flag in ghg-europe must not be changed for database upload
# instead values that fail the raw data screening are set to -9999 (in accordance w/ database submission guidelines)
# 2015-08-24: update to version 0.2: added support for EddyPro 6 output files
# removed H-flag, not necessary anymore, bug in EP was removed in version 6.0.0
# the overall QC flag is not output in a separate column in the EP6 GHG-Europe output file
# 2015-05-06: the flag for originally missing values is now left at -9999 instead of setting it to 2
# since EddyPro 5.2, the ghg-europe file contains "ISOdate" as the date column; in addition, missing dates are no longer filled
# please specify working directory
# working_directory = r'M:\Dropbox\luhk_work\programming\python\FQC_FluxQualityControl'
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment