To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit fd9e303a authored by holukas's avatar holukas
Browse files

Direct output to file during conversion

parent 87e57d90
......@@ -2,6 +2,9 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="254ecb79-655b-4854-8af6-177bb7347e8a" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/bico.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/bico.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/ops/bin.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/ops/bin.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/Bico.settings" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/Bico.settings" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/Bico.settingsOld" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/Bico.settingsOld" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/_version.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/_version.py" afterDir="false" />
......@@ -325,6 +328,7 @@
<workItem from="1610442714131" duration="7942000" />
<workItem from="1610466344660" duration="744000" />
<workItem from="1610467243727" duration="132000" />
<workItem from="1610468410978" duration="11871000" />
</task>
<servers />
</component>
......@@ -333,13 +337,6 @@
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/bico/ops/bin.py</url>
<line>334</line>
<option name="timeStamp" value="3" />
</line-breakpoint>
</breakpoints>
<default-breakpoints>
<breakpoint type="python-exception">
<properties notifyOnTerminate="true" exception="BaseException">
......@@ -357,7 +354,7 @@
<SUITE FILE_PATH="coverage/BICO$main.coverage" NAME="bico Coverage Results" MODIFIED="1598258463691" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$gui.coverage" NAME="gui Coverage Results" MODIFIED="1606435918815" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$main.coverage" NAME="main Coverage Results" MODIFIED="1596498134562" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$start_bico.coverage" NAME="start_bico Coverage Results" MODIFIED="1610467250193" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$start_bico.coverage" NAME="start_bico Coverage Results" MODIFIED="1610491966587" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$example.coverage" NAME="example Coverage Results" MODIFIED="1606348759035" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/example" />
</component>
</project>
\ No newline at end of file
import gzip
import datetime as dt
import os
import sys
......@@ -319,7 +320,12 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
for bin_file, bin_filepath in bin_found_files_dict.items():
bin_filedate = dt.datetime.strptime(bin_filepath.name,
self.settings_dict['filename_datetime_parsing_string'])
csv_filedate = bin_filedate.strftime('%Y%m%d%H%M') # w/o extension
ascii_filedate = bin_filedate.strftime('%Y%m%d%H%M') # w/o extension
ascii_filename = f"{self.settings_dict['site']}_{ascii_filedate}.csv"
ascii_filepath = self.settings_dict['dir_out_run_raw_data_ascii'] / ascii_filename
ascii_filename_gzip = f"{self.settings_dict['site']}_{ascii_filedate}.csv.gz"
ascii_filepath_gzip = self.settings_dict['dir_out_run_raw_data_ascii'] / ascii_filename_gzip
counter_bin_files += 1
self.statusbar.showMessage(f"Working on file #{counter_bin_files}: {bin_file}")
......@@ -340,42 +346,55 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
size_header=self.bin_size_header,
dblocks=dblocks_props,
limit_read_lines=int(self.settings_dict['row_limit']),
logger=self.logger)
logger=self.logger,
outfile_ascii_path=ascii_filepath)
obj.run()
data_lines, header, tic, counter_lines = obj.get_data()
bin.speedstats(tic=tic, counter_lines=counter_lines, logger=logger)
# Read the converted file that was created
file_contents_ascii_df = self.read_converted_ascii(filepath=ascii_filepath)
# Make DataFrame
df = format_data.make_df(data_lines=data_lines, header=header, logger=logger)
# Compress uncompressed ASCII to gzip, delete uncompressed if gzip selected
if self.settings_dict['file_compression'] == 'gzip':
with open(ascii_filepath, 'rb') as f_in, gzip.open(ascii_filepath_gzip, 'wb') as f_out:
f_out.writelines(f_in)
os.remove(ascii_filepath) # Delete uncompressed
# Stats #todo -9999 NaNs
stats_coll_df = stats.calc(stats_df=df.copy(),
# Stats
stats_coll_df = stats.calc(stats_df=file_contents_ascii_df.copy(),
stats_coll_df=stats_coll_df,
bin_filedate=bin_filedate,
counter_bin_files=counter_bin_files,
logger=logger)
stats_coll_df.loc[bin_filedate, ('_filesize', '[Bytes]', '[FILE]', 'total')] = os.path.getsize(bin_filepath)
stats_coll_df.loc[bin_filedate, ('_columns', '[#]', '[FILE]', 'total')] = len(df.columns)
stats_coll_df.loc[bin_filedate, ('_total_values', '[#]', '[FILE]', 'total')] = df.size
csv_filename = f"{self.settings_dict['site']}_{csv_filedate}"
# Export file CSV
file.export_raw_data_ascii(df=df, outfile=csv_filename, logger=logger,
outdir=self.settings_dict['dir_out_run_raw_data_ascii'],
compression=self.settings_dict['file_compression'])
stats_coll_df.loc[bin_filedate, ('_columns', '[#]', '[FILE]', 'total')] = len(
file_contents_ascii_df.columns)
stats_coll_df.loc[bin_filedate, ('_total_values', '[#]', '[FILE]', 'total')] = file_contents_ascii_df.size
# Plot high-resolution data
if self.settings_dict['plot_ts_hires'] == '1':
vis.high_res_ts(df=df.copy(), outfile=csv_filename,
vis.high_res_ts(df=file_contents_ascii_df.copy(), outfile=ascii_filename,
outdir=self.settings_dict['dir_out_run_plots_hires'], logger=logger)
if self.settings_dict['plot_histogram_hires'] == '1':
vis.high_res_histogram(df=df.copy(), outfile=csv_filename,
vis.high_res_histogram(df=file_contents_ascii_df.copy(), outfile=ascii_filename,
outdir=self.settings_dict['dir_out_run_plots_hires'], logger=logger)
return stats_coll_df
def read_converted_ascii(self, filepath):
    """Read a converted ASCII (CSV) file back into a DataFrame.

    Args:
        filepath: Path to the CSV file produced by the binary-to-ASCII
            conversion. The file carries a three-row header
            (variable name, units, instrument).

    Returns:
        pandas.DataFrame with a three-level column MultiIndex; the
        project-wide missing-value sentinel -9999 is converted to NaN.
    """
    # header=[0, 1, 2]: the converter writes name / units / instrument rows.
    # The previously passed options skiprows=None, parse_dates=False,
    # index_col=None and dtype=None all matched pandas defaults, and the
    # deprecated date_parser=None argument was dropped.
    file_contents_ascii_df = pd.read_csv(filepath,
                                         header=[0, 1, 2],
                                         na_values=-9999,
                                         encoding='utf-8',
                                         delimiter=',')
    return file_contents_ascii_df
def assemble_datablock_sequence(self):
dblocks_seq = []
instrument_settings = ['instrument_1', 'instrument_2', 'instrument_3']
......
,"('U', '[m+1 s-1]', '[R350-A]')","('V', '[m+1 s-1]', '[R350-A]')","('W', '[m+1 s-1]', '[R350-A]')","('T_SONIC', '[K]', '[R350-A]')","('INC_X', '[deg]', '[R350-A]')","('INC_Y', '[deg]', '[R350-A]')","('DATA_SIZE', '[Bytes]', '[IRGA75-A]')","('STATUS_CODE', '[status_code_irga]', '[IRGA75-A]')","('GA_DIAG_CODE', '[bit_map]', '[IRGA75-A]')","('AGC', '[%]', '[IRGA75-A]')","('H2O_CONC', '[mmol+1 m-3]', '[IRGA75-A]')","('CO2_CONC', '[mmol+1 m-3]', '[IRGA75-A]')","('T_BOX', '[degC]', '[IRGA75-A]')","('PRESS_BOX', '[hPa]', '[IRGA75-A]')","('COOLER_V', '[V]', '[IRGA75-A]')","('DATA_SIZE', '[Bytes]', '[QCL-A2]')","('STATUS_CODE', '[bit_map]', '[QCL-A2]')","('STATUS', '[0=OK]', '[QCL-A2]')","('CH4_DRY', '[nmol+1 mol-1]', '[QCL-A2]')","('N2O_DRY', '[nmol+1 mol-1]', '[QCL-A2]')","('CH4', '[nmol+1 mol-1]', '[QCL-A2]')","('T_CELL', '[K]', '[QCL-A2]')","('PRESS_CELL', '[Torr]', '[QCL-A2]')"
,-0.5,0.29,0.04,284.56,0.0,1.86,16.0,0,255.0,50.0,243.509,5.902,-1.8400000000000034,971.0,1.1375,2.0,104.0,-9999,-9999,-9999,-9999,-9999,-9999
,-0.5,0.32,0.03,284.56,0.0,1.86,16.0,0,255.0,50.0,243.577,5.929,-1.8400000000000034,971.0,1.1365,2.0,104.0,-9999,-9999,-9999,-9999,-9999,-9999
0,-0.49,0.29,0.03,284.58,0.0,1.86,16.0,0,255.0,50.0,243.03,5.9696,-1.8499999999999943,971.0,1.1365,2.0,104.0,,,,,,
1,-0.5,0.27,0.04,284.57,0.0,1.86,16.0,0,255.0,50.0,243.48,5.984,-1.8499999999999943,971.0,1.1375,2.0,104.0,,,,,,
2,-0.49,0.26,0.03,284.58,0.0,1.86,16.0,0,255.0,50.0,243.94799999999998,6.0048,-1.8700000000000043,971.0,1.1385,2.0,104.0,,,,,,
3,-0.5,0.27,0.04,284.57,0.0,1.86,16.0,0,255.0,50.0,243.793,6.0093,-1.8700000000000043,971.0,1.138,2.0,104.0,,,,,,
4,-0.49,0.26,0.06,284.56,0.0,1.86,16.0,0,255.0,50.0,243.977,5.9851,-1.8700000000000043,971.0,1.1375,2.0,104.0,,,,,,
5,-0.51,0.27,0.08,284.56,0.0,1.86,16.0,0,255.0,50.0,244.525,5.9670000000000005,-1.8499999999999943,971.0,1.137,2.0,104.0,,,,,,
6,-0.48,0.25,0.09,284.56,0.0,1.86,16.0,0,255.0,50.0,244.338,5.959,-1.8400000000000036,971.0,1.138,2.0,104.0,,,,,,
7,-0.51,0.28,0.08,284.54,0.0,1.86,16.0,0,255.0,50.0,244.236,5.9803,-1.8599999999999997,971.0,1.138,2.0,104.0,,,,,,
import csv
import mmap
import os
import struct
......@@ -40,8 +41,11 @@ def bit_map_extract_header(bit_map_dict):
class ReadFile:
"""
Read and convert binary data to ASCII, write to file
"""
def __init__(self, binary_filename, size_header, dblocks, limit_read_lines, logger):
def __init__(self, binary_filename, size_header, dblocks, limit_read_lines, logger, outfile_ascii_path):
self.tic = time.time() # Start time
self.binary_filename = binary_filename
self.binary_filesize = os.path.getsize(self.binary_filename)
......@@ -52,47 +56,78 @@ class ReadFile:
self.file_counter_lines = 0
self.file_total_bytes_read = 0
self.file_data_rows = [] # Collects all data, i.e. all line records
self.ascii_filename = outfile_ascii_path
self.dblock_headers = []
self.logger.info(f" File size: {self.binary_filesize} Bytes")
# NOTE(review): this span looks like a web-scraped commit diff with pre- and
# post-change lines interleaved (both `self.convert()` and
# `self.convert_to_ascii()` appear, and `self.dblock_headers` is also set
# inside the new `convert_to_ascii`). Confirm against the repository before
# treating this as runnable code.
def run(self):
# Pre-change line: header construction was later moved into convert_to_ascii().
self.dblock_headers = self.make_file_header()
self.open_binary = self.read_bin_file_to_mem(binary_filename=self.binary_filename, logger=self.logger)
# First read binary header at top of file, but don't write to output file
settings.data_blocks.header.wecom3.data_block_header(open_file_object=self.open_binary,
size_header=self.size_header)
# Pre-change call (old method name):
self.convert()
# Post-change call (renamed method):
self.convert_to_ascii()
# NOTE(review): diff residue — the old `def convert(self):` signature remains
# here with no body (the method was renamed to `convert_to_ascii` below), so
# this fragment is not runnable as-is. Confirm against the repository.
def convert(self):
# def get_data(self):
#     return self.dblock_headers
def write_multirow_header_to_ascii(self, asciiWriter):
    """Write the collected header tuples as a three-row CSV header.

    Each entry in ``self.dblock_headers`` is a 3-tuple of
    (variable name, units, instrument). The output file receives all
    variable names in its first row, all units in its second row and
    all instrument identifiers in its third row.
    """
    for row_idx in (0, 1, 2):
        # Pick element `row_idx` from every header tuple to form one header row.
        asciiWriter.writerow([header_tuple[row_idx] for header_tuple in self.dblock_headers])
# NOTE(review): web-scraped diff residue — the first `while` loop below is the
# pre-change in-memory implementation and the `with open(...)` section is the
# post-change write-to-file implementation; both appear here without diff
# markers. Confirm against the repository before treating this as runnable.
def convert_to_ascii(self):
self.logger.info(f"  Reading file data, converting to ASCII ...")
end_of_data_reached = False  # Reset for each file
# Pre-change loop: accumulated converted rows in memory (self.file_data_rows).
while not end_of_data_reached:
# Read data blocks per instrument
# tic = time.time()
# print(time.time() - tic)
file_newrow_records = []
for instr in self.dblocks:
incoming_dblock_data, end_of_data_reached = self.read_instr_dblock(dblock=instr)
if not end_of_data_reached:
file_newrow_records = file_newrow_records + incoming_dblock_data
else:
file_newrow_records = False
break  # Breaks FOR loop
if file_newrow_records:
self.file_counter_lines += 1
self.file_data_rows.append(file_newrow_records)
# Limit = 0 means no limit
if self.limit_read_lines > 0:
if self.file_counter_lines == self.limit_read_lines:
break
# Post-change implementation: stream converted records directly to the
# output ASCII file instead of accumulating them in memory.
with open(self.ascii_filename, 'w', newline='') as open_ascii:
asciiWriter = csv.writer(open_ascii, delimiter=',')
# File header: three rows (variable name, units, instrument)
self.dblock_headers = self.make_file_header()
self.write_multirow_header_to_ascii(asciiWriter=asciiWriter)
# Data records
while not end_of_data_reached:
# Read data blocks per instrument
file_newrow_records = []
for instr in self.dblocks:
incoming_dblock_data, end_of_data_reached = self.read_instr_dblock(dblock=instr)
if not end_of_data_reached:
file_newrow_records = file_newrow_records + incoming_dblock_data
else:
file_newrow_records = False
break  # Breaks FOR loop
if file_newrow_records:
self.file_counter_lines += 1
asciiWriter.writerow(file_newrow_records)
# self.file_data_rows.append(file_newrow_records)
# Limit = 0 means no limit
if self.limit_read_lines > 0:
if self.file_counter_lines == self.limit_read_lines:
break
self.open_binary.close()
# NOTE(review): this explicit close is redundant — the `with` context manager
# already closes open_ascii on exit.
open_ascii.close()
self.logger.info(f"  Finished conversion to ASCII.")
self.logger.info(f"  ASCII data saved to file {self.ascii_filename}")
self.file_speedstats()
def read_instr_dblock(self, dblock):
"""Cycle through vars in data block"""
......@@ -412,11 +447,11 @@ class ReadFile:
bit_map_dict[bit_map_var] = bit_map_props
return bit_map_dict
def get_data(self):
    """Return the collected results of the conversion run.

    Returns:
        tuple: (collected data rows, header tuples, start timestamp,
        number of lines read from the binary file).
    """
    collected = (self.file_data_rows,
                 self.dblock_headers,
                 self.tic,
                 self.file_counter_lines)
    return collected
def make_file_header(self):
"""Make header for converted ASCII file, for all data blocks"""
"""Make header for converted ASCII file, for all data blocks
Returns list of tuples
"""
dblock_headers = []
for dblock in self.dblocks:
dblock_header = make_header(dblock=dblock)
......@@ -436,6 +471,14 @@ class ReadFile:
logger.info(f" Done reading file to memory.")
return open_binary
def file_speedstats(self):
    """Log row count, elapsed time and average rows/second for this file."""
    toc = time.time() - self.tic
    # Guard against a zero elapsed time (same effect as catching
    # ZeroDivisionError in the original form).
    runtime_line_avg = self.file_counter_lines / toc if toc else 0
    message = f"  {self.file_counter_lines} rows read in {toc:.2f}s, speed: {int(runtime_line_avg)} rows s-1"
    self.logger.info(message)
# def read_file(binary_filename, size_header, dblocks, limit_read_lines, logger, statusbar):
# binary_filesize = os.path.getsize(binary_filename)
......@@ -505,16 +548,6 @@ class ReadFile:
# data_header = dblock_headers
# return data_rows, data_header, tic, counter_lines
def speedstats(tic, counter_lines, logger):
    """Log elapsed time and average processing speed since *tic*.

    Args:
        tic: Start timestamp as returned by ``time.time()``.
        counter_lines: Number of lines processed since *tic*.
        logger: Logger that receives the info message.
    """
    toc = time.time() - tic
    # Guard against a zero elapsed time (equivalent to catching
    # ZeroDivisionError in the original form).
    runtime_line_avg = counter_lines / toc if toc else 0
    logger.info(f"  {counter_lines} lines read in {toc:.2f}s, speed: {int(runtime_line_avg)} lines s-1")
# def generate_file_header(dblocks):
# """Make header for converted output file"""
# header = []
......
run_id=BICO-20210112-170051
run_id=BICO-20210112-235248
# INSTRUMENTS
# ===========
# Site
site=CH-DAV
site=CH-CHA
# Data Blocks
header=WECOM3
instrument_1=R350-A
instrument_2=IRGA75-A
instrument_3=QCL-A2
instrument_3=QCL-A
# RAW DATA
# ========
# Source Folder
dir_source=Y:/CH-CHA_Chamau/20_sonic_ghg/2020
dir_source=Y:/CH-CHA_Chamau/20_sonic_ghg/2020/06
# Time Range
start_date=2020-02-12 19:00
end_date=2020-02-12 19:00
start_date=2020-06-01 06:00
end_date=2020-06-03 23:00
# File Settings
filename_datetime_format=yyyymmddHH.CMM
......@@ -35,15 +35,15 @@ select_random_files=0
# OUTPUT
# ======
dir_out=A:/FLUXES/x-TEST-OUT
dir_out_run=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-170051
dir_out_run_log=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-170051\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-170051\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-170051\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-170051\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-170051\raw_data_ascii
dir_out_run=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235248
dir_out_run_log=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235248\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235248\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235248\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235248\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235248\raw_data_ascii
output_folder_name_prefix=CHA
file_compression=None
file_compression=gzip
plot_file_availability=0
plot_ts_hires=0
......
run_id=BICO-20210112-165148
run_id=BICO-20210112-235241
# INSTRUMENTS
# ===========
# Site
site=CH-DAV
site=CH-CHA
# Data Blocks
header=WECOM3
instrument_1=R350-A
instrument_2=IRGA75-A
instrument_3=QCL-A2
instrument_3=QCL-A
# RAW DATA
# ========
# Source Folder
dir_source=Y:/CH-CHA_Chamau/20_sonic_ghg/2020
dir_source=Y:/CH-CHA_Chamau/20_sonic_ghg/2020/06
# Time Range
start_date=2020-02-12 19:00
end_date=2020-02-12 19:00
start_date=2020-06-01 06:00
end_date=2020-06-03 23:00
# File Settings
filename_datetime_format=yyyymmddHH.CMM
......@@ -35,15 +35,15 @@ select_random_files=0
# OUTPUT
# ======
dir_out=A:/FLUXES/x-TEST-OUT
dir_out_run=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-165148
dir_out_run_log=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-165148\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-165148\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-165148\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-165148\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-165148\raw_data_ascii
dir_out_run=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235241
dir_out_run_log=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235241\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235241\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235241\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235241\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\CHA_BICO-20210112-235241\raw_data_ascii
output_folder_name_prefix=CHA
file_compression=None
file_compression=gzip
plot_file_availability=0
plot_ts_hires=0
......
__version__ = "0.1.1"
__version__ = "0.2.0"
__date__ = "12 Jan 2021"
__link_source_code__ = "https://gitlab.ethz.ch/holukas/bico"
__link_releases__ = "https://gitlab.ethz.ch/holukas/bico/-/releases"
......
......@@ -8,7 +8,7 @@ setuptools.setup(
name='bico',
packages=setuptools.find_packages(),
# packages=['dyco'],
version='0.1.1',
version='0.2.0',
license='GNU General Public License v3 (GPLv3)',
description='A Python package to convert binary files to ASCII',
long_description=long_description,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment