To receive notifications about scheduled maintenance, please subscribe to the mailing list gitlab-operations@sympa.ethz.ch. You can subscribe at https://sympa.ethz.ch

Commit 97794c49 authored by holukas's avatar holukas
Browse files

Store converted data rows in a list before building the DataFrame

parent 09c1eb9e
......@@ -4,11 +4,13 @@
<list default="true" id="254ecb79-655b-4854-8af6-177bb7347e8a" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/bico.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/bico.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/gui/gui.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/gui/gui.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/ops/bin.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/ops/bin.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/ops/file.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/ops/file.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/ops/format_data.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/ops/format_data.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/Bico.settings" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/Bico.settings" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/Bico.settingsOld" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/Bico.settingsOld" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/_version.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/_version.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/setup.py" beforeDir="false" afterPath="$PROJECT_DIR$/setup.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
......@@ -333,6 +335,7 @@
<workItem from="1610494627121" duration="3765000" />
<workItem from="1610499634762" duration="1562000" />
<workItem from="1610528020539" duration="10670000" />
<workItem from="1610612175310" duration="3600000" />
</task>
<servers />
</component>
......@@ -344,14 +347,9 @@
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/bico/bico.py</url>
<line>412</line>
<line>425</line>
<option name="timeStamp" value="41" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/bico/ops/bin.py</url>
<line>121</line>
<option name="timeStamp" value="43" />
</line-breakpoint>
</breakpoints>
<default-breakpoints>
<breakpoint type="python-exception">
......@@ -371,7 +369,7 @@
<SUITE FILE_PATH="coverage/BICO$main.coverage" NAME="bico Coverage Results" MODIFIED="1598258463691" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$gui.coverage" NAME="gui Coverage Results" MODIFIED="1606435918815" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$main.coverage" NAME="main Coverage Results" MODIFIED="1596498134562" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$start_bico.coverage" NAME="start_bico Coverage Results" MODIFIED="1610569519710" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$start_bico.coverage" NAME="start_bico Coverage Results" MODIFIED="1610615547522" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$example.coverage" NAME="example Coverage Results" MODIFIED="1606348759035" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/example" />
</component>
</project>
\ No newline at end of file
......@@ -163,6 +163,8 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
# Output
self.update_dict_key(key='dir_out', new_val=self.lbl_output_folder.text())
self.update_dict_key(key='output_folder_name_prefix', new_val=self.lne_output_folder_name_prefix.text())
self.update_dict_key(key='add_instr_to_varname',
new_val='1' if self.chk_output_variables_add_instr_to_varname.isChecked() else '0')
self.update_dict_key(key='file_compression', new_val=self.cmb_output_compression.currentText())
self.update_dict_key(key='plot_file_availability',
new_val='1' if self.chk_output_plots_file_availability.isChecked() else '0')
......@@ -229,6 +231,8 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
self.lbl_output_folder.setText(str(self.settings_dict['dir_out']))
self.set_gui_lineedit(lineedit=self.lne_output_folder_name_prefix,
string=self.settings_dict['output_folder_name_prefix'])
self.set_gui_checkbox(checkbox=self.chk_output_variables_add_instr_to_varname,
state=self.settings_dict['add_instr_to_varname'])
self.set_gui_combobox(combobox=self.cmb_output_compression, find_text=self.settings_dict['file_compression'])
self.set_gui_checkbox(checkbox=self.chk_output_plots_file_availability,
state=self.settings_dict['plot_file_availability'])
......@@ -348,10 +352,15 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
dblocks=dblocks_props,
limit_read_lines=int(self.settings_dict['row_limit']),
logger=self.logger,
file_number=counter_bin_files)
cur_file_number=counter_bin_files)
obj.run()
# ascii_df = obj.get_data()
dblock_headers, file_data_rows = obj.get_data()
# Add instrument info to variable name
if self.settings_dict['add_instr_to_varname'] == '1':
dblock_headers = self.add_instr_to_varname(dblock_headers=dblock_headers)
# Make dataframe of data
ascii_df = format_data.make_df(data_lines=file_data_rows,
header=dblock_headers,
......@@ -368,12 +377,6 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
file_contents_ascii_df = self.read_converted_ascii(filepath=ascii_filepath,
compression=self.settings_dict['file_compression'])
# # Compress uncompressed ASCII to gzip, delete uncompressed if gzip selected
# if self.settings_dict['file_compression'] == 'gzip':
# with open(ascii_filepath, 'rb') as f_in, gzip.open(ascii_filepath_gzip, 'wb') as f_out:
# f_out.writelines(f_in)
# os.remove(ascii_filepath) # Delete uncompressed
# Stats
stats_coll_df = stats.calc(stats_df=file_contents_ascii_df.copy(),
stats_coll_df=stats_coll_df,
......@@ -395,6 +398,16 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
return stats_coll_df
def add_instr_to_varname(self, dblock_headers):
    """Append the instrument name to each variable name to avoid duplicates.

    Some variables exist for more than one instrument, e.g. STATUS_CODE is
    reported both by IRGA75-A and QCL-C; appending the instrument yields
    unique names such as STATUS_CODE_IRGA75-A and STATUS_CODE_QCL-C.

    Each header entry is a 3-tuple (variable name, units, instrument).
    The list is modified in place and also returned, so existing callers
    that use either the return value or the mutated argument keep working.
    """
    for idx, (varname, units, instrument) in enumerate(dblock_headers):
        dblock_headers[idx] = (f"{varname}_{instrument}", units, instrument)
    return dblock_headers
def read_converted_ascii(self, filepath, compression):
"""Read converted file"""
compression = None if compression == 'None' else compression
......@@ -430,3 +443,9 @@ def main():
if __name__ == '__main__':
main()
# # Compress uncompressed ASCII to gzip, delete uncompressed if gzip selected
# if self.settings_dict['file_compression'] == 'gzip':
# with open(ascii_filepath, 'rb') as f_in, gzip.open(ascii_filepath_gzip, 'wb') as f_out:
# f_out.writelines(f_in)
# os.remove(ascii_filepath) # Delete uncompressed
......@@ -125,13 +125,19 @@ class Ui_MainWindow(object):
self.lne_output_folder_name_prefix = \
gui_elements.add_label_lineedit_to_grid(label='Folder Name Prefix', grid=grid,
row=5, value='')
# Variables
header_output_file_variables = qtw.QLabel('Variables')
header_output_file_variables.setProperty('labelClass', 'header_2')
grid.addWidget(header_output_file_variables, 6, 0)
self.chk_output_variables_add_instr_to_varname = \
gui_elements.add_checkbox_to_grid(label='Add Instrument To Variable Name', grid=grid, row=7)
# File Compression
header_output_file_compression = qtw.QLabel('File Compression')
header_output_file_compression.setProperty('labelClass', 'header_2')
grid.addWidget(header_output_file_compression, 6, 0)
grid.addWidget(header_output_file_compression, 8, 0)
self.cmb_output_compression = \
gui_elements.add_label_combobox_to_grid(label='Compression', grid=grid, row=7,
gui_elements.add_label_combobox_to_grid(label='Compression', grid=grid, row=9,
items=['gzip', 'None'])
self.cmb_output_compression.setToolTip(tooltips.cmb_output_compression)
......@@ -139,17 +145,17 @@ class Ui_MainWindow(object):
# Plots
header_output_plots = qtw.QLabel('Plots')
header_output_plots.setProperty('labelClass', 'header_2')
grid.addWidget(header_output_plots, 8, 0, 1, 1)
grid.addWidget(header_output_plots, 10, 0, 1, 1)
self.chk_output_plots_file_availability = \
gui_elements.add_checkbox_to_grid(label='File Availability Heatmap', grid=grid, row=9)
gui_elements.add_checkbox_to_grid(label='File Availability Heatmap', grid=grid, row=11)
self.chk_output_plots_ts_hires = \
gui_elements.add_checkbox_to_grid(label='High-res Time Series', grid=grid, row=10)
gui_elements.add_checkbox_to_grid(label='High-res Time Series', grid=grid, row=12)
self.chk_output_plots_histogram_hires = \
gui_elements.add_checkbox_to_grid(label='High-res Histograms', grid=grid, row=11)
gui_elements.add_checkbox_to_grid(label='High-res Histograms', grid=grid, row=13)
self.chk_output_plots_ts_agg = \
gui_elements.add_checkbox_to_grid(label='Aggregated Time Series', grid=grid, row=12)
gui_elements.add_checkbox_to_grid(label='Aggregated Time Series', grid=grid, row=14)
grid.setRowStretch(13, 1)
grid.setRowStretch(15, 1)
section.setLayout(grid)
return section
......
import pandas as pd
import csv
import mmap
import os
import struct
import time
import pandas as pd
import settings.data_blocks.header.wecom3
from . import bin_conversion_exceptions as bce
from ops import format_data, file
def make_header(dblock):
"""Get header info for data block, including for variables from bit maps"""
......@@ -46,7 +46,7 @@ class ConvertData:
Read and convert binary data to ASCII, write to file
"""
def __init__(self, binary_filename, size_header, dblocks, limit_read_lines, logger, file_number):
def __init__(self, binary_filename, size_header, dblocks, limit_read_lines, logger, cur_file_number):
self.tic = time.time() # Start time
self.binary_filename = binary_filename
self.binary_filesize = os.path.getsize(self.binary_filename)
......@@ -60,6 +60,7 @@ class ConvertData:
self.data_df = pd.DataFrame()
# self.ascii_filename = outfile_ascii_path
self.dblock_headers = []
self.cur_file_number = cur_file_number
self.logger.info(f" File size: {self.binary_filesize} Bytes")
......@@ -73,53 +74,46 @@ class ConvertData:
self.convert_to_ascii()
def get_data(self):
    """Return the converted data as (header tuples, list of data rows)."""
    return self.dblock_headers, self.file_data_rows
def write_multirow_header_to_ascii(self, asciiWriter):
    """Write a three-row header (names, units, instruments) to the file.

    self.dblock_headers is a list of 3-tuples (variable name, units,
    instrument). The output ASCII file gets a multi-row header: the first
    row holds every variable name, the second row every unit string, and
    the third row every instrument identifier.
    """
    n_header_rows = 3  # one row per tuple element: name, units, instrument
    for row_ix in range(n_header_rows):
        asciiWriter.writerow([entry[row_ix] for entry in self.dblock_headers])
def convert_to_ascii(self):
self.logger.info(f" Reading file data, converting to ASCII ...")
end_of_data_reached = False # Reset for each file
# with open(self.ascii_filename, 'w', newline='') as open_ascii:
# asciiWriter = csv.writer(open_ascii, delimiter=',')
# File header
self.dblock_headers = self.make_file_header()
self.data_df = pd.DataFrame(columns=self.dblock_headers)
# self.write_multirow_header_to_ascii(asciiWriter=asciiWriter)
# Data records
while not end_of_data_reached:
# Read data blocks per instrument
file_newrow_records = []
for instr in self.dblocks:
incoming_dblock_data, end_of_data_reached = self.read_instr_dblock(dblock=instr)
if not end_of_data_reached:
file_newrow_records = file_newrow_records + incoming_dblock_data
else:
file_newrow_records = False
break # Breaks FOR loop
_end_of_data_reached = []
results = [self.read_instr_dblock(dblock=d) for d in self.dblocks]
for r in results:
file_newrow_records.extend(r[0])
_end_of_data_reached.append(r[1])
# end_of_data_reached = True if True in _end_of_data_reached else False
if True in _end_of_data_reached:
end_of_data_reached = True
file_newrow_records = False
# for instr in self.dblocks:
# incoming_dblock_data, end_of_data_reached = self.read_instr_dblock(dblock=instr)
# if not end_of_data_reached:
# file_newrow_records.extend(incoming_dblock_data)
# # file_newrow_records = file_newrow_records + incoming_dblock_data
# else:
# file_newrow_records = False
# break # Breaks FOR loop
if file_newrow_records:
self.file_counter_lines += 1
# asciiWriter.writerow(file_newrow_records)
self.file_data_rows.append(file_newrow_records)
self.data_df.append(file_newrow_records)
# self.data_df.append(file_newrow_records)
# Limit = 0 means no limit
if self.limit_read_lines > 0:
......@@ -142,9 +136,11 @@ class ConvertData:
dblock_vars_read = 0
end_of_data_reached = False
# todo hier weiter var-by-var
for var, props in dblock.items():
if 'bit_pos_start' in props.keys(): # Skip bit map variables, will be extracted later
if 'bit_pos_start' in props.keys(): # Skip variables from bit map, will be extracted later
continue
varbytes = self.open_binary.read(props['bytes']) # Read Bytes for current var
......@@ -222,6 +218,7 @@ class ConvertData:
for bmv in bit_map_vals:
dblock_data.append(bmv)
# return dblock_data
return dblock_data, end_of_data_reached
def convert_val(self, units, var_val):
......@@ -490,7 +487,7 @@ class ConvertData:
runtime_line_avg = self.file_counter_lines / toc
except ZeroDivisionError:
runtime_line_avg = 0
_len = f" {self.file_counter_lines} rows read in {toc:.2f}s, speed: {int(runtime_line_avg)} rows s-1"
_len = f" {self.file_counter_lines} data rows converted in {toc:.2f}s, speed: {int(runtime_line_avg)} rows s-1"
self.logger.info(_len)
# def read_file(binary_filename, size_header, dblocks, limit_read_lines, logger, statusbar):
......@@ -583,3 +580,20 @@ class ConvertData:
# bytes_read_perc = (total_bytes_read / binary_filesize) * 100
# print(f"\r Read {counter_lines} lines / {total_bytes_read} Bytes ({bytes_read_perc:.1f}%) / "
# f"time remaining: {rem_time:.1f}s ...", end='')
# def write_multirow_header_to_ascii(self, asciiWriter):
# """Write header info from list of tuples to file as multi-row header
#
# Since self.dblock_headers is a list of tuples and the output ascii is
# written row-by-row, the header info is extracted from the list: each tuple
# in the list comprises three elements (variable name, units and instrument).
# Therefore, first the first element of each tuple (all variable names) is written
# to the first row of the file, then all second elements (units) are written to the
# second row, and finally the third elements (instrument) are written to the
# third row of the output file.
#
# """
# for headerrow in range(0, 3):
# headerrow_out = [i[headerrow] for i in self.dblock_headers]
# asciiWriter.writerow(headerrow_out)
......@@ -57,6 +57,8 @@ class SearchAll():
self.valid_files_dict = self.keep_files_up_to_filelimit()
self.valid_files_dict = self.keep_random_files(valid_files_dict=self.valid_files_dict)
# sorted(self.valid_files_dict) # todo sort dict necessary?
return self.valid_files_dict
def keep_random_files(self, valid_files_dict):
......
run_id=BICO-20210113-212521
run_id=BICO-20210114-101229
# INSTRUMENTS
# ===========
......@@ -26,8 +26,8 @@ end_date=2020-09-30 23:00
filename_datetime_format=yyyymmddHH.XMM
file_ext=*.X*
file_size_min=900
file_limit=1
row_limit=0
file_limit=0
row_limit=10
# Special
select_random_files=0
......@@ -35,15 +35,17 @@ select_random_files=0
# OUTPUT
# ======
dir_out=A:/FLUXES/x-TEST-OUT
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\raw_data_ascii
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101229
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101229\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101229\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101229\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101229\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101229\raw_data_ascii
output_folder_name_prefix=DAV
file_compression=gzip
file_compression=None
add_instr_to_varname=0
plot_file_availability=0
plot_ts_hires=0
......
run_id=BICO-20210113-212408
run_id=BICO-20210114-101216
# INSTRUMENTS
# ===========
......@@ -27,7 +27,7 @@ filename_datetime_format=yyyymmddHH.XMM
file_ext=*.X*
file_size_min=900
file_limit=1
row_limit=0
row_limit=10
# Special
select_random_files=0
......@@ -35,15 +35,17 @@ select_random_files=0
# OUTPUT
# ======
dir_out=A:/FLUXES/x-TEST-OUT
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\raw_data_ascii
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101216
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101216\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101216\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101216\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101216\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210114-101216\raw_data_ascii
output_folder_name_prefix=DAV
file_compression=gzip
file_compression=None
add_instr_to_varname=0
plot_file_availability=0
plot_ts_hires=0
......
__version__ = "0.2.1"
__date__ = "13 Jan 2021"
__version__ = "0.3.0"
__date__ = "14 Jan 2021"
__link_source_code__ = "https://gitlab.ethz.ch/holukas/bico"
__link_releases__ = "https://gitlab.ethz.ch/holukas/bico/-/releases"
__link_wiki__ = "https://gitlab.ethz.ch/holukas/bico/-/wikis/home"
......
......@@ -8,7 +8,7 @@ setuptools.setup(
name='bico',
packages=setuptools.find_packages(),
# packages=['dyco'],
version='0.2.0',
version='0.3.0',
license='GNU General Public License v3 (GPLv3)',
description='A Python package to convert binary files to ASCII',
long_description=long_description,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment