
Commit 09c1eb9e authored by holukas

Preparations

parent a5b02c25
.idea/workspace.xml
@@ -2,7 +2,13 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="254ecb79-655b-4854-8af6-177bb7347e8a" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/bico.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/bico.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/ops/bin.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/ops/bin.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/ops/file.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/ops/file.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/ops/format_data.py" beforeDir="false" afterPath="$PROJECT_DIR$/bico/ops/format_data.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/Bico.settings" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/Bico.settings" afterDir="false" />
<change beforePath="$PROJECT_DIR$/bico/settings/Bico.settingsOld" beforeDir="false" afterPath="$PROJECT_DIR$/bico/settings/Bico.settingsOld" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -198,11 +204,11 @@
<method v="2" />
</configuration>
<list>
<item itemvalue="Python._test" />
<item itemvalue="Python.bico" />
<item itemvalue="Python.gui (1)" />
<item itemvalue="Python.gui" />
<item itemvalue="Python.gui (1)" />
<item itemvalue="Python.start_bico" />
<item itemvalue="Python._test" />
</list>
<recent_temporary>
<list>
@@ -337,14 +343,14 @@
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/bico/ops/bin.py</url>
<line>124</line>
<option name="timeStamp" value="6" />
<url>file://$PROJECT_DIR$/bico/bico.py</url>
<line>412</line>
<option name="timeStamp" value="41" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/bico/ops/bin.py</url>
<line>147</line>
<option name="timeStamp" value="9" />
<line>121</line>
<option name="timeStamp" value="43" />
</line-breakpoint>
</breakpoints>
<default-breakpoints>
@@ -365,7 +371,7 @@
<SUITE FILE_PATH="coverage/BICO$main.coverage" NAME="bico Coverage Results" MODIFIED="1598258463691" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$gui.coverage" NAME="gui Coverage Results" MODIFIED="1606435918815" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$main.coverage" NAME="main Coverage Results" MODIFIED="1596498134562" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$start_bico.coverage" NAME="start_bico Coverage Results" MODIFIED="1610549884717" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$start_bico.coverage" NAME="start_bico Coverage Results" MODIFIED="1610569519710" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/bico" />
<SUITE FILE_PATH="coverage/BICO_Binary_Converter$example.coverage" NAME="example Coverage Results" MODIFIED="1606348759035" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/example" />
</component>
</project>
\ No newline at end of file
bico/bico.py
import datetime as dt
import gzip
import os
import sys
from pathlib import Path
@@ -14,6 +13,7 @@ import ops.logger
import ops.setup
from gui.gui import Ui_MainWindow
from ops import bin, vis, file, stats
from ops import format_data
from settings import _version
@@ -321,10 +321,11 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
bin_filedate = dt.datetime.strptime(bin_filepath.name,
self.settings_dict['filename_datetime_parsing_string'])
ascii_filedate = bin_filedate.strftime('%Y%m%d%H%M') # w/o extension
ascii_filename = f"{self.settings_dict['site']}_{ascii_filedate}.csv"
ascii_filepath = self.settings_dict['dir_out_run_raw_data_ascii'] / ascii_filename
ascii_filename_gzip = f"{self.settings_dict['site']}_{ascii_filedate}.csv.gz"
ascii_filepath_gzip = self.settings_dict['dir_out_run_raw_data_ascii'] / ascii_filename_gzip
ascii_filename = f"{self.settings_dict['site']}_{ascii_filedate}" # w/o extension
# ascii_filename = f"{self.settings_dict['site']}_{ascii_filedate}.csv"
# ascii_filepath = self.settings_dict['dir_out_run_raw_data_ascii'] / ascii_filename
# ascii_filename_gzip = f"{self.settings_dict['site']}_{ascii_filedate}.csv.gz"
# ascii_filepath_gzip = self.settings_dict['dir_out_run_raw_data_ascii'] / ascii_filename_gzip
counter_bin_files += 1
self.statusbar.showMessage(f"Working on file #{counter_bin_files}: {bin_file}")
@@ -342,22 +343,36 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
logger.info(f" Data block sequence: {self.dblocks_seq}")
# Read binary data file
obj = bin.ReadFile(binary_filename=bin_filepath,
size_header=self.bin_size_header,
dblocks=dblocks_props,
limit_read_lines=int(self.settings_dict['row_limit']),
logger=self.logger,
outfile_ascii_path=ascii_filepath)
obj = bin.ConvertData(binary_filename=bin_filepath,
size_header=self.bin_size_header,
dblocks=dblocks_props,
limit_read_lines=int(self.settings_dict['row_limit']),
logger=self.logger,
file_number=counter_bin_files)
obj.run()
dblock_headers, file_data_rows = obj.get_data()
# Make dataframe of data
ascii_df = format_data.make_df(data_lines=file_data_rows,
header=dblock_headers,
logger=self.logger)
# Save to file
ascii_filepath = file.export_raw_data_ascii(df=ascii_df,
outdir=self.settings_dict['dir_out_run_raw_data_ascii'],
outfilename=ascii_filename,
logger=self.logger,
compression=self.settings_dict['file_compression'])
# Read the converted file that was created
file_contents_ascii_df = self.read_converted_ascii(filepath=ascii_filepath)
file_contents_ascii_df = self.read_converted_ascii(filepath=ascii_filepath,
compression=self.settings_dict['file_compression'])
# Compress uncompressed ASCII to gzip, delete uncompressed if gzip selected
if self.settings_dict['file_compression'] == 'gzip':
with open(ascii_filepath, 'rb') as f_in, gzip.open(ascii_filepath_gzip, 'wb') as f_out:
f_out.writelines(f_in)
os.remove(ascii_filepath) # Delete uncompressed
# # Compress uncompressed ASCII to gzip, delete uncompressed if gzip selected
# if self.settings_dict['file_compression'] == 'gzip':
# with open(ascii_filepath, 'rb') as f_in, gzip.open(ascii_filepath_gzip, 'wb') as f_out:
# f_out.writelines(f_in)
# os.remove(ascii_filepath) # Delete uncompressed
# Stats
stats_coll_df = stats.calc(stats_df=file_contents_ascii_df.copy(),
@@ -380,8 +395,9 @@ class Bico(qtw.QMainWindow, Ui_MainWindow):
return stats_coll_df
def read_converted_ascii(self, filepath):
def read_converted_ascii(self, filepath, compression):
"""Read converted file"""
compression = None if compression == 'None' else compression
file_contents_ascii_df = pd.read_csv(filepath,
skiprows=None,
header=[0, 1, 2],
@@ -392,7 +408,8 @@
parse_dates=False,
date_parser=None,
index_col=None,
dtype=None)
dtype=None,
compression=compression)
return file_contents_ascii_df
def assemble_datablock_sequence(self):
......
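
Taken together, the bico.py changes replace the old single-pass ReadFile flow (read binary, stream CSV rows to disk) with a three-step flow: ConvertData collects rows in memory, format_data.make_df frames them, and file.export_raw_data_ascii writes the result, compressed or not. A minimal self-contained sketch of that flow, with made-up header tuples and data rows standing in for real converter output:

import pandas as pd

# Step 1: conversion yields a multi-row header (list of tuples) and the
# raw record rows -- faked here with two variables and two records.
dblock_headers = [('T_SONIC', '(degC)', 'sonic'), ('CO2', '(mmol m-3)', 'irga')]
file_data_rows = [[23.1, 15.2], [23.4, 15.1]]

# Step 2: build the dataframe with hierarchical columns, as make_df does.
ascii_df = pd.DataFrame(file_data_rows,
                        columns=pd.MultiIndex.from_tuples(dblock_headers))

# Step 3: export, gzip-compressed if the settings ask for it, mirroring
# export_raw_data_ascii; 'demo' stands in for the real site/date filename.
compression = 'gzip'  # would come from settings_dict['file_compression']
ext = '.csv.gz' if compression == 'gzip' else '.csv'
ascii_df.to_csv(f"demo{ext}", index=False,
                compression='gzip' if compression == 'gzip' else None)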
bico/ops/bin.py
import pandas as pd
import csv
import mmap
import os
@@ -6,7 +7,7 @@ import time
import settings.data_blocks.header.wecom3
from . import bin_conversion_exceptions as bce
from ops import format_data, file
def make_header(dblock):
"""Get header info for data block, including for variables from bit maps"""
@@ -19,7 +20,7 @@ def make_header(dblock):
# Extract variables from bit map
if props['units'] == 'bit_map':
bit_map_dict = ReadFile.bit_map_get_vars(dblock=dblock)
bit_map_dict = ConvertData.bit_map_get_vars(dblock=dblock)
bit_map_headers = bit_map_extract_header(bit_map_dict=bit_map_dict)
for bmh in bit_map_headers:
dblock_header.append(bmh)
@@ -40,12 +41,12 @@ def bit_map_extract_header(bit_map_dict):
return bit_map_headers
class ReadFile:
class ConvertData:
"""
Read and convert binary data to ASCII, write to file
"""
def __init__(self, binary_filename, size_header, dblocks, limit_read_lines, logger, outfile_ascii_path):
def __init__(self, binary_filename, size_header, dblocks, limit_read_lines, logger, file_number):
self.tic = time.time() # Start time
self.binary_filename = binary_filename
self.binary_filesize = os.path.getsize(self.binary_filename)
@@ -56,7 +57,8 @@ class ReadFile:
self.file_counter_lines = 0
self.file_total_bytes_read = 0
self.file_data_rows = [] # Collects all data, i.e. all line records
self.ascii_filename = outfile_ascii_path
self.data_df = pd.DataFrame()
# self.ascii_filename = outfile_ascii_path
self.dblock_headers = []
self.logger.info(f" File size: {self.binary_filesize} Bytes")
@@ -70,8 +72,8 @@
self.convert_to_ascii()
# def get_data(self):
# return self.dblock_headers
def get_data(self):
return self.dblock_headers, self.file_data_rows
def write_multirow_header_to_ascii(self, asciiWriter):
"""Write header info from list of tuples to file as multi-row header
@@ -93,40 +95,42 @@
self.logger.info(f" Reading file data, converting to ASCII ...")
end_of_data_reached = False # Reset for each file
with open(self.ascii_filename, 'w', newline='') as open_ascii:
asciiWriter = csv.writer(open_ascii, delimiter=',')
# File header
self.dblock_headers = self.make_file_header()
self.write_multirow_header_to_ascii(asciiWriter=asciiWriter)
# Data records
while not end_of_data_reached:
# Read data blocks per instrument
file_newrow_records = []
for instr in self.dblocks:
incoming_dblock_data, end_of_data_reached = self.read_instr_dblock(dblock=instr)
if not end_of_data_reached:
file_newrow_records = file_newrow_records + incoming_dblock_data
else:
file_newrow_records = False
break # Breaks FOR loop
if file_newrow_records:
self.file_counter_lines += 1
asciiWriter.writerow(file_newrow_records)
# self.file_data_rows.append(file_newrow_records)
# Limit = 0 means no limit
if self.limit_read_lines > 0:
if self.file_counter_lines == self.limit_read_lines:
break
# with open(self.ascii_filename, 'w', newline='') as open_ascii:
# asciiWriter = csv.writer(open_ascii, delimiter=',')
# File header
self.dblock_headers = self.make_file_header()
self.data_df = pd.DataFrame(columns=self.dblock_headers)
# self.write_multirow_header_to_ascii(asciiWriter=asciiWriter)
# Data records
while not end_of_data_reached:
# Read data blocks per instrument
file_newrow_records = []
for instr in self.dblocks:
incoming_dblock_data, end_of_data_reached = self.read_instr_dblock(dblock=instr)
if not end_of_data_reached:
file_newrow_records = file_newrow_records + incoming_dblock_data
else:
file_newrow_records = False
break # Breaks FOR loop
if file_newrow_records:
self.file_counter_lines += 1
# asciiWriter.writerow(file_newrow_records)
self.file_data_rows.append(file_newrow_records)
self.data_df.append(file_newrow_records)
# Limit = 0 means no limit
if self.limit_read_lines > 0:
if self.file_counter_lines == self.limit_read_lines:
break
self.open_binary.close()
open_ascii.close()
# open_ascii.close()
self.logger.info(f" Finished conversion to ASCII.")
self.logger.info(f" ASCII data saved to file {self.ascii_filename}")
# self.logger.info(f" ASCII data saved to file {self.ascii_filename}")
self.file_speedstats()
def read_instr_dblock(self, dblock):
......
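
One detail worth flagging in the new convert_to_ascii loop: pandas' DataFrame.append returns a new frame rather than mutating in place, so the bare self.data_df.append(file_newrow_records) call discards its result, and per-record appends would be slow even if assigned. The list-based self.file_data_rows accumulation that get_data() already exposes is the idiomatic pattern; a small sketch:

import pandas as pd

rows = []  # accumulate plain lists while reading records
for record in ([1.0, 2.0], [3.0, 4.0]):  # stand-ins for read_instr_dblock output
    rows.append(list(record))

# Build the frame once, after the read loop, rather than row by row.
df = pd.DataFrame(rows, columns=['a', 'b'])
print(df)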
bico/ops/file.py
@@ -178,13 +178,25 @@ class SearchAll():
return valid_files_dict
def export_raw_data_ascii(df, outdir, logger, outfile='temp', compression='gzip'):
def export_raw_data_ascii(df, outdir, logger, outfilename='temp', compression='gzip'):
logger.info(" Saving raw data csv ...")
outpath = outdir / outfile
if compression == 'gzip':
df.to_csv(f"{outpath}.csv.gz", index=False, compression='gzip')
outfilename_ext = '.csv.gz'
compression = 'gzip'
elif compression == 'None':
df.to_csv(f"{outpath}.csv", index=False)
outfilename_ext = '.csv'
compression = None
else:
outfilename_ext = '.csv'
compression = None
outfilename_ext = outfilename + outfilename_ext
outpath = outdir / outfilename_ext
df.to_csv(f"{outpath}", index=False, compression=compression)
return outpath
def export_stats_collection_csv(df, outdir, run_id, logger):
......
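
The reworked export_raw_data_ascii resolves the file extension and the pandas compression argument from the one settings string and funnels everything through a single to_csv call. A standalone sketch of that branching (collapsing the 'None' and fallback cases, which behave identically):

from pathlib import Path
import pandas as pd

def export_raw_data_ascii(df, outdir, outfilename='temp', compression='gzip'):
    # Map the settings string to an extension and a pandas argument.
    if compression == 'gzip':
        ext, pd_compression = '.csv.gz', 'gzip'
    else:
        ext, pd_compression = '.csv', None
    outpath = Path(outdir) / f"{outfilename}{ext}"
    df.to_csv(outpath, index=False, compression=pd_compression)
    return outpath

print(export_raw_data_ascii(pd.DataFrame({'x': [1, 2]}), '.',
                            outfilename='demo', compression='gzip'))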
bico/ops/format_data.py
@@ -2,6 +2,7 @@ import pandas as pd
def make_df(data_lines, header, logger):
"""Make dataframe from list of record rows"""
logger.info(" Converting to dataframe ...")
df = pd.DataFrame(data_lines, columns=pd.MultiIndex.from_tuples(header))
return df
......
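
make_df builds its hierarchical columns with pandas.MultiIndex.from_tuples, which is also why read_converted_ascii in bico.py reads the file back with header=[0, 1, 2]: three elements per header tuple become three header rows in the CSV. A quick round-trip demonstration:

import io
import pandas as pd

header = [('T_SONIC', '(degC)', 'sonic'), ('CO2', '(mmol m-3)', 'irga')]
df = pd.DataFrame([[23.1, 15.2], [23.4, 15.1]],
                  columns=pd.MultiIndex.from_tuples(header))

# Write and re-read: the three column levels come back intact.
buf = io.StringIO()
df.to_csv(buf, index=False)
buf.seek(0)
df2 = pd.read_csv(buf, header=[0, 1, 2])
print(df2.columns.nlevels)  # -> 3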
bico/settings/Bico.settings
run_id=BICO-20210113-124047
run_id=BICO-20210113-212521
# INSTRUMENTS
# ===========
@@ -35,15 +35,15 @@ select_random_files=0
# OUTPUT
# ======
dir_out=A:/FLUXES/x-TEST-OUT
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\raw_data_ascii
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212521\raw_data_ascii
output_folder_name_prefix=DAV
file_compression=None
file_compression=gzip
plot_file_availability=0
plot_ts_hires=0
......
bico/settings/Bico.settingsOld
run_id=BICO-20210113-124047
run_id=BICO-20210113-212408
# INSTRUMENTS
# ===========
@@ -35,15 +35,15 @@ select_random_files=0
# OUTPUT
# ======
dir_out=A:/FLUXES/x-TEST-OUT
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-124047\raw_data_ascii
dir_out_run=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408
dir_out_run_log=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\log
dir_out_run_plots=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\plots
dir_out_run_plots_hires=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\plots\hires
dir_out_run_plots_agg=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\plots\agg
dir_out_run_raw_data_ascii=A:\FLUXES\x-TEST-OUT\DAV_BICO-20210113-212408\raw_data_ascii
output_folder_name_prefix=DAV
file_compression=None
file_compression=gzip
plot_file_availability=0
plot_ts_hires=0
......
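
Both settings files flip file_compression from None to gzip. The value travels as text, which is why the updated read_converted_ascii normalizes the literal string 'None' to a real None before handing it to pandas; a sketch of that mapping:

# Settings values arrive as strings, so the literal 'None' must become
# a real None before pandas sees it; 'gzip' passes straight through.
def normalize_compression(value: str):
    return None if value == 'None' else value

assert normalize_compression('None') is None
assert normalize_compression('gzip') == 'gzip'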