def h5_tables_gen(db_path, tables, tables_log, db=None) -> Iterator[Tuple[str, str, pd.HDFStore]]:
    """
    Generate table names with associated log table names and the opened store
    :param db_path: path to pandas HDF5 store
    :param tables: table names search pattern or sequence of table names
    :param tables_log: table names for metadata of data in `tables`
    :param db: optional, already opened pd.HDFStore - used instead of opening `db_path`
        (kept to satisfy the h5_dispenser_and_names_gen() calling convention)
    :return: iterator that yields (table name, log table name, opened store).
        If `tables` is a single search pattern it is expanded to the list of tables found in the store.
    """
    # '{}' in the log table pattern will be replaced with each table name from `tables`
    tbl_log_pattern = (tables_log[0] or '{}/logRuns') if len(tables_log) == 1 else tables_log[0]
    with FakeContextIfOpen(lambda f: pd.HDFStore(f, mode='r'), file=db_path, opened_file_object=db) as store:
        if len(tables) == 1:
            tables = h5find_tables(store, tables[0])
        for tbl, tbl_log in zip_longest(tables, tables_log, fillvalue=tbl_log_pattern):
            yield tbl, tbl_log.format(tbl), store

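# A minimal usage sketch for h5_tables_gen() (the store path 'incl.h5' and the table
# pattern below are hypothetical, only for illustration):
#
#     for tbl, tbl_log, store in h5_tables_gen('incl.h5', ['incl.*'], ['{}/logRuns']):
#         df_log = store[tbl_log]  # log (metadata) rows describing the data in table `tbl`
#         print(tbl, len(df_log))
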
def ge_names_from_hdf5_paths(cfg, f_file_name=lambda x: x):
    """
    Replacement for veuszPropagate.ge_names() to use tables instead of files
    :param cfg: dict with field ['in']['tables'] - list of tables, or a one-element list with a
        regular expression path to find tables
    :return:
    """
    with pd.HDFStore(cfg['in']['path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(store, cfg['in']['tables'][0])
    for tbl in cfg['in']['tables']:
        yield f_file_name(tbl)

def ge_names(cfg, f_file_name=lambda x: x):
    """
    Replacement for veuszPropagate.ge_names() to use tables instead of files
    :param cfg:
    :return:
    """
    with pd.HDFStore(cfg['in']['path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(
                store, cfg['in']['tables'][0], parent_name=cfg['in']['db_parent_path'])
    for tbl in cfg['in']['tables']:
        # if int(tbl[-2:]) in {5, 9, 10, 11, 14, 20}:
        yield f_file_name(tbl)

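# A usage sketch for the two generators above (an assumption, not taken from this module's
# callers): both can replace veuszPropagate.ge_names() so that names are produced per HDF5
# table rather than per file. The store path and the '.vsz' naming below are hypothetical:
#
#     cfg['in']['path'] = Path('data.h5')
#     cfg['in']['tables'] = ['incl.*']  # a single regex is expanded by h5find_tables()
#     for name in ge_names_from_hdf5_paths(cfg, f_file_name=lambda tbl: f'{tbl}.vsz'):
#         print(name)  # e.g. 'incl01.vsz', 'incl02.vsz', ...
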
def main(config: ConfigType) -> None:
    """
    ----------------------------
    Calculates coefficients from data of Pandas HDF5 store *.h5 and saves them back
    ----------------------------
    1. Obtains command line arguments (for description see my_argparser()) that can also be passed
       through new_arg and an ini file.
    2. Loads device calibration (laboratory) data from the hdf5 database (cfg['in']['db_path']).
    3. Calibrates the channels configured by cfg['in']['channels'] ('accelerometer' and/or
       'magnetometer'): soft iron.
    4. todo: Rotate compass using cfg['in']['timerange_nord'] (the current implementation is wrong -
       it does not use cfg['in']['timerange_nord']!).

    :param config: configuration (ConfigType). Returns cfg if new_arg == '<cfg_from_args>',
        but returns None if argument argv[1:] == '-h' or '-v' is passed to this code.
        argv[1] is cfgFile. It was used with cfg files:
    """
    global cfg, l
    cfg = main_init(config, cs_store_name, __file__=None)
    cfg = main_init_input_file(cfg, cs_store_name)

    # input data tables may be defined by 'probes_prefix' and 'probes' fields of cfg['in']
    if cfg['in']['probes'] or not len(cfg['in']['tables']):
        if cfg['in']['probes']:
            cfg['in']['tables'] = [
                f"{cfg['in']['probes_prefix']}{probe:0>2}" for probe in cfg['in']['probes']
            ]
        elif cfg['in']['probes_prefix']:
            cfg['in']['tables'] = [f"{cfg['in']['probes_prefix']}.*"]
        # else:  # default config
        #     cfg['in']['tables'] = ['.*']

    # h5init(cfg['in'], cfg['out'])
    # cfg['out']['dt_from_utc'] = 0
    # cfg = cfg_from_args(my_argparser(), new_arg)

    lf.info("{:s}({:s}) for channels: {} started. ",
            this_prog_basename(__file__), ', '.join(cfg['in']['tables']), cfg['in']['channels'])
    fig = None
    fig_filt = None
    fig_save_dir_path = cfg['in']['db_path'].parent
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(store, cfg['in']['tables'][0])
        coefs = {}
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            probe_number = int(re.findall(r'\d+', tbl)[0])
            lf.info(f'{itbl}. {tbl}: ')
            if isinstance(cfg['in']['timerange'], Mapping):  # individual interval for each table
                if probe_number in cfg['in']['timerange']:
                    timerange = cfg['in']['timerange'][probe_number]
                else:
                    timerange = None
            else:
                timerange = cfg['in']['timerange']  # same interval for each table
            a = load_hdf5_data(store, table=tbl, t_intervals=timerange)
            # iUseTime = np.searchsorted(stime, [np.array(s, 'datetime64[s]') for s in np.array(strTimeUse)])

            # Calibrate channels of 'accelerometer' or/and 'magnetometer'
            coefs[tbl] = {}
            for channel in cfg['in']['channels']:
                print(f' channel "{channel}"', end=' ')
                (col_str, coef_str) = channel_cols(channel)

                # filtering
                if True:  # col_str == 'A'?
                    b_ok = np.zeros(a.shape[0], bool)
                    for component in ['x', 'y', 'z']:
                        b_ok |= is_works(
                            a[col_str + component], noise=cfg['filter']['no_works_noise'][channel])
                    lf.info('Filtered not working area: {:2.1f}%', (b_ok.size - b_ok.sum()) * 100 / b_ok.size)
                    # vec3d = np.column_stack(
                    #     (a[col_str + 'x'], a[col_str + 'y'], a[col_str + 'z']))[:, b_ok].T  # [slice(*iUseTime.flat)]
                    vec3d = a.loc[
                        b_ok, [col_str + 'x', col_str + 'y', col_str + 'z']].to_numpy(float).T
                    index = a.index[b_ok]

                vec3d, b_ok, fig_filt = filter_channes(
                    vec3d, index, fig_filt,
                    fig_save_prefix=f"{fig_save_dir_path / tbl}-'{channel}'",
                    blocks=cfg['filter']['blocks'],
                    offsets=cfg['filter']['offsets'],
                    std_smooth_sigma=cfg['filter']['std_smooth_sigma'])
                A, b = calibrate(vec3d)
                window_title = f"{tbl} '{channel}' channel ellipse"
                fig = calibrate_plot(vec3d, A, b, fig, window_title=window_title)
                fig.savefig(fig_save_dir_path / (window_title + '.png'), dpi=300, bbox_inches="tight")
                A_str, b_str = coef2str(A, b)
                lf.info('Calibration coefficients calculated: \nA = \n{:s}\nb = \n{:s}', A_str, b_str)
                coefs[tbl][channel] = {'A': A, 'b': b}

            # Zeroing Nord direction
            timerange_nord = cfg['in']['timerange_nord']
            if isinstance(timerange_nord, Mapping):
                timerange_nord = timerange_nord.get(probe_number)
            if timerange_nord:
                coefs[tbl]['M']['azimuth_shift_deg'] = zeroing_azimuth(
                    store, tbl, timerange_nord, calc_vel_flat_coef(coefs[tbl]), cfg['in'])
            else:
                lf.info('not zeroing North')

    # Write coefs to each of output tables named same as input
    for cfg_output in (['in', 'out'] if cfg['out'].get('db_path') else ['in']):
        lf.info('Writing to {}', cfg[cfg_output]['db_path'])
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            # i_search = re.search(r'\d*$', tbl)
            # for channel in cfg['in']['channels']:
            #     (col_str, coef_str) = channel_cols(channel)
            #     dict_matrices = {f'//coef//{coef_str}//A': coefs[tbl][channel]['A'],
            #                      f'//coef//{coef_str}//C': coefs[tbl][channel]['b'],
            #                      }
            #     if channel == 'M':
            #         if coefs[tbl]['M'].get('azimuth_shift_deg'):
            #             dict_matrices[f'//coef//{coef_str}//azimuth_shift_deg'] = coefs[tbl]['M']['azimuth_shift_deg']
            #         # Copying probe number to coefficients so it can be checked after copying manually
            #         if i_search:
            #             try:
            #                 dict_matrices['//coef//i'] = int(i_search.group(0))
            #             except Exception as e:
            #                 pass
            dict_matrices = dict_matrices_for_h5(coefs[tbl], tbl, cfg['in']['channels'])
            h5copy_coef(None, cfg[cfg_output]['db_path'], tbl, dict_matrices=dict_matrices)
    print('Ok>', end=' ')

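# How the saved A, b coefficients are typically applied downstream (a hedged sketch for
# illustration only; this module just computes and stores them. The correction form
# `A @ (raw - b)` is the usual soft-iron convention, an assumption not stated in main()):
#
#     import numpy as np
#
#     def apply_soft_iron(vec3d_raw: np.ndarray, A: np.ndarray, b: np.ndarray) -> np.ndarray:
#         """vec3d_raw: 3xN raw samples of one channel; returns 3xN corrected samples."""
#         return A @ (vec3d_raw - b.reshape(3, 1))
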
def main(new_arg=None):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg
    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])

    if not cfg['out']['path'].is_absolute():
        # set path relative to cfg['in']['db_path']
        cfg['out']['path'] = cfg['in']['db_path'].with_name(str(cfg['out']['path']))

    l.warning('\n {}({}) is going to save gpx to ..{} dir. '.format(
        this_prog_basename(__file__), cfg['in']['db_path'], cfg['out']['path'].parent))

    if cfg['out']['select_from_tablelog_ranges'] is None:
        gpx_symbols = None
    else:
        gpx_symbols = init_gpx_symbols_fun(cfg['out'])

    global gpx_names_funs  # Shortcut for cfg['out']['gpx_names_funs']

    # Load data #################################################################
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        # Find tables by pattern
        if '*' in cfg['in']['tables'][0]:  # if 'table_prefix' in cfg['in']
            pattern_tables = cfg['in']['tables'][0]
            cfg['in']['tables'] = h5find_tables(store, pattern_tables)
            len_tables = len(cfg['in']['tables'])
            msg = 'Found {} tables with pattern {}'.format(len_tables, pattern_tables)
            if len_tables:
                l.info(msg)
            else:
                raise Ex_nothing_done(msg + '!')

            gpx_names_funs = []
            for itbl in range(len(cfg['in']['tables'])):  # same for each table
                gpx_names_funs.append(cfg['out']['gpx_names_funs'][0])
        else:  # fixed number of tables
            # initialise with defaults if needed:
            gpx_names_funs = cfg['out']['gpx_names_funs']
            for itbl in range(len(gpx_names_funs), len(cfg['in']['tables'])):
                gpx_names_funs.append('i+1')

        dfs_rnav = []
        tbl_names_all_shortened = []
        for itbl, tblD in enumerate(cfg['in']['tables']):
            print(itbl, '. ', tblD, end=': ', sep='')
            if cfg['in']['tables_log'][0]:
                tblL = tblD + '/' + cfg['in']['tables_log'][0]
                try:
                    dfL = store[tblL]
                except KeyError as e:
                    l.warning(' '.join([s for s in e.args if isinstance(s, str)]))
                    continue
            else:  # only for tables without log (usually no such tables)
                l.warning('configuration specifies to get data without use of "log..." tables')
                st_en = store[tblD].index[[0, -1]]
                if cfg['process']['period_files']:
                    t_intervals_start = pd.date_range(
                        start=st_en[0].normalize(),
                        end=max(st_en[-1],
                                st_en[-1].normalize() + pd_period_to_timedelta(cfg['process']['period_files'])),
                        freq=cfg['process']['period_files'])[1:]  # makes last t_interval_start >= all_data[-1]
                    dfL = pd.DataFrame.from_records(
                        {'DateEnd': t_intervals_start, 'fileName': tblD},
                        index=st_en[:1].append(t_intervals_start[:-1]))
                else:
                    dfL = pd.DataFrame.from_records({'DateEnd': st_en[-1], 'fileName': tblD}, index=st_en[:1])

            gpx_names_fun_str = "lambda i, row, t=0: '{}'.format({})".format(
                cfg['out']['gpx_names_fun_format'], gpx_names_funs[itbl])
            gpx_names_fun = eval(compile(gpx_names_fun_str, '', 'eval'))
            if cfg['out']['select_from_tablelog_ranges'] is None:
                # Use all data for ranges specified in log rows and save tracks (not points)
                for irow, r in enumerate(dfL.itertuples()):  # iterrows()
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    print(qstr, end='... ')
                    try:
                        dfD = store.select(
                            cfg['in']['table_nav'] if cfg['in']['table_nav'] else tblD,
                            qstr, columns=['Lat', 'Lon', 'DepEcho'])
                    except Exception as e:
                        l.exception('Error when query: {}. '.format(qstr))
                        # '\n==> '.join([s for s in e.args if isinstance(s, str)])))
                        continue
                    # Keep data with period = 1s only
                    dfD = dfD[~dfD.index.round(pd.Timedelta(seconds=1)).duplicated()]
                    # dfD.drop_duplicates(['Lat', 'Lon', 'index'])

                    bGood = filterGlobal_minmax(dfD, dfD.index, cfg['filter'])
                    dfD = dfD[bGood]

                    # Add UTC time and table name to output file name,
                    # local time and table name to gpx object name
                    str_time_long = '{:%y%m%d_%H%M}'.format(r.Index)
                    r = r._replace(Index=timzone_view(r.Index, cfg['out']['dt_from_utc_in_comments']))
                    tblD_safe = file_from_tblname(tblD, cfg['in']['tables_log'][0])
                    try:
                        gpx_names_fun_result = gpx_names_fun(tblD_safe, r)  # '{:%y%m%d}'.format(timeLocal)
                    except TypeError as e:
                        raise TypeError(
                            'Can not evaluate gpx_names_fun "{}"'.format(gpx_names_fun_str)
                        ).with_traceback(e.__traceback__)
                    save_to_gpx(
                        dfD, cfg['out']['path'].with_name(f'{str_time_long}{tblD_safe}'),
                        gpx_obj_namef=gpx_names_fun_result, cfg_proc=cfg['process'])
                    if len(cfg['in']['tables']) > 1:
                        nav2add_cur = dfD if irow == 0 else nav2add_cur.append(dfD)
                if len(cfg['in']['tables']) > 1:
                    nav2add_cur = dfD.assign(itbl=itbl)
            else:
                # Use only 1 data point per log row
                if cfg['out']['select_from_tablelog_ranges'] != 0:
                    print('selecting from {} row index of log table'.format(
                        cfg['out']['select_from_tablelog_ranges']))
                try:
                    dfL.index = dfL.index.tz_convert('UTC')
                except TypeError as e:
                    print((e.msg if hasattr(e, 'msg') else str(e)) +
                          '!\n- continue presuming log index is in UTC...')
                print(end='all log data ')

                time_points = (
                    dfL.index if cfg['out']['select_from_tablelog_ranges'] == 0 else
                    dfL['DateEnd'] if cfg['out']['select_from_tablelog_ranges'] == -1 else
                    None)
                if time_points is None:
                    raise ValueError("cfg['out']['select_from_tablelog_ranges'] must be 0 or -1")
                cols_nav = ['Lat', 'Lon', 'DepEcho']
                nav2add = h5select(
                    store, cfg['in']['table_nav'], cols_nav,
                    time_points=time_points,
                    dt_check_tolerance=cfg['process']['dt_search_nav_tolerance'],
                    query_range_lims=(time_points[0], dfL['DateEnd'][-1])
                )[0]
                cols_nav = nav2add.columns  # not all columns may be loaded
                # Try to get non-NaN values from dfL if it has the needed columns
                # (edges' data used to be written there with _st/_en suffixes)
                isna = nav2add.isna()
                dfL_col_suffix = 'st' if cfg['out']['select_from_tablelog_ranges'] == 0 else 'en'
                for col in cols_nav:
                    col_dat = f'{col}_{dfL_col_suffix}'
                    if isna[col].any() and col_dat in dfL.columns:
                        b_use = isna[col].values & dfL[col_dat].notna().values
                        nav2add.loc[b_use, col] = dfL.loc[b_use, col_dat].values

                nav2add.index = timzone_view(nav2add.index, dt_from_utc=cfg['out']['dt_from_utc_in_comments'])
                # tz_local = tzoffset(None, cfg['out']['dt_from_utc_in_comments'].total_seconds())
                # if nav2add.index.tz is None:
                #     # think if time zone of tz-naive Timestamp is naive then it is UTC
                #     nav2add.index = nav2add.index.tz_localize('UTC')
                # nav2add.tz_convert(tz_local, copy=False)

                # Save to gpx waypoints
                nav2add_cur = nav2add.assign(itbl=itbl)
                # if 'gpx_names_funs' in cfg['out'] and \
                #         len(cfg['out']['gpx_names_funs']) > itbl:
                #     gpx_names = eval(compile('lambda i: str({})'.format(
                #         cfg['out']['gpx_names_funs'][itbl]), [], 'eval'))
                save_to_gpx(
                    nav2add_cur,
                    cfg['out']['path'] / f"stations_{file_from_tblname(tblD, cfg['in']['tables_log'][0])}",
                    gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols, cfg_proc=cfg['process'])
                # save_to_csv(nav2add, dfL.index, cfg['out']['path'].with_name(f'nav{tblD}.txt'))

                if False:  # Show table info
                    store.get_storer(tblD).table
                    nodes = sorted(store.root.__members__)  # , key=number_key
                    print(nodes)
                    # store.get_node('CTD_Idronaut(Redas)').logFiles  # next level nodes

            # prepare saving of combined gpx
            if tbl_names_all_shortened:
                i_new = 0
                for c_prev, c_new in zip(tbl_names_all_shortened[-1], tblD):
                    if c_new == c_prev:
                        i_new += 1
                    else:
                        break
                tbl_names_all_shortened.append(tblD[i_new:])
            else:
                tbl_names_all_shortened.append(tblD)
            dfs_rnav.append(nav2add_cur)

        if len(cfg['in']['tables']) > 1 and cfg['out']['gpx_names_funs_cobined']:
            print('combined: ', end='')  # Save combined data to gpx
            df_rnav_combined = pd.concat(dfs_rnav)
            df_rnav_combined.sort_index(inplace=True)
            # Save to gpx waypoints
            if 'gpx_names_funs' in cfg['out']['gpx_names_funs_cobined']:
                gpx_names_funs = [
                    # row not used, it is here only for compatibility with tracks
                    eval(compile("lambda i: " + f, '', 'eval')) for f in gpx_names_funs]
            gpx_names_fun = eval(compile(
                "lambda i,row,t: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(
                    cfg['out']), '', 'eval'))
            # gpx_symbols = lambda row: cfg['out']['gpx_symbols'][sym_index_fun(row)]
            # gpx_names = eval(compile("lambda i,row: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(cfg['out']), '', 'eval'))
            # gpx_names = lambda i: str(i + 1)
            save_to_gpx(
                df_rnav_combined,
                cfg['out']['path'].with_name(
                    'all_' + file_from_tblname(','.join(tbl_names_all_shortened), cfg['in']['tables_log'][0])),
                gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols, cfg_proc=cfg['process'])
    print('Ok')

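# A minimal usage sketch for the gpx-export main() above (an assumption; the actual command
# line option names are defined in my_argparser() and are not repeated here):
#
#     if __name__ == '__main__':
#         main()  # parses sys.argv via cfg_from_args(my_argparser(), None)
#
#     # For testing, the parsed configuration can be obtained without processing:
#     #     cfg = main('<return_cfg>')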