Example #1
def h5_tables_gen(db_path,
                  tables,
                  tables_log,
                  db=None) -> Iterator[Tuple[str, str, pd.HDFStore]]:
    """
    Generate table names with associated coefficients
    :param tables: tables names search pattern or sequence of table names
    :param tables_log: tables names for metadata of data in `tables`
    :param db_path:
    :param cfg_out: not used but kept for the requirement of h5_dispenser_and_names_gen() argument
    :return: iterator that returns (table name, coefficients)
    updates cfg_in['tables'] - sets to list of found tables in store
    """
    # '{}' in the log table pattern will be filled with each table name
    tbl_log_pattern = ((tables_log[0] or '{}/logRuns') if len(tables_log) == 1
                       else tables_log[0])
    with FakeContextIfOpen(lambda f: pd.HDFStore(f, mode='r'),
                           file=db_path,
                           opened_file_object=db) as store:
        if len(tables) == 1:
            tables = h5find_tables(store, tables[0])
        for tbl, tbl_log in zip_longest(tables,
                                        tables_log,
                                        fillvalue=tbl_log_pattern):
            yield tbl, tbl_log.format(tbl), store
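Note: FakeContextIfOpen is project code that is not shown here. A minimal sketch of what it plausibly does (reuse an already opened store, otherwise open one and close it on exit); the lowercase name marks it as a hypothetical re-implementation:

from contextlib import contextmanager

@contextmanager
def fake_context_if_open(fn_open, file, opened_file_object=None):
    # If an opened object is supplied, yield it as-is and leave closing to the caller;
    # otherwise open `file` with fn_open and close it afterwards.
    if opened_file_object is not None:
        yield opened_file_object
    else:
        obj = fn_open(file)
        try:
            yield obj
        finally:
            obj.close()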
Example #2
def ge_names_from_hdf5_paths(cfg, f_file_name=lambda x: x):
    """
    Replacement for veuszPropagate.ge_names() to use tables instead of files
    :param cfg: dict with field ['in']['tables']: list of table names, or a single-element list with a regular expression to find tables
    :return:
    """
    with pd.HDFStore(cfg['in']['path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(store, cfg['in']['tables'][0])
    for tbl in cfg['in']['tables']:
        yield f_file_name(tbl)
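A possible call, with a hypothetical store path and table names (when the tables list has more than one element, the pattern expansion is skipped and names are yielded through f_file_name as-is):

cfg = {'in': {'path': 'data.h5', 'tables': ['navigation/run01', 'navigation/run02']}}  # hypothetical
for name in ge_names_from_hdf5_paths(cfg, f_file_name=lambda tbl: tbl.replace('/', '_')):
    print(name)  # navigation_run01, navigation_run02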
Example #3
def ge_names(cfg, f_file_name=lambda x: x):
    """
    Replacement for veuszPropagate.ge_names() to use tables instead of files
    :param cfg:
    :return:
    """
    with pd.HDFStore(cfg['in']['path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(
                store,
                cfg['in']['tables'][0],
                parent_name=cfg['in']['db_parent_path'])
    for tbl in cfg['in']['tables']:
        # if int(tbl[-2:]) in {5,9,10,11,14,20}:
        yield f_file_name(tbl)
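h5find_tables itself is not shown; judging only from its call sites here (a store, a name pattern, an optional parent_name), a sketch could look like this, using pandas' HDFStore.walk. The real function likely returns paths relative to parent_name; this version returns bare leaf names:

import re
import pandas as pd

def find_tables_by_pattern(store: pd.HDFStore, pattern: str, parent_name: str = '/'):
    # Walk groups under parent_name and keep leaf names fully matching the regex pattern.
    regex = re.compile(pattern)
    return [name for _path, _groups, leaves in store.walk(parent_name)
            for name in leaves if regex.fullmatch(name)]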
Example #4
def main(config: ConfigType) -> None:
    """
    ----------------------------
    Calculates coefficients from
    data of Pandas HDF5 store*.h5
    and saves them back
    ----------------------------
    1. Obtains command line arguments (for description see my_argparser()) that can also be passed via new_arg and an
    ini file.
    2. Loads device calibration data (from laboratory measurements) from hdf5 database (cfg['in']['db_path'])
    3. Calibrates the channels configured by cfg['in']['channels'] ('accelerometer' and/or 'magnetometer'): soft iron
    4. Wrong implementation - does not use cfg['in']['timerange_nord']! todo: Rotate compass using cfg['in']['timerange_nord']
    :param config: returns cfg if new_arg=='<cfg_from_args>' but it will be None if argument
     argv[1:] == '-h' or '-v' passed to this code
    argv[1] is cfgFile. It was used with cfg files:


    """
    global cfg, l
    cfg = main_init(config, cs_store_name, __file__=None)
    cfg = main_init_input_file(cfg, cs_store_name)
    # input data tables may be defined by 'probes_prefix' and 'probes' fields of cfg['in']
    if cfg['in']['probes'] or not len(cfg['in']['tables']):
        if cfg['in']['probes']:
            cfg['in']['tables'] = [
                f"{cfg['in']['probes_prefix']}{probe:0>2}"
                for probe in cfg['in']['probes']
            ]
        elif cfg['in']['probes_prefix']:
            cfg['in']['tables'] = [f"{cfg['in']['probes_prefix']}.*"]
        # else:  # default config
        #     cfg['in']['tables'] = ['.*']

    #h5init(cfg['in'], cfg['out'])
    #cfg['out']['dt_from_utc'] = 0
    # cfg = cfg_from_args(my_argparser(), new_arg)

    lf.info("{:s}({:s}) for channels: {} started. ",
            this_prog_basename(__file__), ', '.join(cfg['in']['tables']),
            cfg['in']['channels'])
    fig = None
    fig_filt = None

    fig_save_dir_path = cfg['in']['db_path'].parent
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(store, cfg['in']['tables'][0])
        coefs = {}
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            probe_number = int(re.findall(r'\d+', tbl)[0])
            lf.info(f'{itbl}. {tbl}: ')
            if isinstance(cfg['in']['timerange'], Mapping):
                # individual interval for each table
                if probe_number in cfg['in']['timerange']:
                    timerange = cfg['in']['timerange'][probe_number]
                else:
                    timerange = None
            else:
                timerange = cfg['in']['timerange']  # same interval for each table
            a = load_hdf5_data(store, table=tbl, t_intervals=timerange)
            # iUseTime = np.searchsorted(stime, [np.array(s, 'datetime64[s]') for s in np.array(strTimeUse)])

            # Calibrate channels of 'accelerometer' or/and 'magnetometer'
            coefs[tbl] = {}
            for channel in cfg['in']['channels']:
                print(f' channel "{channel}"', end=' ')
                (col_str, coef_str) = channel_cols(channel)

                # filtering (col_str == 'A'?)
                b_ok = np.zeros(a.shape[0], bool)
                for component in ['x', 'y', 'z']:
                    b_ok |= is_works(
                        a[col_str + component],
                        noise=cfg['filter']['no_works_noise'][channel])
                lf.info('Filtered not working area: {:2.1f}%',
                        (b_ok.size - b_ok.sum()) * 100 / b_ok.size)
                # vec3d = np.column_stack(
                #     (a[col_str + 'x'], a[col_str + 'y'], a[col_str + 'z']))[:, b_ok].T  # [slice(*iUseTime.flat)]
                vec3d = a.loc[
                    b_ok, [col_str + 'x', col_str + 'y', col_str + 'z']
                ].to_numpy(float).T
                index = a.index[b_ok]

                vec3d, b_ok, fig_filt = filter_channes(
                    vec3d,
                    index,
                    fig_filt,
                    fig_save_prefix=f"{fig_save_dir_path / tbl}-'{channel}'",
                    blocks=cfg['filter']['blocks'],
                    offsets=cfg['filter']['offsets'],
                    std_smooth_sigma=cfg['filter']['std_smooth_sigma'])

                A, b = calibrate(vec3d)
                window_title = f"{tbl} '{channel}' channel ellipse"
                fig = calibrate_plot(vec3d,
                                     A,
                                     b,
                                     fig,
                                     window_title=window_title)
                fig.savefig(fig_save_dir_path / (window_title + '.png'),
                            dpi=300,
                            bbox_inches="tight")
                A_str, b_str = coef2str(A, b)
                lf.info(
                    'Calibration coefficients calculated: \nA = \n{:s}\nb = \n{:s}',
                    A_str, b_str)
                coefs[tbl][channel] = {'A': A, 'b': b}

            # Zeroing Nord direction
            timerange_nord = cfg['in']['timerange_nord']
            if isinstance(timerange_nord, Mapping):
                timerange_nord = timerange_nord.get(probe_number)
            if timerange_nord:
                coefs[tbl]['M']['azimuth_shift_deg'] = zeroing_azimuth(
                    store, tbl, timerange_nord, calc_vel_flat_coef(coefs[tbl]),
                    cfg['in'])
            else:
                lf.info('not zeroing North')
    # Write coefs to each output database, to tables named the same as the input tables
    for cfg_output in (['in', 'out'] if cfg['out'].get('db_path') else ['in']):
        lf.info('Writing to {}', cfg[cfg_output]['db_path'])
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            # i_search = re.search('\d*$', tbl)
            # for channel in cfg['in']['channels']:
            #     (col_str, coef_str) = channel_cols(channel)
            #     dict_matrices = {f'//coef//{coef_str}//A': coefs[tbl][channel]['A'],
            #                      f'//coef//{coef_str}//C': coefs[tbl][channel]['b'],
            #                      }
            #     if channel == 'M':
            #         if coefs[tbl]['M'].get('azimuth_shift_deg'):
            #             dict_matrices[f'//coef//{coef_str}//azimuth_shift_deg'] = coefs[tbl]['M']['azimuth_shift_deg']
            #         # Copying probe number into coefficients so it can be checked manually when copying
            #         if i_search:
            #             try:
            #                 dict_matrices['//coef//i'] = int(i_search.group(0))
            #             except Exception as e:
            #                 pass
            dict_matrices = dict_matrices_for_h5(coefs[tbl], tbl,
                                                 cfg['in']['channels'])
            h5copy_coef(None,
                        cfg[cfg_output]['db_path'],
                        tbl,
                        dict_matrices=dict_matrices)

    print('Ok>', end=' ')
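calibrate() above is also project code that is not shown. For orientation, a minimal hard-iron-only sketch under the (3, N) vec3d layout used above: fit a sphere ||x - b|| ≈ r by linear least squares and return an isotropic A. The real function evidently also fits the soft-iron part (a full matrix A), which this sketch omits:

import numpy as np

def calibrate_hard_iron(vec3d):
    # vec3d: (3, N) measurements. From ||x - b||^2 = r^2 follows
    # x.x = 2*b.x + (r^2 - b.b), which is linear in the unknowns (b, c).
    x = vec3d.T                                   # (N, 3)
    d = (x ** 2).sum(axis=1)                      # x.x for each sample
    G = np.column_stack([2 * x, np.ones(len(x))])
    sol, *_ = np.linalg.lstsq(G, d, rcond=None)
    b = sol[:3]
    r = np.sqrt(sol[3] + b @ b)                   # c = r^2 - b.b
    A = np.eye(3) / r                             # isotropic scale only (no soft iron)
    return A, b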
Example #5
def main(new_arg=None):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg
    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    if not cfg['out']['path'].is_absolute():
        # set path relative to cfg['in']['db_path']
        cfg['out']['path'] = cfg['in']['db_path'].with_name(str(cfg['out']['path']))

    l.warning('\n {}({}) is going to save gpx to ..{} dir. '.format(
        this_prog_basename(__file__), cfg['in']['db_path'], cfg['out']['path'].parent))

    if cfg['out']['select_from_tablelog_ranges'] is None:
        gpx_symbols = None
    else:
        gpx_symbols = init_gpx_symbols_fun(cfg['out'])

    global gpx_names_funs  # Shortcut for cfg['out']['gpx_names_funs']

    # Load data #################################################################
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        # Find tables by pattern
        if '*' in cfg['in']['tables'][0]:
            # if 'table_prefix' in cfg['in']
            pattern_tables = cfg['in']['tables'][0]
            cfg['in']['tables'] = h5find_tables(store, pattern_tables)
            len_tables = len(cfg['in']['tables'])
            msg = 'Found {} tables with pattern {}'.format(len_tables, pattern_tables)
            if len_tables:
                l.info(msg)
            else:
                raise Ex_nothing_done(msg + '!')

            gpx_names_funs = []
            for itbl in range(len(cfg['in']['tables'])):  # same for each table
                gpx_names_funs.append(cfg['out']['gpx_names_funs'][0])
        else:  # fixed number of tables
            # initialise with defaults if needed:
            gpx_names_funs = cfg['out']['gpx_names_funs']
            for itbl in range(len(gpx_names_funs), len(cfg['in']['tables'])):
                gpx_names_funs.append('i+1')
        dfs_rnav = []
        tbl_names_all_shortened = []
        for itbl, tblD in enumerate(cfg['in']['tables']):
            print(itbl, '. ', tblD, end=': ', sep='')
            if cfg['in']['tables_log'][0]:
                tblL = tblD + '/' + cfg['in']['tables_log'][0]
                try:
                    dfL = store[tblL]
                except KeyError as e:
                    l.warning(' '.join([s for s in e.args if isinstance(s, str)]))
                    continue
            else:  # only for tables without log (usually no such tables)
                l.warning('configuration specifies to get data without use of "log..." tables')
                st_en = store[tblD].index[[0, -1]]
                if cfg['process']['period_files']:
                    t_intervals_start = pd.date_range(
                        start=st_en[0].normalize(),
                        end=max(st_en[-1], st_en[-1].normalize() + pd_period_to_timedelta(
                            cfg['process']['period_files'])),
                        freq=cfg['process']['period_files'])[1:]  # makes last t_interval_start >= all_data[-1]
                    dfL = pd.DataFrame.from_records({'DateEnd': t_intervals_start, 'fileName': tblD},
                                                    index=st_en[:1].append(t_intervals_start[:-1]))
                else:
                    dfL = pd.DataFrame.from_records({'DateEnd': st_en[-1], 'fileName': tblD}, index=st_en[:1])

            gpx_names_fun_str = "lambda i, row, t=0: '{}'.format({})".format(
                cfg['out']['gpx_names_fun_format'],
                gpx_names_funs[itbl])
            gpx_names_fun = eval(compile(gpx_names_fun_str, '', 'eval'))
            if cfg['out']['select_from_tablelog_ranges'] is None:
                # Use all data in ranges specified by log rows and save tracks (not points)

                for irow, r in enumerate(dfL.itertuples()):  # iterrows()
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    print(qstr, end='... ')
                    try:
                        dfD = store.select(
                            cfg['in']['table_nav'] if cfg['in']['table_nav'] else tblD,
                            qstr,
                            columns=['Lat', 'Lon', 'DepEcho'])
                    except Exception as e:
                        l.exception('Error when querying: {}. '.format(qstr))
                        # '\n==> '.join([s for s in e.args if isinstance(s, str)])))
                        continue
                    # Keep data with period = 1s only
                    dfD = dfD[~dfD.index.round(pd.Timedelta(seconds=1)).duplicated()]
                    # dfD.drop_duplicates(['Lat', 'Lon', 'index'])'

                    bGood = filterGlobal_minmax(dfD, dfD.index, cfg['filter'])
                    dfD = dfD[bGood]
                    # Add UTC time and table name to output file name
                    # Local time and table name to gpx object name
                    str_time_long = '{:%y%m%d_%H%M}'.format(r.Index)
                    r = r._replace(Index=timzone_view(r.Index, cfg['out']['dt_from_utc_in_comments']))
                    tblD_safe = file_from_tblname(tblD, cfg['in']['tables_log'][0])
                    try:
                        gpx_names_fun_result = gpx_names_fun(tblD_safe, r)  # '{:%y%m%d}'.format(timeLocal)
                    except TypeError as e:
                        raise TypeError('Cannot evaluate gpx_names_fun "{}"'.format(gpx_names_fun_str)).with_traceback(
                            e.__traceback__)

                    save_to_gpx(
                        dfD, cfg['out']['path'].with_name(f'{str_time_long}{tblD_safe}'),
                        gpx_obj_namef=gpx_names_fun_result, cfg_proc=cfg['process'])

                    if len(cfg['in']['tables']) > 1:
                        nav2add_cur = dfD if irow == 0 else nav2add_cur.append(dfD)
                if len(cfg['in']['tables']) > 1:
                    nav2add_cur = nav2add_cur.assign(itbl=itbl)  # keep accumulated rows, mark source table

            else:
                # Use only 1 data point per log row

                if cfg['out']['select_from_tablelog_ranges'] != 0:
                    print('selecting from {} row index of log table'.format(
                        cfg['out']['select_from_tablelog_ranges']))

                try:
                    dfL.index = dfL.index.tz_convert('UTC')
                except TypeError as e:
                    print((e.msg if hasattr(e, 'msg') else str(e)) + '!\n- continue presuming UTC log index...')
                print(end='all log data ')
                time_points = (dfL.index if cfg['out']['select_from_tablelog_ranges'] == 0 else
                               dfL['DateEnd'] if cfg['out']['select_from_tablelog_ranges'] == -1 else
                               None)
                if time_points is None:
                    raise ValueError("cfg['out']['select_from_tablelog_ranges'] must be 0 or -1")
                cols_nav = ['Lat', 'Lon', 'DepEcho']
                nav2add = h5select(store, cfg['in']['table_nav'], cols_nav, time_points=time_points,
                                   dt_check_tolerance=cfg['process']['dt_search_nav_tolerance'],
                                   query_range_lims=(time_points[0], dfL['DateEnd'][-1])
                                   )[0]
                cols_nav = nav2add.columns  # not all columns may be loaded
                # Try to get non-NaN values from dfL if it has the needed columns (edges' data may be written there with _st/_en suffixes)
                isna = nav2add.isna()
                dfL_col_suffix = 'st' if cfg['out']['select_from_tablelog_ranges'] == 0 else 'en'
                for col in cols_nav:
                    col_dat = f'{col}_{dfL_col_suffix}'
                    if isna[col].any() and col_dat in dfL.columns:
                        b_use = isna[col].values & dfL[col_dat].notna().values
                        nav2add.loc[b_use, col] = dfL.loc[b_use, col_dat].values

                nav2add.index = timzone_view(nav2add.index, dt_from_utc=cfg['out']['dt_from_utc_in_comments'])
                # tz_local= tzoffset(None, cfg['out']['dt_from_utc_in_comments'].total_seconds())
                # if nav2add.index.tz is None:
                #     # think if time zone of tz-naive Timestamp is naive then it is UTC
                #     nav2add.index = nav2add.index.tz_localize('UTC')
                # nav2add.tz_convert(tz_local, copy= False)

                # Save to gpx waypoints
                nav2add_cur = nav2add.assign(itbl=itbl)

                # if 'gpx_names_funs' in cfg['out'] and \
                #     len(cfg['out']['gpx_names_funs'])>itbl:
                #
                #     gpx_names = eval(compile('lambda i: str({})'.format(
                #         cfg['out']['gpx_names_funs'][itbl]), [], 'eval'))
                #
                save_to_gpx(nav2add_cur,
                            cfg['out']['path'] / f"stations_{file_from_tblname(tblD, cfg['in']['tables_log'][0])}",
                            gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols,
                            cfg_proc=cfg['process']
                            )
                # save_to_csv(nav2add, dfL.index, cfg['out']['path'].with_name(f'nav{tblD}.txt'))
                if False:  # Show table info
                    store.get_storer(tblD).table

                    nodes = sorted(store.root.__members__)  # , key=number_key
                    print(nodes)
                    # store.get_node('CTD_Idronaut(Redas)').logFiles        # next level nodes

            # prepare saving of combined gpx
            if tbl_names_all_shortened:
                i_new = 0
                for c_prev, c_new in zip(tbl_names_all_shortened[-1], tblD):
                    if c_new == c_prev:
                        i_new += 1
                    else:
                        break
                tbl_names_all_shortened.append(tblD[i_new:])
            else:
                tbl_names_all_shortened.append(tblD)
            dfs_rnav.append(nav2add_cur)

        if len(cfg['in']['tables']) > 1 and cfg['out']['gpx_names_funs_cobined']:
            print('combined: ', end='')  # Save combined data to gpx
            df_rnav_combined = pd.concat(dfs_rnav)
            df_rnav_combined.sort_index(inplace=True)
            # Save to gpx waypoints
            if 'gpx_names_funs' in cfg['out']['gpx_names_funs_cobined']:
                gpx_names_funs = [  # row not used, it is here only for compatibility with tracks
                    eval(compile("lambda i: " + f, '', 'eval')) for f in gpx_names_funs]
            gpx_names_fun = eval(compile(
                "lambda i,row,t: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(
                    cfg['out']), '', 'eval'))

            # gpx_symbols = lambda row: cfg['out']['gpx_symbols'][sym_index_fun(row)]

            # gpx_names = eval(compile("lambda i,row: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(cfg['out']), '', 'eval'))
            # gpx_names = lambda i: str(i + 1)

            save_to_gpx(
                df_rnav_combined,
                cfg['out']['path'].with_name(
                    'all_' + file_from_tblname(','.join(tbl_names_all_shortened), cfg['in']['tables_log'][0])),
                gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols, cfg_proc=cfg['process'])
    print('Ok')
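The naming functions above are built from config strings with eval/compile; with hypothetical config values, the combined-waypoints variant reduces to the following self-contained snippet:

cfg_out = {'gpx_names_fun_format': '{}', 'gpx_names_funs_cobined': 'i + 1'}  # hypothetical values
fun_str = "lambda i,row,t: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(cfg_out)
gpx_names_fun = eval(compile(fun_str, '', 'eval'))
print(gpx_names_fun(0, None, 0))  # -> '1'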