Example #1
def main(new_arg=None):
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg
    l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])
    if not cfg['out']['path'].is_absolute():
        # set path relative to cfg['in']['db_path']
        cfg['out']['path'] = cfg['in']['db_path'].with_name(str(cfg['out']['path']))

    l.warning('\n {}({}) is going to save gpx to ..{} dir. '.format(
        this_prog_basename(__file__), cfg['in']['db_path'], cfg['out']['path'].parent))

    if cfg['out']['select_from_tablelog_ranges'] is None:
        gpx_symbols = None
    else:
        gpx_symbols = init_gpx_symbols_fun(cfg['out'])

    global gpx_names_funs  # Shortcut for cfg['out']['gpx_names_funs']

    # Load data #################################################################
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
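    # For example, qstr_trange_pattern.format('2020-01-01', '2020-01-02') yields
    # "index>=Timestamp('2020-01-01') & index<=Timestamp('2020-01-02')" --
    # the `where` query syntax accepted by HDFStore.select().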
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        # Find tables by pattern
        if '*' in cfg['in']['tables'][0]:
            # if 'table_prefix' in cfg['in']
            pattern_tables = cfg['in']['tables'][0]
            cfg['in']['tables'] = h5find_tables(store, pattern_tables)
            len_tables = len(cfg['in']['tables'])
            msg = 'Found {} tables with pattern {}'.format(len_tables, pattern_tables)
            if len_tables:
                l.info(msg)
            else:
                raise Ex_nothing_done(msg + '!')

            gpx_names_funs = []
            for itbl in range(len(cfg['in']['tables'])):  # same function for each table
                gpx_names_funs.append(cfg['out']['gpx_names_funs'][0])
        else:  # fixed number of tables
            # initialise with defaults if needed:
            gpx_names_funs = cfg['out']['gpx_names_funs']
            for itbl in range(len(gpx_names_funs), len(cfg['in']['tables'])):
                gpx_names_funs.append('i+1')
        dfs_rnav = []
        tbl_names_all_shortened = []
        for itbl, tblD in enumerate(cfg['in']['tables']):
            print(itbl, '. ', tblD, end=': ', sep='')
            if cfg['in']['tables_log'][0]:
                tblL = tblD + '/' + cfg['in']['tables_log'][0]
                try:
                    dfL = store[tblL]
                except KeyError as e:
                    l.warning(' '.join([s for s in e.args if isinstance(s, str)]))
                    continue
            else:  # only for tables without log (usually no such tables)
                l.warning('configuration specifies to get data without using "log..." tables')
                st_en = store[tblD].index[[0, -1]]
                if cfg['process']['period_files']:
                    t_intervals_start = pd.date_range(
                        start=st_en[0].normalize(),
                        end=max(st_en[-1], st_en[-1].normalize() + pd_period_to_timedelta(
                            cfg['process']['period_files'])),
                        freq=cfg['process']['period_files'])[1:]  # makes last t_interval_start >= all_data[-1]
                    dfL = pd.DataFrame.from_records({'DateEnd': t_intervals_start, 'fileName': tblD},
                                                    index=st_en[:1].append(t_intervals_start[:-1]))
                else:
                    dfL = pd.DataFrame.from_records({'DateEnd': st_en[-1], 'fileName': tblD}, index=st_en[:1])

            gpx_names_fun_str = "lambda i, row, t=0: '{}'.format({})".format(
                cfg['out']['gpx_names_fun_format'],
                gpx_names_funs[itbl])
            gpx_names_fun = eval(compile(gpx_names_fun_str, '', 'eval'))
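            # For example, with cfg['out']['gpx_names_fun_format'] == '{}' and
            # gpx_names_funs[itbl] == 'i+1', the constructed source is
            # "lambda i, row, t=0: '{}'.format(i+1)", i.e. the gpx object name
            # becomes the 1-based table index.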
            if cfg['out']['select_from_tablelog_ranges'] is None:
                # Use all data for ranges specified in log rows and save tracks (not points)

                for irow, r in enumerate(dfL.itertuples()):  # iterrows()
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    print(qstr, end='... ')
                    try:
                        dfD = store.select(
                            cfg['in']['table_nav'] if cfg['in']['table_nav'] else tblD,
                            qstr, columns=['Lat', 'Lon', 'DepEcho'])
                    except Exception as e:
                        l.exception('Error when querying: {}. '.format(qstr))
                        continue
                    # Keep at most one point per second (drop duplicates after rounding index to 1 s)
                    dfD = dfD[~dfD.index.round(pd.Timedelta(seconds=1)).duplicated()]
                    # dfD.drop_duplicates(['Lat', 'Lon', 'index'])

                    bGood = filterGlobal_minmax(dfD, dfD.index, cfg['filter'])
                    dfD = dfD[bGood]
                    # Add UTC time and table name to output file name
                    # Local time and table name to gpx object name
                    str_time_long = '{:%y%m%d_%H%M}'.format(r.Index)
                    r = r._replace(Index=timzone_view(r.Index, cfg['out']['dt_from_utc_in_comments']))
                    tblD_safe = file_from_tblname(tblD, cfg['in']['tables_log'][0])
                    try:
                        gpx_names_fun_result = gpx_names_fun(tblD_safe, r)  # '{:%y%m%d}'.format(timeLocal)
                    except TypeError as e:
                        raise TypeError('Cannot evaluate gpx_names_fun "{}"'.format(gpx_names_fun_str)).with_traceback(
                            e.__traceback__)

                    save_to_gpx(
                        dfD, cfg['out']['path'].with_name(f'{str_time_long}{tblD_safe}'),
                        gpx_obj_namef=gpx_names_fun_result, cfg_proc=cfg['process'])

                    if len(cfg['in']['tables']) > 1:
                        nav2add_cur = dfD if irow == 0 else pd.concat([nav2add_cur, dfD])
                if len(cfg['in']['tables']) > 1:
                    nav2add_cur = nav2add_cur.assign(itbl=itbl)  # tag accumulated rows with table index

            else:
                # Use only 1 data point per log row

                if cfg['out']['select_from_tablelog_ranges'] != 0:
                    print('selecting from {} row index of log table'.format(
                        cfg['out']['select_from_tablelog_ranges']))

                try:
                    dfL.index = dfL.index.tz_convert('UTC')
                except TypeError as e:
                    print((e.msg if hasattr(e, 'msg') else str(e)) + '!\n- continuing, presuming log index is in UTC...')
                print(end='all log data ')
                time_points = (dfL.index if cfg['out']['select_from_tablelog_ranges'] == 0 else
                               dfL['DateEnd'] if cfg['out']['select_from_tablelog_ranges'] == -1 else
                               None)
                if time_points is None:
                    raise ValueError("cfg['out']['select_from_tablelog_ranges'] must be 0 or -1")
                cols_nav = ['Lat', 'Lon', 'DepEcho']
                nav2add = h5select(store, cfg['in']['table_nav'], cols_nav, time_points=time_points,
                                   dt_check_tolerance=cfg['process']['dt_search_nav_tolerance'],
                                   query_range_lims=(time_points[0], dfL['DateEnd'].iloc[-1])
                                   )[0]
                cols_nav = nav2add.columns  # not all columns may be loaded
                # Try to get non-NaN values from dfL if it has the needed columns (edges' data is written there with _st/_en suffixes)
                isna = nav2add.isna()
                dfL_col_suffix = 'st' if cfg['out']['select_from_tablelog_ranges'] == 0 else 'en'
                for col in cols_nav:
                    col_dat = f'{col}_{dfL_col_suffix}'
                    if isna[col].any() and col_dat in dfL.columns:
                        b_use = isna[col].values & dfL[col_dat].notna().values
                        nav2add.loc[b_use, col] = dfL.loc[b_use, col_dat].values

                nav2add.index = timzone_view(nav2add.index, dt_from_utc=cfg['out']['dt_from_utc_in_comments'])
                # tz_local= tzoffset(None, cfg['out']['dt_from_utc_in_comments'].total_seconds())
                # if nav2add.index.tz is None:
                #     # think if time zone of tz-naive Timestamp is naive then it is UTC
                #     nav2add.index = nav2add.index.tz_localize('UTC')
                # nav2add.tz_convert(tz_local, copy= False)

                # Save to gpx waypoints
                nav2add_cur = nav2add.assign(itbl=itbl)

                # if 'gpx_names_funs' in cfg['out'] and \
                #     len(cfg['out']['gpx_names_funs'])>itbl:
                #
                #     gpx_names = eval(compile('lambda i: str({})'.format(
                #         cfg['out']['gpx_names_funs'][itbl]), [], 'eval'))
                #
                save_to_gpx(nav2add_cur,
                            cfg['out']['path'] / f"stations_{file_from_tblname(tblD, cfg['in']['tables_log'][0])}",
                            gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols,
                            cfg_proc=cfg['process']
                            )
                # save_to_csv(nav2add, dfL.index, cfg['out']['path'].with_name(f'nav{tblD}.txt'))
                if False:  # Show table info
                    store.get_storer(tblD).table

                    nodes = sorted(store.root.__members__)  # , key=number_key
                    print(nodes)
                    # store.get_node('CTD_Idronaut(Redas)').logFiles        # next level nodes

            # prepare saving of combined gpx
            if tbl_names_all_shortened:
                i_new = 0
                for c_prev, c_new in zip(tbl_names_all_shortened[-1], tblD):
                    if c_new == c_prev:
                        i_new += 1
                    else:
                        break
                tbl_names_all_shortened.append(tblD[i_new:])
            else:
                tbl_names_all_shortened.append(tblD)
            dfs_rnav.append(nav2add_cur)

        if len(cfg['in']['tables']) > 1 and cfg['out']['gpx_names_funs_cobined']:
            print('combined: ', end='')  # Save combined data to gpx
            df_rnav_combined = pd.concat(dfs_rnav)
            df_rnav_combined.sort_index(inplace=True)
            # Save to gpx waypoints
            if 'gpx_names_funs' in cfg['out']['gpx_names_funs_cobined']:
                gpx_names_funs = [  # row is not used; it is here only for compatibility with tracks
                    eval(compile("lambda i: " + f, '', 'eval')) for f in gpx_names_funs]
            gpx_names_fun = eval(compile(
                "lambda i,row,t: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(
                    cfg['out']), '', 'eval'))

            # gpx_symbols = lambda row: cfg['out']['gpx_symbols'][sym_index_fun(row)]

            # gpx_names = eval(compile("lambda i,row: '{gpx_names_fun_format}'.format({gpx_names_funs_cobined})".format_map(cfg['out']), '', 'eval'))
            # gpx_names = lambda i: str(i + 1)

            save_to_gpx(
                df_rnav_combined,
                cfg['out']['path'].with_name(
                    'all_' + file_from_tblname(','.join(tbl_names_all_shortened), cfg['in']['tables_log'][0])),
                gpx_obj_namef=gpx_names_fun, waypoint_symbf=gpx_symbols, cfg_proc=cfg['process'])
    print('Ok')
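
A minimal usage sketch for main() above, in a hedged form: the module name h5_to_gpx is a hypothetical placeholder for wherever this code lives. It relies on the '<return_cfg>' check near the top of main(), which returns the parsed config without processing (useful in tests).

from h5_to_gpx import main  # hypothetical module name for the code above

cfg = main('<return_cfg>')  # parse config only, skip processing
if cfg:
    print(cfg['in']['db_path'], cfg['out']['path'])
main()  # normal run: reads HDF5 tables and writes .gpx files
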
Example #2
        for centers, qstr in gen_queries(ranges,
                                         patterns,
                                         margins,
                                         print_patern=print_patern):
            file_stem = file_patern.format(**centers)
            file_stem_no_time = file_patern_no_time.format(**centers)
            # p, p_st, p_en in np.arange(pst, pen, pstep)[:, np.newaxis] + np.array(
            # [[0, -pwidth, pwidth]]) / 2:  # [:, np.]
            # print('\n{:g}m.'.format(p), end=' ')
            # qstr = qstr_pattern.format(p_st, p_en)
            FCTD = pd.read_hdf(db_path_temp, 'CTD', where=qstr)
            if FCTD.empty:
                print('- empty', end='')
                continue
            time_st_local, time_en_local = [
                timzone_view(x, t_our_zone) for x in FCTD.index[[0, -1]]
            ]
            fmt_en = ('%d_' if time_st_local.day != time_en_local.day else '') + '%H%M'
            fileN_time = f'{time_st_local:%y%m%d_%H%M}-{time_en_local:{fmt_en}}'

            # Get data for each run
            # It is possible to get it by aggregation (df_points = FCTD.groupby(['Lat', 'Lon']))
            # but here we use runs info which is encapsulated in _shift_. Runs were found in Veusz
            iruns = np.flatnonzero(np.diff(FCTD['shift']) != 0)
            ctd = np.empty((iruns.size + 1,),
                           {'names': params + ['Lat', 'Lon'],
                            'formats': ['f8'] * (len(params) + 2)})
            ctd.fill(np.nan)
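
A standalone sketch of the structured-array pattern used above (parameter names and sizes are illustrative): one record per run, a float64 field per parameter plus coordinates, prefilled with NaN so unfilled cells are easy to detect.

import numpy as np

params = ['Temp', 'Sal']  # hypothetical parameter names
n_runs = 3                # stands in for iruns.size + 1 above
ctd = np.empty((n_runs,), dtype={'names': params + ['Lat', 'Lon'],
                                 'formats': ['f8'] * (len(params) + 2)})
ctd.fill(np.nan)          # NaN marks cells not yet computed
ctd['Lat'][0] = 54.95     # fields are addressed by name, like columns
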
Example #3
def h5add_log(cfg_out: Dict[str, Any], df,
              log: Union[pd.DataFrame, Mapping, None], tim, log_dt_from_utc):
    """
    Updates (or creates if need) metadata table
    :param cfg_out: dict with fields:
     - b_log_ready: if False or '' then updates log['Date0'], log['DateEnd'].
     - db: handle of opened hdf5 store
     - some of following fields (next will be tried if previous not defined):
         - table_log: str, path of log table
         - tables_log: List[str], path of log table in first element
         - table: str, path of log table will be consructed by adding '/log'
         - tables: List[str], path of log table will be consructed by adding '/log' to first element
     - logfield_fileName_len: optiondal, fixed length of string format of 'fileName' hdf5 column
    :param df:
    :param log: Mapping records or dataframe. updates 'Date0' and 'DateEnd' if no 'Date0' or it is {} or None
    :param tim:
    :param log_dt_from_utc:
    :return:
    """
    if cfg_out.get('b_log_ready') and (isinstance(log, Mapping) and not log):
        return

    # synchro "tables_log" and more user friendly but not so universal to code "table_log"

    if cfg_out.get('table_log'):
        table_log = cfg_out['table_log']
    else:
        table_log = cfg_out.get('tables_log')
        if table_log:
            if '{}' in table_log[0]:
                table_log = table_log[0].format(cfg_out['table'])
            else:
                table_log = table_log[0]

        else:  # set default for the (1st) data table
            try:
                table_log = f"{cfg_out['table']}/log"
            except KeyError:
                table_log = f"{cfg_out['tables'][0]}/log"

    set_field_if_no(cfg_out, 'logfield_fileName_len', 255)

    if (log.get('Date0') is None) or not cfg_out.get('b_log_ready'):
        # or (table_log.split('/')[-1].startswith('logFiles')):
        t_index = tim if tim is not None else (
            df.index.compute() if isinstance(df, dd.DataFrame) else df.index)
        log['Date0'], log['DateEnd'] = timzone_view(t_index[[0, -1]], log_dt_from_utc)
    # dfLog = pd.DataFrame.from_dict(log, np.dtype(np.unicode_, cfg_out['logfield_fileName_len']))
    if not isinstance(log, pd.DataFrame):
        try:
            log = pd.DataFrame(log).set_index('Date0')
        except ValueError:  # e.g. scalar values without an index
            log = pd.DataFrame.from_records(
                log, exclude=['Date0'],
                index=log['Date0'] if isinstance(log['Date0'], pd.DatetimeIndex)
                else [log['Date0']])  # index='Date0' does not work for dict

    try:
        return df_log_append_fun(log, table_log, cfg_out)
    except ValueError as e:
        return h5append_on_inconsistent_index(cfg_out, table_log, log,
                                              df_log_append_fun, e,
                                              'append log')
    except ClosedFileError:
        l.warning('Check code: on reopening the store, update the store variable')
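
A hedged call sketch for h5add_log() above; the store path and field values are illustrative assumptions, not the author's documented usage. With only 'table' given, the log table path resolves to '/my_device/log', and since b_log_ready is not set, 'Date0'/'DateEnd' are filled from tim.

import pandas as pd

with pd.HDFStore('data.h5', mode='a') as db:     # hypothetical store
    cfg_out = {'db': db, 'table': '/my_device'}  # log goes to '/my_device/log'
    log = {'fileName': 'run1.txt', 'rows': 100}  # no 'Date0' => filled from tim
    tim = pd.date_range('2020-01-01', periods=100, freq='min')
    h5add_log(cfg_out, df=None, log=log, tim=tim,
              log_dt_from_utc=pd.Timedelta(hours=2))
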
Example #4
def get_runs_parameters(df_raw,
                        times_min,
                        times_max,
                        cols_good_data: Union[str, Sequence[str], None],
                        dt_from_utc: timedelta = timedelta(0),
                        db=None,
                        db_path=None,
                        table_nav=None,
                        table_nav_cols=('Lat', 'Lon', 'DepEcho', 'Speed',
                                        'Course'),
                        dt_search_nav_tolerance=timedelta(minutes=2)):
    """

    :param df_raw:
    :param times_min:
    :param times_max:
    :param cols_good_data: cols of essential data that must be good (depth)
    :param dt_from_utc:
    :param db:
    :param db_path:
    :param table_nav: 'navigation' table to find data absent in df_raw. Note: tries to find only positive values
    :param table_nav_cols:
    :param dt_search_nav_tolerance:
    :return:
    """

    log = {}
    log_update = {}  # {_st: DataFrame, _en: DataFrame}: dataframes of parameters for imin and imax
    for times_lim, suffix, log_time_col, i_search in (
            (times_min, '_st', 'Date0', 0),
            (times_max, '_en', 'DateEnd', -1)):
        log_update[suffix] = df_raw.asof(
            times_lim, subset=cols_good_data)  # rows of last good data
        log[log_time_col] = timzone_view(log_update[suffix].index, dt_from_utc)

        # Search for nearest good values where a parameter p is NaN
        for (p, *isnan) in log_update[suffix].isna().T.itertuples(name=None):
            if i_search == -1:
                log_update[suffix].loc[isnan, p] = df_raw[p].asof(times_max[isnan])
            else:
                # "asof()"-alternative for 1st notna: take 1st good element in each interval
                for time_nan, time_min, time_max in zip(
                        times_lim[isnan], times_min[isnan], times_max[isnan]):
                    s_search = df_raw.loc[time_min:time_max, p]

                    try:
                        # same as .at[s_search.first_valid_index()]
                        log_update[suffix].at[time_nan, p] = s_search[s_search.notna()].iat[0]
                    except IndexError:
                        l.warning(
                            'no good values for parameter "%s" in run started %s',
                            p, time_nan)
                        continue
        log_update[suffix] = log_update[suffix].add_suffix(suffix)
    log.update(dict(  # flatten pairs: interleave _st and _en items per parameter
        (k, v.values)
        for st_en in zip(log_update['_st'].items(), log_update['_en'].items())
        for k, v in st_en))

    if table_nav:
        time_points = log_update['_st'].index.append(log_update['_en'].index)
        # Keep the store open while querying individual columns below
        with FakeContextIfOpen(lambda f: pd.HDFStore(f, mode='r'), db_path,
                               db) as store:
            df_nav, dt = h5select(  # all starts, then all ends, in a row
                store,
                table_nav,
                columns=table_nav_cols,
                time_points=time_points,
                dt_check_tolerance=dt_search_nav_tolerance)

            # todo: allow filtering for individual columns. Solution: use multiple calls of
            # h5select() for the columns that need filtering, with an appropriate
            # query_range_pattern argument
            isnan = df_nav.isna()
            for col in df_nav.columns[isnan.any(axis=0)]:

                # does not work:
                # df_nav_col, dt_col = h5select(  # for current parameter's name
                #         cfg['in']['db'], cfg['in']['table_nav'],
                #         columns=[col],
                #         query_range_lims=time_points[[0,-1]],
                #         time_points=time_points[isnan[col]],
                #         query_range_pattern = f"index>=Timestamp('{{}}') & index<=Timestamp('{{}}') & {col} > 0 ",
                #         dt_check_tolerance=cfg['out']['dt_search_nav_tolerance']
                #         )

                # Note: tries to find only positive values:
                df_nav_col = store.select(
                    table_nav,
                    where="index>=Timestamp('{}') & index<=Timestamp('{}') & {} > 0"
                    .format(
                        *(time_points[[0, -1]] + np.array(
                            (-dt_search_nav_tolerance, dt_search_nav_tolerance))),
                        col),
                    columns=[col])
                try:
                    vals = df_nav_col[col].values
                    vals = vals[inearestsorted(df_nav_col.index,
                                               time_points[isnan[col]])]
                except IndexError:
                    continue  # not found
                if vals.any():
                    df_nav.loc[isnan[col], col] = vals

        # df_nav['nearestNav'] = dt.astype('m8[s]').view(np.int64)
        df_edges_items_list = [
            df_edge.add_suffix(suffix).items() for suffix, df_edge in (
                ('_st', df_nav.iloc[:len(log_update['_st'])]),
                ('_en', df_nav.iloc[len(log_update['_st']):len(df_nav)]))
        ]

        for st_en in zip(*df_edges_items_list):
            for name, series in st_en:
                # If already present from the data table => update only the needed elements
                if name in log:
                    b_need = np.isnan(log.get(name))
                    if b_need.any():
                        b_have = np.isfinite(series.values)
                        # from loaded nav in points
                        b_use = b_need & b_have
                        if b_use.any():
                            log[name][b_use] = series.values[b_use]
                        # # from all nav (not loaded)
                        # b_need &= ~b_have
                        #
                        # if b_need.any():
                        #     # load range to search nearest good val. for specified fields and tolerance
                        #     df = cfg['in']['db'].select(cfg['in']['table_nav'], where=query_range_pattern.format(st_en.index), columns=name)

                        # df_nav = h5select(  # for current parameter's name
                        #     cfg['in']['db'], cfg['in']['table_nav'],
                        #     columns=name,
                        #     query_range_lims=st_en
                        #     time_points=log_update['_st'].index.append(log_update['_en'].index),
                        #     dt_check_tolerance=cfg['out']['dt_search_nav_tolerance']
                        #     )
                    continue
                # else:
                #     b_need = np.isnan(series.values)
                #     for

                # Else update all elements at once
                log[name] = series.values
    return log
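
A hedged sketch of calling get_runs_parameters() above, assuming df_raw (a time-indexed DataFrame of parameters) and the run boundaries times_min/times_max (DatetimeIndex) are already prepared; the column name 'Pres' and file/table names are illustrative.

from datetime import timedelta
import pandas as pd

log = get_runs_parameters(
    df_raw, times_min, times_max,
    cols_good_data='Pres',                      # hypothetical "must be good" column
    dt_from_utc=timedelta(hours=2),             # shift logged times from UTC
    db_path='data.h5', table_nav='navigation')  # fill NaNs from the navigation table
log_df = pd.DataFrame(log).set_index('Date0')   # one row per run, *_st/*_en columns
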
Example #5
        except Exception as e:
            print('{}\nTry setting [in].b_raise_on_err = False'.format(e))
            raise
    # Process `a` and get dates in ISO standard format
    try:
        date = fun_proc_loaded(a, cfg['in'])
    except IndexError:
        print('no data!')
        continue
    # add the time shift specified in the configuration .ini
    date = np.atleast_1d(date)
    tim, b_ok = time_corr(date, cfg['in'], sort=True)
    # Save the last time so we can filter the next file
    cfg['in']['time_last'] = date[-1]
    log_item['rows'] = 1
    log_item['Date0'] = timzone_view(tim[0], cfg['in']['dt_from_utc'])
    log_item['DateEnd'] = datetime.now()  # cannot assign np.NaN here
    log_item['fileNameNew'] = '{Date0:%y%m%d_%H%M}'.format(**log_item)
    log.append(log_item.copy())
    strLog = '{fileName}:\t{Date0:%d.%m.%Y %H:%M:%S}->\t{fileNameNew}.txt'.format(
        **log_item)  # \t{Lat}\t{Lon}\t{strOldVal}->\t{mag}
    print(strLog)
    if 'log' in cfg['program']:  # Log to logfile
        f.writelines('\n' + strLog)
else:
    if len(log):
        s = input(
            '\n{} txt files. Rename _ASC.TXT, .TXT, r.000, r.000.nc? Y/n: '
            .format(nFiles))
        if 'n' in s or 'N' in s:
            print('nothing done')
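
The dedented else: near the end of the fragment above appears to be Python's for/else construct: the else block runs once the (not shown) for loop over input files completes without hitting a break. A minimal illustration:

for name in ['a.txt', 'b.txt']:
    if name.endswith('.csv'):
        break  # would skip the else block
else:
    print('loop finished without break')  # executed: no .csv encountered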