Exemplo n.º 1
0
def h5move_and_sort(out):
    """
    Move the `tables_have_wrote` tables out of temporary storage, sort them in
    the result store, then reset the `tables_have_wrote` set.

    :param out: dict with fields:
        tables_have_wrote: set of tuples of str, table names
        db_path: path of result HDF5 store (its stem is reused for the
            resorted store name)
    :return: None
    """
    tables_wrote = out['tables_have_wrote']
    storages_failed = h5move_tables(out, tbl_names=tables_wrote)
    if storages_failed:
        print('Finishing...', end=' ')
    else:
        print('Ok.', end=' ')
    # Sort if have any processed data, else don't because ``ptprepack`` not closes hdf5 source if it not finds data
    out['b_remove_duplicates'] = True
    h5index_sort(out,
                 out_storage_name=out['db_path'].stem + '-resorted.h5',
                 in_storages=storages_failed,
                 tables=tables_wrote)
    out['tables_have_wrote'] = set()
Exemplo n.º 2
0
def do(cfg):
    """
    Convert NMEA text files to navigation tables in an HDF5 store.

    :param cfg: OmegaConf config with 'input', 'out' (and 'program') sections;
        converted to a plain dict below so non primitive values
        (cfg.out['db']) and special field names ('in') are allowed
    :return: None
    """
    h5init(cfg.input, cfg.out)
    # OmegaConf.update(cfg, "in", cfg.input, merge=False)  # error
    # to allow non primitive types (cfg.out['db']) and special words field names ('in'):
    cfg = OmegaConf.to_container(cfg)
    cfg['in'] = cfg.pop('input')

    cfg['in']['dt_from_utc'] = 0
    cfg['out']['table_log'] = f"{cfg['out']['table']}/logFiles"
    cfg['out']['b_insert_separator'] = False
    cols_nmea = ('datetime', 'latitude', 'longitude', 'depth_meters')
    cols_h5 = ('Time', 'Lat', 'Lon', 'DepEcho')
    ## Main circle ############################################################
    for file_num, path_in in h5_dispenser_and_names_gen(cfg['in'], cfg['out']):
        lf.info('{}. {}: ', file_num, path_in.name)
        # Loading data
        nav = load_nmea(path_in, cols_nmea)
        df_rename_cols(nav, cols_nmea, cols_h5)
        lf.info('write: ')
        call_with_valid_kwargs(df_filter_and_save_to_h5, nav, **cfg,
                               input=cfg['in'])
    storages_failed = h5move_tables(cfg['out'],
                                    tbl_names=cfg['out']['tables_have_wrote'])
    print('Finishing...' if storages_failed else 'Ok.', end=' ')
    # Sort if have any processed data, else don't because ``ptprepack`` not closes hdf5 source if it not finds data
    if cfg['in'].get('time_last'):
        cfg['out']['b_remove_duplicates'] = True
        h5index_sort(cfg['out'],
                     out_storage_name=cfg['out']['db_path'].stem + '-resorted.h5',
                     in_storages=storages_failed,
                     tables=cfg['out']['tables_have_wrote'])
Exemplo n.º 3
0
def main(new_arg=None):
    """
    Load data selected by log records from tables of an input HDF5 store,
    filter it, optionally compute runs/derived parameters, and append results
    to an output HDF5 store (and, if configured, to csv files).

    :param new_arg: list of strings, command line arguments.
     Returns cfg if new_arg=='<cfg_from_args>' but it will be None if argument
     argv[1:] == '-h' or '-v' passed to this code
    argv[1] is cfgFile. It was used with cfg files:
        'csv2h5_nav_supervisor.ini'
        'csv2h5_IdrRedas.ini'
        'csv2h5_Idronaut.ini'
    :return: cfg if could not initialise or when testing, () if nothing done,
        else None
    """

    global l
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        # Resolve input file names; 'path' here is the input HDF5 store path
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**{
                **cfg['in'], 'path': cfg['in']['db_path']
            },
                                      b_interact=cfg['program']['b_interact'])
        set_field_if_no(
            cfg['in'], 'tables_log', '{}/logFiles'
        )  # will be filled by each table from cfg['in']['tables']
        cfg['in']['query'] = query_time_range(**cfg['in'])
        set_field_if_no(cfg['out'], 'db_path', cfg['in']['db_path'])
        # cfg['out'] = init_file_names(cfg['out'], , path_field='db_path')
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    # args = parser.parse_args()
    # args.verbose= args.verbose[0]
    # try:
    #     cfg= ini2dict(args.cfgFile)
    #     cfg['in']['cfgFile']= args.cfgFile
    # except IOError as e:
    #     print('\n==> '.join([a for a in e.args if isinstance(a,str)])) #e.message
    #     raise(e)
    # Open text log
    # NOTE(review): ``flog`` stays undefined when 'log' is not in
    # cfg['program']; later writes are guarded by the same key check, but the
    # close in ``finally`` below is guarded by truthiness of
    # cfg['program']['log'] — confirm these conditions always agree.
    if 'log' in cfg['program'].keys():
        dir_create_if_need(os_path.dirname(cfg['program']['log']))
        flog = open(cfg['program']['log'], 'a+', encoding='cp1251')

    cfg['out']['log'] = OrderedDict({'fileName': None, 'fileChangeTime': None})

    # Prepare saving to csv
    # SECURITY: eval of a config-supplied expression — only for trusted configs
    if 'file_names_add_fun' in cfg['out']:
        file_names_add = eval(
            compile(cfg['out']['file_names_add_fun'], '', 'eval'))
    else:
        file_names_add = lambda i: '.csv'  # f'_{i}.csv'

    # Prepare data for output store and open it
    if cfg['out']['tables'] == ['None']:
        # will not write new data table and its log
        cfg['out']['tables'] = None
        # cfg['out']['tables_log'] = None  # for _runs cfg will be redefined (this only None case that have sense?)

    h5init(cfg['in'], cfg['out'])
    # store, dfLogOld = h5temp_open(**cfg['out'])

    # Select processing mode by cfg file name suffix or by output log table name
    cfg_fileN = os_path.splitext(cfg['in']['cfgFile'])[0]
    out_tables_log = cfg['out'].get('tables_log')
    if cfg_fileN.endswith('_runs') or (bool(out_tables_log)
                                       and 'logRuns' in out_tables_log[0]):

        # Will calculate only after filter  # todo: calculate derived parameters before were they are bad (or replace all of them if any bad?)
        func_before_cycle = lambda x: None
        func_before_filter = lambda df, log_row, cfg: df
        func_after_filter = lambda df, cfg: log_runs(df, cfg, cfg['out']['log']
                                                     )

        # this table will be added:
        cfg['out']['tables_log'] = [cfg['out']['tables'][0] + '/logRuns']
        cfg['out'][
            'b_log_ready'] = True  # to not update time range in h5_append()

        # Settings to not affect main data table and switch off not compatible options:
        cfg['out']['tables'] = []
        cfg['out'][
            'b_skip_if_up_to_date'] = False  # todo: If False check it: need delete all previous result of CTD_calc() or set min_time > its last log time. True not implemented?
        cfg['program'][
            'b_log_display'] = False  # can not display multiple rows log
        if 'b_save_images' in cfg['extract_runs']:
            cfg['extract_runs']['path_images'] = cfg['out'][
                'db_path'].with_name('_subproduct')
            dir_create_if_need(cfg['extract_runs']['path_images'])
    else:
        if 'brown' in cfg_fileN.lower():
            func_before_cycle = load_coef
            if 'Lat' in cfg['in']:
                func_before_filter = lambda *args, **kwargs: add_ctd_params(
                    process_brown(*args, **kwargs), kwargs['cfg'])
            else:
                func_before_filter = process_brown
        else:
            func_before_cycle = lambda x: None

            def ctd_coord_and_params(df: pd.DataFrame, log_row, cfg):
                # Ensure coordinate columns exist, then add derived CTD params
                coord_data_col_ensure(df, log_row)
                return add_ctd_params(df, cfg)

            func_before_filter = ctd_coord_and_params
        func_after_filter = lambda df, cfg: df  # nothing after filter

    func_before_cycle(cfg)  # prepare: usually assign data to cfg['for']
    if cfg['out'].get('path_csv'):
        dir_create_if_need(cfg['out']['path_csv'])
    # Load data Main circle #########################################
    # Open input store and cycle through input table log records
    qstr_trange_pattern = "index>=Timestamp('{}') & index<=Timestamp('{}')"
    iSt = 1

    dfLogOld, cfg['out']['db'], cfg['out'][
        'b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    b_out_db_is_different = cfg['out']['db'] is not None and cfg['out'][
        'db_path_temp'] != cfg['in']['db_path']
    # Cycle for each table, for each row in log:
    # for path_csv in gen_names_and_log(cfg['out'], dfLogOld):
    with FakeContextIfOpen(
            lambda f: pd.HDFStore(f, mode='r'), cfg['in']['db_path'],
            None if b_out_db_is_different else cfg['out']['db']
    ) as cfg['in']['db']:  # not opens ['in']['db'] if already opened to write

        for tbl in cfg['in']['tables']:
            if False:  # Show table info
                nodes = sorted(
                    cfg['out']['db'].root.__members__)  # , key=number_key
                print(nodes)
            print(tbl, end='. ')

            df_log = cfg['in']['db'].select(cfg['in']['tables_log'].format(tbl)
                                            or tbl,
                                            where=cfg['in']['query'])
            if True:  # try:
                if 'log' in cfg['program'].keys():
                    # NOTE(review): DataFrame has no ``.rows`` attribute —
                    # presumably ``df_log.shape[0]`` was meant; confirm.
                    nRows = df_log.rows.size
                    flog.writelines(datetime.now().strftime(
                        '\n\n%d.%m.%Y %H:%M:%S> processed ') + f'{nRows} row' +
                                    ('s:' if nRows > 1 else ':'))

                for ifile, r in enumerate(df_log.itertuples(),
                                          start=iSt):  # name=None
                    print('.', end='')
                    sys_stdout.flush()

                    path_raw = PurePath(r.fileName)
                    cfg['out']['log'].update(fileName=path_raw.name,
                                             fileChangeTime=r.fileChangeTime)
                    # save current state
                    cfg['in']['file_stem'] = cfg['out']['log'][
                        'fileName']  # for example, to can extract date in subprogram
                    cfg['in']['fileChangeTime'] = cfg['out']['log'][
                        'fileChangeTime']

                    if cfg['in']['b_skip_if_up_to_date']:
                        have_older_data, have_duplicates = h5del_obsolete(
                            cfg['out'], cfg['out']['log'], dfLogOld)
                        if have_older_data:
                            continue
                        if have_duplicates:
                            cfg['out']['b_remove_duplicates'] = True
                    print('{}. {}'.format(ifile, path_raw.name), end=': ')

                    # Load data
                    qstr = qstr_trange_pattern.format(r.Index, r.DateEnd)
                    df_raw = cfg['in']['db'].select(tbl, qstr)
                    cols = df_raw.columns.tolist()

                    # cfg['in']['lat'] and ['lon'] may be need in add_ctd_params() if Lat not in df_raw
                    if 'Lat_en' in df_log.columns and 'Lat' not in cols:
                        cfg['in']['lat'] = np.nanmean((r.Lat_st, r.Lat_en))
                        cfg['in']['lon'] = np.nanmean((r.Lon_st, r.Lon_en))

                    df = func_before_filter(df_raw, log_row=r, cfg=cfg)

                    if df.size:  # size is zero means save only log but not data
                        # filter, updates cfg['out']['log']['rows']
                        df, _ = set_filterGlobal_minmax(
                            df, cfg['filter'], cfg['out']['log'])
                    if 'rows' not in cfg['out']['log']:
                        l.warning('no data!')
                        continue
                    elif isinstance(cfg['out']['log']['rows'], int):
                        print('filtered out {rows_filtered}, remains {rows}'.
                              format_map(cfg['out']['log']))
                        if cfg['out']['log']['rows']:
                            print('.', end='')
                        else:
                            l.warning('no data!')
                            continue

                    df = func_after_filter(df, cfg=cfg)

                    # Append to Store
                    h5_append(cfg['out'],
                              df,
                              cfg['out']['log'],
                              log_dt_from_utc=cfg['in']['dt_from_utc'])

                    # Copy to csv
                    if cfg['out'].get('path_csv'):
                        fname = '{:%y%m%d_%H%M}-{:%d_%H%M}'.format(
                            r.Index, r.DateEnd) + file_names_add(ifile)
                        if not 'data_columns' in cfg['out']:
                            cfg['out']['data_columns'] = slice(0,
                                                               -1)  # all cols
                        df.to_csv(  # [cfg['out']['data_columns']]
                            cfg['out']['path_csv'] / fname,
                            date_format=cfg['out']['text_date_format'],
                            float_format='%5.6g',
                            index_label='Time'
                        )  # to_string, line_terminator='\r\n'

                    # Log to screen (if not prohibited explicitly)
                    if cfg['out']['log'].get('Date0') is not None and (
                        ('b_log_display' not in cfg['program'])
                            or cfg['program']['b_log_display']):
                        str_log = '{fileName}:\t{Date0:%d.%m.%Y %H:%M:%S}-' \
                                  '{DateEnd:%d. %H:%M:%S%z}\t{rows}rows'.format_map(
                            cfg['out']['log'])  # \t{Lat}\t{Lon}\t{strOldVal}->\t{mag}
                        l.info(str_log)
                    else:
                        str_log = str(cfg['out']['log'].get('rows', '0'))
                    # Log to logfile
                    if 'log' in cfg['program'].keys():
                        flog.writelines('\n' + str_log)

    # Finalise: deduplicate, index, move from temporary store and sort
    if b_out_db_is_different:
        try:
            if cfg['out']['tables'] is not None:
                print('')
                if cfg['out']['b_remove_duplicates']:
                    h5remove_duplicates(cfg['out'],
                                        cfg_table_keys=('tables',
                                                        'tables_log'))
                # Create full indexes. Must be done because of using ptprepack in h5move_tables() below
                l.debug('Create index')
                for tblName in (cfg['out']['tables'] +
                                cfg['out']['tables_log']):
                    try:
                        cfg['out']['db'].create_table_index(tblName,
                                                            columns=['index'],
                                                            kind='full')
                    except Exception as e:
                        l.warning(
                            ': table {}. Index not created - error'.format(
                                tblName), '\n==> '.join(
                                    [s for s in e.args if isinstance(s, str)]))
        except Exception as e:
            l.exception('The end. There are error ')

            # Drop into an interactive console at the innermost failing frame
            import traceback, code
            from sys import exc_info as sys_exc_info
            tb = sys_exc_info()[2]  # type, value,
            traceback.print_exc()
            last_frame = lambda tb=tb: last_frame(tb.tb_next
                                                  ) if tb.tb_next else tb
            frame = last_frame().tb_frame
            ns = dict(frame.f_globals)
            ns.update(frame.f_locals)
            code.interact(local=ns)
        finally:

            cfg['out']['db'].close()
            if cfg['program']['log']:
                flog.close()
            if cfg['out']['db'].is_open:
                print('Wait store is closing...')
                sleep(2)

            failed_storages = h5move_tables(cfg['out'])
            print('Finishing...' if failed_storages else 'Ok.', end=' ')
            h5index_sort(
                cfg['out'],
                out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                in_storages=failed_storages)
Exemplo n.º 4
0
def main(new_arg=None):
    """
    Convert gpx files (waypoints, tracks, routes) to HDF5 store tables.

    :param new_arg: list of strings, command line arguments.
        If new_arg=='<cfg_from_args>' returns cfg but it will be None if
        argv[1:] == '-h' or '-v' passed to this code
    :return: cfg if could not initialise or when testing, else None

    Fixes applied (review):
    - ``df**cfg`` in the trackers branch was exponentiation (TypeError at
      runtime); replaced with ``df, **cfg`` as in the sibling branch below.
    - The "processing N file(s):" warning had a conditional-expression
      precedence bug that logged just ':' when nfiles <= 1; parenthesized.
    """
    cfg = cfg_from_args(my_argparser(), new_arg)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')

    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'],
                                      cfg_search_parent=cfg['out'])
        h5init(cfg['in'], cfg['out'])
    except Ex_nothing_done as e:
        print(e.message)
        exit()  # NOTE: raises SystemExit (other mains return ()) — kept as is

    df_dummy = pd.DataFrame(
        np.full(1,
                np.NaN,
                dtype=np.dtype({
                    'formats': ['float64', 'float64'],
                    'names': cfg['out']['tracks_cols'][1:]
                })),
        index=(pd.NaT, ))  # used for insert separator lines

    # Routes columns default to waypoints columns if not configured
    if 'routes_cols' not in cfg['in']:
        cfg['in']['routes_cols'] = cfg['in']['waypoints_cols']
    if 'routes_cols' not in cfg['out']:
        cfg['out']['routes_cols'] = cfg['out'][
            'waypoints_cols']  # cfg['in']['routes_cols']  #
    # Writing
    if True:  # try:
        # FIX: parentheses — previously the whole message was the ternary's
        # true-branch, so only ':' was logged when nfiles <= 1
        l.warning('processing ' + str(cfg['in']['nfiles']) + ' file' +
                  ('s:' if cfg['in']['nfiles'] > 1 else ':'))
        cfg['out']['log'] = {}
        set_field_if_no(cfg['out'], 'table_prefix',
                        PurePath(cfg['in']['path']).stem)
        cfg['out']['table_prefix'] = cfg['out']['table_prefix'].replace(
            '-', '')
        if len([t for t in cfg['out']['tables'] if len(t)]) > 1:
            cfg['out']['tables'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables']]
            cfg['out']['tables_log'] = \
                [cfg['out']['table_prefix'] + '_' + s for s in cfg['out']['tables_log']]

        tables = dict(zip(df_names, cfg['out']['tables']))
        tables_log = dict(zip(df_names, cfg['out']['tables_log']))
        # Can not save path to DB (useless?) so set  for this max file name length:
        set_field_if_no(cfg['out'], 'logfield_fileName_len', 50)
        cfg['out']['index_level2_cols'] = cfg['in']['routes_cols'][0]

        # ###############################################################
        # ## Cumulate all data in cfg['out']['path_temp'] ##################
        ## Main circle ############################################################
        for i1_file, path_gpx in h5_dispenser_and_names_gen(
                cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_gpx.name))
            # Loading data
            dfs = gpxConvert(cfg, path_gpx)
            print('write', end=': ')
            sys_stdout.flush()
            for key, df in dfs.items():
                if (not tables.get(key)) or df.empty:
                    continue
                elif key == 'tracks':
                    # Save last time to can filter next file
                    cfg['in']['time_last'] = df.index[-1]

                sort_time = False if key in {'waypoints', 'routes'} else None

                # monkey patching
                if 'tracker' in tables[key]:
                    # Also {} must be in tables[key]. todo: better key+'_fun_tracker' in cfg['out']?
                    # Trackers processing: map tracker serial numbers to table suffixes
                    trackers_numbers = {
                        '0-3106432': '1',
                        '0-2575092': '2',
                        '0-3124620': '3',
                        '0-3125300': '4',
                        '0-3125411': '5',
                        '0-3126104': '6'
                    }
                    tables_pattern = tables[key]
                    tables_log_pattern = tables_log[key]

                    # keep only the comment text before " @"
                    df['comment'] = df['comment'].str.split(" @",
                                                            n=1,
                                                            expand=True)[0]
                    # split data and save to multipe tables
                    df_all = df.set_index(['comment', df.index])
                    for sn, n in trackers_numbers.items(
                    ):  # set(df_all.index.get_level_values(0))
                        try:
                            df = df_all.loc[sn]
                        except KeyError:
                            continue
                        # redefine saving parameters
                        cfg['out']['table'] = tables_pattern.format(
                            trackers_numbers[sn])
                        cfg['out']['table_log'] = tables_log_pattern.format(
                            trackers_numbers[sn])
                        # FIX: was ``df**cfg`` (exponentiation) — now passes
                        # df positionally and cfg as keyword arguments
                        call_with_valid_kwargs(df_filter_and_save_to_h5,
                                               df,
                                               **cfg,
                                               input=cfg['in'],
                                               sort_time=sort_time)
                else:
                    cfg['out']['table'] = tables[key]
                    cfg['out']['table_log'] = tables_log[key]
                    call_with_valid_kwargs(df_filter_and_save_to_h5,
                                           df,
                                           **cfg,
                                           input=cfg['in'],
                                           sort_time=sort_time)

    # try:
    # if cfg['out']['b_remove_duplicates']:
    #     for tbls in cfg['out']['tables_have_wrote']:
    #         for tblName in tbls:
    #             cfg['out']['db'][tblName].drop_duplicates(keep='last', inplace= True)
    # print('Create index', end=', ')

    # create_table_index calls create_table which docs sais "cannot index Time64Col() or ComplexCol"
    # so load it, index, then save
    # level2_index = None
    # df = cfg['out']['db'][tblName] # last commented
    # df.set_index([navp_all_index, level2_index])
    # df.sort_index()

    # cfg['out']['db'][tblName].sort_index(inplace=True)

    # if df is not None:  # resave
    #     df_log = cfg['out']['db'][tblName]
    #     cfg['out']['db'].remove(tbls[0])
    #     cfg['out']['db'][tbls[0]] = df
    #     cfg['out']['db'][tbls[1]] = df_log

    # Move tables from temporary store to result store and sort if needed
    try:
        failed_storages = h5move_tables(cfg['out'],
                                        tbl_names=cfg['out'].get(
                                            'tables_have_wrote', set()))
        print('Finishing...' if failed_storages else 'Ok.', end=' ')
        # Sort if have any processed data that needs it (not the case for the routes and waypoints), else don't because ``ptprepack`` not closes hdf5 source if it not finds data
        if cfg['in'].get('time_last'):
            cfg['out']['b_remove_duplicates'] = True
            h5index_sort(
                cfg['out'],
                out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                in_storages=failed_storages,
                tables=cfg['out'].get('tables_have_wrote', set()))
    except Ex_nothing_done:
        print('ok')
Exemplo n.º 5
0
    nodes = parent_group.__members__
    childs = [f'/{tbl}/{g}' for g in nodes if (g != 'table') and (g != '_i_table')]
    if childs:
        print('found {} childs of {}. Copying...'.format(len(nodes), tbl))
        for child in childs:
            sr._handle.copy_node(child, newparent=sw.get_storer(tbl).group, recursive=True, overwrite=True)
        sw.flush()  # .flush(fsync=True
# Presumably waits for the external ``ptprepack`` subprocess launched above to
# finish before reopening the stores — TODO confirm
sleep(8)

# write tables back with sorted index
store_in, store_out = store_out, store_in
h5move_tables({
    'db_path_temp': store_in,
    'db_path': store_out,
    'tables': [tbl],
    'tables_log': [],
    'addargs': ['--checkCSI']
    },
    col_sort='Time'
    )  #'navigation/logFiles' will be copied as child


# Manual fallback: sort the table with pandas and rewrite it to a new store
# with a full index on 'index'
store_out = str(Path(store_out).with_name('sort_man.h5'))
with pd.HDFStore(store_in, 'r') as sr, pd.HDFStore(store_out, 'w') as sw:
    df = sr[tbl]
    df.sort_index().to_hdf(sw, tbl, format='table', data_columns=True, append=False, index=False)
    sw.create_table_index(tbl, columns=['index'], kind='full')
store_in, store_out = store_out, str(Path(store_out).with_name('sort_man_ptp.h5'))
h5move_tables({
    'db_path_temp': store_in,
Exemplo n.º 6
0
def main(new_arg=None, **kwargs):
    """
    Load binary framed data files, convert to time-indexed DataFrames and
    append them to an HDF5 store (optionally also save as text).

    :param new_arg: list of strings, command line arguments
    :kwargs: dicts for each section: to overwrite values in them (overwrites even high priority values, other values remains)
    Note: if new_arg=='<cfg_from_args>' returns cfg but it will be None if argument
     argv[1:] == '-h' or '-v' passed to this code
    argv[1] is cfgFile. It was used with cfg files:
        'csv2h5_nav_supervisor.ini'
        'csv2h5_IdrRedas.ini'
        'csv2h5_Idronaut.ini'

    :return: cfg if could not initialise or when testing, () if nothing done,
        else None

    Fix applied (review): the "processed N file(s):" log line had a
    conditional-expression precedence bug — the ternary wrapped the whole
    concatenation inside strftime(), so only ':' was written when
    nfiles <= 1; parenthesized to match the pattern used by sibling mains.
    """
    global l

    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg or not cfg['program'].get('return'):
        print('Can not initialise')
        return cfg
    elif cfg['program']['return'] == '<cfg_from_args>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    print('\n' + this_prog_basename(__file__), end=' started. ')
    try:
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(**cfg['in'],
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message)
        return ()

    bOld_FF00FF = False
    # if 'TermGrunt' in sys.argv[1] FF00FF' in str(cfg['in']['path']):  # 'TermGrunt.h5'  ? args.path.endswith ('bin'):
    #     bOld_FF00FF = True
    #     cfg['in'].update({
    #     'header': 'TERM',
    #     'dt_from_utc': timedelta(hours=-1),
    #     'fs': 1, 'b_time_fromtimestamp': True,
    #     'b_time_fromtimestamp_source': False})
    # else:  # 'Katran.h5'
    #     cfg['in'].update({
    #     'delimiter_hex': '000000E6',
    #     'header': 'P, Temp, Cond',
    #     'dt_from_utc': timedelta(hours=0),
    #     'fs': 10, 'b_time_fromtimestamp': False,
    #     'b_time_fromtimestamp_source': False})

    # Default dtype: smallest uint type that fits data_word_len bytes
    set_field_if_no(
        cfg['in'], 'dtype', 'uint{:d}'.format(2**(3 + np.searchsorted(
            2**np.array([3, 4, 5, 6, 7]) > np.array(
                8 * (cfg['in']['data_word_len'] - 1)), 1))))

    # Prepare cpecific format loading and writing
    set_field_if_no(cfg['in'], 'coltime', [])
    cfg['in'] = init_input_cols(cfg['in'])
    cfg['out']['names'] = np.array(cfg['in']['dtype'].names)[
        cfg['in']['cols_loaded_save_b']]
    cfg['out']['formats'] = [
        cfg['in']['dtype'].fields[n][0] for n in cfg['out']['names']
    ]
    cfg['out']['dtype'] = np.dtype({
        'formats': cfg['out']['formats'],
        'names': cfg['out']['names']
    })
    h5init(cfg['in'], cfg['out'])

    # cfg['Period'] = 1.0 / cfg['in']['fs']  # instead Second can use Milli / Micro / Nano:
    # cfg['pdPeriod'] = pd.to_timedelta(cfg['Period'], 's')
    # #pd.datetools.Second(cfg['Period'])\
    #     if 1 % cfg['in']['fs'] == 0 else\
    #     pd.datetools.Nano(cfg['Period'] * 1e9)

    # log table of loaded files. columns: Start time, file name, and its index in array off all loaded data:
    log_item = cfg['out']['log'] = {
    }  # fields will have: 'fileName': None, 'fileChangeTime': None, 'rows': 0

    strLog = ''
    # from collections import namedtuple
    # type_log_files = namedtuple('type_log_files', ['label','iStart'])
    # log.sort(axis=0, order='log_item['Date0']')#sort files by time

    dfLogOld, cfg['out']['db'], cfg['out'][
        'b_skip_if_up_to_date'] = h5temp_open(**cfg['out'])
    if 'log' in cfg['program'].keys():
        f = open(PurePath(sys_argv[0]).parent / cfg['program']['log'],
                 'a',
                 encoding='cp1251')
        # FIX: parenthesized ternary — previously the whole concatenation was
        # the true-branch inside strftime(), writing just ':' for nfiles <= 1
        f.writelines(
            datetime.now().strftime('\n\n%d.%m.%Y %H:%M:%S> processed ') +
            str(cfg['in']['nfiles']) + ' file' +
            ('s:' if cfg['in']['nfiles'] > 1 else ':'))
    b_remove_duplicates = False  # normally no duplicates but will if detect
    # Config specially for readBinFramed
    set_field_if_no(cfg['in'], 'b_byte_order_is_big_endian', True)
    set_field_if_no(cfg['in'], 'b_baklan', False)
    set_field_if_no(cfg['in'], 'b_time_fromtimestamp_source', False)
    cfg['out']['fs'] = cfg['in']['fs']
    if True:
        ## Main circle ############################################################
        for i1_file, path_in in h5_dispenser_and_names_gen(
                cfg['in'], cfg['out']):
            l.info('{}. {}: '.format(i1_file, path_in.name))

            # Loading data
            if bOld_FF00FF:
                V = readFF00FF(path_in, cfg)
                iFrame = np.arange(len(V))
            else:
                V, iFrame = readBinFramed(path_in, cfg['in'])
            if ('b_time_fromtimestamp' in cfg['in'] and cfg['in']['b_time_fromtimestamp']) or \
                    ('b_time_fromtimestamp_source' in cfg['in'] and cfg['in']['b_time_fromtimestamp_source']):
                # Derive start time from file modification timestamp
                path_in_rec = os_path.join(
                    'd:\\workData\\_source\\BalticSea\\151021_T1Grunt_Pregol\\_source\\not_corrected',
                    os_path.basename(path_in)[:-3] + 'txt'
                ) if cfg['in']['b_time_fromtimestamp_source'] else path_in
                log_item['Date0'] = datetime.fromtimestamp(
                    os_path.getmtime(path_in_rec))  # getctime is bad
                log_item['Date0'] -= iFrame[-1] * timedelta(
                    seconds=1 / cfg['in']['fs']
                )  # use for computer filestamp at end of recording
            else:
                # Derive start time from the file name
                log_item['Date0'] = datetime.strptime(
                    path_in.stem, cfg['in']['filename2timestart_format'])
            log_item['Date0'] += cfg['in']['dt_from_utc']
            tim = log_item['Date0'] + iFrame * timedelta(
                seconds=1 / cfg['in']['fs']
            )  # tim = pd.date_range(log_item['Date0'], periods=np.size(V, 0), freq=cfg['pdPeriod'])
            df = pd.DataFrame(
                V.view(dtype=cfg['out']['dtype']),  # np.uint16
                columns=cfg['out']['names'],
                index=tim)
            # pd.DataFrame(V, columns=cfg['out']['names'], dtype=cfg['out']['formats'], index=tim)
            if df.empty:  # log['rows']==0
                print('No data => skip file')
                continue

            df, tim = set_filterGlobal_minmax(df,
                                              cfg_filter=cfg['filter'],
                                              log=log_item,
                                              dict_to_save_last_time=cfg['in'])
            if log_item['rows_filtered']:
                print('filtered out {}, remains {}'.format(
                    log_item['rows_filtered'], log_item['rows']))
            if not log_item['rows']:
                l.warning('no data! => skip file')
                continue
            elif log_item['rows']:
                print(
                    '.', end=''
                )  # , divisions=d.divisions), divisions=pd.date_range(tim[0], tim[-1], freq='1D')
            else:
                l.warning('no data! => skip file')
                continue

            # Append to Store
            h5_append(cfg['out'], df.astype('int32'), log_item)

            if 'txt' in cfg['program'].keys():  # can be saved as text too
                np.savetxt(cfg['program']['txt'],
                           V,
                           delimiter='\t',
                           newline='\n',
                           header=cfg['in']['header'] + log_item['fileName'],
                           fmt='%d',
                           comments='')

    try:
        if b_remove_duplicates:
            # NOTE(review): if cfg['out']['table'] is a str this concatenation
            # iterates characters — confirm both operands are lists here
            for tblName in (cfg['out']['table'] +
                            cfg['out']['tableLog_names']):
                cfg['out']['db'][tblName].drop_duplicates(
                    keep='last', inplace=True)  # subset='fileName',?
        if len(strLog):
            print('Create index', end=', ')
            for tblName in (cfg['out']['table'] +
                            cfg['out']['tableLog_names']):
                cfg['out']['db'].create_table_index(tblName,
                                                    columns=['index'],
                                                    kind='full')
        else:
            print('done nothing')
    except Exception as e:
        l.exception('The end. There are error ')

        # Drop into an interactive console at the innermost failing frame
        import traceback, code
        from sys import exc_info as sys_exc_info

        tb = sys_exc_info()[2]  # type, value,
        traceback.print_exc()
        last_frame = lambda tb=tb: last_frame(tb.tb_next) if tb.tb_next else tb
        frame = last_frame().tb_frame
        ns = dict(frame.f_globals)
        ns.update(frame.f_locals)
        code.interact(local=ns)
    # sort index if have any processed data (needed because ``ptprepack`` not closses hdf5 source if it not finds data)
    if cfg['in'].get('time_last'):
        failed_storages = h5move_tables(cfg['out'])
        print('Ok.', end=' ')
        h5index_sort(
            cfg['out'],
            out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
            in_storages=failed_storages)
Exemplo n.º 7
0
def main(config: ConfigType) -> None:  #
    """
    Load a pressure time series from an HDF5 store, convert raw counts to
    physical units via polynomial coefficients stored alongside the table,
    apply a battery-voltage compensation, filter periodic spikes burst by
    burst, and save the surviving rows to a new "_filt_s" HDF5 store.

    :param config: mapping; uses keys 'col_out', 'cols_order' and optionally
        'db_path' (output path override).

    NOTE(review): this function reads the module-level ``cfg_in`` mapping
    (source 'db_path', 'table', 'min_date', 'max_date', 'col', 'b_show') —
    it is not defined here, so confirm it is initialized before calling.
    Also relies on module-level ``lf`` (logger), ``pattern_log_dt`` and the
    h5* / burst helper functions.
    """
    # Load the requested date slice and the calibration polynomial `k`
    # stored under <table>/coef in the same file.
    with pd.HDFStore(cfg_in['db_path'], mode='r') as store:
        df = store[cfg_in['table']][cfg_in['min_date']:cfg_in['max_date']]
        k = store.get_node(f"{cfg_in['table']}/coef")[cfg_in['col']].read()

    n_rows_before = df.shape[0]
    lf.info('Loaded data {0[0]} - {0[1]}: {1} rows. Filtering {2[col_out]}...',
            df.index[[0, -1]], n_rows_before, config)
    # print(f"Loaded data {df.index[0]} - {df.index[-1]}: {n_rows_before} rows. Filtering {cfg_in['col']}...")
    p_name = config['col_out']
    # Counts -> physical units using the stored polynomial coefficients
    df[p_name] = np.polyval(k, df[cfg_in['col']])

    # Battery compensation
    # Empirical correction polynomial in battery voltage — presumably
    # device-specific calibration; source of constants not visible here.
    kBat = [1.7314032932363, -11.9301097967443]
    df[p_name] -= np.polyval(kBat, df['Battery'])
    MIN_P = 6  # P filtered below: to not delete spikes that may be used to find other spikes using ~constant period

    # Keep only the requested columns, or drop the raw-counts column
    if config['cols_order']:
        df = df.loc[:, config['cols_order']]
    else:
        df.drop(cfg_in['col'], axis='columns', inplace=True)

    # Detect burst boundaries from time gaps in the index
    i_burst, mean_burst_size, max_hole = i_bursts_starts(df.index)

    i_col = df.columns.get_loc(p_name)

    if cfg_in['b_show']:
        # Optional interactive plot axis passed into filter_periodic_spike()
        from matplotlib import pyplot as plt
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.grid(True, alpha=.85, color='white', axis='y', linestyle='-')
        fig.subplots_adjust(top=.89)

        fig.show()
    else:
        ax = None

    # 'db': store
    # Output configuration: same table name, a /logFiles child log table,
    # and a "<stem>_filt_s.h5" default output path next to the source DB.
    cfg_out = {
        'table':
        cfg_in['table'],
        'table_log':
        f"{cfg_in['table']}/logFiles",
        'log': {},
        'db_path':
        Path(config['db_path']) if 'db_path' in config else
        (cfg_in['db_path'].with_name(f"{cfg_in['db_path'].stem}_filt_s.h5"))
    }

    def h5_names_gen(cfg_in, cfg_out: Mapping[str, Any],
                     **kwargs) -> Iterator[None]:
        # Single-iteration generator: stamps the log with the source file's
        # modification time, then yields once for the dispenser loop below.
        #cfg_out['log']['fileName'] = pname.name[-cfg_out['logfield_fileName_len']:-4]
        cfg_out['log']['fileChangeTime'] = datetime.fromtimestamp(
            cfg_in['db_path'].stat().st_mtime)
        yield None

    h5init(cfg_in,
           cfg_out)  # cfg_in for full path if cfg_out['db_path'] only name
    n_rows_after = 0
    #with pd.HDFStore(out_path) as store:  #, mode='w'
    for _, _ in h5_dispenser_and_names_gen(
            cfg_in, cfg_out,
            h5_names_gen):  # handles temporary db for h5_append()
        try:
            if h5remove_table(cfg_out['db'], cfg_in['table']):
                lf.info('previous table removed')
        except Exception as e:  # no such table?
            # NOTE(review): deliberate best-effort removal; any failure
            # (not only a missing table) is silently ignored here.
            pass

        # Process each burst independently: spike-filter, then threshold
        for st, en in pairwise(i_burst):
            cfg_out['log']['fileName'] = str(st)
            sl = slice(st, en)
            ind_ok = filter_periodic_spike(df.iloc[sl, i_col], ax=ax)

            # Filtering
            # Drop values below MIN_P among the rows the spike filter kept
            bad_p = df.loc[ind_ok, p_name] < MIN_P
            n_bad = bad_p.sum()
            if n_bad:
                lf.info('filtering {} > {}: deleting {} values in frame {}',
                        p_name, MIN_P, n_bad,
                        pattern_log_dt.format(*df.index[[0, -1]]))
                ind_ok = ind_ok[~bad_p]
                if not ind_ok.size:
                    continue
                # df.loc[bad_p, p_name] = np.NaN

            # save result
            h5_append(cfg_out, df.loc[ind_ok], cfg_out['log'])
            n_rows_after += ind_ok.size

    # Temporary db to compressed db with pandas index
    if n_rows_after:  # check needed because ``ptprepack`` in h5index_sort() not closes hdf5 source if it not finds data
        failed_storages = h5move_tables(cfg_out)
        print('Ok.', end=' ')
        h5index_sort(cfg_out,
                     out_storage_name=f"{cfg_out['db_path'].stem}-resorted.h5",
                     in_storages=failed_storages)

    lf.info(
        f'Removed {n_rows_before - n_rows_after} rows. Saved {n_rows_after} rows to {cfg_out["db_path"]}...'
    )
Exemplo n.º 8
0
    # Rewrite the log table in the temporary '_not_sorted.h5' store, then
    # move it into the destination DB (sorted/packed) via h5move_tables.
    with pd.HDFStore(path_db.with_name('_not_sorted.h5')) as store_tmp:
        try:
            # Replace any previous version of the log table
            del store_tmp[tbl_log]
        except KeyError:
            pass  # table did not exist yet — nothing to delete
        df_log.to_hdf(store_tmp,
                      tbl_log,
                      append=True,
                      data_columns=True,  # queryable columns
                      format='table',
                      dropna=True,
                      index=False)  # index created later during the move
    h5move_tables({
        'db_path_temp': path_db.with_name('_not_sorted.h5'),
        'db_path': path_db,
        'tables': [tbl_log],
        'tables_log': [],
        'addargs': ['--checkCSI', '--verbose']
    })

    # Now run step 30 with veuszPropagate setting: '--b_update_existed', 'False' to save only modified vsz/images. After that delete old vsz and its images

if False:  #st(40)  # may not comment always because can not delete same time more than once
    # Deleting bad runs from DB:
    import pandas as pd

    # find bad runs that have time:
    time_in_bad_run_any = ['2018-10-16T19:35:41+00:00']
    tbl = f'/{device}'
    tbl_log = tbl + '/logRuns'
    print('Deletng bad runs from DB: tables: {}, {} run with time {}'.format(
Exemplo n.º 9
0
                        sw[child_w] = sr[child_r]
                    tables_have_wrote_cur.append(child_w)
            else:
                print('found {} cr of {}. Copying...'.format(len(nodes_cr), tbl))
                for child_r in cr:
                    st._handle.copy_node(child_r, newparent=sw.get_storer(tbl).group, recursive=True, overwrite=True)
        tables_have_wrote.add(tuple(tables_have_wrote_cur))
    st.flush()  # .flush(fsync=True
# NOTE(review): fixed delay, presumably to let HDF5 flush/close before
# repacking — confirm whether it is still needed.
sleep(8)

# write tables with sorted index
try:
    failed_storages = h5move_tables({
        'db_path_temp': store_out_temp,
        'db_path': store_out,
        'addargs': ['--overwrite']  # '--checkCSI'
        },
        tbl_names=tables_have_wrote,
        # col_sort='Time'  # must exist
        )  # 'navigation/logFiles' will be copied as child
except Ex_nothing_done as e:
    # Nothing to move (no tables were written) — report and continue
    print('Tables not moved')
except RuntimeError:  # not captured
    raise

if False:
    # Disabled manual fallback: sort the table with pandas and write it to
    # a separate 'sort_man.h5' store, then index it. Kept for reference.
    store_out = str(Path(store_out).with_name('sort_man.h5'))
    with pd.HDFStore(store_in, 'r') as sr, pd.HDFStore(store_out, 'w') as sw:
        df = sr[tbl]
        df.sort_index().to_hdf(sw, tbl, format='table', data_columns=True, append=False, index=False)
        sw.create_table_index(tbl, columns=['index'], kind='full')
Exemplo n.º 10
0
    # Append to Store
    if df.empty:  # log['rows']==0
        print('No data => skip file')
    h5_append(cfg['out'], df, log)
    b_appended = True
except Exception as e:
    b_appended = False
finally:
    store.close()

# After a successful append: move tables out of the temporary store and
# sort the index of the final DB.
if b_appended:
    # NOTE(review): ``store`` was closed in the ``finally`` above, so this
    # check is expected to be False — it only warns, it does not wait.
    if store.is_open:
        print('Wait store is closing...')
        # from time import sleep
        # sleep(2)
    failed_storages = h5move_tables(cfg['out'])
    print('Ok.', end=' ')
    h5index_sort(cfg['out'], out_storage_name=f"{cfg['out']['db_path'].stem}-resorted.h5",
                 in_storages=failed_storages)

# @+node:korzh.20180520131532.4: ** garbage
# def main_gabage():
#     print('\n' + this_prog_basename(__file__), end=' started. ')
#     try:
#         cfg['in']= init_file_names(cfg['in'])
#     except Ex_nothing_done as e:
#         print(e.message)
#         return()
#
#     fGi = lambda Ax,Ay,Az,Ag,Cg,i: np.dot(Ag.T, (np.column_stack((Ax, Ay, Az))[
#                                                      slice(*i)] - Cg[0,:]).T)