Example #1
0
def main(config: ConfigType) -> None:
    """
    ----------------------------
    Calculates coefficients from
    data of Pandas HDF5 store*.h5
    and saves them back
    ----------------------------
    1. Obtains command line arguments (for description see my_argparser()) that can be passed from new_arg and ini.file
    also.
    2. Loads device data of calibration in laboratory from hdf5 database (cfg['in']['db_path'])
    2. Calibrates configured by cfg['in']['channels'] channels ('accelerometer' and/or 'magnetometer'): soft iron
    3. Wrong implementation - not use cfg['in']['timerange_nord']! todo: Rotate compass using cfg['in']['timerange_nord']
    :param config: returns cfg if new_arg=='<cfg_from_args>' but it will be None if argument
     argv[1:] == '-h' or '-v' passed to this code
    argv[1] is cfgFile. It was used with cfg files:


    """
    global cfg, l
    cfg = main_init(config, cs_store_name, __file__=None)
    cfg = main_init_input_file(cfg, cs_store_name)
    # input data tables may be defined by 'probes_prefix' and 'probes' fields of cfg['in']
    if cfg['in']['probes'] or not len(cfg['in']['tables']):
        if cfg['in']['probes']:
            cfg['in']['tables'] = [
                f"{cfg['in']['probes_prefix']}{probe:0>2}"
                for probe in cfg['in']['probes']
            ]
        elif cfg['in']['probes_prefix']:
            cfg['in']['tables'] = [f"{cfg['in']['probes_prefix']}.*"]
        # else:  # default config
        #     cfg['in']['tables'] = ['.*']

    #h5init(cfg['in'], cfg['out'])
    #cfg['out']['dt_from_utc'] = 0
    # cfg = cfg_from_args(my_argparser(), new_arg)

    lf.info("{:s}({:s}) for channels: {} started. ",
            this_prog_basename(__file__), ', '.join(cfg['in']['tables']),
            cfg['in']['channels'])
    fig = None
    fig_filt = None

    fig_save_dir_path = cfg['in']['db_path'].parent
    with pd.HDFStore(cfg['in']['db_path'], mode='r') as store:
        if len(cfg['in']['tables']) == 1:
            cfg['in']['tables'] = h5find_tables(store, cfg['in']['tables'][0])
        coefs = {}
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            probe_number = int(re.findall(r'\d+', tbl)[0])
            lf.info(f'{itbl}. {tbl}: ')
            if isinstance(cfg['in']['timerange'], Mapping):
                # individual interval for each table
                timerange = cfg['in']['timerange'].get(probe_number)
            else:
                timerange = cfg['in']['timerange']  # same interval for all tables
            a = load_hdf5_data(store, table=tbl, t_intervals=timerange)
            # iUseTime = np.searchsorted(stime, [np.array(s, 'datetime64[s]') for s in np.array(strTimeUse)])

            # Calibrate 'accelerometer' and/or 'magnetometer' channels
            coefs[tbl] = {}
            for channel in cfg['in']['channels']:
                print(f' channel "{channel}"', end=' ')
                (col_str, coef_str) = channel_cols(channel)

                # Filter out intervals where the sensor is not working  # col_str == 'A'?
                if True:
                    b_ok = np.zeros(a.shape[0], bool)
                    for component in ['x', 'y', 'z']:
                        b_ok |= is_works(
                            a[col_str + component],
                            noise=cfg['filter']['no_works_noise'][channel])
                    lf.info('Filtered out non-working area: {:2.1f}%',
                            (b_ok.size - b_ok.sum()) * 100 / b_ok.size)
                    # vec3d = np.column_stack(
                    #     (a[col_str + 'x'], a[col_str + 'y'], a[col_str + 'z']))[:, b_ok].T  # [slice(*iUseTime.flat)]
                    vec3d = a.loc[
                        b_ok, [col_str + 'x', col_str + 'y', col_str + 'z']
                    ].to_numpy(float).T
                    index = a.index[b_ok]

                    vec3d, b_ok, fig_filt = filter_channes(
                        vec3d,
                        index,
                        fig_filt,
                        fig_save_prefix=f"{fig_save_dir_path / tbl}-'{channel}'",
                        blocks=cfg['filter']['blocks'],
                        offsets=cfg['filter']['offsets'],
                        std_smooth_sigma=cfg['filter']['std_smooth_sigma'])

                A, b = calibrate(vec3d)
                window_title = f"{tbl} '{channel}' channel ellipse"
                fig = calibrate_plot(vec3d,
                                     A,
                                     b,
                                     fig,
                                     window_title=window_title)
                fig.savefig(fig_save_dir_path / (window_title + '.png'),
                            dpi=300,
                            bbox_inches="tight")
                A_str, b_str = coef2str(A, b)
                lf.info(
                    'Calibration coefficients calculated: \nA = \n{:s}\nb = \n{:s}',
                    A_str, b_str)
                coefs[tbl][channel] = {'A': A, 'b': b}

            # Zeroing North direction
            timerange_nord = cfg['in']['timerange_nord']
            if isinstance(timerange_nord, Mapping):
                timerange_nord = timerange_nord.get(probe_number)
            if timerange_nord:
                coefs[tbl]['M']['azimuth_shift_deg'] = zeroing_azimuth(
                    store, tbl, timerange_nord, calc_vel_flat_coef(coefs[tbl]),
                    cfg['in'])
            else:
                lf.info('not zeroing North')
    # Write coefficients to each output table (named the same as the input table)
    for cfg_output in (['in', 'out'] if cfg['out'].get('db_path') else ['in']):
        lf.info('Writing to {}', cfg[cfg_output]['db_path'])
        for itbl, tbl in enumerate(cfg['in']['tables'], start=1):
            # i_search = re.search('\d*$', tbl)
            # for channel in cfg['in']['channels']:
            #     (col_str, coef_str) = channel_cols(channel)
            #     dict_matrices = {f'//coef//{coef_str}//A': coefs[tbl][channel]['A'],
            #                      f'//coef//{coef_str}//C': coefs[tbl][channel]['b'],
            #                      }
            #     if channel == 'M':
            #         if coefs[tbl]['M'].get('azimuth_shift_deg'):
            #             dict_matrices[f'//coef//{coef_str}//azimuth_shift_deg'] = coefs[tbl]['M']['azimuth_shift_deg']
            #         # Copy the probe number into the coefficients so it can be checked manually when copying
            #         if i_search:
            #             try:
            #                 dict_matrices['//coef//i'] = int(i_search.group(0))
            #             except Exception as e:
            #                 pass
            dict_matrices = dict_matrices_for_h5(coefs[tbl], tbl,
                                                 cfg['in']['channels'])
            h5copy_coef(None,
                        cfg[cfg_output]['db_path'],
                        tbl,
                        dict_matrices=dict_matrices)

    print('Ok>', end=' ')
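The calibration above fits an ellipse to the raw vector data and yields a matrix A and offset b per channel. Below is a minimal sketch of how such a pair is typically applied, assuming the conventional soft-/hard-iron model corrected = A @ (raw - b); the exact convention used by calibrate()/coef2str() may differ, and apply_soft_iron is a hypothetical helper, not part of the pipeline above.

import numpy as np

def apply_soft_iron(raw_xyz: np.ndarray, A: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Apply calibration to raw vectors of shape (3, N): subtract offset b, then multiply by matrix A."""
    return A @ (raw_xyz - b.reshape(3, 1))

# usage with dummy values (for illustration only)
A_demo = np.eye(3)                      # identity: no scale / cross-axis correction
b_demo = np.array([0.1, -0.2, 0.05])    # hard-iron offset
raw_demo = np.random.randn(3, 100)      # 100 raw 3-axis samples
corrected = apply_soft_iron(raw_demo, A_demo, b_demo)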
Example #2
0
                '--fs_float',
                f'{fs(probe, in_file.stem)}',
                # '--dt_from_utc_seconds', "{}".format(int((np.datetime64('00', 'Y') - np.datetime64(dt_from_utc[probe]
                #  #   ['19-06-24T10:19:00', '19-06-24T10:21:30'][i_proc_probe]
                #     ))/np.timedelta64(1,'s')))
            ] + (['--csv_specific_param_dict', 'invert_magnitometr: True']
                 if prefix == 'incl' else
                 ['--cols_load_list', "yyyy,mm,dd,HH,MM,SS,P,U"]))

            # Get coefs:
            db_coefs = r'd:\WorkData\~configuration~\inclinometr\190710incl.h5'
            try:
                tbl = f'{prefix}{probe:0>2}'
                l.info(
                    f"Adding coefficients to {db_path}/{tbl} from {db_coefs}")
                h5copy_coef(db_coefs, db_path, tbl)
            except KeyError as e:  # Unable to open object (component not found)
                l.warning('Coefficients were not copied!')
                # todo: write some dummy coefficients so Veusz patterns can be loaded
            i_proc_file += 1
        else:
            print(probe, end=': no, ')
        i_proc_probe += 1
    print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

# Calculate velocity and average
if st(2):
    # If aggregate_period_s is None, do not average and write to *_proc_noAvg.h5; otherwise load from that h5 and write to *_proc.h5 (see the sketch after this example)
    for aggregate_period_s in [
            None, 2, 600, 3600 if 'w' in prefix else 7200
    ]:  # 2,, 7200  # 300, 600,  [None], [None, 2, 600, 3600 if 'w' in prefix else 7200], [3600]
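The comment above encodes a small naming convention for the chain of HDF5 stores. A minimal sketch follows, assuming db_path is a pathlib.Path; proc_db_paths is a hypothetical helper written only to illustrate the convention, not a function from the codebase.

from pathlib import Path

def proc_db_paths(db_path: Path, aggregate_period_s):
    """Return (input, output) store paths for one aggregation period:
    None  -> read the raw store, write *_proc_noAvg.h5 (no averaging);
    other -> read *_proc_noAvg.h5, write *_proc.h5.
    """
    no_avg = db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')
    if aggregate_period_s is None:
        return db_path, no_avg
    return no_avg, db_path.with_name(f'{db_path.stem}_proc.h5')

# e.g. proc_db_paths(Path('190710incl.h5'), 600) -> (190710incl_proc_noAvg.h5, 190710incl_proc.h5)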
Example #3
0
            '--pattern_path', str(vsz_path),
            '--widget', '/fitV(incl)/grid1/graph/fit_t/values',
            # '/fitV(force)/grid1/graph/fit1/values',
            '--data_for_coef', 'max_incl_of_fit_t',
            '--out.path', str(db_path_tank),
            '--re_tbl_from_vsz_name', r'\D*\d*',
            '--channels_list', 'M,A',
            '--b_update_existed', 'True',  # do not skip existing
            '--export_pages_int_list', '',  # 0 = all
            '--b_interact', 'False',
            '--b_execute_vsz', 'True',
            '--return', '<embedded_object>',  # reuse to avoid bloating memory
            ],
            veusze=vsz_data['veusze'])


        if vsz_data is not None:
            # if step == 3:
            # to 1st db too
            # l = init_logging(logging, None)
            print(f"Adding coefficients to {db_path_calibr_scalling}/{tbl} from {db_path_tank}")
            h5copy_coef(db_path_tank, db_path_calibr_scalling, tbl, ok_to_replace_group=True)

            vsz_data['veusze'].Close()
            try:
                vsz_data['veusze'].WaitForClose()
            except AttributeError:  # already 'NoneType' => closed ok
                pass
        else:
            vsz_data = {'veusze': None}
Example #4
0
def main(new_arg=None, **kwargs):
    """

    :param new_arg: list of strings, command line arguments
    :param kwargs: dicts of dicts (one per ini section): specified values overwrite ini values
    """

    # global l
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    cfg['in']['db_coefs'] = Path(cfg['in']['db_coefs'])
    for path_field in ['db_coefs', 'path_cruise']:
        if not cfg['in'][path_field].is_absolute():
            cfg['in'][path_field] = (
                cfg['in']['cfgFile'].parent / cfg['in'][path_field]
            ).resolve().absolute()  # cfg['in']['cfgFile'].parent /

    def constant_factory(val):
        def default_val():
            return val

        return default_val

    for lim in ('min_date', 'max_date'):  # per-probe dates; missing probes fall back to the value under key '0'/0 (see the sketch after this example)
        cfg['filter'][lim] = defaultdict(
            constant_factory(cfg['filter'][lim].get(
                '0', cfg['filter'][lim].get(0))), cfg['filter'][lim])

    l = init_logging(logging, None, None, 'INFO')
    #l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])

    if True:  # set to False to disable. Experimental speedup, but uses memory
        from dask.cache import Cache
        cache = Cache(2e9)  # Leverage two gigabytes of memory
        cache.register()  # Turn cache on globally
    if cfg['program']['dask_scheduler']:
        if cfg['program']['dask_scheduler'] == 'distributed':
            from dask.distributed import Client
            client = Client(
                processes=False
            )  # navigate to http://localhost:8787/status to see the diagnostic dashboard if you have Bokeh installed
            # processes=False: avoid inter-worker communication for computations that release the GIL (numpy, da.array)  # without it there is an error
        else:
            if cfg['program']['dask_scheduler'] == 'synchronous':
                l.warning('using "synchronous" scheduler for debugging')
            import dask
            dask.config.set(scheduler=cfg['program']['dask_scheduler'])

    # Run steps :
    st.start = cfg['program']['step_start']
    st.end = cfg['program']['step_end']
    st.go = True

    if not cfg['out']['db_name']:
        # Set the name from the 'path_cruise' directory name (or its parent) if it starts with digits;
        # a "*inclinometer*" subdirectory has priority for the output location
        for p in (lambda p: [p, p.parent])(cfg['in']['path_cruise']):
            m = re.match(r'(^[\d_]*).*', p.name)
            if m:
                break
        cfg['out']['db_name'] = f"{m.group(1).strip('_')}incl.h5"
    dir_incl = next((d for d in cfg['in']['path_cruise'].glob('*inclinometer*')
                     if d.is_dir()), cfg['in']['path_cruise'])
    db_path = dir_incl / cfg['out']['db_name']

    # ---------------------------------------------------------------------------------------------
    def fs(probe, name):
        return 5
        # if 'w' in name.lower():  # Baranov's wavegauge electronic
        #     return 5  # 10
        # if probe < 20 or probe in [23, 29, 30, 32, 33]:  # 30 [4, 11, 5, 12] + [1, 7, 13, 30]
        #     return 5
        # if probe in [21, 25, 26] + list(range(28, 35)):
        #     return 8.2
        # return 4.8

    def datetime64_str(time_str: Optional[str] = None) -> np.ndarray:
        """
        Reformat time_str to ISO 8601 or to 'NaT'. Used here as input for functions that convert str to numpy.datetime64
        :param time_str: May be 'NaT'
        :return: ndarray of strings (tested for 1 element only) formatted by numpy.
        """
        return np.datetime_as_string(np.datetime64(time_str, 's'))

    probes = cfg['in']['probes'] or range(
        1, 41)  # sets default range, specify your values before line ---
    raw_root, subs_made = re.subn('INCL_?', 'INKL_',
                                  cfg['in']['probes_prefix'].upper())
    if st(1):
        # Note: additional uncorrected files for the same probe are not found if corrected ones already exist in the search path (move them out if needed)
        i_proc_probe = 0  # counter of processed probes
        i_proc_file = 0  # counter of processed files
        # pattern identifying only this probe's raw data files that need correction: '*INKL*{:0>2}*.[tT][xX][tT]'

        raw_parent = dir_incl / '_raw'
        dir_out = raw_parent / re.sub(
            r'[.\\/ ]', '_', cfg['in']['raw_subdir']
        )  # re.sub flattens multilevel subdirs into one level, which is all correct_fun() can create
        raw_parent /= cfg['in']['raw_subdir']
        for probe in probes:
            raw_found = []
            raw_pattern_file = cfg['in']['raw_pattern'].format(prefix=raw_root,
                                                               number=probe)
            correct_fun = partial(
                correct_kondrashov_txt if subs_made else correct_baranov_txt,
                dir_out=dir_out)
            # if not archive:
            if ('.zip' not in cfg['in']['raw_subdir'].lower() and
                    '.rar' not in cfg['in']['raw_subdir'].lower()) or raw_parent.is_dir():
                raw_found = list(raw_parent.glob(raw_pattern_file))
            if not raw_found:
                # Check whether corrected files generated by correct_kondrashov_txt() already exist for this probe; if so, just use them
                raw_found = list(
                    raw_parent.glob(
                        f"{cfg['in']['probes_prefix']}{probe:0>2}.txt"))
                if raw_found:
                    print('corrected csv file', [r.name for r in raw_found],
                          'found')
                    correct_fun = lambda x: x
                elif not cfg['in']['raw_subdir']:
                    continue

            for file_in in (raw_found or open_csv_or_archive_of_them(
                    raw_parent, binary_mode=False, pattern=raw_pattern_file)):
                file_in = correct_fun(file_in)
                if not file_in:
                    continue
                tbl = f"{cfg['in']['probes_prefix']}{probe:0>2}"
                # tbl = re.sub('^((?P<i>inkl)|w)_0', lambda m: 'incl' if m.group('i') else 'w',  # correct name
                #              re.sub('^[\d_]*|\*', '', file_in.stem).lower()),  # remove date-prefix if in name
                csv2h5(
                    [
                        str(
                            Path(__file__).parent / 'ini' /
                            f"csv_inclin_{'Kondrashov' if subs_made else 'Baranov'}.ini"
                        ),
                        '--path',
                        str(file_in),
                        '--blocksize_int',
                        '50_000_000',  # 50 MB
                        '--table',
                        tbl,
                        '--db_path',
                        str(db_path),
                        # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'),
                        # '--b_raise_on_err', '0',  # ?
                        '--b_interact',
                        '0',
                        '--fs_float',
                        f'{fs(probe, file_in.stem)}',
                        '--dt_from_utc_seconds',
                        str(cfg['in']['dt_from_utc'].total_seconds()),
                        '--b_del_temp_db',
                        '1',
                    ] +
                    (['--csv_specific_param_dict', 'invert_magnitometr: True']
                     if subs_made else
                     ['--cols_load_list', "yyyy,mm,dd,HH,MM,SS,P,U"]),
                    **{
                        'filter': {
                            'min_date': cfg['filter']['min_date'][probe],
                            'max_date': cfg['filter']['max_date'][probe],
                        }
                    })

                # Get coefs:
                l.info(
                    f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}"
                )
                try:
                    h5copy_coef(cfg['in']['db_coefs'], db_path, tbl)
                except KeyError as e:  # Unable to open object (component not found)
                    l.warning(
                        'No coefs to copy?'
                    )  # write some dummy coefficients so Veusz patterns can be loaded:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                except OSError as e:
                    l.warning(
                        'Not found DB with coefs?'
                    )  # write some dummy coefficients so Veusz patterns can be loaded:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                i_proc_file += 1
            else:
                print('no', raw_pattern_file, end=', ')
            i_proc_probe += 1
        print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

    # Calculate velocity and average
    if st(2):
        # If aggregate_period_s is None, do not average and write to *_proc_noAvg.h5; otherwise load from that h5 and write to *_proc.h5
        if not cfg['out']['aggregate_period_s']:
            cfg['out']['aggregate_period_s'] = [
                None, 2, 600,
                3600 if 'w' in cfg['in']['probes_prefix'] else 7200
            ]

        if cfg['in']['azimuth_add']:
            if 'Lat' in cfg['in']['azimuth_add']:
                from datetime import datetime
                # add magnetic declination,° for used coordinates
                # todo: get time
                azimuth_add = mag_dec(cfg['in']['azimuth_add']['Lat'],
                                      cfg['in']['azimuth_add']['Lon'],
                                      datetime(2020, 9, 10),
                                      depth=-1)
            else:
                azimuth_add = 0
            if 'constant' in cfg['in']['azimuth_add']:
                # and add a constant. For example, subtract the declination at the calibration place if it was applied there
                azimuth_add += cfg['in']['azimuth_add'][
                    'constant']  # add -6.65644183° to account for calibration in Kaliningrad
        for aggregate_period_s in cfg['out']['aggregate_period_s']:
            if aggregate_period_s is None:
                db_path_in = db_path
                db_path_out = db_path.with_name(
                    f'{db_path.stem}_proc_noAvg.h5')
            else:
                db_path_in = db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')
                db_path_out = f'{db_path.stem}_proc.h5'  # or separately: '_proc{aggregate_period_s}.h5'

            args = [
                Path(incl_h5clc.__file__).with_name(
                    f'incl_h5clc_{db_path.stem}.yaml'),
                # if no such file all settings are here
                '--db_path',
                str(db_path_in),
                # !   'incl.*|w\d*'  inclinometers or wavegauges w\d\d # 'incl09':
                '--tables_list',
                'incl.*' if not cfg['in']['probes'] else
                f"incl.*(?:{'|'.join('{:0>2}'.format(p) for p in cfg['in']['probes'])})",
                '--aggregate_period',
                f'{aggregate_period_s}S' if aggregate_period_s else '',
                '--out.db_path',
                str(db_path_out),
                '--table',
                f'V_incl_bin{aggregate_period_s}'
                if aggregate_period_s else 'V_incl',
                '--verbose',
                'INFO',  # 'DEBUG' produces many numba messages
                '--b_del_temp_db',
                '1',
                # '--calc_version', 'polynom(force)',  # deprecated
                # '--chunksize', '20000',
                # '--not_joined_h5_path', f'{db_path.stem}_proc.h5',
            ]
            # if aggregate_period_s <= 5:   # [s], no need to split csv for a large averaging interval
            #     args += (['--split_period', '1D'])
            if aggregate_period_s is None:  # processing parameters (when aggregating already-processed data, no processing is done)
                args += ([
                    '--max_dict',
                    'M[xyz]:4096',
                    # Note: 4096 is not suitable for Baranov's program
                    # '--timerange_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n,"  # does not work - use kwarg
                    # '--timerange_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
                    '--split_period',
                    '1D'
                ] if subs_made else [
                    '--bad_p_at_bursts_starts_peroiod',
                    '1H',
                ])
            # csv is split by 1 day (default when not averaging); a single monolithic csv when aggregate_period_s == 600
            if aggregate_period_s not in cfg['out'][
                    'aggregate_period_s_not_to_text']:  # , 300, 600]:
                args += ['--text_path', str(db_path.parent / 'text_output')]
            kwarg = {
                'in': {
                    'min_date': cfg['filter']['min_date'][0],
                    'max_date': cfg['filter']['max_date'][0],
                    'timerange_zeroing': cfg['in']['timerange_zeroing'],
                    'azimuth_add': azimuth_add
                }
            }
            # If all data need to be combined one after another:
            # set_field_if_no(kwarg, 'in', {})
            # kwarg['in'].update({
            #
            #         'tables': [f'incl{i:0>2}' for i in min_date.keys() if i!=0],
            #         'dates_min': min_date.values(),  # in table list order
            #         'dates_max': max_date.values(),  #
            #         })
            # set_field_if_no(kwarg, 'out', {})
            # kwarg['out'].update({'b_all_to_one_col': 'True'})

            incl_h5clc.main(args, **kwarg)

    # Calculate spectrograms.
    if st(3):  # Can be done at any time after step 1

        def raise_ni():
            raise NotImplementedError(
                'Cannot process probes having different fs in one run: you need to do it separately'
            )

        args = [
            Path(incl_h5clc.__file__).with_name(
                f'incl_h5spectrum{db_path.stem}.yaml'),
            # if no such file all settings are here
            '--db_path',
            str(db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')),
            '--tables_list',
            f"{cfg['in']['probes_prefix']}.*",  # inclinometers or wavegauges w\d\d  ## 'w02', 'incl.*',
            # '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '',
            '--min_date',
            datetime64_str(cfg['filter']['min_date'][0]),
            '--max_date',
            datetime64_str(cfg['filter']['max_date']
                           [0]),  # '2019-09-09T16:31:00',  #17:00:00
            # '--max_dict', 'M[xyz]:4096',  # use if db_path does not end with _proc_noAvg.h5, i.e. velocity needs to be calculated
            '--out.db_path',
            f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5",
            # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd',
            '--fs_float',
            f"{fs(probes[0], cfg['in']['probes_prefix'])}",
            # (lambda x: x == x[0])(np.vectorize(fs)(probes, prefix))).all() else raise_ni()
            #
            # '--timerange_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
            # '--verbose', 'DEBUG',
            # '--chunksize', '20000',
            '--b_interact',
            '0',
        ]
        if 'w' in cfg['in']['probes_prefix']:
            args += [
                '--split_period',
                '1H',
                '--dt_interval_minutes',
                '10',  # burst mode
                '--fmin',
                '0.0001',
                '--fmax',
                '4'
            ]
        else:
            args += [
                '--split_period',
                '2H',
                '--fmin',
                '0.0004',  #0.0004
                '--fmax',
                '1.05'
            ]

        incl_h5spectrum.main(args)

    # Draw in Veusz
    if st(4):
        b_images_only = True  # False
        pattern_path = db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz'  # r'vsz_5min\191126_0000_5m_w02.vsz'
        if not b_images_only:
            pattern_bytes_slice_old = re.escape(b'((5828756, 5830223, None),)')

        # Length of non-adjacent intervals, s (set to None to disallow)
        period = '1D'
        length = '5m'  # period  # '1D'

        dt_custom_s = pd_period_to_timedelta(
            length) if length != period else None  # None  #  60 * 5

        if True:
            # Load starts and assign ends
            t_intervals_start = pd.read_csv(
                cfg['in']['path_cruise'] /
                r'vsz+h5_proc\intervals_selected.txt',
                converters={
                    'time_start': lambda x: np.datetime64(x, 'ns')
                },
                index_col=0).index
            edges = (pd.DatetimeIndex(t_intervals_start),
                     pd.DatetimeIndex(t_intervals_start + dt_custom_s)
                     )  # np.zeros_like()
        else:
            # Generate periodic intervals
            t_interval_start, t_intervals_end = intervals_from_period(
                datetime_range=np.array(
                    [
                        cfg['filter']['min_date']['0'],
                        cfg['filter']['max_date']['0']
                    ],
                    # ['2018-08-11T18:00:00', '2018-09-06T00:00:00'],
                    # ['2019-02-11T13:05:00', '2019-03-07T11:30:00'],
                    # ['2018-11-16T15:19', '2018-12-14T14:35'],
                    # ['2018-10-22T12:30', '2018-10-27T06:30:00'],
                    'datetime64[s]'),
                period=period)
            edges = (pd.DatetimeIndex([t_interval_start
                                       ]).append(t_intervals_end[:-1]),
                     pd.DatetimeIndex(t_intervals_end))

        for i, probe in enumerate(probes):
            probe_name = f"{cfg['in']['probes_prefix']}{probe:02}"  # table name in db
            l.info('Draw %s in Veusz: %d intervals...', probe_name,
                   edges[0].size)
            # for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]), t_intervals_end), start=1):

            cfg_vp = {'veusze': None}
            for i_interval, (t_interval_start,
                             t_interval_end) in enumerate(zip(*edges),
                                                          start=1):

                # if i_interval < 23: #<= 0:  # TEMPORARY Skip this number of intervals
                #     continue
                if period != length:
                    t_interval_start = t_interval_end - pd.Timedelta(
                        dt_custom_s, 's')

                try:  # skipping absent probes
                    start_end = h5q_interval2coord(
                        db_path=str(db_path),
                        table=f'/{probe_name}',
                        t_interval=(t_interval_start, t_interval_end))
                    if not len(start_end):
                        break  # no data
                except KeyError:
                    break  # device name not in specified range, go to next name

                pattern_path_new = pattern_path.with_name(
                    f"{t_interval_start:%y%m%d_%H%M}_{length}_{probe_name}.vsz"
                )

                # Modify pattern file
                if not b_images_only:
                    probe_name_old = re.match(r'.*((?:incl|w)\d*).*',
                                              pattern_path.name).groups()[0]
                    bytes_slice = bytes(
                        '(({:d}, {:d}, None),)'.format(*(start_end +
                                                         np.int32([-1, 1]))),
                        'ascii')

                    def f_replace(line):
                        """
                        Replace in file
                        1. probe name
                        2. slice
                        """
                        # if i_interval == 1:
                        line, ok = re.subn(bytes(probe_name_old, 'ascii'),
                                           bytes(probe_name, 'ascii'), line)
                        if ok:  # the slice can only be on the same line
                            line = re.sub(pattern_bytes_slice_old, bytes_slice,
                                          line)
                        return line

                    if not rep_in_file(pattern_path,
                                       pattern_path_new,
                                       f_replace=f_replace):
                        l.warning('Veusz pattern not changed!')
                        # break
                    elif cfg_vp['veusze']:
                        cfg_vp['veusze'].Load(str(pattern_path_new))
                elif cfg_vp['veusze']:
                    cfg_vp['veusze'].Load(str(pattern_path_new))

                txt_time_range = \
                    """
                    "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \
                    """.format(t_interval_start, t_interval_end)
                print(f'{i_interval}. {txt_time_range}', end=' ')

                cfg_vp = veuszPropagate.main(
                    [
                        Path(veuszPropagate.__file__).parent.with_name(
                            'veuszPropagate.ini'),
                        # '--data_yield_prefix', '-',
                        '--path',
                        str(
                            db_path
                        ),  # used for custom loading from db; some source is required
                        '--tables_list',
                        f'/{probe_name}',  # 181022inclinometers/ \d*
                        '--pattern_path',
                        str(pattern_path_new),
                        # fr'd:\workData\BalticSea\190801inclinometer_Schuka\{probe_name}_190807_1D.vsz',
                        # str(db_path.parent / dir_incl / f'{probe_name}_190211.vsz'), #warning: create file with small name
                        # '--before_next', 'restore_config',
                        # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{length}",
                        '--filename_fun',
                        f'lambda tbl: "{pattern_path_new.name}"',
                        '--add_custom_list',
                        'USEtime',  # nAveragePrefer',
                        '--add_custom_expressions_list',
                        txt_time_range,
                        # + """
                        # ", 5"
                        # """,
                        '--b_update_existed',
                        'True',
                        '--export_pages_int_list',
                        '1, 2',  # 0 for all '6, 7, 8',  #'1, 2, 3'
                        # '--export_dpi_int', '200',
                        '--export_format',
                        'emf',
                        '--b_interact',
                        '0',
                        '--b_images_only',
                        f'{b_images_only}',
                        '--return',
                        '<embedded_object>',  # reuse to avoid bloating memory
                    ],
                    veusze=cfg_vp['veusze'])
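The constant_factory/defaultdict combination near the top of main() gives per-probe date limits with a fallback taken from key '0' (or 0). A small self-contained sketch with hypothetical values, separate from the code above:

from collections import defaultdict
import numpy as np

def constant_factory(val):
    def default_val():
        return val
    return default_val

per_probe = {'0': np.datetime64('2019-08-01'), 14: np.datetime64('2019-08-20')}  # hypothetical dates
min_date = defaultdict(
    constant_factory(per_probe.get('0', per_probe.get(0))),  # default taken from key '0' or 0
    per_probe)

print(min_date[14])  # 2019-08-20: explicit per-probe value
print(min_date[7])   # 2019-08-01: falls back to the default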
Example #5
0
def main(config: ConfigType) -> None:
    """
    ----------------------------
    Save data to Pandas HDF5 store*.h5
    ----------------------------
    The store contains tables for each device and each device table contains log with metadata of recording sessions

    :param config: with fields:
    - in - mapping with fields:
      - tables_log: - log table name or pattern str for it: in pattern '{}' will be replaced by data table name
      - cols_good_data: -
      ['dt_from_utc', 'db', 'db_path', 'table_nav']
    - out - mapping with fields:
      - cols: can use i - data row number and i_log_row - log row number that is used to load data range
      - cols_log: can use i - log row number
      - text_date_format
      - file_name_fun, file_name_fun_log - {fun} part of "lambda rec_num, t_st, t_en: {fun}" string to compile function
      for name of data and log text files
      - sep

    """
    global cfg
    cfg = to_vaex_hdf5.cfg_dataclasses.main_init(config, cs_store_name)
    cfg_in = cfg.pop('input')
    cfg_in['cfgFile'] = cs_store_name
    cfg['in'] = cfg_in
    # try:
    #     cfg = to_vaex_hdf5.cfg_dataclasses.main_init_input_file(cfg, cs_store_name, )
    # except Ex_nothing_done:
    #     pass  # existed db is not mandatory

    device_path, cfg['out']['db_path'] = device_in_out_paths(
        db_path=cfg['out'].get('db_path'),
        path_cruise=cfg['in']['path_cruise'],
        device_short_name=cfg['in']['probes_prefix'],
        device_dir_pattern='*inclinometer*')

    out = cfg['out']
    # h5init(cfg['in'], out)

    probes = cfg['in']['probes'] or range(
        1, 41)  # sets default range, specify your values before line ---
    raw_root, probe_is_incl = re.subn('INCL_?', 'INKL_',
                                      cfg['in']['probes_prefix'].upper())

    # some parameters that depend on the probe type (indicated by probes_prefix)
    p_type = defaultdict(
        # Baranov's format (default)
        constant_factory({
            'correct_fun': partial(
                correct_txt,
                mod_file_name=mod_incl_name,
                sub_str_list=[
                    b'^\r?(?P<use>20\d{2}(\t\d{1,2}){5}(\t\d{5}){8}).*',
                    b'^.+'
                ]),
            'fs': 10,
            'format': 'Baranov',
        }),
        {
            'incl': {
                'correct_fun': partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,6}){6}(,\d{1,2}\.\d{2})(,\-?\d{1,3}\.\d{2})).*',
                        b'^.+'
                    ]),
                'fs': 5,
                'format': 'Kondrashov',
            },
            'voln': {
                'correct_fun': partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,8})(,\-?\d{1,2}\.\d{2}){2}).*',
                        b'^.+'
                    ]),
                'fs': 5,
                # 'tbl_prefix': 'w',
                'format': 'Kondrashov',
            }
        })

    if st(1, 'Save inclinometer or wavegauge data from ASCII to HDF5'):
        # Note: additional uncorrected files for the same probe are not found if corrected ones already exist in the search path (move them out if needed)

        i_proc_probe = 0  # counter of processed probes
        i_proc_file = 0  # counter of processed files
        # pattern identifying only this probe's raw data files that need correction: '*INKL*{:0>2}*.[tT][xX][tT]'

        raw_parent = dir_incl / '_raw'  # raw_parent /=
        if cfg['in']['raw_subdir'] is None:
            cfg['in']['raw_subdir'] = ''

        dir_out = raw_parent / re.sub(r'[.\\/ *?]', '_',
                                      cfg['in']['raw_subdir'])

        # re.sub flattens multilevel subdirs into one level, which is all correct_fun() can create

        def dt_from_utc_2000(probe):
            """ Correct time of probes started without time setting. Raw date must start from  2000-01-01T00:00"""
            return (
                datetime(year=2000, month=1, day=1) -
                cfg['in']['time_start_utc'][probe]
            ) if cfg['in']['time_start_utc'].get(probe) else timedelta(0)

        # convert cfg['in']['dt_from_utc'] keys to int

        cfg['in']['dt_from_utc'] = {
            int(p): v
            for p, v in cfg['in']['dt_from_utc'].items()
        }
        # convert cfg['in']['time_start_utc'] to cfg['in']['dt_from_utc'] and keys to int
        cfg['in']['dt_from_utc'].update(    # overwrite with values derived from 'time_start_utc' where they exist
            {int(p): dt_from_utc_2000(p) for p, v in cfg['in']['time_start_utc'].items()}
            )
        # make cfg['in']['dt_from_utc'][0] the default value
        cfg['in']['dt_from_utc'] = defaultdict(
            constant_factory(cfg['in']['dt_from_utc'].pop(0, timedelta(0))),
            cfg['in']['dt_from_utc'])

        for probe in probes:
            raw_found = []
            raw_pattern_file = str(
                Path(glob.escape(cfg['in']['raw_subdir'])) /
                cfg['in']['raw_pattern'].format(prefix=raw_root, number=probe))
            correct_fun = p_type[cfg['in']['probes_prefix']]['correct_fun']
            # if not archive:
            if (not re.match(r'.*(\.zip|\.rar)$', cfg['in']['raw_subdir'],
                             re.IGNORECASE)) and raw_parent.is_dir():
                raw_found = list(raw_parent.glob(raw_pattern_file))
            if not raw_found:
                # Check whether corrected files generated by correct_txt() already exist for this probe; if so, just use them
                raw_found = list(
                    dir_out.glob(
                        f"{cfg['in']['probes_prefix']}{probe:0>2}.txt"))
                if raw_found:
                    print('corrected csv file', [r.name for r in raw_found],
                          'found')
                    correct_fun = lambda x, dir_out: x
                elif not cfg['in']['raw_subdir']:
                    continue

            for file_in in (raw_found or open_csv_or_archive_of_them(
                    raw_parent, binary_mode=False, pattern=raw_pattern_file)):
                file_in = correct_fun(file_in, dir_out=dir_out)
                if not file_in:
                    continue
                tbl = file_in.stem  # f"{cfg['in']['probes_prefix']}{probe:0>2}"
                # tbl = re.sub('^((?P<i>inkl)|w)_0', lambda m: 'incl' if m.group('i') else 'w',  # correct name
                #              re.sub('^[\d_]*|\*', '', file_in.stem).lower()),  # remove date-prefix if in name
                csv2h5(
                    [
                        str(
                            Path(__file__).parent / 'ini' /
                            f"csv_{'inclin' if probe_is_incl else 'wavegage'}_{p_type[cfg['in']['probes_prefix']]['format']}.ini"
                        ),
                        '--path',
                        str(file_in),
                        '--blocksize_int',
                        '50_000_000',  # 50 MB
                        '--table',
                        tbl,
                        '--db_path',
                        str(db_path),
                        # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'),
                        # '--b_raise_on_err', '0',  # ?
                        '--b_interact',
                        '0',
                        '--fs_float',
                        str(p_type[cfg['in']['probes_prefix']]
                            ['fs']),  #f'{fs(probe, file_in.stem)}',
                        '--dt_from_utc_seconds',
                        str(cfg['in']['dt_from_utc'][probe].total_seconds()),
                        '--b_del_temp_db',
                        '1',
                    ] +
                    (['--csv_specific_param_dict', 'invert_magnitometr: True']
                     if probe_is_incl else []),
                    **{
                        'filter': {
                            'min_date':
                            cfg['filter']['min_date'].get(
                                probe, np.datetime64(0, 'ns')),
                            'max_date':
                            cfg['filter']['max_date'].get(
                                probe, np.datetime64('now', 'ns')
                            ),  # plain 'now' works in synchronous mode
                        }
                    })

                # Get coefs:
                l.info(
                    f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}"
                )
                try:
                    h5copy_coef(cfg['in']['db_coefs'], db_path, tbl)
                except KeyError as e:  # Unable to open object (component not found)
                    l.warning(
                        'No coefs to copy?'
                    )  # write some dummy coefficients so Veusz patterns can be loaded:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                except OSError as e:
                    l.warning(
                        'Not found DB with coefs?'
                    )  # write some dummy coefficients so Veusz patterns can be loaded:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                i_proc_file += 1
            else:
                print('no', raw_pattern_file, end=', ')
            i_proc_probe += 1
        print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

    cfg_in['tables'] = ['incl30']
    from inclinometer.incl_h5clc import h5_names_gen
    from inclinometer.h5inclinometer_coef import rot_matrix_x, rot_matrix_y  #rotate_x, rotate_y
    # R*[xyz]. Since we will next apply the coefficients Ag = Rz*Ry*Rx, we can incorporate an extra
    # rotation by precomputing it: add the known angles on each axis to Rz, Ry, Rx.
    # If the rotation is 180 deg, it can be added to Rx alone. Modified coef: Ag_new = Rz*Ry*R(x+180).
    # R(x+180) = Rx*Rx180, which is equivalent to rotating Ag.T in the opposite direction:
    # Ag_new = rotate_x()

    # The inclinometer was changed so that applying the coefficients returns rotated data field vectors:
    # Out_rotated = Ag * In
    # We rotate them back:
    # Out = rotate(Out_rotated),
    # i.e. the device was rotated after calibration by some angle P, so to determine the angle relative to vertical
    # we rotate the data vector in the opposite direction: Out = Ag * R_back * In. This is equivalent to a new coefficient obtained by applying the rotation to Ag:
    # Ag_new = Ag * R_back = (R_back.T * Ag.T).T = rotate_forward(Ag.T).T
    # (a standalone numeric sketch of this composition follows this example)

    # Applying the calibration coefficients yields data in an inverted basis, so we rotate it afterwards:
    #
    # coefs['Ag'] = rotate_x(coefs['Ag'], angle_degrees=180)
    # coefs['Ah'] = rotate_x(coefs['Ah'], angle_degrees=180)

    # dfLogOld, cfg_out['db'], cfg_out['b_skip_if_up_to_date'] = h5temp_open(**cfg_out)
    for i1, (tbl, coefs) in enumerate(h5_names_gen(cfg_in), start=1):
        # using property of rotation around same axis: R(x, θ1)@R(x, θ2) = R(x, θ1 + θ2)
        coefs['Ag'] = coefs['Ag'] @ rot_matrix_x(np.cos(np.pi), np.sin(np.pi))
        coefs['Ah'] = coefs['Ah'] @ rot_matrix_x(np.cos(np.pi), np.sin(np.pi))
        coefs['azimuth_shift_deg'] = 180
        h5copy_coef(None,
                    cfg['out']['db_path'],
                    tbl,
                    dict_matrices=dict_matrices_for_h5(coefs,
                                                       tbl,
                                                       to_nested_keys=True))

    # Calculate velocity and average
    if st(2):
        # If aggregate_period_s is None, do not average and write to *_proc_noAvg.h5; otherwise load from that h5 and write to *_proc.h5
        if not cfg['out']['aggregate_period_s']:
            cfg['out']['aggregate_period_s'] = [
                None, 2, 600,
                3600 if 'w' in cfg['in']['probes_prefix'] else 7200
            ]

        if cfg['in']['azimuth_add']:
            if 'Lat' in cfg['in']['azimuth_add']:
                from datetime import datetime
                # add magnetic declination,° for used coordinates
                # todo: get time
                azimuth_add = mag_dec(cfg['in']['azimuth_add']['Lat'],
                                      cfg['in']['azimuth_add']['Lon'],
                                      datetime(2020, 9, 10),
                                      depth=-1)
            else:
                azimuth_add = 0
            if 'constant' in cfg['in']['azimuth_add']:
                # and add a constant. For example, subtract the declination at the calibration place if it was applied there
                azimuth_add += cfg['in']['azimuth_add'][
                    'constant']  # add -6.65644183° to account for calibration in Kaliningrad
        for aggregate_period_s in cfg['out']['aggregate_period_s']:
            if aggregate_period_s is None:
                db_path_in = db_path
                db_path_out = db_path.with_name(
                    f'{db_path.stem}_proc_noAvg.h5')
            else:
                db_path_in = db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')
                db_path_out = f'{db_path.stem}_proc.h5'  # or separately: '_proc{aggregate_period_s}.h5'

            args = [
                Path(incl_h5clc.__file__).with_name(
                    f'incl_h5clc_{db_path.stem}.yaml'),
                # if no such file all settings are here
                '--db_path',
                str(db_path_in),
                # !   'incl.*|w\d*'  inclinometers or wavegauges w\d\d # 'incl09':
                '--tables_list',
                'incl.*' if not cfg['in']['probes'] else
                f"incl.*(?:{'|'.join('{:0>2}'.format(p) for p in cfg['in']['probes'])})",
                '--aggregate_period',
                f'{aggregate_period_s}S' if aggregate_period_s else '',
                '--out.db_path',
                str(db_path_out),
                '--table',
                f'V_incl_bin{aggregate_period_s}'
                if aggregate_period_s else 'V_incl',
                '--verbose',
                'INFO',  # 'DEBUG' produces many numba messages
                '--b_del_temp_db',
                '1',
                # '--calc_version', 'polynom(force)',  # deprecated
                # '--chunksize', '20000',
                # '--not_joined_h5_path', f'{db_path.stem}_proc.h5',
            ]
            # if aggregate_period_s <= 5:   # [s], no need to split csv for a large averaging interval
            #     args += (['--split_period', '1D'])
            if aggregate_period_s is None:  # processing parameters (when aggregating already-processed data, no processing is done)
                args += ([
                    '--max_dict',
                    'M[xyz]:4096',
                    # Note: 4096 is not suitable for Baranov's program
                    # '--time_range_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n,"  # does not work - use kwarg
                    # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
                    '--split_period',
                    '1D'
                ] if probe_is_incl else [
                    '--bad_p_at_bursts_starts_peroiod',
                    '1H',
                ])
            # csv is split by 1 day (default when not averaging); a single monolithic csv when aggregate_period_s == 600
            if aggregate_period_s not in cfg['out'][
                    'aggregate_period_s_not_to_text']:  # , 300, 600]:
                args += ['--text_path', str(db_path.parent / 'text_output')]
            kwarg = {
                'in': {
                    'min_date': cfg['filter']['min_date'][0],
                    'max_date': cfg['filter']['max_date'][0],
                    'time_range_zeroing': cfg['in']['time_range_zeroing'],
                    'azimuth_add': azimuth_add
                }
            }
            # If all data need to be combined one after another:
            # set_field_if_no(kwarg, 'in', {})
            # kwarg['in'].update({
            #
            #         'tables': [f'incl{i:0>2}' for i in min_date.keys() if i!=0],
            #         'dates_min': min_date.values(),  # in table list order
            #         'dates_max': max_date.values(),  #
            #         })
            # set_field_if_no(kwarg, 'out', {})
            # kwarg['out'].update({'b_all_to_one_col': 'True'})

            incl_h5clc.main(args, **kwarg)

    # Calculate spectrograms.
    if st(3):  # Can be done at any time after step 1

        def raise_ni():
            raise NotImplementedError(
                'Cannot process probes having different fs in one run: you need to do it separately'
            )

        args = [
            Path(incl_h5clc.__file__).with_name(
                f'incl_h5spectrum{db_path.stem}.yaml'),
            # if no such file all settings are here
            '--db_path',
            str(db_path.with_name(f'{db_path.stem}_proc_noAvg.h5')),
            '--tables_list',
            f"{cfg['in']['probes_prefix']}.*",  # inclinometers or wavegauges w\d\d  ## 'w02', 'incl.*',
            # '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '',
            '--min_date',
            datetime64_str(cfg['filter']['min_date'][0]),
            '--max_date',
            datetime64_str(cfg['filter']['max_date']
                           [0]),  # '2019-09-09T16:31:00',  #17:00:00
            # '--max_dict', 'M[xyz]:4096',  # use if db_path does not end with _proc_noAvg.h5, i.e. velocity needs to be calculated
            '--out.db_path',
            f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5",
            # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd',
            '--fs_float',
            f"{fs(probes[0], cfg['in']['probes_prefix'])}",
            # (lambda x: x == x[0])(np.vectorize(fs)(probes, prefix))).all() else raise_ni()
            #
            # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
            # '--verbose', 'DEBUG',
            # '--chunksize', '20000',
            '--b_interact',
            '0',
        ]
        if 'w' in cfg['in']['probes_prefix']:
            args += [
                '--split_period',
                '1H',
                '--dt_interval_minutes',
                '10',  # burst mode
                '--fmin',
                '0.0001',
                '--fmax',
                '4'
            ]
        else:
            args += [
                '--split_period',
                '2H',
                '--fmin',
                '0.0004',  #0.0004
                '--fmax',
                '1.05'
            ]

        incl_h5spectrum.main(args)

    # Draw in Veusz
    if st(4):
        b_images_only = True  # False
        pattern_path = db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz'  # r'vsz_5min\191126_0000_5m_w02.vsz'
        if not b_images_only:
            pattern_bytes_slice_old = re.escape(b'((5828756, 5830223, None),)')

        # Length of non-adjacent intervals, s (set to None to disallow)
        period = '1D'
        length = '5m'  # period  # '1D'

        dt_custom_s = pd_period_to_timedelta(
            length) if length != period else None  # None  #  60 * 5

        if True:
            # Load starts and assign ends
            t_intervals_start = pd.read_csv(
                cfg['in']['path_cruise'] /
                r'vsz+h5_proc\intervals_selected.txt',
                converters={
                    'time_start': lambda x: np.datetime64(x, 'ns')
                },
                index_col=0).index
            edges = (pd.DatetimeIndex(t_intervals_start),
                     pd.DatetimeIndex(t_intervals_start + dt_custom_s)
                     )  # np.zeros_like()
        else:
            # Generate periodic intervals
            t_interval_start, t_intervals_end = intervals_from_period(
                datetime_range=np.array(
                    [
                        cfg['filter']['min_date']['0'],
                        cfg['filter']['max_date']['0']
                    ],
                    # ['2018-08-11T18:00:00', '2018-09-06T00:00:00'],
                    # ['2019-02-11T13:05:00', '2019-03-07T11:30:00'],
                    # ['2018-11-16T15:19', '2018-12-14T14:35'],
                    # ['2018-10-22T12:30', '2018-10-27T06:30:00'],
                    'datetime64[s]'),
                period=period)
            edges = (pd.DatetimeIndex([t_interval_start
                                       ]).append(t_intervals_end[:-1]),
                     pd.DatetimeIndex(t_intervals_end))

        for i, probe in enumerate(probes):
            probe_name = f"{cfg['in']['probes_prefix']}{probe:02}"  # table name in db
            l.info('Draw %s in Veusz: %d intervals...', probe_name,
                   edges[0].size)
            # for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]), t_intervals_end), start=1):

            cfg_vp = {'veusze': None}
            for i_interval, (t_interval_start,
                             t_interval_end) in enumerate(zip(*edges),
                                                          start=1):

                # if i_interval < 23: #<= 0:  # TEMPORARY Skip this number of intervals
                #     continue
                if period != length:
                    t_interval_start = t_interval_end - pd.Timedelta(
                        dt_custom_s, 's')

                try:  # skipping absent probes
                    start_end = h5q_interval2coord(
                        db_path=str(db_path),
                        table=f'/{probe_name}',
                        t_interval=(t_interval_start, t_interval_end))
                    if not len(start_end):
                        break  # no data
                except KeyError:
                    break  # device name not in specified range, go to next name

                pattern_path_new = pattern_path.with_name(
                    f"{t_interval_start:%y%m%d_%H%M}_{length}_{probe_name}.vsz"
                )

                # Modify pattern file
                if not b_images_only:
                    probe_name_old = re.match(r'.*((?:incl|w)\d*).*',
                                              pattern_path.name).groups()[0]
                    bytes_slice = bytes(
                        '(({:d}, {:d}, None),)'.format(*(start_end +
                                                         np.int32([-1, 1]))),
                        'ascii')

                    def f_replace(line):
                        """
                        Replace in file
                        1. probe name
                        2. slice
                        """
                        # if i_interval == 1:
                        line, ok = re.subn(bytes(probe_name_old, 'ascii'),
                                           bytes(probe_name, 'ascii'), line)
                        if ok:  # can be only in same line
                            line = re.sub(pattern_bytes_slice_old, bytes_slice,
                                          line)
                        return line

                    if not rep_in_file(pattern_path,
                                       pattern_path_new,
                                       f_replace=f_replace):
                        l.warning('Veusz pattern not changed!')
                        # break
                    elif cfg_vp['veusze']:
                        cfg_vp['veusze'].Load(str(pattern_path_new))
                elif cfg_vp['veusze']:
                    cfg_vp['veusze'].Load(str(pattern_path_new))

                txt_time_range = \
                    """
                    "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \
                    """.format(t_interval_start, t_interval_end)
                print(f'{i_interval}. {txt_time_range}', end=' ')

                cfg_vp = veuszPropagate.main(
                    [
                        Path(veuszPropagate.__file__).parent.with_name(
                            'veuszPropagate.ini'),
                        # '--data_yield_prefix', '-',
                        '--path',
                        str(
                            db_path
                        ),  # used for custom loading from db; some data source is required
                        '--tables_list',
                        f'/{probe_name}',  # 181022inclinometers/ \d*
                        '--pattern_path',
                        str(pattern_path_new),
                        # fr'd:\workData\BalticSea\190801inclinometer_Schuka\{probe_name}_190807_1D.vsz',
                        # str(db_path.parent / dir_incl / f'{probe_name}_190211.vsz'), #warning: create file with small name
                        # '--before_next', 'restore_config',
                        # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{length}",
                        '--filename_fun',
                        f'lambda tbl: "{pattern_path_new.name}"',
                        '--add_custom_list',
                        'USEtime',  # nAveragePrefer',
                        '--add_custom_expressions_list',
                        txt_time_range,
                        # + """
                        # ", 5"
                        # """,
                        '--b_update_existed',
                        'True',
                        '--export_pages_int_list',
                        '1, 2',  # 0 for all '6, 7, 8',  #'1, 2, 3'
                        # '--export_dpi_int', '200',
                        '--export_format',
                        'emf',
                        '--b_interact',
                        '0',
                        '--b_images_only',
                        f'{b_images_only}',
                        '--return',
                        '<embedded_object>',  # reuse to not bloat memory
                    ],
                    veusze=cfg_vp['veusze'])
Example #6
            'M,A',
            '--b_update_existed',
            'True',  # to not skip.
            '--export_pages_int_list',
            '0',  # 0 = all
            '--b_interact',
            'False'
        ])
        # if step == 3:
        # to 1st db too
        # l = init_logging(logging, None)
        l.info(
            f"Adding coefficients to {db_path_calibr_scalling}/{tbl} from {db_path_tank}"
        )
        h5copy_coef(db_path_tank,
                    db_path_calibr_scalling,
                    tbl,
                    ok_to_replace_group=True)

    if step == 3:
        time_ranges_nord = {
            1: ['2019-07-11T18:48:35', '2019-07-11T18:49:20'],
            #  7: ['2019-07-11T16:53:40', '2019-07-11T16:54:10'], ???
            # 30: ['2019-07-09T17:54:50', '2019-07-09T17:55:22'],
            4: ['2019-07-11T17:22:15', '2019-07-11T17:23:08'],
            5: ['2019-07-11T18:27:10', '2019-07-11T18:27:48'],
            9: ['2019-12-20T16:58:30', '2019-12-20T16:59:15'],
            10: ['2019-12-23T17:32:35', '2019-12-23T17:33:27'],
            11: ['2019-07-11T17:41:44', '2019-07-11T18:42:48'],
            12: ['2019-07-11T18:04:46', '2019-07-11T18:05:36'],
            14: ['2019-09-02T14:01:41', '2019-09-02T14:02:15'],  # todo
            16: ['2019-09-03T19:22:20', '2019-09-03T19:22:54'],
Example #7
def main(new_arg=None, **kwargs):
    """

    :param new_arg: list of strings, command line arguments
    :param kwargs: dicts of dicts (one for each ini section): specified values override ini values
    """

    # global l
    cfg = cfg_from_args(my_argparser(), new_arg, **kwargs)
    if not cfg['program']:
        return  # usually error of unrecognized arguments displayed
    cfg['in']['db_coefs'] = Path(cfg['in']['db_coefs'])
    for path_field in ['db_coefs', 'path_cruise']:
        if not cfg['in'][path_field].is_absolute():
            cfg['in'][path_field] = (
                cfg['in']['cfgFile'].parent / cfg['in'][path_field]
            ).resolve().absolute()  # cfg['in']['cfgFile'].parent /

    def constant_factory(val):
        def default_val():
            return val

        return default_val

    for lim in ('min_date', 'max_date'):
        # convert keys to int because they must be comparable to probes_int_list (command line argument keys are always strings; in yaml you can set string or int)
        _ = {int(k): v for k, v in cfg['filter'][lim].items()}
        cfg['filter'][lim] = defaultdict(constant_factory(_.get(0)), _)
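    # Behavior sketch (added comment, not original code): each cfg['filter'][lim] is now a
    # defaultdict whose default is the value stored under key 0. Assuming, for example,
    # {0: '2019-07-01', 9: '2019-12-20'} was given:
    #   cfg['filter']['min_date'][9]  -> '2019-12-20'  (explicit per-probe value)
    #   cfg['filter']['min_date'][17] -> '2019-07-01'  (falls back to the key-0 default)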

    l = init_logging(logging, None, None, 'INFO')
    #l = init_logging(logging, None, cfg['program']['log'], cfg['program']['verbose'])

    if True:  # False. Experimental speedup but takes memory
        from dask.cache import Cache
        cache = Cache(2e9)  # Leverage two gigabytes of memory
        cache.register()  # Turn cache on globally

    #if __debug__:
    # # because there was errors on debug when default scheduler used
    # cfg['program']['dask_scheduler'] = 'synchronous'

    if cfg['program']['dask_scheduler']:
        if cfg['program']['dask_scheduler'] == 'distributed':
            from dask.distributed import Client
            # cluster = dask.distributed.LocalCluster(n_workers=2, threads_per_worker=1, memory_limit="5.5Gb")
            client = Client(processes=False)
            # navigate to http://localhost:8787/status to see the diagnostic dashboard if you have Bokeh installed
            # processes=False: avoids inter-worker communication; fine for computations that release the GIL (numpy, da.array)  # without it there is an error
        else:
            if cfg['program']['dask_scheduler'] == 'synchronous':
                l.warning('using "synchronous" scheduler for debugging')
            import dask
            dask.config.set(scheduler=cfg['program']['dask_scheduler'])

    # Run steps :
    st.start = cfg['program']['step_start']
    st.end = cfg['program']['step_end']
    st.go = True

    if not cfg['out'][
            'db_name']:  # set name from 'path_cruise' name (or its parent) if it has digits at the start; priority for the dir name is "*inclinometer*"
        for p in (lambda p: [p, p.parent])(cfg['in']['path_cruise']):
            m = re.match('(^[\d_]*).*', p.name)
            if m:
                break
        cfg['out']['db_name'] = f"{m.group(1).strip('_')}incl.h5"

    dir_incl = next((d for d in cfg['in']['path_cruise'].glob('*inclinometer*')
                     if d.is_dir()), cfg['in']['path_cruise'])
    db_path = dir_incl / '_raw' / cfg['out']['db_name']

    # ---------------------------------------------------------------------------------------------
    # def fs(probe, name):
    #     if 'w' in name.lower():  # Baranov's wavegauge electronic
    #         return 10  # 5
    #     return 5
    # if probe < 20 or probe in [23, 29, 30, 32, 33]:  # 30 [4, 11, 5, 12] + [1, 7, 13, 30]
    #     return 5
    # if probe in [21, 25, 26] + list(range(28, 35)):
    #     return 8.2
    # return 4.8

    def datetime64_str(time_str: Optional[str] = None) -> np.ndarray:
        """
        Reformat time_str to ISO 8601 or to 'NaT'. Used here as input to functions that convert str to numpy.datetime64
        :param time_str: May be 'NaT'
        :return: ndarray of strings (tested for 1 element only) formatted by numpy.
        """
        return np.datetime_as_string(np.datetime64(time_str, 's'))
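    # Example (added for clarity): datetime64_str('2019-09-09T16:31') returns
    # '2019-09-09T16:31:00' and datetime64_str('NaT') returns 'NaT', so the result can be
    # passed directly as the --min_date/--max_date command line arguments used below.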

    probes = cfg['in']['probes'] or range(
        1, 41)  # sets default range, specify your values before line ---
    raw_root, probe_is_incl = re.subn('INCL_?', 'INKL_',
                                      cfg['in']['probes_prefix'].upper())
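    # Illustration of the line above (added comment): re.subn('INCL_?', 'INKL_', 'INCL_B')
    # returns ('INKL_B', 1), so probe_is_incl is truthy for inclinometer prefixes, while a
    # wavegage prefix such as 'VOLN' is left unchanged and probe_is_incl == 0.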

    # some parameters that depend on probe type (indicated by probes_prefix)
    p_type = defaultdict(
        # baranov's format
        constant_factory({
            'correct_fun':
            partial(correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^\r?(?P<use>20\d{2}(\t\d{1,2}){5}(\t\d{5}){8}).*',
                        b'^.+'
                    ]),
            'fs':
            10,
            'format':
            'Baranov',
        }),
        {
            (lambda x: x if x.startswith('incl') else 'incl')(cfg['in']['probes_prefix']):
            {
                'correct_fun':
                partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,6}){6}(,\d{1,2}\.\d{2})(,\-?\d{1,3}\.\d{2})).*',
                        b'^.+'
                    ]),
                'fs':
                5,
                'format':
                'Kondrashov',
            },
            'voln': {
                'correct_fun':
                partial(
                    correct_txt,
                    mod_file_name=mod_incl_name,
                    sub_str_list=[
                        b'^(?P<use>20\d{2}(,\d{1,2}){5}(,\-?\d{1,8})(,\-?\d{1,2}\.\d{2}){2}).*',
                        b'^.+'
                    ]),
                'fs':
                5,
                #'tbl_prefix': 'w',
                'format':
                'Kondrashov',
            }
        })
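    # Lookup sketch (added comment): p_type is a defaultdict, so
    # p_type[cfg['in']['probes_prefix']] resolves to a Kondrashov entry when the prefix
    # starts with 'incl' or equals 'voln', and falls back to the Baranov default
    # (fs=10, format 'Baranov') for any other prefix.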

    if st(1, 'Save inclinometer or wavegage data from ASCII to HDF5'):
        # Note: cannot find additional uncorrected files for the same probe if corrected ones already exist in the search path (move them out if needed)

        i_proc_probe = 0  # counter of processed probes
        i_proc_file = 0  # counter of processed files
        # pattern to identify only this probe's raw data files that need correction: '*INKL*{:0>2}*.[tT][xX][tT]'

        raw_parent = dir_incl / '_raw'  # raw_parent /=
        if cfg['in']['raw_subdir'] is None:
            cfg['in']['raw_subdir'] = ''

        dir_out = raw_parent / re.sub(r'[.\\/ *?]', '_',
                                      cfg['in']['raw_subdir'])

        # the substitution flattens multilevel subdirs to the single level that correct_fun() can create

        def dt_from_utc_2000(probe):
            """ Correct time of probes started without time setting. Raw date must start from  2000-01-01T00:00"""
            return (
                datetime(year=2000, month=1, day=1) -
                cfg['in']['time_start_utc'][probe]
            ) if cfg['in']['time_start_utc'].get(probe) else timedelta(0)

        # convert cfg['in']['dt_from_utc'] keys to int

        cfg['in']['dt_from_utc'] = {
            int(p): v
            for p, v in cfg['in']['dt_from_utc'].items()
        }
        # convert cfg['in']['time_start_utc'] to cfg['in']['dt_from_utc'] and keys to int
        cfg['in']['dt_from_utc'].update(    # overwriting with values derived from 'time_start_utc' where they exist
            {int(p): dt_from_utc_2000(p) for p, v in cfg['in']['time_start_utc'].items()}
            )
        # make cfg['in']['dt_from_utc'][0] be default value
        cfg['in']['dt_from_utc'] = defaultdict(
            constant_factory(cfg['in']['dt_from_utc'].pop(0, timedelta(0))),
            cfg['in']['dt_from_utc'])
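        # Worked example (added comment, values are illustrative): if probe 9 was started
        # with its clock showing 2000-01-01T00:00 while the real UTC start was
        # 2019-07-10T12:00, then dt_from_utc_2000(9) returns
        # datetime(2000, 1, 1) - datetime(2019, 7, 10, 12), a large negative timedelta;
        # the defaultdict above supplies timedelta(0) (or the key-0 entry) for any probe
        # without an explicit correction.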

        for probe in probes:
            raw_found = []
            raw_pattern_file = str(
                Path(glob.escape(cfg['in']['raw_subdir'])) /
                cfg['in']['raw_pattern'].format(prefix=raw_root, number=probe))
            correct_fun = p_type[cfg['in']['probes_prefix']]['correct_fun']
            # if not archive:
            if (not re.match(r'.*(\.zip|\.rar)$', cfg['in']['raw_subdir'],
                             re.IGNORECASE)) and raw_parent.is_dir():
                raw_found = list(raw_parent.glob(raw_pattern_file))
            if not raw_found:
                # Check if already have corrected files for probe generated by correct_txt(). If so then just use them
                raw_found = list(
                    dir_out.glob(
                        f"{cfg['in']['probes_prefix']}{probe:0>2}.txt"))
                if raw_found:
                    print('corrected csv file', [r.name for r in raw_found],
                          'found')
                    correct_fun = lambda x, dir_out: x
                elif not cfg['in']['raw_subdir']:
                    continue

            for file_in in (raw_found or open_csv_or_archive_of_them(
                    raw_parent, binary_mode=False, pattern=raw_pattern_file)):
                file_in = correct_fun(file_in, dir_out=dir_out)
                if not file_in:
                    continue
                tbl = file_in.stem  # f"{cfg['in']['probes_prefix']}{probe:0>2}"
                # tbl = re.sub('^((?P<i>inkl)|w)_0', lambda m: 'incl' if m.group('i') else 'w',  # correct name
                #              re.sub('^[\d_]*|\*', '', file_in.stem).lower()),  # remove date-prefix if in name
                csv2h5(
                    [
                        str(
                            Path(__file__).parent / 'ini' /
                            f"csv_{'inclin' if probe_is_incl else 'wavegage'}_{p_type[cfg['in']['probes_prefix']]['format']}.ini"
                        ),
                        '--path',
                        str(file_in),
                        '--blocksize_int',
                        '50_000_000',  # 50 MB
                        '--table',
                        tbl,
                        '--db_path',
                        str(db_path),
                        # '--log', str(scripts_path / 'log/csv2h5_inclin_Kondrashov.log'),
                        # '--b_raise_on_err', '0',  # ?
                        '--b_interact',
                        '0',
                        '--fs_float',
                        str(p_type[cfg['in']['probes_prefix']]
                            ['fs']),  #f'{fs(probe, file_in.stem)}',
                        '--dt_from_utc_seconds',
                        str(cfg['in']['dt_from_utc'][probe].total_seconds()),
                        '--b_del_temp_db',
                        '1',
                    ] +
                    (['--csv_specific_param_dict', 'invert_magnitometr: True']
                     if probe_is_incl else []),
                    **{
                        'filter': {
                            'min_date':
                            cfg['filter']['min_date'].get(
                                probe, np.datetime64(0, 'ns')),
                            'max_date':
                            cfg['filter']['max_date'].get(
                                probe, np.datetime64('now', 'ns')
                            ),  # simple 'now' works in synchronous mode
                        }
                    })

                # Get coefs:
                l.info(
                    f"Adding coefficients to {db_path}/{tbl} from {cfg['in']['db_coefs']}"
                )
                try:
                    h5copy_coef(cfg['in']['db_coefs'], db_path, tbl)
                except KeyError as e:  # Unable to open object (component not found)
                    l.warning(
                        'No coefs to copy?'
                    )  # write some dummy coefficients so that Veusz patterns can be loaded:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                except OSError as e:
                    l.warning(
                        'DB with coefs not found?'
                    )  # write some dummy coefficients so that Veusz patterns can be loaded:
                    h5copy_coef(None,
                                db_path,
                                tbl,
                                dict_matrices=dict_matrices_for_h5(tbl=tbl))
                i_proc_file += 1
            else:
                print('no', raw_pattern_file, end=', ')
            i_proc_probe += 1
        print('Ok:', i_proc_probe, 'probes,', i_proc_file, 'files processed.')

    if st(2, 'Calculate physical parameters and average'):
        kwarg = {
            'in': {
                'min_date': cfg['filter']['min_date'][0],
                'max_date': cfg['filter']['max_date'][0],
                'time_range_zeroing': cfg['in']['time_range_zeroing']
            },
            'proc': {}
        }
        # if aggregate_period_s is None then do not average and write to *_proc_noAvg.h5, else load from that h5 and write to _proc.h5
        if not cfg['out']['aggregate_period_s']:
            cfg['out']['aggregate_period_s'] = [
                None, 2, 600, 7200 if probe_is_incl else 3600
            ]

        if cfg['in']['azimuth_add']:
            if 'Lat' in cfg['in']['azimuth_add']:
                # add magnetic declination (°) for the coordinates used
                # todo: get time
                kwarg['proc']['azimuth_add'] = mag_dec(
                    cfg['in']['azimuth_add']['Lat'],
                    cfg['in']['azimuth_add']['Lon'],
                    datetime(2020, 9, 10),
                    depth=-1)
            else:
                kwarg['proc']['azimuth_add'] = 0
            if 'constant' in cfg['in']['azimuth_add']:
                # and add a constant. For example, subtract the declination at the calibration place if it was applied
                kwarg['proc']['azimuth_add'] += cfg['in']['azimuth_add'][
                    'constant']  # add -6.656 to account for calibration in Kaliningrad (mag deg = 6.656°)
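        # Added note: kwarg['proc']['azimuth_add'] ends up as the magnetic declination at
        # the given Lat/Lon (or 0 if no coordinates) plus the optional 'constant', e.g. the
        # -6.656 deg mentioned above to undo the declination applied at calibration.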

        for aggregate_period_s in cfg['out']['aggregate_period_s']:
            if aggregate_period_s is None:
                db_path_in = db_path
                db_path_out = dir_incl / f'{db_path.stem}_proc_noAvg.h5'
            else:
                db_path_in = dir_incl / f'{db_path.stem}_proc_noAvg.h5'
                db_path_out = dir_incl / f'{db_path.stem}_proc.h5'  # or separately: '_proc{aggregate_period_s}.h5'

            # 'incl.*|w\d*'  inclinometers or wavegauges w\d\d # 'incl09':
            tables_list_regex = f"{cfg['in']['probes_prefix'].replace('voln', 'w')}.*"
            if cfg['in']['probes']:
                tables_list_regex += "(?:{})".format('|'.join(
                    '{:0>2}'.format(p) for p in cfg['in']['probes']))
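            # Example of the resulting regex (added comment): with probes_prefix 'incl' and
            # probes [9, 11] this gives 'incl.*(?:09|11)', i.e. tables incl09 and incl11;
            # with no probes given it stays 'incl.*' and matches all probe tables.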

            args = [
                '../../empty.yml',  # all settings are here; we use some existing empty file so as not to print 'using default configuration'
                '--db_path',
                str(db_path_in),
                '--tables_list',
                tables_list_regex,
                '--aggregate_period',
                f'{aggregate_period_s}S' if aggregate_period_s else '',
                '--out.db_path',
                str(db_path_out),
                '--table',
                f'V_incl_bin{aggregate_period_s}'
                if aggregate_period_s else 'V_incl',
                '--verbose',
                'INFO',  #'DEBUG' get many numba messages
                '--b_del_temp_db',
                '1',
                # '--calc_version', 'polynom(force)',  # deprecated
                # '--chunksize', '20000',
                # '--not_joined_h5_path', f'{db_path.stem}_proc.h5',
            ]

            if aggregate_period_s is None:  # processing parameters (when aggregating already saved processed data we do not reprocess)
                # Note: for Baranov's prog 4096 is not suited:
                args += ([
                    '--max_dict',
                    'M[xyz]:4096',
                    # '--time_range_zeroing_dict', "incl19: '2019-11-10T13:00:00', '2019-11-10T14:00:00'\n,"  # not works - use kwarg
                    # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
                    '--split_period',
                    '1D'
                ] if probe_is_incl else [
                    '--bad_p_at_bursts_starts_peroiod',
                    '1H',
                ])
                # csv split by 1 day (default when not averaging), otherwise csv is monolithic
            if aggregate_period_s not in cfg['out'][
                    'aggregate_period_s_not_to_text']:  # , 300, 600]:
                args += ['--text_path', str(dir_incl / 'text_output')]
            # If need all data to be combined one after one:
            # set_field_if_no(kwarg, 'in', {})
            # kwarg['in'].update({
            #
            #         'tables': [f'incl{i:0>2}' for i in min_date.keys() if i!=0],
            #         'dates_min': min_date.values(),  # in table list order
            #         'dates_max': max_date.values(),  #
            #         })
            # set_field_if_no(kwarg, 'out', {})
            # kwarg['out'].update({'b_all_to_one_col': 'True'})

            incl_h5clc.main(args, **kwarg)

    if st(3, 'Calculate spectrograms'):  # Can be done at any time after step 1
        min_Pressure = 7

        # add a dict like dates_min of incl_clc ({probe: parameter}) to allow specifying a parameter per probe
        def raise_ni():
            raise NotImplementedError(
                'Cannot process probes having different fs in one run: you need to do it separately'
            )

        args = [
            Path(incl_h5clc.__file__).with_name(
                f'incl_h5spectrum{db_path.stem}.yaml'),
            # if no such file all settings are here
            '--db_path',
            str(dir_incl / f'{db_path.stem}_proc_noAvg.h5'),
            '--tables_list',
            f"{cfg['in']['probes_prefix']}.*",  # inclinometers or wavegauges w\d\d  ## 'w02', 'incl.*',
            # '--aggregate_period', f'{aggregate_period_s}S' if aggregate_period_s else '',
            '--min_date',
            datetime64_str(cfg['filter']['min_date'][0]),
            '--max_date',
            datetime64_str(cfg['filter']['max_date']
                           [0]),  # '2019-09-09T16:31:00',  #17:00:00
            '--min_Pressure',
            f'{min_Pressure}',
            # '--max_dict', 'M[xyz]:4096',  # use if db_path does not end with _proc_noAvg.h5, i.e. velocity needs to be calculated
            '--out.db_path',
            f"{db_path.stem.replace('incl', cfg['in']['probes_prefix'])}_proc_psd.h5",
            # '--table', f'psd{aggregate_period_s}' if aggregate_period_s else 'psd',
            '--fs_float',
            str(p_type[cfg['in']['probes_prefix']]
                ['fs']),  # f"{fs(probes[0], cfg['in']['probes_prefix'])}",
            # (lambda x: x == x[0])(np.vectorize(fs)(probes, prefix))).all() else raise_ni()
            #
            # '--time_range_zeroing_list', '2019-08-26T04:00:00, 2019-08-26T05:00:00'
            # '--verbose', 'DEBUG',
            # '--chunksize', '20000',
            '--b_interact',
            '0',
        ]
        if probe_is_incl:
            args += [
                '--split_period',
                '2H',
                '--fmin',
                '0.0004',  #0.0004
                '--fmax',
                '1.05'
            ]
        else:
            args += [
                '--split_period',
                '1H',
                '--dt_interval_minutes',
                '15',  # set this if burst mode to the burst interval
                '--fmin',
                '0.0001',
                '--fmax',
                '4',
                #'--min_Pressure', '-1e15',  # to not load NaNs
            ]

        incl_h5spectrum.main(args)

    if st(4, 'Draw in Veusz'):
        pattern_path = dir_incl / r'processed_h5,vsz/201202-210326incl_proc#28.vsz'
        # r'\201202_1445incl_proc#03_pattern.vsz'  #'
        # db_path.parent / r'vsz_5min\191119_0000_5m_incl19.vsz'  # r'vsz_5min\191126_0000_5m_w02.vsz'

        b_images_only = False
        # index-slice string in the vsz to be replaced on import:
        pattern_str_slice_old = None

        # Length of non-adjacent intervals, s (set None to disallow)
        # pandas offset strings (e.g. '1D') for the period between intervals and the interval to draw
        period_str = '0s'  # '1D'  #  dt
        dt_str = '0s'  # '5m'
        file_intervals = None
        edges_dict = None  # set below only when per-probe [min, max] edges are used

        period = to_offset(period_str).delta
        dt = to_offset(dt_str).delta  # timedelta(0)  #  60 * 5

        if file_intervals and period and dt:

            # Load starts and assign ends
            t_intervals_start = pd.read_csv(
                cfg['in']['path_cruise'] /
                r'vsz+h5_proc\intervals_selected.txt',
                converters={
                    'time_start': lambda x: np.datetime64(x, 'ns')
                },
                index_col=0).index
            edges = (pd.DatetimeIndex(t_intervals_start),
                     pd.DatetimeIndex(t_intervals_start + dt_custom_s)
                     )  # np.zeros_like()
        elif period and dt:
            # Generate periodic intervals
            t_interval_start, t_intervals_end = intervals_from_period(
                datetime_range=np.array(
                    [
                        cfg['filter']['min_date']['0'],
                        cfg['filter']['max_date']['0']
                    ],
                    # ['2018-08-11T18:00:00', '2018-09-06T00:00:00'],
                    # ['2019-02-11T13:05:00', '2019-03-07T11:30:00'],
                    # ['2018-11-16T15:19', '2018-12-14T14:35'],
                    # ['2018-10-22T12:30', '2018-10-27T06:30:00'],
                    'datetime64[s]'),
                period=period)
            edges = (pd.DatetimeIndex([t_interval_start
                                       ]).append(t_intervals_end[:-1]),
                     pd.DatetimeIndex(t_intervals_end))
        else:  # [min, max] edges for each probe
            edges_dict = {
                pr:
                [cfg['filter']['min_date'][pr], cfg['filter']['max_date'][pr]]
                for pr in probes
            }

        cfg_vp = {'veusze': None}
        for i, probe in enumerate(probes):
            # cfg_vp = {'veusze': None}
            if edges_dict:  # custom edges for each probe
                edges = [pd.DatetimeIndex([t]) for t in edges_dict[probe]]

            # substring in file to replace probe_name_in_pattern (see below).
            probe_name = f"_{cfg['in']['probes_prefix'].replace('incl', 'i')}{probe:02}"
            tbl = None  # f"/{cfg['in']['probes_prefix']}{probe:02}"  # set to check that probe data exists in db, else no check is done
            l.info('Draw %s in Veusz: %d intervals...', probe_name,
                   edges[0].size)
            # for i_interval, (t_interval_start, t_interval_end) in enumerate(zip(pd.DatetimeIndex([t_interval_start]).append(t_intervals_end[:-1]), t_intervals_end), start=1):

            for i_interval, (t_interval_start,
                             t_interval_end) in enumerate(zip(*edges),
                                                          start=1):

                # if i_interval < 23: #<= 0:  # TEMPORARY Skip this number of intervals
                #     continue
                if period and period != dt:
                    t_interval_start = t_interval_end - pd.Timedelta(
                        dt_custom_s, 's')

                if tbl:
                    try:  # skipping absent probes
                        start_end = h5q_interval2coord(
                            db_path=str(db_path),
                            table=tbl,
                            t_interval=(t_interval_start, t_interval_end))
                        if not len(start_end):
                            break  # no data
                    except KeyError:
                        break  # device name not in specified range, go to next name

                pattern_path_new = pattern_path.with_name(''.join([
                    f'{t_interval_start:%y%m%d_%H%M}',
                    f'_{dt_str}' if dt else '', f'{probe_name}.vsz'
                ]))

                # Modify pattern file
                if not b_images_only:
                    pattern_type, pattern_number = re.match(
                        r'.*(incl|w)_proc?#?(\d*).*',
                        pattern_path.name).groups()
                    probe_name_in_pattern = f"_{pattern_type.replace('incl', 'i')}{pattern_number}"

                    def f_replace(line):
                        """
                        Replace in file
                        1. probe name
                        2. slice
                        """
                        # if i_interval == 1:
                        line, ok = re.subn(probe_name_in_pattern, probe_name,
                                           line)
                        if ok and pattern_str_slice_old:  # can be only in same line
                            str_slice = '(({:d}, {:d}, None),)'.format(
                                *(start_end +
                                  np.int32([-1, 1])))  # bytes(, 'ascii')
                            line = re.sub(pattern_str_slice_old, str_slice,
                                          line)
                        return line

                    if not rep_in_file(pattern_path,
                                       pattern_path_new,
                                       f_replace=f_replace,
                                       binary_mode=False):
                        l.warning('Veusz pattern not changed!'
                                  )  # may be OK if we only need to draw the pattern
                        # break
                    elif cfg_vp['veusze']:
                        cfg_vp['veusze'].Load(str(pattern_path_new))
                elif cfg_vp['veusze']:
                    cfg_vp['veusze'].Load(str(pattern_path_new))

                txt_time_range = \
                    """
                    "[['{:%Y-%m-%dT%H:%M}', '{:%Y-%m-%dT%H:%M}']]" \
                    """.format(t_interval_start, t_interval_end)
                print(f'{i_interval}. {txt_time_range}', end=' ')

                cfg_vp = veuszPropagate.main(
                    [
                        Path(veuszPropagate.__file__).parent.with_name(
                            'veuszPropagate.ini'),
                        # '--data_yield_prefix', '-',

                        # '--path', str(db_path),  # if custom loading from db and some source is required
                        '--tables_list',
                        '',  # switches to search vsz-files only # f'/{probe_name}',  # 181022inclinometers/ \d*
                        '--pattern_path',
                        str(pattern_path_new),
                        # fr'd:\workData\BalticSea\190801inclinometer_Schuka\{probe_name}_190807_1D.vsz',
                        # str(dir_incl / f'{probe_name}_190211.vsz'), #warning: create file with small name
                        # '--before_next', 'restore_config',
                        # '--add_to_filename', f"_{t_interval_start:%y%m%d_%H%M}_{dt}",
                        '--filename_fun',
                        f'lambda tbl: "{pattern_path_new.name}"',
                        '--add_custom_list',
                        f'USEtime__',  # f'USEtime{probe_name}', nAveragePrefer',
                        '--add_custom_expressions_list',
                        txt_time_range,
                        # + """
                        # ", 5"
                        # """,
                        '--b_update_existed',
                        'True',
                        '--export_pages_int_list',
                        '0',  # 0 for all '6, 7, 8',  #'1, 2, 3'
                        # '--export_dpi_int', '200',
                        '--export_format',
                        'jpg',  #'emf',
                        '--b_interact',
                        '0',
                        '--b_images_only',
                        f'{b_images_only}',
                        '--return',
                        '<embedded_object>',  # reuse to not bloat memory
                        '--b_execute_vsz',
                        'True',
                        '--before_next',
                        'Close()'  # Close() is needed if b_execute_vsz is used with many files
                    ],
                    veusze=cfg_vp['veusze'])

    if st(40, f'Draw in Veusz by loader-drawer.vsz method'):
        # save all vsz files that use separate code

        from os import chdir as os_chdir
        dt_s = 300
        cfg['in'][
            'pattern_path'] = db_path.parent / f'vsz_{dt_s:d}s' / '~pattern~.vsz'

        time_starts = pd.read_csv(
            db_path.parent / r'processed_h5,vsz' / 'intervals_selected.txt',
            index_col=0,
            parse_dates=True,
            date_parser=lambda x: pd.to_datetime(x, format='%Y-%m-%dT%H:%M:%S'
                                                 )).index

        pattern_code = cfg['in']['pattern_path'].read_bytes(
        )  # encoding='utf-8'
        path_vsz_all = []
        for i, probe in enumerate(probes):
            probe_name = f"{cfg['in']['probes_prefix']}{probe:02}"  # table name in db
            l.info('Draw %s in Veusz: %d intervals...', probe_name,
                   time_starts.size)
            for i_interval, time_start in enumerate(time_starts, start=1):
                path_vsz = cfg['in']['pattern_path'].with_name(
                    f"{time_start:%y%m%d_%H%M}_{probe_name.replace('incl','i')}.vsz"
                )
                # copy file to path_vsz
                path_vsz.write_bytes(pattern_code)  # replaces 1st row
                path_vsz_all.append(path_vsz)

        os_chdir(cfg['in']['pattern_path'].parent)
        veuszPropagate.main(
            [
                'ini/veuszPropagate.ini',
                '--path',
                str(cfg['in']['pattern_path'].with_name(
                    '??????_????_*.vsz')),  # db_path),
                '--pattern_path',
                f"{cfg['in']['pattern_path']}_",
                # used here only to auto-detect the export dir; it may be a non-existing file path if ['out']['paths'] is provided
                # '--table_log', f'/{device}/logRuns',
                # '--add_custom_list', f'{device_veusz_prefix}USE_time_search_runs',  # 'i3_USE_timeRange',
                # '--add_custom_expressions',
                # """'[["{log_row[Index]:%Y-%m-%dT%H:%M:%S}", "{log_row[DateEnd]:%Y-%m-%dT%H:%M:%S}"]]'""",
                # '--export_pages_int_list', '1', #'--b_images_only', 'True'
                '--b_interact',
                '0',
                '--b_update_existed',
                'True',  # todo: delete_overlapped
                '--b_images_only',
                'True',
                '--load_timeout_s_float',
                str(cfg['program']['load_timeout_s'])
                # '--min_time', '2020-07-08T03:35:00',
            ],
            **{'out': {
                'paths': path_vsz_all
            }})

    if st(50, 'Export from existed Veusz files in dir'):
        pattern_parent = db_path.parent  # r'vsz_5min\191126_0000_5m_w02.vsz'
        pattern_path = str(pattern_parent / r'processed_h5,vsz' /
                           '??????incl_proc#[1-9][0-9].vsz')  # [0-2,6-9]
        veuszPropagate.main([
            'ini/veuszPropagate.ini',
            '--path',
            pattern_path,
            '--pattern_path',
            pattern_path,
            # '--export_pages_int_list', '1', #'--b_images_only', 'True'
            '--b_interact',
            '0',
            '--b_update_existed',
            'True',  # todo: delete_overlapped
            '--b_images_only',
            'True',
            '--load_timeout_s_float',
            str(cfg['program']['load_timeout_s']),
            '--b_execute_vsz',
            'True',
            '--before_next',
            'Close()'  # Close() is needed if b_execute_vsz is used with many files
        ])
Example #8
def main(new_arg=None, veusze=None):
    """
    Note: if the vsz data source has an 'Ag_old_inv' variable then do not invert the coef. Else invert it for use in vsz files which do not invert coefs
    :param new_arg:
    :return:
    """
    global l
    p = veuszPropagate.my_argparser()
    p_groups = {
        g.title: g
        for g in p._action_groups if g.title.split(' ')[-1] != 'arguments'
    }  # skips special argparse groups
    p_groups['in'].add(
        '--channels_list',
        help=
        'channels that need zero calibration: "magnetometer" or "M" for magnetometer and anything else for accelerometer; use "M, A" for both, empty to skip'
    )
    p_groups['in'].add(
        '--widget',
        help=
        'path to Veusz widget property which contains coefficients. For example "/fitV(force)/grid1/graph/fit1/values"'
    )
    p_groups['in'].add(
        '--data_for_coef',
        default='max_incl_of_fit_t',
        help=
        'Veusz data to use as coef. If used with widget then this data is appended to data from widget'
    )

    p_groups['out'].add('--out.path', help='path to db where write coef')
    p_groups['out'].add(
        '--re_tbl_from_vsz_name',
        help=
        'regex to extract hdf5 table name from Veusz file name (last used "\D*\d*")'
        # ? why not simply specify table name?
    )
    # todo:  "b_update_existed" arg will be used here for exported images. Check whether False works or prevent open vsz

    cfg = cfg_from_args(p, new_arg)

    if not Path(cfg['program']['log']).is_absolute():
        cfg['program']['log'] = str(
            Path(__file__).parent.joinpath(
                cfg['program']['log']))  # l.root.handlers[0].baseFilename
    if not cfg:
        return
    if new_arg == '<return_cfg>':  # to help testing
        return cfg

    l = init_logging(logging, None, cfg['program']['log'],
                     cfg['program']['verbose'])
    veuszPropagate.l = l
    print('\n' + this_prog_basename(__file__), 'started', end=' ')
    if cfg['out']['b_images_only']:
        print('in images only mode.')
    try:
        print('Output pattern ')
        # Using cfg['out'] to store pattern information
        if not Path(cfg['in']['pattern_path']).is_absolute():
            cfg['in']['pattern_path'] = str(cfg['in']['path'].parent.joinpath(
                cfg['in']['pattern_path']))
        set_field_if_no(cfg['out'], 'path', cfg['in']['pattern_path'])
        cfg['out']['paths'], cfg['out']['nfiles'], cfg['out'][
            'path'] = init_file_names(**cfg['out'],
                                      b_interact=cfg['program']['b_interact'])
    except Ex_nothing_done as e:
        print(e.message, ' - no pattern')
        return  # or raise FileNotFoundError?
    try:
        print(end='Data ')
        cfg['in']['paths'], cfg['in']['nfiles'], cfg['in'][
            'path'] = init_file_names(
                **cfg['in'], b_interact=False)  # do not bother user 2nd time
    except Ex_nothing_done as e:
        print(e.message)
        return  # or raise FileNotFoundError?
    if not cfg['out']['export_dir']:
        cfg['out']['export_dir'] = Path(cfg['out']['path']).parent
    if cfg['program']['before_next'] and 'restore_config' in cfg['program'][
            'before_next']:
        cfg['in_saved'] = cfg['in'].copy()
    # cfg['loop'] = asyncio.get_event_loop()
    # cfg['export_timeout_s'] = 600
    cfg['out']['export_dir'] = dir_from_cfg(cfg['out']['path'].parent,
                                            cfg['out']['export_dir'])

    veuszPropagate.load_vsz = veuszPropagate.load_vsz_closure(
        cfg['program']['veusz_path'],
        b_execute_vsz=cfg['program']['b_execute_vsz'])
    gen_veusz_and_logs = veuszPropagate.load_to_veusz(
        veuszPropagate.ge_names(cfg), cfg, veusze)

    names_get = ['Inclination_mean_use1', 'logVext1_m__s'
                 ]  # \, 'Inclination_mean_use2', 'logVext2_m__s'
    names_get_fits = ['fit']  # , 'fit2'
    vsz_data = {n: [] for n in names_get}
    for n in names_get_fits:
        vsz_data[n] = []

    # prepare collecting all coef in text also
    names_get_txt_results = ['fit1result']  # , 'fit2result'
    txt_results = {n: {} for n in names_get_txt_results}

    i_file = 0
    for veusze, log in gen_veusz_and_logs:
        if not veusze:
            continue
        i_file += 1
        print(i_file)
        if cfg['out']['re_tbl_from_vsz_name']:
            table = cfg['out']['re_tbl_from_vsz_name'].match(
                log['out_name']).group()
        else:
            table = re.sub(
                '^[\d_]*', '',
                log['out_name'])  # delete all first digits (date part)
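        # Example (added comment): for an out_name like '190710_1200_incl09' the
        # re.sub('^[\d_]*', '', ...) branch strips the leading date digits and underscores,
        # leaving the table name 'incl09'.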

        for n in names_get:
            vsz_data[n].append(veusze.GetData(n)[0])
        for n in [cfg['in']['data_for_coef']]:
            vsz_data[n] = list(veusze.GetData(n)[0])

        # Save velocity coefficients into //{table}//coef//Vabs{i} where i is the fit number enumerated from 0
        for i, name_out in enumerate(names_get_fits):  # ['fit1', 'fit2']
            coef = veusze.Get(
                cfg['in']['widget']
            )  # veusze.Root['fitV(inclination)']['grid1']['graph'][name_out].values.val
            if 'a' in coef:
                coef_list = [
                    coef[k] for k in ['d', 'c', 'b', 'a'] if k in coef
                ]
            else:
                coef_list = [
                    coef[k] for k in sorted(coef.keys(), key=digits_first)
                ]
            if cfg['in']['data_for_coef']:
                coef_list += vsz_data[cfg['in']['data_for_coef']]

            vsz_data[name_out].append(coef_list)
            h5copy_coef(None,
                        cfg['out']['path'],
                        table,
                        dict_matrices={
                            f'//coef//Vabs{i}':
                            coef_list,
                            f'//coef//date':
                            np.float64([
                                np.NaN,
                                np.datetime64(datetime.now()).astype(np.int64)
                            ])
                        })
            # h5savecoef(cfg['out']['path'], path=f'//{table}//coef//Vabs{i}', coef=coef_list)
            txt_results[names_get_txt_results[i]][table] = str(coef)

        # Zeroing matrix - calculated in Veusz by rotation on old0pitch old0roll
        Rcor = veusze.GetData(
            'Rcor'
        )[0]  # zeroing angles tuned by "USEcalibr0V_..." in Veusz Custom definitions

        if len(cfg['in']['channels']):
            l.info(
                'Applying zero calibration matrix of pitch = {} and roll = {} degrees'
                .format(np.rad2deg(veusze.GetData('old0pitch')[0][0]),
                        np.rad2deg(veusze.GetData('old0roll')[0][0])))
            with h5py.File(cfg['out']['path'], 'a') as h5:
                for channel in cfg['in']['channels']:
                    (col_str, coef_str) = channel_cols(channel)
                    # h5savecoef(cfg['out']['path'], path=f'//{table}//coef//Vabs{i}', coef=coef_list), dict_matrices={'//coef//' + coef_str + '//A': coefs[tbl][channel]['A'], '//coef//' + coef_str + '//C': coefs[tbl][channel]['b']})

                    # Currently used inclinometers have electronics rotated by 180 deg. Previously we inserted an additional
                    # rotation operation in Veusz by inverting A_old. Now we want to include this information in the database coef only.
                    try:  # Checking that A_old_inv exists
                        A_old_inv = veusze.GetData('Ag_old_inv')
                        is_old_used = True  # Rcor does not account for the rotated electronics.
                    except KeyError:
                        is_old_used = False  # Rcor accounts for the rotated electronics.

                    if is_old_used:  # The rotation is done in vsz (A_old in vsz is inverted) so we need to rotate it back
                        # to use it in vsz files without such inversion

                        # Rotate on 180 deg (note: this is not inversion)
                        A_old_inv = h5[f'//{table}//coef//{coef_str}//A'][...]
                        A_old = np.dot(A_old_inv,
                                       [[1, 0, 0], [0, -1, 0], [0, 0, -1]
                                        ])  # adds 180 deg to roll
                    else:
                        A_old = h5[f'//{table}//coef//{coef_str}//A'][...]
                    # A_old now accounts for the rotated electronics

                    A = np.dot(Rcor, A_old)
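                    # Added note: the [[1, 0, 0], [0, -1, 0], [0, 0, -1]] factor above flips
                    # the sign of the 2nd and 3rd columns of A_old_inv, i.e. a 180 deg
                    # rotation about the x (roll) axis, not a matrix inversion; Rcor @ A_old
                    # then stacks the zeroing rotation on top of that orientation matrix.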
                    h5copy_coef(None,
                                h5,
                                table,
                                dict_matrices={f'//coef//{coef_str}//A': A})

        # veusze.Root['fitV(inclination)']['grid1']['graph2'][name_out].function.val
        print(vsz_data)
        veuszPropagate.export_images(
            veusze,
            cfg['out'],
            f"_{log['out_name']}",
            b_skip_if_exists=not cfg['out']['b_update_existed'])

        # vsz_data = veusz_data(veusze, cfg['in']['data_yield_prefix'])
        # # caller do some processing of data and gives new cfg:
        # cfgin_update = yield(vsz_data, log)  # to test run veusze.Save('-.vsz')
        # cfg['in'].update(cfgin_update)  # only update of cfg.in.add_custom_expressions is tested
        # if cfg['in']['add_custom']:
        #     for n, e in zip(cfg['in']['add_custom'], cfg['in']['add_custom_expressions']):
        #         veusze.AddCustom('definition', n, e, mode='replace')
        # #cor_savings.send((veusze, log))
        #
        #
        #
        #

    # veusze.Save(str(path_vsz_save), mode='hdf5')  # veusze.Save(str(path_vsz_save)) saves time with bad resolution
    print(f'Ok')
    print(txt_results)
    for n in names_get:
        pd.DataFrame.from_dict(
            dict(zip(list(txt_results['fit1result'].keys()),
                     vsz_data[n]))).to_csv(
                         Path(cfg['out']['path']).with_name(
                             f'average_for_fitting-{n}.txt'),
                         sep='\t',
                         header=list(txt_results['fit1result'].keys()),
                         mode='a')
    return {**vsz_data, 'veusze': veusze}