def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)
    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RFs for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = args.outdir + '/dl2_' + os.path.basename(args.datafile)

    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
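# The replace_config() call above overlays the user-supplied options on the
# standard configuration. A minimal sketch of the assumed merge semantics
# (hypothetical re-implementation for illustration only; lstchain's actual
# function may treat nested dictionaries differently):

def replace_config_sketch(standard_config, custom_config):
    """Return a copy of standard_config with custom_config keys taking precedence."""
    merged = standard_config.copy()
    merged.update(custom_config)  # custom values override standard ones
    return merged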
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        data = pd.concat(
            [data, pd.read_hdf(args.datafile, key=dl1_params_src_dep_lstcam_key)],
            axis=1)

    # Deal with missing pointing values (this happened when `ucts_time` was invalid).
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # Make sure there is at least one good pointing value to interpolate from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RFs for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = os.path.join(args.outdir,
                           os.path.basename(args.datafile).replace('dl1', 'dl2'))

    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
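# impute_pointing() above fills NaN telescope pointings from valid neighbouring
# events. A hedged sketch of that idea using plain pandas (illustrative only;
# lstchain's real implementation may interpolate over event timestamps rather
# than row order):

import numpy as np
import pandas as pd

def impute_pointing_sketch(df):
    df = df.copy()
    for col in ('alt_tel', 'az_tel'):
        # Linear interpolation over event order, extended to both edges.
        df[col] = df[col].interpolate(limit_direction='both')
    return df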
def main():
    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if 'lh_fit_config' in config.keys():
        lhfit_data = pd.read_hdf(args.input_file, key=dl1_likelihood_params_lstcam_key)
        if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(lhfit_data['event_id'] == data['event_id']):
            lhfit_data.drop({'obs_id', 'event_id'}, axis=1, inplace=True)
        lhfit_keys = lhfit_data.keys()
        data = pd.concat([data, lhfit_data], axis=1)

    # If real data, add delta_t to the dataframe keys
    data = add_delta_t_key(data)

    # Deal with missing pointing values (this happened when `ucts_time` was invalid).
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # Make sure there is at least one good pointing value to interpolate from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Get the paths of the trained RFs for reconstruction:
    file_reg_energy = os.path.join(args.path_models, 'reg_energy.sav')
    file_cls_gh = os.path.join(args.path_models, 'cls_gh.sav')

    if config['disp_method'] == 'disp_vector':
        file_disp_vector = os.path.join(args.path_models, 'reg_disp_vector.sav')
    elif config['disp_method'] == 'disp_norm_sign':
        file_disp_norm = os.path.join(args.path_models, 'reg_disp_norm.sav')
        file_disp_sign = os.path.join(args.path_models, 'cls_disp_sign.sav')

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['energy_regression_features']
            + config['disp_regression_features']
            + config['particle_classification_features']
            + config['disp_classification_features'],
        )

        if config['disp_method'] == 'disp_vector':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_vector=file_disp_vector,
                                          focal_length=focal_length,
                                          custom_config=config)
        elif config['disp_method'] == 'disp_norm_sign':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_norm=file_disp_norm,
                                          cls_disp_sign=file_disp_sign,
                                          focal_length=focal_length,
                                          custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        # If source-dependent parameters are already in the DL1 data, just read them;
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(args.input_file):
            data_srcdep = get_srcdep_params(args.input_file)
        # if not, the source-dependent parameters are computed now.
        else:
            data_srcdep = pd.concat(
                dl1_to_dl2.get_source_dependent_parameters(data, config,
                                                           focal_length=focal_length),
                axis=1)

        dl2_srcdep_dict = {}
        srcindep_keys = data.keys()
        srcdep_assumed_positions = data_srcdep.columns.levels[0]

        for i, k in enumerate(srcdep_assumed_positions):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['energy_regression_features']
                + config['disp_regression_features']
                + config['particle_classification_features']
                + config['disp_classification_features'],
            )

            if config['disp_method'] == 'disp_vector':
                dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                 file_cls_gh,
                                                 file_reg_energy,
                                                 reg_disp_vector=file_disp_vector,
                                                 focal_length=focal_length,
                                                 custom_config=config)
            elif config['disp_method'] == 'disp_norm_sign':
                dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                 file_cls_gh,
                                                 file_reg_energy,
                                                 reg_disp_norm=file_disp_norm,
                                                 cls_disp_sign=file_disp_sign,
                                                 focal_length=focal_length,
                                                 custom_config=config)

            dl2_srcdep = dl2_df.drop(srcindep_keys, axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            if i == 0:
                dl2_srcindep = dl2_df[srcindep_keys]

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(args.output_dir,
                               os.path.basename(args.input_file).replace('dl1', 'dl2', 1))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    if dl1_likelihood_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_likelihood_params_lstcam_key)

    metadata = global_metadata()
    write_metadata(metadata, output_file)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:
            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname, createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    # Need a container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
    if not config['source_dependent']:
        if 'lh_fit_config' not in config.keys():
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
        else:
            dl2_onlylhfit = dl2[lhfit_keys]
            dl2.drop(lhfit_keys, axis=1, inplace=True)
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
            write_dataframe(dl2_onlylhfit, output_file, dl2_likelihood_params_lstcam_key,
                            config=config, meta=metadata)
    else:
        write_dl2_dataframe(dl2_srcindep, output_file, config=config, meta=metadata)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key, config=config, meta=metadata)
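# The source-dependent branch above loops over data_srcdep.columns.levels[0],
# i.e. the first level of a two-level column MultiIndex: level 0 holds the
# assumed source positions, level 1 the parameter names. A small illustration
# (the position and parameter names here are made up for the example):

import pandas as pd

cols = pd.MultiIndex.from_product([['on', 'off_180'], ['dist', 'alpha']])
data_srcdep = pd.DataFrame([[0.1, 5.0, 0.4, 80.0]], columns=cols)
print(data_srcdep.columns.levels[0])  # the assumed source positions
print(data_srcdep['on'])              # selecting one position drops level 0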
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        data_src_dep = pd.read_hdf(args.input_file, key=dl1_params_src_dep_lstcam_key)
        data = pd.concat([data, data_src_dep], axis=1)

    # Deal with missing pointing values (this happened when `ucts_time` was invalid).
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # Make sure there is at least one good pointing value to interpolate from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    data = filter_events(
        data,
        filters=config["events_filters"],
        finite_params=config['regression_features'] + config['classification_features'],
    )

    # Load the trained RFs for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(args.output_dir,
                               os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:
            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname, createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    write_dl2_dataframe(dl2, output_file)
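# get_dataset_keys() above enumerates the datasets in the DL1 file so that
# everything except the removed keys is carried over to the DL2 file. A minimal
# sketch of such a helper using PyTables (an assumption for illustration;
# lstchain's actual implementation may differ):

import tables

def get_dataset_keys_sketch(filename):
    """Return the paths of all leaf datasets in an HDF5 file."""
    with tables.open_file(filename, 'r') as f:
        return [node._v_pathname for node in f.walk_nodes('/', classname='Leaf')]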
def test_write_dl2_dataframe(tmp_path, simulated_dl2_file):
    from lstchain.io.io import dl2_params_lstcam_key
    from lstchain.io import write_dl2_dataframe

    dl2 = pd.read_hdf(simulated_dl2_file, key=dl2_params_lstcam_key)
    write_dl2_dataframe(dl2, tmp_path / "dl2_test.h5")
def test_write_dl2_dataframe():
    from lstchain.tests.test_lstchain import dl2_file, test_dir
    from lstchain.io.io import dl2_params_lstcam_key
    from lstchain.io import write_dl2_dataframe

    dl2 = pd.read_hdf(dl2_file, key=dl2_params_lstcam_key)
    write_dl2_dataframe(dl2, os.path.join(test_dir, 'dl2_test.h5'))
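# Both test variants above only check that writing succeeds. A hypothetical
# extension (not in the original test suite) would assert the round-trip,
# assuming write_dl2_dataframe stores the table under dl2_params_lstcam_key,
# as the DL2 files read above suggest:

def test_dl2_write_read_roundtrip(tmp_path, simulated_dl2_file):
    import pandas as pd
    from lstchain.io import write_dl2_dataframe
    from lstchain.io.io import dl2_params_lstcam_key

    dl2 = pd.read_hdf(simulated_dl2_file, key=dl2_params_lstcam_key)
    output = tmp_path / "dl2_test.h5"
    write_dl2_dataframe(dl2, output)

    read_back = pd.read_hdf(output, key=dl2_params_lstcam_key)
    assert len(read_back) == len(dl2)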
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    # If real data, add delta_t to the dataframe keys
    data = add_delta_t_key(data)

    # Deal with missing pointing values (this happened when `ucts_time` was invalid).
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # Make sure there is at least one good pointing value to interpolate from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Load the trained RFs for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['regression_features'] + config['classification_features'],
        )

        dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                      focal_length=focal_length, custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        data_srcdep = pd.read_hdf(args.input_file, key=dl1_params_src_dep_lstcam_key)
        # Columns come back from HDF5 as stringified tuples; rebuild the MultiIndex.
        data_srcdep.columns = pd.MultiIndex.from_tuples(
            [tuple(col[1:-1].replace('\'', '').replace(' ', '').split(","))
             for col in data_srcdep.columns])

        dl2_srcdep_dict = {}

        for i, k in enumerate(data_srcdep.columns.levels[0]):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['regression_features'] + config['classification_features'],
            )

            dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param, cls_gh, reg_energy,
                                             reg_disp_vector, focal_length=focal_length,
                                             custom_config=config)

            dl2_srcdep = dl2_df.drop(data.keys(), axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            if i == 0:
                dl2_srcindep = dl2_df.drop(data_srcdep[k].keys(), axis=1)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(args.output_dir,
                               os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:
            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname, createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    if not config['source_dependent']:
        write_dl2_dataframe(dl2, output_file)
    else:
        write_dl2_dataframe(dl2_srcindep, output_file)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key)
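# The pd.MultiIndex.from_tuples() call above undoes an HDF5 round-trip
# artefact: MultiIndex columns can come back as strings such as "('on', 'dist')".
# A standalone demonstration of the same parsing expression (the position and
# parameter names are made up for the example):

import pandas as pd

flat_cols = ["('on', 'dist')", "('on', 'alpha')", "('off_180', 'dist')"]
tuples = [tuple(col[1:-1].replace('\'', '').replace(' ', '').split(','))
          for col in flat_cols]
print(pd.MultiIndex.from_tuples(tuples))
# -> MultiIndex([('on', 'dist'), ('on', 'alpha'), ('off_180', 'dist')], ...)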