示例#1
0
def test_filter_events():
    """Exercise ``filter_events`` on a three-row frame holding a NaN and an inf."""
    from lstchain.reco.utils import filter_events

    events = pd.DataFrame({
        'a': [1, 2, 3],
        'b': [np.nan, 2.2, 3.2],
        'c': [1, 2, np.inf],
    })

    # Requiring a finite 'b' is expected to drop only the first row.
    only_b_finite = filter_events(events, finite_params=['b'])
    np.testing.assert_array_equal(
        only_b_finite,
        pd.DataFrame({'a': [2, 3], 'b': [2.2, 3.2], 'c': [2, np.inf]}),
    )

    # Requiring finite 'b' and 'c' is expected to keep the middle row alone.
    both_finite = filter_events(events, finite_params=['b', 'c'])
    np.testing.assert_array_equal(
        both_finite,
        pd.DataFrame({'a': [2], 'b': [2.2], 'c': [2]}),
    )

    # A finite_params entry that is not a column is expected to change nothing.
    unknown_column = filter_events(events, finite_params=['e'])
    np.testing.assert_array_equal(unknown_column, events)

    # A range filter on 'a' is expected to keep only the row with a == 1.
    in_range = filter_events(events, filters={'a': [0, 1]})
    np.testing.assert_array_equal(
        in_range,
        pd.DataFrame({'a': [1], 'b': [np.nan], 'c': 1}),
    )
示例#2
0
def test_apply_models(simulated_dl1_file, simulated_dl2_file, rf_models):
    """Run ``apply_models`` twice on the filtered DL1 table and store the DL2 result.

    The first call receives the objects returned by ``joblib.load``; the
    second call receives the ``rf_models`` entries themselves. Only the
    result of the second call is written to ``simulated_dl2_file``.
    """
    from lstchain.reco.dl1_to_dl2 import apply_models
    import joblib

    events = pd.read_hdf(simulated_dl1_file, key=dl1_params_lstcam_key)
    event_filters = standard_config["events_filters"]
    # Every feature column used by any of the models has to be finite.
    feature_columns = (
        standard_config['energy_regression_features']
        + standard_config['disp_regression_features']
        + standard_config['particle_classification_features']
        + standard_config['disp_classification_features']
    )
    events = filter_events(events,
                           filters=event_filters,
                           finite_params=feature_columns)

    energy_model = joblib.load(rf_models["energy"])
    gh_model = joblib.load(rf_models["gh_sep"])
    disp_norm_model = joblib.load(rf_models["disp_norm"])
    disp_sign_model = joblib.load(rf_models["disp_sign"])

    # First pass: hand over the already loaded models.
    dl2 = apply_models(
        events,
        gh_model,
        energy_model,
        reg_disp_norm=disp_norm_model,
        cls_disp_sign=disp_sign_model,
        custom_config=standard_config,
    )

    # Second pass: hand over the rf_models entries that were given to joblib.load.
    dl2 = apply_models(
        events,
        rf_models["gh_sep"],
        rf_models["energy"],
        reg_disp_norm=rf_models["disp_norm"],
        cls_disp_sign=rf_models["disp_sign"],
        custom_config=standard_config,
    )

    dl2.to_hdf(simulated_dl2_file, key=dl2_params_lstcam_key)
def main():
    """Apply the trained random forests to a DL1 file and write the DL2 output.

    Works on the module-level ``args``: the optional ``args.config_file`` is
    merged into ``standard_config``, the events of ``args.datafile`` are
    filtered, the three models found in ``args.path_models`` are applied, and
    the result is written to ``dl2_<input name>`` inside ``args.outdir``
    (the input file is copied there first).
    """
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception as err:
            # Best effort, as originally intended: keep the standard
            # configuration. A string is not a valid exception filter, so the
            # previous handler itself raised TypeError on any failure.
            print(f"Custom configuration could not be loaded !!! ({err})")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)
    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RF for reconstruction:
    reg_energy = joblib.load(os.path.join(args.path_models, "reg_energy.sav"))
    reg_disp_vector = joblib.load(os.path.join(args.path_models, "reg_disp_vector.sav"))
    cls_gh = joblib.load(os.path.join(args.path_models, "cls_gh.sav"))

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector, custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = os.path.join(args.outdir, 'dl2_' + os.path.basename(args.datafile))

    # Start from a copy of the input file, then hand the DL2 table to the writer.
    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
示例#4
0
def data_prepare(filename,
                 key=dl1_params_lstcam_key,
                 filters=None,
                 telescope=1,
                 quality=True):
    """Load, filter and shuffle the parameter table of a single telescope.

    Parameters
    ----------
    filename:
        HDF5 file read with ``pd.read_hdf``.
    key:
        Key of the parameter table inside the file.
    filters:
        Event filters handed to ``utils.filter_events``. When left at
        ``None`` the module-level ``events_filters`` is used, which is what
        this function always did before.
    telescope:
        Value of ``tel_id`` whose rows are kept.
    quality:
        When true, ``quality_cuts`` is applied to the table.

    Returns
    -------
    The remaining rows in shuffled order with a fresh 0..n-1 index.
    """
    # Load the parameter table into a pandas DataFrame.
    param = pd.read_hdf(filename, key=key)
    # Honour the caller's filters; the argument used to be silently ignored.
    param = utils.filter_events(
        param, filters=events_filters if filters is None else filters)

    # Basic statistics of the data.
    print('Filename:', filename)
    print('Size of dataset:', param.shape[0])

    # Select one of the simulated telescopes. `where` blanks the rows of the
    # other telescopes with NaN and `dropna` removes them, together with any
    # row that already contained a NaN.
    param = param.where(param.tel_id == telescope)
    param = param.dropna()
    print('Size of dataset (1 tel):', param.shape[0])

    # Application of selection cuts
    if quality:
        param = quality_cuts(param)
        param = param.dropna()
        print('Size of dataset after selection cuts:', param.shape[0])
    else:
        print('No quality cuts applied')

    param = shuffle(param).reset_index(drop=True)
    return param
示例#5
0
def theta2_hist_per_energy_bin(irf_file, dl2_gamma_file):
    """
    Plot one theta2 histogram per energy bin for the selected gamma events
    and draw the theta2 cut read from the IRF file on each panel.

    Parameters
    ----------
    irf_file:
        File handed to `read_gh_cut_table` and `read_theta_cut_table`.
    dl2_gamma_file:
        File handed to `read_mc_dl2_to_QTable`.

    Returns
    -------
    axes:
        The axes array created by `plt.subplots`; panels beyond the number
        of energy bins stay empty.
    """
    # The simulation info returned alongside the events is not needed here.
    gammas, _ = read_mc_dl2_to_QTable(dl2_gamma_file)
    # `filters` is not a local name; it has to be defined at module level.
    gammas = filter_events(gammas, filters)
    for prefix in ("true", "reco"):
        gammas[f"{prefix}_source_fov_offset"] = calculate_source_fov_offset(
            gammas, prefix=prefix)

    source_alt, source_az = determine_source_position(gammas)
    gammas['theta'] = calculate_theta(gammas,
                                      assumed_source_az=source_az,
                                      assumed_source_alt=source_alt)

    gh_cuts = read_gh_cut_table(irf_file)
    theta_cuts = read_theta_cut_table(irf_file)

    tc = theta_cuts["RAD_MAX"].T[:, 0]
    energy_min = theta_cuts['ENERG_LO']
    energy_max = theta_cuts['ENERG_HI']

    # Both cut tables must share the same energy binning.
    np.testing.assert_allclose(energy_min, gh_cuts['low'])
    np.testing.assert_allclose(energy_max, gh_cuts['high'])

    # Enough rows of `ncols` panels to hold one panel per energy bin.
    ncols = 5
    full_rows, leftover = divmod(len(energy_min), ncols)
    nrows = full_rows + int(leftover > 0)

    fig, axes = plt.subplots(ncols=ncols,
                             nrows=nrows,
                             figsize=(ncols * 5, nrows * 5))

    # Loop invariants, computed once.
    theta2_range = (0, 0.4)
    t2unit = u.deg**2
    panels = axes.ravel()

    for ii, (emin, emax) in enumerate(zip(energy_min, energy_max)):
        ax = panels[ii]
        # NOTE(review): events are kept when gh_score is *below* the cut,
        # while the summary above speaks of selected events — confirm the
        # direction of this comparison.
        mask = (gammas['gh_score'] < gh_cuts['cut'][ii]) & (
            emin <= gammas['true_energy']) & (gammas['true_energy'] < emax)

        n, bins, _ = ax.hist(
            (gammas[mask]['theta']**2).to_value(t2unit),
            label=f'{emin:0.2f}-{emax:0.2f}',
            histtype='step',
            lw=2,
            bins=np.linspace(*theta2_range),
            range=theta2_range,
            density=False,
        )
        # Mark the theta2 cut of this bin up to the tallest histogram bar.
        ax.vlines((tc[ii]**2).to_value(t2unit), 0, np.max(n), color='orange')
        ax.legend()
        ax.set_xlim(*theta2_range)
        ax.set_xlabel(f'theta2 / {t2unit}')
        ax.set_title(f"gh cut: {gh_cuts['cut'][ii]:0.3f}")

    # One layout pass once every panel is drawn, instead of one per panel.
    plt.tight_layout()

    return axes
示例#6
0
def test_filter_events():
    """Check ``filter_events`` with explicit range filters, finite checks and an unknown key."""
    from lstchain.reco.utils import filter_events

    events = pd.DataFrame({
        "a": [1, 2, 3],
        "b": [np.nan, 2.2, 3.2],
        "c": [1, 2, np.inf],
    })

    # Wide-open ranges plus a finite 'b': the NaN row is expected to go.
    finite_b = filter_events(
        events,
        filters={"a": [0, np.inf], "b": [0, np.inf], "c": [0, np.inf]},
        finite_params=["b"],
    )
    expected_finite_b = pd.DataFrame({
        "a": [2, 3],
        "b": [2.2, 3.2],
        "c": [2, np.inf],
    })
    np.testing.assert_array_equal(finite_b, expected_finite_b)

    # Wide-open ranges plus finite 'b' and 'c': only the middle row is expected.
    finite_b_and_c = filter_events(
        events,
        filters={"a": [0, np.inf], "b": [0, np.inf], "c": [0, np.inf]},
        finite_params=["b", "c"],
    )
    expected_finite_b_and_c = pd.DataFrame({"a": [2], "b": [2.2], "c": [2]})
    np.testing.assert_array_equal(finite_b_and_c, expected_finite_b_and_c)

    # A range on 'a' alone is expected to keep the row with a == 1.
    first_row_only = filter_events(events, filters={"a": [0, 1]})
    expected_first_row = pd.DataFrame({"a": [1], "b": [np.nan], "c": 1})
    np.testing.assert_array_equal(first_row_only, expected_first_row)

    # Filtering on a column that does not exist has to raise KeyError.
    with np.testing.assert_raises(KeyError):
        filter_events(events, filters={"e": [0, np.inf]})
示例#7
0
def main():
    """Apply the trained random forests to a DL1 file and write a DL2 file.

    Works on the module-level ``args``: the optional ``args.config_file`` is
    merged into ``standard_config``, the parameter table of ``args.datafile``
    is read (together with the source-dependent table when the configuration
    asks for it), missing pointing values are handled, events are filtered
    and the three models found in ``args.path_models`` are applied. The
    output file lives in ``args.outdir`` and is named after the input file
    with 'dl1' replaced by 'dl2'.
    """
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception as err:
            # Best effort, as originally intended: keep the standard
            # configuration. A string is not a valid exception filter, so the
            # previous handler itself raised TypeError on any failure.
            print(f"Custom configuration could not be loaded !!! ({err})")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        # The second table has to be read from the input file; the loaded
        # DataFrame itself is not something read_hdf can open.
        data = pd.concat(
            [data,
             pd.read_hdf(args.datafile, key=dl1_params_src_dep_lstcam_key)],
            axis=1)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is a least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            # Nothing to interpolate from: overwrite both columns with -pi/2.
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.
    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RF for reconstruction:
    reg_energy = joblib.load(os.path.join(args.path_models, "reg_energy.sav"))
    reg_disp_vector = joblib.load(
        os.path.join(args.path_models, "reg_disp_vector.sav"))
    cls_gh = joblib.load(os.path.join(args.path_models, "cls_gh.sav"))

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data,
                                  cls_gh,
                                  reg_energy,
                                  reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = os.path.join(
        args.outdir,
        os.path.basename(args.datafile).replace('dl1', 'dl2'))

    # Start from a copy of the input file, then hand the DL2 table to the writer.
    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
示例#8
0
def test_apply_models():
    """Apply the stored models to the filtered DL1 table and save the DL2 output."""
    from lstchain.reco.dl1_to_dl2 import apply_models
    import joblib

    events = pd.read_hdf(dl1_file, key=dl1_params_lstcam_key)
    event_filters = custom_config["events_filters"]
    events = filter_events(events, filters=event_filters)

    # Same loading order as the model files are listed: energy, disp, gh_sep.
    energy_model = joblib.load(file_model_energy)
    disp_model = joblib.load(file_model_disp)
    gh_model = joblib.load(file_model_gh_sep)

    reconstructed = apply_models(
        events,
        gh_model,
        energy_model,
        disp_model,
        custom_config=custom_config,
    )
    reconstructed.to_hdf(dl2_file, key=dl2_params_lstcam_key)
示例#9
0
def test_apply_models():
    """Filter the DL1 table on finite model features, apply the models, save DL2."""
    from lstchain.reco.dl1_to_dl2 import apply_models
    import joblib

    events = pd.read_hdf(dl1_file, key=dl1_params_lstcam_key)
    event_filters = standard_config["events_filters"]
    # Columns used by the regressors and the classifier have to be finite.
    feature_columns = (standard_config['regression_features']
                       + standard_config['classification_features'])
    events = filter_events(events,
                           filters=event_filters,
                           finite_params=feature_columns)

    # Same loading order as the model files are listed: energy, disp, gh_sep.
    energy_model = joblib.load(file_model_energy)
    disp_model = joblib.load(file_model_disp)
    gh_model = joblib.load(file_model_gh_sep)

    reconstructed = apply_models(
        events,
        gh_model,
        energy_model,
        disp_model,
        custom_config=standard_config,
    )
    reconstructed.to_hdf(dl2_file, key=dl2_params_lstcam_key)
示例#10
0
def main():
    """Build the models, apply them to the test files and plot the results.

    Works on the module-level ``args``. The models are built from
    ``args.gammafile`` and ``args.protonfile``, then applied to the filtered
    events of ``args.gammatest`` and ``args.protontest``. Each figure is
    shown on screen unless ``args.batch`` is set. The process exits early
    when no event has both ``reco_type == 0`` and ``mc_type == 0``.
    """

    def _show_unless_batch():
        # Display the current figures only when not running in batch mode.
        if not args.batch:
            plt.show()

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except Exception as err:
            # Best effort, as originally intended: keep the standard
            # configuration. A string is not a valid exception filter, so the
            # previous handler itself raised TypeError on any failure.
            log.warning(f'Custom configuration could not be loaded !!! ({err})')

    config = replace_config(standard_config, custom_config)

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data,
                                  cls_gh,
                                  reg_energy,
                                  reg_disp_vector,
                                  custom_config=config)

    ####PLOT SOME RESULTS#####

    selected_gammas = dl2.query('reco_type==0 & mc_type==0')

    if len(selected_gammas) == 0:
        log.warning('No gammas selected, I will not plot any output')
        sys.exit()

    plot_dl2.plot_features(dl2)
    _show_unless_batch()

    plot_dl2.energy_results(selected_gammas)
    _show_unless_batch()

    plot_dl2.direction_results(selected_gammas)
    _show_unless_batch()

    plot_dl2.plot_disp_vector(selected_gammas)
    _show_unless_batch()

    plot_dl2.plot_pos(dl2)
    _show_unless_batch()

    plot_dl2.plot_roc_gamma(dl2)
    _show_unless_batch()

    plot_dl2.plot_models_features_importances(args.path_models,
                                              args.config_file)
    _show_unless_batch()

    # Gammaness distributions of the mc_type 101 and mc_type 0 events.
    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    _show_unless_batch()
示例#11
0
 def filter_cut(self, events):
     """Return ``events`` after ``filter_events`` is applied with this object's settings."""
     filters = self.filters
     finite_params = self.finite_params
     return filter_events(events, filters, finite_params)
示例#12
0
def main():
    """Reconstruct DL2 parameters from a DL1 file with the trained models.

    Steps, in order:

    1. Parse the command line and merge the optional custom configuration
       into ``standard_config``.
    2. Read the DL1 parameter table, plus the likelihood-fit table when
       'lh_fit_config' is part of the configuration.
    3. Handle missing pointing values.
    4. Filter the events and apply the models, either once
       (source-independent) or once per assumed source position
       (source-dependent).
    5. Write the metadata, copy the remaining DL1 nodes into the output file
       and write the DL2 table(s).

    Raises
    ------
    ValueError
        If ``config['disp_method']`` is neither 'disp_vector' nor
        'disp_norm_sign'.
    IOError
        If the output file already exists.
    """
    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception as err:
            # Best effort, as originally intended: keep the standard
            # configuration. A string is not a valid exception filter, so the
            # previous handler itself raised TypeError on any failure.
            print(f"Custom configuration could not be loaded !!! ({err})")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if 'lh_fit_config' in config:
        lhfit_data = pd.read_hdf(args.input_file,
                                 key=dl1_likelihood_params_lstcam_key)
        # Drop the duplicated id columns only when both tables list the same
        # events in the same order.
        if np.all(lhfit_data['obs_id'] == data['obs_id']) and np.all(
                lhfit_data['event_id'] == data['event_id']):
            lhfit_data.drop({'obs_id', 'event_id'}, axis=1, inplace=True)
        # Remembered so these columns can be split off again before writing.
        lhfit_keys = lhfit_data.keys()
        data = pd.concat([data, lhfit_data], axis=1)

    # if real data, add deltat t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is a least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            # Nothing to interpolate from: overwrite both columns with -pi/2.
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Get trained RF path for reconstruction:
    file_reg_energy = os.path.join(args.path_models, 'reg_energy.sav')
    file_cls_gh = os.path.join(args.path_models, 'cls_gh.sav')
    if config['disp_method'] == 'disp_vector':
        file_disp_vector = os.path.join(args.path_models,
                                        'reg_disp_vector.sav')
    elif config['disp_method'] == 'disp_norm_sign':
        file_disp_norm = os.path.join(args.path_models, 'reg_disp_norm.sav')
        file_disp_sign = os.path.join(args.path_models, 'cls_disp_sign.sav')
    else:
        # Fail here rather than later on an unbound variable, after a
        # partial output file has already been written.
        raise ValueError(
            f"Unknown disp_method: {config['disp_method']!r}")

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Every feature column used by any of the models has to be finite.
    finite_params = (config['energy_regression_features']
                     + config['disp_regression_features']
                     + config['particle_classification_features']
                     + config['disp_classification_features'])

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=finite_params,
        )

        if config['disp_method'] == 'disp_vector':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_vector=file_disp_vector,
                                          focal_length=focal_length,
                                          custom_config=config)
        elif config['disp_method'] == 'disp_norm_sign':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_norm=file_disp_norm,
                                          cls_disp_sign=file_disp_sign,
                                          focal_length=focal_length,
                                          custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:

        # if source-dependent parameters are already in dl1 data, just read those data.
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(args.input_file):
            data_srcdep = get_srcdep_params(args.input_file)

        # if not, source-dependent parameters are added now
        else:
            data_srcdep = pd.concat(dl1_to_dl2.get_source_dependent_parameters(
                data, config, focal_length=focal_length),
                                    axis=1)

        dl2_srcdep_dict = {}
        srcindep_keys = data.keys()
        srcdep_assumed_positions = data_srcdep.columns.levels[0]

        for i, k in enumerate(srcdep_assumed_positions):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=finite_params,
            )

            if config['disp_method'] == 'disp_vector':
                dl2_df = dl1_to_dl2.apply_models(
                    data_with_srcdep_param,
                    file_cls_gh,
                    file_reg_energy,
                    reg_disp_vector=file_disp_vector,
                    focal_length=focal_length,
                    custom_config=config)
            elif config['disp_method'] == 'disp_norm_sign':
                dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                 file_cls_gh,
                                                 file_reg_energy,
                                                 reg_disp_norm=file_disp_norm,
                                                 cls_disp_sign=file_disp_sign,
                                                 focal_length=focal_length,
                                                 custom_config=config)

            # Keep only the columns that were not part of the input table.
            dl2_srcdep = dl2_df.drop(srcindep_keys, axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            # The source-independent columns are taken from the first
            # assumed position only.
            if i == 0:
                dl2_srcindep = dl2_df[srcindep_keys]

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2', 1))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    # These tables are not copied over from the DL1 file.
    for skipped_key in (dl1_images_lstcam_key,
                        dl1_params_lstcam_key,
                        dl1_params_src_dep_lstcam_key,
                        dl1_likelihood_params_lstcam_key):
        if skipped_key in dl1_keys:
            dl1_keys.remove(skipped_key)

    metadata = global_metadata()
    write_metadata(metadata, output_file)

    with open_file(args.input_file, 'r') as h5in, \
            open_file(output_file, 'a') as h5out:

        # Write the selected DL1 info
        for k in dl1_keys:
            if not k.startswith('/'):
                k = '/' + k

            path = k.rsplit('/', 1)[0]
            if path not in h5out:
                grouppath, groupname = path.rsplit('/', 1)
                g = h5out.create_group(grouppath,
                                       groupname,
                                       createparents=True)
            else:
                g = h5out.get_node(path)

            h5in.copy_node(k, g, overwrite=True)

    # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
    if not config['source_dependent']:
        if 'lh_fit_config' not in config:
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
        else:
            # Likelihood-fit columns go into their own table.
            dl2_onlylhfit = dl2[lhfit_keys]
            dl2.drop(lhfit_keys, axis=1, inplace=True)
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
            write_dataframe(dl2_onlylhfit,
                            output_file,
                            dl2_likelihood_params_lstcam_key,
                            config=config,
                            meta=metadata)

    else:
        write_dl2_dataframe(dl2_srcindep,
                            output_file,
                            config=config,
                            meta=metadata)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1),
                        output_file,
                        dl2_params_src_dep_lstcam_key,
                        config=config,
                        meta=metadata)
示例#13
0
 def filter_cut(self, events):
     """
     Run ``filter_events`` on ``events`` with the filters and finite
     parameters stored on this object, and return its result
     """
     filters = self.filters
     finite_params = self.finite_params
     return filter_events(events, filters, finite_params)
示例#14
0
def main():
    """Reconstruct DL2 parameters from a DL1 file with the trained models.

    Works on the module-level ``args``. The optional custom configuration is
    merged into ``standard_config``, the DL1 parameter table is read,
    missing pointing values are handled and the three models found in
    ``args.path_models`` are applied — once for the source-independent
    analysis, or once per assumed source position for the source-dependent
    one. The remaining DL1 nodes are copied into the output file before the
    DL2 table(s) are written.

    Raises
    ------
    IOError
        If the output file already exists.
    """
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception as err:
            # Best effort, as originally intended: keep the standard
            # configuration. A string is not a valid exception filter, so the
            # previous handler itself raised TypeError on any failure.
            print(f"Custom configuration could not be loaded !!! ({err})")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    # if real data, add deltat t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is a least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            # Nothing to interpolate from: overwrite both columns with -pi/2.
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Load the trained RF for reconstruction:
    reg_energy = joblib.load(os.path.join(args.path_models, "reg_energy.sav"))
    reg_disp_vector = joblib.load(
        os.path.join(args.path_models, "reg_disp_vector.sav"))
    cls_gh = joblib.load(os.path.join(args.path_models, "cls_gh.sav"))

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Columns used by the regressors and the classifier have to be finite.
    finite_params = (config['regression_features']
                     + config['classification_features'])

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=finite_params,
        )

        dl2 = dl1_to_dl2.apply_models(data,
                                      cls_gh,
                                      reg_energy,
                                      reg_disp_vector,
                                      focal_length=focal_length,
                                      custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        data_srcdep = pd.read_hdf(args.input_file,
                                  key=dl1_params_src_dep_lstcam_key)
        # Rebuild a MultiIndex from the stored column labels: strip the first
        # and last character, remove quotes and spaces, split on commas
        # (presumably the labels are stringified tuples — confirm).
        data_srcdep.columns = pd.MultiIndex.from_tuples([
            tuple(col[1:-1].replace('\'', '').replace(' ', '').split(","))
            for col in data_srcdep.columns
        ])

        dl2_srcdep_dict = {}

        for i, k in enumerate(data_srcdep.columns.levels[0]):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=finite_params,
            )
            dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                             cls_gh,
                                             reg_energy,
                                             reg_disp_vector,
                                             focal_length=focal_length,
                                             custom_config=config)

            # Keep only the columns that were not part of the input table.
            dl2_srcdep = dl2_df.drop(data.keys(), axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            # The source-independent part is taken from the first assumed
            # position only, without that position's own columns.
            if i == 0:
                dl2_srcindep = dl2_df.drop(data_srcdep[k].keys(), axis=1)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    # These tables are not copied over from the DL1 file.
    for skipped_key in (dl1_images_lstcam_key,
                        dl1_params_lstcam_key,
                        dl1_params_src_dep_lstcam_key):
        if skipped_key in dl1_keys:
            dl1_keys.remove(skipped_key)

    with open_file(args.input_file, 'r') as h5in, \
            open_file(output_file, 'a') as h5out:

        # Write the selected DL1 info
        for k in dl1_keys:
            if not k.startswith('/'):
                k = '/' + k

            path = k.rsplit('/', 1)[0]
            if path not in h5out:
                grouppath, groupname = path.rsplit('/', 1)
                g = h5out.create_group(grouppath,
                                       groupname,
                                       createparents=True)
            else:
                g = h5out.get_node(path)

            h5in.copy_node(k, g, overwrite=True)

    if not config['source_dependent']:
        write_dl2_dataframe(dl2, output_file)

    else:
        write_dl2_dataframe(dl2_srcindep, output_file)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key)
def main():
    """Build the models, apply them to the test files and show diagnostic plots.

    Works on the module-level ``args``. The models are built from
    ``args.gammafile`` and ``args.protonfile``, then applied to the filtered
    events of ``args.gammatest`` and ``args.protontest``. Events are split
    at a gammaness of 0.5 and a series of figures is shown one after the
    other. The two ctaplot figures are optional: a failure there is reported
    and the remaining plots are still produced.
    """
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except Exception as err:
            # Best effort, as originally intended: keep the standard
            # configuration. A string is not a valid exception filter, so the
            # previous handler itself raised TypeError on any failure.
            print(f"Custom configuration could not be loaded !!! ({err})")

    config = replace_config(standard_config, custom_config)

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data,
                                  cls_gh,
                                  reg_energy,
                                  reg_disp_vector,
                                  custom_config=config)

    ####PLOT SOME RESULTS#####

    # Explicit copies, so the assignments below act on independent frames
    # instead of on slices of dl2 (pandas chained-assignment warning).
    gammas = dl2[dl2.gammaness >= 0.5].copy()
    protons = dl2[dl2.gammaness < 0.5].copy()
    gammas.reco_type = 0
    protons.reco_type = 1

    focal_length = 28 * u.m
    src_pos_reco = utils.reco_source_position_sky(
        gammas.x.values * u.m, gammas.y.values * u.m,
        gammas.reco_disp_dx.values * u.m, gammas.reco_disp_dy.values * u.m,
        focal_length, gammas.mc_alt_tel.values * u.rad,
        gammas.mc_az_tel.values * u.rad)

    plot_dl2.plot_features(dl2)
    plt.show()

    plot_dl2.plot_e(gammas, 10, 1.5, 3.5)
    plt.show()

    plot_dl2.calc_resolution(gammas)
    plt.show()

    plot_dl2.plot_e_resolution(gammas, 10, 1.5, 3.5)
    plt.show()

    plot_dl2.plot_disp_vector(gammas)
    plt.show()

    try:
        ctaplot.plot_theta2(
            gammas.mc_alt,
            np.arctan(np.tan(gammas.mc_az)),
            src_pos_reco.alt.rad,
            np.arctan(np.tan(src_pos_reco.az.rad)),
            bins=50,
            range=(0, 1),
        )
        plt.show()
        ctaplot.plot_angular_res_per_energy(
            src_pos_reco.alt.rad, np.arctan(np.tan(src_pos_reco.az.rad)),
            gammas.mc_alt, np.arctan(np.tan(gammas.mc_az)), gammas.mc_energy)
        plt.show()
    except Exception as err:
        # These two figures stay optional, but a failure is no longer
        # silent and no longer hides KeyboardInterrupt / SystemExit.
        print(f"ctaplot figures skipped: {err}")

    regression_features = config["regression_features"]
    classification_features = config["classification_features"]

    # Shows whatever the ctaplot section left undisplayed, if anything.
    plt.show()
    plot_dl2.plot_pos(dl2)
    plt.show()
    plot_dl2.plot_ROC(cls_gh, dl2, classification_features, -1)
    plt.show()
    plot_dl2.plot_importances(cls_gh, classification_features)
    plt.show()
    plot_dl2.plot_importances(reg_energy, regression_features)
    plt.show()
    plot_dl2.plot_importances(reg_disp_vector, regression_features)
    plt.show()

    # Gammaness distributions of the mc_type 101 and mc_type 0 events.
    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    plt.show()
示例#16
0
        try:
            custom_config = read_configuration_file(args.config_file)
        except ("Custom configuration could not be loaded !!!"):
            pass

    config = replace_config(standard_config, custom_config)

    #Get the data from the Simtelarray file:

    dl0_to_dl1.max_events = args.max_events
    dl0_to_dl1.allowed_tels = {1}
    dl0_to_dl1.r0_to_dl1(args.datafile)
    dl1_file = 'dl1_' + os.path.basename(args.datafile).split('.')[0] + '.h5'

    data = pd.read_hdf(dl1_file, key='events/LSTCam')
    data = filter_events(data, filters=config["events_filters"])

    #Load the trained RF for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    #Apply the models to the data

    dl2 = dl1_to_dl2.apply_models(data,
                                  cls_gh,
                                  reg_energy,
def main():
    """Apply the trained models to a DL1 file and write the DL2 output file.

    All inputs are read from the module-level ``args`` namespace:
    ``config_file``, ``input_file``, ``path_models`` and ``output_dir``.

    Steps:
    1. merge the optional custom configuration into ``standard_config``;
    2. read the DL1 parameters (plus the source-dependent ones when
       ``config['source_dependent']`` is set);
    3. repair or replace missing pointing values and filter the events;
    4. load the three models and call ``dl1_to_dl2.apply_models``;
    5. copy the remaining DL1 nodes to the output file and append the DL2
       table.

    Raises
    ------
    IOError
        If the output file already exists.
    """
    custom_config = {}
    if args.config_file is not None:
        # Errors while reading the custom configuration propagate unchanged.
        # The former handler, ``except ("Custom configuration ..."):``, named
        # a string instead of an exception class: it never caught anything
        # and, on Python 3, raised a TypeError that masked the real error.
        custom_config = read_configuration_file(
            os.path.abspath(args.config_file))

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        data_src_dep = pd.read_hdf(args.input_file,
                                   key=dl1_params_src_dep_lstcam_key)
        data = pd.concat([data, data_src_dep], axis=1)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            # No usable pointing at all: overwrite both columns with -pi/2.
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    data = filter_events(
        data,
        filters=config["events_filters"],
        finite_params=config['regression_features'] +
        config['classification_features'],
    )

    # Load the trained RF for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data,
                                  cls_gh,
                                  reg_energy,
                                  reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    # Every DL1 node is copied to the output except the images and the
    # parameter tables.
    dl1_keys = get_dataset_keys(args.input_file)
    for excluded_key in (dl1_images_lstcam_key,
                         dl1_params_lstcam_key,
                         dl1_params_src_dep_lstcam_key):
        if excluded_key in dl1_keys:
            dl1_keys.remove(excluded_key)

    with open_file(args.input_file, 'r') as h5in, \
            open_file(output_file, 'a') as h5out:

        # Write the selected DL1 info
        for k in dl1_keys:
            if not k.startswith('/'):
                k = '/' + k

            # Parent group of the node; created in the output when missing.
            path = k.rsplit('/', 1)[0]
            if path not in h5out:
                grouppath, groupname = path.rsplit('/', 1)
                g = h5out.create_group(grouppath,
                                       groupname,
                                       createparents=True)
            else:
                g = h5out.get_node(path)

            h5in.copy_node(k, g, overwrite=True)

    write_dl2_dataframe(dl2, output_file)
def _on_off_excess_1d(values_on, values_off, selection, x_label, ax,
                      extra_plot_args=()):
    """
    Count, normalise and plot ON/OFF events for one 1D parameter

    Parameters
    ----------
    values_on, values_off: `numpy.ndarray`
        Parameter values of the ON and OFF events
    selection: sequence
        ``selection[0]`` is the signal cut (events below it are counted),
        ``selection[1]`` and ``selection[2]`` bound the normalisation region
    x_label: str
        Axis label forwarded to `plotting.plot_1d_excess`
    ax:
        Axes forwarded to `plotting.plot_1d_excess`
    extra_plot_args: tuple
        Extra positional arguments appended to the `plotting.plot_1d_excess`
        call

    """
    cut = selection[0]
    n_on = np.sum(values_on < cut)
    n_off = np.sum(values_off < cut)
    LOGGER.info('Number of observed ON and OFF events are:\n %s, %s', n_on,
                n_off)

    norm_min = selection[1]
    norm_max = selection[2]
    n_norm_on = np.sum((values_on > norm_min) & (values_on < norm_max))
    n_norm_off = np.sum((values_off > norm_min) & (values_off < norm_max))
    # OFF is scaled by the ON/OFF event ratio found in the normalisation region
    lima_norm = n_norm_on / n_norm_off
    stat = WStatCountsStatistic(n_on, n_off, lima_norm)
    lima_significance = stat.sqrt_ts.item()
    lima_excess = stat.n_sig
    LOGGER.info('Excess is %s', lima_excess)
    LOGGER.info('Excess significance is %s', lima_significance)
    plotting.plot_1d_excess(
        [('ON data', values_on, 1),
         (f'OFF data X {lima_norm:.2f}', values_off, lima_norm)],
        lima_significance, x_label, cut, ax, *extra_plot_args)


def analyze_on_off(config):
    """
    Produces the theta2 and alpha plots of a dataset taken with ON/OFF
    observations

    The figure is shown when ``config['output']['interactive']`` is True,
    otherwise it is saved as ``on_off.png`` in
    ``config['output']['directory']``.

    Parameters
    ----------
    config: dict
        Analysis configuration. The keys read here are
        ``input`` (``data_tag``, ``columns_to_read``),
        ``analysis`` (``runs_on``, ``runs_off``, ``selection`` with the
        ``theta2`` and ``alpha`` entries),
        ``preselection`` and ``output`` (``interactive``, ``directory``).

    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
    LOGGER.info("Running ON/OFF analysis")
    LOGGER.info("ON data runs: %s", config['analysis']['runs_on'])
    observation_time_on, data_on = merge_dl2_runs(
        config['input']['data_tag'], config['analysis']['runs_on'],
        config['input']['columns_to_read'], 4)
    LOGGER.info("ON observation time: %s", observation_time_on)
    LOGGER.info("OFF data runs: %s", config['analysis']['runs_off'])
    observation_time_off, data_off = merge_dl2_runs(
        config['input']['data_tag'], config['analysis']['runs_off'],
        config['input']['columns_to_read'], 4)
    LOGGER.info("OFF observation time: %s", observation_time_off)

    selected_data_on = filter_events(data_on, config['preselection'])
    selected_data_off = filter_events(data_off, config['preselection'])

    # theta2 analysis
    theta2_on = np.array(compute_theta2(selected_data_on, (0, 0)))
    theta2_off = np.array(compute_theta2(selected_data_off, (0, 0)))
    _on_off_excess_1d(theta2_on, theta2_off,
                      config['analysis']['selection']['theta2'],
                      r'$\theta^2$ [deg$^2$]', ax1)

    # alpha analysis
    LOGGER.info('Perform alpha analysis')
    alpha_on = np.array(compute_alpha(selected_data_on))
    alpha_off = np.array(compute_alpha(selected_data_off))
    _on_off_excess_1d(alpha_on, alpha_off,
                      config['analysis']['selection']['alpha'],
                      r'$\alpha$ [deg]', ax2, (0, 90, 90))

    if config['output']['interactive'] is True:
        LOGGER.info(
            'Interactive mode ON, plots will be only shown, but not saved')
        plt.show()
    else:
        LOGGER.info('Interactive mode OFF, no plots will be displayed')
        plt.ioff()
        plt.savefig(f"{config['output']['directory']}/on_off.png")
        plt.close()
示例#19
0
            custom_config = read_configuration_file(args.config_file)
        except("Custom configuration could not be loaded !!!"):
            pass

    config = replace_config(standard_config, custom_config)

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(pd.read_hdf(args.gammatest, key='events/LSTCam'),
                                      config["events_filters"]
                                      )
    proton = filter_events(pd.read_hdf(args.protontest, key='events/LSTCam'),
                                      config["events_filters"],
                                      )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector, custom_config=config)

    ####PLOT SOME RESULTS#####

    gammas = dl2[dl2.gammaness>=0.5]
    protons = dl2[dl2.gammaness<0.5]
    gammas.reco_type = 0
    protons.reco_type = 1
    filename=args.filename
    #'/nfs/cta-ifae/jurysek/mc_DL1/20190415/proton/south_pointing/20190923/dl1_20190415_proton_south_pointing_20190923_testing-noimage.h5'

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except("Custom configuration could not be loaded !!!"):
            pass

    config = replace_config(standard_config, custom_config)
    events_filters = config["events_filters"]

    # load only the columns with parameters
    param = pd.read_hdf(filename, key=args.dl1_params_camera_key)
    param = utils.filter_events(param, filters=events_filters)

    # energy histogram (thrown events)
    # - each column of the histogram matrix is one bin in core distance
    # - each row is one bin in energy
    # - for the simulated energy spectrum all columns must be summed - axis=1
    #hist = read_simtel_energy_histogram(filename)
    hist, hist_merged = read_simtel_energy_histogram_merged(filename)
    #print(hist)
    print('E_min [Tev]: {:.4f}, E_max [TeV]: {:.4f}, N_bins: {:d}'.format(min(hist.bins_energy), max(hist.bins_energy), len(hist.bins_energy)-1))

    mc_header = read_simu_info_hdf5(filename)
    #print(mc_header)

    # select one of the simulated telescopes
    param = param.where(param.tel_id == args.telescope)
def main():
    """Build the models, apply them to the test files and plot the results.

    Command-line arguments come from the module-level ``parser``. The models
    are built with ``dl1_to_dl2.build_models`` from ``args.gammafile`` and
    ``args.protonfile``, then applied to the filtered events of
    ``args.gammatest`` and ``args.protontest``. Each figure is displayed
    unless ``args.batch`` is set. The process exits when no event has both
    ``reco_type == 0`` and ``mc_type == 0``.
    """
    args = parser.parse_args()

    def show_unless_batch():
        # Figures are only displayed when not running in batch mode.
        if not args.batch:
            plt.show()

    if args.config_file is not None:
        custom_config = read_configuration_file(args.config_file)
    else:
        custom_config = {}
    config = replace_config(standard_config, custom_config)

    subarray_info = SubarrayDescription.from_hdf(args.gammatest)
    # First allowed telescope when configured, telescope 1 otherwise.
    if "allowed_tels" in config:
        tel_id = config["allowed_tels"][0]
    else:
        tel_id = 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    reg_energy, reg_disp_norm, cls_disp_sign, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.save_models,
        path_models=args.path_models,
        custom_config=config,
    )

    gamma_events = pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key)
    gammas = filter_events(gamma_events, config["events_filters"])
    proton_events = pd.read_hdf(args.protontest, key=dl1_params_lstcam_key)
    proton = filter_events(proton_events, config["events_filters"])

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(
        data,
        cls_gh,
        reg_energy,
        reg_disp_norm=reg_disp_norm,
        cls_disp_sign=cls_disp_sign,
        focal_length=focal_length,
        custom_config=config,
    )

    ####PLOT SOME RESULTS#####

    selected_gammas = dl2.query('reco_type==0 & mc_type==0')

    if len(selected_gammas) == 0:
        log.warning('No gammas selected, I will not plot any output')
        sys.exit()

    def gammaness_histograms():
        plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
        plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)

    # Each step draws one figure and is followed by an optional plt.show().
    plot_steps = (
        lambda: plot_dl2.plot_features(dl2),
        lambda: plot_dl2.energy_results(selected_gammas),
        lambda: plot_dl2.direction_results(selected_gammas),
        lambda: plot_dl2.plot_disp_vector(selected_gammas),
        lambda: plot_dl2.plot_pos(dl2),
        lambda: plot_dl2.plot_roc_gamma(dl2),
        lambda: plot_dl2.plot_models_features_importances(args.path_models,
                                                          args.config_file),
        gammaness_histograms,
    )
    for draw in plot_steps:
        draw()
        show_unless_batch()
def analyze_wobble(config):
    """
    Runs the theta2 analysis of a dataset taken with wobble observations

    The left panel shows the wobble positions, the right panel the theta2
    distributions of the ON data and of every rotated OFF position. The
    figure is shown when ``config['output']['interactive']`` is True,
    otherwise it is saved as ``wobble.png`` in
    ``config['output']['directory']``.

    Parameters
    ----------
    config: dict
        Analysis configuration. The keys read here are
        ``analysis`` (``parameters.n_points``, ``selection.theta2``,
        ``runs``), ``input`` (``data_tag``, ``columns_to_read``,
        ``observed_source``), ``preselection`` and
        ``output`` (``interactive``, ``directory``).

    """

    _, (wobble_axis, theta2_axis) = plt.subplots(1, 2, figsize=(20, 8))
    analysis = config['analysis']
    n_points = analysis['parameters']['n_points']
    theta2_cut = analysis['selection']['theta2'][0]
    LOGGER.info(
        "Running wobble analysis with %s off-source observation points",
        n_points)
    LOGGER.info("Analyzing runs %s", analysis['runs'])
    observation_time, data = merge_dl2_runs(
        config['input']['data_tag'],
        analysis['runs'],
        config['input']['columns_to_read'],
    )
    preselection = config['preselection']
    LOGGER.debug('\nPreselection:\n%s', preselection)
    for parameter, value_range in preselection.items():
        LOGGER.debug('\nParameter: %s, range: %s, value type: %s', parameter,
                     value_range, type(value_range))

    selected_data = filter_events(data, filters=config['preselection'])
    true_source_position = extract_source_position(
        selected_data, config['input']['observed_source'])
    plotting.plot_wobble(true_source_position, n_points, wobble_axis)

    # ON region: theta2 with respect to the true source position
    on_theta2 = np.array(compute_theta2(selected_data, true_source_position))
    named_datasets = [('ON data', on_theta2, 1)]
    n_on = np.sum(on_theta2 < theta2_cut)

    # OFF regions: the reconstructed positions are rotated in steps of
    # 360/n_points degrees and theta2 is recomputed for each step
    n_off = 0
    rotation_angle = 360. / n_points
    origin_x = selected_data['reco_src_x']
    origin_y = selected_data['reco_src_y']
    for off_index in range(1, n_points):
        off_angle = rotation_angle * off_index
        off_data = selected_data.copy()
        rotated = rotate(tuple(zip(origin_x, origin_y)), off_angle)
        off_data['reco_src_x'] = [point[0] for point in rotated]
        off_data['reco_src_y'] = [point[1] for point in rotated]
        off_theta2 = np.array(
            compute_theta2(off_data, true_source_position))
        named_datasets.append((f'OFF {off_angle}', off_theta2, 1))
        n_off += np.sum(off_theta2 < theta2_cut)

    on_off_ratio = 1. / (n_points - 1)
    stat = WStatCountsStatistic(n_on, n_off, on_off_ratio)

    # Significance and excess are read from `sqrt_ts` and `n_sig`, see
    # https://docs.gammapy.org/dev/api/gammapy.stats.WStatCountsStatistic.html
    lima_significance = stat.sqrt_ts.item()
    lima_excess = stat.n_sig
    LOGGER.info('Observation time %s', observation_time)
    LOGGER.info('Number of "ON" events %s', n_on)
    LOGGER.info('Number of "OFF" events %s', n_off)
    LOGGER.info('ON/OFF observation time ratio %s', on_off_ratio)
    LOGGER.info('Excess is %s', lima_excess)
    LOGGER.info('Li&Ma significance %s', lima_significance)
    plotting.plot_1d_excess(named_datasets, lima_significance,
                            r'$\theta^2$ [deg$^2$]', theta2_cut, theta2_axis)

    interactive = config['output']['interactive'] is True
    if not interactive:
        LOGGER.info('Interactive mode OFF, no plots will be displayed')
        plt.ioff()
        plt.savefig(f"{config['output']['directory']}/wobble.png")
        plt.close()
    else:
        LOGGER.info(
            'Interactive mode ON, plots will be only shown, but not saved')
        plt.show()