def test_filter_events():
    from lstchain.reco.utils import filter_events

    df = pd.DataFrame({
        'a': [1, 2, 3],
        'b': [np.nan, 2.2, 3.2],
        'c': [1, 2, np.inf]
    })

    np.testing.assert_array_equal(
        filter_events(df, finite_params=['b']),
        pd.DataFrame({
            'a': [2, 3],
            'b': [2.2, 3.2],
            'c': [2, np.inf]
        }))

    np.testing.assert_array_equal(
        filter_events(df, finite_params=['b', 'c']),
        pd.DataFrame({
            'a': [2],
            'b': [2.2],
            'c': [2]
        }))

    np.testing.assert_array_equal(filter_events(df, finite_params=['e']), df)

    np.testing.assert_array_equal(
        filter_events(df, filters={'a': [0, 1]}),
        pd.DataFrame({
            'a': [1],
            'b': [np.nan],
            'c': 1
        }))
def test_apply_models(simulated_dl1_file, simulated_dl2_file, rf_models):
    from lstchain.reco.dl1_to_dl2 import apply_models
    import joblib

    dl1 = pd.read_hdf(simulated_dl1_file, key=dl1_params_lstcam_key)
    dl1 = filter_events(
        dl1,
        filters=standard_config["events_filters"],
        finite_params=standard_config['energy_regression_features']
        + standard_config['disp_regression_features']
        + standard_config['particle_classification_features']
        + standard_config['disp_classification_features'])

    reg_energy = joblib.load(rf_models["energy"])
    reg_cls_gh = joblib.load(rf_models["gh_sep"])
    reg_disp_norm = joblib.load(rf_models["disp_norm"])
    cls_disp_sign = joblib.load(rf_models["disp_sign"])

    # apply the models passing the loaded estimator objects
    dl2 = apply_models(dl1, reg_cls_gh, reg_energy,
                       reg_disp_norm=reg_disp_norm,
                       cls_disp_sign=cls_disp_sign,
                       custom_config=standard_config)

    # apply the models again passing the model files from the fixture
    dl2 = apply_models(dl1, rf_models["gh_sep"], rf_models["energy"],
                       reg_disp_norm=rf_models["disp_norm"],
                       cls_disp_sign=rf_models["disp_sign"],
                       custom_config=standard_config)

    dl2.to_hdf(simulated_dl2_file, key=dl2_params_lstcam_key)
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)
    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RF for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = args.outdir + '/dl2_' + os.path.basename(args.datafile)

    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
def data_prepare(filename, key=dl1_params_lstcam_key, filters=None, telescope=1,
                 quality=True):

    # load only the parameter columns into a pandas dataframe
    param = pd.read_hdf(filename, key=key)
    param = utils.filter_events(param, filters=filters)

    # basic statistics of the data
    print('Filename:', filename)
    print('Size of dataset:', param.shape[0])

    # select one of the simulated telescopes
    param = param.where(param.tel_id == telescope)
    param = param.dropna()
    print('Size of dataset (1 tel):', param.shape[0])

    # Application of selection cuts
    if quality:
        param = quality_cuts(param)
        param = param.dropna()
        print('Size of dataset after selection cuts:', param.shape[0])
    else:
        print('No quality cuts applied')

    param = shuffle(param).reset_index(drop=True)

    return param
def theta2_hist_per_energy_bin(irf_file, dl2_gamma_file):
    """
    Plot a theta2 histogram per energy bin of the selected gamma events
    (passing the gh_score cut), displaying the theta2 cut applied for IRFs.
    """
    gammas, sim_info = read_mc_dl2_to_QTable(dl2_gamma_file)
    gammas = filter_events(gammas, filters)

    for prefix in ("true", "reco"):
        k = f"{prefix}_source_fov_offset"
        gammas[k] = calculate_source_fov_offset(gammas, prefix=prefix)

    source_alt, source_az = determine_source_position(gammas)
    gammas['theta'] = calculate_theta(gammas, assumed_source_az=source_az,
                                      assumed_source_alt=source_alt)

    gh_cuts = read_gh_cut_table(irf_file)
    theta_cuts = read_theta_cut_table(irf_file)

    tc = theta_cuts["RAD_MAX"].T[:, 0]
    energy_min = theta_cuts['ENERG_LO']
    energy_max = theta_cuts['ENERG_HI']

    np.testing.assert_allclose(energy_min, gh_cuts['low'])
    np.testing.assert_allclose(energy_max, gh_cuts['high'])

    ncols = 5
    nrows = len(energy_min) // ncols + int((len(energy_min) % ncols) > 0)
    fig, axes = plt.subplots(ncols=ncols, nrows=nrows,
                             figsize=(ncols * 5, nrows * 5))
    xrange = (0, 0.4)

    for ii, emin in enumerate(energy_min):
        ax = axes.ravel()[ii]
        emax = energy_max[ii]
        mask = (gammas['gh_score'] < gh_cuts['cut'][ii]) \
            & (emin <= gammas['true_energy']) \
            & (gammas['true_energy'] < emax)

        t2unit = u.deg**2
        n, bins, _ = ax.hist(
            (gammas[mask]['theta']**2).to_value(t2unit),
            label=f'{emin:0.2f}-{emax:0.2f}',
            histtype='step',
            lw=2,
            bins=np.linspace(*xrange),
            range=xrange,
            density=False,
        )
        ax.vlines((tc[ii]**2).to_value(t2unit), 0, np.max(n), color='orange')
        ax.legend()
        ax.set_xlim(*xrange)
        ax.set_xlabel(f'theta2 / {t2unit}')
        ax.set_title(f"gh cut: {gh_cuts['cut'][ii]:0.3f}")

    plt.tight_layout()
    return axes
def test_filter_events():
    from lstchain.reco.utils import filter_events

    df = pd.DataFrame({
        "a": [1, 2, 3],
        "b": [np.nan, 2.2, 3.2],
        "c": [1, 2, np.inf]
    })

    np.testing.assert_array_equal(
        filter_events(
            df,
            filters=dict(a=[0, np.inf], b=[0, np.inf], c=[0, np.inf]),
            finite_params=["b"],
        ),
        pd.DataFrame({
            "a": [2, 3],
            "b": [2.2, 3.2],
            "c": [2, np.inf]
        }),
    )

    np.testing.assert_array_equal(
        filter_events(
            df,
            filters=dict(a=[0, np.inf], b=[0, np.inf], c=[0, np.inf]),
            finite_params=["b", "c"],
        ),
        pd.DataFrame({
            "a": [2],
            "b": [2.2],
            "c": [2]
        }),
    )

    np.testing.assert_array_equal(
        filter_events(df, filters=dict(a=[0, 1])),
        pd.DataFrame({
            "a": [1],
            "b": [np.nan],
            "c": 1
        }),
    )

    with np.testing.assert_raises(KeyError):
        filter_events(df, filters=dict(e=[0, np.inf]))
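# Note: the tests above pin down the semantics of filter_events: rows must fall
# inside the inclusive [min, max] ranges given in `filters`, rows with NaN/inf
# in any of the `finite_params` columns are dropped, unknown finite_params are
# ignored, and an unknown filter column raises KeyError. The sketch below only
# illustrates that behaviour; it is not lstchain's actual implementation.
import numpy as np
import pandas as pd


def filter_events_sketch(df, filters=None, finite_params=None):
    """Reference sketch of the behaviour exercised by the tests above."""
    mask = pd.Series(True, index=df.index)
    for col, (lo, hi) in (filters or {}).items():
        # df[col] raises KeyError for an unknown column, as the last test expects
        mask &= df[col].between(lo, hi)
    for col in (finite_params or []):
        if col in df.columns:  # unknown finite_params are silently ignored
            mask &= np.isfinite(df[col])
    return df[mask]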
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        data = pd.concat(
            [data, pd.read_hdf(args.datafile, key=dl1_params_src_dep_lstcam_key)],
            axis=1)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RF for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = os.path.join(
        args.outdir,
        os.path.basename(args.datafile).replace('dl1', 'dl2'))

    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
def test_apply_models():
    from lstchain.reco.dl1_to_dl2 import apply_models
    import joblib

    dl1 = pd.read_hdf(dl1_file, key=dl1_params_lstcam_key)
    dl1 = filter_events(dl1, filters=custom_config["events_filters"])

    reg_energy = joblib.load(file_model_energy)
    reg_disp = joblib.load(file_model_disp)
    reg_cls_gh = joblib.load(file_model_gh_sep)

    dl2 = apply_models(dl1, reg_cls_gh, reg_energy, reg_disp,
                       custom_config=custom_config)
    dl2.to_hdf(dl2_file, key=dl2_params_lstcam_key)
def test_apply_models():
    from lstchain.reco.dl1_to_dl2 import apply_models
    import joblib

    dl1 = pd.read_hdf(dl1_file, key=dl1_params_lstcam_key)
    dl1 = filter_events(
        dl1,
        filters=standard_config["events_filters"],
        finite_params=standard_config['regression_features']
        + standard_config['classification_features'],
    )

    reg_energy = joblib.load(file_model_energy)
    reg_disp = joblib.load(file_model_disp)
    reg_cls_gh = joblib.load(file_model_gh_sep)

    dl2 = apply_models(dl1, reg_cls_gh, reg_energy, reg_disp,
                       custom_config=standard_config)
    dl2.to_hdf(dl2_file, key=dl2_params_lstcam_key)
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    #### PLOT SOME RESULTS ####
    selected_gammas = dl2.query('reco_type==0 & mc_type==0')

    if len(selected_gammas) == 0:
        log.warning('No gammas selected, I will not plot any output')
        sys.exit()

    plot_dl2.plot_features(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.energy_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.direction_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_disp_vector(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_pos(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_roc_gamma(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_models_features_importances(args.path_models, args.config_file)
    if not args.batch:
        plt.show()

    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    if not args.batch:
        plt.show()
def filter_cut(self, events):
    return filter_events(events, self.filters, self.finite_params)
def main():
    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if 'lh_fit_config' in config.keys():
        lhfit_data = pd.read_hdf(args.input_file,
                                 key=dl1_likelihood_params_lstcam_key)
        if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(
                lhfit_data['event_id'] == data['event_id']):
            lhfit_data.drop({'obs_id', 'event_id'}, axis=1, inplace=True)
        lhfit_keys = lhfit_data.keys()
        data = pd.concat([data, lhfit_data], axis=1)

    # if real data, add delta_t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Get trained RF path for reconstruction:
    file_reg_energy = os.path.join(args.path_models, 'reg_energy.sav')
    file_cls_gh = os.path.join(args.path_models, 'cls_gh.sav')
    if config['disp_method'] == 'disp_vector':
        file_disp_vector = os.path.join(args.path_models, 'reg_disp_vector.sav')
    elif config['disp_method'] == 'disp_norm_sign':
        file_disp_norm = os.path.join(args.path_models, 'reg_disp_norm.sav')
        file_disp_sign = os.path.join(args.path_models, 'cls_disp_sign.sav')

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['energy_regression_features']
            + config['disp_regression_features']
            + config['particle_classification_features']
            + config['disp_classification_features'],
        )

        if config['disp_method'] == 'disp_vector':
            dl2 = dl1_to_dl2.apply_models(data, file_cls_gh, file_reg_energy,
                                          reg_disp_vector=file_disp_vector,
                                          focal_length=focal_length,
                                          custom_config=config)
        elif config['disp_method'] == 'disp_norm_sign':
            dl2 = dl1_to_dl2.apply_models(data, file_cls_gh, file_reg_energy,
                                          reg_disp_norm=file_disp_norm,
                                          cls_disp_sign=file_disp_sign,
                                          focal_length=focal_length,
                                          custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        # if source-dependent parameters are already in dl1 data, just read those data.
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(args.input_file):
            data_srcdep = get_srcdep_params(args.input_file)
        # if not, source-dependent parameters are added now
        else:
            data_srcdep = pd.concat(dl1_to_dl2.get_source_dependent_parameters(
                data, config, focal_length=focal_length), axis=1)

        dl2_srcdep_dict = {}
        srcindep_keys = data.keys()
        srcdep_assumed_positions = data_srcdep.columns.levels[0]

        for i, k in enumerate(srcdep_assumed_positions):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['energy_regression_features']
                + config['disp_regression_features']
                + config['particle_classification_features']
                + config['disp_classification_features'],
            )

            if config['disp_method'] == 'disp_vector':
                dl2_df = dl1_to_dl2.apply_models(
                    data_with_srcdep_param, file_cls_gh, file_reg_energy,
                    reg_disp_vector=file_disp_vector,
                    focal_length=focal_length,
                    custom_config=config)
            elif config['disp_method'] == 'disp_norm_sign':
                dl2_df = dl1_to_dl2.apply_models(
                    data_with_srcdep_param, file_cls_gh, file_reg_energy,
                    reg_disp_norm=file_disp_norm,
                    cls_disp_sign=file_disp_sign,
                    focal_length=focal_length,
                    custom_config=config)

            dl2_srcdep = dl2_df.drop(srcindep_keys, axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            if i == 0:
                dl2_srcindep = dl2_df[srcindep_keys]

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2', 1))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    if dl1_likelihood_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_likelihood_params_lstcam_key)

    metadata = global_metadata()
    write_metadata(metadata, output_file)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:

            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
    if not config['source_dependent']:
        if 'lh_fit_config' not in config.keys():
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
        else:
            dl2_onlylhfit = dl2[lhfit_keys]
            dl2.drop(lhfit_keys, axis=1, inplace=True)
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
            write_dataframe(dl2_onlylhfit, output_file,
                            dl2_likelihood_params_lstcam_key,
                            config=config, meta=metadata)
    else:
        write_dl2_dataframe(dl2_srcindep, output_file, config=config,
                            meta=metadata)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key, config=config,
                        meta=metadata)
def filter_cut(self, events):
    """
    Apply the event filters
    """
    return filter_events(events, self.filters, self.finite_params)
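# For context, filter_cut above lives on a selector object that carries the cut
# configuration. Below is a minimal self-contained sketch of such a holder; the
# class name, constructor, and example cuts are assumptions for illustration
# (in lstchain the selector is a configurable component). Only the filter_cut
# body is taken from the snippet above.
from lstchain.reco.utils import filter_events


class EventSelectorSketch:
    def __init__(self, filters=None, finite_params=None):
        self.filters = filters or {}              # {column: [min, max]}
        self.finite_params = finite_params or []  # columns required to be finite

    def filter_cut(self, events):
        """Apply the event filters"""
        return filter_events(events, self.filters, self.finite_params)


# hypothetical usage: keep bright events with finite Hillas parameters
# selector = EventSelectorSketch(filters={'intensity': [50, np.inf]},
#                                finite_params=['intensity', 'length', 'width'])
# selected = selector.filter_cut(events)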
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    # if real data, add delta_t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Load the trained RF for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['regression_features']
            + config['classification_features'],
        )
        dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                      focal_length=focal_length,
                                      custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        data_srcdep = pd.read_hdf(args.input_file,
                                  key=dl1_params_src_dep_lstcam_key)
        data_srcdep.columns = pd.MultiIndex.from_tuples([
            tuple(col[1:-1].replace('\'', '').replace(' ', '').split(","))
            for col in data_srcdep.columns
        ])

        dl2_srcdep_dict = {}

        for i, k in enumerate(data_srcdep.columns.levels[0]):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['regression_features']
                + config['classification_features'],
            )

            dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param, cls_gh,
                                             reg_energy, reg_disp_vector,
                                             focal_length=focal_length,
                                             custom_config=config)

            dl2_srcdep = dl2_df.drop(data.keys(), axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            if i == 0:
                dl2_srcindep = dl2_df.drop(data_srcdep[k].keys(), axis=1)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:

            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    if not config['source_dependent']:
        write_dl2_dataframe(dl2, output_file)
    else:
        write_dl2_dataframe(dl2_srcindep, output_file)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key)
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    #### PLOT SOME RESULTS ####
    gammas = dl2[dl2.gammaness >= 0.5]
    protons = dl2[dl2.gammaness < 0.5]
    gammas.reco_type = 0
    protons.reco_type = 1

    focal_length = 28 * u.m
    src_pos_reco = utils.reco_source_position_sky(
        gammas.x.values * u.m,
        gammas.y.values * u.m,
        gammas.reco_disp_dx.values * u.m,
        gammas.reco_disp_dy.values * u.m,
        focal_length,
        gammas.mc_alt_tel.values * u.rad,
        gammas.mc_az_tel.values * u.rad)

    plot_dl2.plot_features(dl2)
    plt.show()

    plot_dl2.plot_e(gammas, 10, 1.5, 3.5)
    plt.show()

    plot_dl2.calc_resolution(gammas)
    plt.show()

    plot_dl2.plot_e_resolution(gammas, 10, 1.5, 3.5)
    plt.show()

    plot_dl2.plot_disp_vector(gammas)
    plt.show()

    try:
        ctaplot.plot_theta2(
            gammas.mc_alt,
            np.arctan(np.tan(gammas.mc_az)),
            src_pos_reco.alt.rad,
            np.arctan(np.tan(src_pos_reco.az.rad)),
            bins=50,
            range=(0, 1),
        )
        plt.show()
        ctaplot.plot_angular_res_per_energy(
            src_pos_reco.alt.rad,
            np.arctan(np.tan(src_pos_reco.az.rad)),
            gammas.mc_alt,
            np.arctan(np.tan(gammas.mc_az)),
            gammas.mc_energy)
        plt.show()
    except Exception:
        pass

    regression_features = config["regression_features"]
    classification_features = config["classification_features"]

    plt.show()

    plot_dl2.plot_pos(dl2)
    plt.show()

    plot_dl2.plot_ROC(cls_gh, dl2, classification_features, -1)
    plt.show()

    plot_dl2.plot_importances(cls_gh, classification_features)
    plt.show()

    plot_dl2.plot_importances(reg_energy, regression_features)
    plt.show()

    plot_dl2.plot_importances(reg_disp_vector, regression_features)
    plt.show()

    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    plt.show()
try:
    custom_config = read_configuration_file(args.config_file)
except Exception:
    print("Custom configuration could not be loaded !!!")

config = replace_config(standard_config, custom_config)

# Get the data from the Simtelarray file:
dl0_to_dl1.max_events = args.max_events
dl0_to_dl1.allowed_tels = {1}
dl0_to_dl1.r0_to_dl1(args.datafile)
dl1_file = 'dl1_' + os.path.basename(args.datafile).split('.')[0] + '.h5'

data = pd.read_hdf(dl1_file, key='events/LSTCam')
data = filter_events(data, filters=config["events_filters"])

# Load the trained RF for reconstruction:
fileE = args.path_models + "/reg_energy.sav"
fileD = args.path_models + "/reg_disp_vector.sav"
fileH = args.path_models + "/cls_gh.sav"

reg_energy = joblib.load(fileE)
reg_disp_vector = joblib.load(fileD)
cls_gh = joblib.load(fileH)

# Apply the models to the data
dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy,
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception:
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        data_src_dep = pd.read_hdf(args.input_file,
                                   key=dl1_params_src_dep_lstcam_key)
        data = pd.concat([data, data_src_dep], axis=1)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    data = filter_events(
        data,
        filters=config["events_filters"],
        finite_params=config['regression_features']
        + config['classification_features'],
    )

    # Load the trained RF for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:

            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    write_dl2_dataframe(dl2, output_file)
def analyze_on_off(config):
    """
    Extracts the theta2 plot of a dataset taken with ON/OFF observations

    Parameters
    ----------
    config: analysis configuration dictionary
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

    LOGGER.info("Running ON/OFF analysis")
    LOGGER.info("ON data runs: %s", config['analysis']['runs_on'])
    observation_time_on, data_on = merge_dl2_runs(
        config['input']['data_tag'],
        config['analysis']['runs_on'],
        config['input']['columns_to_read'], 4)
    LOGGER.info("ON observation time: %s", observation_time_on)

    LOGGER.info("OFF data runs: %s", config['analysis']['runs_off'])
    observation_time_off, data_off = merge_dl2_runs(
        config['input']['data_tag'],
        config['analysis']['runs_off'],
        config['input']['columns_to_read'], 4)
    LOGGER.info("OFF observation time: %s", observation_time_off)

    # observation_time_ratio = observation_time_on / observation_time_off
    # LOGGER.info('Observation time ratio %s', observation_time_ratio)

    selected_data_on = filter_events(data_on, config['preselection'])
    selected_data_off = filter_events(data_off, config['preselection'])

    theta2_on = np.array(compute_theta2(selected_data_on, (0, 0)))
    theta2_off = np.array(compute_theta2(selected_data_off, (0, 0)))

    theta2_cut = config['analysis']['selection']['theta2'][0]
    n_on = np.sum(theta2_on < theta2_cut)
    n_off = np.sum(theta2_off < theta2_cut)
    LOGGER.info('Number of observed ON and OFF events are:\n %s, %s',
                n_on, n_off)

    theta2_norm_min = config['analysis']['selection']['theta2'][1]
    theta2_norm_max = config['analysis']['selection']['theta2'][2]
    n_norm_on = np.sum((theta2_on > theta2_norm_min)
                       & (theta2_on < theta2_norm_max))
    n_norm_off = np.sum((theta2_off > theta2_norm_min)
                        & (theta2_off < theta2_norm_max))
    lima_norm = n_norm_on / n_norm_off

    stat = WStatCountsStatistic(n_on, n_off, lima_norm)
    lima_significance = stat.sqrt_ts.item()
    lima_excess = stat.n_sig
    LOGGER.info('Excess is %s', lima_excess)
    LOGGER.info('Excess significance is %s', lima_significance)

    plotting.plot_1d_excess(
        [('ON data', theta2_on, 1),
         (f'OFF data X {lima_norm:.2f}', theta2_off, lima_norm)],
        lima_significance, r'$\theta^2$ [deg$^2$]', theta2_cut, ax1)

    # alpha analysis
    LOGGER.info('Perform alpha analysis')
    alpha_on = np.array(compute_alpha(selected_data_on))
    alpha_off = np.array(compute_alpha(selected_data_off))

    alpha_cut = config['analysis']['selection']['alpha'][0]
    n_on = np.sum(alpha_on < alpha_cut)
    n_off = np.sum(alpha_off < alpha_cut)
    LOGGER.info('Number of observed ON and OFF events are:\n %s, %s',
                n_on, n_off)

    alpha_norm_min = config['analysis']['selection']['alpha'][1]
    alpha_norm_max = config['analysis']['selection']['alpha'][2]
    n_norm_on = np.sum((alpha_on > alpha_norm_min)
                       & (alpha_on < alpha_norm_max))
    n_norm_off = np.sum((alpha_off > alpha_norm_min)
                        & (alpha_off < alpha_norm_max))
    lima_norm = n_norm_on / n_norm_off

    stat = WStatCountsStatistic(n_on, n_off, lima_norm)
    lima_significance = stat.sqrt_ts.item()
    lima_excess = stat.n_sig
    LOGGER.info('Excess is %s', lima_excess)
    LOGGER.info('Excess significance is %s', lima_significance)

    plotting.plot_1d_excess(
        [('ON data', alpha_on, 1),
         (f'OFF data X {lima_norm:.2f}', alpha_off, lima_norm)],
        lima_significance, r'$\alpha$ [deg]', alpha_cut, ax2, 0, 90, 90)

    if config['output']['interactive'] is True:
        LOGGER.info('Interactive mode ON, plots will be only shown, but not saved')
        plt.show()
    else:
        LOGGER.info('Interactive mode OFF, no plots will be displayed')
        plt.ioff()
        plt.savefig(f"{config['output']['directory']}/on_off.png")
        plt.close()
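# The Li&Ma significance used above can be exercised in isolation with gammapy's
# WStatCountsStatistic, exactly as the function does with its ON/OFF counts and
# normalisation. The counts below are made up purely for illustration.
from gammapy.stats import WStatCountsStatistic

n_on = 120          # illustrative: events inside the ON theta2/alpha cut
n_off = 300         # illustrative: events inside the OFF cut
alpha = 1.0 / 3.0   # illustrative ON/OFF normalisation (lima_norm above)

stat = WStatCountsStatistic(n_on, n_off, alpha)
print('excess:', stat.n_sig)                      # n_on - alpha * n_off
print('Li&Ma significance:', stat.sqrt_ts.item())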
try:
    custom_config = read_configuration_file(args.config_file)
except Exception:
    print("Custom configuration could not be loaded !!!")

config = replace_config(standard_config, custom_config)

reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
    args.gammafile,
    args.protonfile,
    save_models=args.storerf,
    path_models=args.path_models,
    custom_config=config,
)

gammas = filter_events(pd.read_hdf(args.gammatest, key='events/LSTCam'),
                       config["events_filters"])
proton = filter_events(pd.read_hdf(args.protontest, key='events/LSTCam'),
                       config["events_filters"])

data = pd.concat([gammas, proton], ignore_index=True)

dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                              custom_config=config)

#### PLOT SOME RESULTS ####
gammas = dl2[dl2.gammaness >= 0.5]
protons = dl2[dl2.gammaness < 0.5]
gammas.reco_type = 0
protons.reco_type = 1
filename = args.filename
# '/nfs/cta-ifae/jurysek/mc_DL1/20190415/proton/south_pointing/20190923/dl1_20190415_proton_south_pointing_20190923_testing-noimage.h5'

custom_config = {}
if args.config_file is not None:
    try:
        custom_config = read_configuration_file(args.config_file)
    except Exception:
        print("Custom configuration could not be loaded !!!")

config = replace_config(standard_config, custom_config)
events_filters = config["events_filters"]

# load only the parameter columns
param = pd.read_hdf(filename, key=args.dl1_params_camera_key)
param = utils.filter_events(param, filters=events_filters)

# energy histogram (thrown events)
# - each column of the histogram matrix is one bin in core distance
# - each row is one bin in energy
# - to get the simulated energy spectrum, all columns must be summed - axis=1
# hist = read_simtel_energy_histogram(filename)
hist, hist_merged = read_simtel_energy_histogram_merged(filename)
# print(hist)
print('E_min [TeV]: {:.4f}, E_max [TeV]: {:.4f}, N_bins: {:d}'.format(
    min(hist.bins_energy), max(hist.bins_energy), len(hist.bins_energy) - 1))

mc_header = read_simu_info_hdf5(filename)
# print(mc_header)

# select one of the simulated telescopes
param = param.where(param.tel_id == args.telescope)
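# The comment block above describes the layout of the thrown-event energy
# histogram: rows are energy bins, columns are core-distance bins, and the
# simulated energy spectrum is the sum over the core-distance axis. A tiny
# illustration with a made-up array (shape and counts are hypothetical):
import numpy as np

hist2d = np.random.poisson(100, size=(10, 5))  # 10 energy bins x 5 core-distance bins
thrown_energy_spectrum = hist2d.sum(axis=1)    # counts per energy bin, over all core distances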
def main():
    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        custom_config = read_configuration_file(args.config_file)

    config = replace_config(standard_config, custom_config)

    subarray_info = SubarrayDescription.from_hdf(args.gammatest)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    reg_energy, reg_disp_norm, cls_disp_sign, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.save_models,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy,
                                  reg_disp_norm=reg_disp_norm,
                                  cls_disp_sign=cls_disp_sign,
                                  focal_length=focal_length,
                                  custom_config=config)

    #### PLOT SOME RESULTS ####
    selected_gammas = dl2.query('reco_type==0 & mc_type==0')

    if len(selected_gammas) == 0:
        log.warning('No gammas selected, I will not plot any output')
        sys.exit()

    plot_dl2.plot_features(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.energy_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.direction_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_disp_vector(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_pos(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_roc_gamma(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_models_features_importances(args.path_models, args.config_file)
    if not args.batch:
        plt.show()

    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    if not args.batch:
        plt.show()
def analyze_wobble(config):
    """
    Extracts the theta2 plot of a dataset taken with wobble observations

    Parameters
    ----------
    config: analysis configuration dictionary
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

    n_points = config['analysis']['parameters']['n_points']
    theta2_cut = config['analysis']['selection']['theta2'][0]
    LOGGER.info("Running wobble analysis with %s off-source observation points",
                n_points)
    LOGGER.info("Analyzing runs %s", config['analysis']['runs'])

    observation_time, data = merge_dl2_runs(config['input']['data_tag'],
                                            config['analysis']['runs'],
                                            config['input']['columns_to_read'])

    LOGGER.debug('\nPreselection:\n%s', config['preselection'])
    for key, value in config['preselection'].items():
        LOGGER.debug('\nParameter: %s, range: %s, value type: %s',
                     key, value, type(value))

    selected_data = filter_events(data, filters=config['preselection'])

    # Add theta2 to selected data
    true_source_position = extract_source_position(
        selected_data, config['input']['observed_source'])

    plotting.plot_wobble(true_source_position, n_points, ax1)

    named_datasets = []
    named_datasets.append(
        ('ON data',
         np.array(compute_theta2(selected_data, true_source_position)), 1))
    n_on = np.sum(named_datasets[0][1] < theta2_cut)
    n_off = 0

    rotation_angle = 360. / n_points
    origin_x = selected_data['reco_src_x']
    origin_y = selected_data['reco_src_y']

    for off_point in range(1, n_points):
        t_off_data = selected_data.copy()
        off_xy = rotate(tuple(zip(origin_x, origin_y)),
                        rotation_angle * off_point)
        t_off_data['reco_src_x'] = [xy[0] for xy in off_xy]
        t_off_data['reco_src_y'] = [xy[1] for xy in off_xy]
        named_datasets.append(
            (f'OFF {rotation_angle * off_point}',
             np.array(compute_theta2(t_off_data, true_source_position)), 1))
        n_off += np.sum(named_datasets[-1][1] < theta2_cut)

    stat = WStatCountsStatistic(n_on, n_off, 1. / (n_points - 1))
    # API change for attributes significance and excess in the new gammapy version:
    # https://docs.gammapy.org/dev/api/gammapy.stats.WStatCountsStatistic.html
    lima_significance = stat.sqrt_ts.item()
    lima_excess = stat.n_sig

    LOGGER.info('Observation time %s', observation_time)
    LOGGER.info('Number of "ON" events %s', n_on)
    LOGGER.info('Number of "OFF" events %s', n_off)
    LOGGER.info('ON/OFF observation time ratio %s', 1. / (n_points - 1))
    LOGGER.info('Excess is %s', lima_excess)
    LOGGER.info('Li&Ma significance %s', lima_significance)

    plotting.plot_1d_excess(named_datasets, lima_significance,
                            r'$\theta^2$ [deg$^2$]', theta2_cut, ax2)

    if config['output']['interactive'] is True:
        LOGGER.info('Interactive mode ON, plots will be only shown, but not saved')
        plt.show()
    else:
        LOGGER.info('Interactive mode OFF, no plots will be displayed')
        plt.ioff()
        plt.savefig(f"{config['output']['directory']}/wobble.png")
        plt.close()
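# The OFF regions above are built by rotating the reconstructed source positions
# around the camera centre by multiples of 360/n_points degrees. The rotate()
# helper itself is not shown in the snippet; the function below is a plausible
# stand-in (its name, signature, and conventions are assumptions for illustration).
import numpy as np


def rotate_sketch(points, angle_deg):
    """Rotate (x, y) points about the origin by angle_deg degrees (counter-clockwise)."""
    theta = np.deg2rad(angle_deg)
    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta), np.cos(theta)]])
    xy = np.atleast_2d(points)          # shape (N, 2)
    return [tuple(p) for p in xy @ rot.T]


# e.g. with n_points = 3 wobble positions, the first OFF set is the ON positions
# rotated by 120 degrees: rotate_sketch(list(zip(origin_x, origin_y)), 120.0)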