def obtain_time_image(x, y, centroid_x, centroid_y, psi, time_gradient, time_intercept):
    """Evaluate the pulse-time image of a toymodel shower.

    The pulse time is modelled as a straight line in the coordinate measured
    along the longitudinal (major) axis of the shower; the transverse
    coordinate does not enter the result.

    Parameters
    ----------
    x : u.Quantity[length]
        X camera coordinate to evaluate the time at.
        Usually the array of pixel X positions
    y : u.Quantity[length]
        Y camera coordinate to evaluate the time at.
        Usually the array of pixel Y positions
    centroid_x : u.Quantity[length]
        X camera coordinate for the centroid of the shower
    centroid_y : u.Quantity[length]
        Y camera coordinate for the centroid of the shower
    psi : convertible to `astropy.coordinates.Angle`
        rotation angle about the centroid (0=x-axis)
    time_gradient : u.Quantity[time/length]
        Rate at which the time changes with distance along the shower axis
    time_intercept : u.Quantity[time]
        Pulse time at the shower centroid

    Returns
    -------
    float or ndarray
        Pulse time in nanoseconds at (x, y)
    """
    # Only the first (longitudinal) shower-frame coordinate is needed.
    shower_frame = camera_to_shower_coordinates(x, y, centroid_x, centroid_y, psi)
    distance_m = shower_frame[0].to_value(u.m)

    # Drop the units (m, ns/m, ns) so the result is a plain number in ns.
    slope_ns_per_m = time_gradient.to_value(u.ns / u.m)
    offset_ns = time_intercept.to_value(u.ns)

    return distance_m * slope_ns_per_m + offset_ns
def calc_source_dependent_parameters(data, expected_src_pos_x_m, expected_src_pos_y_m):
    """Compute the source-dependent image parameters for a given source position.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``x``, ``y``, ``psi``, ``time_gradient``
        and ``skewness``.
    expected_src_pos_x_m : float
        X coordinate of the expected source position.
    expected_src_pos_y_m : float
        Y coordinate of the expected source position.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``data`` with the columns ``expected_src_x``,
        ``expected_src_y``, ``dist``, ``time_gradient_from_source``,
        ``skewness_from_source`` and ``alpha`` (in degrees).
    """
    cog_x = data['x']
    cog_y = data['y']
    delta_x = cog_x - expected_src_pos_x_m
    delta_y = cog_y - expected_src_pos_y_m

    # Expected source position expressed in the shower frame of each image
    # (origin at the image centroid, first axis rotated by psi).
    disp, miss = camera_to_shower_coordinates(
        expected_src_pos_x_m,
        expected_src_pos_y_m,
        cog_x,
        cog_y,
        data['psi'],
    )
    # -1 where the source has a positive longitudinal coordinate, +1 where negative.
    orientation = np.sign(disp) * -1

    src_dep_params = pd.DataFrame(index=data.index)
    src_dep_params['expected_src_x'] = expected_src_pos_x_m
    src_dep_params['expected_src_y'] = expected_src_pos_y_m
    src_dep_params['dist'] = np.sqrt(delta_x**2 + delta_y**2)
    src_dep_params['time_gradient_from_source'] = data['time_gradient'] * orientation
    src_dep_params['skewness_from_source'] = data['skewness'] * orientation
    src_dep_params['alpha'] = np.rad2deg(np.arctan(np.abs(miss / disp)))

    return src_dep_params
def update_timing(self, image_c, image_t, mask, hillas):
    """Redraw the pulse-time versus longitudinal-position panel for one event.

    Parameters
    ----------
    image_c : ndarray
        Per-pixel charge image.
    image_t : ndarray
        Per-pixel pulse-time image.
    mask : ndarray
        Pixel selection applied to both images and to ``self.geom``.
    hillas : object
        Must expose ``x``, ``y`` and ``psi`` (``psi`` is converted to degrees
        for the title).
    """
    charge = image_c[mask]
    arrival = image_t[mask]
    selected_geom = self.geom[mask]

    # Only pixels with strictly positive charge take part in the fit.
    positive = charge > 0
    pulse_time = arrival[positive]
    fit_weights = np.sqrt(charge[positive])

    shower_frame = camera_to_shower_coordinates(
        selected_geom.pix_x[positive],
        selected_geom.pix_y[positive],
        hillas.x,
        hillas.y,
        hillas.psi,
    )
    longi = shower_frame[0].value

    # Scatter points: pulse time against position along the shower axis.
    self.p_tg.set_xdata(longi)
    self.p_tg.set_ydata(pulse_time)

    # Weighted first-degree fit; c[1] is reported as the time gradient
    # (presumably the numpy.polynomial convention, lowest order first -- confirm
    # against the file's polyfit/polyval import).
    c = polyfit(longi, pulse_time, 1, w=fit_weights)
    fit_x = np.linspace(longi.min(), longi.max(), 10)
    fit_y = polyval(fit_x, c)
    self.l_tg.set_xdata(fit_x)
    self.l_tg.set_ydata(fit_y)

    self.ax_tg.set_title(
        f"tgrad = {c[1]:.2f}, psi = {hillas.psi.to('deg'):.2f}")
    self.ax_tg.relim()
    self.ax_tg.autoscale_view()
def get_cherenkov_shower_image(xpix, ypix, centroid_x, centroid_y,
                               length, width, psi, time_gradient, time_intercept):
    """
    Build the normalised amplitude PDF and the pulse-time image of a
    Cherenkov shower ellipse, using the ctapipe toymodel.

    Parameters
    ----------
    xpix : ndarray
        Pixel X coordinates. Unit: m
    ypix : ndarray
        Pixel Y coordinates. Unit: m
    centroid_x : float
        X coordinate for the center of the ellipse. Unit: m
    centroid_y : float
        Y coordinate for the center of the ellipse. Unit: m
    length : float
        Length of the ellipse. Unit: m
    width : float
        Width of the ellipse. Unit: m
    psi : float
        Rotation of the ellipse major axis from the X axis. Unit: degrees
    time_gradient : float
        Rate at which the time changes with distance along the shower axis
        Unit: ns / m
    time_intercept : float
        Pulse time at the shower centroid. Unit: ns

    Returns
    -------
    pdf : ndarray
        Probability density function of the Cherenkov shower ellipse amplitude
    time : ndarray
        Pulse time per pixel. Unit: ns
    """
    # Attach the documented units to the plain-number inputs.
    xpix = u.Quantity(xpix, u.m)
    ypix = u.Quantity(ypix, u.m)
    centroid_x = u.Quantity(centroid_x, u.m)
    centroid_y = u.Quantity(centroid_y, u.m)
    psi = Angle(psi, unit='deg')

    ellipse = Gaussian(
        x=centroid_x,
        y=centroid_y,
        length=u.Quantity(length, u.m),
        width=u.Quantity(width, u.m),
        psi=psi,
    )
    shower_image_pdf = ellipse.pdf(xpix, ypix)

    # Normalise so the pixel values sum to one
    shower_image_pdf /= shower_image_pdf.sum()

    # TODO: replace when ctapipe 0.8 is released
    shower_frame = camera_to_shower_coordinates(xpix, ypix, centroid_x, centroid_y, psi)
    longitudinal = shower_frame[0].to_value(u.m)

    # Linear time development along the major axis only.
    time = longitudinal * time_gradient + time_intercept

    return shower_image_pdf, time
def main():
    """Save a pulse-time versus longitudinal-position plot for every usable event.

    For each DL1 file in the hard-coded list, every event is cleaned and
    Hillas-parametrised; events with an empty cleaning mask or a failing
    parametrisation are skipped. One PDF per remaining event is written via
    ``RelationPlotter``.
    """
    paths = [
        "/Volumes/gct-jason/astri_onsky_archive/d2019-05-15_simulations/proton/run1_dl1.h5",
    ]

    df_list = []

    for ipath, path in enumerate(paths):
        with DL1Reader(path) as reader:
            n_events = reader.get_metadata()['n_events']
            mapping = reader.get_mapping()
            geom = get_ctapipe_camera_geometry(mapping, plate_scale=37.56e-3)

            desc = "Looping over events"
            it = reader.iterate_over_events()
            for df in tqdm(it, total=n_events, desc=desc):
                iev = df['iev'].values[0]
                image = df['photons'].values
                time = df['pulse_time'].values

                mask = obtain_cleaning_mask(geom, image, time)
                if not mask.any():
                    # Nothing survived the cleaning.
                    continue

                image_m, time_m, geom_m = image[mask], time[mask], geom[mask]

                try:
                    hillas = hillas_parameters(geom_m, image_m)
                except HillasParameterizationError:
                    continue

                # timing_parameters(geom_m, image_m, time_m, hillas)

                # Keep only the cleaned pixels with strictly positive charge.
                gt0 = image_m > 0
                peakpos = time_m[gt0]
                intensity = image_m[gt0]

                longi, trans = camera_to_shower_coordinates(
                    geom_m.pix_x[gt0],
                    geom_m.pix_y[gt0],
                    hillas.x,
                    hillas.y,
                    hillas.psi,
                )
                longi = longi.value
                trans = trans.value

                # df_list.append(pd.DataFrame(dict(
                #     ipath=ipath,
                #     iev=iev,
                #     longi=longi,
                #     peakpos=peakpos,
                # )))

                # One plot per event, named after the file index and event id.
                output_path = get_plot(
                    f"d190524_time_gradient/relation/i{ipath}_e{iev}.pdf")
                p_relation = RelationPlotter()
                p_relation.plot(longi, peakpos, intensity)
                p_relation.save(output_path)
def get_source_dependent_parameters(data, config=None):
    """Get parameters for source-dependent analysis.

    The data type is inferred from the table itself: without an ``mc_type``
    column the events are treated as real data; with it, they are treated as
    MC gammas when every ``mc_type`` equals 0 and as MC protons otherwise.
    The expected source position is then obtained from
    ``get_expected_source_pos``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``x``, ``y``, ``psi``, ``time_gradient``
        and ``skewness``.
    config : dict, optional
        Configuration forwarded to ``get_expected_source_pos``. ``None``
        (the default) is treated as an empty dictionary.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``data`` with the columns ``expected_src_x``,
        ``expected_src_y``, ``dist``, ``time_gradient_from_source``,
        ``skewness_from_source`` and ``alpha`` (in degrees).
    """
    # Build a fresh dict per call: a `{}` default would be one object shared
    # by every call and could leak state if a callee ever modified it.
    config = {} if config is None else config

    if 'mc_type' not in data.columns:
        data_type = 'real_data'
    elif (data['mc_type'] == 0).all():
        data_type = 'mc_gamma'
    else:
        data_type = 'mc_proton'

    expected_src_pos_x_m, expected_src_pos_y_m = get_expected_source_pos(data, data_type, config)

    src_dep_params = pd.DataFrame(index=data.index)
    src_dep_params['expected_src_x'] = expected_src_pos_x_m
    src_dep_params['expected_src_y'] = expected_src_pos_y_m

    src_dep_params['dist'] = np.sqrt((data['x'] - expected_src_pos_x_m)**2 + (data['y'] - expected_src_pos_y_m)**2)

    # Expected source position expressed in the shower frame of each image.
    disp, miss = camera_to_shower_coordinates(
        expected_src_pos_x_m,
        expected_src_pos_y_m,
        data['x'],
        data['y'],
        data['psi']
    )

    # Flip the sign of the axis-dependent quantities wherever the source has a
    # positive longitudinal coordinate.
    src_dep_params['time_gradient_from_source'] = data['time_gradient'] * np.sign(disp) * -1
    src_dep_params['skewness_from_source'] = data['skewness'] * np.sign(disp) * -1
    src_dep_params['alpha'] = np.rad2deg(np.arctan(np.abs(miss / disp)))

    return src_dep_params
z_next = False for i in range(30): rand = np.random.RandomState(i) x = rand.uniform(-1, 1, 1)[0] * u.m y = rand.uniform(-1, 1, 1)[0] * u.m length = rand.uniform(1, 2.5, 1)[0] * u.m width = rand.uniform(0.5, 0.9, 1)[0] * u.m psi = rand.uniform(0, 360, 1)[0] * u.deg radius = 2.2 * u.m sigma = 0.3 * u.m max_time = rand.uniform(7, 12, 1)[0] max_amp = 15 #rand.uniform(10, 15, 1)[0] longi, trans = camera_to_shower_coordinates(xpix, ypix, x, y, psi) time = longi - longi.min() time = np.round(time * max_time / time.max()).value.astype(np.int) type_rand = np.round(rand.uniform(1, 20, 1)[0]) if z_next: image = np.zeros(32, dtype=np.int) image[[5, 6, 7, 8, 13, 18, 25, 24, 23, 26]] = 5 time = np.full(32, 5, dtype=np.int) z_next = False elif type_rand == 3: image = np.zeros(32, dtype=np.int) image[[5, 6, 7, 8, 13, 19, 25, 24, 23]] = 5 time = np.full(32, 5, dtype=np.int) z_next = True elif type_rand == 7:
# NOTE(review): this fragment starts mid-script. `ax`, `fig`, `x`, `p`, `noise` and
# `pix` are defined above the visible part; the plot call may belong to a loop over
# pixels -- confirm against the full script.
ax.plot(x, p + 0.2 * noise, label=f'Pixel {pix}')
ax.legend(loc=(0.5, 0.6), frameon=False)
fig.savefig('build/calibrated.pdf')

# Ellipse parameters reused twice below: to compute shower-frame coordinates and,
# unpacked as keyword arguments, to build the SkewedGaussian model.
hillas = dict(
    x=80 * u.mm,
    y=20 * u.mm,
    width=15 * u.mm,
    length=50 * u.mm,
    psi=35 * u.deg,
)

cam = CameraGeometry.from_name('FACT').transform_to(EngineeringCameraFrame())

# Pixel positions relative to the ellipse centre, rotated by psi.
longi, trans = camera_to_shower_coordinates(cam.pix_x, cam.pix_y, hillas['x'], hillas['y'], hillas['psi'])

m = SkewedGaussian(**hillas, skewness=0.3)
# `noise` is rebound here to the third value returned by generate_image.
img, signal, noise = m.generate_image(cam, intensity=2500, nsb_level_pe=3)

# Two per-pixel time estimates: a uniform random time in [0, 60) and a time that
# is linear in the longitudinal coordinate (0.2 per mm, offset 25).
time_noise = np.random.uniform(0, 60, cam.n_pixels)
time_image = 0.2 * longi.to_value(u.mm) + 25

# Per-pixel weighted mean of the two estimates, weighted by (noise + 1) and
# (signal + 1) respectively.
time = np.average(np.column_stack([time_noise, time_image]), weights=np.column_stack([noise, signal]) + 1, axis=1)

# Colormaps with masked ("bad") values drawn in gray.
inferno = plt.get_cmap('inferno')
inferno.set_bad('gray')
rdbu = plt.get_cmap('RdBu_r')
rdbu.set_bad('gray')
def apply_models(dl1,
                 classifier,
                 reg_energy,
                 reg_disp_vector=None,
                 reg_disp_norm=None,
                 cls_disp_sign=None,
                 focal_length=28 * u.m,
                 custom_config=None
                 ):
    """
    Apply previously trained Random Forests to a set of data
    depending on a set of features.
    The right set of disp models must be passed depending on the config:
    ``reg_disp_vector`` for ``disp_method == 'disp_vector'``, or
    ``reg_disp_norm`` and ``cls_disp_sign`` for
    ``disp_method == 'disp_norm_sign'``.

    Parameters
    ----------
    dl1: `pandas.DataFrame`
    classifier: Random Forest Classifier
        RF for Gamma/Hadron separation
    reg_energy: Random Forest Regressor
        RF for Energy reconstruction
    reg_disp_vector: Random Forest Regressor
        RF for disp vector reconstruction
    reg_disp_norm: Random Forest Regressor
        RF for disp norm reconstruction
    cls_disp_sign: Random Forest Classifier
        RF for disp sign reconstruction
    focal_length: `astropy.unit`
    custom_config: dictionary
        Modified configuration to update the standard one

    Returns
    -------
    `pandas.DataFrame`
        dataframe including reconstructed dl2 features

    Raises
    ------
    ValueError
        If ``config['disp_method']`` is neither ``'disp_vector'`` nor
        ``'disp_norm_sign'``, or if the classifier predicts more than two
        classes.
    """
    custom_config = {} if custom_config is None else custom_config
    config = replace_config(standard_config, custom_config)

    # Fail early with a clear message; otherwise an unknown method would only
    # surface further down as an unbound `disp_vector`.
    disp_method = config['disp_method']
    if disp_method not in ('disp_vector', 'disp_norm_sign'):
        raise ValueError(
            f"Unsupported disp_method {disp_method!r}: "
            "expected 'disp_vector' or 'disp_norm_sign'."
        )

    energy_regression_features = config["energy_regression_features"]
    disp_regression_features = config["disp_regression_features"]
    disp_classification_features = config["disp_classification_features"]
    classification_features = config["particle_classification_features"]
    events_filters = config["events_filters"]

    dl2 = utils.filter_events(dl1,
                              filters=events_filters,
                              finite_params=disp_regression_features
                                            + energy_regression_features
                                            + classification_features
                                            + disp_classification_features,
                              )

    # Reconstruction of Energy and disp_norm distance
    dl2['log_reco_energy'] = reg_energy.predict(dl2[energy_regression_features])
    dl2['reco_energy'] = 10 ** (dl2['log_reco_energy'])

    if disp_method == 'disp_vector':
        disp_vector = reg_disp_vector.predict(dl2[disp_regression_features])
    else:  # 'disp_norm_sign'
        disp_norm = reg_disp_norm.predict(dl2[disp_regression_features])
        disp_sign = cls_disp_sign.predict(dl2[disp_classification_features])
        dl2['reco_disp_norm'] = disp_norm
        dl2['reco_disp_sign'] = disp_sign

        disp_angle = dl2['psi']  # the source here is supposed to be in the direction given by Hillas
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    dl2['reco_disp_dx'] = disp_vector[:, 0]
    dl2['reco_disp_dy'] = disp_vector[:, 1]

    # Construction of Source position in camera coordinates from disp_norm distance.
    dl2['reco_src_x'], dl2['reco_src_y'] = disp.disp_to_pos(dl2.reco_disp_dx,
                                                            dl2.reco_disp_dy,
                                                            dl2.x,
                                                            dl2.y,
                                                            )

    longi, _ = camera_to_shower_coordinates(dl2['reco_src_x'], dl2['reco_src_y'],
                                            dl2['x'], dl2['y'], dl2['psi'])

    # Obtain the time gradient with sign relative to the reconstructed shower direction (reco_src_x, reco_src_y)
    # Defined positive if light arrival times increase with distance to it. Negative otherwise:
    dl2['signed_time_gradient'] = -1 * np.sign(longi) * dl2['time_gradient']

    # Obtain skewness with sign relative to the reconstructed shower direction (reco_src_x, reco_src_y)
    # Defined on the major image axis; sign is such that it is typically positive for gammas:
    dl2['signed_skewness'] = -1 * np.sign(longi) * dl2['skewness']

    # Telescope pointing: MC columns take precedence over the generic ones.
    if 'mc_alt_tel' in dl2.columns:
        alt_tel = dl2['mc_alt_tel'].values
        az_tel = dl2['mc_az_tel'].values
    elif 'alt_tel' in dl2.columns:
        alt_tel = dl2['alt_tel'].values
        az_tel = dl2['az_tel'].values
    else:
        # No pointing available: fall back to -pi/2 for every event
        # (presumably a placeholder value -- confirm with downstream users).
        alt_tel = - np.pi / 2. * np.ones(len(dl2))
        az_tel = - np.pi / 2. * np.ones(len(dl2))

    src_pos_reco = utils.reco_source_position_sky(dl2.x.values * u.m,
                                                  dl2.y.values * u.m,
                                                  dl2.reco_disp_dx.values * u.m,
                                                  dl2.reco_disp_dy.values * u.m,
                                                  focal_length,
                                                  alt_tel * u.rad,
                                                  az_tel * u.rad)

    dl2['reco_alt'] = src_pos_reco.alt.rad
    dl2['reco_az'] = src_pos_reco.az.rad

    dl2['reco_type'] = classifier.predict(dl2[classification_features]).astype(int)
    probs = classifier.predict_proba(dl2[classification_features])

    # This check is valid as long as we train on only two classes (gammas and protons)
    if probs.shape[1] > 2:
        raise ValueError("The classifier is predicting more than two classes, "
                         "the predicted probabilty to assign as gammaness is unclear. "
                         "Please check training data")

    # gammaness is the prediction probability for the first class (0)
    dl2['gammaness'] = probs[:, 0]

    return dl2
def build_models(filegammas, fileprotons,
                 save_models=True,
                 path_models="./",
                 energy_min=-np.inf,
                 custom_config=None,
                 ):
    """
    Uses MC data to train Random Forests for Energy and DISP
    reconstruction and G/H separation and returns the trained RFs.
    The passed config superseeds the standard configuration.
    Here is the complete workflow with the number of events selected from the config:

    .. mermaid::

        graph LR
            GAMMA[gammas] -->|#`gamma_regressors`| REG(regressors) --> DISK
            GAMMA --> S(split)
            S --> |#`gamma_tmp_regressors`| g_train
            S --> |#`gamma_classifier`| g_test
            g_train --> tmp_reg(tmp regressors)
            tmp_reg --- A[ ]:::empty
            g_test --- A
            A --> g_test_dl2
            g_test_dl2 --- D[ ]:::empty
            protons -------- |#`proton_classifier`| D
            D --> cls(classifier)
            cls--> DISK
            classDef empty width:0px,height:0px;

    Parameters
    ----------
    filegammas: string
        path to the file with MC gamma events
    fileprotons: string
        path to the file with MC proton events
    save_models: bool
        True to save the trained models on disk
    path_models: string
        path of a directory where to save the models.
        If it does not exist, the directory is created.
    energy_min: float
        Lower cut applied to the ``log_reco_energy`` column of the events used
        to train the gamma/hadron classifier.
    custom_config: dictionary
        Modified configuration to update the standard one.
        The event numbers used for each training step are read from
        ``config["n_training_events"]`` (keys ``gamma_regressors``,
        ``gamma_tmp_regressors``, ``gamma_classifier``, ``proton_classifier``).

    Returns
    -------
    if config['disp_method'] == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    elif config['disp_method'] == 'disp_norm_sign':
        return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh

    Raises
    ------
    ValueError
        If the requested number of gamma or proton events in the config for
        the training is not valid. See config["n_training_events"].
        Also raised when ``config['disp_method']`` is neither
        ``'disp_vector'`` nor ``'disp_norm_sign'``.
    """
    custom_config = {} if custom_config is None else custom_config
    config = replace_config(standard_config, custom_config)

    # Reject an unknown method before any file is read or any forest trained.
    disp_method = config['disp_method']
    if disp_method not in ('disp_vector', 'disp_norm_sign'):
        raise ValueError(
            f"Unsupported disp_method {disp_method!r}: "
            "expected 'disp_vector' or 'disp_norm_sign'."
        )
    use_disp_vector = disp_method == 'disp_vector'

    events_filters = config["events_filters"]

    # Adding a filter on mc_type just for training
    # NOTE(review): this writes into the dict held by `config`; whether that dict
    # is shared with the standard/custom config depends on replace_config.
    events_filters['mc_type'] = [-9000, np.inf]

    df_gamma = pd.read_hdf(filegammas, key=dl1_params_lstcam_key)
    df_proton = pd.read_hdf(fileprotons, key=dl1_params_lstcam_key)

    def _with_src_dep_params(df, filename):
        # Append the 'on' source-dependent parameters to df: read them from the
        # file when they are already stored there, otherwise compute them here.
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(filename):
            src_dep_df = get_srcdep_params(filename)
        else:
            subarray_info = SubarrayDescription.from_hdf(filename)
            tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length
            src_dep_df = get_source_dependent_parameters(df, config, focal_length=focal_length)
        return pd.concat([df, src_dep_df['on']], axis=1)

    if config['source_dependent']:
        df_gamma = _with_src_dep_params(df_gamma, filegammas)
        df_proton = _with_src_dep_params(df_proton, fileprotons)

    finite_params = (config['energy_regression_features']
                     + config['disp_regression_features']
                     + config['particle_classification_features']
                     + config['disp_classification_features'])

    df_gamma = utils.filter_events(df_gamma,
                                   filters=events_filters,
                                   finite_params=finite_params,
                                   )

    df_proton = utils.filter_events(df_proton,
                                    filters=events_filters,
                                    finite_params=finite_params,
                                    )

    # Training MC gammas in reduced viewcone
    src_r_m = np.sqrt(df_gamma['src_x'] ** 2 + df_gamma['src_y'] ** 2)
    foclen = OPTICS.equivalent_focal_length.value
    src_r_deg = np.rad2deg(np.arctan(src_r_m / foclen))
    src_r_min_deg, src_r_max_deg = config['train_gamma_src_r_deg'][0], config['train_gamma_src_r_deg'][1]
    df_gamma = df_gamma[(src_r_deg >= src_r_min_deg) & (src_r_deg <= src_r_max_deg)]

    # Train regressors for energy and disp_norm reconstruction, only with gammas
    n_gamma_regressors = config["n_training_events"]["gamma_regressors"]
    if n_gamma_regressors not in [1.0, None]:
        try:
            df_gamma_reg, _ = train_test_split(df_gamma, train_size=n_gamma_regressors)
        except ValueError as e:
            raise ValueError(f"The requested number of gammas {n_gamma_regressors} "
                             f"for the regressors training is not valid.") from e
    else:
        # 1.0 or None: use every selected gamma.
        df_gamma_reg = df_gamma

    reg_energy = train_energy(df_gamma_reg, custom_config=config)

    # NOTE(review): the disp models are trained on the full df_gamma, not on the
    # df_gamma_reg subsample used for the energy regressor -- confirm this is
    # intended given the workflow described in the docstring.
    if use_disp_vector:
        reg_disp_vector = train_disp_vector(df_gamma, custom_config=config)
    else:
        reg_disp_norm = train_disp_norm(df_gamma, custom_config=config)
        cls_disp_sign = train_disp_sign(df_gamma, custom_config=config)

    # Train classifier for gamma/hadron separation.
    test_size = config['n_training_events']['gamma_classifier']
    train_size = config['n_training_events']['gamma_tmp_regressors']
    try:
        train, testg = train_test_split(df_gamma, test_size=test_size, train_size=train_size)
    except ValueError as e:
        raise ValueError(
            "The requested number of gammas for the classifier training is not valid."
        ) from e

    n_proton_classifier = config["n_training_events"]["proton_classifier"]
    if n_proton_classifier not in [1.0, None]:
        try:
            df_proton, _ = train_test_split(df_proton, train_size=n_proton_classifier)
        except ValueError as e:
            raise ValueError(
                "The requested number of protons for the classifier training is not valid."
            ) from e

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    test = pd.concat([testg, df_proton], ignore_index=True)

    # Temporary regressors, trained on gammas that are not in the classifier sample.
    temp_reg_energy = train_energy(train, custom_config=config)

    if use_disp_vector:
        temp_reg_disp_vector = train_disp_vector(train, custom_config=config)
    else:
        tmp_reg_disp_norm = train_disp_norm(train, custom_config=config)
        tmp_cls_disp_sign = train_disp_sign(train, custom_config=config)

    # Apply the regressors to the test set
    test['log_reco_energy'] = temp_reg_energy.predict(test[config['energy_regression_features']])

    if use_disp_vector:
        disp_vector = temp_reg_disp_vector.predict(test[config['disp_regression_features']])
    else:
        disp_norm = tmp_reg_disp_norm.predict(test[config['disp_regression_features']])
        disp_sign = tmp_cls_disp_sign.predict(test[config['disp_classification_features']])
        test['reco_disp_norm'] = disp_norm
        test['reco_disp_sign'] = disp_sign

        disp_angle = test['psi']  # the source here is supposed to be in the direction given by Hillas
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    test['reco_disp_dx'] = disp_vector[:, 0]
    test['reco_disp_dy'] = disp_vector[:, 1]

    test['reco_src_x'], test['reco_src_y'] = disp.disp_to_pos(test['reco_disp_dx'],
                                                              test['reco_disp_dy'],
                                                              test['x'], test['y'])

    # give skewness and time gradient a meaningful sign, i.e. referred to the reconstructed source position:
    longi, _ = camera_to_shower_coordinates(test['reco_src_x'], test['reco_src_y'],
                                            test['x'], test['y'], test['psi'])
    test['signed_skewness'] = -1 * np.sign(longi) * test['skewness']
    test['signed_time_gradient'] = -1 * np.sign(longi) * test['time_gradient']

    # Apply cut in reconstructed energy. New train set is the previous
    # test with energy and disp_norm reconstructed.
    train = test[test['log_reco_energy'] > energy_min]

    # Drop the temporary models before training the classifier.
    del temp_reg_energy
    if use_disp_vector:
        del temp_reg_disp_vector
    else:
        del tmp_reg_disp_norm, tmp_cls_disp_sign

    # Train the Classifier
    cls_gh = train_sep(train, custom_config=config)

    if save_models:
        os.makedirs(path_models, exist_ok=True)

        joblib.dump(reg_energy, os.path.join(path_models, 'reg_energy.sav'), compress=3)
        if use_disp_vector:
            joblib.dump(reg_disp_vector, os.path.join(path_models, 'reg_disp_vector.sav'), compress=3)
        else:
            joblib.dump(reg_disp_norm, os.path.join(path_models, 'reg_disp_norm.sav'), compress=3)
            joblib.dump(cls_disp_sign, os.path.join(path_models, 'cls_disp_sign.sav'), compress=3)

        joblib.dump(cls_gh, os.path.join(path_models, 'cls_gh.sav'), compress=3)

    if use_disp_vector:
        return reg_energy, reg_disp_vector, cls_gh
    return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh
def build_models(
    filegammas,
    fileprotons,
    save_models=True,
    path_models="./",
    energy_min=-np.inf,
    custom_config=None,
    test_size=0.2,
):
    """Uses MC data to train Random Forests for Energy and disp
    reconstruction and G/H separation, and returns the trained RFs.
    The passed config superseeds the standard configuration.

    Parameters
    ----------
    filegammas: string
        Name of the file with MC gamma events
    fileprotons: string
        Name of the file with MC proton events
    save_models: boolean
        Save the trained RF in a file to use them anytime.
    path_models: string
        Directory where the trained RF are stored; created if missing.
    energy_min: float
        Lower cut applied to the ``log_reco_energy`` column of the events used
        to train the gamma/hadron classifier.
    custom_config: dictionary
        Modified configuration to update the standard one. ``None`` (the
        default) is treated as an empty dictionary.
    test_size: float or int
        Passed to ``train_test_split`` as ``test_size`` when splitting the
        gammas; the test part is merged with the protons to train the
        classifier.

    Returns
    -------
    if config['disp_method'] == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    elif config['disp_method'] == 'disp_norm_sign':
        return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh

    Raises
    ------
    ValueError
        If ``config['disp_method']`` is neither ``'disp_vector'`` nor
        ``'disp_norm_sign'``.
    """
    # A fresh dict per call instead of a shared mutable default.
    custom_config = {} if custom_config is None else custom_config
    config = replace_config(standard_config, custom_config)

    # Reject an unknown method before any file is read or any forest trained.
    disp_method = config['disp_method']
    if disp_method not in ('disp_vector', 'disp_norm_sign'):
        raise ValueError(
            f"Unsupported disp_method {disp_method!r}: "
            "expected 'disp_vector' or 'disp_norm_sign'."
        )
    use_disp_vector = disp_method == 'disp_vector'

    events_filters = config["events_filters"]

    # Adding a filter on mc_type just for training
    # NOTE(review): this writes into the dict held by `config`; whether that dict
    # is shared with the standard/custom config depends on replace_config.
    events_filters['mc_type'] = [-9000, np.inf]

    df_gamma = pd.read_hdf(filegammas, key=dl1_params_lstcam_key)
    df_proton = pd.read_hdf(fileprotons, key=dl1_params_lstcam_key)

    def _with_src_dep_params(df, filename):
        # Append the 'on' source-dependent parameters to df: read them from the
        # file when they are already stored there, otherwise compute them here.
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(filename):
            src_dep_df = get_srcdep_params(filename)
        else:
            subarray_info = SubarrayDescription.from_hdf(filename)
            tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length
            src_dep_df = get_source_dependent_parameters(
                df, config, focal_length=focal_length)
        return pd.concat([df, src_dep_df['on']], axis=1)

    if config['source_dependent']:
        df_gamma = _with_src_dep_params(df_gamma, filegammas)
        df_proton = _with_src_dep_params(df_proton, fileprotons)

    finite_params = (config['energy_regression_features']
                     + config['disp_regression_features']
                     + config['particle_classification_features']
                     + config['disp_classification_features'])

    df_gamma = utils.filter_events(
        df_gamma,
        filters=events_filters,
        finite_params=finite_params,
    )

    df_proton = utils.filter_events(
        df_proton,
        filters=events_filters,
        finite_params=finite_params,
    )

    # Training MC gammas in reduced viewcone
    src_r_m = np.sqrt(df_gamma['src_x']**2 + df_gamma['src_y']**2)
    foclen = OPTICS.equivalent_focal_length.value
    src_r_deg = np.rad2deg(np.arctan(src_r_m / foclen))
    src_r_min_deg, src_r_max_deg = config['train_gamma_src_r_deg'][0], config['train_gamma_src_r_deg'][1]
    df_gamma = df_gamma[(src_r_deg >= src_r_min_deg) & (src_r_deg <= src_r_max_deg)]

    # Train regressors for energy and disp_norm reconstruction, only with gammas
    reg_energy = train_energy(df_gamma, custom_config=config)

    if use_disp_vector:
        reg_disp_vector = train_disp_vector(df_gamma, custom_config=config)
    else:
        reg_disp_norm = train_disp_norm(df_gamma, custom_config=config)
        cls_disp_sign = train_disp_sign(df_gamma, custom_config=config)

    # Train classifier for gamma/hadron separation.
    train, testg = train_test_split(df_gamma, test_size=test_size)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    test = pd.concat([testg, df_proton], ignore_index=True)

    # Temporary regressors, trained on gammas that are not in the classifier sample.
    temp_reg_energy = train_energy(train, custom_config=config)

    if use_disp_vector:
        temp_reg_disp_vector = train_disp_vector(train, custom_config=config)
    else:
        tmp_reg_disp_norm = train_disp_norm(train, custom_config=config)
        tmp_cls_disp_sign = train_disp_sign(train, custom_config=config)

    # Apply the regressors to the test set
    test['log_reco_energy'] = temp_reg_energy.predict(
        test[config['energy_regression_features']])

    if use_disp_vector:
        disp_vector = temp_reg_disp_vector.predict(
            test[config['disp_regression_features']])
    else:
        disp_norm = tmp_reg_disp_norm.predict(
            test[config['disp_regression_features']])
        disp_sign = tmp_cls_disp_sign.predict(
            test[config['disp_classification_features']])
        test['reco_disp_norm'] = disp_norm
        test['reco_disp_sign'] = disp_sign

        disp_angle = test['psi']  # the source here is supposed to be in the direction given by Hillas
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    test['reco_disp_dx'] = disp_vector[:, 0]
    test['reco_disp_dy'] = disp_vector[:, 1]

    test['reco_src_x'], test['reco_src_y'] = disp.disp_to_pos(
        test['reco_disp_dx'], test['reco_disp_dy'], test['x'], test['y'])

    # give skewness and time gradient a meaningful sign, i.e. referred to the reconstructed source position:
    longi, _ = camera_to_shower_coordinates(test['reco_src_x'],
                                            test['reco_src_y'], test['x'],
                                            test['y'], test['psi'])
    test['signed_skewness'] = -1 * np.sign(longi) * test['skewness']
    test['signed_time_gradient'] = -1 * np.sign(longi) * test['time_gradient']

    # Apply cut in reconstructed energy. New train set is the previous
    # test with energy and disp_norm reconstructed.
    train = test[test['log_reco_energy'] > energy_min]

    # Drop the temporary models before training the classifier.
    del temp_reg_energy
    if use_disp_vector:
        del temp_reg_disp_vector
    else:
        del tmp_reg_disp_norm, tmp_cls_disp_sign

    # Train the Classifier
    cls_gh = train_sep(train, custom_config=config)

    if save_models:
        os.makedirs(path_models, exist_ok=True)

        joblib.dump(reg_energy, os.path.join(path_models, 'reg_energy.sav'))
        if use_disp_vector:
            joblib.dump(reg_disp_vector, os.path.join(path_models, 'reg_disp_vector.sav'))
        else:
            joblib.dump(reg_disp_norm, os.path.join(path_models, 'reg_disp_norm.sav'))
            joblib.dump(cls_disp_sign, os.path.join(path_models, 'cls_disp_sign.sav'))

        joblib.dump(cls_gh, os.path.join(path_models, 'cls_gh.sav'))

    if use_disp_vector:
        return reg_energy, reg_disp_vector, cls_gh
    return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh