Пример #1
0
def obtain_time_image(x, y, centroid_x, centroid_y, psi, time_gradient, time_intercept):
    """Evaluate the pulse-time image of a toymodel shower.

    The pulse time is modelled as a linear function of the position along
    the longitudinal (major) axis of the shower; the transverse coordinate
    does not enter the result.

    Parameters
    ----------
    x : u.Quantity[length]
        X camera coordinate(s) at which the time is evaluated,
        typically the array of pixel X positions.
    y : u.Quantity[length]
        Y camera coordinate(s) at which the time is evaluated,
        typically the array of pixel Y positions.
    centroid_x : u.Quantity[length]
        X camera coordinate of the shower centroid.
    centroid_y : u.Quantity[length]
        Y camera coordinate of the shower centroid.
    psi : convertible to `astropy.coordinates.Angle`
        Rotation angle about the centroid (0 = x-axis).
    time_gradient : u.Quantity[time/length]
        Change of the pulse time per unit distance along the shower axis.
    time_intercept : u.Quantity[time]
        Pulse time at the shower centroid.

    Returns
    -------
    float or ndarray
        Pulse time in nanoseconds at (x, y).

    """
    along_axis, _ = camera_to_shower_coordinates(x, y, centroid_x, centroid_y, psi)

    # Strip the units explicitly so that the result is a plain number in ns.
    distance_m = along_axis.to_value(u.m)
    slope_ns_per_m = time_gradient.to_value(u.ns / u.m)
    offset_ns = time_intercept.to_value(u.ns)

    return distance_m * slope_ns_per_m + offset_ns
Пример #2
0
def calc_source_dependent_parameters(data, expected_src_pos_x_m,
                                     expected_src_pos_y_m):
    """Compute source-dependent image parameters for a given source position.

    Parameters
    ----------
    data : pandas.DataFrame
        Event table; the columns 'x', 'y', 'psi', 'time_gradient' and
        'skewness' are read.
    expected_src_pos_x_m : float
        Expected source position along x.
    expected_src_pos_y_m : float
        Expected source position along y.

    Returns
    -------
    pandas.DataFrame
        Table sharing the index of `data` with the columns 'expected_src_x',
        'expected_src_y', 'dist', 'time_gradient_from_source',
        'skewness_from_source' and 'alpha' (in degrees).

    """
    params = pd.DataFrame(index=data.index)

    params['expected_src_x'] = expected_src_pos_x_m
    params['expected_src_y'] = expected_src_pos_y_m

    # Distance between the image centre of gravity and the expected source.
    delta_x = data['x'] - expected_src_pos_x_m
    delta_y = data['y'] - expected_src_pos_y_m
    params['dist'] = np.sqrt(delta_x**2 + delta_y**2)

    # Express the expected source position in the frame of each shower.
    disp, miss = camera_to_shower_coordinates(
        expected_src_pos_x_m,
        expected_src_pos_y_m,
        data['x'],
        data['y'],
        data['psi'],
    )

    # Time gradient and skewness get their sign relative to the source side.
    side = np.sign(disp)
    params['time_gradient_from_source'] = data['time_gradient'] * side * -1
    params['skewness_from_source'] = data['skewness'] * side * -1

    ratio = np.abs(miss / disp)
    params['alpha'] = np.rad2deg(np.arctan(ratio))

    return params
Пример #3
0
    def update_timing(self, image_c, image_t, mask, hillas):
        """Redraw the time-gradient panel for one event.

        Pixels selected by `mask` that carry a positive charge are projected
        onto the shower axis described by `hillas`. Their pulse times are
        shown against the longitudinal coordinate together with a
        first-degree polynomial fit weighted by the square root of the charge.

        Parameters
        ----------
        image_c : ndarray
            Charge per pixel.
        image_t : ndarray
            Pulse time per pixel.
        mask : ndarray
            Pixel selection applied to both images and to ``self.geom``.
        hillas : object
            Provides the attributes ``x``, ``y`` and ``psi`` of the shower.
        """
        charge_sel = image_c[mask]
        time_sel = image_t[mask]
        geom_sel = self.geom[mask]

        # Only pixels with a positive charge enter the plot and the fit.
        positive = charge_sel > 0
        sel_x = geom_sel.pix_x[positive]
        sel_y = geom_sel.pix_y[positive]
        charge = charge_sel[positive]
        arrival = time_sel[positive]

        longi, _ = camera_to_shower_coordinates(
            sel_x, sel_y, hillas.x, hillas.y, hillas.psi
        )
        longi = longi.value

        self.p_tg.set_xdata(longi)
        self.p_tg.set_ydata(arrival)

        # c[1] is reported as the gradient, i.e. coefficients are assumed to
        # be ordered from low to high degree -- TODO confirm against the
        # polyfit/polyval imported by this module.
        c = polyfit(longi, arrival, 1, w=np.sqrt(charge))
        fit_x = np.linspace(longi.min(), longi.max(), 10)
        fit_y = polyval(fit_x, c)
        self.l_tg.set_xdata(fit_x)
        self.l_tg.set_ydata(fit_y)

        title = f"tgrad = {c[1]:.2f}, psi = {hillas.psi.to('deg'):.2f}"
        self.ax_tg.set_title(title)

        # Rescale the axes to the new data.
        self.ax_tg.relim()
        self.ax_tg.autoscale_view()
def get_cherenkov_shower_image(xpix, ypix, centroid_x, centroid_y, length,
                               width, psi, time_gradient, time_intercept):
    """
    Build the normalised amplitude PDF and the pulse-time image of a
    Cherenkov shower ellipse.

    The amplitude follows the ctapipe toymodel ``Gaussian``; the time varies
    linearly along the major axis of the ellipse.

    Parameters
    ----------
    xpix : ndarray
        Pixel X coordinates. Unit: m
    ypix : ndarray
        Pixel Y coordinates. Unit: m
    centroid_x : float
        X coordinate of the ellipse centre. Unit: m
    centroid_y : float
        Y coordinate of the ellipse centre. Unit: m
    length : float
        Length of the ellipse. Unit: m
    width : float
        Width of the ellipse. Unit: m
    psi : float
        Rotation of the ellipse major axis from the X axis. Unit: degrees
    time_gradient : float
        Change of the pulse time per unit distance along the shower axis.
        Unit: ns / m
    time_intercept : float
        Pulse time at the shower centroid. Unit: ns

    Returns
    -------
    pdf : ndarray
        Amplitude PDF of the shower ellipse, normalised to a sum of one
    time : ndarray
        Pulse time per pixel. Unit: ns
    """
    # Attach units to the plain numbers expected by the toymodel.
    pix_x = u.Quantity(xpix, u.m)
    pix_y = u.Quantity(ypix, u.m)
    centre_x = u.Quantity(centroid_x, u.m)
    centre_y = u.Quantity(centroid_y, u.m)
    rotation = Angle(psi, unit='deg')

    ellipse = Gaussian(
        x=centre_x,
        y=centre_y,
        length=u.Quantity(length, u.m),
        width=u.Quantity(width, u.m),
        psi=rotation,
    )
    pdf = ellipse.pdf(pix_x, pix_y)

    # Normalise in place so that the pixel values sum to one.
    pdf /= pdf.sum()

    # TODO: replace when ctapipe 0.8 is released
    shower_coords = camera_to_shower_coordinates(
        pix_x, pix_y, centre_x, centre_y, rotation
    )
    longitudinal_m = shower_coords[0].to_value(u.m)
    pulse_time = longitudinal_m * time_gradient + time_intercept

    return pdf, pulse_time
Пример #5
0
def main():
    """Plot pulse time against longitudinal position for every usable event.

    For each event of each DL1 file in the hard-coded ``paths`` list, the
    image is cleaned and Hillas-parameterised. The cleaned pixels with a
    positive charge are projected onto the shower axis, and their
    longitudinal coordinate, pulse time and charge are passed to
    ``RelationPlotter``. One PDF per event is saved to the location returned
    by ``get_plot``.

    Events with an empty cleaning mask, or for which the Hillas
    parameterisation fails, are skipped.
    """
    paths = [
        "/Volumes/gct-jason/astri_onsky_archive/d2019-05-15_simulations/proton/run1_dl1.h5",
    ]

    for ipath, path in enumerate(paths):
        with DL1Reader(path) as reader:
            n_events = reader.get_metadata()['n_events']
            mapping = reader.get_mapping()
            geom = get_ctapipe_camera_geometry(mapping, plate_scale=37.56e-3)

            desc = "Looping over events"
            it = reader.iterate_over_events()
            for df in tqdm(it, total=n_events, desc=desc):
                iev = df['iev'].values[0]

                image = df['photons'].values
                time = df['pulse_time'].values

                mask = obtain_cleaning_mask(geom, image, time)
                if not mask.any():
                    # Nothing survived the cleaning: no shower to describe.
                    continue

                image_m = image[mask]
                time_m = time[mask]
                geom_m = geom[mask]

                try:
                    hillas = hillas_parameters(geom_m, image_m)
                except HillasParameterizationError:
                    continue

                # Restrict to pixels with a positive charge.
                gt0 = image_m > 0
                pix_x = geom_m.pix_x[gt0]
                pix_y = geom_m.pix_y[gt0]
                peakpos = time_m[gt0]
                intensity = image_m[gt0]

                # Only the longitudinal coordinate is plotted; the
                # transverse one is not needed.
                longi, _ = camera_to_shower_coordinates(
                    pix_x, pix_y, hillas.x, hillas.y, hillas.psi)
                longi = longi.value

                p_relation = RelationPlotter()
                p_relation.plot(longi, peakpos, intensity)
                p_relation.save(
                    get_plot(
                        f"d190524_time_gradient/relation/i{ipath}_e{iev}.pdf"))
Пример #6
0
def get_source_dependent_parameters(data, config=None):
    """Get parameters for source-dependent analysis.

    The expected source position is obtained from
    ``get_expected_source_pos`` and the image parameters are then expressed
    relative to that position.

    Parameters
    ----------
    data : pandas.DataFrame
        Event table. The columns 'x', 'y', 'psi', 'time_gradient' and
        'skewness' are read. If a 'mc_type' column is present the data are
        treated as simulation: 'mc_gamma' when every entry equals 0,
        'mc_proton' otherwise. Without that column the data are treated as
        'real_data'.
    config : dict, optional
        Configuration forwarded to ``get_expected_source_pos``.
        ``None`` (the default) is equivalent to an empty dictionary.

    Returns
    -------
    pandas.DataFrame
        Table sharing the index of `data` with the columns 'expected_src_x',
        'expected_src_y', 'dist', 'time_gradient_from_source',
        'skewness_from_source' and 'alpha' (in degrees).

    """
    # A fresh dict per call: a `{}` default would be shared between calls.
    config = {} if config is None else config

    src_dep_params = pd.DataFrame(index=data.index)

    if 'mc_type' in data.columns:
        if (data['mc_type'] == 0).all():
            data_type = 'mc_gamma'
        else:
            data_type = 'mc_proton'
    else:
        data_type = 'real_data'

    expected_src_pos_x_m, expected_src_pos_y_m = get_expected_source_pos(data, data_type, config)

    src_dep_params['expected_src_x'] = expected_src_pos_x_m
    src_dep_params['expected_src_y'] = expected_src_pos_y_m

    # Distance between the image centre of gravity and the expected source.
    src_dep_params['dist'] = np.sqrt(
        (data['x'] - expected_src_pos_x_m)**2
        + (data['y'] - expected_src_pos_y_m)**2
    )

    # Express the expected source position in the frame of each shower.
    disp, miss = camera_to_shower_coordinates(
        expected_src_pos_x_m,
        expected_src_pos_y_m,
        data['x'],
        data['y'],
        data['psi'],
    )

    # Time gradient and skewness get their sign relative to the source side.
    src_dep_params['time_gradient_from_source'] = data['time_gradient'] * np.sign(disp) * -1
    src_dep_params['skewness_from_source'] = data['skewness'] * np.sign(disp) * -1

    src_dep_params['alpha'] = np.rad2deg(np.arctan(np.abs(miss / disp)))

    return src_dep_params
Пример #7
0
z_next = False

for i in range(30):
    rand = np.random.RandomState(i)

    x = rand.uniform(-1, 1, 1)[0] * u.m
    y = rand.uniform(-1, 1, 1)[0] * u.m
    length = rand.uniform(1, 2.5, 1)[0] * u.m
    width = rand.uniform(0.5, 0.9, 1)[0] * u.m
    psi = rand.uniform(0, 360, 1)[0] * u.deg
    radius = 2.2 * u.m
    sigma = 0.3 * u.m
    max_time = rand.uniform(7, 12, 1)[0]
    max_amp = 15  #rand.uniform(10, 15, 1)[0]

    longi, trans = camera_to_shower_coordinates(xpix, ypix, x, y, psi)
    time = longi - longi.min()
    time = np.round(time * max_time / time.max()).value.astype(np.int)

    type_rand = np.round(rand.uniform(1, 20, 1)[0])
    if z_next:
        image = np.zeros(32, dtype=np.int)
        image[[5, 6, 7, 8, 13, 18, 25, 24, 23, 26]] = 5
        time = np.full(32, 5, dtype=np.int)
        z_next = False
    elif type_rand == 3:
        image = np.zeros(32, dtype=np.int)
        image[[5, 6, 7, 8, 13, 19, 25, 24, 23]] = 5
        time = np.full(32, 5, dtype=np.int)
        z_next = True
    elif type_rand == 7:
Пример #8
0
    ax.plot(x, p + 0.2 * noise, label=f'Pixel {pix}')

# Finish the figure of the calibrated pulses and write it to disk.
ax.legend(loc=(0.5, 0.6), frameon=False)

fig.savefig('build/calibrated.pdf')

# Parameters of the toy shower; the same dict feeds both the coordinate
# transformation and the SkewedGaussian model below.
hillas = dict(
    x=80 * u.mm,
    y=20 * u.mm,
    width=15 * u.mm,
    length=50 * u.mm,
    psi=35 * u.deg,
)

# FACT camera geometry, transformed to the engineering camera frame.
cam = CameraGeometry.from_name('FACT').transform_to(EngineeringCameraFrame())
# Pixel positions expressed along (longi) and across (trans) the shower axis.
longi, trans = camera_to_shower_coordinates(cam.pix_x, cam.pix_y, hillas['x'],
                                            hillas['y'], hillas['psi'])

# Toy image: `signal` and `noise` are used separately below as weights.
m = SkewedGaussian(**hillas, skewness=0.3)
img, signal, noise = m.generate_image(cam, intensity=2500, nsb_level_pe=3)

# Noise pixels get a uniformly distributed time in [0, 60); shower pixels get
# a time that grows linearly with the longitudinal coordinate (in mm).
time_noise = np.random.uniform(0, 60, cam.n_pixels)
time_image = 0.2 * longi.to_value(u.mm) + 25

# Per-pixel time: weighted mean of the two components, weighted by
# (noise + 1) and (signal + 1). The +1 keeps the weights non-zero when both
# components are zero -- presumably `noise` and `signal` are non-negative;
# TODO confirm.
time = np.average(np.column_stack([time_noise, time_image]),
                  weights=np.column_stack([noise, signal]) + 1,
                  axis=1)

# Colormaps with gray as the "bad" colour.
# NOTE(review): whether set_bad modifies the globally registered colormap or
# a copy depends on the matplotlib version -- confirm.
inferno = plt.get_cmap('inferno')
inferno.set_bad('gray')
rdbu = plt.get_cmap('RdBu_r')
rdbu.set_bad('gray')
Пример #9
0
def apply_models(dl1,
                 classifier,
                 reg_energy,
                 reg_disp_vector=None,
                 reg_disp_norm=None,
                 cls_disp_sign=None,
                 focal_length=28 * u.m,
                 custom_config=None
                 ):
    """
    Apply previously trained Random Forests to a set of data
    depending on a set of features.
    The right set of disp models must be passed depending on the config:
    `reg_disp_vector` for ``disp_method == 'disp_vector'``, `reg_disp_norm`
    and `cls_disp_sign` for ``disp_method == 'disp_norm_sign'``.

    Parameters
    ----------
    dl1: `pandas.DataFrame`
    classifier: Random Forest Classifier
        RF for Gamma/Hadron separation
    reg_energy: Random Forest Regressor
        RF for Energy reconstruction
    reg_disp_vector: Random Forest Regressor
        RF for disp vector reconstruction
    reg_disp_norm: Random Forest Regressor
        RF for disp norm reconstruction
    cls_disp_sign: Random Forest Classifier
        RF for disp sign reconstruction
    focal_length: `astropy.unit`
    custom_config: dictionary
        Modified configuration to update the standard one

    Returns
    -------
    `pandas.DataFrame`
        dataframe including reconstructed dl2 features

    Raises
    ------
    ValueError
        If ``config['disp_method']`` is neither 'disp_vector' nor
        'disp_norm_sign', or if the classifier predicts more than two classes.
    """
    custom_config = {} if custom_config is None else custom_config
    config = replace_config(standard_config, custom_config)
    energy_regression_features = config["energy_regression_features"]
    disp_regression_features = config["disp_regression_features"]
    disp_classification_features = config["disp_classification_features"]
    classification_features = config["particle_classification_features"]
    events_filters = config["events_filters"]

    # Fail early with a clear message; otherwise an unknown method only
    # surfaces later as an unbound `disp_vector`.
    disp_method = config['disp_method']
    if disp_method not in ('disp_vector', 'disp_norm_sign'):
        raise ValueError(
            f"Unknown disp_method {disp_method!r}, "
            "expected 'disp_vector' or 'disp_norm_sign'"
        )

    dl2 = utils.filter_events(dl1,
                              filters=events_filters,
                              finite_params=disp_regression_features
                                            + energy_regression_features
                                            + classification_features
                                            + disp_classification_features,
                              )

    # Reconstruction of Energy and disp_norm distance
    dl2['log_reco_energy'] = reg_energy.predict(dl2[energy_regression_features])
    dl2['reco_energy'] = 10 ** (dl2['log_reco_energy'])

    if disp_method == 'disp_vector':
        disp_vector = reg_disp_vector.predict(dl2[disp_regression_features])
    else:  # 'disp_norm_sign'
        disp_norm = reg_disp_norm.predict(dl2[disp_regression_features])
        disp_sign = cls_disp_sign.predict(dl2[disp_classification_features])
        dl2['reco_disp_norm'] = disp_norm
        dl2['reco_disp_sign'] = disp_sign

        disp_angle = dl2['psi']  # the source here is supposed to be in the direction given by Hillas
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    dl2['reco_disp_dx'] = disp_vector[:, 0]
    dl2['reco_disp_dy'] = disp_vector[:, 1]

    # Construction of Source position in camera coordinates from disp_norm distance.

    dl2['reco_src_x'], dl2['reco_src_y'] = disp.disp_to_pos(dl2.reco_disp_dx,
                                                            dl2.reco_disp_dy,
                                                            dl2.x,
                                                            dl2.y,
                                                            )

    longi, _ = camera_to_shower_coordinates(dl2['reco_src_x'], dl2['reco_src_y'],
                                            dl2['x'], dl2['y'], dl2['psi'])

    # Obtain the time gradient with sign relative to the reconstructed shower direction (reco_src_x, reco_src_y)
    # Defined positive if light arrival times increase with distance to it. Negative otherwise:
    dl2['signed_time_gradient'] = -1 * np.sign(longi) * dl2['time_gradient']

    # Obtain skewness with sign relative to the reconstructed shower direction (reco_src_x, reco_src_y)
    # Defined on the major image axis; sign is such that it is typically positive for gammas:
    dl2['signed_skewness'] = -1 * np.sign(longi) * dl2['skewness']

    # Telescope pointing: MC columns take precedence over the plain ones.
    if 'mc_alt_tel' in dl2.columns:
        alt_tel = dl2['mc_alt_tel'].values
        az_tel = dl2['mc_az_tel'].values
    elif 'alt_tel' in dl2.columns:
        alt_tel = dl2['alt_tel'].values
        az_tel = dl2['az_tel'].values
    else:
        # No pointing information available: fall back to -pi/2 rad for both.
        alt_tel = - np.pi / 2. * np.ones(len(dl2))
        az_tel = - np.pi / 2. * np.ones(len(dl2))

    src_pos_reco = utils.reco_source_position_sky(dl2.x.values * u.m,
                                                  dl2.y.values * u.m,
                                                  dl2.reco_disp_dx.values * u.m,
                                                  dl2.reco_disp_dy.values * u.m,
                                                  focal_length,
                                                  alt_tel * u.rad,
                                                  az_tel * u.rad)

    dl2['reco_alt'] = src_pos_reco.alt.rad
    dl2['reco_az'] = src_pos_reco.az.rad

    dl2['reco_type'] = classifier.predict(dl2[classification_features]).astype(int)
    probs = classifier.predict_proba(dl2[classification_features])

    # This check is valid as long as we train on only two classes (gammas and protons)
    if probs.shape[1] > 2:
        raise ValueError("The classifier is predicting more than two classes, "
                         "the predicted probability to assign as gammaness is unclear. "
                         "Please check training data")

    # gammaness is the prediction probability for the first class (0)
    dl2['gammaness'] = probs[:, 0]

    return dl2
Пример #10
0
def build_models(filegammas, fileprotons,
                 save_models=True, path_models="./",
                 energy_min=-np.inf,
                 custom_config=None,
                 ):
    """
    Uses MC data to train Random Forests for Energy and DISP
    reconstruction and G/H separation and returns the trained RFs.
    The passed config supersedes the standard configuration.
    Here is the complete workflow with the number of events selected from the config:

    .. mermaid::

        graph LR
            GAMMA[gammas] -->|#`gamma_regressors`| REG(regressors) --> DISK
            GAMMA --> S(split)
            S --> |#`gamma_tmp_regressors`| g_train
            S --> |#`gamma_classifier`| g_test
            g_train --> tmp_reg(tmp regressors)
            tmp_reg --- A[ ]:::empty
            g_test --- A
            A --> g_test_dl2
            g_test_dl2 --- D[ ]:::empty
            protons -------- |#`proton_classifier`| D
            D --> cls(classifier)
            cls--> DISK
            classDef empty width:0px,height:0px;


    Parameters
    ----------
    filegammas: string
        path to the file with MC gamma events
    fileprotons: string
        path to the file with MC proton events
    save_models: bool
        True to save the trained models on disk
    path_models: string
        path of a directory where to save the models.
        if it does not exist, the directory is created
    energy_min: float
        Lower cut on ``log_reco_energy`` applied to the events used to
        train the gamma/hadron classifier
    custom_config: dictionary
        Modified configuration to update the standard one.
        The sizes of the training samples are read from
        ``config["n_training_events"]`` (keys ``gamma_regressors``,
        ``gamma_tmp_regressors``, ``gamma_classifier``, ``proton_classifier``).

    Returns
    -------
    if config['disp_method'] == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    elif config['disp_method'] == 'disp_norm_sign':
        return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh

    Raises
    ------
    ValueError
        If ``config['disp_method']`` is neither 'disp_vector' nor 'disp_norm_sign'.
        If the requested number of events in the config for one of the trainings is not valid.
        See config["n_training_events"]
    """

    custom_config = {} if custom_config is None else custom_config
    config = replace_config(standard_config, custom_config)

    # Fail before any file is read or any forest is trained; otherwise an
    # unknown method only surfaces later as an unbound `disp_vector`.
    disp_method = config['disp_method']
    if disp_method not in ('disp_vector', 'disp_norm_sign'):
        raise ValueError(
            f"Unknown disp_method {disp_method!r}, "
            "expected 'disp_vector' or 'disp_norm_sign'"
        )

    events_filters = config["events_filters"]

    # Adding a filter on mc_type just for training
    # NOTE(review): this writes into config["events_filters"] in place;
    # whether it leaks into standard_config depends on replace_config -- confirm.
    events_filters['mc_type'] = [-9000, np.inf]

    # Columns that must be finite for an event to be kept.
    finite_params = (config['energy_regression_features']
                     + config['disp_regression_features']
                     + config['particle_classification_features']
                     + config['disp_classification_features'])

    def _with_on_source_params(filename, df):
        """Return `df` with the 'on' source-dependent parameters appended as columns."""
        # if source-dependent parameters are already in dl1 data, just read those data
        # if not, source-dependent parameters are added here
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(filename):
            src_dep_df = get_srcdep_params(filename)
        else:
            subarray_info = SubarrayDescription.from_hdf(filename)
            tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length
            src_dep_df = get_source_dependent_parameters(df, config, focal_length=focal_length)
        return pd.concat([df, src_dep_df['on']], axis=1)

    df_gamma = pd.read_hdf(filegammas, key=dl1_params_lstcam_key)
    df_proton = pd.read_hdf(fileprotons, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        df_gamma = _with_on_source_params(filegammas, df_gamma)
        df_proton = _with_on_source_params(fileprotons, df_proton)

    df_gamma = utils.filter_events(df_gamma,
                                   filters=events_filters,
                                   finite_params=finite_params,
                                   )

    df_proton = utils.filter_events(df_proton,
                                    filters=events_filters,
                                    finite_params=finite_params,
                                    )

    # Training MC gammas in reduced viewcone
    src_r_m = np.sqrt(df_gamma['src_x'] ** 2 + df_gamma['src_y'] ** 2)
    foclen = OPTICS.equivalent_focal_length.value
    src_r_deg = np.rad2deg(np.arctan(src_r_m / foclen))
    src_r_min, src_r_max = config['train_gamma_src_r_deg'][0], config['train_gamma_src_r_deg'][1]
    df_gamma = df_gamma[(src_r_deg >= src_r_min) & (src_r_deg <= src_r_max)]

    # Train regressors for energy and disp_norm reconstruction, only with gammas
    n_gamma_regressors = config["n_training_events"]["gamma_regressors"]
    if n_gamma_regressors not in [1.0, None]:
        try:
            df_gamma_reg, _ = train_test_split(df_gamma, train_size=n_gamma_regressors)
        except ValueError as e:
            raise ValueError(f"The requested number of gammas {n_gamma_regressors} "
                             f"for the regressors training is not valid.") from e
    else:
        df_gamma_reg = df_gamma

    reg_energy = train_energy(df_gamma_reg, custom_config=config)

    # NOTE(review): the disp models are trained on the full `df_gamma`, not on
    # the `df_gamma_reg` subset used for the energy regressor -- confirm that
    # this is intended.
    if disp_method == 'disp_vector':
        reg_disp_vector = train_disp_vector(df_gamma, custom_config=config)
    else:  # 'disp_norm_sign'
        reg_disp_norm = train_disp_norm(df_gamma, custom_config=config)
        cls_disp_sign = train_disp_sign(df_gamma, custom_config=config)

    # Train classifier for gamma/hadron separation.
    test_size = config['n_training_events']['gamma_classifier']
    train_size = config['n_training_events']['gamma_tmp_regressors']
    try:
        train, testg = train_test_split(df_gamma, test_size=test_size, train_size=train_size)
    except ValueError as e:
        raise ValueError(
            "The requested number of gammas for the classifier training is not valid."
        ) from e

    n_proton_classifier = config["n_training_events"]["proton_classifier"]
    if n_proton_classifier not in [1.0, None]:
        try:
            df_proton, _ = train_test_split(df_proton, train_size=n_proton_classifier)
        except ValueError as e:
            raise ValueError(
                "The requested number of protons for the classifier training is not valid."
            ) from e

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    test = pd.concat([testg, df_proton], ignore_index=True)

    temp_reg_energy = train_energy(train, custom_config=config)

    if disp_method == 'disp_vector':
        temp_reg_disp_vector = train_disp_vector(train, custom_config=config)
    else:  # 'disp_norm_sign'
        tmp_reg_disp_norm = train_disp_norm(train, custom_config=config)
        tmp_cls_disp_sign = train_disp_sign(train, custom_config=config)

    # Apply the regressors to the test set

    test['log_reco_energy'] = temp_reg_energy.predict(test[config['energy_regression_features']])

    if disp_method == 'disp_vector':
        disp_vector = temp_reg_disp_vector.predict(test[config['disp_regression_features']])
    else:  # 'disp_norm_sign'
        disp_norm = tmp_reg_disp_norm.predict(test[config['disp_regression_features']])
        disp_sign = tmp_cls_disp_sign.predict(test[config['disp_classification_features']])
        test['reco_disp_norm'] = disp_norm
        test['reco_disp_sign'] = disp_sign

        disp_angle = test['psi']  # the source here is supposed to be in the direction given by Hillas
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    test['reco_disp_dx'] = disp_vector[:, 0]
    test['reco_disp_dy'] = disp_vector[:, 1]

    test['reco_src_x'], test['reco_src_y'] = disp.disp_to_pos(test['reco_disp_dx'],
                                                              test['reco_disp_dy'],
                                                              test['x'], test['y'])

    # give skewness and time gradient a meaningful sign, i.e. referred to the reconstructed source position:
    longi, _ = camera_to_shower_coordinates(test['reco_src_x'], test['reco_src_y'],
                                            test['x'], test['y'], test['psi'])
    test['signed_skewness'] = -1 * np.sign(longi) * test['skewness']
    test['signed_time_gradient'] = -1 * np.sign(longi) * test['time_gradient']

    # Apply cut in reconstructed energy. New train set is the previous
    # test with energy and disp_norm reconstructed.

    train = test[test['log_reco_energy'] > energy_min]

    # The temporary models are no longer needed once the test set is filled.
    del temp_reg_energy

    if disp_method == 'disp_vector':
        del temp_reg_disp_vector
    else:  # 'disp_norm_sign'
        del tmp_reg_disp_norm, tmp_cls_disp_sign

    # Train the Classifier

    cls_gh = train_sep(train, custom_config=config)

    if save_models:
        os.makedirs(path_models, exist_ok=True)

        file_reg_energy = os.path.join(path_models, "reg_energy.sav")
        joblib.dump(reg_energy, file_reg_energy, compress=3)

        if disp_method == 'disp_vector':
            file_reg_disp_vector = os.path.join(path_models, "reg_disp_vector.sav")
            joblib.dump(reg_disp_vector, file_reg_disp_vector, compress=3)
        else:  # 'disp_norm_sign'
            file_reg_disp_norm = os.path.join(path_models, 'reg_disp_norm.sav')
            file_cls_disp_sign = os.path.join(path_models, 'cls_disp_sign.sav')
            joblib.dump(reg_disp_norm, file_reg_disp_norm, compress=3)
            joblib.dump(cls_disp_sign, file_cls_disp_sign, compress=3)

        file_cls_gh = os.path.join(path_models, "cls_gh.sav")
        joblib.dump(cls_gh, file_cls_gh, compress=3)

    if disp_method == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh
Пример #11
0
def build_models(
    filegammas,
    fileprotons,
    save_models=True,
    path_models="./",
    energy_min=-np.inf,
    custom_config=None,
    test_size=0.2,
):
    """Uses MC data to train Random Forests for Energy and disp
    reconstruction and G/H separation and returns the trained RFs.
    The passed `custom_config` updates the standard configuration.

    Parameters
    ----------
    filegammas: string
        Name of the file with MC gamma events
    fileprotons: string
        Name of the file with MC proton events
    save_models: boolean
        Save the trained RF in a file to use them anytime.
    path_models: string
        Directory in which to store the trained RF; it is created if it
        does not exist.
    energy_min: float
        Lower cut on ``log_reco_energy`` applied to the events used to
        train the gamma/hadron classifier
    custom_config: dictionary
        Modified configuration to update the standard one.
        ``None`` (the default) is equivalent to an empty dictionary.
    test_size: float or int
        Passed to ``train_test_split`` as ``test_size``: the part of the
        gammas that is set aside, merged with the protons and used to train
        the gamma/hadron classifier. The rest trains the temporary regressors.

    Returns
    -------
    if config['disp_method'] == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    elif config['disp_method'] == 'disp_norm_sign':
        return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh

    Raises
    ------
    ValueError
        If ``config['disp_method']`` is neither 'disp_vector' nor 'disp_norm_sign'.
    """

    # A fresh dict per call: a `{}` default would be shared between calls.
    custom_config = {} if custom_config is None else custom_config
    config = replace_config(standard_config, custom_config)

    # Fail before any file is read or any forest is trained; otherwise an
    # unknown method only surfaces later as an unbound `disp_vector`.
    disp_method = config['disp_method']
    if disp_method not in ('disp_vector', 'disp_norm_sign'):
        raise ValueError(
            f"Unknown disp_method {disp_method!r}, "
            "expected 'disp_vector' or 'disp_norm_sign'"
        )

    events_filters = config["events_filters"]

    # Adding a filter on mc_type just for training
    # NOTE(review): this writes into config["events_filters"] in place;
    # whether it leaks into standard_config depends on replace_config -- confirm.
    events_filters['mc_type'] = [-9000, np.inf]

    # Columns that must be finite for an event to be kept.
    finite_params = (config['energy_regression_features']
                     + config['disp_regression_features']
                     + config['particle_classification_features']
                     + config['disp_classification_features'])

    def _with_on_source_params(filename, df):
        """Return `df` with the 'on' source-dependent parameters appended as columns."""
        # if source-dependent parameters are already in dl1 data, just read those data
        # if not, source-dependent parameters are added here
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(filename):
            src_dep_df = get_srcdep_params(filename)
        else:
            subarray_info = SubarrayDescription.from_hdf(filename)
            tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length
            src_dep_df = get_source_dependent_parameters(
                df, config, focal_length=focal_length)
        return pd.concat([df, src_dep_df['on']], axis=1)

    df_gamma = pd.read_hdf(filegammas, key=dl1_params_lstcam_key)
    df_proton = pd.read_hdf(fileprotons, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        df_gamma = _with_on_source_params(filegammas, df_gamma)
        df_proton = _with_on_source_params(fileprotons, df_proton)

    df_gamma = utils.filter_events(
        df_gamma,
        filters=events_filters,
        finite_params=finite_params,
    )

    df_proton = utils.filter_events(
        df_proton,
        filters=events_filters,
        finite_params=finite_params,
    )

    # Training MC gammas in reduced viewcone
    src_r_m = np.sqrt(df_gamma['src_x']**2 + df_gamma['src_y']**2)
    foclen = OPTICS.equivalent_focal_length.value
    src_r_deg = np.rad2deg(np.arctan(src_r_m / foclen))
    df_gamma = df_gamma[(src_r_deg >= config['train_gamma_src_r_deg'][0])
                        & (src_r_deg <= config['train_gamma_src_r_deg'][1])]

    # Train regressors for energy and disp_norm reconstruction, only with gammas

    reg_energy = train_energy(df_gamma, custom_config=config)

    if disp_method == 'disp_vector':
        reg_disp_vector = train_disp_vector(df_gamma, custom_config=config)
    else:  # 'disp_norm_sign'
        reg_disp_norm = train_disp_norm(df_gamma, custom_config=config)
        cls_disp_sign = train_disp_sign(df_gamma, custom_config=config)

    # Train classifier for gamma/hadron separation.

    train, testg = train_test_split(df_gamma, test_size=test_size)
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    test = pd.concat([testg, df_proton], ignore_index=True)

    temp_reg_energy = train_energy(train, custom_config=config)

    if disp_method == 'disp_vector':
        temp_reg_disp_vector = train_disp_vector(train, custom_config=config)
    else:  # 'disp_norm_sign'
        tmp_reg_disp_norm = train_disp_norm(train, custom_config=config)
        tmp_cls_disp_sign = train_disp_sign(train, custom_config=config)

    # Apply the regressors to the test set

    test['log_reco_energy'] = temp_reg_energy.predict(
        test[config['energy_regression_features']])

    if disp_method == 'disp_vector':
        disp_vector = temp_reg_disp_vector.predict(
            test[config['disp_regression_features']])
    else:  # 'disp_norm_sign'
        disp_norm = tmp_reg_disp_norm.predict(
            test[config['disp_regression_features']])
        disp_sign = tmp_cls_disp_sign.predict(
            test[config['disp_classification_features']])
        test['reco_disp_norm'] = disp_norm
        test['reco_disp_sign'] = disp_sign

        # the source here is supposed to be in the direction given by Hillas
        disp_angle = test['psi']
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    test['reco_disp_dx'] = disp_vector[:, 0]
    test['reco_disp_dy'] = disp_vector[:, 1]

    test['reco_src_x'], test['reco_src_y'] = disp.disp_to_pos(
        test['reco_disp_dx'], test['reco_disp_dy'], test['x'], test['y'])

    # give skewness and time gradient a meaningful sign, i.e. referred to the reconstructed source position:
    longi, _ = camera_to_shower_coordinates(test['reco_src_x'],
                                            test['reco_src_y'], test['x'],
                                            test['y'], test['psi'])
    test['signed_skewness'] = -1 * np.sign(longi) * test['skewness']
    test['signed_time_gradient'] = -1 * np.sign(longi) * test['time_gradient']

    # Apply cut in reconstructed energy. New train set is the previous
    # test with energy and disp_norm reconstructed.

    train = test[test['log_reco_energy'] > energy_min]

    # The temporary models are no longer needed once the test set is filled.
    del temp_reg_energy

    if disp_method == 'disp_vector':
        del temp_reg_disp_vector
    else:  # 'disp_norm_sign'
        del tmp_reg_disp_norm, tmp_cls_disp_sign

    # Train the Classifier

    cls_gh = train_sep(train, custom_config=config)

    if save_models:
        os.makedirs(path_models, exist_ok=True)

        file_reg_energy = os.path.join(path_models, "reg_energy.sav")
        joblib.dump(reg_energy, file_reg_energy)

        if disp_method == 'disp_vector':
            file_reg_disp_vector = os.path.join(path_models, "reg_disp_vector.sav")
            joblib.dump(reg_disp_vector, file_reg_disp_vector)
        else:  # 'disp_norm_sign'
            file_reg_disp_norm = os.path.join(path_models, 'reg_disp_norm.sav')
            file_cls_disp_sign = os.path.join(path_models, 'cls_disp_sign.sav')
            joblib.dump(reg_disp_norm, file_reg_disp_norm)
            joblib.dump(cls_disp_sign, file_cls_disp_sign)

        file_cls_gh = os.path.join(path_models, "cls_gh.sav")
        joblib.dump(cls_gh, file_cls_gh)

    if disp_method == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh