output['initial_flux_err_stat_{}'.format(
            composition)] = initial_flux_err_stat
        output['initial_flux_err_sys_{}'.format(
            composition)] = initial_flux_err_sys

    # Don't want to consume too much memory by keeping too many figures open
    plt.close('all')

    return output


def save_flux_plot(group, config, case, ts_stopping, num_groups):
    """Saves flux comparison plot
    """
    comp_list = comp.get_comp_list(num_groups=num_groups)
    energybins = comp.get_energybins(config)

    # Output directory for figures
    figures_dir = os.path.join(comp.paths.figures_dir, 'unfolding', config,
                               'datachallenge', '{}_case'.format(case),
                               'prior_comparisons',
                               'ts_stopping_{}'.format(ts_stopping))

    # Make initial counts (pre-unfolding) plot
    fig_counts, ax_counts = plt.subplots()

    fig = plt.figure(figsize=(12, 5))
    gs = gridspec.GridSpec(nrows=2, ncols=num_groups + 1, hspace=0.1)
    # gs = gridspec.GridSpec(nrows=2, ncols=num_groups+1, hspace=0.075)
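    # Layout (inferred from the loop below): top row holds one flux panel per
    # composition group plus a 'total' panel; bottom row holds the
    # corresponding flux-ratio panels.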
    axs_flux, axs_ratio = {}, {}
    for idx, composition in enumerate(comp_list + ['total']):
Example #2
    parser.add_argument('--param_type',
                        dest='param_type',
                        default='int',
                        choices=['int', 'float', 'string'],
                        help='Type of hyperparameter.')
    parser.add_argument('--cv',
                        dest='cv',
                        type=int,
                        default=10,
                        help='Number of cross-validation folds.')

    args = parser.parse_args()

    color_dict = comp.get_color_dict()

    energybins = comp.get_energybins(args.config)
    comp_list = comp.get_comp_list(num_groups=args.num_groups)
    feature_list, feature_labels = comp.get_training_features()
    # pipeline_str = 'RF_comp_{}_{}-groups'.format(args.config, args.num_groups)
    # pipeline_str = 'SVC_comp_{}_{}-groups'.format(args.config, args.num_groups)
    # pipeline_str = 'LogisticRegression_comp_{}_{}-groups'.format(args.config, args.num_groups)
    pipeline_str = 'xgboost_comp_{}_{}-groups'.format(args.config,
                                                      args.num_groups)
    # pipeline_str = 'BDT_comp_{}_{}-groups'.format(args.config, args.num_groups)

    df_sim_train, df_sim_test = comp.load_sim(
        config=args.config,
        log_energy_min=energybins.log_energy_min,
        log_energy_max=energybins.log_energy_max,
        test_size=0.5,
        verbose=True)
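
    # Hypothetical invocation (the script name and the --config/--num_groups
    # flags are inferred from the args.* attributes used here; only
    # --param_type and --cv appear in this excerpt):
    #   python fit_hyperparams.py --config IC86.2012 --num_groups 2 \
    #       --param_type int --cv 10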
Example #3
# Assumed imports for this excerpt. The import name for `comp` is an
# assumption; thrown_showers_per_ebin and generate_fit_func are helpers
# defined elsewhere in this module.
from collections import defaultdict

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit

import comptools as comp


def fit_efficiencies(df_file=None,
                     config='IC86.2012',
                     num_groups=2,
                     sigmoid='slant',
                     n_samples=1000):
    """Fits detector efficiencies vs. energy for each composition group."""
    print('Loading df_file: {}'.format(df_file))

    comp_list = comp.get_comp_list(num_groups=num_groups)

    energybins = comp.get_energybins(config=config)
    # Want to include energy bins for energies below the normal analysis energy
    # range so we can get a better estimate of how the detector efficiencies turn on
    low_energy_bins = np.arange(5.0, energybins.log_energy_min, 0.1)
    bins = np.concatenate((low_energy_bins, energybins.log_energy_bins))
    bin_midpoints = (bins[1:] + bins[:-1]) / 2

    df_sim = comp.load_sim(df_file=df_file,
                           config=config,
                           test_size=0,
                           log_energy_min=None,
                           log_energy_max=None)

    # Thrown areas are different for different energy bins
    thrown_radii = comp.simfunctions.get_sim_thrown_radius(bin_midpoints)
    thrown_areas = np.pi * thrown_radii**2
    thrown_areas_max = thrown_areas.max()

    # Calculate efficiencies and effective areas for each composition group
    efficiencies = pd.DataFrame()
    effective_area, effective_area_err = {}, {}
    for composition in comp_list + ['total']:
        compositions = df_sim['comp_group_{}'.format(num_groups)]
        # Need the list of simulation sets for this composition to get the
        # number of thrown showers
        if composition == 'total':
            comp_mask = np.ones(len(compositions), dtype=bool)
        else:
            comp_mask = compositions == composition
        sim_list = df_sim.loc[comp_mask, 'sim'].unique()
        thrown_showers = thrown_showers_per_ebin(sim_list,
                                                 log_energy_bins=bins)
        print('thrown_showers ({}) = {}'.format(composition, thrown_showers))
        passed_showers = np.histogram(df_sim.loc[comp_mask, 'MC_log_energy'],
                                      bins=bins)[0]

        efficiency, efficiency_err = comp.ratio_error(
            num=passed_showers,
            num_err=np.sqrt(passed_showers),
            den=thrown_showers,
            den_err=np.sqrt(thrown_showers))
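
        # comp.ratio_error above presumably applies standard error
        # propagation for a ratio r = n / d with independent uncertainties:
        #     sigma_r = r * sqrt((sigma_n / n)**2 + (sigma_d / d)**2)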

        # Calculate effective area from efficiencies and thrown areas
        effective_area[composition] = efficiency * thrown_areas
        effective_area_err[composition] = efficiency_err * thrown_areas

        # Scale efficiencies by geometric factor to take into account
        # different simulated thrown radii
        thrown_radius_factor = thrown_areas / thrown_areas_max
        efficiencies['eff_{}'.format(
            composition)] = efficiency * thrown_radius_factor
        efficiencies['eff_err_{}'.format(
            composition)] = efficiency_err * thrown_radius_factor

    # Fit sigmoid function to efficiency vs. energy distribution
    # fit_func = sigmoid_flat if sigmoid == 'flat' else sigmoid_slant
    poly_degree = 1
    num_params = poly_degree + 3
    fit_func = generate_fit_func(degree=poly_degree)
    # p0 = [7e4, 8.0, 50.0] if sigmoid == 'flat' else [7e4, 8.5, 50.0, 800]
    init_params = [8.5, 50.0, 7e4, 800]
    # Use zeros rather than np.empty so any parameters beyond the available
    # initial guesses start from a defined value
    p0 = np.zeros(num_params)
    p0[:min(num_params, len(init_params))] = init_params[:num_params]

    efficiencies_fit = {}
    energy_min_fit, energy_max_fit = 5.8, energybins.log_energy_max
    midpoints_fitmask = np.logical_and(bin_midpoints > energy_min_fit,
                                       bin_midpoints < energy_max_fit)
    # Find best-fit sigmoid function
    for composition in comp_list + ['total']:
        eff = efficiencies.loc[midpoints_fitmask, 'eff_{}'.format(composition)]
        eff_err = efficiencies.loc[midpoints_fitmask,
                                   'eff_err_{}'.format(composition)]
        popt, pcov = curve_fit(fit_func,
                               bin_midpoints[midpoints_fitmask],
                               eff,
                               p0=p0,
                               sigma=eff_err)
        eff_fit = fit_func(bin_midpoints, *popt)
        efficiencies_fit[composition] = eff_fit

        chi2 = np.sum((eff - eff_fit[midpoints_fitmask])**2 / (eff_err)**2)
        ndof = len(eff_fit[midpoints_fitmask]) - len(p0)
        print('({}) chi2 / ndof = {} / {} = {}'.format(composition, chi2, ndof,
                                                       chi2 / ndof))

    # Perform many fits to random statistical fluctuations of the best-fit
    # efficiency. This will be used to estimate the uncertainty in the
    # best-fit efficiency.
    np.random.seed(2)
    efficiencies_fit_samples = defaultdict(list)
    for _ in range(n_samples):
        for composition in comp_list + ['total']:
            # Get new random sample to fit
            eff_err = efficiencies.loc[midpoints_fitmask,
                                       'eff_err_{}'.format(composition)]
            eff_sample = np.random.normal(
                efficiencies_fit[composition][midpoints_fitmask], eff_err)
            # Fit with error bars
            popt, pcov = curve_fit(fit_func,
                                   bin_midpoints[midpoints_fitmask],
                                   eff_sample,
                                   p0=p0,
                                   sigma=eff_err)

            eff_fit_sample = fit_func(bin_midpoints, *popt)
            efficiencies_fit_samples[composition].append(eff_fit_sample)

    # Calculate median and error of efficiency fits
    eff_fit = pd.DataFrame()
    for composition in comp_list + ['total']:
        fit_median, fit_err_low, fit_err_high = np.percentile(
            efficiencies_fit_samples[composition], (50, 16, 84), axis=0)
        fit_err_low = np.abs(fit_err_low - fit_median)
        fit_err_high = np.abs(fit_err_high - fit_median)

        eff_fit['eff_median_{}'.format(composition)] = fit_median
        eff_fit['eff_err_low_{}'.format(composition)] = fit_err_low
        eff_fit['eff_err_high_{}'.format(composition)] = fit_err_high

    return efficiencies.loc[midpoints_fitmask, :], eff_fit
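

# A minimal usage sketch (the hdf file path is hypothetical; n_samples is
# reduced here purely for speed):
if __name__ == '__main__':
    efficiencies, eff_fit = fit_efficiencies(
        df_file='/path/to/sim_dataframe.hdf',
        config='IC86.2012',
        num_groups=2,
        n_samples=100)
    print(eff_fit.head())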
Example #4
                        choices=comp.simfunctions.get_sim_configs(),
                        help='Detector configuration')
    args = parser.parse_args()

    for config in args.config:

        df_sim = comp.load_sim(config=config, test_size=0, verbose=True)

        comp_list = ['light', 'heavy']
        MC_comp_mask = {composition: df_sim['MC_comp_class'] == composition
                        for composition in comp_list}
        light_mask = MC_comp_mask['light']
        heavy_mask = MC_comp_mask['heavy']

        energybins = comp.get_energybins(config)

        # Energy resolution: log10 ratio of reconstructed to true energy
        energy_res = np.log10(df_sim['lap_energy'] / df_sim['MC_energy'])

        medians_light, stds_light, _ = comp.data_functions.get_median_std(
            df_sim['MC_log_energy'][light_mask], energy_res[light_mask],
            energybins.log_energy_bins)
        medians_heavy, stds_heavy, _ = comp.data_functions.get_median_std(
            df_sim['MC_log_energy'][heavy_mask], energy_res[heavy_mask],
            energybins.log_energy_bins)

        gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1], hspace=0.1)
        ax1 = plt.subplot(gs[0])
        ax2 = plt.subplot(gs[1], sharex=ax1)
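
        # Plausible continuation (a sketch, not the original code): plot the
        # median energy resolution per bin with its spread as error bars.
        # energybins.log_energy_midpoints is assumed to exist, mirroring the
        # log_energy_bins attribute used above; the GeV unit is an assumption.
        ax1.errorbar(energybins.log_energy_midpoints, medians_light,
                     yerr=stds_light, marker='.', ls='None', label='light')
        ax2.errorbar(energybins.log_energy_midpoints, medians_heavy,
                     yerr=stds_heavy, marker='.', ls='None', label='heavy')
        ax2.set_xlabel(r'$\log_{10}(E_{\mathrm{MC}}/\mathrm{GeV})$')
        plt.show()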