output['initial_flux_err_stat_{}'.format( composition)] = initial_flux_err_stat output['initial_flux_err_sys_{}'.format( composition)] = initial_flux_err_sys # Don't want to consume too much memory by keeping too many figures open plt.close('all') return output def save_flux_plot(group, config, case, ts_stopping, num_groups): """Saves flux comparison plot """ comp_list = comp.get_comp_list(num_groups=num_groups) energybins = comp.get_energybins(config) # Get plotting axis figures_dir = os.path.join(comp.paths.figures_dir, 'unfolding', config, 'datachallenge', '{}_case'.format(case), 'prior_comparisons', 'ts_stopping_{}'.format(ts_stopping)) # Make initial counts (pre-unfolding) plot fig_counts, ax_counts = plt.subplots() fig = plt.figure(figsize=(12, 5)) gs = gridspec.GridSpec(nrows=2, ncols=num_groups + 1, hspace=0.1) # gs = gridspec.GridSpec(nrows=2, ncols=num_groups+1, hspace=0.075) axs_flux, axs_ratio = {}, {} for idx, composition in enumerate(comp_list + ['total']):
parser.add_argument('--param_type', dest='param_type', default='int', choices=['int', 'float', 'string'], help='Type of hyperparameter.') parser.add_argument('--cv', dest='cv', type=int, default=10, help='Number of cross-validation folds.') args = parser.parse_args() color_dict = comp.get_color_dict() energybins = comp.get_energybins(args.config) comp_list = comp.get_comp_list(num_groups=args.num_groups) feature_list, feature_labels = comp.get_training_features() # pipeline_str = 'RF_comp_{}_{}-groups'.format(args.config, args.num_groups) # pipeline_str = 'SVC_comp_{}_{}-groups'.format(args.config, args.num_groups) # pipeline_str = 'LogisticRegression_comp_{}_{}-groups'.format(args.config, args.num_groups) pipeline_str = 'xgboost_comp_{}_{}-groups'.format(args.config, args.num_groups) # pipeline_str = 'BDT_comp_{}_{}-groups'.format(args.config, args.num_groups) df_sim_train, df_sim_test = comp.load_sim( config=args.config, log_energy_min=energybins.log_energy_min, log_energy_max=energybins.log_energy_max, test_size=0.5, verbose=True)
def fit_efficiencies(df_file=None, config='IC86.2012', num_groups=2,
                     sigmoid='slant', n_samples=1000):
    """Fit detection-efficiency vs. energy curves for each composition group.

    Builds per-composition detection efficiencies from simulation, fits a
    sigmoid-with-polynomial-tail function (from ``generate_fit_func``) to each
    efficiency curve, then estimates the fit uncertainty by re-fitting
    ``n_samples`` Gaussian fluctuations of the best-fit curve.

    Parameters
    ----------
    df_file : str, optional
        Path to the simulation DataFrame file passed to ``comp.load_sim``.
    config : str, optional
        Detector configuration (default 'IC86.2012').
    num_groups : int, optional
        Number of composition groups (default 2).
    sigmoid : str, optional
        Kept for backward compatibility; the flat/slant choice it used to
        select is currently superseded by ``generate_fit_func`` (see the
        commented-out alternatives below).
    n_samples : int, optional
        Number of random fluctuations used to estimate fit errors.

    Returns
    -------
    efficiencies : pandas.DataFrame
        Measured efficiencies ('eff_*', 'eff_err_*' columns), restricted to
        the energy range used for fitting.
    eff_fit : pandas.DataFrame
        Median fitted efficiency and asymmetric errors
        ('eff_median_*', 'eff_err_low_*', 'eff_err_high_*' columns).
    """
    print('Loading df_file: {}'.format(df_file))
    comp_list = comp.get_comp_list(num_groups=num_groups)
    energybins = comp.get_energybins(config=config)
    # Want to include energy bins for energies below the normal analysis
    # energy range so we can get a better estimate of how the detector
    # efficiencies turn on
    low_energy_bins = np.arange(5.0, energybins.log_energy_min, 0.1)
    bins = np.concatenate((low_energy_bins, energybins.log_energy_bins))
    bin_midpoints = (bins[1:] + bins[:-1]) / 2
    df_sim = comp.load_sim(df_file=df_file,
                           config=config,
                           test_size=0,
                           log_energy_min=None,
                           log_energy_max=None)
    # Thrown areas are different for different energy bins
    thrown_radii = comp.simfunctions.get_sim_thrown_radius(bin_midpoints)
    thrown_areas = np.pi * thrown_radii**2
    thrown_areas_max = thrown_areas.max()
    # Calculate efficiencies for each composition group
    efficiencies = pd.DataFrame()
    for composition in comp_list + ['total']:
        compositions = df_sim['comp_group_{}'.format(num_groups)]
        # Need list of simulation sets for composition to get number of
        # thrown showers
        if composition == 'total':
            # BUGFIX: np.full_like(compositions, True) inherits the
            # object/string dtype of the composition column; build an
            # explicit boolean select-all mask instead.
            comp_mask = np.ones(len(compositions), dtype=bool)
        else:
            comp_mask = compositions == composition
        sim_list = df_sim.loc[comp_mask, 'sim'].unique()
        thrown_showers = thrown_showers_per_ebin(sim_list,
                                                 log_energy_bins=bins)
        print('thrown_showers ({}) = {}'.format(composition, thrown_showers))
        passed_showers = np.histogram(df_sim.loc[comp_mask, 'MC_log_energy'],
                                      bins=bins)[0]
        # Poisson errors on both passed and thrown counts
        efficiency, efficiency_err = comp.ratio_error(
            num=passed_showers,
            num_err=np.sqrt(passed_showers),
            den=thrown_showers,
            den_err=np.sqrt(thrown_showers))
        # Scale efficiencies by geometric factor to take into account
        # different simulated thrown radii
        thrown_radius_factor = thrown_areas / thrown_areas_max
        efficiencies['eff_{}'.format(
            composition)] = efficiency * thrown_radius_factor
        efficiencies['eff_err_{}'.format(
            composition)] = efficiency_err * thrown_radius_factor

    # Fit sigmoid function to efficiency vs. energy distribution
    # fit_func = sigmoid_flat if sigmoid == 'flat' else sigmoid_slant
    poly_degree = 1
    num_params = poly_degree + 3
    fit_func = generate_fit_func(degree=poly_degree)
    # p0 = [7e4, 8.0, 50.0] if sigmoid == 'flat' else [7e4, 8.5, 50.0, 800]
    init_params = [8.5, 50.0, 7e4, 800]
    # BUGFIX: np.empty left uninitialized garbage in any trailing entries
    # when num_params > len(init_params); pad with ones instead (identical
    # for the current poly_degree=1 case, where all entries are overwritten).
    p0 = np.ones(num_params)
    p0[:min(num_params, len(init_params))] = init_params[:num_params]
    efficiencies_fit = {}
    energy_min_fit, energy_max_fit = 5.8, energybins.log_energy_max
    midpoints_fitmask = np.logical_and(bin_midpoints > energy_min_fit,
                                       bin_midpoints < energy_max_fit)
    # Find best-fit sigmoid function
    for composition in comp_list + ['total']:
        eff = efficiencies.loc[midpoints_fitmask,
                               'eff_{}'.format(composition)]
        eff_err = efficiencies.loc[midpoints_fitmask,
                                   'eff_err_{}'.format(composition)]
        popt, pcov = curve_fit(fit_func, bin_midpoints[midpoints_fitmask],
                               eff, p0=p0, sigma=eff_err)
        # Evaluate the best fit over the *full* binning (including the
        # low-energy turn-on region) but report chi2 only on the fit range.
        eff_fit = fit_func(bin_midpoints, *popt)
        efficiencies_fit[composition] = eff_fit
        chi2 = np.sum((eff - eff_fit[midpoints_fitmask])**2 / (eff_err)**2)
        ndof = len(eff_fit[midpoints_fitmask]) - len(p0)
        print('({}) chi2 / ndof = {} / {} = {}'.format(composition, chi2,
                                                       ndof, chi2 / ndof))

    # Perform many fits to random statistical fluctuations of the best fit
    # efficiency. This will be used to estimate the uncertainty in the best
    # fit efficiency.
    np.random.seed(2)  # fixed seed so the error bands are reproducible
    efficiencies_fit_samples = defaultdict(list)
    # BUGFIX: xrange is Python-2-only; range iterates identically.
    for _ in range(n_samples):
        for composition in comp_list + ['total']:
            # Get new random sample to fit
            eff_err = efficiencies.loc[midpoints_fitmask,
                                       'eff_err_{}'.format(composition)]
            eff_sample = np.random.normal(
                efficiencies_fit[composition][midpoints_fitmask], eff_err)
            # Fit with error bars
            popt, pcov = curve_fit(fit_func,
                                   bin_midpoints[midpoints_fitmask],
                                   eff_sample, p0=p0, sigma=eff_err)
            eff_fit_sample = fit_func(bin_midpoints, *popt)
            efficiencies_fit_samples[composition].append(eff_fit_sample)

    # Calculate median and (16th/84th-percentile) errors of efficiency fits
    eff_fit = pd.DataFrame()
    for composition in comp_list + ['total']:
        fit_median, fit_err_low, fit_err_high = np.percentile(
            efficiencies_fit_samples[composition], (50, 16, 84), axis=0)
        fit_err_low = np.abs(fit_err_low - fit_median)
        fit_err_high = np.abs(fit_err_high - fit_median)
        eff_fit['eff_median_{}'.format(composition)] = fit_median
        eff_fit['eff_err_low_{}'.format(composition)] = fit_err_low
        eff_fit['eff_err_high_{}'.format(composition)] = fit_err_high

    return efficiencies.loc[midpoints_fitmask, :], eff_fit
choices=comp.simfunctions.get_sim_configs(), help='Detector configuration') args = parser.parse_args() for config in args.config: df_sim = comp.load_sim(config=config, test_size=0, verbose=True) comp_list = ['light', 'heavy'] MC_comp_mask = {} for composition in comp_list: MC_comp_mask[composition] = df_sim['MC_comp_class'] == composition light_mask = df_sim['MC_comp_class'] == 'light' heavy_mask = df_sim['MC_comp_class'] == 'heavy' energybins = comp.get_energybins() # Energy resolution energy_res = np.log10(df_sim['lap_energy'] / df_sim['MC_energy']) medians_light, stds_light, _ = comp.data_functions.get_median_std( df_sim['MC_log_energy'][light_mask], energy_res[light_mask], energybins.log_energy_bins) medians_heavy, stds_heavy, _ = comp.data_functions.get_median_std( df_sim['MC_log_energy'][heavy_mask], energy_res[heavy_mask], energybins.log_energy_bins) gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1], hspace=0.1) ax1 = plt.subplot(gs[0]) ax2 = plt.subplot(gs[1], sharex=ax1)