Example #1
def process(config_file, binfile):

    # Setup logging.
    log = logging.getLogger(__name__)

    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)
 
    # Nominal cut ranges (defined here but not applied in this snippet).
    nominal_conf = {
        'alpha': [0.55, 1.0],
        'missing_mass': [0.0, 5.0],
        'p_mes': [0.35, 2.0],
    }
    
    # Load the entire dataset. Do this only once,
    # since it's 1.5 GB at load time.
    data = utils.load_dataset(config)

    if binfile is not None:
        with open(binfile, 'rb') as bfile:
            bins = pickle.load(bfile)
    else:
        bins = setup_binning(config, data)

    kin_limits = find_kinematic_limits_in_bins(data, bins)
    kin_limits.to_csv('kinematic_limits_mc.csv', index=False)
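
Example #1 builds `nominal_conf` without ever applying it. As a rough sketch of how such a dict of [low, high] ranges can be turned into a row mask with pandas, assuming the keys match DataFrame column names (`apply_range_cuts` is a hypothetical helper, not part of `utils`):

import pandas as pd

def apply_range_cuts(df, conf):
    """Boolean mask keeping rows inside every [low, high] range (inclusive)."""
    mask = pd.Series(True, index=df.index)
    for column, (low, high) in conf.items():
        mask &= df[column].between(low, high)
    return mask

# Usage: good_rows = data[apply_range_cuts(data, nominal_conf)]
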
Example #2
def process(config_file, samples):

    # Setup logging.
    log = logging.getLogger(__name__)

    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Load the entire dataset. Do this only once,
    # since it's 1.5 GB at load time.
    data = utils.load_dataset(config)

    # Apply nominal cuts to get the subset of events
    # considered good under the "best" cut values.
    nominal_filter = utils.build_filter(data)
    nominal_data = utils.build_dataframe(data, nominal_filter)

    # Randomize the sector labels as a sanity check:
    # each randomized sector should reproduce the
    # same answer.
    utils.randomize_sector(data)

    varfile = os.path.join(os.path.dirname(__file__),
                           '..', '..', 'variations.json')
    variations = load_variations(varfile)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    bins = setup_binning(config, nominal_data)

    # Calculate the results for the nominal subset of data.
    results = {}
    results['nominal'] = utils.get_results(nominal_data, bins, config)

    # Calculate the results for each sector.
    for sector in range(1, 7):

        sector_data = data[data['sector'] == sector]

        for imc in range(samples):

            var_time = time.time()
            log.info('Doing sector {}, sample {}'.format(sector, imc))
            random_filter = utils.get_random_config(sector_data, variations)
            random_data = utils.build_dataframe(sector_data, random_filter)
            sect_result = utils.get_results(random_data, bins, config)
            elapsed_time = time.time() - var_time
            log.info('Elapsed time %.3f' % elapsed_time)
            output_filename = '{}phi/random/sector_{}_{}.csv'.format(
                config['database_path'], sector, imc)
            sect_result.to_csv(output_filename, index=False)

    exe_time = time.time() - start_time
    log.info('Finished execution in %.3f seconds.' % exe_time)
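
Example #2 relies on `utils.get_random_config` to draw a random cut set from `variations.json`. A minimal sketch of what that draw could look like, assuming `variations` maps each parameter to a dict of index -> (low, high) pairs, as the loop in Example #5 suggests (`draw_random_config` is a hypothetical stand-in and ignores the data argument the real helper takes):

import random

def draw_random_config(variations):
    """Pick one (low, high) variation at random for each parameter."""
    config = {}
    for par, choices in variations.items():
        index = random.choice(list(choices))
        config[par] = choices[index]
    return config
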
Example #3
def process(config_file):

    # Setup logging.
    log = logging.getLogger(__name__)

    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    nominal_conf = {}
    #nominal_conf['alpha'] = [0.55, 1.0]
    #nominal_conf['missing_mass'] = [0.0, 5.0]
    nominal_conf['p_mes'] = [0.35, 1.8]

    # Load the entire dataset. Do this only once,
    # since it's 1.5 GB at load time.
    data = utils.load_dataset(config)
    #data = data.dropna(how='any')
    data.info()  # info() already prints; wrapping it in print() adds a stray 'None'

    # Apply nominal cuts to get the subset of events
    # considered good under the "best" cut values.
    #nominal_filter = utils.build_filter(data, nominal_conf)
    #nominal_data   = utils.build_dataframe(data, nominal_filter)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    #bins = setup_binning(config, nominal_data)
    bins = setup_binning(config, data)
    with open('binning_mc.pkl', 'wb') as binf:
        pickle.dump(bins, binf)

    #kin_limits = find_kinematic_limits_in_bins(data, bins)
    #kin_limits.to_csv('kinematic_limits_mc.csv', index = False)

    # Calculate the results for the nominal subset of data.
    results = utils.get_results(data, bins, config)
    results.to_csv(config['output_filename'], index=False)
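
The binning pickled here is what Example #1 reads back through its `binfile` argument; the round trip looks like:

import pickle

# Write (this example) ...
with open('binning_mc.pkl', 'wb') as binf:
    pickle.dump(bins, binf)

# ... and read back (Example #1's binfile branch).
with open('binning_mc.pkl', 'rb') as bfile:
    bins = pickle.load(bfile)
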
Example #4
    # Search ranges for each cut parameter, passed to BayesianOptimization.
    parameter_bounds = {
        'dist_ecv_min': (-1.1, -0.9),
        'dist_ecv_max': (0.9, 1.1),
        'dist_ecw_min': (-1.1, -0.9),
        'dist_ecw_max': (0.9, 1.1),
        'dist_ec_edep_min': (-1.1, -0.9),
        'dist_ec_edep_max': (0.9, 1.1),
        'dist_vz_min': (-1.1, -0.9),
        'dist_vz_max': (0.9, 1.1),
        'missing_mass_min': (0.0, 1.75),
        'p_mes_min': (0.3, 0.4),
        'p_mes_max': (1.6, 1.8)
    }

    # Load the configuration file and entire
    # dataset (once).
    config = utils.load_config(args.config)
    data = utils.load_dataset(config)

    # Nominal data to get binning
    nominal_filter = utils.build_filter(data)
    nominal_data = utils.build_dataframe(data, nominal_filter)
    bins = setup_binning(config, nominal_data)

    objective_fn = partial(process_par_set, data=data, config=config)

    opt = BayesianOptimization(f=objective_fn,
                               pbounds=parameter_bounds,
                               random_state=1)

    opt.maximize(init_points=args.init_points, n_iter=args.n_iter)
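
After `maximize` finishes, `bayes_opt` exposes the best point as `opt.max`, a dict with 'target' and 'params' keys. Pickling it is presumably how the file read by Example #5's `bayes_opt_pars` branch gets produced; `best_pars.pkl` is a placeholder name:

import pickle

# opt.max == {'target': <best score>, 'params': {<name>: <value>, ...}}
with open('best_pars.pkl', 'wb') as f:  # placeholder filename
    pickle.dump(opt.max, f)
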
Example #5
def process(config_file):

    # Setup logging.
    log = logging.getLogger(__name__)

    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Load the entire dataset. Do this only once,
    # since it's 1.5 GB at load time.
    data = utils.load_dataset(config)
    utils.randomize_sector(data)

    # Apply nominal cuts to get the subset of events
    # considered good under the "best" cut values.
    # NOTE: `args` presumably comes from a module-level
    # argparse namespace; it is not defined in this function.
    if args.bayes_opt_pars is not None:
        log.info("Using Bayesian Optimized parameters for nominal.")
        with open(args.bayes_opt_pars, 'rb') as f:
            bayes_pars = pickle.load(f)

        params = {str(k): float(v) for k, v in bayes_pars['params'].items()}
        bayes_conf = build_bayesian_optimized_config(**params)
        nominal_filter = utils.build_filter(data, bayes_conf)

    else:
        nominal_filter = utils.build_filter(data)

    nominal_data = utils.build_dataframe(data, nominal_filter)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    bins = setup_binning(config, nominal_data)

    # Calculate the results for the nominal subset of data.
    results = {}
    results['nominal'] = utils.get_results(nominal_data, bins, config)

    # Calculate the results for each sector.
    for sector in range(1, 7):
        var_time = time.time()
        log.info('Doing sector {}'.format(sector))

        sector_data = nominal_data[nominal_data['sector'] == sector]
        sect_result = utils.get_results(sector_data, bins, config)

        elapsed_time = time.time() - var_time
        log.info('Elapsed time %.3f' % elapsed_time)

        output_filename = '{}phi/sector_{}.csv'.format(
            config['database_path'], sector)
        sect_result.to_csv(output_filename, index=False)

    del nominal_data

    # Load the variations to consider. These are
    # the systematic cut variations that get applied.
    variations = load_variations(config['variation_file'])
    for par in variations.keys():
        results[par] = {}

        for index in variations[par].keys():

            var_time = time.time()
            log.info(
                'Doing  %.3f < %s < %.3f' %
                (variations[par][index][0], par, variations[par][index][1]))

            # Single-parameter cut dict for this variation.
            temp_dict = {par: variations[par][index]}

            # Apply the varied cut and recompute results.
            temp_filter = utils.build_filter(data, temp_dict)
            temp_data = utils.build_dataframe(data, temp_filter)
            results[par][index] = utils.get_results(temp_data, bins, config)
            del temp_data

            end_var_time = time.time() - var_time
            log.info('Elapsed time %.3f' % end_var_time)

    # Using all variations, systematic
    # uncertainties are added to the dataframe.
    systematic_sources = assign_systematics(results)
    with open(config['systematics_file'], 'wb') as outputfile:
        pickle.dump(systematic_sources, outputfile)

    # Write results to file.
    results['nominal'].to_csv(config['output_filename'], index=False)

    # Write other results too.
    dont_write = ['sector_{}'.format(s) for s in range(1, 7)]
    dont_write.append('nominal')
    for key in results.keys():
        if key not in dont_write:
            for conf in results[key]:
                output_filename = '{}phi/variation_{}_{}.csv'.format(
                    config['database_path'], key, conf)
                results[key][conf].to_csv(output_filename, index=False)

    exe_time = time.time() - start_time
    log.info('Finished execution in %.3f seconds.' % exe_time)
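
A guess at the kind of comparison `assign_systematics` performs: the per-bin spread of each varied result around the nominal one. The result column name 'value' is an assumption, and the real implementation may differ substantially; `sketch_systematics` is a hypothetical name:

import numpy as np

def sketch_systematics(results, column='value'):
    """Per-parameter, per-bin standard deviation of varied results around nominal."""
    nominal = results['nominal'][column].values
    sources = {}
    for par, confs in results.items():
        if par == 'nominal':
            continue
        # Stack the shifts of every variation of this parameter, bin by bin.
        shifts = np.vstack([res[column].values - nominal
                            for res in confs.values()])
        sources[par] = shifts.std(axis=0)
    return sources
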