示例#1
0
def main(options):
    """Plot every training variable for the samples named in a YAML config.

    Reads the sample locations, variable lists and selection strings from
    ``options.config``, loads signal, background and data through
    ``ROOTHelpers``, then calls ``Plotter.plot_input`` once per training
    variable.

    Args:
        options: parsed command-line options. The attributes read here are
            ``config``, ``pt_reweight``, ``reload_samples``, ``n_bins`` and
            ``ratio_plot``.

    Raises:
        KeyError: if a required key is missing from the YAML config.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects on older PyYAML and is rejected outright by
    # PyYAML >= 6. If the config only holds plain data, switch to
    # yaml.safe_load.
    with open(options.config, 'r') as config_file:
        config = yaml.load(config_file)

    # Everything needed is copied out of the parsed config below, so the file
    # handle does not have to stay open while the samples are loaded.
    output_tag = config['output_tag']

    mc_dir = config['mc_file_dir']
    mc_fnames = config['mc_file_names']

    # Data is not strictly needed yet, but ROOTHelpers expects these arguments.
    data_dir = config['data_file_dir']
    data_fnames = config['data_file_names']

    train_vars = config['train_vars']
    vars_to_add = config['vars_to_add']
    presel = config['preselection']

    proc_to_tree_name = config['proc_to_tree_name']

    sig_colour = 'red'

    # --- Data handling ---

    # When reweighting, the samples are loaded with the 'reweight_cr'
    # selection from the config instead of the nominal preselection, and the
    # output tag is extended with a '_pt_reweighted' suffix.
    if options.pt_reweight:
        selection = config['reweight_cr']
        output_tag += '_pt_reweighted'
    else:
        selection = presel

    root_obj = ROOTHelpers(output_tag, mc_dir, mc_fnames, data_dir,
                           data_fnames, proc_to_tree_name, train_vars,
                           vars_to_add, selection)

    for sig_obj in root_obj.sig_objects:
        root_obj.load_mc(sig_obj, reload_samples=options.reload_samples)
    for bkg_obj in root_obj.bkg_objects:
        root_obj.load_mc(bkg_obj, bkg=True,
                         reload_samples=options.reload_samples)
    for data_obj in root_obj.data_objects:
        root_obj.load_data(data_obj, reload_samples=options.reload_samples)
    root_obj.concat()

    # NOTE(review): the reweighting is only applied when samples are reloaded;
    # presumably cached samples already carry it - confirm against ROOTHelpers.
    if options.pt_reweight and options.reload_samples:
        root_obj.apply_pt_rew('DYMC', presel)

    # --- Plotting ---

    plotter = Plotter(root_obj, train_vars, sig_col=sig_colour,
                      norm_to_data=True)
    for var in train_vars:
        plotter.plot_input(var, options.n_bins, output_tag,
                           options.ratio_plot, norm_to_data=True)
示例#2
0
def main(options):
    """Plot the mass variable in each MVA category defined by a second config.

    Loads signal, background and data through ``ROOTHelpers`` as described by
    ``options.config``, evaluates the pickled classifier named in
    ``options.mva_config`` on all three dataframes, and calls
    ``Plotter.plot_input`` on ``options.mass_var_name`` once per category
    boundary.

    Args:
        options: parsed command-line options. The attributes read here are
            ``config``, ``mva_config``, ``mva_proc``, ``pt_reweight``,
            ``reload_samples``, ``mass_var_name``, ``n_bins`` and
            ``ratio_plot``.

    Raises:
        KeyError: if a required key is missing from either YAML config.
        IOError: if the configured model entry is not a string.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects on older PyYAML and is rejected outright by
    # PyYAML >= 6. If the configs only hold plain data, switch to
    # yaml.safe_load.
    with open(options.config, 'r') as config_file:
        config = yaml.load(config_file)

    # Everything needed is copied out of the parsed config below, so the file
    # handle does not have to stay open while the samples are loaded.
    output_tag = config['output_tag']

    mc_dir = config['mc_file_dir']
    mc_fnames = config['mc_file_names']

    # Data is not strictly needed yet, but ROOTHelpers expects these arguments.
    data_dir = config['data_file_dir']
    data_fnames = config['data_file_names']

    train_vars = config['train_vars']
    vars_to_add = config['vars_to_add']
    presel = config['preselection']

    proc_to_tree_name = config['proc_to_tree_name']

    sig_colour = 'red'

    # --- Data handling ---

    # When reweighting, the samples are loaded with the 'reweight_cr'
    # selection from the config instead of the nominal preselection, and the
    # output tag is extended with a '_pt_reweighted' suffix.
    if options.pt_reweight:
        selection = config['reweight_cr']
        output_tag += '_pt_reweighted'
    else:
        selection = presel

    root_obj = ROOTHelpers(output_tag, mc_dir, mc_fnames, data_dir,
                           data_fnames, proc_to_tree_name, train_vars,
                           vars_to_add, selection)

    for sig_obj in root_obj.sig_objects:
        root_obj.load_mc(sig_obj, reload_samples=options.reload_samples)
    for bkg_obj in root_obj.bkg_objects:
        root_obj.load_mc(bkg_obj, bkg=True,
                         reload_samples=options.reload_samples)
    for data_obj in root_obj.data_objects:
        root_obj.load_data(data_obj, reload_samples=options.reload_samples)
    root_obj.concat()

    # NOTE(review): the reweighting is only applied when samples are reloaded;
    # presumably cached samples already carry it - confirm against ROOTHelpers.
    if options.pt_reweight and options.reload_samples:
        root_obj.apply_pt_rew('DYMC', presel)

    # --- MVA evaluation ---

    # Kept under its own name so the sample config above is not clobbered.
    with open(options.mva_config, 'r') as mva_config_file:
        mva_config = yaml.load(mva_config_file)
    model = mva_config['models'][options.mva_proc]
    boundaries = mva_config['boundaries'][options.mva_proc]

    # Only a pickled model referenced by file name is handled so far.
    if not isinstance(model, str):
        raise IOError(
            'Did not get a classifier models in correct format in config')

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('evaluating BDT: {}'.format(model))

    # NOTE(review): unpickling executes arbitrary code; only load model files
    # from a trusted location.
    with open('models/{}'.format(model), 'rb') as model_file:
        clf = pickle.load(model_file)

    # Store the second column onwards of predict_proba, flattened, as a new
    # '<proc>_mva' column on each dataframe.
    mva_col = options.mva_proc + '_mva'
    for df in (root_obj.mc_df_sig, root_obj.mc_df_bkg, root_obj.data_df):
        df[mva_col] = clf.predict_proba(df[train_vars].values)[:, 1:].ravel()

    # --- Plotting ---

    plotter = Plotter(root_obj, train_vars, sig_col=sig_colour,
                      norm_to_data=True)

    # Boundaries are looked up by key as 'tag_0', 'tag_1', ...; one plot is
    # made per entry. Category 0 only has a lower cut on the score; every
    # later category is additionally capped from above by the previous
    # category's boundary.
    for cat in range(len(boundaries)):
        lower_cut = mva_col + ' >' + str(boundaries['tag_' + str(cat)])
        if cat == 0:
            extra_cuts = lower_cut
        else:
            upper_cut = mva_col + ' <' + str(boundaries['tag_' + str(cat - 1)])
            extra_cuts = upper_cut + ' and ' + lower_cut
        plotter.plot_input(options.mass_var_name,
                           options.n_bins,
                           output_tag,
                           options.ratio_plot,
                           norm_to_data=True,
                           extra_cuts=extra_cuts,
                           extra_tag=cat,
                           blind=True)