Пример #1
0
def make_fractions_single(output_dir,
                          channel,
                          variable,
                          components_dict,
                          category='inclusive',
                          systematic='nominal'):
    '''Write one histogram per component into a ROOT file.

    The file ``<output_dir>/htt_<channel>_for_FF_fractions.root`` is opened
    in UPDATE mode and the histograms are stored in the directory
    ``<channels_names[channel]>_<variable>_<category>``, which is created
    when it does not exist yet.

    Parameters:
        output_dir: directory holding the output ROOT file.
        channel: channel key; used in the file name and looked up in
            ``channels_names`` for the directory name.
        variable: variable name, part of the directory name.
        components_dict: mapping of component name to an object exposing a
            ROOT histogram as ``.histogram``.
        category: category name, part of the directory name.
        systematic: systematic label. For ``'nominal'`` the histogram is
            named after the component key; otherwise it is named
            ``<key>_<systematic>`` with ``up``/``down`` capitalised.

    Histograms already present in the directory are left untouched.
    Histograms whose final name starts with ``fakes_`` are scaled by -1
    before being written.
    '''
    rootfilename = '_'.join(['htt', channel, 'for_FF_fractions'])
    rootfile = TFile('{}/{}.root'.format(output_dir, rootfilename), 'UPDATE')
    # Make sure the file is closed (and flushed) even if a component fails.
    try:
        rootdirname = '_'.join([channels_names[channel], variable, category])
        rootdir = rootfile.GetDirectory(rootdirname)
        if not rootdir:
            rootdir = TDirectoryFile(rootdirname, rootdirname)
        rootdir.cd()
        # .items() works on both Python 2 and Python 3 dictionaries.
        for key, component in components_dict.items():
            if systematic == 'nominal':
                histname = key
            else:
                histname = '_'.join([key, systematic])
                histname = histname.replace('up', 'Up')
                histname = histname.replace('down', 'Down')
            # Never overwrite a histogram written by a previous call.
            if rootdir.Get(histname):
                continue
            hist = component.histogram.Clone(histname)
            if histname.startswith("fakes_"):
                hist.Scale(-1)
            hist.SetTitle(key)
            hist.Write()
    finally:
        rootfile.Close()
Пример #2
0
def _write_datacard_hist(hist, title):
    '''Zero the under/overflow bins of ``hist``, set its title and write it.'''
    hist.SetMinimum(0.0)
    hist.SetBinContent(0, 0)
    hist.SetBinContent(hist.GetNbinsX() + 1, 0)
    hist.SetTitle(title)
    hist.Write()


def make_datacards_singlecat(output_dir,
                             channel,
                             variable,
                             components_dict,
                             category='inclusive',
                             systematics=None):
    '''Write the histograms of one category into a datacard-input ROOT file.

    The file ``<output_dir>/htt_<channel>.inputs_datacards_<variable>.root``
    is opened in UPDATE mode and the histograms are stored in the directory
    ``<channels_names[channel]>_<category>``, which is created when it does
    not exist yet.

    Parameters:
        output_dir: directory holding the output ROOT file.
        channel: channel key; used in the file name and looked up in
            ``channels_names`` for the directory name.
        variable: variable name, part of the file name.
        components_dict: mapping ``systematic -> {component name ->
            component}``, where each component exposes a ROOT histogram as
            ``.histogram``.
        category: category name, part of the directory name.
        systematics: iterable of systematic labels to process; defaults to
            ``['nominal']``.

    Naming: nominal histograms are named after the component key. Other
    histograms are named ``<key>_<systematic>``, then either renamed through
    ``syst_rename_dir`` or have ``up``/``down`` capitalised. Histograms
    already present in the directory are skipped.

    For systematics listed in ``syst_split_list`` an additional copy of the
    unscaled component histogram is written per entry of ``types_dir[key]``,
    with the entry inserted before the ``Up``/``Down`` suffix.
    '''
    if systematics is None:
        systematics = ['nominal']
    rootfilename = '_'.join(
        ['htt', channel + '.inputs', 'datacards', variable])
    rootfile = TFile('{}/{}.root'.format(output_dir, rootfilename), 'UPDATE')
    # Make sure the file is closed (and flushed) even if a component fails.
    try:
        rootdirname = '_'.join([channels_names[channel], category])
        rootdir = rootfile.GetDirectory(rootdirname)
        if not rootdir:
            rootdir = TDirectoryFile(rootdirname, rootdirname)
        rootdir.cd()
        for systematic in systematics:
            # .items() works on both Python 2 and Python 3 dictionaries.
            for key, component in components_dict[systematic].items():
                histname = '_'.join([key, systematic])
                if systematic == 'nominal':
                    histname = key
                elif histname in syst_rename_dir:
                    histname = syst_rename_dir[histname]
                else:
                    histname = histname.replace('up', 'Up')
                    histname = histname.replace('down', 'Down')
                # Never overwrite a histogram written by a previous call.
                if rootdir.Get(histname):
                    continue
                hist = component.histogram.Clone(histname)
                if 'jetFakes' in histname and 'ff_' in systematic:
                    # Rescale the shifted template to the nominal jetFakes
                    # integral (under/overflow included).
                    last_bin = hist.GetNbinsX() + 1
                    shifted_integral = hist.Integral(0, last_bin)
                    # An empty template cannot be normalised; leave it as is
                    # instead of dividing by zero.
                    if shifted_integral != 0:
                        nominal_hist = components_dict['nominal'][
                            'jetFakes'].histogram
                        hist.Scale(
                            nominal_hist.Integral(0, last_bin) /
                            shifted_integral)
                _write_datacard_hist(hist, key)
                if systematic in syst_split_list:
                    if 'Down' in histname:
                        direction = 'Down'
                    elif 'Up' in histname:
                        direction = 'Up'
                    else:
                        # Without an Up/Down suffix there is no place to
                        # insert the type label, so nothing is split.
                        direction = None
                    if direction is not None:
                        for sys_type in types_dir[key]:
                            new_histname = histname.replace(
                                direction, sys_type + direction)
                            _write_datacard_hist(
                                component.histogram.Clone(new_histname), key)
    finally:
        rootfile.Close()
    print('Datacards for category {} and variable {} made.'.format(
        category, variable))
Пример #3
0
def main(args):
    '''Evaluate XGB models on the trees of a ROOT file and store the
    predictions in a new ROOT file.

    Attributes read from ``args``:
        input: path of the input ROOT file.
        output_dir: base output directory; a sub-directory named after the
            input file is created in it.
        channels, categories: comma-separated lists; ``all`` derives the
            values from the keys of the input file. Directories are named
            ``<channel>_<category>``.
        pipeline: when not None, only the directory with this name is
            processed; it is also part of the output file name.
        XGBs: comma-separated list of model JSON files, loaded with
            ``XGB_model_from_json``.
        tree: name of the tree read in each directory and of the tree
            written to the output.
        first_entry, last_entry: entry range written to the output tree
            (inclusive); also part of the output file name. When
            ``last_entry`` is not inside ``(first_entry, n_entries)`` the
            range extends to the last entry.
        pandas: read trees with ``.pandas.df()`` instead of ``.arrays()``.
        dry: compute the predictions but do not write any file.
        recreate: do not reuse branches from an existing output file.

    Unless ``recreate`` is set, an existing output file is moved to
    ``<output>_to_update``; branches found in its tree are copied to the new
    tree instead of being predicted again, and the backup is deleted once at
    least one pipeline has been written.
    '''
    print(args)

    channels = args.channels.split(',')
    categories = args.categories.split(',')

    nickname = os.path.basename(args.input).replace(".root", "")

    XGB_jsons = [f for f in args.XGBs.split(',') if f != ""]

    models = {}
    inputs = []
    for XGB_json in XGB_jsons:
        XGB_object = XGB_model_from_json(XGB_json)
        models[XGB_object.name] = XGB_object
        inputs += XGB_object.inputs

    # load root file and create friend tree
    root_file_input = args.input
    output_path = os.path.join(args.output_dir, nickname)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    root_file_output = os.path.join(
        output_path,
        "_".join(
            filter(
                None,
                [
                    nickname,
                    args.pipeline,
                    str(args.first_entry),
                    str(args.last_entry),
                ],
            )) + ".root",
    )
    backup_path = "{}_to_update".format(root_file_output)

    root_file_in = uproot.open(root_file_input)
    input_keys = list(root_file_in.keys())

    if 'all' in channels:
        channels = set(k.split('_')[0] for k in input_keys)
    if 'all' in categories:
        categories = set(
            k.split('_')[-1].split(';')[0] for k in input_keys
            if any(c in k for c in channels))

    root_file_old = None
    root_file_out = None
    if not args.dry:
        if not args.recreate:
            # Keep the previous output aside so its branches can be reused.
            if os.path.exists(root_file_output):
                os.rename(root_file_output, backup_path)
            if os.path.exists(backup_path):
                root_file_old = TFile(backup_path, 'read')
        root_file_out = TFile(root_file_output, 'recreate')
        print("Opened new file")
    first_pass = True

    for channel in channels:
        for cat in categories:
            rootdirname = '{}_{}'.format(channel, cat)
            if rootdirname not in input_keys and "{};1".format(
                    rootdirname) not in input_keys:
                continue
            if args.pipeline is not None and rootdirname != args.pipeline:
                continue

            print('process pipeline: %s_%s' % (channel, cat))

            if not first_pass and not args.dry:
                # The output file was closed at the end of the previous
                # pipeline; reopen it to append the next directory.
                root_file_out = TFile(root_file_output, 'update')
            first_pass = False

            # Defaults also used by dry runs, where no old output is read.
            tree_old = None
            old_models = []
            root_file_out_old = None

            if not args.dry:
                rootdir_old = None
                if root_file_old:
                    rootdir_old = root_file_old.GetDirectory(rootdirname)
                rootdir = TDirectoryFile(rootdirname, rootdirname)
                rootdir.cd()
                # root_file_old is only opened when args.recreate is unset.
                if rootdir_old:
                    tree_old = rootdir_old.Get(args.tree)
                tree = TTree(args.tree, args.tree)

                if tree_old:
                    old_models = [
                        branch.GetName()
                        for branch in tree_old.GetListOfBranches()
                    ]
                if old_models:
                    root_file_out_old = uproot.open(backup_path)

            # Models already stored in the old tree are copied, not
            # re-evaluated. The selection is made per pipeline so that one
            # directory does not hide models from the following ones.
            new_models = dict(
                (name, models[name]) for name in models
                if name not in old_models)
            all_models = old_models + list(new_models)

            if not args.dry:
                leafValues = dict(
                    (name, array.array("f", [0])) for name in all_models)

            if args.pandas:
                df = root_file_in[rootdirname][args.tree].pandas.df()
                if old_models:
                    df_old = root_file_out_old[rootdirname][
                        args.tree].pandas.df()
            else:
                _df = root_file_in[rootdirname][args.tree].arrays()
                df = pandas.DataFrame()
                keys_to_export = set(inputs +
                                     ["pt_1", "pt_2", "phi_1", "phi_2"])
                # These two inputs are computed below, not read from file.
                for key in ["N_neutrinos_reco", "mt_tt"]:
                    keys_to_export.discard(key)
                for k in keys_to_export:
                    df[k] = _df[str.encode(k)]
                if old_models:
                    _df_old = root_file_out_old[rootdirname][
                        args.tree].arrays()
                    df_old = pandas.DataFrame()
                    for k in old_models:
                        df_old[k] = _df_old[str.encode(k)]

            df["mt_tt"] = (2 * df["pt_1"] * df["pt_2"] *
                           (1 - np.cos(df["phi_1"] - df["phi_2"])))**.5

            n_entries = len(df)
            df["N_neutrinos_reco"] = N_neutrinos_in_channel[channel] * np.ones(
                n_entries, dtype='int')

            # remove values set at -10 by default to match training settings
            for variable in ["jpt_r", "jeta_r", "jphi_r", "Njet_r"]:
                if variable in inputs:
                    df[variable].values[df[variable].values < 0] = 0

            for name in new_models:
                print("Predicting with {}...".format(name))
                df[name] = new_models[name].predict(df)

            if not args.dry:
                print("Filling new branch in tree...")
                # Pair each branch buffer with the column it is filled from.
                columns = []
                for name in all_models:
                    tree.Branch(name, leafValues[name], "prediction/F")
                    source = df_old if name in old_models else df
                    columns.append((leafValues[name], source[name].values))
                first_entry = args.first_entry
                # The range is inclusive, so the default upper bound is the
                # index of the last entry.
                last_entry = n_entries - 1
                if first_entry < args.last_entry < n_entries:
                    last_entry = args.last_entry
                for k in range(first_entry, last_entry + 1):
                    for buf, values in columns:
                        buf[0] = values[k]
                    tree.Fill()
                print("Filled.")

                rootdir.Remove(rootdir.Get(args.tree))

                tree.Write(args.tree, kOverwrite)
                root_file_out.Close()

    if not args.dry:
        if first_pass:
            # No pipeline matched: the output file is still open.
            root_file_out.Close()
        if root_file_old:
            root_file_old.Close()
        # Drop the backup only after every pipeline had the chance to read
        # it, and only if something was actually written.
        if not first_pass and os.path.exists(backup_path):
            os.remove(backup_path)