def calculate_variables(filename, configpath, friendTrees, outpath, apply_selection=False, split_feature=None):
    """Calculate configured variables for every event and write them to a tree.

    Args:
        filename: input ntuple file, handed to ``load.InputFile``.
        configpath: path of the configuration, handed to ``load.Config``.
        friendTrees: friend tree specification, handed to ``load.Config``.
        outpath: path of the output file, handed to ``load.OutputFile``.
        apply_selection: accepted for interface compatibility; not used
            in this function.
        split_feature: name of an event attribute. If given, the attribute
            value is used as the number of output entries written per
            event; if ``None`` exactly one entry is written per event.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    config = load.Config(configpath, friendTrees, "Calculation")
    one_entry_per_event = split_feature is None

    # open input ntuple and output root file together
    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple, \
            load.OutputFile(outpath) as outfile:
        outfile.SetConfigBranches(config)

        def branch_names():
            # names of all branches currently attached to the output tree
            return [branch.GetName() for branch in outfile.tree.GetListOfBranches()]

        header_pending = True
        for evt_number, event in enumerate(load.TreeIterator(ntuple)):
            if one_entry_per_event:
                config.calculate_variables(event, outfile, outfile.sampleName)
                outfile.FillTree()
            else:
                # one output entry per element counted by split_feature
                for sub_idx in range(getattr(event, split_feature)):
                    config.calculate_variables(event, outfile, outfile.sampleName, sub_idx)
                    outfile.FillTree()
                    outfile.ClearArrays()

            # list the output branches once, after the first event
            if header_pending:
                header_pending = False
                print("writing variables to output tree:")
                for name in branch_names():
                    print(name)

            # dump the first events for a quick visual cross check
            if one_entry_per_event and evt_number <= 10:
                print(" === testevent ===")
                for name in branch_names():
                    print(name, ", ".join(str(value) for value in outfile.branchArrays[name]))
                print(" =================" + "\n")

            outfile.ClearArrays()
def match_jets(filename, configpath, friendTrees, threshold, signal_only, outpath, apply_selection=False):
    """Select the best-matching hypothesis per event and write it to ROOT files.

    For every event the hypotheses returned by ``Hypotheses.GetEntry`` are
    filtered with the configured signal selection and ``findBest`` picks one
    of them. The chosen row is stored as the signal entry; unless
    ``signal_only`` is set, a second row picked by
    ``config.get_random_index`` from the background selection is stored as
    the background entry.

    Args:
        filename: input ntuple file, handed to ``load.InputFile``.
        configpath: path of the configuration, handed to ``load.Config``.
        friendTrees: friend tree specification, handed to ``load.Config``.
        threshold: forwarded unchanged to ``findBest``.
        signal_only: if true, only the signal array is filled and written
            to ``outpath``; otherwise signal and background are written to
            ``outpath`` with ``.root`` replaced by ``_sig.root`` and
            ``_bkg.root`` respectively.
        outpath: path of the output root file (see ``signal_only``).
        apply_selection: if true, events flagged with ``error`` are skipped
            and the output arrays are cut to the number of filled rows.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    config = load.Config(configpath, friendTrees, "Matching")

    # open input file
    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:

        # load hypotheses module
        hypotheses = Hypotheses(config)

        # initialize hypotheses combinatorics
        hypotheses.initPermutations()

        first = True
        # next row to be written in the output arrays
        fillIdx = 0

        # start loop over ntuple entries
        for i, event in enumerate(load.TreeIterator(ntuple)):
            entry, error = hypotheses.GetEntry(event, event.N_Jets)

            if first:
                # the output layout is only known once the first entry exists
                # NOTE(review): if the ntuple yields no events the output
                # arrays are never created and the code after the loop
                # raises a NameError.
                # get list of all dataframe variables
                outputVariables = entry.columns.values
                outputVariables = np.append(outputVariables, config.naming + "_matchable")
                for v in outputVariables:
                    print(v)

                # setup empty array for event data storage
                outputSig = np.zeros(shape=(ntuple.GetEntries(), len(outputVariables)))
                if not signal_only:
                    outputBkg = np.zeros(shape=(ntuple.GetEntries(), len(outputVariables)))
                first = False

                # indices to fill basic variables regardless of matching status
                loIdxVars = hypotheses.nBaseVariables
                hiIdxVars = hypotheses.nAdditionalVariables

            if error:
                # for some reason no hypotheses are viable
                #   e.g. not enough jets
                # columns [loIdxVars:hiIdxVars] are copied from the first
                # row of the entry, everything else is set to -99
                if not apply_selection:
                    outputSig[fillIdx, :loIdxVars] = -99
                    outputSig[fillIdx, loIdxVars:hiIdxVars] = entry.iloc[
                        0].values[loIdxVars:hiIdxVars]
                    outputSig[fillIdx, hiIdxVars:] = -99
                    if not signal_only:
                        outputBkg[fillIdx, :loIdxVars] = -99
                        outputBkg[fillIdx, loIdxVars:hiIdxVars] = entry.iloc[
                            0].values[loIdxVars:hiIdxVars]
                        outputBkg[fillIdx, hiIdxVars:] = -99
                    fillIdx += 1
                continue

            # apply signal selection
            sig_selection = config.def_signal_selection
            entry_signal_selection = entry_selection(entry, sig_selection)

            # get best permutation
            # bestIndex = findBest(entry, threshold, config.match_variables)
            bestIndex = findBest(entry_signal_selection, threshold,
                                 config.match_variables)

            # fill -1 if no match was found
            if bestIndex == -1:
                # NOTE(review): with apply_selection=True nothing is written
                # here, but the row counter is still advanced below, so the
                # row keeps its initial zeros - confirm this is intended.
                if not apply_selection:
                    outputSig[fillIdx, :loIdxVars] = -1
                    outputSig[fillIdx, loIdxVars:hiIdxVars] = entry.iloc[
                        0].values[loIdxVars:hiIdxVars]
                    outputSig[fillIdx, hiIdxVars:] = -1
                    if not signal_only:
                        outputBkg[fillIdx, :loIdxVars] = -1
                        outputBkg[fillIdx, loIdxVars:hiIdxVars] = entry.iloc[
                            0].values[loIdxVars:hiIdxVars]
                        outputBkg[fillIdx, hiIdxVars:] = -1
            else:
                # randIndex = config.get_random_index(entry, bestIndex)
                # matched: copy the chosen row and set the last column
                # (the "_matchable" flag appended above) to 1
                outputSig[fillIdx, :-1] = entry.iloc[bestIndex].values
                outputSig[fillIdx, -1] = 1

                if not signal_only:
                    bkg_selection = config.def_background_selection
                    entry_background_selection = entry_selection(
                        entry, bkg_selection)
                    # print entry_background_selection
                    # print entry_background_selection.shape[0]

                    # a background row can only be drawn if the selection
                    # holds a candidate other than the matched one
                    fill = True
                    if entry_background_selection.shape[0] == 1:
                        if entry_background_selection.index[0] == bestIndex:
                            fill = False
                    if entry_background_selection.shape[0] == 0:
                        fill = False

                    if not fill:
                        # NOTE(review): with apply_selection=True the
                        # background row keeps its initial zeros here.
                        if not apply_selection:
                            outputBkg[fillIdx, :loIdxVars] = -99
                            outputBkg[fillIdx, loIdxVars:hiIdxVars] = entry.iloc[
                                0].values[loIdxVars:hiIdxVars]
                            outputBkg[fillIdx, hiIdxVars:] = -99
                    else:
                        randIndex = config.get_random_index(
                            entry_background_selection, bestIndex)
                        outputBkg[fillIdx, :-1] = entry.iloc[randIndex].values
                        outputBkg[fillIdx, -1] = 1

            # print the first filled rows for a quick visual cross check
            if fillIdx <= 10:
                print("=== testevent ===")
                if not signal_only:
                    for name, sigval, bkgval in zip(outputVariables,
                                                    outputSig[fillIdx],
                                                    outputBkg[fillIdx]):
                        print(name, sigval, bkgval)
                else:
                    for name, sigval in zip(outputVariables, outputSig[fillIdx]):
                        print(name, sigval)
                print("=================" + "\n\n")

            fillIdx += 1

    # save information as h5 file
    #df = pd.DataFrame(outputData, columns = outputVariables)
    #df.to_hdf(outpath.replace(".root",".h5"), key = "data", mode = "w")
    #del df

    # cut the arrays to the number of rows that were actually filled
    if apply_selection:
        print("events that fulfilled the selection {}/{}".format(
            fillIdx, len(outputSig)))
        outputSig = outputSig[:fillIdx]
        if not signal_only:
            outputBkg = outputBkg[:fillIdx]

    # open output root file
    if not signal_only:
        sigpath = outpath.replace(".root", "_sig.root")
        bkgpath = outpath.replace(".root", "_bkg.root")
    else:
        sigpath = outpath

    with load.OutputFile(sigpath) as outfile:
        # initialize branches
        outfile.SetBranches(outputVariables)

        # loop over events and fill tree
        for event in outputSig:
            outfile.FillTree(event)

    if not signal_only:
        with load.OutputFile(bkgpath) as outfile:
            # initialize branches
            outfile.SetBranches(outputVariables)

            # loop over events and fill tree
            for event in outputBkg:
                outfile.FillTree(event)
def evaluate_model(filename, modelconfigpath, configpath, friendTrees, outpath, apply_selection = False, write_input_vars = False):
    """Evaluate the configured DNN sets on an ntuple and store the outputs.

    The result is written twice: as an h5 file (``outpath`` with ``.root``
    replaced by ``.h5``, key ``"data"``) and as a root file at ``outpath``.

    Args:
        filename: input ntuple file, handed to ``load.InputFile``.
        modelconfigpath: path handed to ``load.ModelConfig``.
        configpath: path of the configuration, handed to ``load.Config``.
        friendTrees: friend tree specification, handed to ``load.Config``.
        outpath: path of the output root file.
        apply_selection: if true, events flagged with ``error`` by the entry
            loader are dropped; otherwise they are kept and filled with -1.
        write_input_vars: if true, the DNN input variables are written to
            the output in addition to the common variables.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    modelconfig = load.ModelConfig(modelconfigpath)
    model_variables = modelconfig.getAllVariables()

    config = load.Config(configpath, friendTrees, "Evaluation")

    # variables requested in addition to the model inputs (no duplicates)
    additional_variables = [
        v for v in config.additional_variables if v not in model_variables
    ]
    config.additional_variables = additional_variables

    # get information about variables that should be written into new friendtrees
    idxCommonVars = len(additional_variables)
    commonVars = list(additional_variables)

    # In-place extension on purpose: config.additional_variables refers to
    # this very list object and is therefore extended by the model
    # variables as well, exactly as before.
    additional_variables += model_variables
    modelconfig.setVariableIndices(additional_variables)

    # open input file
    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:

        # load hypothesis module
        entry_loader = load.Entry(config)

        first = True
        # next row to be written in the output array
        fillIdx = 0

        # start loop over ntuple entries
        for event in load.TreeIterator(ntuple):
            entry, error = entry_loader.GetEntry(event)

            if first:
                # variables that are to be written to output file
                outputVariables = np.array(commonVars)

                # if 'write_input_vars' is activated also write dnn inputs to new file
                if write_input_vars:
                    for outVar in model_variables:
                        outputVariables = np.append(outputVariables, outVar)
                idxBaseVars = len(outputVariables)

                # append output values of dnn
                outputVariables = modelconfig.setOutputVariables(outputVariables)

                # remove brackets
                outputVariables = [v.replace("[","_").replace("]","") for v in outputVariables]

                # print variables
                print("variables to be written to output file:")
                for v in outputVariables:
                    print(v)
                print("=======================")

                # setup empty array for event data storage
                outputData = np.zeros(shape = (ntuple.GetEntries(), len(outputVariables)))

                # setup input array for dnn evaluation
                modelconfig.setInputData(ntuple.GetEntries())

                first = False

            if error:
                # if selection is not fulfilled
                # fill default values of -1 into entry
                if not apply_selection:
                    outputData[fillIdx,:] = -1
                    modelconfig.setEmptyEntry(fillIdx)
                    fillIdx += 1
                continue

            # fill output data array
            # common variables
            outputData[fillIdx, :idxCommonVars] = entry[0, :idxCommonVars]

            # dnn input variables
            if write_input_vars:
                outputData[fillIdx, idxCommonVars:idxBaseVars] = entry[0, idxCommonVars:idxBaseVars]

            # dnn input variables into input array
            modelconfig.fillInputData(fillIdx, entry, event)
            fillIdx += 1

    # NOTE(review): if the ntuple yields no events, outputData and
    # outputVariables are never created and the code below raises a
    # NameError.

    # cut outputData to filled length
    if apply_selection:
        print("events that fulfilled the selection: {}/{}".format(fillIdx, len(outputData)))
        outputData = outputData[:fillIdx]
        modelconfig.removeTrailingEntries(fillIdx)

    # get dnn output
    for dnnSet in modelconfig.dnnsets:
        dnnOutput, maxIndex = dnnSet.evaluate(len(outputData))

        # fill dnn output
        outputData[:, dnnSet.idxOutLo:dnnSet.idxOutHi] = dnnOutput

        # fill predicted index
        outputData[:, dnnSet.idxOutHi:dnnSet.idxPrediction] = maxIndex.reshape(len(outputData), -1)

    # test print of outputs; limited to the rows that exist so that samples
    # with fewer than 10 stored events do not raise an IndexError
    for i in range(min(10, len(outputData))):
        print("=== testevent ===")
        for name, value in zip(outputVariables, outputData[i]):
            print(name, value)
        print("================="+"\n\n")

    print("\nsaving information ...")
    # save information as h5 file
    df = pd.DataFrame(outputData, columns = outputVariables)
    df.to_hdf(outpath.replace(".root",".h5"), key = "data", mode = "w")
    del df

    # open output root file
    with load.OutputFile(outpath) as outfile:
        # initialize branches
        outfile.SetBranches(outputVariables)

        # loop over events and fill tree
        for event in outputData:
            outfile.FillTree(event)
def evaluate_reconstruction(filename, modelname, configpath, friendTrees, outpath, apply_selection=False):
    """Pick the best hypothesis per event with a DNN and store the result.

    The last three output columns hold the DNN output, its square and
    ``log(x / (1 - x))`` of it. The result is written as an h5 file
    (``outpath`` with ``.root`` replaced by ``.h5``, key ``"data"``) and as
    a root file at ``outpath``.

    Args:
        filename: input ntuple file, handed to ``load.InputFile``.
        modelname: handed to ``load.Model``.
        configpath: path of the configuration, handed to ``load.Config``.
        friendTrees: friend tree specification, handed to ``load.Config``.
        outpath: path of the output root file.
        apply_selection: if true, events flagged with ``error`` by the
            hypotheses module are dropped; otherwise they are kept with
            dummy DNN values (-1, -99, -99).
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    # load the DNN model and set the variables it needs
    model = load.Model(modelname)
    model.setVariables()

    config = load.Config(configpath, friendTrees, "Reconstruction")

    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:

        # hypotheses module including its combinatorics
        hypotheses = Hypotheses(config)
        hypotheses.initPermutations()

        layout_pending = True
        fillIdx = 0
        for event in load.TreeIterator(ntuple):
            entry, error = hypotheses.GetEntry(event, event.N_Jets)

            if layout_pending:
                # all variables for the DNN evaluation have to be present
                check_entry(entry, model.variables)

                # dataframe columns followed by the three DNN output columns
                outputVariables = entry.columns.values
                for suffix in ("_DNNOutput", "_squaredDNNOutput", "_transformedDNNOutput"):
                    outputVariables = np.append(outputVariables, config.naming + suffix)
                for name in outputVariables:
                    print(name)

                # one row per ntuple entry
                outputData = np.zeros(shape=(ntuple.GetEntries(), len(outputVariables)))
                layout_pending = False

            if error:
                # no hypothesis is viable, e.g. not enough jets
                print("hypothesis not viable")
                if apply_selection:
                    continue
                outputData[fillIdx, :-3] = entry.iloc[0].values
                # dummy output values of DNN
                outputData[fillIdx, -3:] = (-1., -99., -99.)
                fillIdx += 1
                continue

            # restrict the hypotheses to the reconstruction selection
            candidates = entry_selection(entry, config.def_dnn_reco_selection)
            if candidates.shape[0] == 0:
                outputData[fillIdx, :-3] = entry.iloc[0].values
                # dummy output values of DNN
                outputData[fillIdx, -3:] = -9.
                fillIdx += 1
                continue

            # best permutation and its DNN output
            bestIndex, outputValue = model.findBest(candidates)

            outputData[fillIdx, :-3] = candidates.iloc[bestIndex].values
            outputData[fillIdx, -3] = outputValue
            outputData[fillIdx, -2] = outputValue**2
            outputData[fillIdx, -1] = np.log(outputValue / (1. - outputValue))

            # print the first filled rows for a quick visual cross check
            if fillIdx <= 10:
                print("=== testevent ===")
                for name, value in zip(outputVariables, outputData[fillIdx]):
                    print(name, value)
                print("=================" + "\n\n")

            fillIdx += 1

    # cut outputData to filled length
    if apply_selection:
        print("events that fulfilled the selection: {}/{}".format(
            fillIdx, len(outputData)))
        outputData = outputData[:fillIdx]

    # h5 copy of the output
    pd.DataFrame(outputData, columns=outputVariables).to_hdf(
        outpath.replace(".root", ".h5"), key="data", mode="w")

    # root copy of the output, one tree entry per row
    with load.OutputFile(outpath) as outfile:
        outfile.SetBranches(outputVariables)
        for row in outputData:
            outfile.FillTree(row)
def convert_database(filename, configpath, outpath, friendTrees, database):
    """Copy database branches into a new tree aligned with the ntuple events.

    For every ntuple event the matching database entry is looked up via the
    (run, lumi, event id) columns of the index file. If it exists, the
    values of all database branches are copied into the output arrays; if
    not, a message is printed and the output arrays are written as they
    are after ``config.calculate_variables``.

    Changes compared to the previous implementation:
      * A missing database entry is detected with an explicit emptiness
        check instead of a bare ``except``. Errors such as a wrongly
        configured index column or a missing event attribute are no longer
        swallowed silently.
      * Branch values are read with ``getattr`` instead of ``eval``.
      * The database root file is closed when the function is left.

    Args:
        filename: input ntuple file, handed to ``load.InputFile``.
        configpath: path of the configuration, handed to ``load.Config``.
        outpath: path of the output file, handed to ``load.OutputFile``.
        friendTrees: friend tree specification, handed to ``load.Config``.
        database: forwarded to ``config.getDataBase`` to pick the database.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    config = load.Config(configpath, friendTrees, "Database")

    # figure out the correct database to load
    dbfile, indexfile = config.getDataBase(filename, database)
    print("loading database....")

    # opening database root file
    rf = ROOT.TFile(dbfile)
    try:
        db = rf.Get(config.treename)

        # opening db file with indices
        idf = pd.read_hdf(indexfile)
        print(idf)

        # collect branches to write
        branches = [b.GetName() for b in db.GetListOfBranches()]

        # open input file
        with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:
            # open output root file
            with load.OutputFile(outpath) as outfile:
                outfile.SetBranchList(branches+["Evt_Run", "Evt_Lumi", "Evt_ID"])

                # start loop over ntuple entries
                first = True
                for i, event in enumerate(load.TreeIterator(ntuple)):
                    config.calculate_variables(event, outfile)

                    # search for corresponding event in database
                    dbevt = idf.loc[
                        (idf[config.run] == event.Evt_Run) &
                        (idf[config.lumi] == event.Evt_Lumi) &
                        (idf[config.evtid] == event.Evt_ID)]

                    if len(dbevt.index) > 0:
                        # jumping to indexed event in tree
                        db.GetEvent(dbevt.index[0])
                        # filling branches
                        for b in branches:
                            outfile.branchArrays[b][0] = getattr(db, b)
                    else:
                        print("event ({}, {}, {}) is not in database - filling defaults".format(
                            event.Evt_Run, event.Evt_Lumi, event.Evt_ID))

                    if first:
                        print("writing variables to output tree:")
                        for b in outfile.tree.GetListOfBranches():
                            print(b.GetName())
                        first = False

                    outfile.FillTree()

                    # print the first events for a quick visual cross check
                    if i <= 10:
                        print(" === testevent ===")
                        for b in outfile.tree.GetListOfBranches():
                            print(b.GetName(), ", ".join(
                                [str(entry) for entry in list(outfile.branchArrays[b.GetName()])]))
                        print(" ================="+"\n")

                    outfile.ClearArrays()
    finally:
        # release the database file even if the conversion fails
        rf.Close()