metavar='text', default='cfgFileName.yml', help='config file name with root input files') args = parser.parse_args() with open(args.cfgFileName, 'r') as ymlCfgFile: inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader) inKineFileName = inputCfg['inKineFileName'] Bhadrons = inputCfg['pdgCodeB'] FFbtoB = inputCfg['FFbtoB'] Dhadrons = inputCfg['pdgCodeD'] BRDhadrons = inputCfg['BRD'] partPlusAntiPart = inputCfg['partPlusAntiPart'] outFileName = inputCfg['outFileName'] kineDf = LoadDfFromRootOrParquet(inKineFileName, None, 'fTreeDecays') SetGlobalStyle(padleftmargin=0.14, padbottommargin=0.12, titleoffsety=1.3, optstat=0) Bcolors = [kRed + 1, kAzure + 4, kOrange + 7, kGreen + 2] Bnames = { 511: 'B^{+}', 521: 'B^{0}', 531: 'B_{s}^{0}', 5122: '#Lambda_{b}^{0}' } Dnames = { 411: 'D^{+}', 421: 'D^{0}',
def main():
    """
    Run the ML training/testing and model-application workflow configured
    in a YAML file, once per configured pt interval.

    Command line:
        cfgFileName -- YAML configuration (input files, pt_ranges, ml, output)
        --train     -- perform only training and testing
        --apply     -- perform only application (uses ml.saved_models)
    """
    # read config file
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName', metavar='text', default='cfgFileNameML.yml',
                        help='config file name for ml')
    parser.add_argument("--train", help="perform only training and testing",
                        action="store_true")
    parser.add_argument("--apply", help="perform only application",
                        action="store_true")
    args = parser.parse_args()

    print('Loading analysis configuration: ...', end='\r')
    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
    print('Loading analysis configuration: Done!')

    print('Loading data files: ...', end='\r')
    PromptDf = LoadDfFromRootOrParquet(inputCfg['input']['prompt'])
    FDDf = LoadDfFromRootOrParquet(inputCfg['input']['FD'])
    DataDf = LoadDfFromRootOrParquet(inputCfg['input']['data'])
    print('Loading data files: Done!')

    for iBin, (PtMin, PtMax) in enumerate(zip(inputCfg['pt_ranges']['min'],
                                              inputCfg['pt_ranges']['max'])):
        print(f'\n\033[94mStarting ML analysis --- {PtMin} < pT < {PtMax} GeV/c\033[0m')

        OutPutDirPt = os.path.join(inputCfg['output']['dir'], f'pt{PtMin}_{PtMax}')
        if os.path.isdir(OutPutDirPt):
            print('Output directory already exists, overwrites possibly ongoing!')
        else:
            # makedirs instead of mkdir so a missing parent directory is created too
            os.makedirs(OutPutDirPt)

        # data preparation
        #_____________________________________________
        TrainTestData, DataDfPtSel, PromptDfPtSelForEff, FDDfPtSelForEff = data_prep(
            inputCfg, iBin, PtMin, PtMax, OutPutDirPt, DataDf, PromptDf, FDDf)

        # training, testing
        #_____________________________________________
        if not args.apply:
            ModelHandl = train_test(inputCfg, PtMin, PtMax, OutPutDirPt, TrainTestData)
        else:
            ModelList = inputCfg['ml']['saved_models']
            ModelPath = ModelList[iBin]
            if not isinstance(ModelPath, str):
                print('ERROR: path to model not correctly defined!')
                sys.exit(1)  # signal failure with a non-zero exit status
            print(f'Loaded saved model: {ModelPath}')
            ModelHandl = ModelHandler()
            ModelHandl.load_model_handler(ModelPath)

        # model application
        #_____________________________________________
        if not args.train:
            appl(inputCfg, PtMin, PtMax, OutPutDirPt, ModelHandl,
                 DataDfPtSel, PromptDfPtSelForEff, FDDfPtSelForEff)

        # delete dataframes to release memory; the container itself must be
        # dropped ("del data" on a loop variable would not release the list entries)
        del TrainTestData, DataDfPtSel, PromptDfPtSelForEff, FDDfPtSelForEff
help='config file name with root input files') parser.add_argument('outFileName', metavar='text', default='outFile.root', help='output root file name') parser.add_argument("--batch", help="suppress video output", action="store_true") args = parser.parse_args() with open(args.cfgFileName, 'r') as ymlCfgFile: inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader) # load dataframes from input files dfPrompt = LoadDfFromRootOrParquet( inputCfg['infiles']['signal']['prompt']['filename'], inputCfg['infiles']['signal']['prompt']['dirname'], inputCfg['infiles']['signal']['prompt']['treename']) dfFD = LoadDfFromRootOrParquet( inputCfg['infiles']['signal']['feeddown']['filename'], inputCfg['infiles']['signal']['feeddown']['dirname'], inputCfg['infiles']['signal']['feeddown']['treename']) dfBkg_tot = LoadDfFromRootOrParquet( inputCfg['infiles']['background']['filename'], inputCfg['infiles']['background']['dirname'], inputCfg['infiles']['background']['treename']) if inputCfg['infiles']['secpeak']['prompt']['filename']: dfSecPeakPrompt = LoadDfFromRootOrParquet( inputCfg['infiles']['secpeak']['prompt']['filename'], inputCfg['infiles']['secpeak']['prompt']['dirname'], inputCfg['infiles']['secpeak']['prompt']['treename']) else:
def main():  #pylint: disable=too-many-locals,too-many-statements
    """
    Main function of the script.

    Compares the distribution of a selection variable and its cut-scan
    efficiency for prompt and non-prompt (FD) candidates between two MC
    configurations (labelled via legend.conf_labels, e.g. PYTHIA6 vs
    PYTHIA8) and saves distribution, efficiency and efficiency-ratio
    canvases as pdf files in the requested output directory.
    """
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName', metavar='text', default='cfgFileName.yml',
                        help='config file name')
    parser.add_argument('outFileDir', metavar='text', default='./',
                        help='output file directory')
    args = parser.parse_args()
    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)

    # Load data
    dfPromptP6 = LoadDfFromRootOrParquet(inputCfg['input']['prompt_files'][0])
    dfPromptP8 = LoadDfFromRootOrParquet(inputCfg['input']['prompt_files'][1])
    dfFDP6 = LoadDfFromRootOrParquet(inputCfg['input']['fd_files'][0])
    dfFDP8 = LoadDfFromRootOrParquet(inputCfg['input']['fd_files'][1])

    # Select pt bin
    ptMin = inputCfg['pt_bin'][0]
    ptMax = inputCfg['pt_bin'][1]
    dfPromptP6 = dfPromptP6.query(f'{ptMin} < pt_cand < {ptMax}')
    dfPromptP8 = dfPromptP8.query(f'{ptMin} < pt_cand < {ptMax}')
    dfFDP6 = dfFDP6.query(f'{ptMin} < pt_cand < {ptMax}')
    dfFDP8 = dfFDP8.query(f'{ptMin} < pt_cand < {ptMax}')

    SetGlobalStyle(padbottommargin=0.14, padleftmargin=0.18,
                   padrightmargin=0.06, titleoffsety=1.6)

    # histograms of the scan-variable distributions
    varTitle = inputCfg['scan_variable']['title']
    nBins = inputCfg['scan_variable']['histo_bins']
    binLims = inputCfg['scan_variable']['histo_lims']
    varName = inputCfg['scan_variable']['name']
    hPromptP6 = TH1F('hPromptP6', f';{varTitle};Counts', nBins, binLims[0], binLims[1])
    hPromptP8 = TH1F('hPromptP8', f';{varTitle};Counts', nBins, binLims[0], binLims[1])
    hFDP6 = TH1F('hFDP6', f';{varTitle};Counts', nBins, binLims[0], binLims[1])
    hFDP8 = TH1F('hFDP8', f';{varTitle};Counts', nBins, binLims[0], binLims[1])

    # rescale the scan variable before filling / cutting
    scaleFactor = inputCfg['scan_variable']['rescale_factor']
    dfPromptP6[varName] = dfPromptP6[varName] * scaleFactor
    dfPromptP8[varName] = dfPromptP8[varName] * scaleFactor
    dfFDP6[varName] = dfFDP6[varName] * scaleFactor
    dfFDP8[varName] = dfFDP8[varName] * scaleFactor

    for value in dfPromptP6[varName].to_numpy():
        hPromptP6.Fill(value)
    for value in dfPromptP8[varName].to_numpy():
        hPromptP8.Fill(value)
    for value in dfFDP6[varName].to_numpy():
        hFDP6.Fill(value)
    for value in dfFDP8[varName].to_numpy():
        hFDP8.Fill(value)

    SetObjectStyle(hPromptP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hPromptP8, color=kRed + 1, marker=kFullCircle)
    SetObjectStyle(hFDP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hFDP8, color=kRed + 1, marker=kFullCircle)
    hPromptP6.GetXaxis().SetNdivisions(505)
    hFDP6.GetXaxis().SetNdivisions(505)
    hPromptP8.GetXaxis().SetNdivisions(505)
    hFDP8.GetXaxis().SetNdivisions(505)

    # efficiency-vs-cut histograms (one bin per scan step)
    scanRange = inputCfg['scan_variable']['scan_range']
    scanStep = inputCfg['scan_variable']['scan_step']
    nEffBins = round((scanRange[1] - scanRange[0]) / scanStep)
    hEffPromptP6 = TH1F('hEffPromptP6', f';{varTitle} >;Efficiency', nEffBins,
                        scanRange[0], scanRange[1])
    hEffPromptP8 = TH1F('hEffPromptP8', f';{varTitle} >;Efficiency', nEffBins,
                        scanRange[0], scanRange[1])
    hEffFDP6 = TH1F('hEffFDP6', f';{varTitle} >;Efficiency', nEffBins,
                    scanRange[0], scanRange[1])
    hEffFDP8 = TH1F('hEffFDP8', f';{varTitle} >;Efficiency', nEffBins,
                    scanRange[0], scanRange[1])
    SetObjectStyle(hEffPromptP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hEffPromptP8, color=kRed + 1, marker=kFullCircle)
    SetObjectStyle(hEffFDP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hEffFDP8, color=kRed + 1, marker=kFullCircle)

    effPromptP6, effPromptP8, effFDP6, effFDP8 = ([] for _ in range(4))
    effPromptUncP6, effPromptUncP8, effFDUncP6, effFDUncP8 = (
        [] for _ in range(4))

    labelsConf = inputCfg['legend']['conf_labels']
    legPrompt = TLegend(0.25, 0.2, 0.6, 0.4)
    legPrompt.SetBorderSize(0)
    legPrompt.SetFillStyle(0)
    legPrompt.SetHeader('Prompt')
    legPrompt.AddEntry(hEffPromptP6, labelsConf[0], 'p')
    legPrompt.AddEntry(hEffPromptP8, labelsConf[1], 'p')
    legFD = TLegend(0.25, 0.2, 0.65, 0.4)
    legFD.SetBorderSize(0)
    legFD.SetFillStyle(0)
    legFD.SetHeader('Non-prompt')
    # bug fix: the non-prompt legend must reference the FD histograms,
    # not the prompt ones
    legFD.AddEntry(hEffFDP6, labelsConf[0], 'p')
    legFD.AddEntry(hEffFDP8, labelsConf[1], 'p')

    # cut scan: selection efficiency (with binomial uncertainty) per threshold
    for iBin, cut in enumerate(np.arange(scanRange[0], scanRange[1], scanStep)):
        dfPromptP6Sel = dfPromptP6.query(f'{varName} > {cut}')
        dfPromptP8Sel = dfPromptP8.query(f'{varName} > {cut}')
        dfFDP6Sel = dfFDP6.query(f'{varName} > {cut}')
        dfFDP8Sel = dfFDP8.query(f'{varName} > {cut}')
        effPromptP6.append(float(len(dfPromptP6Sel) / len(dfPromptP6)))
        effPromptP8.append(float(len(dfPromptP8Sel) / len(dfPromptP8)))
        effFDP6.append(float(len(dfFDP6Sel) / len(dfFDP6)))
        effFDP8.append(float(len(dfFDP8Sel) / len(dfFDP8)))
        effPromptUncP6.append(
            np.sqrt(effPromptP6[-1] * (1 - effPromptP6[-1]) / len(dfPromptP6)))
        effPromptUncP8.append(
            np.sqrt(effPromptP8[-1] * (1 - effPromptP8[-1]) / len(dfPromptP8)))
        effFDUncP6.append(
            np.sqrt(effFDP6[-1] * (1 - effFDP6[-1]) / len(dfFDP6)))
        effFDUncP8.append(
            np.sqrt(effFDP8[-1] * (1 - effFDP8[-1]) / len(dfFDP8)))
        hEffPromptP6.SetBinContent(iBin + 1, effPromptP6[-1])
        hEffPromptP8.SetBinContent(iBin + 1, effPromptP8[-1])
        hEffFDP6.SetBinContent(iBin + 1, effFDP6[-1])
        hEffFDP8.SetBinContent(iBin + 1, effFDP8[-1])
        hEffPromptP6.SetBinError(iBin + 1, effPromptUncP6[-1])
        hEffPromptP8.SetBinError(iBin + 1, effPromptUncP8[-1])
        hEffFDP6.SetBinError(iBin + 1, effFDUncP6[-1])
        hEffFDP8.SetBinError(iBin + 1, effFDUncP8[-1])

    hEffPromptP6.GetXaxis().SetNdivisions(505)
    hEffFDP6.GetXaxis().SetNdivisions(505)
    hEffPromptP8.GetXaxis().SetNdivisions(505)
    hEffFDP8.GetXaxis().SetNdivisions(505)

    # efficiency ratios (second configuration over first)
    hEffPromptRatio = hEffPromptP8.Clone('hEffPromptRatio')
    hEffPromptRatio.Divide(hEffPromptP6)
    hEffPromptRatio.GetYaxis().SetTitle(
        f'Prompt eff ratio {labelsConf[1]} / {labelsConf[0]}')
    hEffFDRatio = hEffFDP8.Clone('hEffFDRatio')
    hEffFDRatio.Divide(hEffFDP6)
    hEffFDRatio.GetYaxis().SetTitle(
        f'Non-prompt eff ratio {labelsConf[1]} / {labelsConf[0]}')
    hEffPromptRatio.GetXaxis().SetNdivisions(505)
    hEffFDRatio.GetXaxis().SetNdivisions(505)

    cDistributions = TCanvas('cDistributions', '', 1920, 1080)
    cDistributions.Divide(2, 1)
    cDistributions.cd(1).SetLogy()
    hPromptP8.Draw('e')
    hPromptP6.Draw('esame')
    legPrompt.Draw()
    cDistributions.cd(2).SetLogy()
    hFDP8.Draw('e')
    hFDP6.Draw('esame')
    legFD.Draw()

    cEfficiency = TCanvas('cEfficiency', '', 1920, 1080)
    cEfficiency.Divide(2, 1)
    cEfficiency.cd(1).SetLogy()
    hEffPromptP6.Draw('e')
    hEffPromptP8.Draw('esame')
    legPrompt.Draw()
    cEfficiency.cd(2).SetLogy()
    hEffFDP6.Draw('e')
    hEffFDP8.Draw('esame')
    legFD.Draw()

    cEfficiencyRatio = TCanvas('cEfficiencyRatio', '', 1920, 1080)
    cEfficiencyRatio.Divide(2, 1)
    cEfficiencyRatio.cd(1)
    hEffPromptRatio.Draw('e')
    cEfficiencyRatio.cd(2)
    hEffFDRatio.Draw('e')

    tag = f'{labelsConf[0]}Vs{labelsConf[1]}_pT{ptMin}_{ptMax}'
    cDistributions.SaveAs(f'{args.outFileDir}/{varName}_Distr_{tag}.pdf')
    cEfficiency.SaveAs(f'{args.outFileDir}/{varName}_CutEff_{tag}.pdf')
    cEfficiencyRatio.SaveAs(
        f'{args.outFileDir}/{varName}_CutEffRatio_{tag}.pdf')

    print('Press any key to exit!')
    input()
parser.add_argument('cutSetFileName', metavar='text', default='cutSetFileName.yml',
                    help='input file with cut set')
parser.add_argument('outFileName', metavar='text', default='outFileName.root',
                    help='output root file name')
args = parser.parse_args()

# config input file and df definition
with open(args.cfgFileName, 'r') as ymlCfgFile:
    inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
inFileNames = inputCfg['infiles']['name']
dfSignif = LoadDfFromRootOrParquet(inFileNames,
                                   inputCfg['infiles']['dirname'],
                                   inputCfg['infiles']['treename'])
# pt-bin centre; vectorised column arithmetic instead of a row-wise apply,
# which called a Python lambda once per row
dfSignif['Pt'] = (dfSignif['PtMin'] + dfSignif['PtMax']) / 2
VarDrawList = inputCfg['VarDrawList']
if not isinstance(VarDrawList, list):
    VarDrawList = [VarDrawList]

# selections to be applied
with open(args.cutSetFileName, 'r') as ymlCutSetFile:
    cutSetCfg = yaml.load(ymlCutSetFile, yaml.FullLoader)
cutVars = cutSetCfg['cutvars']
# bug fix: the original test was
# "not 'ML_output_Bkg' or not 'ML_output_FD' in cutVars", whose first operand
# is always False (a non-empty string is truthy), so the Bkg cut was never
# actually checked; test membership of both keys explicitly
if 'ML_output_Bkg' not in cutVars or 'ML_output_FD' not in cutVars:
    print(
        '\t\t---Warning: no ML Bkg or FD output cut was provided. Are you sure you want to continue?---\n'
    )
sys.path.append('..') from utils.DfUtils import LoadDfFromRootOrParquet #pylint: disable=wrong-import-position,import-error # inputs parser = argparse.ArgumentParser(description='Arguments to pass') parser.add_argument('cfgFileName', metavar='text', default='config_training_FileName.yml', help='config file used for the training') args = parser.parse_args() # load configfiles with open(args.cfgFileName, 'r') as ymlCfgFile: inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader) # load dataframes print('Reading input files') bkg = LoadDfFromRootOrParquet( inputCfg['input']['data'], inTreeNames=inputCfg['input']['treename']) bkg = bkg.query(inputCfg['data_prep']['filt_bkg_mass']) prompt = LoadDfFromRootOrParquet( inputCfg['input']['prompt'], inTreeNames=inputCfg['input']['treename']) if inputCfg['input']['FD']: FD = LoadDfFromRootOrParquet( inputCfg['input']['FD'], inTreeNames=inputCfg['input']['treename']) # loop over training pt bins for ptMin, ptMax, bkg_mult in zip(inputCfg['pt_ranges']['min'], inputCfg['pt_ranges']['max'], inputCfg['data_prep']['bkg_mult']): print(f'\nPt bin {ptMin}-{ptMax} GeV/c, available candidates:') numBkg = len(bkg.query(f'{ptMin} < pt_cand < {ptMax}')) numPrompt = len(prompt.query(f'{ptMin} < pt_cand < {ptMax}')) print(f' - bkg -> {numBkg}\n - prompt -> {numPrompt}')
def main():
    """
    Plot and compare the distributions of the configured variables for a set
    of input dataframes (one pdf per configured pt interval), after applying
    per-dataframe pre-filter queries from the YAML configuration.
    """
    # read config file
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName', metavar='text', default='cfgFileNameCheck.yml',
                        help='config file name for check')
    args = parser.parse_args()

    print('Loading check configuration: ...', end='\r')
    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
    print('Loading check configuration: Done!')

    print('Loading data files: ...', end='\r')
    DfList = []
    inDirName = inputCfg['input']['dirname']
    inTreeName = inputCfg['input']['treename']
    for filePath in inputCfg['input']['files']:
        DfList.append(LoadDfFromRootOrParquet(filePath, inDirName, inTreeName))
    print('Loading data files: Done!')

    # fix: consistent (and correctly spelled) progress message; the "Done!"
    # line must be at least as long as the "..." line it overwrites via \r
    print('Applying simple pre-filtering: ...', end='\r')
    DfListSel = []
    for df, query in zip(DfList, inputCfg['queries']):
        DfListSel.append(df.query(query))
    print('Applying simple pre-filtering: Done!')
    del DfList  # release the unfiltered dataframes

    VarsToDraw = inputCfg['plotting_columns']
    LegLabels = inputCfg['output']['leg_labels']
    Colors = inputCfg['output']['colors']
    OutPutDir = inputCfg['output']['dir']

    for PtMin, PtMax, LimMin, LimMax in zip(inputCfg['pt_ranges']['min'],
                                            inputCfg['pt_ranges']['max'],
                                            inputCfg['plot_lim_min'],
                                            inputCfg['plot_lim_max']):
        print(f'Plot variable distributions --- {PtMin} < pT < {PtMax} GeV/c')
        DfListPt = []
        for df in DfListSel:
            DfListPt.append(df.query(f'{PtMin} < pt_cand < {PtMax}'))
        DistrPlot = plot_utils.plot_distr(DfListPt, VarsToDraw, 1000, LegLabels,
                                          figsize=(6, 6), density=True,
                                          histtype='stepfilled', grid=False,
                                          log=True, colors=Colors, alpha=0.3)
        plt.subplots_adjust(left=0.1, bottom=0.05, right=0.95, top=0.95,
                            hspace=0.4)
        # plot_distr returns a single Axes for one variable, an array otherwise
        if not isinstance(DistrPlot, np.ndarray):
            DistrPlot = np.array([DistrPlot])
        print(len(DistrPlot), len(LimMin), len(LimMax),
              len(inputCfg['xaxes_label']))
        for ax, minVar, maxVar, xLabel in zip(DistrPlot, LimMin, LimMax,
                                              inputCfg['xaxes_label']):
            ax.set_xlim(minVar, maxVar)
            ax.set_xlabel(xLabel, fontsize=10, ha='right', position=(1, 20))
            ax.set_ylabel('Counts (arb. units)', fontsize=10, ha='right',
                          position=(20, 1))
            plt.legend(frameon=False, fontsize=10, loc='best')
            ax.set_title('')
        plt.tight_layout()
        plt.savefig(f'{OutPutDir}/NsigzoomDistrComp_pT_{PtMin}_{PtMax}.pdf')
        plt.close('all')
        del DfListPt
    del DfListSel
if iFile == 0: hEv, normCounter = LoadNormObjFromTask(inFileName, inputCfg) if isMC: _, sparseGen = LoadSparseFromTask(inFileName, inputCfg) #only gen sparses used else: hEvPart, normCounterPart = LoadNormObjFromTask(inFileName, inputCfg) hEv.Add(hEvPart) normCounter.Add(normCounterPart) if isMC: _, sparseGenPart = LoadSparseFromTask(inFileName, inputCfg) #only gen sparses used for sparseType in sparseGenPart: sparseGen[sparseType].Add(sparseGenPart[sparseType]) #load trees if isMC: dataFramePrompt = LoadDfFromRootOrParquet(inputCfg['tree']['filenamePrompt'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) if 'cand_type' in dataFramePrompt.columns: #if not filtered tree, select only FD and not reflected dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type', [bitSignal, bitPrompt], 'and') dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type', [bitRefl], 'not') dataFrameFD = LoadDfFromRootOrParquet(inputCfg['tree']['filenameFD'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) if 'cand_type' in dataFrameFD.columns: #if not filtered tree, select only FD and not reflected dataFrameFD = FilterBitDf(dataFrameFD, 'cand_type', [bitSignal, bitFD], 'and') dataFrameFD = FilterBitDf(dataFrameFD, 'cand_type', [bitRefl], 'not') for iPt, (cuts, ptMin, ptMax) in enumerate(zip(selToApply, cutVars['Pt']['min'], cutVars['Pt']['max'])): print("Projecting distributions for %0.1f < pT < %0.1f GeV/c" % (ptMin, ptMax)) #gen histos from sparses
# inputs parser = argparse.ArgumentParser(description='Arguments to pass') parser.add_argument('cfgFileName', metavar='text', default='cfgFileName.yml', help='config file name with path of input dataframes for check') parser.add_argument('cutSetFileName', metavar='text', default='cutSetFileName.yml', help='cut set file name') parser.add_argument('--outputDir', metavar='text', default='.', help='output directory for plots') args = parser.parse_args() # input dataframes with open(args.cfgFileName, 'r') as ymlCfgFile: inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader) isMC = inputCfg['isMC'] if isMC: dfPrompt = LoadDfFromRootOrParquet(inputCfg['tree']['filenamePrompt'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) dfFD = LoadDfFromRootOrParquet(inputCfg['tree']['filenameFD'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) else: dfAll = LoadDfFromRootOrParquet(inputCfg['tree']['filenameAll'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) # selections to be applied with open(args.cutSetFileName, 'r') as ymlCutSetFile: cutSetCfg = yaml.load(ymlCutSetFile, yaml.FullLoader) cutVars = cutSetCfg['cutvars'] selToApply = [] for iPt, _ in enumerate(cutVars['Pt']['min']): selToApply.append('') for varName in cutVars: if varName == 'InvMass':
preSelections = cfg['skimming']['preselections'] colsToKeep = cfg['skimming']['colstokeep'] if colsToKeep and 'inv_mass' not in colsToKeep: print( 'Warning: invariant mass branch (inv_mass) disabled. Are you sure you don\'t want to keep it?' ) if colsToKeep and 'pt_cand' not in colsToKeep: print( 'Warning: pt branch (pt_cand) disabled. Are you sure you don\'t want to keep it?' ) PtMin = cfg['skimming']['pt']['min'] PtMax = cfg['skimming']['pt']['max'] dataFrame = LoadDfFromRootOrParquet(inFileNames, inDirName, inTreeName) if not colsToKeep: colsToKeep = list(dataFrame.columns) colsToKeep.remove('cand_type') print('Applying selections') dataFramePtCut = dataFrame.query(f'pt_cand > {PtMin} & pt_cand < {PtMax}') del dataFrame if preSelections: dataFramePtCutSel = dataFramePtCut.astype(float).query(preSelections) del dataFramePtCut else: dataFramePtCutSel = dataFramePtCut if cfg['missingvalues']['enable']:
for varName in cutVars: if varName == 'InvMass': continue if selToApply[iPt] != '': selToApply[iPt] += ' & ' selToApply[ iPt] += f"{cutVars[varName]['min'][iPt]}<{cutVars[varName]['name']}<{cutVars[varName]['max'][iPt]}" # define filter bits bitSignal = 0 bitPrompt = 2 bitFD = 3 bitRefl = 4 dataFramePrompt = LoadDfFromRootOrParquet(inputCfg['tree']['filenamePrompt'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) if 'cand_type' in dataFramePrompt.columns: #if not filtered tree, select only prompt and not reflected dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type', [bitSignal, bitPrompt], 'and') dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type', [bitRefl], 'not') dataFramePrompt.reset_index(inplace=True) dataFrameFD = LoadDfFromRootOrParquet(inputCfg['tree']['filenameFD'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) if 'cand_type' in dataFrameFD.columns: #if not filtered tree, select only FD and not reflected dataFrameFD = FilterBitDf(dataFrameFD, 'cand_type', [bitSignal, bitFD], 'and')
help='config file name with root input files') parser.add_argument('outFileName', metavar='text', default='outFile.root', help='output root file name') parser.add_argument("--batch", help="suppress video output", action="store_true") args = parser.parse_args() with open(args.cfgFileName, 'r') as ymlCfgFile: inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader) # load dataframes from input files dfPrompt = LoadDfFromRootOrParquet( inputCfg['infiles']['signal']['prompt']['filename'], inputCfg['infiles']['signal']['prompt']['dirname'], inputCfg['infiles']['signal']['prompt']['treename']) dfFD = LoadDfFromRootOrParquet( inputCfg['infiles']['signal']['feeddown']['filename'], inputCfg['infiles']['signal']['feeddown']['dirname'], inputCfg['infiles']['signal']['feeddown']['treename']) dfBkg = LoadDfFromRootOrParquet(inputCfg['infiles']['background']['filename'], inputCfg['infiles']['background']['dirname'], inputCfg['infiles']['background']['treename']) if inputCfg['infiles']['secpeak']['prompt']['filename']: dfSecPeakPrompt = LoadDfFromRootOrParquet( inputCfg['infiles']['signal']['prompt']['filename'], inputCfg['infiles']['signal']['prompt']['dirname'], inputCfg['infiles']['signal']['prompt']['treename']) else:
cutVars[var]['min'][iPt] * 1.0001) binMax = sparseBkg.GetAxis(cutVars[var]['axisnum']).FindBin( cutVars[var]['max'][iPt] * 0.9999) sparseBkg.GetAxis(cutVars[var]['axisnum']).SetRange(binMin, binMax) hMassSel.append(sparseBkg.Projection(0)) hMassSel[iPt].SetNameTitle( f'hMassSelPt{ptMin:.0f}_{ptMax:.0f}', f'{ptMin} < #it{{p}}_{{T}} < {ptMax} (GeV/#it{{c}});{massTitle};Counts' ) for iAxis in range(sparseBkg.GetNdimensions()): sparseBkg.GetAxis(iAxis).SetRange(-1, -1) else: # data from tree/dataframe dataFrameBkg = LoadDfFromRootOrParquet(inputCfg['tree']['filenameBkg'], inputCfg['tree']['dirname'], inputCfg['tree']['treename']) massBins = 500 massLimLow = min(dataFrameBkg['inv_mass']) massLimHigh = max(dataFrameBkg['inv_mass']) # selections to be applied selToApply = [] for iPt, _ in enumerate(cutVars['Pt']['min']): selToApply.append('') for varName in cutVars: if varName == 'InvMass': continue if selToApply[iPt] != '': selToApply[iPt] += ' & '
hNsigmaSel[det][spe]['0-2'][f'Pt{ptmin:.0f}_{ptmax:.0f}'], color=kRed, linealpha=0.25, fillalpha=0.25, markeralpha=1, markerstyle=kOpenCircle, markersize=0.3, linewidth=1) sparse.GetAxis(0).SetRange(-1, -1) sparse.GetAxis(1).SetRange(-1, -1) else: detectors = ['TPC', 'TOF', 'Comb'] species = ['Pi', 'K'] prongs = ['0', '1', '2'] dataDf = LoadDfFromRootOrParquet(inputCfg['inputfiles']) with open(ARGS.cutSetFileName, 'r') as ymlCutSetFile: cutSetCfg = yaml.load(ymlCutSetFile, yaml.FullLoader) cutVars = cutSetCfg['cutvars'] selToApply = [] for iPt, _ in enumerate(cutVars['Pt']['min']): selToApply.append('') for varName in cutVars: if varName == 'InvMass': continue if selToApply[iPt] != '': selToApply[iPt] += ' & ' selToApply[iPt] += ( f"({cutVars[varName]['min'][iPt]}<{cutVars[varName]['name']}" f"<{cutVars[varName]['max'][iPt]})")