metavar='text',
                    default='cfgFileName.yml',
                    help='config file name with root input files')
# parse the command line and read the YAML configuration
args = parser.parse_args()

with open(args.cfgFileName, 'r') as ymlCfgFile:
    inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
# unpack the configuration entries used below
inKineFileName = inputCfg['inKineFileName']  # file with the decay-kinematics tree
Bhadrons = inputCfg['pdgCodeB']  # PDG codes of the beauty hadrons (per key name)
FFbtoB = inputCfg['FFbtoB']  # b -> B fragmentation fractions (per key name)
Dhadrons = inputCfg['pdgCodeD']  # PDG codes of the charm hadrons (per key name)
BRDhadrons = inputCfg['BRD']  # D-hadron branching ratios (per key name)
partPlusAntiPart = inputCfg['partPlusAntiPart']  # flag: include antiparticles
outFileName = inputCfg['outFileName']  # output ROOT file name

# load the 'fTreeDecays' tree into a pandas dataframe (no TDirectory)
kineDf = LoadDfFromRootOrParquet(inKineFileName, None, 'fTreeDecays')

# global ROOT plotting style for the figures produced below
SetGlobalStyle(padleftmargin=0.14,
               padbottommargin=0.12,
               titleoffsety=1.3,
               optstat=0)
# one ROOT color per beauty-hadron species (parallel to Bnames below)
Bcolors = [kRed + 1, kAzure + 4, kOrange + 7, kGreen + 2]
# TeX-style labels for the beauty-hadron species, keyed by PDG code.
# BUGFIX: per the PDG Monte Carlo numbering scheme, 511 is the B^{0} and
# 521 is the B^{+}; the original mapping had the two labels swapped.
Bnames = {
    511: 'B^{0}',
    521: 'B^{+}',
    531: 'B_{s}^{0}',
    5122: '#Lambda_{b}^{0}'
}
Dnames = {
    411: 'D^{+}',
    421: 'D^{0}',
# ===== 示例#2 ("Example #2") — scraped-fragment boundary =====
def main():
    """
    Run the ML analysis chain (data preparation, training/testing and
    model application) for every pT interval defined in the YAML config.

    Command line:
        cfgFileName   YAML configuration file
        --train       perform only training and testing
        --apply       perform only application (loads saved models)
    """
    # read config file
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName',
                        metavar='text',
                        default='cfgFileNameML.yml',
                        help='config file name for ml')
    parser.add_argument("--train",
                        help="perform only training and testing",
                        action="store_true")
    parser.add_argument("--apply",
                        help="perform only application",
                        action="store_true")
    args = parser.parse_args()

    print('Loading analysis configuration: ...', end='\r')
    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
    print('Loading analysis configuration: Done!')

    print('Loading data files: ...', end='\r')
    PromptDf = LoadDfFromRootOrParquet(inputCfg['input']['prompt'])
    FDDf = LoadDfFromRootOrParquet(inputCfg['input']['FD'])
    DataDf = LoadDfFromRootOrParquet(inputCfg['input']['data'])
    print('Loading data files: Done!')

    # one independent ML analysis per configured pT interval
    for iBin, (PtMin, PtMax) in enumerate(
            zip(inputCfg['pt_ranges']['min'], inputCfg['pt_ranges']['max'])):

        print(
            f'\n\033[94mStarting ML analysis --- {PtMin} < pT < {PtMax} GeV/c\033[0m'
        )

        OutPutDirPt = os.path.join(inputCfg['output']['dir'],
                                   f'pt{PtMin}_{PtMax}')
        if os.path.isdir(OutPutDirPt):
            print(
                'Output directory already exists, overwrites possibly ongoing!'
            )
        else:
            # makedirs (instead of mkdir) so a missing parent directory
            # in the configured output path does not abort the analysis
            os.makedirs(OutPutDirPt)

        # data preparation
        #_____________________________________________
        TrainTestData, DataDfPtSel, PromptDfPtSelForEff, FDDfPtSelForEff = data_prep(
            inputCfg, iBin, PtMin, PtMax, OutPutDirPt, DataDf, PromptDf, FDDf)

        # training, testing (skipped with --apply: a saved model is loaded)
        #_____________________________________________
        if not args.apply:
            ModelHandl = train_test(inputCfg, PtMin, PtMax, OutPutDirPt,
                                    TrainTestData)
        else:
            ModelList = inputCfg['ml']['saved_models']
            ModelPath = ModelList[iBin]
            if not isinstance(ModelPath, str):
                print('ERROR: path to model not correctly defined!')
                sys.exit()
            print(f'Loaded saved model: {ModelPath}')
            ModelHandl = ModelHandler()
            ModelHandl.load_model_handler(ModelPath)

        # model application (skipped with --train)
        #_____________________________________________
        if not args.train:
            appl(inputCfg, PtMin, PtMax, OutPutDirPt, ModelHandl, DataDfPtSel,
                 PromptDfPtSelForEff, FDDfPtSelForEff)

        # delete dataframes to release memory before the next pT bin
        # (the original "for data in TrainTestData: del data" only unbound
        #  the loop variable and freed nothing)
        del TrainTestData, DataDfPtSel, PromptDfPtSelForEff, FDDfPtSelForEff
                    help='config file name with root input files')
# remaining CLI arguments: output file name and batch-mode flag
parser.add_argument('outFileName',
                    metavar='text',
                    default='outFile.root',
                    help='output root file name')
parser.add_argument("--batch",
                    help="suppress video output",
                    action="store_true")
args = parser.parse_args()

# read the YAML configuration describing the input files
with open(args.cfgFileName, 'r') as ymlCfgFile:
    inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)

# load dataframes from input files
# prompt signal candidates
dfPrompt = LoadDfFromRootOrParquet(
    inputCfg['infiles']['signal']['prompt']['filename'],
    inputCfg['infiles']['signal']['prompt']['dirname'],
    inputCfg['infiles']['signal']['prompt']['treename'])
# feed-down signal candidates
dfFD = LoadDfFromRootOrParquet(
    inputCfg['infiles']['signal']['feeddown']['filename'],
    inputCfg['infiles']['signal']['feeddown']['dirname'],
    inputCfg['infiles']['signal']['feeddown']['treename'])
# background candidates
dfBkg_tot = LoadDfFromRootOrParquet(
    inputCfg['infiles']['background']['filename'],
    inputCfg['infiles']['background']['dirname'],
    inputCfg['infiles']['background']['treename'])
# optional secondary-peak prompt sample (only when a file name is configured)
if inputCfg['infiles']['secpeak']['prompt']['filename']:
    dfSecPeakPrompt = LoadDfFromRootOrParquet(
        inputCfg['infiles']['secpeak']['prompt']['filename'],
        inputCfg['infiles']['secpeak']['prompt']['dirname'],
        inputCfg['infiles']['secpeak']['prompt']['treename'])
else:
def main():  #pylint: disable=too-many-locals,too-many-statements
    """
    Main function of the script.

    Compare prompt and non-prompt (feed-down) efficiencies of a threshold
    cut scanned over a configurable variable for two MC configurations
    (labelled P6/P8 below), in a single pT interval. Saves the variable
    distributions, the efficiency-vs-threshold curves and their ratios
    as pdf files in the chosen output directory.

    Command line:
        cfgFileName   YAML configuration file
        outFileDir    output file directory
    """
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName',
                        metavar='text',
                        default='cfgFileName.yml',
                        help='config file name')
    parser.add_argument('outFileDir',
                        metavar='text',
                        default='./',
                        help='output file directory')
    args = parser.parse_args()

    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)

    #Load data (two prompt and two feed-down samples, one per configuration)
    dfPromptP6 = LoadDfFromRootOrParquet(inputCfg['input']['prompt_files'][0])
    dfPromptP8 = LoadDfFromRootOrParquet(inputCfg['input']['prompt_files'][1])
    dfFDP6 = LoadDfFromRootOrParquet(inputCfg['input']['fd_files'][0])
    dfFDP8 = LoadDfFromRootOrParquet(inputCfg['input']['fd_files'][1])

    #Select pt bin
    ptMin = inputCfg['pt_bin'][0]
    ptMax = inputCfg['pt_bin'][1]
    dfPromptP6 = dfPromptP6.query(f'{ptMin} < pt_cand < {ptMax}')
    dfPromptP8 = dfPromptP8.query(f'{ptMin} < pt_cand < {ptMax}')
    dfFDP6 = dfFDP6.query(f'{ptMin} < pt_cand < {ptMax}')
    dfFDP8 = dfFDP8.query(f'{ptMin} < pt_cand < {ptMax}')

    SetGlobalStyle(padbottommargin=0.14,
                   padleftmargin=0.18,
                   padrightmargin=0.06,
                   titleoffsety=1.6)
    varTitle = inputCfg['scan_variable']['title']
    nBins = inputCfg['scan_variable']['histo_bins']
    binLims = inputCfg['scan_variable']['histo_lims']
    varName = inputCfg['scan_variable']['name']
    hPromptP6 = TH1F('hPromptP6', f';{varTitle};Counts', nBins, binLims[0],
                     binLims[1])
    hPromptP8 = TH1F('hPromptP8', f';{varTitle};Counts', nBins, binLims[0],
                     binLims[1])
    hFDP6 = TH1F('hFDP6', f';{varTitle};Counts', nBins, binLims[0], binLims[1])
    hFDP8 = TH1F('hFDP8', f';{varTitle};Counts', nBins, binLims[0], binLims[1])
    # rescale the scan variable once, before both filling and cutting
    scaleFactor = inputCfg['scan_variable']['rescale_factor']
    dfPromptP6[varName] = dfPromptP6[varName] * scaleFactor
    dfPromptP8[varName] = dfPromptP8[varName] * scaleFactor
    dfFDP6[varName] = dfFDP6[varName] * scaleFactor
    dfFDP8[varName] = dfFDP8[varName] * scaleFactor
    for value in dfPromptP6[varName].to_numpy():
        hPromptP6.Fill(value)
    for value in dfPromptP8[varName].to_numpy():
        hPromptP8.Fill(value)
    for value in dfFDP6[varName].to_numpy():
        hFDP6.Fill(value)
    for value in dfFDP8[varName].to_numpy():
        hFDP8.Fill(value)
    SetObjectStyle(hPromptP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hPromptP8, color=kRed + 1, marker=kFullCircle)
    SetObjectStyle(hFDP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hFDP8, color=kRed + 1, marker=kFullCircle)
    hPromptP6.GetXaxis().SetNdivisions(505)
    hFDP6.GetXaxis().SetNdivisions(505)
    hPromptP8.GetXaxis().SetNdivisions(505)
    hFDP8.GetXaxis().SetNdivisions(505)

    # efficiency-vs-threshold histograms: one bin per scanned cut value
    scanRange = inputCfg['scan_variable']['scan_range']
    scanStep = inputCfg['scan_variable']['scan_step']
    nEffBins = round((scanRange[1] - scanRange[0]) / scanStep)
    hEffPromptP6 = TH1F('hEffPromptP6', f';{varTitle} >;Efficiency', nEffBins,
                        scanRange[0], scanRange[1])
    hEffPromptP8 = TH1F('hEffPromptP8', f';{varTitle} >;Efficiency', nEffBins,
                        scanRange[0], scanRange[1])
    hEffFDP6 = TH1F('hEffFDP6', f';{varTitle} >;Efficiency', nEffBins,
                    scanRange[0], scanRange[1])
    hEffFDP8 = TH1F('hEffFDP8', f';{varTitle} >;Efficiency', nEffBins,
                    scanRange[0], scanRange[1])
    SetObjectStyle(hEffPromptP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hEffPromptP8, color=kRed + 1, marker=kFullCircle)
    SetObjectStyle(hEffFDP6, color=kAzure + 4, marker=kFullCircle)
    SetObjectStyle(hEffFDP8, color=kRed + 1, marker=kFullCircle)

    effPromptP6, effPromptP8, effFDP6, effFDP8 = ([] for _ in range(4))
    effPromptUncP6, effPromptUncP8, effFDUncP6, effFDUncP8 = (
        [] for _ in range(4))
    labelsConf = inputCfg['legend']['conf_labels']
    legPrompt = TLegend(0.25, 0.2, 0.6, 0.4)
    legPrompt.SetBorderSize(0)
    legPrompt.SetFillStyle(0)
    legPrompt.SetHeader('Prompt')
    legPrompt.AddEntry(hEffPromptP6, labelsConf[0], 'p')
    legPrompt.AddEntry(hEffPromptP8, labelsConf[1], 'p')
    legFD = TLegend(0.25, 0.2, 0.65, 0.4)
    legFD.SetBorderSize(0)
    legFD.SetFillStyle(0)
    legFD.SetHeader('Non-prompt')
    # BUGFIX: the non-prompt legend previously referenced the *prompt*
    # efficiency histograms (hEffPromptP6/P8) instead of the FD ones
    legFD.AddEntry(hEffFDP6, labelsConf[0], 'p')
    legFD.AddEntry(hEffFDP8, labelsConf[1], 'p')

    # scan the threshold: efficiency = fraction of candidates passing
    # varName > cut, with binomial uncertainty sqrt(eff*(1-eff)/N)
    for iBin, cut in enumerate(np.arange(scanRange[0], scanRange[1],
                                         scanStep)):
        dfPromptP6Sel = dfPromptP6.query(f'{varName} > {cut}')
        dfPromptP8Sel = dfPromptP8.query(f'{varName} > {cut}')
        dfFDP6Sel = dfFDP6.query(f'{varName} > {cut}')
        dfFDP8Sel = dfFDP8.query(f'{varName} > {cut}')

        effPromptP6.append(float(len(dfPromptP6Sel) / len(dfPromptP6)))
        effPromptP8.append(float(len(dfPromptP8Sel) / len(dfPromptP8)))
        effFDP6.append(float(len(dfFDP6Sel) / len(dfFDP6)))
        effFDP8.append(float(len(dfFDP8Sel) / len(dfFDP8)))

        effPromptUncP6.append(
            np.sqrt(effPromptP6[-1] * (1 - effPromptP6[-1]) / len(dfPromptP6)))
        effPromptUncP8.append(
            np.sqrt(effPromptP8[-1] * (1 - effPromptP8[-1]) / len(dfPromptP8)))
        effFDUncP6.append(
            np.sqrt(effFDP6[-1] * (1 - effFDP6[-1]) / len(dfFDP6)))
        effFDUncP8.append(
            np.sqrt(effFDP8[-1] * (1 - effFDP8[-1]) / len(dfFDP8)))

        hEffPromptP6.SetBinContent(iBin + 1, effPromptP6[-1])
        hEffPromptP8.SetBinContent(iBin + 1, effPromptP8[-1])
        hEffFDP6.SetBinContent(iBin + 1, effFDP6[-1])
        hEffFDP8.SetBinContent(iBin + 1, effFDP8[-1])

        hEffPromptP6.SetBinError(iBin + 1, effPromptUncP6[-1])
        hEffPromptP8.SetBinError(iBin + 1, effPromptUncP8[-1])
        hEffFDP6.SetBinError(iBin + 1, effFDUncP6[-1])
        hEffFDP8.SetBinError(iBin + 1, effFDUncP8[-1])

    hEffPromptP6.GetXaxis().SetNdivisions(505)
    hEffFDP6.GetXaxis().SetNdivisions(505)
    hEffPromptP8.GetXaxis().SetNdivisions(505)
    hEffFDP8.GetXaxis().SetNdivisions(505)

    # efficiency ratios between the two configurations (second / first)
    hEffPromptRatio = hEffPromptP8.Clone('hEffPromptRatio')
    hEffPromptRatio.Divide(hEffPromptP6)
    hEffPromptRatio.GetYaxis().SetTitle(
        f'Prompt eff ratio {labelsConf[1]} / {labelsConf[0]}')
    hEffFDRatio = hEffFDP8.Clone('hEffFDRatio')
    hEffFDRatio.Divide(hEffFDP6)
    hEffFDRatio.GetYaxis().SetTitle(
        f'Non-prompt eff ratio {labelsConf[1]} / {labelsConf[0]}')

    hEffPromptRatio.GetXaxis().SetNdivisions(505)
    hEffFDRatio.GetXaxis().SetNdivisions(505)

    # draw and save: distributions, efficiency curves, efficiency ratios
    cDistributions = TCanvas('cDistributions', '', 1920, 1080)
    cDistributions.Divide(2, 1)
    cDistributions.cd(1).SetLogy()
    hPromptP8.Draw('e')
    hPromptP6.Draw('esame')
    legPrompt.Draw()
    cDistributions.cd(2).SetLogy()
    hFDP8.Draw('e')
    hFDP6.Draw('esame')
    legFD.Draw()

    cEfficiency = TCanvas('cEfficiency', '', 1920, 1080)
    cEfficiency.Divide(2, 1)
    cEfficiency.cd(1).SetLogy()
    hEffPromptP6.Draw('e')
    hEffPromptP8.Draw('esame')
    legPrompt.Draw()
    cEfficiency.cd(2).SetLogy()
    hEffFDP6.Draw('e')
    hEffFDP8.Draw('esame')
    legFD.Draw()

    cEfficiencyRatio = TCanvas('cEfficiencyRatio', '', 1920, 1080)
    cEfficiencyRatio.Divide(2, 1)
    cEfficiencyRatio.cd(1)
    hEffPromptRatio.Draw('e')
    cEfficiencyRatio.cd(2)
    hEffFDRatio.Draw('e')

    tag = f'{labelsConf[0]}Vs{labelsConf[1]}_pT{ptMin}_{ptMax}'
    cDistributions.SaveAs(f'{args.outFileDir}/{varName}_Distr_{tag}.pdf')
    cEfficiency.SaveAs(f'{args.outFileDir}/{varName}_CutEff_{tag}.pdf')
    cEfficiencyRatio.SaveAs(
        f'{args.outFileDir}/{varName}_CutEffRatio_{tag}.pdf')

    print('Press any key to exit!')
    input()
# remaining CLI arguments: cut-set file and output file name
parser.add_argument('cutSetFileName',
                    metavar='text',
                    default='cutSetFileName.yml',
                    help='input file with cut set')
parser.add_argument('outFileName',
                    metavar='text',
                    default='outFileName.root',
                    help='output root file name')
args = parser.parse_args()

#config input file and df definition
with open(args.cfgFileName, 'r') as ymlCfgFile:
    inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
inFileNames = inputCfg['infiles']['name']
dfSignif = LoadDfFromRootOrParquet(inputCfg['infiles']['name'],
                                   inputCfg['infiles']['dirname'],
                                   inputCfg['infiles']['treename'])
# mid-point of each pT interval, used as the candidate pT value
dfSignif['Pt'] = dfSignif.apply(lambda row: (row.PtMin + row.PtMax) / 2,
                                axis=1)
VarDrawList = inputCfg['VarDrawList']
if not isinstance(VarDrawList, list):
    VarDrawList = [VarDrawList]

#selections to be applied
with open(args.cutSetFileName, 'r') as ymlCutSetFile:
    cutSetCfg = yaml.load(ymlCutSetFile, yaml.FullLoader)
cutVars = cutSetCfg['cutvars']
# BUGFIX: the original condition was
#   "if not 'ML_output_Bkg' or not 'ML_output_FD' in cutVars:"
# which parses as (not 'ML_output_Bkg') or ('ML_output_FD' not in cutVars);
# the first operand is a truthy string literal, so the Bkg-cut check
# never fired. Warn when either key is missing from the cut set.
if 'ML_output_Bkg' not in cutVars or 'ML_output_FD' not in cutVars:
    print(
        '\t\t---Warning: no ML Bkg or FD output cut was provided. Are you sure you want to continue?---\n'
    )
# make utils.DfUtils importable from the parent directory
sys.path.append('..')
from utils.DfUtils import LoadDfFromRootOrParquet #pylint: disable=wrong-import-position,import-error

# inputs
parser = argparse.ArgumentParser(description='Arguments to pass')
parser.add_argument('cfgFileName', metavar='text', default='config_training_FileName.yml',
                    help='config file used for the training')
args = parser.parse_args()

# load configfiles
with open(args.cfgFileName, 'r') as ymlCfgFile:
    inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)

# load dataframes
print('Reading input files')
# background sample: data candidates filtered with the configured mass selection
bkg = LoadDfFromRootOrParquet(
    inputCfg['input']['data'], inTreeNames=inputCfg['input']['treename'])
bkg = bkg.query(inputCfg['data_prep']['filt_bkg_mass'])
# prompt MC sample
prompt = LoadDfFromRootOrParquet(
    inputCfg['input']['prompt'], inTreeNames=inputCfg['input']['treename'])
# feed-down MC sample (optional: loaded only if configured)
if inputCfg['input']['FD']:
    FD = LoadDfFromRootOrParquet(
        inputCfg['input']['FD'], inTreeNames=inputCfg['input']['treename'])

# loop over training pt bins and report the available candidate counts
for ptMin, ptMax, bkg_mult in zip(inputCfg['pt_ranges']['min'],
                                  inputCfg['pt_ranges']['max'],
                                  inputCfg['data_prep']['bkg_mult']):
    print(f'\nPt bin {ptMin}-{ptMax} GeV/c, available candidates:')
    numBkg = len(bkg.query(f'{ptMin} < pt_cand < {ptMax}'))
    numPrompt = len(prompt.query(f'{ptMin} < pt_cand < {ptMax}'))
    print(f'  - bkg -> {numBkg}\n  - prompt -> {numPrompt}')
# ===== 示例#7 ("Example #7") — scraped-fragment boundary =====
def main():
    """
    Compare variable distributions between several pre-filtered data
    samples, pT interval by pT interval, and save the comparison plots
    as pdf files in the configured output directory.

    Command line:
        cfgFileName   YAML configuration file for the check
    """
    # read config file
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName',
                        metavar='text',
                        default='cfgFileNameCheck.yml',
                        help='config file name for check')
    args = parser.parse_args()

    print('Loading check configuration: ...', end='\r')
    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
    print('Loading check configuration: Done!')

    # one dataframe per configured file (all share the same dir/tree names)
    print('Loading data files: ...', end='\r')
    DfList = []
    inDirName = inputCfg['input']['dirname']
    inTreeName = inputCfg['input']['treename']
    for filePath in inputCfg['input']['files']:
        DfList.append(LoadDfFromRootOrParquet(filePath, inDirName, inTreeName))
    print('Loading data files: Done!')

    # apply the per-sample pre-filtering queries
    # (typo fix: "Appling" -> "Applying"; the closing message now matches
    #  the opening one, so the '\r' overwrite leaves no residue on screen)
    print('Applying simple pre-filtering: ...', end='\r')
    DfListSel = []
    for df, query in zip(DfList, inputCfg['queries']):
        DfListSel.append(df.query(query))
    print('Applying simple pre-filtering: Done!')
    del DfList

    VarsToDraw = inputCfg['plotting_columns']
    LegLabels = inputCfg['output']['leg_labels']
    Colors = inputCfg['output']['colors']
    OutPutDir = inputCfg['output']['dir']

    for PtMin, PtMax, LimMin, LimMax in zip(inputCfg['pt_ranges']['min'],
                                            inputCfg['pt_ranges']['max'],
                                            inputCfg['plot_lim_min'],
                                            inputCfg['plot_lim_max']):
        print(f'Plot variable distributions --- {PtMin} < pT < {PtMax} GeV/c')
        DfListPt = []
        for df in DfListSel:
            DfListPt.append(df.query(f'{PtMin} < pt_cand < {PtMax}'))
        DistrPlot = plot_utils.plot_distr(DfListPt,
                                          VarsToDraw,
                                          1000,
                                          LegLabels,
                                          figsize=(6, 6),
                                          density=True,
                                          histtype='stepfilled',
                                          grid=False,
                                          log=True,
                                          colors=Colors,
                                          alpha=0.3)
        plt.subplots_adjust(left=0.1,
                            bottom=0.05,
                            right=0.95,
                            top=0.95,
                            hspace=0.4)
        # handle the single-Axes case so the zip below always iterates
        if not isinstance(DistrPlot, np.ndarray):
            DistrPlot = np.array([DistrPlot])
        print(len(DistrPlot), len(LimMin), len(LimMax),
              len(inputCfg['xaxes_label']))
        for ax, minVar, maxVar, xLabel in zip(DistrPlot, LimMin, LimMax,
                                              inputCfg['xaxes_label']):
            ax.set_xlim(minVar, maxVar)
            ax.set_xlabel(xLabel, fontsize=10, ha='right', position=(1, 20))
            ax.set_ylabel('Counts (arb. units)',
                          fontsize=10,
                          ha='right',
                          position=(20, 1))
            plt.legend(frameon=False, fontsize=10, loc='best')
            ax.set_title('')
            plt.tight_layout()
        plt.savefig(f'{OutPutDir}/NsigzoomDistrComp_pT_{PtMin}_{PtMax}.pdf')
        plt.close('all')
        # release the per-bin dataframes before the next iteration
        del DfListPt

    del DfListSel
# ===== 示例#8 ("Example #8") — scraped-fragment boundary =====
    if iFile == 0:
        hEv, normCounter = LoadNormObjFromTask(inFileName, inputCfg)
        if isMC:
            _, sparseGen = LoadSparseFromTask(inFileName, inputCfg) #only gen sparses used
    else:
        hEvPart, normCounterPart = LoadNormObjFromTask(inFileName, inputCfg)
        hEv.Add(hEvPart)
        normCounter.Add(normCounterPart)
        if isMC:
            _, sparseGenPart = LoadSparseFromTask(inFileName, inputCfg) #only gen sparses used
            for sparseType in sparseGenPart:
                sparseGen[sparseType].Add(sparseGenPart[sparseType])

#load trees
if isMC:
    dataFramePrompt = LoadDfFromRootOrParquet(inputCfg['tree']['filenamePrompt'], inputCfg['tree']['dirname'],
                                              inputCfg['tree']['treename'])

    if 'cand_type' in dataFramePrompt.columns: #if not filtered tree, select only FD and not reflected
        dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type', [bitSignal, bitPrompt], 'and')
        dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type', [bitRefl], 'not')

    dataFrameFD = LoadDfFromRootOrParquet(inputCfg['tree']['filenameFD'], inputCfg['tree']['dirname'],
                                          inputCfg['tree']['treename'])

    if 'cand_type' in dataFrameFD.columns: #if not filtered tree, select only FD and not reflected
        dataFrameFD = FilterBitDf(dataFrameFD, 'cand_type', [bitSignal, bitFD], 'and')
        dataFrameFD = FilterBitDf(dataFrameFD, 'cand_type', [bitRefl], 'not')

    for iPt, (cuts, ptMin, ptMax) in enumerate(zip(selToApply, cutVars['Pt']['min'], cutVars['Pt']['max'])):
        print("Projecting distributions for %0.1f < pT < %0.1f GeV/c" % (ptMin, ptMax))
        #gen histos from sparses
# inputs
parser = argparse.ArgumentParser(description='Arguments to pass')
parser.add_argument('cfgFileName', metavar='text', default='cfgFileName.yml',
                    help='config file name with path of input dataframes for check')
parser.add_argument('cutSetFileName', metavar='text', default='cutSetFileName.yml',
                    help='cut set file name')
parser.add_argument('--outputDir', metavar='text', default='.',
                    help='output directory for plots')
args = parser.parse_args()

# input dataframes
with open(args.cfgFileName, 'r') as ymlCfgFile:
    inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
isMC = inputCfg['isMC']
if isMC:
    # MC: separate prompt and feed-down trees
    dfPrompt = LoadDfFromRootOrParquet(inputCfg['tree']['filenamePrompt'],
                                       inputCfg['tree']['dirname'], inputCfg['tree']['treename'])
    dfFD = LoadDfFromRootOrParquet(inputCfg['tree']['filenameFD'],
                                   inputCfg['tree']['dirname'], inputCfg['tree']['treename'])
else:
    # data: a single tree with all candidates
    dfAll = LoadDfFromRootOrParquet(inputCfg['tree']['filenameAll'],
                                    inputCfg['tree']['dirname'], inputCfg['tree']['treename'])

# selections to be applied (one selection string built per pT interval)
with open(args.cutSetFileName, 'r') as ymlCutSetFile:
    cutSetCfg = yaml.load(ymlCutSetFile, yaml.FullLoader)
cutVars = cutSetCfg['cutvars']
selToApply = []
for iPt, _ in enumerate(cutVars['Pt']['min']):
    selToApply.append('')
    for varName in cutVars:
        if varName == 'InvMass':
# ===== 示例#10 ("Example #10") — scraped-fragment boundary =====
# skimming options from the configuration
preSelections = cfg['skimming']['preselections']
colsToKeep = cfg['skimming']['colstokeep']

# sanity warnings: these two branches are normally kept for later analysis
if colsToKeep and 'inv_mass' not in colsToKeep:
    print(
        'Warning: invariant mass branch (inv_mass) disabled. Are you sure you don\'t want to keep it?'
    )
if colsToKeep and 'pt_cand' not in colsToKeep:
    print(
        'Warning: pt branch (pt_cand) disabled. Are you sure you don\'t want to keep it?'
    )

# pT window for the skimming
PtMin = cfg['skimming']['pt']['min']
PtMax = cfg['skimming']['pt']['max']

dataFrame = LoadDfFromRootOrParquet(inFileNames, inDirName, inTreeName)

# with no explicit column list, keep everything except 'cand_type'
# NOTE(review): list.remove raises ValueError if 'cand_type' is absent --
# confirm the input trees always contain that column
if not colsToKeep:
    colsToKeep = list(dataFrame.columns)
    colsToKeep.remove('cand_type')

print('Applying selections')
# apply the pT window first, then the optional pre-selection string
dataFramePtCut = dataFrame.query(f'pt_cand > {PtMin} & pt_cand < {PtMax}')
del dataFrame
if preSelections:
    dataFramePtCutSel = dataFramePtCut.astype(float).query(preSelections)
    del dataFramePtCut
else:
    dataFramePtCutSel = dataFramePtCut

if cfg['missingvalues']['enable']:
# ===== 示例#11 ("Example #11") — scraped-fragment boundary =====
    for varName in cutVars:
        if varName == 'InvMass':
            continue
        if selToApply[iPt] != '':
            selToApply[iPt] += ' & '
        selToApply[
            iPt] += f"{cutVars[varName]['min'][iPt]}<{cutVars[varName]['name']}<{cutVars[varName]['max'][iPt]}"

# define filter bits (bit positions tested on the 'cand_type' column)
bitSignal = 0
bitPrompt = 2
bitFD = 3
bitRefl = 4

# prompt MC candidates
dataFramePrompt = LoadDfFromRootOrParquet(inputCfg['tree']['filenamePrompt'],
                                          inputCfg['tree']['dirname'],
                                          inputCfg['tree']['treename'])

if 'cand_type' in dataFramePrompt.columns:  #if not filtered tree, select only prompt and not reflected
    dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type',
                                  [bitSignal, bitPrompt], 'and')
    dataFramePrompt = FilterBitDf(dataFramePrompt, 'cand_type', [bitRefl],
                                  'not')
# re-index after the row filtering above
dataFramePrompt.reset_index(inplace=True)

# feed-down MC candidates
dataFrameFD = LoadDfFromRootOrParquet(inputCfg['tree']['filenameFD'],
                                      inputCfg['tree']['dirname'],
                                      inputCfg['tree']['treename'])
if 'cand_type' in dataFrameFD.columns:  #if not filtered tree, select only FD and not reflected
    dataFrameFD = FilterBitDf(dataFrameFD, 'cand_type', [bitSignal, bitFD],
                              'and')
# ===== 示例#12 ("Example #12") — scraped-fragment boundary =====
                    help='config file name with root input files')
# remaining CLI arguments: output file name and batch-mode flag
parser.add_argument('outFileName',
                    metavar='text',
                    default='outFile.root',
                    help='output root file name')
parser.add_argument("--batch",
                    help="suppress video output",
                    action="store_true")
args = parser.parse_args()

# read the YAML configuration describing the input files
with open(args.cfgFileName, 'r') as ymlCfgFile:
    inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)

# load dataframes from input files
dfPrompt = LoadDfFromRootOrParquet(
    inputCfg['infiles']['signal']['prompt']['filename'],
    inputCfg['infiles']['signal']['prompt']['dirname'],
    inputCfg['infiles']['signal']['prompt']['treename'])
dfFD = LoadDfFromRootOrParquet(
    inputCfg['infiles']['signal']['feeddown']['filename'],
    inputCfg['infiles']['signal']['feeddown']['dirname'],
    inputCfg['infiles']['signal']['feeddown']['treename'])
dfBkg = LoadDfFromRootOrParquet(inputCfg['infiles']['background']['filename'],
                                inputCfg['infiles']['background']['dirname'],
                                inputCfg['infiles']['background']['treename'])

if inputCfg['infiles']['secpeak']['prompt']['filename']:
    # BUGFIX: this branch is guarded by the *secpeak* entry but previously
    # re-loaded the *signal* prompt file (copy-paste error); load the
    # secondary-peak prompt sample instead
    dfSecPeakPrompt = LoadDfFromRootOrParquet(
        inputCfg['infiles']['secpeak']['prompt']['filename'],
        inputCfg['infiles']['secpeak']['prompt']['dirname'],
        inputCfg['infiles']['secpeak']['prompt']['treename'])
else:
# ===== 示例#13 ("Example #13") — scraped-fragment boundary =====
                cutVars[var]['min'][iPt] * 1.0001)
            binMax = sparseBkg.GetAxis(cutVars[var]['axisnum']).FindBin(
                cutVars[var]['max'][iPt] * 0.9999)
            sparseBkg.GetAxis(cutVars[var]['axisnum']).SetRange(binMin, binMax)
        hMassSel.append(sparseBkg.Projection(0))
        hMassSel[iPt].SetNameTitle(
            f'hMassSelPt{ptMin:.0f}_{ptMax:.0f}',
            f'{ptMin} < #it{{p}}_{{T}} < {ptMax} (GeV/#it{{c}});{massTitle};Counts'
        )

        for iAxis in range(sparseBkg.GetNdimensions()):
            sparseBkg.GetAxis(iAxis).SetRange(-1, -1)

else:  # data from tree/dataframe
    dataFrameBkg = LoadDfFromRootOrParquet(inputCfg['tree']['filenameBkg'],
                                           inputCfg['tree']['dirname'],
                                           inputCfg['tree']['treename'])

    massBins = 500
    massLimLow = min(dataFrameBkg['inv_mass'])
    massLimHigh = max(dataFrameBkg['inv_mass'])

    # selections to be applied
    selToApply = []
    for iPt, _ in enumerate(cutVars['Pt']['min']):
        selToApply.append('')
        for varName in cutVars:
            if varName == 'InvMass':
                continue
            if selToApply[iPt] != '':
                selToApply[iPt] += ' & '
# ===== 示例#14 ("Example #14") — scraped-fragment boundary =====
                    hNsigmaSel[det][spe]['0-2'][f'Pt{ptmin:.0f}_{ptmax:.0f}'],
                    color=kRed,
                    linealpha=0.25,
                    fillalpha=0.25,
                    markeralpha=1,
                    markerstyle=kOpenCircle,
                    markersize=0.3,
                    linewidth=1)

                sparse.GetAxis(0).SetRange(-1, -1)
                sparse.GetAxis(1).SetRange(-1, -1)
else:
    detectors = ['TPC', 'TOF', 'Comb']
    species = ['Pi', 'K']
    prongs = ['0', '1', '2']
    dataDf = LoadDfFromRootOrParquet(inputCfg['inputfiles'])

    with open(ARGS.cutSetFileName, 'r') as ymlCutSetFile:
        cutSetCfg = yaml.load(ymlCutSetFile, yaml.FullLoader)
    cutVars = cutSetCfg['cutvars']
    selToApply = []
    for iPt, _ in enumerate(cutVars['Pt']['min']):
        selToApply.append('')
        for varName in cutVars:
            if varName == 'InvMass':
                continue
            if selToApply[iPt] != '':
                selToApply[iPt] += ' & '
            selToApply[iPt] += (
                f"({cutVars[varName]['min'][iPt]}<{cutVars[varName]['name']}"
                f"<{cutVars[varName]['max'][iPt]})")