def countAndFillHistos(samplesPerGroup={}, syst='', verbose=False, outdir='./') :

    selections = allRegions()
    variables = variablesToPlot()

    mcGroups, fakeGroups = mcDatasetids().keys(), ['fake']
    objVariations, weightVariations, fakeVariations = systUtils.mcObjectVariations(), systUtils.mcWeightVariations(), systUtils.fakeSystVariations()
    def groupIsRelevantForSys(g, s) :
        isRelevant = (s=='NOM' or (g in mcGroups and s in objVariations+weightVariations) or (g in fakeGroups and s in fakeVariations))
        if verbose and not isRelevant : print "skipping %s for %s"%(g, s)
        return isRelevant
    def dropIrrelevantGroupsForThisSys(groups, sys) : return dict((g, samples) for g, samples in groups.iteritems() if groupIsRelevantForSys(g, syst))
    def dropSamplesWithoutTree(samples) : return [s for s in samples if s.hasInputHftTree(msg='Warning! ')]
    def dropGroupsWithoutSamples(groups) : return dict((g, samples) for g, samples in groups.iteritems() if len(samples))
    samplesPerGroup = dropIrrelevantGroupsForThisSys(samplesPerGroup, syst)
    samplesPerGroup = dict((g, dropSamplesWithoutTree(samples)) for g, samples in samplesPerGroup.iteritems())
    samplesPerGroup = dropGroupsWithoutSamples(samplesPerGroup)

    groups = samplesPerGroup.keys()
    counters = bookCounters(groups, selections)
    histos = bookHistos(variables, groups, selections)
    for group, samplesGroup in samplesPerGroup.iteritems() :
        logLine = "---->"
        if verbose : print 1*' ',group
        histosGroup = histos  [group]
        countsGroup = counters[group]
        for sample in samplesGroup :
            if verbose : logLine +=" %s"%sample.name
            fillAndCount(histosGroup, countsGroup, sample, blind=False)
        if verbose : print logLine
    if verbose : print 'done'
    return counters, histos
Exemplo n.º 2
0
def get_list_of_syst_to_fill(opts):
    """
    Build the list of systematic variations selected by the options.

    Reads opts.syst and opts.exclude. With opts.syst set to None the list is
    'NOM' followed by the fake, object and weight variations; the keywords
    'fake', 'object' and 'weight' select only the matching category. A value
    containing commas is split and intersected with
    systUtils.getAllVariations(); a single known variation is returned alone.
    Names matched by filterWithRegexp for opts.exclude are removed at the end.

    Raises ValueError when a non-None option is not 'NOM', is not a known
    variation, and did not extend the list beyond 'NOM'.
    """
    requested = opts.syst
    excluded = opts.exclude
    no_request = requested is None

    result = ['NOM']
    categories = (
        ('fake', systUtils.fakeSystVariations),
        ('object', systUtils.mcObjectVariations),
        ('weight', systUtils.mcWeightVariations),
    )
    for keyword, variations in categories:
        if no_request or requested == keyword:
            result += variations()

    if requested and requested.count(','):
        wanted = requested.split(',')
        result = [v for v in systUtils.getAllVariations() if v in wanted]
    elif requested in systUtils.getAllVariations():
        result = [requested]
    elif not (no_request or len(result) != 1 or requested == 'NOM'):
        raise ValueError("Invalid syst %s" % str(requested))

    if excluded:
        result = [
            v for v in result
            if v not in filterWithRegexp(result, excluded)
        ]
    return result
 def setSyst(self, sys='NOM') :
     """
     Select the systematic variation `sys` and store the resulting name in self.syst.

     Also records in self.isObjSys / self.isWeightSys / self.isFakeSys which
     category `sys` belongs to. Object and weight variations are kept only when
     self.isMc is true, fake variations only when self.isFake is true; otherwise
     the name falls back to 'NOM'. Any other name is stored unchanged.
     Returns self, so that calls can be chained.
     """
     nominal = 'NOM' # do we have different names for nom (mc vs fake)?
     self.isObjSys    = sys in systUtils.mcObjectVariations()
     self.isWeightSys = sys in systUtils.mcWeightVariations()
     self.isFakeSys   = sys in systUtils.fakeSystVariations()
     if self.isObjSys or self.isWeightSys :
         variationApplies = self.isMc
     elif self.isFakeSys :
         variationApplies = self.isFake
     else :
         variationApplies = True
     self.syst = sys if variationApplies else nominal
     return self
 def exploreAvailableSystematics(self, verbose=False) :
     systs = ['NOM']
     if self.isFake :
         systs += systUtils.fakeSystVariations()
     elif self.isMc :
         systs += systUtils.mcObjectVariations()
         systs += systUtils.mcWeightVariations()
     self.systematics = []
     for sys in systs :
         self.setSyst(sys)
         if os.path.exists(self.filenameHisto) :
             self.systematics.append(sys)
     if verbose : print "%s : found %d variations : %s"%(self.name, len(self.systematics), str(self.systematics))
def runFill(opts) :
    batchMode    = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir  = opts.input_gen
    outputDir    = opts.output_dir
    sysOption    = opts.syst
    excludedSyst = opts.exclude
    verbose      = opts.verbose

    if verbose : print "filling histos"
    mkdirIfNeeded(outputDir)
    systematics = ['NOM']
    anySys = sysOption==None
    if sysOption=='fake'   or anySys : systematics += systUtils.fakeSystVariations()
    if sysOption=='object' or anySys : systematics += systUtils.mcObjectVariations()
    if sysOption=='weight' or anySys : systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(',') : systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')]
    elif sysOption in systUtils.getAllVariations() : systematics = [sysOption]
    elif not anySys and len(systematics)==1 and sysOption!='NOM' : raise ValueError("Invalid syst %s"%str(sysOption))
    if excludedSyst : systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)]

    if verbose : print "about to loop over these systematics:\n %s"%str(systematics)
    for syst in systematics :
        if batchMode :
            newOptions  = " --input-gen %s" % opts.input_gen
            newOptions += " --input-fake %s" % opts.input_fake
            newOptions += " --output-dir %s" % opts.output_dir
            newOptions += " --verbose %s" % opts.verbose
            newOptions += " --syst %s" % syst
            template = 'batch/templates/check_hft_fill.sh.template'
            script = "batch/hft_%s.sh"%syst
            scriptFile = open(script, 'w')
            scriptFile.write(open(template).read()
                             .replace('%(opt)s', newOptions)
                             .replace('%(logfile)s', 'log/hft/fill_'+syst+'.log')
                             .replace('%(jobname)s', 'fill_'+syst))
            scriptFile.close()
            cmd = "sbatch %s"%script
            if verbose : print cmd
            out = getCommandOutput(cmd)
            if verbose : print out['stdout']
            if out['stderr'] : print  out['stderr']
            continue
        if verbose : print '---- filling ',syst
        samplesPerGroup = allSamplesAllGroups()
        [s.setSyst(syst) for g, samples in samplesPerGroup.iteritems() for s in samples]
        counters, histos = countAndFillHistos(samplesPerGroup=samplesPerGroup, syst=syst, verbose=verbose, outdir=outputDir)
        printCounters(counters)
        saveHistos(samplesPerGroup, histos, outputDir, verbose)
Exemplo n.º 6
0
def get_list_of_syst_to_fill(opts):
    """
    Return the names of the systematic variations selected by `opts`.

    opts.syst can be None (nominal plus every category), one of the keywords
    'fake', 'object', 'weight' (nominal plus that category), a comma-separated
    list (the entries found in systUtils.getAllVariations()), or one known
    variation (returned alone). Variations matched by filterWithRegexp for
    opts.exclude are then dropped.

    Raises ValueError when a non-None option is not 'NOM', is not a known
    variation, and left the list with 'NOM' only.
    """
    sysOption, excludedSyst = opts.syst, opts.exclude
    anySys = sysOption is None

    def isRequested(category):
        return anySys or sysOption==category

    systematics = ['NOM']
    if isRequested('fake'):
        systematics.extend(systUtils.fakeSystVariations())
    if isRequested('object'):
        systematics.extend(systUtils.mcObjectVariations())
    if isRequested('weight'):
        systematics.extend(systUtils.mcWeightVariations())

    if sysOption and sysOption.count(','):
        names = sysOption.split(',')
        systematics = [s for s in systUtils.getAllVariations() if s in names]
    else:
        if sysOption in systUtils.getAllVariations():
            systematics = [sysOption]
        elif not anySys and len(systematics)==1 and sysOption!='NOM':
            raise ValueError("Invalid syst %s"%str(sysOption))

    if excludedSyst:
        kept = []
        for s in systematics:
            if s not in filterWithRegexp(systematics, excludedSyst):
                kept.append(s)
        systematics = kept
    return systematics
Exemplo n.º 7
0
def runPlot(opts) :
    inputDir     = opts.input_dir
    outputDir    = opts.output_dir
    verbose      = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand
    selections = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    variables = variables_to_plot()

    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    plot_groups = [systUtils.Group(g.name) for g in groups]
    sel_not_specified = len(regions_to_plot())==len(selections)
    if sel_not_specified:
        selections = guess_available_selections_from_histofiles(inputDir, first(plot_groups), verbose)
    systematics_to_use = get_list_of_syst_to_fill(opts)
    for group in plot_groups :
        group.setCurrentSelection(first(selections))
        group.setHistosDir(inputDir).setCurrentSelection(first(selections))
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(systematics_to_use, opts.exclude, verbose)
    available_systematics = sorted(list(set([s for g in plot_groups for s in g.systematics])))
    systematics = [s for s in systematics_to_use if s in available_systematics]
    if verbose :
        print "using the following systematics : {0}".format(systematics)
        print "missing the following systematics : {0}".format([s for s in systematics_to_use if s not in available_systematics])
    fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()]
    mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()]

    mkdirIfNeeded(outputDir)
    findByName = systUtils.findByName
    simBkgs = [g for g in plot_groups if g.isMcBkg]
    data = findByName(plot_groups, 'data')
    fake = findByName(plot_groups, 'fake')
    signal = findByName(plot_groups, 'signaltaumu')
    print 'names_stacked_groups to be improved'
    names_stacked_groups = [g.name for g in simBkgs+[fake]]
    for sel in selections :
        if verbose : print '-- plotting ',sel
        for var in variables :
            if verbose : print '---- plotting ',var
            print_summary_yield = var is 'onebin'
            for g in plot_groups :
                g.setSystNominal()
                g.setCurrentSelection(sel)
            nominalHistoData    = data.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoSign    = signal.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True))
                                        for g in simBkgs])
            nominalHistosBkg    = dict([('fake', nominalHistoFakeBkg)] +
                                       [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg  = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg,
                                              histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel,
                                    fakeVariations=fakeSystematics, mcVariations=mcSystematics,
                                    verbose=verbose, printYield=print_summary_yield)
            # if print_summary_yield:
            #     print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics)

            plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand, systErrBand=systErrBand,
                       stack_order=names_stacked_groups,
                       topLabel=sel,
                       canvasName=(sel+'_'+var), outdir=outputDir, options=opts,
                       printYieldSummary=print_summary_yield)
    for group in plot_groups :
        group.printVariationsSummary()
Exemplo n.º 8
0
def runPlot(opts):
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand
    selections = regions_to_plot(opts.include_regions, opts.exclude_regions,
                                 opts.regions)
    variables = variables_to_plot()

    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    if not skip_charge_flip:
        groups.append(
            dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    plot_groups = [systUtils.Group(g.name) for g in groups]
    sel_not_specified = len(regions_to_plot()) == len(selections)
    if sel_not_specified:
        selections = guess_available_selections_from_histofiles(
            inputDir, first(plot_groups), verbose)
    systematics_to_use = get_list_of_syst_to_fill(opts)
    for group in plot_groups:
        group.setCurrentSelection(first(selections))
        group.setHistosDir(inputDir).setCurrentSelection(first(selections))
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(systematics_to_use, opts.exclude,
                                       verbose)
    available_systematics = sorted(
        list(set([s for g in plot_groups for s in g.systematics])))
    systematics = [s for s in systematics_to_use if s in available_systematics]
    if verbose:
        print "using the following systematics : {0}".format(systematics)
        print "missing the following systematics : {0}".format(
            [s for s in systematics_to_use if s not in available_systematics])
    fakeSystematics = [
        s for s in systematics if s in systUtils.fakeSystVariations()
    ]
    mcSystematics = [
        s for s in systematics if s in systUtils.mcObjectVariations() +
        systUtils.mcWeightVariations()
    ]

    mkdirIfNeeded(outputDir)
    findByName = systUtils.findByName
    simBkgs = [g for g in plot_groups if g.isMcBkg]
    data = findByName(plot_groups, 'data')
    fake = findByName(plot_groups, 'fake')
    signal = findByName(plot_groups, 'signaltaumu')
    print 'names_stacked_groups to be improved'
    names_stacked_groups = [g.name for g in simBkgs + [fake]]
    for sel in selections:
        if verbose: print '-- plotting ', sel
        for var in variables:
            if verbose: print '---- plotting ', var
            print_summary_yield = var is 'onebin'
            for g in plot_groups:
                g.setSystNominal()
                g.setCurrentSelection(sel)
            nominalHistoData = data.getHistogram(variable=var,
                                                 selection=sel,
                                                 cacheIt=True)
            nominalHistoSign = signal.getHistogram(variable=var,
                                                   selection=sel,
                                                   cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var,
                                                    selection=sel,
                                                    cacheIt=True)
            nominalHistosSimBkg = dict([(g.name,
                                         g.getHistogram(variable=var,
                                                        selection=sel,
                                                        cacheIt=True))
                                        for g in simBkgs])
            nominalHistosBkg = dict(
                [('fake', nominalHistoFakeBkg)] +
                [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg,
                                             histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake,
                                    simBkgs=simBkgs,
                                    variable=var,
                                    selection=sel,
                                    fakeVariations=fakeSystematics,
                                    mcVariations=mcSystematics,
                                    verbose=verbose,
                                    printYield=print_summary_yield)
            # if print_summary_yield:
            #     print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics)

            plotHistos(histoData=nominalHistoData,
                       histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg,
                       histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand,
                       systErrBand=systErrBand,
                       stack_order=names_stacked_groups,
                       topLabel=sel,
                       canvasName=(sel + '_' + var),
                       outdir=outputDir,
                       options=opts,
                       printYieldSummary=print_summary_yield)
    for group in plot_groups:
        group.printVariationsSummary()
 def isNeededForSys(self, sys) :
     """
     Tell whether this object is affected by the variation `sys`.

     True for 'NOM'; for simulation (self.isMc) when `sys` is a weight or an
     object variation; otherwise the result of
     'self.isFake and sys is a fake variation'.
     """
     if sys=='NOM' :
         return True
     if self.isMc :
         if sys in systUtils.mcWeightVariations() :
             return True
         if sys in systUtils.mcObjectVariations() :
             return True
     return self.isFake and sys in systUtils.fakeSystVariations()
def runPlot(opts) :
    inputDir     = opts.input_dir
    outputDir    = opts.output_dir
    sysOption    = opts.syst
    excludedSyst = opts.exclude
    verbose      = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand

    groups = allGroups()
    selections = allRegions()
    variables = variablesToPlot()
    for group in groups :
        group.setHistosDir(inputDir)
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(sysOption, excludedSyst, verbose)

    mkdirIfNeeded(outputDir)
    systematics = ['NOM']
    anySys = sysOption==None
    if sysOption=='fake'   or anySys : systematics += systUtils.fakeSystVariations()
    if sysOption=='object' or anySys : systematics += systUtils.mcObjectVariations()
    if sysOption=='weight' or anySys : systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(',') : systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')]
    elif sysOption in systUtils.getAllVariations() : systematics = [sysOption]
    if not anySys and len(systematics)==1 and sysOption!='NOM' : raise ValueError("Invalid syst %s"%str(sysOption))
    if excludedSyst : systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)]
    if verbose : print "using the following systematics : %s"%str(systematics)

    fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()]
    mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()]

    simBkgs = [g for g in groups if g.isMcBkg]
    data, fake, signal = findByName(groups, 'data'), findByName(groups, 'fake'), findByName(groups, 'signal')

    for sel in selections :
        if verbose : print '-- plotting ',sel
        for var in variables :
            if verbose : print '---- plotting ',var
            for g in groups : g.setSystNominal()
            nominalHistoData    = data.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoSign    = signal.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True)) for g in simBkgs])
            nominalHistosBkg    = dict([('fake', nominalHistoFakeBkg)] + [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg  = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel,
                                    fakeVariations=fakeSystematics, mcVariations=mcSystematics, verbose=verbose)

            plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign, histoTotBkg=nominalHistoTotBkg,
                       histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand, systErrBand=systErrBand,
                       canvasName=(sel+'_'+var), outdir=outputDir, verbose=verbose)
    for group in groups :
        summary = group.variationsSummary()
        for selection, summarySel in summary.iteritems() :
            colW = str(12)
            header = ' '.join([('%'+colW+'s')%colName for colName in ['variation', 'yield', 'delta[%]']])
            lineTemplate = '%(sys)'+colW+'s'+'%(counts)'+colW+'s'+'%(delta)'+colW+'s'
            print "---- summary of variations for %s ----" % group.name
            print "---             %s                 ---" % selection
            print header
            print '\n'.join(lineTemplate%{'sys':s,
                                          'counts':(("%.3f"%c) if type(c) is float else (str(c)+str(type(c)))),
                                          'delta' :(("%.3f"%d) if type(d) is float else '--' if d==None else (str(d)+str(type(d)))) }
                            for s,c,d in summarySel)