def main() :
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-n', '--histoname')
    parser.add_option('-o', '--output_dir')
    parser.add_option('-v','--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'histoname', 'output_dir']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o) : return not hasattr(opts, o) or getattr(opts,o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions) : parser.error('Missing required option')
    tag            = opts.tag.strip('_')
    inputDirname   = opts.input_dir
    inputDirname   = inputDirname+'/' if not inputDirname.endswith('/') else inputDirname
    histoName      = opts.histoname
    outputDirname  = opts.output_dir
    outputDirname  = outputDirname+'/' if not outputDirname.endswith('/') else outputDirname
    mkdirIfNeeded(outputDirname)
    verbose        = opts.verbose
    if verbose : print ('\nUsing the following options:\n'
                        +'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions))

    inputFiles = getInputFiles(inputDirname, tag, verbose)
    assert all(f for f in inputFiles.values()), ("missing inputs: \n%s"%'\n'.join(["%s : %s"%kv for kv in inputFiles.iteritems()]))
    histograms = getHistograms(inputFiles, histoName)
    can = r.TCanvas('can_'+histoName, histoName, 800, 600)
    draw(can, histograms, label=histoName)
    can.SaveAs(outputDirname+'/'+histoName+'.png')
Exemplo n.º 2
0
def runPlot(opts):
    lepton    = opts.lepton
    batchMode = opts.batch
    inputDir  = opts.input_dir
    outputDir = opts.output_dir
    verbose   = opts.verbose
    debug     = opts.debug
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)

    inputDir  = outputDir+'/'+lepton+'/histos'
    outputDir = outputDir+'/'+lepton+'/plots'
    mkdirIfNeeded(outputDir)
    histonames = dict((g.name, histonamesOneSample(g.name, variables_to_plot(), regions, leptonSources))
                      for g in groups)
    groups_to_stack = [g.name for g in groups if not g.is_data]
    if verbose:
        print 'groups being included in the compositions: ',groups_to_stack
    for region in regions:
        all_histos = dict([(g.name,
                            rootUtils.fetchObjectsFromFile(os.path.join(inputDir, g.name+'_'+region+'.root'),
                                                           histonames[g.name][region],
                                                           verbose))
                           for g in groups])
        for v in variables_to_plot():
            histos = dict()
            for s in leptonSources:
                histos[s] = summedHisto(histos=[all_histos[g][v][s] for g in groups_to_stack],
                                        label='')
            histos['data'] = all_histos['data'][v]['Unknown']
            plotStackedHistos(histos=histos, datakey='data', stackkeys=leptonSources,
                              outputDir=outputDir+'/'+region, region=region,
                              colors=fakeu.colorsFillSources(), verbose=verbose)
    return
Exemplo n.º 3
0
def plotStackedHistos(histos={},
                      datakey=None,
                      stackkeys=[],
                      outputDir='',
                      region='',
                      colors={},
                      verbose=False):
    "input: a dictionary of histos[group]"
    mkdirIfNeeded(outputDir)
    bkg_histos = dict([(k, h) for k, h in histos.iteritems()
                       if k in stackkeys])
    tot_bkg = summedHisto(bkg_histos.values(), label='')
    err_band = None  # tmp disable
    # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg))
    empty_bkg = tot_bkg.Integral() == 0
    if empty_bkg:
        if verbose: print "empty backgrounds, skip %s" % tot_bkg.GetName()
        return
    histoname = tot_bkg.GetName()
    can = r.TCanvas('c_' + histoname, histoname, 800, 600)
    can.cd()
    pm = tot_bkg  # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update()  # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_' + tot_bkg.GetName(), '')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkg_histos.iteritems():
        h.SetFillColor(colors[s] if s in colors else r.kOrange)
        h.SetDrawOption('bar')
        h.SetDirectory(0)
        stack.Add(h)
    stack.Draw('hist same')
    # err_band.Draw('E2 same')
    data = histos[datakey] if datakey and datakey in histos else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose:
            print "data : nEntries {:.1f} totWeight {:.1f} ".format(
                data.GetEntries(), data.Integral())
    yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h])
    # pm.SetMinimum(0.5)
    pm.SetMaximum(1.1 * yMax)
    can.Update()
    # can.SetLogy()
    topRightLabel(can,
                  "#splitline{%s}{%s}" % (histoname, region),
                  xpos=0.125,
                  align=13)
    drawLegendWithDictKeys(can,
                           dictSum(bkg_histos, {'stat err': err_band}),
                           opt='f')
    can.RedrawAxis()
    can._stack = stack
    can._histos = [h for h in stack.GetHists()] + [data]
    can.Update()
    if verbose: print os.path.join(outputDir, histoname + '.png')
    can.SaveAs(os.path.join(outputDir, histoname + '.png'))
def main():
    if len(sys.argv)!=3:
        print "Usage: {0} inputdir outputdir".format(sys.argv[0])
        return
    inputdir = sys.argv[1]
    outputdir = sys.argv[2]
    verbose = True
    if not os.path.exists(inputdir):
        print "missing input dir {0}".format(inputdir)
        return
    utils.mkdirIfNeeded(outputdir)

    fake = systUtils.Group('fake')
    fake.setHistosDir(inputdir)

    fake.setSyst() # reset to nominal (state is undetermined after 'explore')
    c = r.TCanvas('c','')
    variables = ['mcollcoarse']

    for jetnojet in regions_to_plot().keys():
        for var in variables:
            sel_emu, sel_mue = regions_to_plot()[jetnojet]
            h_emu = fake.getHistogram(variable=var, selection=sel_emu, cacheIt=True)
            h_mue = fake.getHistogram(variable=var, selection=sel_mue, cacheIt=True)
            h_ratio = h_emu.Clone(h_emu.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                                    filename=outputdir+'/'+var+'_'+jetnojet+'_emu_over_mue_wout_sys_err')
            h_with_totErrBand = {} # histo with stat+syst err (to get the correct error in the ratio)
            for sel in [sel_emu, sel_mue]:
                print ">>>plotting ",sel
                fake.setSystNominal()
                fake.setCurrentSelection(sel)
                fake.exploreAvailableSystematics(verbose)
                fakeSystematics = [s for s in fake.systematics if s!='NOM']
                nominalHistoData    = None
                nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
                nominalHistosBkg    = {'fake', nominalHistoFakeBkg}
                nominalHistoTotBkg  = buildTotBackgroundHisto(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs={})
                statErrBand = buildStatisticalErrorBand(nominalHistoTotBkg)
                systErrBand = buildFakeSystematicErrorBand(fake=fake, nominalHistosSimBkg={}, variable=var, selection=sel,
                                                           variations=fakeSystematics, verbose=verbose)
                totErrBand = systUtils.addErrorBandsInQuadrature(statErrBand, systErrBand)
                # c.cd()
                # c.Clear()
                # nominalHistoFakeBkg.Draw()
                # totErrBand.Draw('E2 same')
                # totErrBand.SetFillStyle(3005)
                # for ext in ['png', 'eps']:
                #     c.SaveAs("{0}/{1}_{2}.{3}".format(outputdir, sel, var, ext))
                h_with_totErrBand[sel] = systUtils.setHistErrFromErrBand(nominalHistoFakeBkg, totErrBand)
                pprint.pprint(h_with_totErrBand)
            h_emu = [h for k,h in h_with_totErrBand.iteritems() if 'emu' in k][0]
            h_mue = [h for k,h in h_with_totErrBand.iteritems() if 'mue' in k][0]

            h_ratio = h_emu.Clone(h_mue.GetName().replace('emu', 'emu_over_mue'))
            h_ratio.Divide(h_mue)
            plot_emu_mue_with_ratio(canvas=c, h_mue=h_mue, h_emu=h_emu, h_ratio=h_ratio,
                                    filename=outputdir+'/'+var+'_'+jetnojet+'_emu_over_mue_with_sys_err')
    return
def main() :
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-o', '--output_file')
    parser.add_option('-p', '--output_plot')
    parser.add_option('-v','--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'output_file', 'output_plot']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o) : return not hasattr(opts, o) or getattr(opts,o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions) : parser.error('Missing required option')
    tag = opts.tag
    inputDirname  = opts.input_dir
    outputFname   = opts.output_file
    outputPlotDir = opts.output_plot
    verbose       = opts.verbose
    if verbose : print '\nUsing the following options:\n'+'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions)

    allInputFiles = getInputFiles(inputDirname, tag, verbose) # includes allBkg, which is used only for sys
    assert all(f for f in allInputFiles.values()), ("missing inputs: \n%s"%'\n'.join(["%s : %s"%kv for kv in allInputFiles.iteritems()]))
    mkdirIfNeeded(outputPlotDir)
    outputFile = r.TFile.Open(outputFname, 'recreate')
    inputFiles = dict((k, v) for k, v in allInputFiles.iteritems() if k in fakeProcesses())

    buildMuonRates    (inputFiles, outputFile, outputPlotDir, verbose)
    buildElectronRates(inputFiles, outputFile, outputPlotDir, verbose)
    buildSystematics  (allInputFiles['allBkg'], outputFile)
    outputFile.Close()
    if verbose : print "output saved to \n%s"%'\n'.join([outputFname, outputPlotDir])
Exemplo n.º 6
0
def submit_batch_fill_job_per_group(group, opts):
    options_dict = vars(opts)
    group_name = group.name if hasattr(group, 'name') else group
    systematic = opts.syst if hasattr(opts, 'syst') and opts.syst else None
    verbose = opts.verbose
    options_dict['group'] = group_name
    options_with_value = dict((k,v) for k,v in options_dict.iteritems() if v and v is not True)
    # note to self: the line below assumes that the argument-less options have a default=False
    options_with_toggle = dict((k,v) for k,v in options_dict.iteritems() if v and v is True and k!="batch")
    def escape_regex(v) : return v if v!='.*' else "'.*'"
    def back_to_dash(v) : return v.replace('_','-')
    cmd_line_options = ' '.join(["--%s %s"%(back_to_dash(k), escape_regex(str(v)))
                                 for k,v in options_with_value.iteritems()]
                                +["--%s"%back_to_dash(k) for k in options_with_toggle.keys()])
    template = 'batch/templates/plot_emu.sh'
    default_log_dir = opts.output_dir.replace('out/', 'log/')
    if default_log_dir.count('/histos')==1:
        default_log_dir = default_log_dir.replace('/histos','')
    log_dir = mkdirIfNeeded(opts.log_dir if opts.log_dir else default_log_dir)
    script_dir = mkdirIfNeeded('batch/plot_emu')
    script_name = os.path.join(script_dir, group_name+("_{0}".format(systematic) if systematic else '')+'.sh')
    log_name = log_dir+'/'+group_name+("_{0}".format(systematic) if systematic else '')+'.log'
    script_file = open(script_name, 'w')
    script_file.write(open(template).read()
                      .replace('%(opt)s', cmd_line_options)
                      .replace('%(logfile)s', log_name)
                      .replace('%(jobname)s', group_name)
                      .replace('%(queue)s', opts.queue))
    script_file.close()
    cmd = "sbatch %s"%script_name
    if verbose : print cmd
    out = getCommandOutput(cmd)
    if verbose : print out['stdout']
    if out['stderr'] : print  out['stderr']
Exemplo n.º 7
0
def main():
    options = parse_options()
    inputdf = options.input
    outdir = options.output_dir
    regexp = options.sample_regexp
    exclude = options.exclude_regexp
    tag = options.tag
    verbose = options.verbose
    debug = options.debug

    utils.mkdirIfNeeded(outdir)
    if debug: dataset.Dataset.verbose_parsing = True
    datasets = dataset.build_all_datasets_from_dir_or_file(inputdf)
    datasets = utils.filterWithRegexp(datasets, regexp,
                                      lambda _: _.name) if regexp else datasets
    datasets = utils.excludeWithRegexp(
        datasets, exclude, lambda _: _.name) if exclude else datasets
    counter = {'fail': 0, 'pass': 0}
    for d in datasets:
        outcome = 'pass' if d.build_filelist(gpatlas_dir(d, tag), outdir,
                                             verbose) else 'fail'
        counter[outcome] += 1
    if verbose:
        print "created %d filelists (%d failures)" % (counter['pass'],
                                                      counter['fail'])
def plotPerSourceEff(histosPerVar={}, outputDir='', lepton='', region='', sample='', verbose=False, zoomIn=True):
    "plot efficiency for each source (and 'anysource') as a function of each var; expect histos[var][source][loose,tight]"
    variables = histosPerVar.keys()
    sources = [s for s in first(histosPerVar).keys() if s!='real'] # only fake eff really need a scale factor
    colors = colorsLineSources
    mkdirIfNeeded(outputDir)
    for var in filter(lambda x : x in ['pt1', 'eta1'], histosPerVar.keys()):
        histosPerSource = dict((s, histosPerVar[var][s]) for s in sources)
        canvasBasename = region+'_efficiency_'+lepton+'_'+var+("_%s"%sample if sample else '')
        missingSources = [s for s, h in histosPerSource.iteritems() if not h['loose'] or not h['tight']]
        if missingSources:
            if verbose : print "skip %s, missing histos for %s"%(var, str(missingSources))
            continue
        anySourceLoose = summedHisto([h['loose'] for h in histosPerSource.values()])
        anySourceTight = summedHisto([h['tight'] for h in histosPerSource.values()])
        anySourceLoose.SetName(histoNamePerSource(var, 'any', 'loose', region))
        anySourceTight.SetName(histoNamePerSource(var, 'any', 'tight', region))
        histosPerSource['any'] = { 'loose' : anySourceLoose, 'tight' : anySourceTight }
        emptyBkg = anySourceLoose.Integral()==0 or anySourceTight.Integral()==0
        if emptyBkg:
            if verbose : print "empty backgrounds, skip %s"%canvasBasename
            continue
        def computeEfficiencies(histosPerSource={}) :
            sources = histosPerSource.keys()
            num = dict((s, histosPerSource[s]['tight']) for s in sources)
            den = dict((s, histosPerSource[s]['loose']) for s in sources)
            eff = dict((s, h.Clone(h.GetName().replace('tight', 'tight_over_loose')))
                       for s, h in num.iteritems())
            [eff[s].Divide(den[s]) for s in sources]
            return eff

        effs = computeEfficiencies(histosPerSource)
        can = r.TCanvas('c_'+canvasBasename, canvasBasename, 800, 600)
        can.cd()
        pm = first(effs) # pad master
        pm.SetStats(False)
        pm.Draw('axis')
        can.Update()
        for s, h in effs.iteritems() :
            h.SetMarkerColor(colors[s] if s in colors else r.kBlack)
            h.SetLineColor(h.GetMarkerColor())
            h.SetLineWidth(2*h.GetLineWidth())
            h.SetMarkerStyle(markersSources[s] if s in markersSources else r.kDot)
            h.Draw('ep same')
            h.SetDirectory(0)
        #pprint.pprint(effs)
        yMin, yMax = getMinMax(effs.values())
        pm.SetMinimum(0.0)
        pm.SetMaximum(0.25 if yMax < 0.5 and zoomIn else 1.1)
        can.Update()
        topRightLabel(can, canvasBasename, xpos=0.125, align=13)
        drawLegendWithDictKeys(can, effs, opt='lp')
        can.RedrawAxis()
        can._histos = effs
        can.Update()
        outFname = os.path.join(outputDir, canvasBasename+'.png')
        utils.rmIfExists(outFname)
        can.SaveAs(outFname)
def plotStackedHistos(histosPerGroup={}, outputDir='', region='', verbose=False):
    groups = histosPerGroup.keys()
    variables = first(histosPerGroup).keys()
    leptonTypes = first(first(histosPerGroup)).keys()
    colors = getGroupColor()
    mkdirIfNeeded(outputDir)
    histosPerName = dict([(region+'_'+var+'_'+lt, # one canvas for each histo, so key with histoname w/out group
                           dict([(g, histosPerGroup[g][var][lt]) for g in groups]))
                          for var in variables for lt in leptonTypes])
    for histoname, histosPerGroup in histosPerName.iteritems():
        missingGroups = [g for g, h in histosPerGroup.iteritems() if not h]
        if missingGroups:
            if verbose : print "skip %s, missing histos for %s"%(histoname, str(missingGroups))
            continue
        bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if g not in ['data', 'signal']])
        totBkg = summedHisto(bkgHistos.values())
        err_band = None # buildErrBandGraph(totBkg, computeStatErr2(totBkg))
        emptyBkg = totBkg.Integral()==0
        if emptyBkg:
            if verbose : print "empty backgrounds, skip %s"%histoname
            continue
        can = r.TCanvas('c_'+histoname, histoname, 800, 600)
        can.cd()
        pm = totBkg # pad master
        pm.SetStats(False)
        pm.Draw('axis')
        can.Update() # necessary to fool root's dumb object ownership
        stack = r.THStack('stack_'+histoname,'')
        can.Update()
        r.SetOwnership(stack, False)
        for s, h in bkgHistos.iteritems() :
            h.SetFillColor(colors[s] if s in colors else r.kOrange)
            h.SetDrawOption('bar')
            h.SetDirectory(0)
            stack.Add(h)
        stack.Draw('hist same')
        # err_band.Draw('E2 same')
        data = histosPerGroup['data']
        if data and data.GetEntries():
            data.SetMarkerStyle(r.kFullDotLarge)
            data.Draw('p same')
        # yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) # fixme with err_band
        yMin, yMax = 0.0, data.GetMaximum()
        pm.SetMinimum(0.0)
        pm.SetMaximum(1.1*yMax)
        can.Update()
        topRightLabel(can, histoname, xpos=0.125, align=13)
        # drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f')
        drawLegendWithDictKeys(can, bkgHistos, opt='f')
        can.RedrawAxis()
        can._stack = stack
        can._histos = [h for h in stack.GetHists()]+[data]
        can.Update()
        outFname = os.path.join(outputDir, histoname+'.png')
        utils.rmIfExists(outFname)
        can.SaveAs(outFname)
def plotStackedHistosSources(histosPerVar={},
                             outputDir='',
                             region='',
                             verbose=False):
    variables = histosPerVar.keys()
    sources = first(histosPerVar).keys()
    colors = colorsFillSources
    mkdirIfNeeded(outputDir)
    for var in variables:
        for lOrT in ['loose', 'tight']:
            histos = dict((s, histosPerVar[var][s][lOrT]) for s in sources)
            canvasBasename = region + '_region_' + var + '_' + lOrT
            missingSources = [s for s, h in histos.iteritems() if not h]
            if missingSources:
                if verbose:
                    print "skip %s, missing histos for %s" % (
                        var, str(missingSources))
                continue
            totBkg = summedHisto(histos.values())
            err_band = None  # buildErrBandGraph(totBkg, computeStatErr2(totBkg))
            emptyBkg = totBkg.Integral() == 0
            if emptyBkg:
                if verbose: print "empty backgrounds, skip %s" % canvasBasename
                continue
            can = r.TCanvas('c_' + canvasBasename, canvasBasename, 800, 600)
            can.cd()
            pm = totBkg  # pad master
            pm.SetStats(False)
            pm.Draw('axis')
            can.Update()  # necessary to fool root's dumb object ownership
            stack = r.THStack('stack_' + canvasBasename, '')
            can.Update()
            r.SetOwnership(stack, False)
            for s, h in histos.iteritems():
                h.SetFillColor(colors[s] if s in colors else r.kOrange)
                h.SetDrawOption('bar')
                h.SetDirectory(0)
                stack.Add(h)
            stack.Draw('hist same')
            # err_band.Draw('E2 same')
            yMin, yMax = getMinMax(
                [h for h in [totBkg, err_band] if h is not None])
            pm.SetMinimum(0.0)
            pm.SetMaximum(1.1 * yMax)
            can.Update()
            topRightLabel(can, canvasBasename, xpos=0.125, align=13)
            # drawLegendWithDictKeys(can, dictSum(histos, {'stat err':err_band}), opt='f')
            drawLegendWithDictKeys(can, histos, opt='f')
            can.RedrawAxis()
            can._stack = stack
            can._histos = [h for h in stack.GetHists()]
            can.Update()
            outFname = os.path.join(outputDir, canvasBasename + '.png')
            utils.rmIfExists(outFname)
            can.SaveAs(outFname)
def plotStackedHistosWithData(histosPerGroup={}, outputDir='', canvasname='', canvastitle='', colors={}, verbose=False):
    "histosPerGroup[group], where group=data is treated as special"
    groups = histosPerGroup.keys()
    mkdirIfNeeded(outputDir)
    missingGroups = [g for g, h in histosPerGroup.iteritems() if not h]
    if missingGroups:
        if verbose : print "skip %s, missing histos for %s"%(histoname, str(missingGroups))
        return
    bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems() if not isDataSample(g)])
    totBkg = summedHisto(bkgHistos.values())
    err_band = buildErrBandGraph(totBkg, computeStatErr2(totBkg))
    emptyBkg = totBkg.Integral()==0
    histoname, region = totBkg.GetName(), 'emu' # tmp replacement vars, to be fixed
    if emptyBkg:
        if verbose : print "empty backgrounds, skip %s"%histoname
        return
    can = r.TCanvas(canvasname, canvastitle, 800, 600)
    can.cd()
    pm = totBkg # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update() # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_'+histoname,'')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkgHistos.iteritems() :
        h.SetFillColor(colors[s] if s in colors else r.kOrange)
        h.SetDrawOption('bar')
        h.SetDirectory(0)
        stack.Add(h)
    stack.Draw('hist same')
    err_band.Draw('E2 same')
    data = histosPerGroup['data'] if 'data' in histosPerGroup else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose :
            print "integrals : {0} tot.bkg.: {1}, data: {2}".format(histoname, totBkg.Integral(), data.Integral())
    else:
        print "no data"
    yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h])
    pm.SetMinimum(0.0)
    pm.SetMaximum(1.1*yMax)
    can.Update()
    topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.15, ypos=(1.0-0.5*can.GetTopMargin()), align=13)
    drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f')
    can.RedrawAxis()
    can._stack = stack
    can._histos = [h for h in stack.GetHists()]+[data]
    can.Update()
    filename=os.path.join(outputDir, histoname+'.png')
    rmIfExists(filename)
    can.SaveAs(filename)
Exemplo n.º 12
0
def submit_batch_fill_job_per_group_per_selection(group=None,
                                                  selection='',
                                                  opts=None):
    "if we are processing cached selections, we can submit one job per selection"
    options_dict = vars(opts)
    group_name = group.name if hasattr(group, 'name') else group
    systematic = opts.syst if hasattr(opts, 'syst') and opts.syst else None
    verbose = opts.verbose
    options_dict['group'] = group_name
    options_dict['region'] = selection
    options_dict['regions'] = None
    options_with_value = dict(
        (k, v) for k, v in options_dict.iteritems() if v and v is not True)
    # note to self: the line below assumes that the argument-less options have a default=False
    options_with_toggle = dict((k, v) for k, v in options_dict.iteritems()
                               if v and v is True and k != "batch")

    def escape_regex(v):
        return v if v != '.*' else "'.*'"

    def back_to_dash(v):
        return v.replace('_', '-')

    cmd_line_options = ' '.join([
        "--%s %s" % (back_to_dash(k), escape_regex(str(v)))
        for k, v in options_with_value.iteritems()
    ] + ["--%s" % back_to_dash(k) for k in options_with_toggle.keys()])
    template = 'batch/templates/plot_emu.sh'
    default_log_dir = opts.output_dir.replace('out/', 'log/')
    if default_log_dir.count('/histos') == 1:
        default_log_dir = default_log_dir.replace('/histos', '')
    log_dir = mkdirIfNeeded(opts.log_dir if opts.log_dir else default_log_dir)
    script_dir = mkdirIfNeeded('batch/plot_emu')
    script_name = os.path.join(
        script_dir, group_name + '_' + selection +
        ("_{0}".format(systematic) if systematic else '') + '.sh')
    log_name = log_dir + '/' + group_name + '_' + selection + (
        "_{0}".format(systematic) if systematic else '') + '.log'
    script_file = open(script_name, 'w')
    script_file.write(
        open(template).read().replace('%(opt)s', cmd_line_options).replace(
            '%(logfile)s',
            log_name).replace('%(jobname)s',
                              group_name + '_' + selection).replace(
                                  '%(queue)s', opts.queue))
    script_file.close()
    cmd = "sbatch %s" % script_name
    if verbose: print cmd
    out = getCommandOutput(cmd)
    if verbose: print out['stdout']
    if out['stderr']: print out['stderr']
def plotStackedHistosSources(histosPerVar={}, outputDir='', region='', verbose=False):
    variables = histosPerVar.keys()
    sources = first(histosPerVar).keys()
    colors = colorsFillSources
    mkdirIfNeeded(outputDir)
    for var in variables:
        for lOrT in ['loose', 'tight']:
            histos = dict((s, histosPerVar[var][s][lOrT]) for s in sources)
            canvasBasename = region+'_region_'+var+'_'+lOrT
            missingSources = [s for s, h in histos.iteritems() if not h]
            if missingSources:
                if verbose : print "skip %s, missing histos for %s"%(var, str(missingSources))
                continue
            totBkg = summedHisto(histos.values())
            err_band = None # buildErrBandGraph(totBkg, computeStatErr2(totBkg))
            emptyBkg = totBkg.Integral()==0
            if emptyBkg:
                if verbose : print "empty backgrounds, skip %s"%canvasBasename
                continue
            can = r.TCanvas('c_'+canvasBasename, canvasBasename, 800, 600)
            can.cd()
            pm = totBkg # pad master
            pm.SetStats(False)
            pm.Draw('axis')
            can.Update() # necessary to fool root's dumb object ownership
            stack = r.THStack('stack_'+canvasBasename,'')
            can.Update()
            r.SetOwnership(stack, False)
            for s, h in histos.iteritems() :
                h.SetFillColor(colors[s] if s in colors else r.kOrange)
                h.SetDrawOption('bar')
                h.SetDirectory(0)
                stack.Add(h)
            stack.Draw('hist same')
            # err_band.Draw('E2 same')
            yMin, yMax = getMinMax([h for h in [totBkg, err_band] if h is not None])
            pm.SetMinimum(0.0)
            pm.SetMaximum(1.1*yMax)
            can.Update()
            topRightLabel(can, canvasBasename, xpos=0.125, align=13)
            # drawLegendWithDictKeys(can, dictSum(histos, {'stat err':err_band}), opt='f')
            drawLegendWithDictKeys(can, histos, opt='f')
            can.RedrawAxis()
            can._stack = stack
            can._histos = [h for h in stack.GetHists()]
            can.Update()
            outFname = os.path.join(outputDir, canvasBasename+'.png')
            utils.rmIfExists(outFname)
            can.SaveAs(outFname)
def runFill(opts) :
    batchMode    = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir  = opts.input_gen
    outputDir    = opts.output_dir
    sysOption    = opts.syst
    excludedSyst = opts.exclude
    verbose      = opts.verbose

    if verbose : print "filling histos"
    mkdirIfNeeded(outputDir)
    systematics = ['NOM']
    anySys = sysOption==None
    if sysOption=='fake'   or anySys : systematics += systUtils.fakeSystVariations()
    if sysOption=='object' or anySys : systematics += systUtils.mcObjectVariations()
    if sysOption=='weight' or anySys : systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(',') : systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')]
    elif sysOption in systUtils.getAllVariations() : systematics = [sysOption]
    elif not anySys and len(systematics)==1 and sysOption!='NOM' : raise ValueError("Invalid syst %s"%str(sysOption))
    if excludedSyst : systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)]

    if verbose : print "about to loop over these systematics:\n %s"%str(systematics)
    for syst in systematics :
        if batchMode :
            newOptions  = " --input-gen %s" % opts.input_gen
            newOptions += " --input-fake %s" % opts.input_fake
            newOptions += " --output-dir %s" % opts.output_dir
            newOptions += " --verbose %s" % opts.verbose
            newOptions += " --syst %s" % syst
            template = 'batch/templates/check_hft_fill.sh.template'
            script = "batch/hft_%s.sh"%syst
            scriptFile = open(script, 'w')
            scriptFile.write(open(template).read()
                             .replace('%(opt)s', newOptions)
                             .replace('%(logfile)s', 'log/hft/fill_'+syst+'.log')
                             .replace('%(jobname)s', 'fill_'+syst))
            scriptFile.close()
            cmd = "sbatch %s"%script
            if verbose : print cmd
            out = getCommandOutput(cmd)
            if verbose : print out['stdout']
            if out['stderr'] : print  out['stderr']
            continue
        if verbose : print '---- filling ',syst
        samplesPerGroup = allSamplesAllGroups()
        [s.setSyst(syst) for g, samples in samplesPerGroup.iteritems() for s in samples]
        counters, histos = countAndFillHistos(samplesPerGroup=samplesPerGroup, syst=syst, verbose=verbose, outdir=outputDir)
        printCounters(counters)
        saveHistos(samplesPerGroup, histos, outputDir, verbose)
def main() :
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-c', '--input_iter')
    parser.add_option('-o', '--output_dir')
    parser.add_option('-O', '--output_file',help='store ratio histograms here')
    parser.add_option('-v','--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'input_iter', 'output_dir']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o) : return not hasattr(opts, o) or getattr(opts,o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions) : parser.error('Missing required option')
    tag            = opts.tag.strip('_')
    inputDirname   = opts.input_dir
    fnameInputIter = opts.input_iter
    outputDirname  = opts.output_dir
    outputDirname  = outputDirname+'/' if not outputDirname.endswith('/') else outputDirname
    outputFilename = opts.output_file
    mkdirIfNeeded(outputDirname)
    verbose        = opts.verbose
    if verbose : print ('\nUsing the following options:\n'
                        +'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions))
    inputDirname = inputDirname+'/' if not inputDirname.endswith('/') else inputDirname
    fileData = r.TFile.Open(inputDirname+'data_'       +tag+'.root')
    fileMc   = r.TFile.Open(inputDirname+'allBkg_'     +tag+'.root')
    fileHf   = r.TFile.Open(inputDirname+'heavyflavor_'+tag+'.root')
    fileIter = r.TFile.Open(fnameInputIter)
    assert fileData, "Missing input file data %s"%str(fileData)
    assert fileMc,   "Missing input file mc   %s"%str(fileMc)
    assert fileHf,   "Missing input file hf   %s"%str(fileHf)
    assert fileIter, "Missing input file iter %s"%str(fileIter)
    outputFile = r.TFile.Open(outputFilename, 'recreate') if outputFilename else None
    el_conv_sf = computeAndPlotConvSf(fileData, fileMc, 'elec', 'all_l_pt', outputDirname, outputFile)
    el_qcd_sf  = computeAndPlotHfSf  (fileIter, fileHf, 'elec', 'all_l_pt', outputDirname, outputFile)
    mu_qcd_sf  = computeAndPlotHfSf  (fileIter, fileHf, 'muon', 'all_l_pt', outputDirname, outputFile)
    el_real_sf = computeAndPlotRealSf(fileData, fileMc, 'elec', 'all_l_pt', outputDirname)
    mu_real_sf = computeAndPlotRealSf(fileData, fileMc, 'muon', 'all_l_pt', outputDirname)

    el_conv_sf2d = computeAndPlotConvSf2d(fileData, fileMc, 'elec', 'all_l_pt', outputDirname)
    el_qcd_sf2d  = computeAndPlotHfSf2d  (fileIter, fileHf, 'elec', 'all_l_pt', outputDirname)
    mu_qcd_sf2d  = computeAndPlotHfSf2d  (fileIter, fileHf, 'muon', 'all_l_pt', outputDirname)

    print "# --- paste the lines below in buildWeightedMatrix.py ---"
    print "# %s, %s"%(tag, datetime.datetime.now())
    print "mu_qcdSF, mu_realSF = %s, %s"%(mu_qcd_sf, mu_real_sf)
    print "el_convSF, el_qcdSF, el_realSF = %s, %s, %s"%(el_conv_sf, el_qcd_sf, el_real_sf)
    if outputFile : outputFile.Close()
Exemplo n.º 16
0
def plotStackedHistos(histos={}, datakey=None, stackkeys=[], outputDir='', region='', colors={}, verbose=False):
    "input: a dictionary of histos[group]"
    mkdirIfNeeded(outputDir)
    bkg_histos = dict([(k,h) for k,h in histos.iteritems() if k in stackkeys])
    tot_bkg = summedHisto(bkg_histos.values(), label='')
    err_band = None # tmp disable
    # err_band = buildErrBandGraph(tot_bkg, computeStatErr2(tot_bkg))
    empty_bkg = tot_bkg.Integral()==0
    if empty_bkg:
        if verbose : print "empty backgrounds, skip %s"%tot_bkg.GetName()
        return
    histoname = tot_bkg.GetName()
    can = r.TCanvas('c_'+histoname, histoname, 800, 600)
    can.cd()
    pm = tot_bkg # pad master
    pm.SetStats(False)
    pm.Draw('axis')
    can.Update() # necessary to fool root's dumb object ownership
    stack = r.THStack('stack_'+tot_bkg.GetName(),'')
    can.Update()
    r.SetOwnership(stack, False)
    for s, h in bkg_histos.iteritems() :
            h.SetFillColor(colors[s] if s in colors else r.kOrange)
            h.SetDrawOption('bar')
            h.SetDirectory(0)
            stack.Add(h)
    stack.Draw('hist same')
    # err_band.Draw('E2 same')
    data = histos[datakey] if datakey and datakey in histos else None
    if data and data.GetEntries():
        data.SetMarkerStyle(r.kFullDotLarge)
        data.Draw('p same')
        if verbose:
            print "data : nEntries {:.1f} totWeight {:.1f} ".format(data.GetEntries(), data.Integral())
    yMin, yMax = getMinMax([h for h in [tot_bkg, data, err_band] if h])
    # pm.SetMinimum(0.5)
    pm.SetMaximum(1.1*yMax)
    can.Update()
    # can.SetLogy()
    topRightLabel(can, "#splitline{%s}{%s}"%(histoname, region), xpos=0.125, align=13)
    drawLegendWithDictKeys(can, dictSum(bkg_histos, {'stat err':err_band}), opt='f')
    can.RedrawAxis()
    can._stack = stack
    can._histos = [h for h in stack.GetHists()]+[data]
    can.Update()
    if verbose : print os.path.join(outputDir, histoname+'.png')
    can.SaveAs(os.path.join(outputDir, histoname+'.png'))
def main() :
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-f', '--input_fake')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-o', '--output_dir')
    parser.add_option('-v','--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_fake', 'input_dir', 'output_dir',]
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o) : return not hasattr(opts, o) or getattr(opts,o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions) : parser.error('Missing required option')
    tag           = opts.tag.strip('_')
    inputFakeFile = opts.input_fake
    inputDirname  = opts.input_dir
    outputDir     = opts.output_dir
    outputDir     = outputDir if outputDir.endswith('/') else outputDir+'/'
    verbose       = opts.verbose
    if verbose : print '\nUsing the following options:\n'+'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions)

    inputFiles = getInputFiles(inputDirname, tag, verbose)
    inputFiles[fakeSample()] = r.TFile.Open(inputFakeFile)
    assert all(f for f in inputFiles.values()), ("missing inputs: \n%s"%'\n'.join(["%s : %s"%kv for kv in inputFiles.iteritems()]))
    mkdirIfNeeded(outputDir)

    for region in ['cr8lptee', 'cr8lptmm', 'cr9lpt', 'sr8', 'sr9', 'srSsEwk', 'crSsEwkLoose'] :
        for channel in ['ee', 'em', 'mm'] :
            for varname in ['l0_pt', 'l1_pt', 'll_M', 'metrel', 'met', 'njets', 'nbjets'] :
                histo_basename = region+'_'+channel+'_'+varname
                hists, err2s = buildHists(inputFiles, histo_basename)
                if not hists[dataSample()].GetEntries() : continue
                err_band     = buildErrBandGraph(hists['sm'], err2s)
                err_band_r   = buildErrBandRatioGraph(err_band)
                can = r.TCanvas('can_'+histo_basename, histo_basename, 800, 600)
                botPad, topPad = buildBotTopPads(can)
                can.cd()
                topPad.Draw()
                drawTop(topPad, hists, err_band, (channel, region))
                can.cd()
                botPad.Draw()
                drawBot(botPad, hists[dataSample()], hists['sm'], err_band_r, xaxisLabel(varname))
                can.Update()
                outFilename = outputDir+histo_basename+'.png'
                rmIfExists(outFilename) # avoid root warnings
                can.SaveAs(outFilename)
    if verbose : print "output saved to \n%s"%outputDir
Exemplo n.º 18
0
def runPlot(opts):
    lepton = opts.lepton
    batchMode = opts.batch
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions,
                              opts.regions)

    inputDir = outputDir + '/' + lepton + '/histos'
    outputDir = outputDir + '/' + lepton + '/plots'
    mkdirIfNeeded(outputDir)
    histonames = dict((g.name,
                       histonamesOneSample(g.name, variables_to_plot(),
                                           regions, leptonSources))
                      for g in groups)
    groups_to_stack = [g.name for g in groups if not g.is_data]
    if verbose:
        print 'groups being included in the compositions: ', groups_to_stack
    for region in regions:
        all_histos = dict([(g.name,
                            rootUtils.fetchObjectsFromFile(
                                os.path.join(inputDir,
                                             g.name + '_' + region + '.root'),
                                histonames[g.name][region], verbose))
                           for g in groups])
        for v in variables_to_plot():
            histos = dict()
            for s in leptonSources:
                histos[s] = summedHisto(
                    histos=[all_histos[g][v][s] for g in groups_to_stack],
                    label='')
            histos['data'] = all_histos['data'][v]['Unknown']
            plotStackedHistos(histos=histos,
                              datakey='data',
                              stackkeys=leptonSources,
                              outputDir=outputDir + '/' + region,
                              region=region,
                              colors=fakeu.colorsFillSources(),
                              verbose=verbose)
    return
Exemplo n.º 19
0
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-o', '--output_file')
    parser.add_option('-p', '--output_plot')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'output_file', 'output_plot']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions

    def optIsNotSpecified(o):
        return not hasattr(opts, o) or getattr(opts, o) is None

    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error('Missing required option')
    tag = opts.tag
    inputDirname = opts.input_dir
    outputFname = opts.output_file
    outputPlotDir = opts.output_plot
    verbose = opts.verbose
    if verbose:
        print '\nUsing the following options:\n' + '\n'.join(
            "%s : %s" % (o, str(getattr(opts, o))) for o in allOptions)

    allInputFiles = getInputFiles(
        inputDirname, tag,
        verbose)  # includes allBkg, which is used only for sys
    assert all(f for f in allInputFiles.values()), (
        "missing inputs: \n%s" %
        '\n'.join(["%s : %s" % kv for kv in allInputFiles.iteritems()]))
    mkdirIfNeeded(outputPlotDir)
    outputFile = r.TFile.Open(outputFname, 'recreate')
    inputFiles = dict(
        (k, v) for k, v in allInputFiles.iteritems() if k in fakeProcesses())

    buildMuonRates(inputFiles, outputFile, outputPlotDir, verbose)
    buildElectronRates(inputFiles, outputFile, outputPlotDir, verbose)
    buildSystematics(allInputFiles['allBkg'], outputFile)
    outputFile.Close()
    if verbose:
        print "output saved to \n%s" % '\n'.join([outputFname, outputPlotDir])
Exemplo n.º 20
0
def submit_batch_fill_job_per_group(group, opts):
    options_dict = vars(opts)
    group_name = group.name if hasattr(group, 'name') else group
    verbose = opts.verbose
    options_dict['group'] = group_name
    options_with_value = dict(
        (k, v) for k, v in options_dict.iteritems() if v and v is not True)
    # note to self: the line below assumes that the argument-less options have a default=False
    options_with_toggle = dict((k, v) for k, v in options_dict.iteritems()
                               if v and v is True and k != "batch")

    def escape_regex(v):
        return v if v != '.*' else "'.*'"

    def back_to_dash(v):
        return v.replace('_', '-')

    cmd_line_options = ' '.join([
        "--%s %s" % (back_to_dash(k), escape_regex(str(v)))
        for k, v in options_with_value.iteritems()
    ] + ["--%s" % back_to_dash(k)
         for k in options_with_toggle.keys()] + ['--just-fill'])
    template = 'batch/templates/plot_by_source.sh'
    default_log_dir = opts.output_dir.replace('out/', 'log/')
    if default_log_dir.count('/histos') == 1:
        default_log_dir = default_log_dir.replace('/histos', '')
    log_dir = mkdirIfNeeded(opts.log_dir if opts.log_dir else default_log_dir)
    script_dir = mkdirIfNeeded('batch/plot_by_source')
    script_name = os.path.join(script_dir, group_name + '.sh')
    log_name = log_dir + '/' + group_name + '.log'
    script_file = open(script_name, 'w')
    script_file.write(
        open(template).read().replace('%(opt)s', cmd_line_options).replace(
            '%(logfile)s',
            log_name).replace('%(jobname)s',
                              group_name).replace('%(queue)s', opts.queue))
    script_file.close()
    cmd = "sbatch %s" % script_name
    if verbose: print cmd
    out = getCommandOutput(cmd)
    if verbose: print out['stdout']
    if out['stderr']: print out['stderr']
def main() :
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-t', '--tag')
    parser.add_option('-i', '--input_dir')
    parser.add_option('-f', '--input_fractions')
    parser.add_option('-o', '--output_file')
    parser.add_option('-p', '--output_plot')
    parser.add_option('-s', '--input-el-sf', default=[], action='append', help='electron bin-by-bin scale factors (from compute_fake_el_scale_factor)')
    parser.add_option('-z', '--zoom-in', help='vertical axis efficiency plots')
    parser.add_option('-v','--verbose', action='store_true', default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ['tag', 'input_dir', 'output_file', 'output_plot']
    otherOptions = ['verbose']
    allOptions = requiredOptions + otherOptions
    def optIsNotSpecified(o) : return not hasattr(opts, o) or getattr(opts,o) is None
    if any(optIsNotSpecified(o) for o in requiredOptions) : parser.error('Missing required option')
    tag = opts.tag
    inputDirname  = opts.input_dir
    inputFracFname= opts.input_fractions
    inputSfFnames = opts.input_el_sf
    outputFname   = opts.output_file
    outputPlotDir = opts.output_plot
    zoomIn        = opts.zoom_in
    verbose       = opts.verbose
    if verbose : print '\nUsing the following options:\n'+'\n'.join("%s : %s"%(o, str(getattr(opts, o))) for o in allOptions)

    allInputFiles = getInputFiles(inputDirname, tag, verbose) # includes allBkg, which is used only for sys
    assert all(f for f in allInputFiles.values()), ("missing inputs: \n%s"%'\n'.join(["%s : %s"%kv for kv in allInputFiles.iteritems()]))
    if inputSfFnames and any([not os.path.exists(f) for f in inputSfFnames]) : parser.error("invalid electron sf file(s) %s"%inputSfFnames)
    outputPlotDir = outputPlotDir+'/' if not outputPlotDir.endswith('/') else ''
    mkdirIfNeeded(outputPlotDir)
    outputFile = r.TFile.Open(outputFname, 'recreate')
    inputFiles = dict((k, v) for k, v in allInputFiles.iteritems() if k in fakeProcesses())
    inputFracFile = r.TFile.Open(inputFracFname) if inputFracFname else None
    if inputFracFname and not inputFracFile : parser.error("invalid fraction file %s"%inputFracFname)

    buildMuonRates    (inputFiles, outputFile, outputPlotDir, inputFracFile=inputFracFile, verbose=verbose, zoomIn=zoomIn)
    buildElectronRates(inputFiles, outputFile, outputPlotDir, inputFracFile=inputFracFile, inputElecSfFiles=inputSfFnames, verbose=verbose, zoomIn=zoomIn)
    buildSystematics  (allInputFiles['allBkg'], outputFile, verbose)
    outputFile.Close()
    if verbose : print "output saved to \n%s"%'\n'.join([outputFname, outputPlotDir])
Exemplo n.º 22
0
def main():
    options = parse_options()
    inputdf = options.input
    outdir  = options.output_dir
    regexp  = options.sample_regexp
    exclude = options.exclude_regexp
    tag     = options.tag
    verbose = options.verbose
    debug   = options.debug

    utils.mkdirIfNeeded(outdir)
    if debug : dataset.Dataset.verbose_parsing = True
    datasets = dataset.build_all_datasets_from_dir_or_file(inputdf)
    datasets = utils.filterWithRegexp (datasets, regexp, lambda _: _.name) if regexp else datasets
    datasets = utils.excludeWithRegexp(datasets, exclude, lambda _: _.name) if exclude else datasets
    counter = {'fail':0, 'pass':0}
    for d in datasets:
        outcome = 'pass' if  d.build_filelist(gpatlas_dir(d, tag), outdir, verbose) else 'fail'
        counter[outcome] += 1
    if verbose:
        print "created %d filelists (%d failures)" % (counter['pass'], counter['fail'])
def plotVar(bkgHistos, sigHistos, llnjvar, plotdir='./') :
    def preferredSignal(signals):
        pref = 'Herwigpp_sM_wA_noslep_notauhad_WH_2Lep_1'
        return pref if pref in signals else first(sorted(signals))
    signalSample = preferredSignal(sigHistos.keys())
    allHistos = bkgHistos.values() + [sigHistos[signalSample],]
    allHistosEmpty = all([h.GetEntries()==0 for h in allHistos])
    if allHistosEmpty : return
    can = r.TCanvas('can_'+llnjvar, llnjvar, 800, 800)
    botPad, topPad = buildBotTopPads(can, splitFraction=0.75, squeezeMargins=False)
    totBkg = summedHisto(bkgHistos.values())
    totBkg.SetDirectory(0)
    can._totBkg = totBkg
    can._histos = [bkgHistos, sigHistos]
    can.cd()
    botPad.Draw()
    drawBottom(botPad, totBkg, bkgHistos, sigHistos[signalSample], llnjvar)
    can.cd()
    topPad.Draw()
    drawTop(topPad, totBkg, sigHistos[signalSample])
    mkdirIfNeeded(plotdir)
    outFilename = plotdir+'/'+llnjvar+'.png'
    rmIfExists(outFilename) # avoid root warnings
    can.SaveAs(outFilename)
def plotPerSourceEff(histosPerVar={},
                     outputDir='',
                     lepton='',
                     region='',
                     sample='',
                     verbose=False,
                     zoomIn=True):
    "plot efficiency for each source (and 'anysource') as a function of each var; expect histos[var][source][loose,tight]"
    variables = histosPerVar.keys()
    sources = [s for s in first(histosPerVar).keys()
               if s != 'real']  # only fake eff really need a scale factor
    colors = colorsLineSources
    mkdirIfNeeded(outputDir)
    for var in filter(lambda x: x in ['pt1', 'eta1'], histosPerVar.keys()):
        histosPerSource = dict((s, histosPerVar[var][s]) for s in sources)
        canvasBasename = region + '_efficiency_' + lepton + '_' + var + (
            "_%s" % sample if sample else '')
        missingSources = [
            s for s, h in histosPerSource.iteritems()
            if not h['loose'] or not h['tight']
        ]
        if missingSources:
            if verbose:
                print "skip %s, missing histos for %s" % (var,
                                                          str(missingSources))
            continue
        anySourceLoose = summedHisto(
            [h['loose'] for h in histosPerSource.values()])
        anySourceTight = summedHisto(
            [h['tight'] for h in histosPerSource.values()])
        anySourceLoose.SetName(histoNamePerSource(var, 'any', 'loose', region))
        anySourceTight.SetName(histoNamePerSource(var, 'any', 'tight', region))
        histosPerSource['any'] = {
            'loose': anySourceLoose,
            'tight': anySourceTight
        }
        emptyBkg = anySourceLoose.Integral() == 0 or anySourceTight.Integral(
        ) == 0
        if emptyBkg:
            if verbose: print "empty backgrounds, skip %s" % canvasBasename
            continue

        def computeEfficiencies(histosPerSource={}):
            sources = histosPerSource.keys()
            num = dict((s, histosPerSource[s]['tight']) for s in sources)
            den = dict((s, histosPerSource[s]['loose']) for s in sources)
            eff = dict(
                (s, h.Clone(h.GetName().replace('tight', 'tight_over_loose')))
                for s, h in num.iteritems())
            [eff[s].Divide(den[s]) for s in sources]
            return eff

        effs = computeEfficiencies(histosPerSource)
        can = r.TCanvas('c_' + canvasBasename, canvasBasename, 800, 600)
        can.cd()
        pm = first(effs)  # pad master
        pm.SetStats(False)
        pm.Draw('axis')
        can.Update()
        for s, h in effs.iteritems():
            h.SetMarkerColor(colors[s] if s in colors else r.kBlack)
            h.SetLineColor(h.GetMarkerColor())
            h.SetLineWidth(2 * h.GetLineWidth())
            h.SetMarkerStyle(markersSources[s] if s in
                             markersSources else r.kDot)
            h.Draw('ep same')
            h.SetDirectory(0)
        #pprint.pprint(effs)
        yMin, yMax = getMinMax(effs.values())
        pm.SetMinimum(0.0)
        pm.SetMaximum(0.25 if yMax < 0.5 and zoomIn else 1.1)
        can.Update()
        topRightLabel(can, canvasBasename, xpos=0.125, align=13)
        drawLegendWithDictKeys(can, effs, opt='lp')
        can.RedrawAxis()
        can._histos = effs
        can.Update()
        outFname = os.path.join(outputDir, canvasBasename + '.png')
        utils.rmIfExists(outFname)
        can.SaveAs(outFname)
Exemplo n.º 25
0
def runPlot(opts):
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand
    selections = regions_to_plot(opts.include_regions, opts.exclude_regions,
                                 opts.regions)
    variables = variables_to_plot()

    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    if not skip_charge_flip:
        groups.append(
            dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    plot_groups = [systUtils.Group(g.name) for g in groups]
    sel_not_specified = len(regions_to_plot()) == len(selections)
    if sel_not_specified:
        selections = guess_available_selections_from_histofiles(
            inputDir, first(plot_groups), verbose)
    systematics_to_use = get_list_of_syst_to_fill(opts)
    for group in plot_groups:
        group.setCurrentSelection(first(selections))
        group.setHistosDir(inputDir).setCurrentSelection(first(selections))
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(systematics_to_use, opts.exclude,
                                       verbose)
    available_systematics = sorted(
        list(set([s for g in plot_groups for s in g.systematics])))
    systematics = [s for s in systematics_to_use if s in available_systematics]
    if verbose:
        print "using the following systematics : {0}".format(systematics)
        print "missing the following systematics : {0}".format(
            [s for s in systematics_to_use if s not in available_systematics])
    fakeSystematics = [
        s for s in systematics if s in systUtils.fakeSystVariations()
    ]
    mcSystematics = [
        s for s in systematics if s in systUtils.mcObjectVariations() +
        systUtils.mcWeightVariations()
    ]

    mkdirIfNeeded(outputDir)
    findByName = systUtils.findByName
    simBkgs = [g for g in plot_groups if g.isMcBkg]
    data = findByName(plot_groups, 'data')
    fake = findByName(plot_groups, 'fake')
    signal = findByName(plot_groups, 'signaltaumu')
    print 'names_stacked_groups to be improved'
    names_stacked_groups = [g.name for g in simBkgs + [fake]]
    for sel in selections:
        if verbose: print '-- plotting ', sel
        for var in variables:
            if verbose: print '---- plotting ', var
            print_summary_yield = var is 'onebin'
            for g in plot_groups:
                g.setSystNominal()
                g.setCurrentSelection(sel)
            nominalHistoData = data.getHistogram(variable=var,
                                                 selection=sel,
                                                 cacheIt=True)
            nominalHistoSign = signal.getHistogram(variable=var,
                                                   selection=sel,
                                                   cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var,
                                                    selection=sel,
                                                    cacheIt=True)
            nominalHistosSimBkg = dict([(g.name,
                                         g.getHistogram(variable=var,
                                                        selection=sel,
                                                        cacheIt=True))
                                        for g in simBkgs])
            nominalHistosBkg = dict(
                [('fake', nominalHistoFakeBkg)] +
                [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg,
                                             histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake,
                                    simBkgs=simBkgs,
                                    variable=var,
                                    selection=sel,
                                    fakeVariations=fakeSystematics,
                                    mcVariations=mcSystematics,
                                    verbose=verbose,
                                    printYield=print_summary_yield)
            # if print_summary_yield:
            #     print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics)

            plotHistos(histoData=nominalHistoData,
                       histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg,
                       histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand,
                       systErrBand=systErrBand,
                       stack_order=names_stacked_groups,
                       topLabel=sel,
                       canvasName=(sel + '_' + var),
                       outdir=outputDir,
                       options=opts,
                       printYieldSummary=print_summary_yield)
    for group in plot_groups:
        group.printVariationsSummary()
Exemplo n.º 26
0
def runFill(opts):
    batchMode = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir = opts.input_other
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug
    blinded = not opts.unblind
    tightight = opts.require_tight_tight

    if debug: dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    if not skip_charge_flip:
        groups.append(
            dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose:
        print '\n'.join(
            "group {0} : {1} samples".format(g.name, len(g.datasets))
            for g in groups)
    if debug:
        print '\n'.join("group {0} : {1} samples: {2}".format(
            g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name
                                                          for d in g.datasets))
                        for g in groups)
    if verbose: print "filling histos"
    # eval will take care of aborting on typos
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions,
                              opts.regions)
    if verbose:
        print "about to loop over these systematics:\n %s" % str(systematics)
    if verbose: print "about to loop over these regions:\n %s" % str(regions)
    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(
                            group=group, selection=selection, opts=opts)
    else:
        for group in groups:
            systematics = [
                s for s in systematics
                if systUtils.Group(group.name).isNeededForSys(s)
            ]
            if not systematics:
                print "warning, empty syst list. You should have at least the nominal"
            for systematic in systematics:
                # note to self: here you will want to use a modified Sample.setHftInputDir
                # for now we just have the fake syst that are in the nominal tree
                tree_name = 'hlfv_tuple'
                chain = IndexedChain(tree_name)
                input_dir = opts.input_fake if group.name == 'fake' else opts.input_other
                for ds in group.datasets:
                    chain.Add(
                        os.path.join(
                            input_dir,
                            systUtils.Sample(
                                ds.name,
                                group.name).setSyst(systematic).filename))
                if opts.verbose:
                    print "{0} : {1} entries from {2} samples".format(
                        group.name, chain.GetEntries(), len(group.datasets))
                chain.cache_directory = os.path.abspath('./selection_cache/' +
                                                        group.name + '/')
                tcuts = [
                    r.TCut(reg,
                           selection_formulas()[reg]) for reg in regions
                ]
                chain.retrieve_entrylists(tcuts)
                counters_pre, histos_pre = dict(), dict()
                counters_npre, histos_npre = dict(), dict()
                cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list(
                )
                uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list(
                )
                if verbose:
                    print 'filling cached cuts: ', ' '.join(
                        [c.GetName() for c in cached_tcuts])
                for cut in cached_tcuts:
                    chain.preselect(cut)
                    c_pre, h_pre = count_and_fill(
                        chain=chain,
                        sample=group.name,
                        syst=systematic,
                        verbose=verbose,
                        debug=debug,
                        blinded=blinded,
                        onthefly_tight_def=onthefly_tight_def,
                        tightight=tightight,
                        quicktest=opts.quick_test,
                        cached_cut=cut)
                    out_filename = (systUtils.Group(
                        group.name).setSyst(systematic).setHistosDir(
                            outputDir).setCurrentSelection(
                                cut.GetName())).filenameHisto
                    writeObjectsToFile(out_filename, h_pre, verbose)
                    counters_pre = dictSum(counters_pre, c_pre)
                    histos_pre = dictSum(histos_pre, h_pre)
                if uncached_tcuts:
                    if verbose:
                        print 'filling uncached cuts: ', ' '.join(
                            [c.GetName() for c in uncached_tcuts])
                    counters_npre, histos_npre = count_and_fill(
                        chain=chain,
                        sample=group.name,
                        syst=systematic,
                        verbose=verbose,
                        debug=debug,
                        blinded=blinded,
                        onthefly_tight_def=onthefly_tight_def,
                        tightight=tightight,
                        quicktest=opts.quick_test,
                        noncached_cuts=uncached_tcuts)
                    for sel, histos in histos_npre.iteritems():
                        out_filename = (systUtils.Group(
                            group.name).setSyst(systematic).setHistosDir(
                                outputDir).setCurrentSelection(sel)
                                        ).filenameHisto
                        writeObjectsToFile(out_filename, histos, verbose)
                chain.save_lists()
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/tight_variables_plots', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames)
    regions = filestems
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions))
    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(region)s_%(l)s_tight_plots.root" % {'region':region, 'l':lepton}
    treeName = treenames[regions.index(region)]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    # collect inputs
    if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename)
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    if not samples : samples = [guessSampleFromFilename(f) for f in tupleFilenames] # if the fast guess didn't work, try the slow one
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt','eta','d0sig','z0SinTheta','etCone','ptCone','etConeCorr','ptConeCorr']
    vars += ['relEtConeStd', 'relPtConeStd', 'relEtConeMod', 'relPtConeMod']
    groups = samplesPerGroup.keys()
    sources = leptonSources
    #fill histos
    if doFillHistograms :
        lepLabel = "(probe %s)"%lepton
        histosPerGroup = bookHistosPerGroup(vars, groups, lepLabel=lepLabel)
        histosPerSource = bookHistosPerSource(vars, sources, lepLabel=lepLabel)
        for group in groups:
            isData = isDataSample(group)
            filenames = filenamesPerGroup[group]
            histosThisGroup = histosPerGroup[group]
            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            print "%s : %d entries"%(group, chain.GetEntries())
            fillHistos(chain, histosThisGroup, histosPerSource, isData, lepton, group, verbose)
        writeHistos(cacheFileName, {'perGroup':histosPerGroup, 'perSource':histosPerSource}, verbose)
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNames(vars, sources), verbose)
    plotStackedHistos(histosPerGroup,  outputDir+'/by_group',  region, colors=SampleUtils.colors, verbose=verbose)
    plotStackedHistos(histosPerSource, outputDir+'/by_source', region, colors=fakeu.colorsFillSources(), verbose=verbose)
    plotIsoComparison(histosPerSource, outputDir+'/',          region, lepton, verbose)
Exemplo n.º 28
0
def runPlot(opts) :
    inputDir     = opts.input_dir
    outputDir    = opts.output_dir
    verbose      = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand
    selections = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    variables = variables_to_plot()

    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    plot_groups = [systUtils.Group(g.name) for g in groups]
    sel_not_specified = len(regions_to_plot())==len(selections)
    if sel_not_specified:
        selections = guess_available_selections_from_histofiles(inputDir, first(plot_groups), verbose)
    systematics_to_use = get_list_of_syst_to_fill(opts)
    for group in plot_groups :
        group.setCurrentSelection(first(selections))
        group.setHistosDir(inputDir).setCurrentSelection(first(selections))
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(systematics_to_use, opts.exclude, verbose)
    available_systematics = sorted(list(set([s for g in plot_groups for s in g.systematics])))
    systematics = [s for s in systematics_to_use if s in available_systematics]
    if verbose :
        print "using the following systematics : {0}".format(systematics)
        print "missing the following systematics : {0}".format([s for s in systematics_to_use if s not in available_systematics])
    fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()]
    mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()]

    mkdirIfNeeded(outputDir)
    findByName = systUtils.findByName
    simBkgs = [g for g in plot_groups if g.isMcBkg]
    data = findByName(plot_groups, 'data')
    fake = findByName(plot_groups, 'fake')
    signal = findByName(plot_groups, 'signaltaumu')
    print 'names_stacked_groups to be improved'
    names_stacked_groups = [g.name for g in simBkgs+[fake]]
    for sel in selections :
        if verbose : print '-- plotting ',sel
        for var in variables :
            if verbose : print '---- plotting ',var
            print_summary_yield = var is 'onebin'
            for g in plot_groups :
                g.setSystNominal()
                g.setCurrentSelection(sel)
            nominalHistoData    = data.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoSign    = signal.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True))
                                        for g in simBkgs])
            nominalHistosBkg    = dict([('fake', nominalHistoFakeBkg)] +
                                       [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg  = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg,
                                              histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel,
                                    fakeVariations=fakeSystematics, mcVariations=mcSystematics,
                                    verbose=verbose, printYield=print_summary_yield)
            # if print_summary_yield:
            #     print_stat_syst_yield(fake=fake, variable=var, selection=sel, fakeVariations=fakeSystematics)

            plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign,
                       histoTotBkg=nominalHistoTotBkg, histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand, systErrBand=systErrBand,
                       stack_order=names_stacked_groups,
                       topLabel=sel,
                       canvasName=(sel+'_'+var), outdir=outputDir, options=opts,
                       printYieldSummary=print_summary_yield)
    for group in plot_groups :
        group.printVariationsSummary()
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-n", "--n_iter", type="int", default=8)
    parser.add_option("-m", "--input_mc")
    parser.add_option("-d", "--input_data")
    parser.add_option("-o", "--output")
    parser.add_option("-p", "--plot", help="plot inputs")  # todo: implement sanity plot vs. n_iter
    parser.add_option("-v", "--verbose", action="store_true", default=False)
    (opts, args) = parser.parse_args()
    requiredOptions = ["n_iter", "input_mc", "input_data", "output"]
    otherOptions = ["plot", "verbose"]
    allOptions = requiredOptions + otherOptions

    def optIsNotSpecified(o):
        return not hasattr(opts, o) or getattr(opts, o) is None

    if any(optIsNotSpecified(o) for o in requiredOptions):
        parser.error("Missing required option")
    nIter = opts.n_iter
    fnameInputMc = opts.input_mc
    fnameInputDa = opts.input_data
    fnameOutput = opts.output
    plotdir = opts.plot
    verbose = opts.verbose
    if verbose:
        print (
            "\nUsing the following options:\n" + "\n".join("%s : %s" % (o, str(getattr(opts, o))) for o in allOptions)
        )
    fileData = r.TFile.Open(fnameInputDa)
    fileMc = r.TFile.Open(fnameInputMc)
    if plotdir:
        mkdirIfNeeded(plotdir)
    assert fileData and fileMc, "Missing input files: data %s, mc %s" % (str(fileData), str(fileMc))
    correctionHistos = {}
    for lep in ["muon", "elec"]:
        if verbose:
            print "Lepton: %s" % lep
        hRealDataCr = getNumDenHistos(fileData, lep + "_realCR_all_l_pt")
        hFakeDataLo = getNumDenHistos(fileData, lep + "_fakeHF_all_l_pt")
        hFakeDataHi = getNumDenHistos(fileData, lep + "_fakeHF_high_all_l_pt")
        hFakeMcLo = getNumDenHistos(fileMc, lep + "_fakeHF_all_l_pt")
        hFakeMcHi = getNumDenHistos(fileMc, lep + "_fakeHF_high_all_l_pt")
        if plotdir:
            hNumDen = [hFakeDataLo, hFakeDataHi, hFakeMcLo, hFakeMcHi]
            for nd in ["num", "den"]:
                plotHistos([h[nd] for h in hNumDen], "c_" + lep + "_" + nd, plotdir)
            plotHistosRatio(hNumDen, "c_" + lep + "_ratio", plotdir)
        h2dRealDataCr = getNumDenHistos(fileData, lep + "_realCR_all_l_pt_eta")
        h2dFakeDataLo = getNumDenHistos(fileData, lep + "_fakeHF_all_l_pt_eta")
        h2dFakeDataHi = getNumDenHistos(fileData, lep + "_fakeHF_high_all_l_pt_eta")
        h2dFakeMcLo = getNumDenHistos(fileMc, lep + "_fakeHF_all_l_pt_eta")
        h2dFakeMcHi = getNumDenHistos(fileMc, lep + "_fakeHF_high_all_l_pt_eta")

        def missingInputHisto(ndHistos):
            return any(not h for h in ndHistos.values())

        histoCollToBeChecked = ["hRealDataCr", "hFakeDataLo", "hFakeDataHi", "hFakeMcLo", "hFakeMcHi"]
        missingHistos = dict(
            [(nhc, hp) for nhc, hp in [(hc, eval(hc)) for hc in histoCollToBeChecked] if missingInputHisto(hp)]
        )
        for v in histoCollToBeChecked:
            print "entries 1d %s : num %d den %d (%s)" % (
                v,
                eval(v)["num"].GetEntries(),
                eval(v)["den"].GetEntries(),
                str(eval(v)["den"]),
            )

        histoCollToBeChecked = ["h2dRealDataCr", "h2dFakeDataLo", "h2dFakeDataHi", "h2dFakeMcLo", "h2dFakeMcHi"]
        missingHistos = dict(
            [(nhc, hp) for nhc, hp in [(hc, eval(hc)) for hc in histoCollToBeChecked] if missingInputHisto(hp)]
        )

        for v in histoCollToBeChecked:
            print "entries 2d %s : num %d den %d (%s)" % (
                v,
                eval(v)["num"].GetEntries(),
                eval(v)["den"].GetEntries(),
                str(eval(v)["den"]),
            )
        print histoCollToBeChecked
        print missingHistos

        if len(missingHistos):
            print (
                lep
                + " : missing histograms: \n"
                + "\n".join(["%s: num %s den %s" % (k, v["num"], v["den"]) for k, v in missingHistos.iteritems()])
            )
            continue

        correctionHistos[lep] = buildCorrectionHisto(
            hRealDataCr,
            hFakeDataLo,
            hFakeDataHi,
            hFakeMcLo,
            hFakeMcHi,
            nIter=nIter,
            verbose=verbose,
            histoname=lep + "_corHFRate",
            plotdir=plotdir,
        )

        # here do the 2d ones
        print 10 * "--", " now doing the 2d ones ", 10 * "--"
        dummy = h2dRealDataCr["num"]
        xAx, yAx = dummy.GetXaxis(), dummy.GetYaxis()
        print dummy.GetName(), ": bins (%d, %d)" % (dummy.GetNbinsX(), dummy.GetNbinsY())
        nEtaBins = yAx.GetNbins()
        print "nEtaBins: ", nEtaBins
        xMin, xMax = xAx.GetXmin(), xAx.GetXmax()
        etaBins = range(1, 1 + nEtaBins)
        for eb in etaBins:

            def etaSlice(h, b, p):
                return h.ProjectionX(p + h.GetName() + "_eta%d" % b, b, b)  # prefix needed to avoid overwriting

            hRealDataCr = dict((k, etaSlice(h, eb, "rdc")) for k, h in h2dRealDataCr.iteritems())
            hFakeDataLo = dict((k, etaSlice(h, eb, "fdl")) for k, h in h2dFakeDataLo.iteritems())
            hFakeDataHi = dict((k, etaSlice(h, eb, "fdh")) for k, h in h2dFakeDataHi.iteritems())
            hFakeMcLo = dict((k, etaSlice(h, eb, "fml")) for k, h in h2dFakeMcLo.iteritems())
            hFakeMcHi = dict((k, etaSlice(h, eb, "fmh")) for k, h in h2dFakeMcHi.iteritems())
            print "eta bin ", eb
            for k, h in hFakeDataLo.iteritems():
                print "fakeDataLo %s : %s" % (k, lf2s(getBinContents(h)))

            correctionHistos[lep + "_eta%d" % eb] = buildCorrectionHisto(
                hRealDataCr,
                hFakeDataLo,
                hFakeDataHi,
                hFakeMcLo,
                hFakeMcHi,
                nIter=nIter,
                verbose=verbose,
                histoname=lep + "_corHFRate" + "_eta_bin%d" % eb,
            )
        correctionHistos[lep + "_eta"] = combineEtaSlices(
            template2d=h2dRealDataCr["num"],
            etaSlicedRates=dict((k, h) for k, h in correctionHistos.iteritems() if (lep + "_eta") in k),
            histoname=lep + "_corHFRate_eta",
        )

        print 10 * "--", "    done               ", 10 * "--"
    if verbose:
        print "saving output to ", fnameOutput
    fileOut = r.TFile.Open(fnameOutput, "recreate")
    fileOut.cd()
    print "keys ", correctionHistos.keys()
    for l, h in correctionHistos.iteritems():
        if verbose:
            print "%s : writing %s\n%s" % (l, h.GetName(), histo1dToTxt(h))
        h.Write()
    fileOut.Close()
Exemplo n.º 30
0
def runFill(opts):
    lepton    = opts.lepton
    batchMode = opts.batch
    inputDir  = opts.input_dir
    outputDir = opts.output_dir
    verbose   = opts.verbose
    debug     = opts.debug

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    if opts.group : groups = [g for g in groups if g.name==opts.group]
    if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups)
    if debug :
        print '\n'.join("group {0} : {1} samples: {2}".format(g.name,
                                                              len(g.datasets),
                                                              '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups)
    if verbose : print "filling histos"
    outputDir = outputDir+'/'+lepton+'/histos'
    mkdirIfNeeded(outputDir)
    if batchMode:
        for group in groups:
            submit_batch_fill_job_per_group(group, opts)
    else:
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                chain.Add(os.path.join(inputDir, ds.name+'.root'))
            if opts.verbose:
                print "{0} : {1} entries from {2} samples".format(group.name,
                                                                  chain.GetEntries(),
                                                                  len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg])
                     for reg in regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
            uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
            print 'todo: skip cuts for which the histo files are there'
            if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts])
            for cut in cached_tcuts:
                chain.preselect(cut)
                c_pre, h_pre = count_and_fill(chain=chain, opts=opts,
                                              group=group,
                                              cached_cut=cut)
                counters_pre = dictSum(counters_pre, c_pre)
                histos_pre = dictSum(histos_pre, h_pre)
            if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                counters_npre, histos_npre = count_and_fill(chain=chain, opts=opts,
                                                            group=group,
                                                            noncached_cuts=uncached_tcuts)
                chain.save_lists()
            all_histos = dictSum(histos_pre, histos_npre)
            for sel, histos in all_histos.iteritems():
                # write histos for each sel to a separate file (finer granularity, better caching)
                out_filename = os.path.join(outputDir, group.name+'_'+sel+'.root')
                if verbose : print 'saving to ',out_filename
                writeObjectsToFile(out_filename, histos, verbose)
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-m', '--mode', help='real, conv, hflf')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    mode      = options.mode
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    validModesEl = ['real', 'hflf'] + ['conv']
    validModesMu = ['real', 'hflf']
    if mode not in (validModesEl if lepton=='el' else validModesMu) : parser.error("invalid mode %s"%mode)
    tupleStem, treeName = {'conv' : ('mcconv_tuple', 'ConversionExtractionRegion'),
                           'hflf' : ('mcqcd_tuple', 'HfLfExtractionRegion'),
                           'real' : ('mcreal_tuple', 'RealExtractionRegion')
                           }[mode]
    templateInputFilename = "*_%(stem)s_%(tag)s.root" % {'tag':tag, 'stem':tupleStem}
    templateOutputFilename =  "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton}
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    # collect inputs
    print 'input filenames: ',os.path.join(inputDir, templateInputFilename)

    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt', 'pt_eta']
    groups = [g for g in samplesPerGroup.keys() if not isDataSample(g) and not g=='higgs']
    if lepton=='el' : groups = [g for g in groups if g!='heavyflavor']

    sourcesThisMode = {'real' : ['real'], # use same convention as in FakeLeptonSources.h
                       'conv' : ['conv'],
                       'hflf' : ['heavy', 'light', 'qcd']
                       }[mode]
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode)
        for group in groups:
            filenames = filenamesPerGroup[group]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys())
            histosAnyGroupPerSource  = dict((v, histosPerGroupPerSource[v]['anygroup']) for v in histosPerGroupPerSource.keys())

            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            if verbose: print "%s : %d entries"%(group, chain.GetEntries())
            num_processed_entries += fillHistos(chain, histosThisGroupPerSource, histosAnyGroupPerSource,
                                                lepton, mode,
                                                onthefly_tight_def=onthefly_tight_def, verbose=verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # compute efficiencies
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose)
    effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t]
    for s in sourcesThisMode:
        for v in vars:
            groups = first(effs).keys()
            varIs1D, varIs2D = v=='pt', v=='pt_eta'
            effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups)
            densThisSourceThisVar = dict((g, histosPerGroupPerSource[v][g][s]['loose']) for g in groups if g!='anygroup')
            numsThisSourceThisVar = dict((g, histosPerGroupPerSource[v][g][s]['tight']) for g in groups if g!='anygroup')
            if varIs1D:
                cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                zoomIn = True
                fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn)
                cname = 'stack_loose_'+lepton+'_'+s
                lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotParametrizedFractions.plotStackedHistos(densThisSourceThisVar, cname, outputDir, title)
                cname = 'stack_tight_'+lepton+'_'+s
                lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotParametrizedFractions.plotStackedHistos(numsThisSourceThisVar, cname, outputDir, title)

            elif varIs2D:
                cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn)
    writeHistos(outputFileName, effs, verbose)
    if verbose : print "saved scale factors to %s" % outputFileName
def plotIsoComparison(histosPerSource={}, outputDir='', region='', lepton='', verbose=False):
    """
    plot a comparison of eff(T|L) for real and for fake leptons
    vs. pt, where the numerator is one of the tight definitions
    """
    var = 'pt'
    sources = histosPerSource.keys()
    lOrTOrTs = first(first(histosPerSource)).keys()
    histosPtPerSource = dict((s, dict((lt, histosPerSource[s][var][lt]) for lt in lOrTOrTs)) for s in sources)
    def buildTotFakeHistos():
        "add up all the non-real (fake) sources"
        notRealSources = [s for s in sources if s!='real']
        aSource = first(notRealSources)
        totFakeHistos = dict()
        for lt in ['loose', 'tight', 'tight_std', 'tight_minden', 'tight_tight']:
            template = histosPtPerSource[aSource][lt]
            h = template.Clone(template.GetName().replace(aSource, 'fake'))
            h.Reset()
            for s in sources : h.Add(histosPtPerSource[s][lt])
            totFakeHistos[lt] = h
        return totFakeHistos
    histosPtPerSource['fake'] = buildTotFakeHistos()
    effReal_wh     = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight'       ], histosPtPerSource['real']['loose'])
    effReal_std    = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_std'   ], histosPtPerSource['real']['loose'])
    effReal_minden = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_minden'], histosPtPerSource['real']['loose'])
    effReal_tight  = rootUtils.buildRatioHistogram(histosPtPerSource['real']['tight_tight' ], histosPtPerSource['real']['loose'])
    effFake_wh     = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight'       ], histosPtPerSource['fake']['loose'])
    effFake_std    = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_std'   ], histosPtPerSource['fake']['loose'])
    effFake_minden = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_minden'], histosPtPerSource['fake']['loose'])
    effFake_tight  = rootUtils.buildRatioHistogram(histosPtPerSource['fake']['tight_tight' ], histosPtPerSource['fake']['loose'])
    frameName, frameTitle = region+'_'+lepton, "fake and real efficiencies for %s in %s"%(lepton, region)
    can = r.TCanvas('c_'+frameName, frameTitle, 800, 600)
    can.cd()
    pm = effReal_wh
    pm.SetMinimum(0.0)
    pm.SetMaximum(1.1)
    pm.GetYaxis().SetTitle("#epsilon(T|L)")
    colorReal, colorFake = r.kBlue, r.kRed
    markerWh, markerStd, markerMinden, markerTight = r.kMultiply, r.kCircle, r.kOpenTriangleUp, r.kOpenSquare
    def setAttrs(h, mark, col):
        h.SetLineColor(col)
        h.SetMarkerColor(col)
        h.SetMarkerStyle(mark)
    setAttrs(effReal_wh,     markerWh,     colorReal)
    setAttrs(effReal_std,    markerStd,    colorReal)
    setAttrs(effReal_minden, markerMinden, colorReal)
    setAttrs(effReal_tight,  markerTight,  colorReal)
    setAttrs(effFake_wh,     markerWh,     colorFake)
    setAttrs(effFake_std,    markerStd,    colorFake)
    setAttrs(effFake_minden, markerMinden, colorFake)
    setAttrs(effFake_tight,  markerTight,  colorFake)
    pm.SetStats(0)
    pm.Draw('axis')
    #for h in [effReal_wh, effReal_std, effReal_tight, effFake_wh, effFake_std, effFake_tight]:
    for h in [effReal_wh, effReal_std, effReal_minden, effFake_wh, effFake_std, effFake_minden]:
        h.Draw('same')
    leg = rightLegend(can)
    leg.SetBorderSize(0)
    leg.AddEntry(r.TObject(),   'Real', '')
    leg.AddEntry(effReal_std,   'std iso', 'lp')
    #leg.AddEntry(effReal_tight, 'tight iso', 'lp')
    leg.AddEntry(effReal_minden,'minden iso', 'lp')
    leg.AddEntry(effReal_wh,    'wh iso',  'lp')
    leg.AddEntry(r.TObject(),   'Fake', '')
    leg.AddEntry(effFake_std,   'std iso', 'lp')
    #leg.AddEntry(effFake_tight, 'tight iso', 'lp')
    leg.AddEntry(effFake_minden,'minden iso', 'lp')
    leg.AddEntry(effFake_wh,  '  wh iso',  'lp')
    leg.Draw()
    topRightLabel(can, "#splitline{%s}{%s}"%(lepton, region), xpos=0.125, align=13)
    can.RedrawAxis()
    can._histos = [effReal_wh, effReal_std, effFake_wh, effFake_std]
    can.Update()
    mkdirIfNeeded(outputDir)
    can.SaveAs(os.path.join(outputDir, frameTitle+'.png'))
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-c', '--comp-histos', help='output from compute_fake_compositions.py')
    parser.add_option('-e', '--eff-histos', default=[], action='append', help='output files from compute_eff_from_ntuple.py')
    parser.add_option('-r', '--region', help='where we have the compositions, and want the fake matrix, e.g. ssinc1j, emu')
    parser.add_option('-s', '--scale-factors', default=[], action='append', help='bin-by-bin data/mc from compute_fake_scale_factor')
    parser.add_option('-o', '--output-dir', default='./out/fake_weighted_average', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('--also-anygroup', action='store_true', help='also build matrix without compositions,'                      ' to evaluate the systematic uncertainty on the composition')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    compFname = options.comp_histos
    effFnames = options.eff_histos
    region    = options.region
    sfFnames  = options.scale_factors
    outputDir = options.output_dir
    lepton    = options.lepton
    verbose   = options.verbose
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    if region not in ['emu', 'ssinc', 'ssinc1j','razor0j'] : parser.error("invalid region '%s'"%region)
    if not compFname or not os.path.exists(compFname) : parser.error("invalid composition file '%s'"%compFname)
    if not effFnames or not all(os.path.exists(f) for f in effFnames) : parser.error("invalid efficiency file '%s'"%str(effFnames))
    if not sfFnames  or not all(os.path.exists(f) for f in sfFnames) :
        # parser.error("invalid electron sf file(s) %s"%str(sfFnames))
        print "missing sf files, using flat scale factors"# do not crash, fall back on flat scale factors
    optionsToPrint = ['inputDir', 'outputDir']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    mkdirIfNeeded(outputDir)
    # collect inputs
    regions = [region, ]
    groups=['diboson', 'heavyflavor', 'ttbar', 'wjets', 'zjets']
    if lepton=='el' : groups = filter(lambda _ : _!='heavyflavor', groups) # note this must be in sync with compute_fake_compositions; TODO: implement a way to get the groups from the available histos
    compositions = fetchCompositionHistos(compFname, lepton, groups, regions, verbose) # [var][group][reg][orig], note here orig=[conv,heavy,light]
    # pprint.pprint(compositions)
    efficiencies = fetchEffienciesHistos(effFnames, lepton, groups, verbose) # [var][group][orig], note here orig=[conv,heavy,light,qcd]
    # pprint.pprint(efficiencies)
    scale_factor_histos = fetchSfHistos(sfFnames, lepton, verbose)
    convSF_vs_eta = scale_factor_histos['conv'] if lepton=='el' else None
    qcdSF_vs_eta  = scale_factor_histos['hflf'] if 'hflf' in scale_factor_histos else None
    # if verbose:
    #     print "convSF: "+("vs. eta {0}".format(getBinContents(convSF_vs_eta)) if convSF_vs_eta else el_convSF)
    #     print "qcdSF: "+("vs. eta {0}".format(getBinContents(qcdSF_vs_eta)) if qcdSF_vs_eta else el_qcdSF)
    def scale_factor_to_str(sf):
        if 'vs_eta' in sf: return '['+', '.join("%.3f" % _ for _ in sf['vs_eta'])+']'
        else : return "%.3f" % sf['flat']
    if lepton=='el':
        scaleFactors = {'conv' : {'flat' : el_convSF, 'vs_eta' : convSF_vs_eta},
                        'heavy' : {'flat' : el_qcdSF, 'vs_eta' : qcdSF_vs_eta} }
        if verbose : print_scale_factor_dict(scaleFactors)
        scaleFakeEfficiencies(efficiencies, scaleFactors)
    elif lepton=='mu':
        scaleFactors = {'heavy' : {'flat' : mu_qcdSF, 'vs_eta' : qcdSF_vs_eta} }
        if verbose : print_scale_factor_dict(scaleFactors)
        scaleFakeEfficiencies(efficiencies, scaleFactors)

    # for now compute the weighted avg only for 'ssinc1j'
    avgEfficiencies = dict()
    for reg in first(first(compositions)).keys():
        avgEfficiencies[reg] = dict()
        for var in ['pt', 'pt_eta']:
            is1D = var=='pt'
            lT = "%s #varepsilon(T|L) fake %s"%(reg, lepton)
            lX = 'p_{T} [GeV]'
            lY = '#varepsilon(T|L)' if is1D else '#eta'
            hname = "%(lep)s_fake_%(var)s_%(reg)s"%{'lep':lepton, 'var':var, 'reg':reg}
            htitle = lT+';'+lX+';'+lY
            groups  = first(compositions).keys()
            origins = first(first(first(compositions))).keys()
            if verbose : print 'origins :',origins,'\n' + 'groups :',groups
            histosEff  = dict((group+'_'+orig, efficiencies[var][group]     [orig]) for group in groups for orig in origins)
            histosComp = dict((group+'_'+orig, compositions[var][group][reg][orig]) for group in groups for orig in origins)
            avgEff =  weightedAverage(histosEff, histosComp, hname, htitle, verbose)
            avgEfficiencies[reg][var] = avgEff
            if is1D:
                fakeu.plot1dEfficiencies({reg : avgEff}, 'eff1d_'+lepton+'_fake_'+reg, outputDir, htitle, zoomIn=True)
            else:
                fakeu.plot2dEfficiencies({reg : avgEff}, 'eff2d_'+lepton+'_fake_'+reg, outputDir, htitle, zoomIn=True)
    writeHistos(os.path.join(outputDir,'fake_matrices_'+lepton+'.root'), avgEfficiencies, verbose)
    if options.also_anygroup:# test with the group-independent efficiencies
	print 'fetchCompositionHistos ',compFname
	compositions = fetchCompositionHistos(compFname, lepton, ['anygroup'], verbose)
	pprint.pprint(compositions)
	print 'fetchEffienciesHistos ',effFnames
	efficiencies = fetchEffienciesHistos(effFnames, lepton, ['anygroup'], verbose)
	avgEfficiencies = dict()
	for reg in first(first(compositions)).keys():
	    avgEfficiencies[reg] = dict()
	    for var in ['pt', 'pt_eta']:
	        is1D = var=='pt'
	        lT = "%s #varepsilon(T|L) fake %s"%(reg, lepton)
	        lX = 'p_{T} [GeV]'
	        lY = '#varepsilon(T|L)' if is1D else '#eta'
	        hname = "%(lep)s_fake_%(var)s_%(reg)s"%{'lep':lepton, 'var':var, 'reg':reg}
	        htitle = lT+';'+lX+';'+lY
	        groups  = first(compositions).keys()
	        origins = first(first(first(compositions))).keys()
	        if verbose : print 'origins :',origins,'\n' + 'groups :',groups
	        histosEff  = dict((group+'_'+orig, efficiencies[var][group]     [orig])
                                  for group in groups for orig in origins)
	        histosComp = dict((group+'_'+orig, compositions[var][group][reg][orig])
                                  for group in groups for orig in origins)
	        avgEff =  weightedAverage(histosEff, histosComp, hname, htitle, verbose)
	        avgEfficiencies[reg][var] = avgEff
	        if is1D:
	            fakeu.plot1dEfficiencies({reg : avgEff}, 'eff1d_'+lepton+'_fake_'+reg+'_anygroup',
                                             outputDir, htitle, zoomIn=True)
	        else:
	            fakeu.plot2dEfficiencies({reg : avgEff}, 'eff2d_'+lepton+'_fake_'+reg+'_anygroup',
                                             outputDir, htitle, zoomIn=True)
	writeHistos(os.path.join(outputDir,'fake_matrices_'+lepton+'_anygroup.root'),
                    avgEfficiencies, verbose)
Exemplo n.º 34
0
def runFill(opts):
    lepton = opts.lepton
    batchMode = opts.batch
    inputDir = opts.input_dir
    outputDir = opts.output_dir
    verbose = opts.verbose
    debug = opts.debug

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        opts.samples_dir)
    if opts.group: groups = [g for g in groups if g.name == opts.group]
    if verbose:
        print '\n'.join(
            "group {0} : {1} samples".format(g.name, len(g.datasets))
            for g in groups)
    if debug:
        print '\n'.join("group {0} : {1} samples: {2}".format(
            g.name, len(g.datasets), '\n\t' + '\n\t'.join(d.name
                                                          for d in g.datasets))
                        for g in groups)
    if verbose: print "filling histos"
    outputDir = outputDir + '/' + lepton + '/histos'
    mkdirIfNeeded(outputDir)
    if batchMode:
        for group in groups:
            submit_batch_fill_job_per_group(group, opts)
    else:
        for group in groups:
            tree_name = 'ss3l_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                chain.Add(os.path.join(inputDir, ds.name + '.root'))
            if opts.verbose:
                print "{0} : {1} entries from {2} samples".format(
                    group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/' +
                                                    group.name + '/')
            tcuts = [
                r.TCut(reg,
                       selection_formulas()[reg])
                for reg in regions_to_plot(opts.include_regions,
                                           opts.exclude_regions, opts.regions)
            ]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list(
            )
            uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list(
            )
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print 'filling cached cuts: ', ' '.join(
                    [c.GetName() for c in cached_tcuts])
            for cut in cached_tcuts:
                chain.preselect(cut)
                c_pre, h_pre = count_and_fill(chain=chain,
                                              opts=opts,
                                              group=group,
                                              cached_cut=cut)
                counters_pre = dictSum(counters_pre, c_pre)
                histos_pre = dictSum(histos_pre, h_pre)
            if verbose:
                print 'filling uncached cuts: ', ' '.join(
                    [c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                counters_npre, histos_npre = count_and_fill(
                    chain=chain,
                    opts=opts,
                    group=group,
                    noncached_cuts=uncached_tcuts)
                chain.save_lists()
            all_histos = dictSum(histos_pre, histos_npre)
            for sel, histos in all_histos.iteritems():
                # write histos for each sel to a separate file (finer granularity, better caching)
                out_filename = os.path.join(outputDir,
                                            group.name + '_' + sel + '.root')
                if verbose: print 'saving to ', out_filename
                writeObjectsToFile(out_filename, histos, verbose)
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    filestems, treenames = utils.verticalSlice(fakeu.tupleStemsAndNames)
    regions = filestems
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(regions))

    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(region)s_%(l)s_scale_histos.root" % {'region':region, 'l':lepton}
    treeName = treenames[regions.index(region)]
    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+region+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    optionsToPrint = ['inputDir', 'outputDir', 'region', 'tag', 'doFillHistograms', 'onthefly_tight_def']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    # collect inputs
    if verbose : print 'input files ',os.path.join(inputDir, templateInputFilename)
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1']
    groups = samplesPerGroup.keys()
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, groups, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, leptonSources, region=region)
        for group in groups:
            isData = isDataSample(group)
            filenames = filenamesPerGroup[group]
            if verbose:
                print " --- group : %s ---".format(group)
                print '\n\t'.join(filenames)
            histosThisGroup = histosPerGroup[group]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group]) for v in histosPerGroupPerSource.keys())
            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            if verbose: print "%s : %d entries"%(group, chain.GetEntries())
            num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                histosThisGroupPerSource,
                                                lepton, group, region,
                                                onthefly_tight_def=onthefly_tight_def, verbose=verbose)
        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, groups, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in groups:
        hps = dict((v, histosPerSamplePerSource[v][g])for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)


    hn_sf_eta = histoname_sf_vs_eta           (lepton)
    hn_sf_pt  = histoname_sf_vs_pt            (lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt  = histoname_data_fake_eff_vs_pt (lepton)
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, verbose)
    objs_pt  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1',  hn_sf_pt,  hn_da_pt,  outputDir, region, verbose)
    rootUtils.writeObjectsToFile(outputFileName, dictSum(objs_eta, objs_pt), verbose)
    if verbose : print "saved scale factors to %s" % outputFileName
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_el_scale_factor', help='dir for plots')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='where we want the compositions,'
                      ' i.e. one of the regions for which we saved the fake nutples'
                      ' (eg. ssinc1j_tuple*, emu_tuple*')
    parser.add_option('-s', '--syst-fudge', help='scale down main group (el:wjets, mu:bb/cc) to evaluate fraction syst unc')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    systfudge = options.syst_fudge
    region    = options.region
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if not region : parser.error('region is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    outputDir = outputDir+'/'+lepton # split the output in subdirectories, so we don't overwrite things

    templateInputFilename = "*_%(region)s_tuple_%(tag)s.root" % {'tag':tag, 'region':region}
    templateOutputFilename =  "%(l)s_composition_histos.root" % {'l':lepton}
    treeName = dict(fakeu.tupleStemsAndNames)[region]
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    optionsToPrint = ['inputDir', 'outputDir', 'tag', 'doFillHistograms', 'systfudge']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
    # collect inputs
    print '----> input files ',os.path.join(inputDir, templateInputFilename)
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt', 'eta', 'pt_eta', 'mt', 'mdeltar']
    groups = samplesPerGroup.keys()
    if lepton=='el' : groups = [g for g in groups if g!='heavyflavor']
    selections = [region]
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, leptonSources, selections)
        for group in groups:
            isData = isDataSample(group)
            filenames = filenamesPerGroup[group]
            histosThisGroupPerSource = histosPerGroupPerSource[group]
            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            print "%s : %d entries (%d files)"%(group, chain.GetEntries(), chain.GetListOfFiles().GetEntries())
            num_processed_entries += fillHistos(chain, histosThisGroupPerSource,
                                                isData, lepton, group, region, verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # compute and plot fractions
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, leptonSources, selections))
    histosCompositions = dict()
    for sel in selections:
        histosCompositions[sel] = dict()
        for var in vars:
            hs, groups = histosPerGroupPerSource, histosPerGroupPerSource.keys()
            groups = [g for g in groups if g!='data' and g!='higgs']
            histosHeavy = dict((g, hs[g][sel]['heavy'][var]['loose']) for g in groups)
            histosLight = dict((g, hs[g][sel]['light'][var]['loose']) for g in groups)
            histosConv  = dict((g, hs[g][sel]['conv' ][var]['loose']) for g in groups)
            normalizeHistos   = plotParametrizedFractions.normalizeHistos
            plotStackedHistos = plotParametrizedFractions.plotStackedHistos

            frameTitle = 'hf '+lepton+': '+sel+' loose;'+var
            canvasName = lepton+'_hf'+sel+'_'+var+'_den'
            plotStackedHistos(histosHeavy, canvasName, outputDir, frameTitle)

            frameTitle = 'lf '+lepton+': '+sel+' loose;'+var
            canvasName = lepton+'_lf'+sel+'_'+var+'_den'
            plotStackedHistos(histosHeavy, canvasName, outputDir, frameTitle)

            frameTitle = 'conv '+lepton+': '+sel+' loose;'+var
            canvasName = lepton+'_conv'+sel+'_'+var+'_den'
            plotStackedHistos(histosConv, canvasName, outputDir, frameTitle)

            # normalize and draw fractions (den only)
            histos = dict([(k+'_heavy',  h) for k,h in histosHeavy.iteritems()] +
                          [(k+'_light',  h) for k,h in histosLight.iteritems()] +
                          [(k+'_conv', h) for k,h in histosConv.iteritems()])
            if systfudge: fudgeCompositions(histosHeavy, histosLight, histosConv if lepton=='el' else None)
            normalizeHistos(histos)
            anygroupCompositions = buildCompositionsAddingGroups({'heavy':histosHeavy, 'light':histosLight, 'conv':histosConv})
            histosCompositions[sel][var] = {'bygroup':histos, 'anygroup': anygroupCompositions}
            is1Dhisto = var!='pt_eta' # can only stack 1D plots
            if is1Dhisto:
                histosBySource = {'heavy':histosHeavy, 'light':histosLight, 'conv':histosConv}
                frameTitle = lepton+': '+sel+';'+var
                canvasBaseName = lepton+'_fake'+sel+'_'+var+'_frac'
                plotFractionsStacked(histosBySource, canvasBaseName+'_stack', outputDir, frameTitle)
    writeHistos(outputFileName, histosCompositions, verbose)
def plotStackedHistos(histosPerGroup={},
                      outputDir='',
                      region='',
                      verbose=False):
    groups = histosPerGroup.keys()
    variables = first(histosPerGroup).keys()
    leptonTypes = first(first(histosPerGroup)).keys()
    colors = getGroupColor()
    mkdirIfNeeded(outputDir)
    histosPerName = dict([
        (
            region + '_' + var + '_' +
            lt,  # one canvas for each histo, so key with histoname w/out group
            dict([(g, histosPerGroup[g][var][lt]) for g in groups]))
        for var in variables for lt in leptonTypes
    ])
    for histoname, histosPerGroup in histosPerName.iteritems():
        missingGroups = [g for g, h in histosPerGroup.iteritems() if not h]
        if missingGroups:
            if verbose:
                print "skip %s, missing histos for %s" % (histoname,
                                                          str(missingGroups))
            continue
        bkgHistos = dict([(g, h) for g, h in histosPerGroup.iteritems()
                          if g not in ['data', 'signal']])
        totBkg = summedHisto(bkgHistos.values())
        err_band = None  # buildErrBandGraph(totBkg, computeStatErr2(totBkg))
        emptyBkg = totBkg.Integral() == 0
        if emptyBkg:
            if verbose: print "empty backgrounds, skip %s" % histoname
            continue
        can = r.TCanvas('c_' + histoname, histoname, 800, 600)
        can.cd()
        pm = totBkg  # pad master
        pm.SetStats(False)
        pm.Draw('axis')
        can.Update()  # necessary to fool root's dumb object ownership
        stack = r.THStack('stack_' + histoname, '')
        can.Update()
        r.SetOwnership(stack, False)
        for s, h in bkgHistos.iteritems():
            h.SetFillColor(colors[s] if s in colors else r.kOrange)
            h.SetDrawOption('bar')
            h.SetDirectory(0)
            stack.Add(h)
        stack.Draw('hist same')
        # err_band.Draw('E2 same')
        data = histosPerGroup['data']
        if data and data.GetEntries():
            data.SetMarkerStyle(r.kFullDotLarge)
            data.Draw('p same')
        # yMin, yMax = getMinMax([h for h in [totBkg, data, err_band] if h]) # fixme with err_band
        yMin, yMax = 0.0, data.GetMaximum()
        pm.SetMinimum(0.0)
        pm.SetMaximum(1.1 * yMax)
        can.Update()
        topRightLabel(can, histoname, xpos=0.125, align=13)
        # drawLegendWithDictKeys(can, dictSum(bkgHistos, {'stat err':err_band}), opt='f')
        drawLegendWithDictKeys(can, bkgHistos, opt='f')
        can.RedrawAxis()
        can._stack = stack
        can._histos = [h for h in stack.GetHists()] + [data]
        can.Update()
        outFname = os.path.join(outputDir, histoname + '.png')
        utils.rmIfExists(outFname)
        can.SaveAs(outFname)
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g', '--group', help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-r', '--region', help='one of the regions for which we saved the fake ntuples')
    parser.add_option('--samples-dir', default='samples/', help='directory with the list of samples; default ./samples/')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('--keep-real', action='store_true', default=False, help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache', action='store_true', help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    region    = options.region
    keepreal  = options.keep_real
    debug     = options.debug
    verbose   = options.verbose
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions,"invalid region '%s', must be one of %s"%(region, str(sorted(regions)))
    regions = [region]

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(options.samples_dir)
    if options.group : groups = [g for g in groups if g.name==options.group]
    group_names = [g.name for g in groups]

    outputDir = outputDir+'/'+region+'/'+lepton # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    if verbose : utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars, leptonSources, region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, group_names, leptonSources, region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name+'.root')
                if os.path.exists(fname):
                    chain.Add(fname)
            if verbose:
                print "{0} : {1} entries from {2} samples".format(group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            print 'tcuts ',[c.GetName() for c in tcuts]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            print 'tcuts_with_existing_list ',str([c.GetName() for c in chain.tcuts_with_existing_list()])
            print 'tcuts_without_existing_list ',str([c.GetName() for c in chain.tcuts_without_existing_list()])
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list()
            print 'cached_tcuts ',[c.GetName() for c in cached_tcuts]
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list()
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
            if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts])
            if verbose: print "%s : %d entries"%(group.name, chain.GetEntries())
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict((v, histosPerGroupPerSource[v][group.name]) for v in histosPerGroupPerSource.keys())
            for cut in cached_tcuts:
                print 'cached_tcut ',cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=True,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
            if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                assert len(uncached_tcuts)==1, "expecting only one cut, got {}".format(len(uncached_tcuts))
                cut = uncached_tcuts[0]
                chain.preselect(None)
                num_processed_entries += fillHistos(chain, histosThisGroup, histosPerSource,
                                                    histosThisGroupPerSource,
                                                    lepton, group,
                                                    cut, cut_is_cached=False,
                                                    onthefly_tight_def=onthefly_tight_def,
                                                    verbose=verbose)
                chain.save_lists()

        writeHistos(cacheFileName, histosPerGroup, histosPerSource, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>60 else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # return
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName, histoNames(vars, group_names, region), verbose)
    histosPerSource = fetchHistos(cacheFileName, histoNamesPerSource(vars, leptonSources, region), verbose)
    histosPerSamplePerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, group_names, leptonSources, region), verbose)
    plotStackedHistos(histosPerGroup, outputDir+'/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir+'/by_source', region, verbose)
    plotPerSourceEff(histosPerVar=histosPerSource, outputDir=outputDir+'/by_source', lepton=lepton, region=region, verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g])for v in vars)
        plotPerSourceEff(histosPerVar=hps, outputDir=outputDir, lepton=lepton, region=region, sample=g, verbose=verbose)


    hn_sf_eta = histoname_sf_vs_eta           (lepton)
    hn_sf_pt  = histoname_sf_vs_pt            (lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt  = histoname_data_fake_eff_vs_pt (lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1', hn_sf_eta, hn_da_eta, outputDir, region, subtractReal, verbose)
    objs_pt  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1',  hn_sf_pt,  hn_da_pt,  outputDir, region, subtractReal, verbose)
    objs_pt_eta  = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1_eta1',
                                                     histoname_sf_vs_pt_eta(lepton),
                                                     histoname_data_fake_eff_vs_pt_eta(lepton),
                                                     outputDir, region, subtractReal, verbose)
    rootUtils.writeObjectsToFile(outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta), verbose)
    if verbose : print "saved scale factors to %s" % outputFileName
Exemplo n.º 39
0
def runFill(opts) :
    batchMode    = opts.batch
    inputFakeDir = opts.input_fake
    inputGenDir  = opts.input_other
    outputDir    = opts.output_dir
    verbose      = opts.verbose
    debug        = opts.debug
    blinded      = not opts.unblind
    tightight    = opts.require_tight_tight

    if debug : dataset.Dataset.verbose_parsing = True
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(opts.samples_dir)
    if not skip_charge_flip : groups.append(dataset.DatasetGroup.build_qflip_from_simulated_samples(groups))
    groups.append(first([g for g in groups if g.is_data]).clone_data_as_fake())
    groups = parse_group_option(opts, groups)
    if verbose : print '\n'.join("group {0} : {1} samples".format(g.name, len(g.datasets)) for g in groups)
    if debug :
        print '\n'.join("group {0} : {1} samples: {2}".format(g.name,
                                                              len(g.datasets),
                                                              '\n\t'+'\n\t'.join(d.name for d in g.datasets))
                        for g in groups)
    if verbose : print "filling histos"
    # eval will take care of aborting on typos
    onthefly_tight_def = eval(opts.tight_def) if opts.tight_def else None
    mkdirIfNeeded(outputDir)
    systematics = get_list_of_syst_to_fill(opts)
    regions = regions_to_plot(opts.include_regions, opts.exclude_regions, opts.regions)
    if verbose : print "about to loop over these systematics:\n %s"%str(systematics)
    if verbose : print "about to loop over these regions:\n %s"%str(regions)
    if batchMode:
        for group in groups:
            for systematic in systematics:
                if systUtils.Group(group.name).isNeededForSys(systematic):
                    opts.syst = systematic
                    for selection in regions:
                        submit_batch_fill_job_per_group_per_selection(group=group, selection=selection, opts=opts)
    else:
        for group in groups:
            systematics = [s for s in systematics if systUtils.Group(group.name).isNeededForSys(s)]
            if not systematics : print "warning, empty syst list. You should have at least the nominal"
            for systematic in systematics:
                # note to self: here you will want to use a modified Sample.setHftInputDir
                # for now we just have the fake syst that are in the nominal tree
                tree_name = 'hlfv_tuple'
                chain = IndexedChain(tree_name)
                input_dir = opts.input_fake if group.name=='fake' else opts.input_other
                for ds in group.datasets:
                    chain.Add(os.path.join(input_dir, systUtils.Sample(ds.name, group.name).setSyst(systematic).filename))
                if opts.verbose:
                    print "{0} : {1} entries from {2} samples".format(group.name,
                                                                      chain.GetEntries(),
                                                                      len(group.datasets))
                chain.cache_directory = os.path.abspath('./selection_cache/'+group.name+'/')
                tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
                chain.retrieve_entrylists(tcuts)
                counters_pre, histos_pre = dict(), dict()
                counters_npre, histos_npre = dict(), dict()
                cached_tcuts = [] if opts.disable_cache else chain.tcuts_with_existing_list()
                uncached_tcuts = tcuts if opts.disable_cache else chain.tcuts_without_existing_list()
                if verbose : print 'filling cached cuts: ',' '.join([c.GetName() for c in cached_tcuts])
                for cut in cached_tcuts:
                    chain.preselect(cut)
                    c_pre, h_pre = count_and_fill(chain=chain, sample=group.name,
                                                  syst=systematic, verbose=verbose,
                                                  debug=debug, blinded=blinded,
                                                  onthefly_tight_def=onthefly_tight_def,
                                                  tightight=tightight, quicktest=opts.quick_test,
                                                  cached_cut=cut)
                    out_filename = (systUtils.Group(group.name)
                                    .setSyst(systematic)
                                    .setHistosDir(outputDir)
                                    .setCurrentSelection(cut.GetName())).filenameHisto
                    writeObjectsToFile(out_filename, h_pre, verbose)
                    counters_pre = dictSum(counters_pre, c_pre)
                    histos_pre = dictSum(histos_pre, h_pre)
                if uncached_tcuts:
                    if verbose : print 'filling uncached cuts: ',' '.join([c.GetName() for c in uncached_tcuts])
                    counters_npre, histos_npre = count_and_fill(chain=chain, sample=group.name,
                                                                syst=systematic, verbose=verbose,
                                                                debug=debug, blinded=blinded,
                                                                onthefly_tight_def=onthefly_tight_def,
                                                                tightight=tightight,
                                                                quicktest=opts.quick_test,
                                                                noncached_cuts=uncached_tcuts)
                    for sel, histos in histos_npre.iteritems():
                        out_filename = (systUtils.Group(group.name)
                                        .setSyst(systematic)
                                        .setHistosDir(outputDir)
                                        .setCurrentSelection(sel)).filenameHisto
                        writeObjectsToFile(out_filename, histos, verbose)
                chain.save_lists()
def subtractRealAndComputeScaleFactor(histosPerGroup={}, variable='', outRatiohistoname='',outDataeffhistoname='',
                                      outputDir='./', region='', subtractReal=True, verbose=False):
    "efficiency scale factor"
    groups = histosPerGroup.keys()
    mkdirIfNeeded(outputDir)
    histosPerType = dict([(lt,
                           dict([(g,
                                  histosPerGroup[g][variable][lt])
                                 for g in groups]))
                          for lt in leptonTypes])
    for lt in leptonTypes :
        histosPerType[lt]['totSimBkg'] = summedHisto([histo for group,histo in histosPerType[lt].iteritems()
                                                      if group not in ['data', 'signal']])

    simuTight = histosPerType['fake_tight']['totSimBkg']
    simuLoose = histosPerType['fake_loose']['totSimBkg']
    dataTight = histosPerType['tight'     ]['data'     ]
    dataLoose = histosPerType['loose'     ]['data'     ]
    # subtract real contribution from data
    # _Note to self_: currently estimating the real contr from MC; in
    # the past also used iterative corr, which might be more
    # appropriate in cases like here, where the normalization is
    # so-so.  Todo: investigate the normalization.
    dataSubTight = dataTight.Clone(dataTight.GetName().replace('data_tight','data_minus_prompt_tight'))
    dataSubLoose = dataLoose.Clone(dataLoose.GetName().replace('data_loose','data_minus_prompt_loose'))
    dataSubTight.SetDirectory(0)
    dataSubLoose.SetDirectory(0)
    dataSubTight.Add(histosPerType['real_tight']['totSimBkg'], -1.0 if subtractReal else 0.0)
    dataSubLoose.Add(histosPerType['real_loose']['totSimBkg'], -1.0 if subtractReal else 0.0)
    effData = dataSubTight.Clone(outDataeffhistoname)
    effData.SetDirectory(0)
    effData.Divide(dataSubLoose)
    effSimu = simuTight.Clone(simuTight.GetName().replace('fake_tight','fake_eff'))
    effSimu.SetDirectory(0)
    effSimu.Divide(simuLoose)
    print "eff(T|L) vs. ",variable
    def formatFloat(floats): return ["%.4f"%f for f in floats]
    print "efficiency data : ",formatFloat(getBinContents(effData))
    print "efficiency simu : ",formatFloat(getBinContents(effSimu))
    ratio = effData.Clone(outRatiohistoname)
    ratio.SetDirectory(0)
    ratio.Divide(effSimu)
    print "sf    data/simu : ",formatFloat(getBinContents(ratio))
    print "            +/- : ",formatFloat(getBinErrors(ratio))
    can = r.TCanvas('c_'+outRatiohistoname, outRatiohistoname, 800, 600)
    botPad, topPad = rootUtils.buildBotTopPads(can)
    can.cd()
    topPad.Draw()
    topPad.cd()
    pm = effData
    pm.SetStats(0)
    pm.Draw('axis')
    xAx, yAx = pm.GetXaxis(), pm.GetYaxis()
    xAx.SetTitle('')
    xAx.SetLabelSize(0)
    yAx.SetRangeUser(0.0, 0.25)
    textScaleUp = 1.0/topPad.GetHNDC()
    yAx.SetLabelSize(textScaleUp*0.04)
    yAx.SetTitleSize(textScaleUp*0.04)
    yAx.SetTitle('#epsilon(T|L)')
    yAx.SetTitleOffset(yAx.GetTitleOffset()/textScaleUp)
    effSimu.SetLineColor(r.kRed)
    effSimu.SetMarkerStyle(r.kOpenCross)
    effSimu.SetMarkerColor(effSimu.GetLineColor())
    effData.Draw('same')
    effSimu.Draw('same')
    leg = drawLegendWithDictKeys(topPad, {'data':effData, 'simulation':simuTight}, legWidth=0.4)
    leg.SetHeader('scale factor '+region+' '+('electron' if '_el_'in outRatiohistoname else
                                              'muon' if '_mu_' in outRatiohistoname else ''))
    can.cd()
    botPad.Draw()
    botPad.cd()
    ratio.SetStats(0)
    ratio.Draw()
    textScaleUp = 1.0/botPad.GetHNDC()
    xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis()
    yAx.SetRangeUser(0.0, 2.0)
    xAx.SetTitle({'pt1':'p_{T}', 'eta1':'|#eta|', 'pt1_eta1':'p_{T}'}[variable])
    yAx.SetNdivisions(-202)
    yAx.SetTitle('Data/Sim')
    yAx.CenterTitle()
    xAx.SetLabelSize(textScaleUp*0.04)
    xAx.SetTitleSize(textScaleUp*0.04)
    yAx.SetLabelSize(textScaleUp*0.04)
    yAx.SetTitleSize(textScaleUp*0.04)
    refLine = rootUtils.referenceLine(xAx.GetXmin(), xAx.GetXmax())
    refLine.Draw()
    can.Update()
    outFname = os.path.join(outputDir, region+'_'+outRatiohistoname)
    for ext in ['.eps','.png']:
        utils.rmIfExists(outFname+ext)
        can.SaveAs(outFname+ext)
    return {outRatiohistoname : ratio,
            outDataeffhistoname : effData,
            outDataeffhistoname.replace('_fake_rate_data_', '_tight_data_minus_prompt') : dataSubTight,
            outDataeffhistoname.replace('_fake_rate_data_', '_loose_data_minus_prompt') : dataSubLoose
            }
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-g',
                      '--group',
                      help='group to be processed (used only in fill mode)')
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fake_scale_factor')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option(
        '-r',
        '--region',
        help='one of the regions for which we saved the fake ntuples')
    parser.add_option(
        '--samples-dir',
        default='samples/',
        help='directory with the list of samples; default ./samples/')
    parser.add_option(
        '-T',
        '--tight-def',
        help=
        'on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.'
    )
    parser.add_option('-f',
                      '--fill-histos',
                      action='store_true',
                      default=False,
                      help='force fill (default only if needed)')
    parser.add_option('--keep-real',
                      action='store_true',
                      default=False,
                      help='do not subtract real (to get real lep efficiency)')
    parser.add_option('--debug', action='store_true')
    parser.add_option('--verbose', action='store_true')
    parser.add_option('--disable-cache',
                      action='store_true',
                      help='disable the entry cache')
    (options, args) = parser.parse_args()
    inputDir = options.input_dir
    outputDir = options.output_dir
    lepton = options.lepton
    region = options.region
    keepreal = options.keep_real
    debug = options.debug
    verbose = options.verbose
    if lepton not in ['el', 'mu']: parser.error("invalid lepton '%s'" % lepton)
    regions = kin.selection_formulas().keys()
    assert region in regions, "invalid region '%s', must be one of %s" % (
        region, str(sorted(regions)))
    regions = [region]

    dataset.Dataset.verbose_parsing = True if debug else False
    groups = dataset.DatasetGroup.build_groups_from_files_in_dir(
        options.samples_dir)
    if options.group: groups = [g for g in groups if g.name == options.group]
    group_names = [g.name for g in groups]

    outputDir = outputDir + '/' + region + '/' + lepton  # split the output in subdirectories, so we don't overwrite things
    mkdirIfNeeded(outputDir)
    templateOutputFilename = "scale_factor_{0}.root".format(lepton)
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(
        options.tight_def
    ) if options.tight_def else None  # eval will take care of aborting on typos
    if verbose: utils.print_running_conditions(parser, options)
    vars = ['mt0', 'mt1', 'pt0', 'pt1', 'eta1', 'pt1_eta1']
    #fill histos
    if doFillHistograms:
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroup = bookHistos(vars, group_names, region=region)
        histosPerSource = bookHistosPerSource(vars,
                                              leptonSources,
                                              region=region)
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars,
                                                               group_names,
                                                               leptonSources,
                                                               region=region)
        for group in groups:
            tree_name = 'hlfv_tuple'
            chain = IndexedChain(tree_name)
            for ds in group.datasets:
                fname = os.path.join(inputDir, ds.name + '.root')
                if os.path.exists(fname):
                    chain.Add(fname)
            if verbose:
                print "{0} : {1} entries from {2} samples".format(
                    group.name, chain.GetEntries(), len(group.datasets))
            chain.cache_directory = os.path.abspath('./selection_cache/' +
                                                    group.name + '/')
            tcuts = [r.TCut(reg, selection_formulas()[reg]) for reg in regions]
            print 'tcuts ', [c.GetName() for c in tcuts]
            chain.retrieve_entrylists(tcuts)
            counters_pre, histos_pre = dict(), dict()
            counters_npre, histos_npre = dict(), dict()
            print 'tcuts_with_existing_list ', str(
                [c.GetName() for c in chain.tcuts_with_existing_list()])
            print 'tcuts_without_existing_list ', str(
                [c.GetName() for c in chain.tcuts_without_existing_list()])
            cached_tcuts = [] if options.disable_cache else chain.tcuts_with_existing_list(
            )
            print 'cached_tcuts ', [c.GetName() for c in cached_tcuts]
            uncached_tcuts = tcuts if options.disable_cache else chain.tcuts_without_existing_list(
            )
            print 'todo: skip cuts for which the histo files are there'
            if verbose:
                print " --- group : {0} ---".format(group.name)
                print '\n\t'.join(chain.filenames)
            if verbose:
                print 'filling cached cuts: ', ' '.join(
                    [c.GetName() for c in cached_tcuts])
            if verbose:
                print "%s : %d entries" % (group.name, chain.GetEntries())
            histosThisGroup = histosPerGroup[group.name]
            histosThisGroupPerSource = dict(
                (v, histosPerGroupPerSource[v][group.name])
                for v in histosPerGroupPerSource.keys())
            for cut in cached_tcuts:
                print 'cached_tcut ', cut
                chain.preselect(cut)
                num_processed_entries += fillHistos(
                    chain,
                    histosThisGroup,
                    histosPerSource,
                    histosThisGroupPerSource,
                    lepton,
                    group,
                    cut,
                    cut_is_cached=True,
                    onthefly_tight_def=onthefly_tight_def,
                    verbose=verbose)
            if verbose:
                print 'filling uncached cuts: ', ' '.join(
                    [c.GetName() for c in uncached_tcuts])
            if uncached_tcuts:
                assert len(uncached_tcuts
                           ) == 1, "expecting only one cut, got {}".format(
                               len(uncached_tcuts))
                cut = uncached_tcuts[0]
                chain.preselect(None)
                num_processed_entries += fillHistos(
                    chain,
                    histosThisGroup,
                    histosPerSource,
                    histosThisGroupPerSource,
                    lepton,
                    group,
                    cut,
                    cut_is_cached=False,
                    onthefly_tight_def=onthefly_tight_def,
                    verbose=verbose)
                chain.save_lists()

        writeHistos(cacheFileName, histosPerGroup, histosPerSource,
                    histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        if verbose:
            print("processed {0:d} entries ".format(num_processed_entries) +
                  "in " +
                  ("{0:d} min ".format(int(delta_time / 60))
                   if delta_time > 60 else "{0:.1f} s ".format(delta_time)) +
                  "({0:.1f} kHz)".format(num_processed_entries / delta_time))
    # return
    # compute scale factors
    histosPerGroup = fetchHistos(cacheFileName,
                                 histoNames(vars, group_names, region),
                                 verbose)
    histosPerSource = fetchHistos(
        cacheFileName, histoNamesPerSource(vars, leptonSources, region),
        verbose)
    histosPerSamplePerSource = fetchHistos(
        cacheFileName,
        histoNamesPerSamplePerSource(vars, group_names, leptonSources, region),
        verbose)
    plotStackedHistos(histosPerGroup, outputDir + '/by_group', region, verbose)
    plotStackedHistosSources(histosPerSource, outputDir + '/by_source', region,
                             verbose)
    plotPerSourceEff(histosPerVar=histosPerSource,
                     outputDir=outputDir + '/by_source',
                     lepton=lepton,
                     region=region,
                     verbose=verbose)
    for g in group_names:
        hps = dict((v, histosPerSamplePerSource[v][g]) for v in vars)
        plotPerSourceEff(histosPerVar=hps,
                         outputDir=outputDir,
                         lepton=lepton,
                         region=region,
                         sample=g,
                         verbose=verbose)

    hn_sf_eta = histoname_sf_vs_eta(lepton)
    hn_sf_pt = histoname_sf_vs_pt(lepton)
    hn_da_eta = histoname_data_fake_eff_vs_eta(lepton)
    hn_da_pt = histoname_data_fake_eff_vs_pt(lepton)
    subtractReal = not keepreal
    objs_eta = subtractRealAndComputeScaleFactor(histosPerGroup, 'eta1',
                                                 hn_sf_eta, hn_da_eta,
                                                 outputDir, region,
                                                 subtractReal, verbose)
    objs_pt = subtractRealAndComputeScaleFactor(histosPerGroup, 'pt1',
                                                hn_sf_pt, hn_da_pt, outputDir,
                                                region, subtractReal, verbose)
    objs_pt_eta = subtractRealAndComputeScaleFactor(
        histosPerGroup, 'pt1_eta1', histoname_sf_vs_pt_eta(lepton),
        histoname_data_fake_eff_vs_pt_eta(lepton), outputDir, region,
        subtractReal, verbose)
    rootUtils.writeObjectsToFile(
        outputFileName, dictSum(dictSum(objs_eta, objs_pt), objs_pt_eta),
        verbose)
    if verbose: print "saved scale factors to %s" % outputFileName
def subtractRealAndComputeScaleFactor(histosPerGroup={},
                                      variable='',
                                      outRatiohistoname='',
                                      outDataeffhistoname='',
                                      outputDir='./',
                                      region='',
                                      subtractReal=True,
                                      verbose=False):
    "efficiency scale factor"
    groups = histosPerGroup.keys()
    mkdirIfNeeded(outputDir)
    histosPerType = dict([(lt,
                           dict([(g, histosPerGroup[g][variable][lt])
                                 for g in groups])) for lt in leptonTypes])
    for lt in leptonTypes:
        histosPerType[lt]['totSimBkg'] = summedHisto([
            histo for group, histo in histosPerType[lt].iteritems()
            if group not in ['data', 'signal']
        ])

    simuTight = histosPerType['fake_tight']['totSimBkg']
    simuLoose = histosPerType['fake_loose']['totSimBkg']
    dataTight = histosPerType['tight']['data']
    dataLoose = histosPerType['loose']['data']
    # subtract real contribution from data
    # _Note to self_: currently estimating the real contr from MC; in
    # the past also used iterative corr, which might be more
    # appropriate in cases like here, where the normalization is
    # so-so.  Todo: investigate the normalization.
    dataSubTight = dataTight.Clone(dataTight.GetName().replace(
        'data_tight', 'data_minus_prompt_tight'))
    dataSubLoose = dataLoose.Clone(dataLoose.GetName().replace(
        'data_loose', 'data_minus_prompt_loose'))
    dataSubTight.SetDirectory(0)
    dataSubLoose.SetDirectory(0)
    dataSubTight.Add(histosPerType['real_tight']['totSimBkg'],
                     -1.0 if subtractReal else 0.0)
    dataSubLoose.Add(histosPerType['real_loose']['totSimBkg'],
                     -1.0 if subtractReal else 0.0)
    effData = dataSubTight.Clone(outDataeffhistoname)
    effData.SetDirectory(0)
    effData.Divide(dataSubLoose)
    effSimu = simuTight.Clone(simuTight.GetName().replace(
        'fake_tight', 'fake_eff'))
    effSimu.SetDirectory(0)
    effSimu.Divide(simuLoose)
    print "eff(T|L) vs. ", variable

    def formatFloat(floats):
        return ["%.4f" % f for f in floats]

    print "efficiency data : ", formatFloat(getBinContents(effData))
    print "efficiency simu : ", formatFloat(getBinContents(effSimu))
    ratio = effData.Clone(outRatiohistoname)
    ratio.SetDirectory(0)
    ratio.Divide(effSimu)
    print "sf    data/simu : ", formatFloat(getBinContents(ratio))
    print "            +/- : ", formatFloat(getBinErrors(ratio))
    can = r.TCanvas('c_' + outRatiohistoname, outRatiohistoname, 800, 600)
    botPad, topPad = rootUtils.buildBotTopPads(can)
    can.cd()
    topPad.Draw()
    topPad.cd()
    pm = effData
    pm.SetStats(0)
    pm.Draw('axis')
    xAx, yAx = pm.GetXaxis(), pm.GetYaxis()
    xAx.SetTitle('')
    xAx.SetLabelSize(0)
    yAx.SetRangeUser(0.0, 0.25)
    textScaleUp = 1.0 / topPad.GetHNDC()
    yAx.SetLabelSize(textScaleUp * 0.04)
    yAx.SetTitleSize(textScaleUp * 0.04)
    yAx.SetTitle('#epsilon(T|L)')
    yAx.SetTitleOffset(yAx.GetTitleOffset() / textScaleUp)
    effSimu.SetLineColor(r.kRed)
    effSimu.SetMarkerStyle(r.kOpenCross)
    effSimu.SetMarkerColor(effSimu.GetLineColor())
    effData.Draw('same')
    effSimu.Draw('same')
    leg = drawLegendWithDictKeys(topPad, {
        'data': effData,
        'simulation': simuTight
    },
                                 legWidth=0.4)
    leg.SetHeader('scale factor ' + region + ' ' +
                  ('electron' if '_el_' in outRatiohistoname else
                   'muon' if '_mu_' in outRatiohistoname else ''))
    can.cd()
    botPad.Draw()
    botPad.cd()
    ratio.SetStats(0)
    ratio.Draw()
    textScaleUp = 1.0 / botPad.GetHNDC()
    xAx, yAx = ratio.GetXaxis(), ratio.GetYaxis()
    yAx.SetRangeUser(0.0, 2.0)
    xAx.SetTitle({
        'pt1': 'p_{T}',
        'eta1': '|#eta|',
        'pt1_eta1': 'p_{T}'
    }[variable])
    yAx.SetNdivisions(-202)
    yAx.SetTitle('Data/Sim')
    yAx.CenterTitle()
    xAx.SetLabelSize(textScaleUp * 0.04)
    xAx.SetTitleSize(textScaleUp * 0.04)
    yAx.SetLabelSize(textScaleUp * 0.04)
    yAx.SetTitleSize(textScaleUp * 0.04)
    refLine = rootUtils.referenceLine(xAx.GetXmin(), xAx.GetXmax())
    refLine.Draw()
    can.Update()
    outFname = os.path.join(outputDir, region + '_' + outRatiohistoname)
    for ext in ['.eps', '.png']:
        utils.rmIfExists(outFname + ext)
        can.SaveAs(outFname + ext)
    return {
        outRatiohistoname:
        ratio,
        outDataeffhistoname:
        effData,
        outDataeffhistoname.replace('_fake_rate_data_', '_tight_data_minus_prompt'):
        dataSubTight,
        outDataeffhistoname.replace('_fake_rate_data_', '_loose_data_minus_prompt'):
        dataSubLoose
    }
def main():
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-i', '--input-dir', default='./out/fakerate')
    parser.add_option('-o', '--output-dir', default='./out/fakerate/efficiencies')
    parser.add_option('-l', '--lepton', default='el', help='either el or mu')
    parser.add_option('-m', '--mode', help='emu')
    parser.add_option('-t', '--tag', help='tag used to select the input files (e.g. Apr_04)')
    parser.add_option('-f', '--fill-histos', action='store_true', default=False, help='force fill (default only if needed)')
    parser.add_option('-T', '--tight-def', help='on-the-fly tight def, one of defs in fakeUtils.py: fakeu.lepIsTight_std, etc.')
    parser.add_option('-v', '--verbose', action='store_true', default=False)
    (options, args) = parser.parse_args()
    inputDir  = options.input_dir
    outputDir = options.output_dir
    lepton    = options.lepton
    mode      = options.mode
    tag       = options.tag
    verbose   = options.verbose
    if not tag : parser.error('tag is a required option')
    if lepton not in ['el', 'mu'] : parser.error("invalid lepton '%s'"%lepton)
    validModes = ['emu']
    if mode not in validModes : parser.error("invalid mode %s"%mode)
    tupleStem, treeName = filter(lambda _: _[0]==mode, fakeu.tupleStemsAndNames)[0]

    templateInputFilename = "*_%(stem)s_tuple_%(tag)s.root" % {'tag':tag, 'stem':tupleStem}
    templateOutputFilename =  "%(stem)s_%(l)s_eff.root" % {'stem':tupleStem.replace('tuple','histos'), 'l':lepton}
    outputFileName = os.path.join(outputDir, templateOutputFilename)
    cacheFileName = outputFileName.replace('.root', '_'+mode+'_cache.root')
    doFillHistograms = options.fill_histos or not os.path.exists(cacheFileName)
    onthefly_tight_def = eval(options.tight_def) if options.tight_def else None # eval will take care of aborting on typos
    optionsToPrint = ['inputDir', 'outputDir', 'mode', 'tag', 'doFillHistograms', 'cacheFileName', 'onthefly_tight_def']
    if verbose :
        print "working from %s"%os.getcwd()
        print "being called as : %s"%' '.join(os.sys.argv)
        print "options parsed:\n"+'\n'.join(["%s : %s"%(o, eval(o)) for o in optionsToPrint])
        print 'input filenames: ',os.path.join(inputDir, templateInputFilename)
    # collect inputs
    tupleFilenames = glob.glob(os.path.join(inputDir, templateInputFilename))
    samples = setSameGroupForAllData(fastSamplesFromFilenames(tupleFilenames, verbose))
    samplesPerGroup = collections.defaultdict(list)
    filenamesPerGroup = collections.defaultdict(list)
    mkdirIfNeeded(outputDir)
    for s, f in zip(samples, tupleFilenames) :
        samplesPerGroup[s.group].append(s)
        filenamesPerGroup[s.group].append(f)
    vars = ['pt', 'pt_eta']
    groups = [g for g in samplesPerGroup.keys() if g is not 'higgs']
    if lepton=='el' : groups = [g for g in groups if g is not 'heavyflavor']
    sourcesThisMode = ['real', 'conv', 'heavy', 'light', 'unknown'] if lepton=='el' else ['real', 'heavy', 'light', 'unknown']
    #fill histos
    if doFillHistograms :
        start_time = time.clock()
        num_processed_entries = 0
        histosPerGroupPerSource = bookHistosPerSamplePerSource(vars, groups, sourcesThisMode, mode=mode)
        for group in groups:
            filenames = filenamesPerGroup[group]
            sources = histosPerGroupPerSource.keys()
            histosThisGroupPerSource = dict((s, histosPerGroupPerSource[s][group]) for s in sources)
            histosAnyGroupPerSource  = dict((s, histosPerGroupPerSource[s]['anygroup']) for s in sources) if group!='data' else {}

            chain = r.TChain(treeName)
            [chain.Add(fn) for fn in filenames]
            if verbose: print "%s : %d entries"%(group, chain.GetEntries())
            is_data = group in ['data']
            print 'is_data ',is_data
            num_processed_entries += fillHistos(chain=chain,
                                                histosPerSource=histosThisGroupPerSource,
                                                histosPerSourceAnygroup=histosAnyGroupPerSource,
                                                lepton=lepton,
                                                onthefly_tight_def=onthefly_tight_def,
                                                verbose=verbose)
        writeHistos(cacheFileName, histosPerGroupPerSource, verbose)
        end_time = time.clock()
        delta_time = end_time - start_time
        one_minute = 60
        if verbose:
            print ("processed {0:d} entries ".format(num_processed_entries)
                   +"in "+("{0:d} min ".format(int(delta_time/60)) if delta_time>one_minute else
                           "{0:.1f} s ".format(delta_time))
                   +"({0:.1f} kHz)".format(num_processed_entries/delta_time))
    # plot histos
    histosPerGroupPerSource = fetchHistos(cacheFileName, histoNamesPerSamplePerSource(vars, groups, sourcesThisMode, mode), verbose)

    # effs = computeEfficiencies(histosPerGroupPerSource) # still [var][gr][source][l/t]
    for v in vars:
        varIs1D, varIs2D = v=='pt', v=='pt_eta'
        densThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['loose']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
        numsThisSourceThisVar = dictSum(dict((s, histosPerGroupPerSource[v]['anygroup'][s]['tight']) for s in sourcesThisMode),
                                        {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
        if varIs1D:
            lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
            cname = 'stack_loose_'+lepton
            lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(densThisSourceThisVar,
                                      outputDir, cname, title,
                                      colors=fakeu.colorsFillSources(),
                                      verbose=verbose)
            cname = 'stack_tight_'+lepton
            lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
            title = lT+' '+'anysource'+' '+lepton+';'+lX+';'+lY
            plotStackedHistosWithData(numsThisSourceThisVar,
                                      outputDir, cname, title,
                                      colors=fakeu.colorsFillSources(),
                                      verbose=verbose)

    for s in sourcesThisMode:
        for v in vars:
            groups = first(histosPerGroupPerSource).keys()
            varIs1D, varIs2D = v=='pt', v=='pt_eta'
            # effsThisSourceThisVar = dict((g, effs[v][g][s]) for g in groups)
            densThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g][s]['loose'])
                                                 for g in groups if g not in ['anygroup','data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['loose']})
            numsThisSourceThisVar = dictSum(dict((g, histosPerGroupPerSource[v][g]['unknown']['tight'])
                                                 for g in groups if g not in ['anygroup','data']),
                                            {'data' : histosPerGroupPerSource[v]['data']['unknown']['tight']})
            if varIs1D:
                # cname = 'eff_'+lepton+'_'+s
                lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#varepsilon(T|L)'
                # title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                # zoomIn = True
                # fakeu.plot1dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn)
                cname = 'stack_loose_'+lepton+'_'+s
                lT, lY = 'loose '+lepton+', denominator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(densThisSourceThisVar,
                                          outputDir, cname, title,
                                          colors=SampleUtils.colors,
                                          verbose=verbose)
                cname = 'stack_tight_'+lepton+'_'+s
                lT, lY = 'tight '+lepton+', numerator to #varepsilon(T|L)', '#varepsilon(T|L)'
                title = lT+' '+s+' '+lepton+';'+lX+';'+lY
                plotStackedHistosWithData(numsThisSourceThisVar,
                                          outputDir, cname, title,
                                          colors=SampleUtils.colors,
                                          verbose=verbose)

            # elif varIs2D:
            #     cname = 'eff_'+lepton+'_'+s
            #     lT, lX, lY = '#varepsilon(T|L)', 'p_{T} [GeV]', '#eta'
            #     title = lT+' '+s+' '+lepton+';'+lX+';'+lY
            #     fakeu.plot2dEfficiencies(effsThisSourceThisVar, cname, outputDir, title, zoomIn=zoomIn)
    # writeHistos(outputFileName, effs, verbose)
    if verbose : print "saved scale factors to %s" % outputFileName
def runPlot(opts) :
    inputDir     = opts.input_dir
    outputDir    = opts.output_dir
    sysOption    = opts.syst
    excludedSyst = opts.exclude
    verbose      = opts.verbose
    mkdirIfNeeded(outputDir)
    buildTotBkg = systUtils.buildTotBackgroundHisto
    buildStat = systUtils.buildStatisticalErrorBand
    buildSyst = systUtils.buildSystematicErrorBand

    groups = allGroups()
    selections = allRegions()
    variables = variablesToPlot()
    for group in groups :
        group.setHistosDir(inputDir)
        group.exploreAvailableSystematics(verbose)
        group.filterAndDropSystematics(sysOption, excludedSyst, verbose)

    mkdirIfNeeded(outputDir)
    systematics = ['NOM']
    anySys = sysOption==None
    if sysOption=='fake'   or anySys : systematics += systUtils.fakeSystVariations()
    if sysOption=='object' or anySys : systematics += systUtils.mcObjectVariations()
    if sysOption=='weight' or anySys : systematics += systUtils.mcWeightVariations()
    if sysOption and sysOption.count(',') : systematics = [s for s in systUtils.getAllVariations() if s in sysOption.split(',')]
    elif sysOption in systUtils.getAllVariations() : systematics = [sysOption]
    if not anySys and len(systematics)==1 and sysOption!='NOM' : raise ValueError("Invalid syst %s"%str(sysOption))
    if excludedSyst : systematics = [s for s in systematics if s not in filterWithRegexp(systematics, excludedSyst)]
    if verbose : print "using the following systematics : %s"%str(systematics)

    fakeSystematics = [s for s in systematics if s in systUtils.fakeSystVariations()]
    mcSystematics = [s for s in systematics if s in systUtils.mcObjectVariations() + systUtils.mcWeightVariations()]

    simBkgs = [g for g in groups if g.isMcBkg]
    data, fake, signal = findByName(groups, 'data'), findByName(groups, 'fake'), findByName(groups, 'signal')

    for sel in selections :
        if verbose : print '-- plotting ',sel
        for var in variables :
            if verbose : print '---- plotting ',var
            for g in groups : g.setSystNominal()
            nominalHistoData    = data.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoSign    = signal.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistoFakeBkg = fake.getHistogram(variable=var, selection=sel, cacheIt=True)
            nominalHistosSimBkg = dict([(g.name, g.getHistogram(variable=var, selection=sel, cacheIt=True)) for g in simBkgs])
            nominalHistosBkg    = dict([('fake', nominalHistoFakeBkg)] + [(g, h) for g, h in nominalHistosSimBkg.iteritems()])
            nominalHistoTotBkg  = buildTotBkg(histoFakeBkg=nominalHistoFakeBkg, histosSimBkgs=nominalHistosSimBkg)
            statErrBand = buildStat(nominalHistoTotBkg)
            systErrBand = buildSyst(fake=fake, simBkgs=simBkgs, variable=var, selection=sel,
                                    fakeVariations=fakeSystematics, mcVariations=mcSystematics, verbose=verbose)

            plotHistos(histoData=nominalHistoData, histoSignal=nominalHistoSign, histoTotBkg=nominalHistoTotBkg,
                       histosBkg=nominalHistosBkg,
                       statErrBand=statErrBand, systErrBand=systErrBand,
                       canvasName=(sel+'_'+var), outdir=outputDir, verbose=verbose)
    for group in groups :
        summary = group.variationsSummary()
        for selection, summarySel in summary.iteritems() :
            colW = str(12)
            header = ' '.join([('%'+colW+'s')%colName for colName in ['variation', 'yield', 'delta[%]']])
            lineTemplate = '%(sys)'+colW+'s'+'%(counts)'+colW+'s'+'%(delta)'+colW+'s'
            print "---- summary of variations for %s ----" % group.name
            print "---             %s                 ---" % selection
            print header
            print '\n'.join(lineTemplate%{'sys':s,
                                          'counts':(("%.3f"%c) if type(c) is float else (str(c)+str(type(c)))),
                                          'delta' :(("%.3f"%d) if type(d) is float else '--' if d==None else (str(d)+str(type(d)))) }
                            for s,c,d in summarySel)