Пример #1
0
def latexfitresults(filename,regionList,sampleList,exactRegionNames=False,dataname='obsData',showSum=False, doAsym=True, blinded=False):
  workspacename = 'w'
  w = Util.GetWorkspaceFromFile(filename,'w')

  if w==None:
    print "ERROR : Cannot open workspace : ", workspacename
    sys.exit(1) 

  resultAfterFit = w.obj('RooExpandedFitResult_afterFit')
  if resultAfterFit==None:
    print "ERROR : Cannot open fit result after fit RooExpandedFitResult_afterFit"
    sys.exit(1)

  resultBeforeFit = w.obj('RooExpandedFitResult_beforeFit')
  if resultBeforeFit==None:
    print "ERROR : Cannot open fit result before fit RooExpandedFitResult_beforeFit"
    sys.exit(1)

  data_set = w.data(dataname)
  if data_set==None:
    print "ERROR : Cannot open dataset : ", "data_set"+suffix
    sys.exit(1)
      
  regionCat = w.obj("channelCat")
  data_set.table(regionCat).Print("v");

  regionFullNameList = [ Util.GetFullRegionName(regionCat, region) for region in regionList]
  print regionFullNameList

  ######

  snapshot =  'snapshot_paramsVals_RooExpandedFitResult_afterFit'
  w.loadSnapshot(snapshot)

  if not w.loadSnapshot(snapshot):
    print "ERROR : Cannot load snapshot : ", snapshot
    sys.exit(1)

  tablenumbers = {}

  # SUM ALL REGIONS
  sumName = ""
  for index, reg in enumerate(regionList):
    if index == 0:
      sumName = reg
    else:
      sumName = sumName + " + " + reg
  
  regionListWithSum = list(regionList)
  if showSum:
    regionListWithSum.append(sumName)

  tablenumbers['names'] = regionListWithSum

  regionCatList = [ 'channelCat==channelCat::' +region.Data() for region in regionFullNameList]
  
  regionDatasetList = [data_set.reduce(regioncat) for regioncat in regionCatList]
  for index, data in enumerate(regionDatasetList):
    data.SetName("data_" + regionList[index])
    data.SetTitle("data_" + regionList[index])
    
  nobs_regionList = [ data.sumEntries() for data in regionDatasetList]
  #SUM
  sumNobs = 0.
  for nobs in nobs_regionList:
    sumNobs += nobs
    ## print " \n XXX nobs = ", nobs, "    sumNobs = ", sumNobs
  if showSum:
    nobs_regionList.append(sumNobs)
  tablenumbers['nobs'] = nobs_regionList
 
  ######
  ######
  ######  FROM HERE ON OUT WE CALCULATE THE FITTED NUMBER OF EVENTS __AFTER__ THE FIT
  ######
  ######

  # total pdf, not splitting in components
  pdfinRegionList = [ Util.GetRegionPdf(w, region)  for region in regionList]
  varinRegionList =  [ Util.GetRegionVar(w, region) for region in regionList]
  rrspdfinRegionList = []
  for index,pdf in enumerate(pdfinRegionList):
#    pdf.Print("t")
    prodList = pdf.pdfList()
    foundRRS = 0
    for idx in range(prodList.getSize()):
      #      if "BG" in prodList[idx].GetName():
      #        prodList[idx].Print("t")
      if prodList[idx].InheritsFrom("RooRealSumPdf"):
        rrspdfInt =  prodList[idx].createIntegral(RooArgSet(varinRegionList[index]));
        rrspdfinRegionList.append(rrspdfInt)
        foundRRS += 1
    if foundRRS >1 or foundRRS==0:
      print " \n\n WARNING: ", pdf.GetName(), " has ", foundRRS, " instances of RooRealSumPdf"
      print pdf.GetName(), " component list:", prodList.Print("v")
    
  nFittedInRegionList =  [ pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)]
  pdfFittedErrInRegionList = [ Util.GetPropagatedError(pdf, resultAfterFit, doAsym) for pdf in rrspdfinRegionList]

  if showSum:
    pdfInAllRegions = RooArgSet()
    for index, pdf in enumerate(rrspdfinRegionList):
      pdfInAllRegions.add(pdf)
    pdfSumInAllRegions = RooAddition( "pdf_AllRegions_AFTER", "pdf_AllRegions_AFTER", RooArgList(pdfInAllRegions))
    pdfSumInAllRegions.Print()
    nPdfSumVal = pdfSumInAllRegions.getVal()
    nPdfSumError = Util.GetPropagatedError(pdfSumInAllRegions, resultAfterFit, doAsym)
    nFittedInRegionList.append(nPdfSumVal)
    pdfFittedErrInRegionList.append(nPdfSumError)
  
  tablenumbers['TOTAL_FITTED_bkg_events']    =  nFittedInRegionList
  tablenumbers['TOTAL_FITTED_bkg_events_err']    =  pdfFittedErrInRegionList
 
  if blinded: 
    nobs_regionList = [ data.sumEntries() for data in regionDatasetList]
    nobs_regionList[-1] = -1
    tablenumbers['nobs'] = nobs_regionList

  # components
  for isam, sample in enumerate(sampleList):
    sampleName=getName(sample)
    nSampleInRegionVal = []
    nSampleInRegionError = []
    sampleInAllRegions = RooArgSet()
    for ireg, region in enumerate(regionList):
      sampleInRegion=getPdfInRegions(w,sample,region)
      #sampleInRegion = Util.GetComponent(w,sample,region,exactRegionNames)
      sampleInRegionVal = 0.
      sampleInRegionError = 0.
      if not sampleInRegion==None:
        sampleInRegion.Print()
        sampleInRegionVal = sampleInRegion.getVal()
        sampleInRegionError = Util.GetPropagatedError(sampleInRegion, resultAfterFit, doAsym) 
        sampleInAllRegions.add(sampleInRegion)
      else:
        print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region =",region, "\n"
      nSampleInRegionVal.append(sampleInRegionVal)
      nSampleInRegionError.append(sampleInRegionError)
    if showSum:
      sampleSumInAllRegions = RooAddition( (sampleName+"_AllRegions_FITTED"), (sampleName+"_AllRegions_FITTED"), RooArgList(sampleInAllRegions))
      sampleSumInAllRegions.Print()
      nSampleSumVal = sampleSumInAllRegions.getVal()
      nSampleSumError = Util.GetPropagatedError(sampleSumInAllRegions, resultAfterFit, doAsym)
      nSampleInRegionVal.append(nSampleSumVal)
      nSampleInRegionError.append(nSampleSumError)
    tablenumbers['Fitted_events_'+sampleName]   = nSampleInRegionVal
    tablenumbers['Fitted_err_'+sampleName]   = nSampleInRegionError

  print tablenumbers

  ######
  ######
  ######  FROM HERE ON OUT WE CALCULATE THE EXPECTED NUMBER OF EVENTS __BEFORE__ THE FIT
  ######
  ######

  #  FROM HERE ON OUT WE CALCULATE THE EXPECTED NUMBER OF EVENTS BEFORE THE FIT
  w.loadSnapshot('snapshot_paramsVals_RooExpandedFitResult_beforeFit')

  # check if any of the initial scaling factors is != 1
  _result = w.obj('RooExpandedFitResult_beforeFit')
  _muFacs = _result.floatParsFinal()

  for i in range(len(_muFacs)):
    if "mu_" in _muFacs[i].GetName() and _muFacs[i].getVal() != 1.0:
      print  " \n WARNING: scaling factor %s != 1.0 (%g) expected MC yield WILL BE WRONG!" % (_muFacs[i].GetName(), _muFacs[i].getVal())
  
  pdfinRegionList = [ Util.GetRegionPdf(w, region)  for region in regionList]
  varinRegionList =  [ Util.GetRegionVar(w, region) for region in regionList]
  rrspdfinRegionList = []
  for index,pdf in enumerate(pdfinRegionList):
    prodList = pdf.pdfList()
    foundRRS = 0
    for idx in range(prodList.getSize()):
      if prodList[idx].InheritsFrom("RooRealSumPdf"):
        prodList[idx].Print()
        rrspdfInt =  prodList[idx].createIntegral(RooArgSet(varinRegionList[index]))
        rrspdfinRegionList.append(rrspdfInt)
        foundRRS += 1
    if foundRRS >1 or foundRRS==0:
      print " \n\n WARNING: ", pdf.GetName(), " has ", foundRRS, " instances of RooRealSumPdf"
      print pdf.GetName(), " component list:", prodList.Print("v")

  nExpInRegionList =  [ pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)]
  pdfExpErrInRegionList = [ Util.GetPropagatedError(pdf, resultBeforeFit, doAsym)  for pdf in rrspdfinRegionList]
  
  if showSum:
    pdfInAllRegions = RooArgSet()
    for index, pdf in enumerate(rrspdfinRegionList):
      pdfInAllRegions.add(pdf)
    pdfSumInAllRegions = RooAddition( "pdf_AllRegions_BEFORE", "pdf_AllRegions_BEFORE", RooArgList(pdfInAllRegions))
    nPdfSumVal = pdfSumInAllRegions.getVal()
    nPdfSumError = Util.GetPropagatedError(pdfSumInAllRegions, resultBeforeFit, doAsym)
    nExpInRegionList.append(nPdfSumVal)
    pdfExpErrInRegionList.append(nPdfSumError)
  
  tablenumbers['TOTAL_MC_EXP_BKG_events']    =  nExpInRegionList
  tablenumbers['TOTAL_MC_EXP_BKG_err']    =  pdfExpErrInRegionList
  
  for isam, sample in enumerate(sampleList):
    sampleName=getName(sample)
    nMCSampleInRegionVal = []
    nMCSampleInRegionError = []
    MCSampleInAllRegions = RooArgSet()
    for ireg, region in enumerate(regionList):
      MCSampleInRegion = getPdfInRegions(w,sample,region)
      #MCSampleInRegion = Util.GetComponent(w,sample,region,exactRegionNames)
      MCSampleInRegionVal = 0.
      MCSampleInRegionError = 0.
      if not MCSampleInRegion==None:
        MCSampleInRegionVal = MCSampleInRegion.getVal()
        MCSampleInRegionError = Util.GetPropagatedError(MCSampleInRegion, resultBeforeFit, doAsym) 
        MCSampleInAllRegions.add(MCSampleInRegion)
      else:
        print " \n WARNING: sample=", sampleName, " non-existent (empty) in region=",region
      nMCSampleInRegionVal.append(MCSampleInRegionVal)
      nMCSampleInRegionError.append(MCSampleInRegionError)
    if showSum:
      MCSampleSumInAllRegions = RooAddition( (sampleName+"_AllRegions_MC"), (sampleName+"_AllRegions_MC"), RooArgList(MCSampleInAllRegions))
      nMCSampleSumVal = MCSampleSumInAllRegions.getVal()
      nMCSampleSumError = Util.GetPropagatedError(MCSampleSumInAllRegions, resultBeforeFit, doAsym)
      nMCSampleInRegionVal.append(nMCSampleSumVal)
      nMCSampleInRegionError.append(nMCSampleSumError)
    tablenumbers['MC_exp_events_'+sampleName] = nMCSampleInRegionVal
    tablenumbers['MC_exp_err_'+sampleName] = nMCSampleInRegionError

    #  sorted(tablenumbers, key=lambda sample: sample[1])   # sort by age
  map_listofkeys = tablenumbers.keys()
  map_listofkeys.sort()
  
  for name in map_listofkeys:
    if tablenumbers.has_key(name) :
      print name, ": ", tablenumbers[name]
      
  ###
  return tablenumbers
Пример #2
0
def latexfitresults(filename,
                    regionList,
                    sampleList,
                    dataname='obsData',
                    showSum=False,
                    doAsym=True,
                    blinded=False,
                    splitBins=False):
    """
  Calculate before/after-fit yields in all channels given
  
  @param filename The filename containing afterFit workspace
  @param regionList A list of regions to be considered
  @param sampleList A list of samples to be considered
  @param dataname The name of dataset (default='obsData')
  @param showSum Calculates sum of all regions if set to true (default=False)
  @param doAsym Calculates asymmetric errors taken from MINOS (default=True)
  @param blinded Observed event count will not be shown if set to True (default=False)
  @param splitBins Calculates bin-by-bin yields for all regions if set to True (default=False)
  """
    """
  pick up workspace from file
  """
    workspacename = 'w'
    w = Util.GetWorkspaceFromFile(filename, 'w')
    if w == None:
        print "ERROR : Cannot open workspace : ", workspacename
        sys.exit(1)
    """
  pick up after-fit RooExpandedFitResult from workspace
  """
    resultAfterFit = w.obj('RooExpandedFitResult_afterFit')
    if resultAfterFit == None:
        print "ERROR : Cannot open fit result after fit RooExpandedFitResult_afterFit"
        sys.exit(1)
    """
  pick up before-fit RooExpandedFitResult from workspace
  """
    resultBeforeFit = w.obj('RooExpandedFitResult_beforeFit')
    if resultBeforeFit == None:
        print "ERROR : Cannot open fit result before fit RooExpandedFitResult_beforeFit"
        sys.exit(1)
    """
  pick up dataset from workspace
  """
    data_set = w.data(dataname)
    if data_set == None:
        print "ERROR : Cannot open dataset : ", "data_set" + suffix
        sys.exit(1)
    """
  pick up channel category (RooCategory) from workspace
  """
    regionCat = w.obj("channelCat")
    if not blinded:
        data_set.table(regionCat).Print("v")
    """
  find full (long) name list of regions (i.e. short=SR3J, long=SR3J_meffInc30_JVF25pt50)
  """
    regionFullNameList = [
        Util.GetFullRegionName(regionCat, region) for region in regionList
    ]
    """
  load afterFit workspace snapshot (=set all parameters to values after fit)
  """
    snapshot = 'snapshot_paramsVals_RooExpandedFitResult_afterFit'
    w.loadSnapshot(snapshot)

    if not w.loadSnapshot(snapshot):
        print "ERROR : Cannot load snapshot : ", snapshot
        sys.exit(1)
    """
  define set, for all names/yields to be saved in
  """
    tablenumbers = {}
    """
  if showSum=True define names for sum of all regions and add to regionList
  """
    sumName = ""
    for index, reg in enumerate(regionList):
        if index == 0:
            sumName = reg
        else:
            sumName = sumName + " + " + reg

    regionListWithSum = list(regionList)
    if showSum:
        regionListWithSum.append(sumName)

    tablenumbers['names'] = regionListWithSum
    """
  make a list of channelCat calls for every region
  """
    regionCatList = [
        'channelCat==channelCat::' + region.Data()
        for region in regionFullNameList
    ]
    """
  retrieve number of observed (=data) events per region
  """
    regionDatasetList = [
        data_set.reduce(regioncat) for regioncat in regionCatList
    ]
    for index, data in enumerate(regionDatasetList):
        data.SetName("data_" + regionList[index])
        data.SetTitle("data_" + regionList[index])

    nobs_regionList = [data.sumEntries() for data in regionDatasetList]
    """
  if showSum=True calculate the total number of observed events in all regions  
  """
    sumNobs = 0.
    for nobs in nobs_regionList:
        sumNobs += nobs
    if showSum:
        nobs_regionList.append(sumNobs)
    tablenumbers['nobs'] = nobs_regionList
    """
  FROM HERE ON OUT WE CALCULATE THE FITTED NUMBER OF EVENTS __AFTER__ THE FIT
  """
    """
  get a list of pdf's and variables per region
  """
    pdfinRegionList = [Util.GetRegionPdf(w, region) for region in regionList]
    varinRegionList = [Util.GetRegionVar(w, region) for region in regionList]
    """
  if splitBins=True get the list of Nbins, binMax and binMin; make a list of new region names for each bin
  """
    varNbinsInRegionList = []
    varBinLowInRegionList = []
    varBinHighInRegionList = []
    rangeNameBinsInRegionList = []
    if splitBins:
        varNbinsInRegionList = [
            Util.GetRegionVar(w, region).getBinning().numBins()
            for region in regionList
        ]
        varBinLowInRegionList = [[
            Util.GetRegionVar(w, region).getBinning(
                (region + "binning")).binLow(ibin)
            for ibin in range(0, varNbinsInRegionList[idx])
        ] for idx, region in enumerate(regionList)]
        varBinHighInRegionList = [[
            Util.GetRegionVar(w, region).getBinning(
                (region + "binning")).binHigh(ibin)
            for ibin in range(0, varNbinsInRegionList[idx])
        ] for idx, region in enumerate(regionList)]
        rangeNameBinsInRegionList = [[
            regionList[idx] + "_bin" + str(ibin)
            for ibin in range(0, varNbinsInRegionList[idx])
        ] for idx, region in enumerate(regionList)]
        for index, region in enumerate(regionList):
            if varNbinsInRegionList[index] == 1:
                print " \n YieldsTable.py: WARNING: you have called -P (= per-bin yields) but this region ", region, " has only 1 bin \n"
    """
  if splitBins=True reshuffle the regionName list; each region name is followed by names of each bin (i.e. regionNameList=['SR3J','SR3J_bin1','SR3j_bin2','SR4J','SR4J_bin1'])
  """
    regionListWithBins = []
    if splitBins:
        for index, region in enumerate(regionList):
            regionListWithBins.append(region)
            for ibin in range(0, varNbinsInRegionList[index]):
                regionListWithBins.append(
                    rangeNameBinsInRegionList[index][ibin])
        tablenumbers['names'] = regionListWithBins
    """
  calculate number of observed(=data) events per bin
  """
    nobs_regionListWithBins = []
    if splitBins:
        binFuncInRegionList = [
            RooBinningCategory("bin_" + region, "bin_" + region,
                               varinRegionList[index])
            for index, region in enumerate(regionList)
        ]
        for index, data in enumerate(regionDatasetList):
            data.addColumn(binFuncInRegionList[index])
            if not blinded:
                data.table(binFuncInRegionList[index]).Print("v")
            nobs_regionListWithBins.append(data.sumEntries())
            for ibin in range(0, varNbinsInRegionList[index]):
                nobs_regionListWithBins.append(
                    (data.reduce(binFuncInRegionList[index].GetName() + "==" +
                                 binFuncInRegionList[index].GetName() + "::" +
                                 varinRegionList[index].GetName() + "_bin" +
                                 str(ibin))).sumEntries())

        tablenumbers['nobs'] = nobs_regionListWithBins
    """
  if blinded=True, set all numbers of observed events to -1
  """
    if blinded:
        for index, nobs in enumerate(nobs_regionListWithBins):
            nobs_regionListWithBins[index] = -1
        tablenumbers['nobs'] = nobs_regionListWithBins
    """
  get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
  """
    rrspdfinRegionList = []
    for index, pdf in enumerate(pdfinRegionList):
        prodList = pdf.pdfList()
        foundRRS = 0
        for idx in range(prodList.getSize()):
            if prodList[idx].InheritsFrom("RooRealSumPdf"):
                rrspdfInt = prodList[idx].createIntegral(
                    RooArgSet(varinRegionList[index]))
                rrspdfinRegionList.append(rrspdfInt)
                if splitBins:
                    origMin = varinRegionList[index].getMin()
                    origMax = varinRegionList[index].getMax()
                    for ibin in range(0, varNbinsInRegionList[index]):
                        rangeName = rangeNameBinsInRegionList[index][ibin]
                        varinRegionList[index].setRange(
                            rangeName, varBinLowInRegionList[index][ibin],
                            varBinHighInRegionList[index][ibin])
                        rrspdfInt = prodList[idx].createIntegral(
                            RooArgSet(varinRegionList[index]), rangeName)
                        rrspdfinRegionList.append(rrspdfInt)
                    varinRegionList[index].setRange(origMin, origMax)
                foundRRS += 1
        if foundRRS > 1 or foundRRS == 0:
            print " \n\n WARNING: ", pdf.GetName(
            ), " has ", foundRRS, " instances of RooRealSumPdf"
            print pdf.GetName(), " component list:", prodList.Print("v")
    """
  calculate total pdf number of fitted events and error
  """
    nFittedInRegionList = [
        pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)
    ]
    pdfFittedErrInRegionList = [
        Util.GetPropagatedError(pdf, resultAfterFit, doAsym)
        for pdf in rrspdfinRegionList
    ]
    """
  if showSum=True calculate the total number of fitted events in all regions  
  """
    if showSum:
        pdfInAllRegions = RooArgSet()
        for index, pdf in enumerate(rrspdfinRegionList):
            pdfInAllRegions.add(pdf)
        pdfSumInAllRegions = RooAddition("pdf_AllRegions_AFTER",
                                         "pdf_AllRegions_AFTER",
                                         RooArgList(pdfInAllRegions))
        nPdfSumVal = pdfSumInAllRegions.getVal()
        nPdfSumError = Util.GetPropagatedError(pdfSumInAllRegions,
                                               resultAfterFit, doAsym)
        nFittedInRegionList.append(nPdfSumVal)
        pdfFittedErrInRegionList.append(nPdfSumError)

    tablenumbers['TOTAL_FITTED_bkg_events'] = nFittedInRegionList
    tablenumbers['TOTAL_FITTED_bkg_events_err'] = pdfFittedErrInRegionList
    """
  calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
  """
    for isam, sample in enumerate(sampleList):
        sampleName = getName(sample)
        nSampleInRegionVal = []
        nSampleInRegionError = []
        sampleInAllRegions = RooArgSet()
        for ireg, region in enumerate(regionList):
            sampleInRegion = getPdfInRegions(w, sample, region)
            sampleInRegionVal = 0.
            sampleInRegionError = 0.
            if not sampleInRegion == None:
                sampleInRegionVal = sampleInRegion.getVal()
                sampleInRegionError = Util.GetPropagatedError(
                    sampleInRegion, resultAfterFit, doAsym)
                sampleInAllRegions.add(sampleInRegion)
            else:
                print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region =", region, "\n"
            nSampleInRegionVal.append(sampleInRegionVal)
            nSampleInRegionError.append(sampleInRegionError)
            """
      if splitBins=True calculate numbers of fitted events plus error per bin      
      """
            if splitBins:
                origMin = varinRegionList[ireg].getMin()
                origMax = varinRegionList[ireg].getMax()
                for ibin in range(0, varNbinsInRegionList[ireg]):
                    rangeName = rangeNameBinsInRegionList[ireg][ibin]
                    sampleInRegion = getPdfInRegionsWithRangeName(
                        w, sample, region, rangeName)
                    sampleInRegionVal = 0.
                    sampleInRegionError = 0.
                    if not sampleInRegion == None:
                        varinRegionList[ireg].setRange(
                            rangeName, varBinLowInRegionList[ireg][ibin],
                            varBinHighInRegionList[ireg][ibin])
                        sampleInRegionVal = sampleInRegion.getVal()
                        sampleInRegionError = Util.GetPropagatedError(
                            sampleInRegion, resultAfterFit, doAsym)
                    else:
                        print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region=", region, " bin=", ibin, " \n"
                    nSampleInRegionVal.append(sampleInRegionVal)
                    nSampleInRegionError.append(sampleInRegionError)

                varinRegionList[ireg].setRange(origMin, origMax)
        """
    if showSum=True calculate the total number of fitted events in all regions  
    """
        if showSum:
            sampleSumInAllRegions = RooAddition(
                (sampleName + "_AllRegions_FITTED"),
                (sampleName + "_AllRegions_FITTED"),
                RooArgList(sampleInAllRegions))
            nSampleSumVal = sampleSumInAllRegions.getVal()
            nSampleSumError = Util.GetPropagatedError(sampleSumInAllRegions,
                                                      resultAfterFit, doAsym)
            nSampleInRegionVal.append(nSampleSumVal)
            nSampleInRegionError.append(nSampleSumError)
        tablenumbers['Fitted_events_' + sampleName] = nSampleInRegionVal
        tablenumbers['Fitted_err_' + sampleName] = nSampleInRegionError

    print "\n starting BEFORE-FIT calculations \n"
    """
  FROM HERE ON OUT WE CALCULATE THE EXPECTED NUMBER OF EVENTS __BEFORRE__ THE FIT
  """
    """
  load beforeFit workspace snapshot (=set all parameters to values before fit)
  """
    w.loadSnapshot('snapshot_paramsVals_RooExpandedFitResult_beforeFit')
    """
  check if any of the initial scaling factors is != 1
  """
    _result = w.obj('RooExpandedFitResult_beforeFit')
    _muFacs = _result.floatParsFinal()

    for i in range(len(_muFacs)):
        if "mu_" in _muFacs[i].GetName() and _muFacs[i].getVal() != 1.0:
            print " \n WARNING: scaling factor %s != 1.0 (%g) expected MC yield WILL BE WRONG!" % (
                _muFacs[i].GetName(), _muFacs[i].getVal())
    """
  get a list of pdf's and variables per region
  """
    pdfinRegionList = [Util.GetRegionPdf(w, region) for region in regionList]
    varinRegionList = [Util.GetRegionVar(w, region) for region in regionList]
    """
  get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
  """
    rrspdfinRegionList = []
    for index, pdf in enumerate(pdfinRegionList):
        prodList = pdf.pdfList()
        foundRRS = 0
        for idx in range(prodList.getSize()):
            if prodList[idx].InheritsFrom("RooRealSumPdf"):
                rrspdfInt = prodList[idx].createIntegral(
                    RooArgSet(varinRegionList[index]))
                rrspdfinRegionList.append(rrspdfInt)
                if splitBins:
                    origMin = varinRegionList[index].getMin()
                    origMax = varinRegionList[index].getMax()
                    for ibin in range(0, varNbinsInRegionList[index]):
                        rangeName = rangeNameBinsInRegionList[index][ibin]
                        varinRegionList[index].setRange(
                            rangeName, varBinLowInRegionList[index][ibin],
                            varBinHighInRegionList[index][ibin])
                        rrspdfInt = prodList[idx].createIntegral(
                            RooArgSet(varinRegionList[index]), rangeName)
                        rrspdfinRegionList.append(rrspdfInt)
                    varinRegionList[index].setRange(origMin, origMax)
                foundRRS += 1
        if foundRRS > 1 or foundRRS == 0:
            print " \n\n WARNING: ", pdf.GetName(
            ), " has ", foundRRS, " instances of RooRealSumPdf"
            print pdf.GetName(), " component list:", prodList.Print("v")
    """
  calculate total pdf number of expected events and error
  """
    nExpInRegionList = [
        pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)
    ]
    pdfExpErrInRegionList = [
        Util.GetPropagatedError(pdf, resultBeforeFit, doAsym)
        for pdf in rrspdfinRegionList
    ]
    """
  if showSum=True calculate the total number of expected events in all regions  
  """
    if showSum:
        pdfInAllRegions = RooArgSet()
        for index, pdf in enumerate(rrspdfinRegionList):
            pdfInAllRegions.add(pdf)
        pdfSumInAllRegions = RooAddition("pdf_AllRegions_BEFORE",
                                         "pdf_AllRegions_BEFORE",
                                         RooArgList(pdfInAllRegions))
        nPdfSumVal = pdfSumInAllRegions.getVal()
        nPdfSumError = Util.GetPropagatedError(pdfSumInAllRegions,
                                               resultBeforeFit, doAsym)
        nExpInRegionList.append(nPdfSumVal)
        pdfExpErrInRegionList.append(nPdfSumError)

    tablenumbers['TOTAL_MC_EXP_BKG_events'] = nExpInRegionList
    tablenumbers['TOTAL_MC_EXP_BKG_err'] = pdfExpErrInRegionList
    """
  calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
  """
    for isam, sample in enumerate(sampleList):
        sampleName = getName(sample)
        nMCSampleInRegionVal = []
        nMCSampleInRegionError = []
        MCSampleInAllRegions = RooArgSet()
        for ireg, region in enumerate(regionList):
            MCSampleInRegion = getPdfInRegions(w, sample, region)
            MCSampleInRegionVal = 0.
            MCSampleInRegionError = 0.
            if not MCSampleInRegion == None:
                MCSampleInRegionVal = MCSampleInRegion.getVal()
                MCSampleInRegionError = Util.GetPropagatedError(
                    MCSampleInRegion, resultBeforeFit, doAsym)
                MCSampleInAllRegions.add(MCSampleInRegion)
            else:
                print " \n WARNING: sample=", sampleName, " non-existent (empty) in region=", region
            nMCSampleInRegionVal.append(MCSampleInRegionVal)
            nMCSampleInRegionError.append(MCSampleInRegionError)
            """
      if splitBins=True calculate numbers of fitted events plus error per bin      
      """
            if splitBins:
                origMin = varinRegionList[ireg].getMin()
                origMax = varinRegionList[ireg].getMax()
                for ibin in range(0, varNbinsInRegionList[ireg]):
                    rangeName = rangeNameBinsInRegionList[ireg][ibin]
                    MCSampleInRegion = getPdfInRegionsWithRangeName(
                        w, sample, region, rangeName)
                    MCSampleInRegionVal = 0.
                    MCSampleInRegionError = 0.
                    if not MCSampleInRegion == None:
                        varinRegionList[ireg].setRange(
                            rangeName, varBinLowInRegionList[ireg][ibin],
                            varBinHighInRegionList[ireg][ibin])
                        MCSampleInRegionVal = MCSampleInRegion.getVal()
                        MCSampleInRegionError = Util.GetPropagatedError(
                            MCSampleInRegion, resultBeforeFit, doAsym)
                    else:
                        print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region=", region, " bin=", ibin, " \n"
                    nMCSampleInRegionVal.append(MCSampleInRegionVal)
                    nMCSampleInRegionError.append(MCSampleInRegionError)

                varinRegionList[ireg].setRange(origMin, origMax)
        """
    if showSum=True calculate the total number of fitted events in all regions  
    """
        if showSum:
            MCSampleSumInAllRegions = RooAddition(
                (sampleName + "_AllRegions_MC"),
                (sampleName + "_AllRegions_MC"),
                RooArgList(MCSampleInAllRegions))
            nMCSampleSumVal = MCSampleSumInAllRegions.getVal()
            nMCSampleSumError = Util.GetPropagatedError(
                MCSampleSumInAllRegions, resultBeforeFit, doAsym)
            nMCSampleInRegionVal.append(nMCSampleSumVal)
            nMCSampleInRegionError.append(nMCSampleSumError)
        tablenumbers['MC_exp_events_' + sampleName] = nMCSampleInRegionVal
        tablenumbers['MC_exp_err_' + sampleName] = nMCSampleInRegionError
    """
  sort the tablenumbers set
  """
    map_listofkeys = tablenumbers.keys()
    map_listofkeys.sort()
    """
  print the sorted tablenumbers set
  """
    for name in map_listofkeys:
        if tablenumbers.has_key(name):
            print name, ": ", tablenumbers[name]

    return tablenumbers
Пример #3
0
  calculate the systematics breakdown for each channel/region given in chanList
   choose whether to use method-1 or method-2
   choose whether calculate systematic for full model or just a sample chosen by user
  """
  for chan in origChanList:

    if not chosenSample:
      if method == "2":
        regSys = latexfitresults_method2(wsFileName,resultName,chan,'',fitRegionsStr,'obsData',doAsym) 
      else:
        regSys = latexfitresults(wsFileName,namemap,chan,'',resultName,'obsData',doAsym)
      chanSys[chan] = regSys
      chanList.append(chan)
    else:
      for sample in sampleList:
        sampleName=getName(sample)
        if method == "2":
          regSys = latexfitresults_method2(wsFileName,resultName,chan,sample,fitRegionsStr,'obsData',doAsym) 
        else:
          regSys = latexfitresults(wsFileName,namemap,chan,sample,resultName,'obsData',doAsym) 
        chanSys[chan+"_"+sampleName] = regSys
        chanList.append(chan+"_"+sampleName)
        pass
      pass

  """
  write out LaTeX table by calling function from SysTableTex.py function tablefragment
  """
  line_chanSysTight = tablefragment(chanSys,chanList,skiplist,chanStr,showPercent)
  
  f = open(outputFileName, 'w')
def latexfitresults(filename,regionList,sampleList,dataname='obsData',showSum=False, doAsym=True, blinded=False, splitBins=False):
  """
  Calculate before/after-fit yields in all channels given

  @param filename The filename containing afterFit workspace
  @param regionList A list of regions to be considered
  @param sampleList A list of samples to be considered
  @param dataname The name of dataset (default='obsData')
  @param showSum Calculates sum of all regions if set to true (default=False)
  @param doAsym Calculates asymmetric errors taken from MINOS (default=True)
  @param blinded Observed event count will not be shown if set to True (default=False)
  @param splitBins Calculates bin-by-bin yields for all regions if set to True (default=False)
  """

  """
  pick up workspace from file
  """
  workspacename = 'w'
  w = Util.GetWorkspaceFromFile(filename,'w')
  if w==None:
    print "ERROR : Cannot open workspace : ", workspacename
    sys.exit(1)

  """
  pick up after-fit RooExpandedFitResult from workspace
  """
  resultAfterFit = w.obj('RooExpandedFitResult_afterFit')
  if resultAfterFit==None:
    print "ERROR : Cannot open fit result after fit RooExpandedFitResult_afterFit"
    sys.exit(1)

  """
  pick up before-fit RooExpandedFitResult from workspace
  """
  resultBeforeFit = w.obj('RooExpandedFitResult_beforeFit')
  if resultBeforeFit==None:
    print "ERROR : Cannot open fit result before fit RooExpandedFitResult_beforeFit"
    sys.exit(1)

  """
  pick up dataset from workspace
  """
  data_set = w.data(dataname)
  if data_set==None:
    print "ERROR : Cannot open dataset : ", "data_set"+suffix
    sys.exit(1)

  """
  pick up channel category (RooCategory) from workspace
  """
  regionCat = w.obj("channelCat")
  if not blinded:
    data_set.table(regionCat).Print("v")

  """
  find full (long) name list of regions (i.e. short=SR3J, long=SR3J_meffInc30_JVF25pt50)
  """
  regionFullNameList = [ Util.GetFullRegionName(regionCat, region) for region in regionList]


  """
  load afterFit workspace snapshot (=set all parameters to values after fit)
  """
  snapshot =  'snapshot_paramsVals_RooExpandedFitResult_afterFit'
  w.loadSnapshot(snapshot)

  if not w.loadSnapshot(snapshot):
    print "ERROR : Cannot load snapshot : ", snapshot
    sys.exit(1)

  """
  define set, for all names/yields to be saved in
  """
  tablenumbers = {}

  """
  if showSum=True define names for sum of all regions and add to regionList
  """
  sumName = ""
  for index, reg in enumerate(regionList):
    if index == 0:
      sumName = reg
    else:
      sumName = sumName + " + " + reg

  regionListWithSum = list(regionList)
  if showSum:
    regionListWithSum.append(sumName)

  tablenumbers['names'] = regionListWithSum

  """
  make a list of channelCat calls for every region
  """
  regionCatList = [ 'channelCat==channelCat::' +region.Data() for region in regionFullNameList]

  """
  retrieve number of observed (=data) events per region
  """
  print regionCatList
  srindex = regionFullNameList.index("SR_cuts")
  regionDatasetList = [data_set.reduce(regioncat) for regioncat in regionCatList]

  for index, data in enumerate(regionDatasetList):
    print "data," , data, data.GetName()
    data.SetName("data_" + regionList[index])
    data.SetTitle("data_" + regionList[index])

  nobs_regionList = [ data.sumEntries() for data in regionDatasetList]
  if blinded :  nobs_regionList[srindex] = -1
  """
  if showSum=True calculate the total number of observed events in all regions
  """
  sumNobs = 0.
  for nobs in nobs_regionList:
    sumNobs += nobs
  if showSum:
    nobs_regionList.append(sumNobs)
  tablenumbers['nobs'] = nobs_regionList


  """
  FROM HERE ON OUT WE CALCULATE THE FITTED NUMBER OF EVENTS __AFTER__ THE FIT
  """

  """
  get a list of pdf's and variables per region
  """
  pdfinRegionList = [ Util.GetRegionPdf(w, region)  for region in regionList]
  varinRegionList =  [ Util.GetRegionVar(w, region) for region in regionList]

  """
  if splitBins=True get the list of Nbins, binMax and binMin; make a list of new region names for each bin
  """
  varNbinsInRegionList =  []
  varBinLowInRegionList = []
  varBinHighInRegionList =  []
  rangeNameBinsInRegionList = []
  if splitBins:
    varNbinsInRegionList = [Util.GetRegionVar(w, region).getBinning().numBins() for region in regionList]
    varBinLowInRegionList = [[Util.GetRegionVar(w, region).getBinning((region+"binning")).binLow(ibin) for ibin in range(0, varNbinsInRegionList[idx]) ] for idx,region in enumerate(regionList)]
    varBinHighInRegionList = [[Util.GetRegionVar(w, region).getBinning((region+"binning")).binHigh(ibin) for ibin in range(0, varNbinsInRegionList[idx]) ] for idx,region in enumerate(regionList)]
    rangeNameBinsInRegionList = [[regionList[idx]+"_bin"+str(ibin) for ibin in range(0, varNbinsInRegionList[idx]) ] for idx,region in enumerate(regionList)]
    for index,region in enumerate(regionList):
      if varNbinsInRegionList[index]==1:
        print " \n YieldsTable.py: WARNING: you have called -P (= per-bin yields) but this region ", region, " has only 1 bin \n"



  """
  if splitBins=True reshuffle the regionName list; each region name is followed by names of each bin (i.e. regionNameList=['SR3J','SR3J_bin1','SR3j_bin2','SR4J','SR4J_bin1'])
  """
  regionListWithBins = []
  if splitBins:
    for index,region in enumerate(regionList):
      regionListWithBins.append(region)
      for ibin in range(0,varNbinsInRegionList[index]):
        regionListWithBins.append(rangeNameBinsInRegionList[index][ibin])
    tablenumbers['names'] = regionListWithBins


  """
  calculate number of observed(=data) events per bin
  """
  nobs_regionListWithBins = []
  if splitBins:
    binFuncInRegionList = [RooBinningCategory("bin_"+region,"bin_"+region,varinRegionList[index]) for index,region in enumerate(regionList)]
    for index, data in enumerate(regionDatasetList):
      data.addColumn(binFuncInRegionList[index])
      if not blinded:
        data.table(binFuncInRegionList[index]).Print("v")
      nobs_regionListWithBins.append(data.sumEntries())
      for ibin in range(0,varNbinsInRegionList[index]):
        nobs_regionListWithBins.append((data.reduce(binFuncInRegionList[index].GetName()+"=="+binFuncInRegionList[index].GetName()+"::"+varinRegionList[index].GetName()+"_bin"+str(ibin))).sumEntries())

    tablenumbers['nobs'] = nobs_regionListWithBins

  """
  if blinded=True, set all numbers of observed events to -1
  """
  if blinded and splitBins:
    for index, nobs in enumerate(nobs_regionListWithBins):
      nobs_regionListWithBins[index] = -1
    tablenumbers['nobs'] = nobs_regionListWithBins


  """
  get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
  """
  rrspdfinRegionList = []
  for index,pdf in enumerate(pdfinRegionList):
    prodList = pdf.pdfList()
    foundRRS = 0
    for idx in range(prodList.getSize()):
      if prodList[idx].InheritsFrom("RooRealSumPdf"):
        rrspdfInt =  prodList[idx].createIntegral(RooArgSet(varinRegionList[index]))
        rrspdfinRegionList.append(rrspdfInt)
        if splitBins:
          origMin = varinRegionList[index].getMin()
          origMax = varinRegionList[index].getMax()
          for ibin in range(0,varNbinsInRegionList[index]):
            rangeName = rangeNameBinsInRegionList[index][ibin]
            varinRegionList[index].setRange(rangeName,varBinLowInRegionList[index][ibin],varBinHighInRegionList[index][ibin])
            rrspdfInt =  prodList[idx].createIntegral(RooArgSet(varinRegionList[index]),rangeName)
            rrspdfinRegionList.append(rrspdfInt)
          varinRegionList[index].setRange(origMin,origMax)
        foundRRS += 1
    if foundRRS >1 or foundRRS==0:
      print " \n\n WARNING: ", pdf.GetName(), " has ", foundRRS, " instances of RooRealSumPdf"
      print pdf.GetName(), " component list:", prodList.Print("v")

  """
  calculate total pdf number of fitted events and error
  """
  nFittedInRegionList =  [ pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)]
  pdfFittedErrInRegionList = [ Util.GetPropagatedError(pdf, resultAfterFit, doAsym) for pdf in rrspdfinRegionList]


  """
  if showSum=True calculate the total number of fitted events in all regions
  """
  if showSum:
    pdfInAllRegions = RooArgSet()
    for index, pdf in enumerate(rrspdfinRegionList):
      pdfInAllRegions.add(pdf)
    pdfSumInAllRegions = RooAddition( "pdf_AllRegions_AFTER", "pdf_AllRegions_AFTER", RooArgList(pdfInAllRegions))
    nPdfSumVal = pdfSumInAllRegions.getVal()
    nPdfSumError = Util.GetPropagatedError(pdfSumInAllRegions, resultAfterFit, doAsym)
    nFittedInRegionList.append(nPdfSumVal)
    pdfFittedErrInRegionList.append(nPdfSumError)

  tablenumbers['TOTAL_FITTED_bkg_events']    =  nFittedInRegionList
  tablenumbers['TOTAL_FITTED_bkg_events_err']    =  pdfFittedErrInRegionList

  """
  calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
  """
  for isam, sample in enumerate(sampleList):
    sampleName=getName(sample)
    nSampleInRegionVal = []
    nSampleInRegionError = []
    sampleInAllRegions = RooArgSet()
    for ireg, region in enumerate(regionList):
      sampleInRegion=getPdfInRegions(w,sample,region)
      sampleInRegionVal = 0.
      sampleInRegionError = 0.
      if not sampleInRegion==None:
        sampleInRegionVal = sampleInRegion.getVal()
        sampleInRegionError = Util.GetPropagatedError(sampleInRegion, resultAfterFit, doAsym)
        sampleInAllRegions.add(sampleInRegion)
      else:
        print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region =",region, "\n"
      nSampleInRegionVal.append(sampleInRegionVal)
      nSampleInRegionError.append(sampleInRegionError)

      """
      if splitBins=True calculate numbers of fitted events plus error per bin
      """
      if splitBins:
        origMin = varinRegionList[ireg].getMin()
        origMax = varinRegionList[ireg].getMax()
        for ibin in range(0,varNbinsInRegionList[ireg]):
          rangeName = rangeNameBinsInRegionList[ireg][ibin]
          sampleInRegion=getPdfInRegionsWithRangeName(w,sample,region,rangeName)
          sampleInRegionVal = 0.
          sampleInRegionError = 0.
          if not sampleInRegion==None:
            varinRegionList[ireg].setRange(rangeName,varBinLowInRegionList[ireg][ibin],varBinHighInRegionList[ireg][ibin])
            sampleInRegionVal = sampleInRegion.getVal()
            sampleInRegionError = Util.GetPropagatedError(sampleInRegion, resultAfterFit, doAsym)
          else:
            print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region=",region, " bin=",ibin, " \n"
          nSampleInRegionVal.append(sampleInRegionVal)
          nSampleInRegionError.append(sampleInRegionError)

        varinRegionList[ireg].setRange(origMin,origMax)

    """
    if showSum=True calculate the total number of fitted events in all regions
    """
    if showSum:
      sampleSumInAllRegions = RooAddition( (sampleName+"_AllRegions_FITTED"), (sampleName+"_AllRegions_FITTED"), RooArgList(sampleInAllRegions))
      nSampleSumVal = sampleSumInAllRegions.getVal()
      nSampleSumError = Util.GetPropagatedError(sampleSumInAllRegions, resultAfterFit, doAsym)
      nSampleInRegionVal.append(nSampleSumVal)
      nSampleInRegionError.append(nSampleSumError)
    tablenumbers['Fitted_events_'+sampleName]   = nSampleInRegionVal
    tablenumbers['Fitted_err_'+sampleName]   = nSampleInRegionError



  print "\n starting BEFORE-FIT calculations \n"
  """
  FROM HERE ON OUT WE CALCULATE THE EXPECTED NUMBER OF EVENTS __BEFORRE__ THE FIT
  """

  """
  load beforeFit workspace snapshot (=set all parameters to values before fit)
  """
  w.loadSnapshot('snapshot_paramsVals_RooExpandedFitResult_beforeFit')

  """
  check if any of the initial scaling factors is != 1
  """
  _result = w.obj('RooExpandedFitResult_beforeFit')
  _muFacs = _result.floatParsFinal()

  for i in range(len(_muFacs)):
    if "mu_" in _muFacs[i].GetName() and _muFacs[i].getVal() != 1.0:
      print  " \n WARNING: scaling factor %s != 1.0 (%g) expected MC yield WILL BE WRONG!" % (_muFacs[i].GetName(), _muFacs[i].getVal())

  """
  get a list of pdf's and variables per region
  """
  pdfinRegionList = [ Util.GetRegionPdf(w, region)  for region in regionList]
  varinRegionList =  [ Util.GetRegionVar(w, region) for region in regionList]

  """
  get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
  """
  rrspdfinRegionList = []
  for index,pdf in enumerate(pdfinRegionList):
    prodList = pdf.pdfList()
    foundRRS = 0
    for idx in range(prodList.getSize()):
      if prodList[idx].InheritsFrom("RooRealSumPdf"):
        rrspdfInt =  prodList[idx].createIntegral(RooArgSet(varinRegionList[index]))
        rrspdfinRegionList.append(rrspdfInt)
        if splitBins:
          origMin = varinRegionList[index].getMin()
          origMax = varinRegionList[index].getMax()
          for ibin in range(0,varNbinsInRegionList[index]):
            rangeName = rangeNameBinsInRegionList[index][ibin]
            varinRegionList[index].setRange(rangeName,varBinLowInRegionList[index][ibin],varBinHighInRegionList[index][ibin])
            rrspdfInt =  prodList[idx].createIntegral(RooArgSet(varinRegionList[index]),rangeName)
            rrspdfinRegionList.append(rrspdfInt)
          varinRegionList[index].setRange(origMin,origMax)
        foundRRS += 1
    if foundRRS >1 or foundRRS==0:
      print " \n\n WARNING: ", pdf.GetName(), " has ", foundRRS, " instances of RooRealSumPdf"
      print pdf.GetName(), " component list:", prodList.Print("v")

  """
  calculate total pdf number of expected events and error
  """
  nExpInRegionList =  [ pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)]
  pdfExpErrInRegionList = [ Util.GetPropagatedError(pdf, resultBeforeFit, doAsym)  for pdf in rrspdfinRegionList]

  """
  if showSum=True calculate the total number of expected events in all regions
  """
  if showSum:
    pdfInAllRegions = RooArgSet()
    for index, pdf in enumerate(rrspdfinRegionList):
      pdfInAllRegions.add(pdf)
    pdfSumInAllRegions = RooAddition( "pdf_AllRegions_BEFORE", "pdf_AllRegions_BEFORE", RooArgList(pdfInAllRegions))
    nPdfSumVal = pdfSumInAllRegions.getVal()
    nPdfSumError = Util.GetPropagatedError(pdfSumInAllRegions, resultBeforeFit, doAsym)
    nExpInRegionList.append(nPdfSumVal)
    pdfExpErrInRegionList.append(nPdfSumError)

  tablenumbers['TOTAL_MC_EXP_BKG_events']    =  nExpInRegionList
  tablenumbers['TOTAL_MC_EXP_BKG_err']    =  pdfExpErrInRegionList

  """
  calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
  """
  for isam, sample in enumerate(sampleList):
    sampleName=getName(sample)
    nMCSampleInRegionVal = []
    nMCSampleInRegionError = []
    MCSampleInAllRegions = RooArgSet()
    for ireg, region in enumerate(regionList):
      MCSampleInRegion = getPdfInRegions(w,sample,region)
      MCSampleInRegionVal = 0.
      MCSampleInRegionError = 0.
      if not MCSampleInRegion==None:
        MCSampleInRegionVal = MCSampleInRegion.getVal()
        MCSampleInRegionError = Util.GetPropagatedError(MCSampleInRegion, resultBeforeFit, doAsym)
        MCSampleInAllRegions.add(MCSampleInRegion)
      else:
        print " \n WARNING: sample=", sampleName, " non-existent (empty) in region=",region
      nMCSampleInRegionVal.append(MCSampleInRegionVal)
      nMCSampleInRegionError.append(MCSampleInRegionError)

      """
      if splitBins=True calculate numbers of fitted events plus error per bin
      """
      if splitBins:
        origMin = varinRegionList[ireg].getMin()
        origMax = varinRegionList[ireg].getMax()
        for ibin in range(0,varNbinsInRegionList[ireg]):
          rangeName = rangeNameBinsInRegionList[ireg][ibin]
          MCSampleInRegion=getPdfInRegionsWithRangeName(w,sample,region,rangeName)
          MCSampleInRegionVal = 0.
          MCSampleInRegionError = 0.
          if not MCSampleInRegion==None:
            varinRegionList[ireg].setRange(rangeName,varBinLowInRegionList[ireg][ibin],varBinHighInRegionList[ireg][ibin])
            MCSampleInRegionVal = MCSampleInRegion.getVal()
            MCSampleInRegionError = Util.GetPropagatedError(MCSampleInRegion, resultBeforeFit, doAsym)
          else:
            print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region=",region, " bin=",ibin, " \n"
          nMCSampleInRegionVal.append(MCSampleInRegionVal)
          nMCSampleInRegionError.append(MCSampleInRegionError)

        varinRegionList[ireg].setRange(origMin,origMax)

    """
    if showSum=True calculate the total number of fitted events in all regions
    """
    if showSum:
      MCSampleSumInAllRegions = RooAddition( (sampleName+"_AllRegions_MC"), (sampleName+"_AllRegions_MC"), RooArgList(MCSampleInAllRegions))
      nMCSampleSumVal = MCSampleSumInAllRegions.getVal()
      nMCSampleSumError = Util.GetPropagatedError(MCSampleSumInAllRegions, resultBeforeFit, doAsym)
      nMCSampleInRegionVal.append(nMCSampleSumVal)
      nMCSampleInRegionError.append(nMCSampleSumError)
    tablenumbers['MC_exp_events_'+sampleName] = nMCSampleInRegionVal
    tablenumbers['MC_exp_err_'+sampleName] = nMCSampleInRegionError

  """
  sort the tablenumbers set
  """
  map_listofkeys = tablenumbers.keys()
  map_listofkeys.sort()

  """
  print the sorted tablenumbers set
  """
  for name in map_listofkeys:
    if tablenumbers.has_key(name) :
      print name, ": ", tablenumbers[name]

  return tablenumbers
Пример #5
0
       f = open(wsFileName, 'r')
       m3 = pickle.load(f)
       f.close()
   else:
       m3 = latexfitresults(wsFileName, chanList, sampleList, dataname,
                            showSumAllRegions, doAsym, blinded, splitBins)
       f = open(outputFileName.replace(".tex", ".pickle"), 'w')
       pickle.dump(m3, f)
       f.close()
   """
 when multiple samples to be evaluated together (for example [SingleTop,ttbarV] when calling with -s 'TTbar,[SingleTop,ttbarV]')
 the names need to re-specified, as a set of samples produces only one number
 """
   sampleList_decoded = []
   for isam, sample in enumerate(sampleList):
       sampleName = getName(sample)
       sampleList_decoded.append(sampleName)
   """
 write out LaTeX table by calling function from YieldsTableTex.py
 """
   f = open(outputFileName, 'w')
   f.write(tablestart())
   f.write(
       tablefragment(m3, tableName, regionsList_2Digits, sampleList_decoded,
                     showBeforeFitError))
   if tableCaption != "" or tableLabel != "":
       f.write(tableEndWithCaptionAndLabel(tableCaption, tableLabel))
   else:
       f.write(tableend(userString, tableName))
   f.close()
   print "\nResult written in:"
Пример #6
0
def systable(workspace, samples, channels, output_name):

    chan_str = channels.replace(",","_")
    chan_list = channels.split(",")

    chosen_sample = False
    if samples:
        sample_str = samples.replace(",","_") + "_"
        from cmdLineUtils import cmdStringToListOfLists
        sample_list = cmdStringToListOfLists(samples)
        chosen_sample = True

    show_percent = True
    doAsym = True

    result_name = 'RooExpandedFitResult_afterFit'

    skip_list = ['sqrtnobsa', 'totbkgsysa', 'poisqcderr','sqrtnfitted','totsyserr','nfitted']

    chan_sys = {}
    orig_chan_list = list(chan_list)
    chan_list = []

    # calculate the systematics breakdown for each channel/region given in chanList
    # choose whether to use method-1 or method-2
    # choose whether calculate systematic for full model or just a sample chosen by user
    for chan in orig_chan_list:

        if not chosen_sample:
            reg_sys = latexfitresults(workspace, chan, '', result_name, 'obsData', doAsym)

            chan_sys[chan] = reg_sys
            chan_list.append(chan)
        else:
            for sample in sample_list:
                sample_name = getName(sample)

                reg_sys = latexfitresults(workspace, chan, sample, result_name, 'obsData', doAsym)
                chan_sys[chan+"_"+sample_name] = reg_sys
                chan_list.append(chan+"_"+sample_name)

    # write out LaTeX table by calling function from SysTableTex.py function tablefragment
    #line_chan_sys_tight = tablefragment(chanSys,chanList,skiplist,chanStr,showPercent)
    if not chosen_sample:
        field_names = ['\\textbf{Uncertainties}',] + [ '\\textbf{%s}' % reg for reg in  chan_list ]
    elif len(sample_list) == 1:
        sample_label = labels_latex_dict.get(getName(sample_list[0]), getName(sample_list[0]))

        field_names = ['\\textbf{Uncertainties (%s)}' % sample_label ] + [ '\\textbf{%s}' % (reg.split('_')[0]) for reg in  chan_list ]
    else:
        field_names = ['\\textbf{Uncertainties}',] + [ '\\textbf{%s (%s)}' % (reg.split('_')[0], reg.split('_')[1]) for reg in  chan_list ]
    align = ['l',] + [ 'r' for i in chan_list ]

    tablel = LatexTable(field_names, align=align, env=True)

    # print the total fitted (after fit) number of events
    row = ['Total background expectation',]
    for region in chan_list:
        row.append("$%.2f$"  % chan_sys[region]['nfitted'])

    tablel.add_row(row)
    tablel.add_line()

    # print sqrt(N_obs) - for comparison with total systematic
    row = ['Total statistical $(\\sqrt{N_\\mathrm{exp}})$',]
    for region in chan_list:
        row.append("$\\pm %.2f$" % chan_sys[region]['sqrtnfitted'])

    tablel.add_row(row)

    # print total systematic uncertainty
    row = [ 'Total background systematic', ]

    for region in chan_list:
        percentage = chan_sys[region]['totsyserr']/chan_sys[region]['nfitted'] * 100.0
        row.append("$\\pm %.2f\ [%.2f\%%]$" % (chan_sys[region]['totsyserr'], percentage))

    tablel.add_row(row)
    tablel.add_line()
    tablel.add_line()

    # print systematic uncertainty per floated parameter (or set of parameters, if requested)
    d = chan_sys[chan_list[0]]
    m_listofkeys = sorted(d.iterkeys(), key=lambda k: d[k], reverse=True)


    # uncertanties dict
    unc_dict = dict()
    unc_order = []
    for name in m_listofkeys:

        if name in skip_list:
            continue

        printname = name.replace('syserr_','')

        #slabel = label.split('_')
        #label = 'MC stat. (%s)' % slabel[2]

        # skip negligible uncertainties in all requested regions:
        zero = True
        for index, region in enumerate(chan_list):
            percentage = chan_sys[region][name]/chan_sys[region]['nfitted'] * 100.0

            if ('%.4f' % chan_sys[region][name]) != '0.0000' and ('%.2f' % percentage) != '0.00':
                zero = False

        if zero:
            continue

        # Parameter name -> parameter label
        if printname.startswith('gamma_stat'):
            label = 'MC stat.'

        elif printname.startswith('gamma_shape_JFAKE_STAT_jfake'):
            label = 'jet $\\to\\gamma$ fakes stat.'

        elif printname.startswith('gamma_shape_EFAKE_STAT_efake'):
            label = '$e\\to\\gamma$ fakes stat.'

        else:
            if printname in systdict and systdict[printname]:
                label = systdict[printname]
            else:
                label = printname

        # Fill dict
        for index, region in enumerate(chan_list):

            if printname.startswith('gamma') and not region.split('_')[0] in printname:
                continue

            if not label in unc_dict:
                unc_dict[label] = []
                unc_order.append(label)

            if not show_percent:
                unc_dict[label].append("$\\pm %.2f$" % chan_sys[region][name])
            else:
                percentage = chan_sys[region][name]/chan_sys[region]['nfitted'] * 100.0
                if percentage < 1:
                    unc_dict[label].append("$\\pm %.2f\ [%.2f\%%]$" % (chan_sys[region][name], percentage))
                else:
                    unc_dict[label].append("$\\pm %.2f\ [%.1f\%%]$" % (chan_sys[region][name], percentage))



    # fill table
    for label in unc_order:
        tablel.add_row([label,] + unc_dict[label])

    tablel.add_line()

    tablel.save_tex(output_name)
Пример #7
0
def latexfitresults(filename, region_list, sample_list):

    f = ROOT.TFile.Open(filename)
    w = f.Get('w')

    if w is None:
        print "ERROR : Cannot open workspace : w"
        sys.exit(1)

    resultAfterFit = w.obj('RooExpandedFitResult_afterFit')
    if resultAfterFit is None:
        print "ERROR : Cannot open fit result after fit RooExpandedFitResult_afterFit"
        sys.exit(1)

    resultBeforeFit = w.obj('RooExpandedFitResult_beforeFit')
    if resultBeforeFit is None:
        print "ERROR : Cannot open fit result before fit RooExpandedFitResult_beforeFit"
        sys.exit(1)

    # pick up dataset from workspace
    data_set = w.data('obsData')
      
    # pick up channel category (RooCategory) from workspace
    regionCat = w.obj("channelCat")
    # if not blinded:
    #   data_set.table(regionCat).Print("v")

    # find full (long) name list of regions (i.e. short=SR3J, long=SR3J_meffInc30_JVF25pt50)
    regionFullNameList = [ Util.GetFullRegionName(regionCat, region) for region in region_list ]

    # load afterFit workspace snapshot (=set all parameters to values after fit)
    snapshot =  'snapshot_paramsVals_RooExpandedFitResult_afterFit'
    w.loadSnapshot(snapshot)

    if not w.loadSnapshot(snapshot):
        print "ERROR : Cannot load snapshot : ", snapshot
        sys.exit(1)

    # define set, for all names/yields to be saved in
    tablenumbers = {}

    tablenumbers['names'] = region_list

    # make a list of channelCat calls for every region
    regionCatList = [ 'channelCat==channelCat::' + region.Data() for region in regionFullNameList]
  
    # retrieve number of observed (=data) events per region
    regionDatasetList = [data_set.reduce(regioncat) for regioncat in regionCatList]
    for index, data in enumerate(regionDatasetList):
        data.SetName("data_" + region_list[index])
        data.SetTitle("data_" + region_list[index])
    
    nobs_regionList = [ data.sumEntries() for data in regionDatasetList]

    tablenumbers['nobs'] = nobs_regionList


    # FROM HERE ON OUT WE CALCULATE THE FITTED NUMBER OF EVENTS __AFTER__ THE FIT
    
    #get a list of pdf's and variables per region
    pdfinRegionList = [ Util.GetRegionPdf(w, region)  for region in region_list]
    varinRegionList =  [ Util.GetRegionVar(w, region) for region in region_list]
  
    # if splitBins=True get the list of Nbins, binMax and binMin; make a list of new region names for each bin
    varNbinsInRegionList =  [] 
    varBinLowInRegionList = []  
    varBinHighInRegionList =  [] 
    rangeNameBinsInRegionList = [] 
  
    # if blinded=True, set all numbers of observed events to -1
    # if blinded: 
    #     for index, nobs in enumerate(nobs_regionListWithBins):
    #         nobs_regionListWithBins[index] = -1
    # tablenumbers['nobs'] = nobs_regionListWithBins


    #  get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
    rrspdfinRegionList = []
    for index, pdf in enumerate(pdfinRegionList):
        if not pdf:
            print "WARNING: pdf is NULL for index {0}".format(index)
            continue
        prodList = pdf.pdfList()
        foundRRS = 0

        for idx in range(prodList.getSize()):
            if prodList[idx].InheritsFrom("RooRealSumPdf"):
                rrspdfInt =  prodList[idx].createIntegral(ROOT.RooArgSet(varinRegionList[index]))
                rrspdfinRegionList.append(rrspdfInt)

                foundRRS += 1
        if foundRRS >1 or foundRRS==0:
            print " \n\n WARNING: ", pdf.GetName(), " has ", foundRRS, " instances of RooRealSumPdf"
            print pdf.GetName(), " component list:", prodList.Print("v")
    
    # calculate total pdf number of fitted events and error
    nFittedInRegionList =  [ pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)]
    pdfFittedErrInRegionList = [ Util.GetPropagatedError(pdf, resultAfterFit, True) for pdf in rrspdfinRegionList]

    tablenumbers['TOTAL_FITTED_bkg_events']        =  nFittedInRegionList
    tablenumbers['TOTAL_FITTED_bkg_events_err']    =  pdfFittedErrInRegionList
 
    # calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
    for isam, sample in enumerate(sample_list):

        sampleName = getName(sample)

        nSampleInRegionVal = []
        nSampleInRegionError = []
        sampleInAllRegions = ROOT.RooArgSet()
        
        for ireg, region in enumerate(region_list):
            sampleInRegion = getPdfInRegions(w, sample, region)
            sampleInRegionVal = 0.
            sampleInRegionError = 0.

            try: ##if sampleInRegion is not None:
                sampleInRegionVal = sampleInRegion.getVal()
                sampleInRegionError = Util.GetPropagatedError(sampleInRegion, resultAfterFit, True) 
                sampleInAllRegions.add(sampleInRegion)
            except:
                print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region =", region, "\n"
            nSampleInRegionVal.append(sampleInRegionVal)
            nSampleInRegionError.append(sampleInRegionError)
      
        tablenumbers['Fitted_events_'+sampleName]   = nSampleInRegionVal
        tablenumbers['Fitted_err_'+sampleName]   = nSampleInRegionError


  
    # FROM HERE ON OUT WE CALCULATE THE EXPECTED NUMBER OF EVENTS __BEFORRE__ THE FIT
    
    #load beforeFit workspace snapshot (=set all parameters to values before fit)
    w.loadSnapshot('snapshot_paramsVals_RooExpandedFitResult_beforeFit')

    # check if any of the initial scaling factors is != 1
    _result = w.obj('RooExpandedFitResult_beforeFit')
    _muFacs = _result.floatParsFinal()

    for i in xrange(len(_muFacs)):

        if "mu_" in _muFacs[i].GetName() and _muFacs[i].getVal() != 1.0:
            print  " \n WARNING: scaling factor %s != 1.0 (%g) expected MC yield WILL BE WRONG!" % (_muFacs[i].GetName(), _muFacs[i].getVal())
  
    # get a list of pdf's and variables per region
    pdfinRegionList = [ Util.GetRegionPdf(w, region)  for region in region_list]
    varinRegionList =  [ Util.GetRegionVar(w, region) for region in region_list]


    # get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
    rrspdfinRegionList = []
    for index,pdf in enumerate(pdfinRegionList):
        if not pdf: 
            print "WARNING: pdf is NULL for index {0}".format(index)
            continue
        prodList = pdf.pdfList()
        foundRRS = 0
        for idx in range(prodList.getSize()):
            if prodList[idx].InheritsFrom("RooRealSumPdf"):
                rrspdfInt =  prodList[idx].createIntegral(ROOT.RooArgSet(varinRegionList[index]))
                rrspdfinRegionList.append(rrspdfInt)
                foundRRS += 1

        if foundRRS >1 or foundRRS==0:
            print " \n\n WARNING: ", pdf.GetName(), " has ", foundRRS, " instances of RooRealSumPdf"
            print pdf.GetName(), " component list:", prodList.Print("v")

    # calculate total pdf number of expected events and error
    nExpInRegionList =  [ pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)]
    pdfExpErrInRegionList = [ Util.GetPropagatedError(pdf, resultBeforeFit, True)  for pdf in rrspdfinRegionList]
  
    tablenumbers['TOTAL_MC_EXP_BKG_events']    =  nExpInRegionList
    tablenumbers['TOTAL_MC_EXP_BKG_err']    =  pdfExpErrInRegionList
  
    # calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
    for isam, sample in enumerate(sample_list):
      
        sampleName = getName(sample)

        nMCSampleInRegionVal = []
        nMCSampleInRegionError = []
        MCSampleInAllRegions = ROOT.RooArgSet()

        for ireg, region in enumerate(region_list):
            MCSampleInRegion = getPdfInRegions(w,sample,region)
            MCSampleInRegionVal = 0.
            MCSampleInRegionError = 0.

            try:
                MCSampleInRegionVal = MCSampleInRegion.getVal()
                MCSampleInRegionError = Util.GetPropagatedError(MCSampleInRegion, resultBeforeFit, True) 
                MCSampleInAllRegions.add(MCSampleInRegion)
            except:
                print " \n WARNING: sample=", sampleName, " non-existent (empty) in region=",region
                
            nMCSampleInRegionVal.append(MCSampleInRegionVal)
            nMCSampleInRegionError.append(MCSampleInRegionError)

        tablenumbers['MC_exp_events_'+sampleName] = nMCSampleInRegionVal
        tablenumbers['MC_exp_err_'+sampleName] = nMCSampleInRegionError

    # sort the tablenumbers set
    map_listofkeys = tablenumbers.keys()
    map_listofkeys.sort()
  
    return tablenumbers
Пример #8
0
def yieldstable(workspace, samples, channels, output_name, table_name, is_cr=False, show_before_fit=False, unblind=True):

    if is_cr:
        show_before_fit=True
        normalization_factors = get_normalization_factors(workspace)

    #sample_str = samples.replace(",","_")
    from cmdLineUtils import cmdStringToListOfLists
    samples_list = cmdStringToListOfLists(samples)

    regions_list = [ '%s_cuts' % r for r in channels.split(",") ]
    #samples_list = samples.split(",")

    # call the function to calculate the numbers, or take numbers from pickle file  
    if workspace.endswith(".pickle"):
        print "READING PICKLE FILE"
        f = open(workspace, 'r')
        m = pickle.load(f)
        f.close()
    else:
        #m = YieldsTable.latexfitresults(workspace, regions_list, samples_list, 'obsData') 
        m = latexfitresults(workspace, regions_list, samples_list)

        with open(output_name.replace('.tex',  '.pickle'), 'w') as f:
            pickle.dump(m, f)


    regions_names = [ region.replace("_cuts", "").replace('_','\_') for region in m['names'] ]

    field_names = [table_name,] + regions_names
    align = ['l',] + [ 'r' for i in regions_names ]

    samples_list_decoded = []
    for isam, sample in enumerate(samples_list):
        sampleName = getName(sample)
        samples_list_decoded.append(sampleName)

    samples_list = samples_list_decoded

    tablel = LatexTable(field_names, align=align, env=True)
    tablep = PrettyTable(field_names, align=align)

    #  number of observed events
    if unblind:
        row = ['Observed events',] + [ '%d' % n for n in m['nobs'] ]
    else:
        row = ['Observed events',] + [ '-' for n in m['nobs'] ]

    tablel.add_row(row)
    tablep.add_row(row)
    tablel.add_line()
    tablep.add_line()

    #print the total fitted (after fit) number of events
    # if the N_fit - N_error extends below 0, make the error physical , meaning extend to 0
    rowl = ['Expected SM events', ]
    rowp = ['Expected SM events', ]

    for index, n in enumerate(m['TOTAL_FITTED_bkg_events']):

        if (n - m['TOTAL_FITTED_bkg_events_err'][index]) > 0. :
            rowl.append('$%.2f \pm %.2f$' % (n, m['TOTAL_FITTED_bkg_events_err'][index]))
            rowp.append('%.2f &plusmn %.2f' % (n, m['TOTAL_FITTED_bkg_events_err'][index]))

        else:
            #print "WARNING:   negative symmetric error after fit extends below 0. for total bkg pdf:  will print asymmetric error w/ truncated negative error reaching to 0."
            rowl.append('$%.2f_{-%.2f}^{+%.2f}$' % (n, n, m['TOTAL_FITTED_bkg_events_err'][index]))
            rowp.append('%.2f -%.2f +%.2f' % (n, n, m['TOTAL_FITTED_bkg_events_err'][index]))

    tablel.add_row(rowl)
    tablel.add_line()
    tablep.add_row(rowp)
    tablep.add_line()

    map_listofkeys = m.keys()

    # print fitted number of events per sample
    # if the N_fit - N_error extends below 0, make the error physical , meaning extend to 0
    for sample in samples_list:
        for name in map_listofkeys:

            rowl = []
            rowp = []

            if not "Fitted_events_" in name: 
                continue

            sample_name = name.replace("Fitted_events_", "")
            if sample_name != sample:
                continue
        
            rowl.append('%s' % labels_latex_dict.get(sample_name, sample_name).replace('_', '\_'))
            rowp.append('%s' % labels_html_dict.get(sample_name, sample_name))

            for index, n in enumerate(m[name]):

                if ((n - m['Fitted_err_'+sample][index]) > 0.) or not abs(n) > 0.00001:
                    rowl.append('$%.2f \\pm %.2f$' % (n, m['Fitted_err_'+sample][index]))
                    rowp.append('%.2f &plusmn %.2f' % (n, m['Fitted_err_'+sample][index]))

                else:
                    #print "WARNING:   negative symmetric error after fit extends below 0. for sample", sample, "    will print asymmetric error w/ truncated negative error reaching to 0."
                    rowl.append('$%.2f_{-%.2f}^{+%.2f}$' % (n, n, m['Fitted_err_'+sample][index]))
                    rowp.append('%.2f -%.2f +%.2f' % (n, n, m['Fitted_err_'+sample][index]))

            tablel.add_row(rowl)
            tablep.add_row(rowp)
  
    tablel.add_line()
    tablep.add_line()

    # print the total expected (before fit) number of events
    if show_before_fit:

        # if the N_fit - N_error extends below 0, make the error physical , meaning extend to 0
        rowl = ['Before SM events',]
        rowp = ['(before fit) SM events',]

        total_before = []
        purity_before = []
            
        for index, n in enumerate(m['TOTAL_MC_EXP_BKG_events']):

            if regions_names[index].startswith('CR'):
                total_before.append(n)

            rowl.append('$%.2f$' % n)
            rowp.append('%.2f' % n)

        tablel.add_row(rowl)
        tablel.add_line()

        tablep.add_row(rowp)
        tablep.add_line()

        map_listofkeys = m.keys()

        # print expected number of events per sample
        # if the N_fit - N_error extends below 0, make the error physical , meaning extend to 0
        for sample in samples_list:

            for name in map_listofkeys:

                rowl = []
                rowp = []

                if "MC_exp_events_" in name and sample in name:

                    sample_name = name.replace("MC_exp_events_","")

                    if sample_name != sample:
                        continue
              
                    rowl.append('(before fit) %s' % labels_latex_dict.get(sample_name, sample_name).replace('_', '\_'))
                    rowp.append('(before fit) %s' % labels_html_dict.get(sample_name, sample_name))

                    for index, n in enumerate(m[name]):
                    
                        if regions_names[index] == 'CRQ' and sample == 'photonjet':
                            purity_before.append(n)
                        if regions_names[index] == 'CRW' and sample == 'wgamma':
                            purity_before.append(n)
                        if regions_names[index] == 'CRT' and sample == 'ttbarg':
                            purity_before.append(n)

                        rowl.append('$%.2f$' % n)
                        rowp.append('%.2f' % n)

                    tablel.add_row(rowl)
                    tablep.add_row(rowp)
  
        tablel.add_line()
        tablep.add_line()

    if show_before_fit and all([r.startswith('CR') for r in regions_names]) and normalization_factors is not None:

        tablel.add_row(['', '', '', ''])
        tablel.add_line()

        tablep.add_row(['', '', '', ''])
        tablep.add_line()

        # purity
        rowl = ['Background purity',]
        rowp = ['Background purity',]

        for index, region in enumerate(regions_names):

            purity = int(purity_before[index]/total_before[index] * 100.)

            rowl.append('$%i\%%$' % purity)
            rowp.append('%i%%' % purity)
            
        tablel.add_row(rowl)
        tablel.add_line()

        tablep.add_row(rowp)
        tablep.add_line()

        # normalization
        rowl = ['Normalization factor ($\mu$)',]
        rowp = ['Normalization factor (mu)',]
        for region in regions_names:
            rowl.append('$%.2f \pm %.2f$' % normalization_factors[region])
            rowp.append('%.2f &plusmn %.2f' % normalization_factors[region])

        tablel.add_row(rowl)
        tablel.add_line()
        tablep.add_row(rowp)
        tablep.add_line()


    tablel.save_tex(output_name)

    with open(output_name.replace('.tex', '.html'), 'w+') as f:
        f.write(tablep.get_html_string())
Пример #9
0
def latexfitresults(filename, region_list, sample_list):

    f = ROOT.TFile.Open(filename)
    w = f.Get('w')

    doAsym = True

    if w is None:
        print "ERROR : Cannot open workspace : w"
        sys.exit(1)

    resultAfterFit = w.obj('RooExpandedFitResult_afterFit')
    if resultAfterFit is None:
        print(
            "ERROR : Cannot open fit result after fit RooExpandedFitResult_afterFit"
        )
        sys.exit(1)

    resultBeforeFit = w.obj('RooExpandedFitResult_beforeFit')
    if resultBeforeFit is None:
        print(
            "ERROR : Cannot open fit result before fit RooExpandedFitResult_beforeFit"
        )
        sys.exit(1)

    # pick up dataset from workspace
    data_set = w.data('obsData')

    # pick up channel category (RooCategory) from workspace
    regionCat = w.obj("channelCat")

    # find full (long) name list of regions (i.e. short=SR3J, long=SR3J_meffInc30_JVF25pt50)
    regionFullNameList = [
        Util.GetFullRegionName(regionCat, region) for region in region_list
    ]

    # load afterFit workspace snapshot (=set all parameters to values after fit)
    snapshot = 'snapshot_paramsVals_RooExpandedFitResult_afterFit'
    w.loadSnapshot(snapshot)

    if not w.loadSnapshot(snapshot):
        print "ERROR : Cannot load snapshot : ", snapshot
        sys.exit(1)

    # define set, for all names/yields to be saved in
    tablenumbers = dict()

    tablenumbers['names'] = region_list

    # make a list of channelCat calls for every region
    regionCatList = [
        'channelCat==channelCat::' + region.Data()
        for region in regionFullNameList
    ]

    # retrieve number of observed (=data) events per region
    regionDatasetList = [
        data_set.reduce(regioncat) for regioncat in regionCatList
    ]
    for index, data in enumerate(regionDatasetList):
        data.SetName("data_" + region_list[index])
        data.SetTitle("data_" + region_list[index])

    nobs_regionList = [data.sumEntries() for data in regionDatasetList]

    tablenumbers['nobs'] = nobs_regionList

    # FROM HERE ON OUT WE CALCULATE THE FITTED NUMBER OF EVENTS __AFTER__ THE FIT

    #get a list of pdf's and variables per region
    pdfinRegionList = [Util.GetRegionPdf(w, region) for region in region_list]
    varinRegionList = [Util.GetRegionVar(w, region) for region in region_list]

    varNbinsInRegionList = []
    varBinLowInRegionList = []
    varBinHighInRegionList = []
    rangeNameBinsInRegionList = []

    #  get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
    rrspdfinRegionList = []
    for index, pdf in enumerate(pdfinRegionList):
        if not pdf:
            print "WARNING: pdf is NULL for index {0}".format(index)
            continue

        prodList = pdf.pdfList()
        foundRRS = 0

        for idx in range(prodList.getSize()):
            if prodList[idx].InheritsFrom("RooRealSumPdf"):
                rrspdfInt = prodList[idx].createIntegral(
                    ROOT.RooArgSet(varinRegionList[index]))
                rrspdfinRegionList.append(rrspdfInt)
                foundRRS += 1

        if foundRRS > 1 or foundRRS == 0:
            print " \n\n WARNING: ", pdf.GetName(
            ), " has ", foundRRS, " instances of RooRealSumPdf"
            print pdf.GetName(), " component list:", prodList.Print("v")

    # calculate total pdf number of fitted events and error
    nFittedInRegionList = [
        pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)
    ]
    pdfFittedErrInRegionList = [
        Util.GetPropagatedError(pdf, resultAfterFit, doAsym)
        for pdf in rrspdfinRegionList
    ]

    tablenumbers['TOTAL_FITTED_bkg_events'] = nFittedInRegionList
    tablenumbers['TOTAL_FITTED_bkg_events_err'] = pdfFittedErrInRegionList

    # calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
    for isam, sample in enumerate(sample_list):

        sampleName = getName(sample)

        nSampleInRegionVal = []
        nSampleInRegionError = []
        sampleInAllRegions = ROOT.RooArgSet()

        for ireg, region in enumerate(region_list):
            sampleInRegion = getPdfInRegions(w, sample, region)
            sampleInRegionVal = 0.
            sampleInRegionError = 0.

            try:
                sampleInRegionVal = sampleInRegion.getVal()
                sampleInRegionError = Util.GetPropagatedError(
                    sampleInRegion, resultAfterFit, doAsym)
                sampleInAllRegions.add(sampleInRegion)
            except:
                print " \n YieldsTable.py: WARNING: sample =", sampleName, " non-existent (empty) in region =", region, "\n"

            nSampleInRegionVal.append(sampleInRegionVal)
            nSampleInRegionError.append(sampleInRegionError)

        tablenumbers['Fitted_events_' + sampleName] = nSampleInRegionVal
        tablenumbers['Fitted_err_' + sampleName] = nSampleInRegionError

    # FROM HERE ON OUT WE CALCULATE THE EXPECTED NUMBER OF EVENTS __BEFORRE__ THE FIT

    #load beforeFit workspace snapshot (=set all parameters to values before fit)
    w.loadSnapshot('snapshot_paramsVals_RooExpandedFitResult_beforeFit')

    # check if any of the initial scaling factors is != 1
    _result = w.obj('RooExpandedFitResult_beforeFit')
    _muFacs = _result.floatParsFinal()

    for i in xrange(len(_muFacs)):

        if "mu_" in _muFacs[i].GetName() and _muFacs[i].getVal() != 1.0:
            print " \n WARNING: scaling factor %s != 1.0 (%g) expected MC yield WILL BE WRONG!" % (
                _muFacs[i].GetName(), _muFacs[i].getVal())

    # get a list of pdf's and variables per region
    pdfinRegionList = [Util.GetRegionPdf(w, region) for region in region_list]
    varinRegionList = [Util.GetRegionVar(w, region) for region in region_list]

    # get a list of RooRealSumPdf per region (RooRealSumPdf is the top-pdf per region containing all samples)
    rrspdfinRegionList = []
    for index, pdf in enumerate(pdfinRegionList):
        if not pdf:
            print "WARNING: pdf is NULL for index {0}".format(index)
            continue
        prodList = pdf.pdfList()
        foundRRS = 0
        for idx in xrange(prodList.getSize()):
            if prodList[idx].InheritsFrom("RooRealSumPdf"):
                rrspdfInt = prodList[idx].createIntegral(
                    ROOT.RooArgSet(varinRegionList[index]))
                rrspdfinRegionList.append(rrspdfInt)
                foundRRS += 1

        if foundRRS > 1 or foundRRS == 0:
            print " \n\n WARNING: ", pdf.GetName(
            ), " has ", foundRRS, " instances of RooRealSumPdf"
            print pdf.GetName(), " component list:", prodList.Print("v")

    # calculate total pdf number of expected events and error
    nExpInRegionList = [
        pdf.getVal() for index, pdf in enumerate(rrspdfinRegionList)
    ]
    pdfExpErrInRegionList = [
        Util.GetPropagatedError(pdf, resultBeforeFit, doAsym)
        for pdf in rrspdfinRegionList
    ]

    tablenumbers['TOTAL_MC_EXP_BKG_events'] = nExpInRegionList
    tablenumbers['TOTAL_MC_EXP_BKG_err'] = pdfExpErrInRegionList

    # calculate the fitted number of events and propagated error for each requested sample, by splitting off each sample pdf
    for isam, sample in enumerate(sample_list):

        sampleName = getName(sample)

        nMCSampleInRegionVal = []
        nMCSampleInRegionError = []
        MCSampleInAllRegions = ROOT.RooArgSet()

        for ireg, region in enumerate(region_list):
            MCSampleInRegion = getPdfInRegions(w, sample, region)
            MCSampleInRegionVal = 0.
            MCSampleInRegionError = 0.

            try:
                MCSampleInRegionVal = MCSampleInRegion.getVal()
                MCSampleInRegionError = Util.GetPropagatedError(
                    MCSampleInRegion, resultBeforeFit, doAsym)
                MCSampleInAllRegions.add(MCSampleInRegion)
            except:
                print " \n WARNING: sample=", sampleName, " non-existent (empty) in region=", region

            nMCSampleInRegionVal.append(MCSampleInRegionVal)
            nMCSampleInRegionError.append(MCSampleInRegionError)

        tablenumbers['MC_exp_events_' + sampleName] = nMCSampleInRegionVal
        tablenumbers['MC_exp_err_' + sampleName] = nMCSampleInRegionError

    # sort the tablenumbers set
    map_listofkeys = tablenumbers.keys()
    map_listofkeys.sort()

    f.Close()

    return tablenumbers
Пример #10
0
def yieldstable(workspace,
                samples,
                channels,
                output_name,
                table_name='',
                show_before_fit=False,
                unblind=True,
                show_cr_info=False,
                cr_dict={}):

    if show_cr_info:
        show_before_fit = True
        normalization_factors = get_normalization_factors(workspace)

    samples_list = cmdStringToListOfLists(samples)

    regions_list = ['%s_cuts' % r for r in channels.split(",")]

    # call the function to calculate the numbers, or take numbers from pickle file
    if workspace.endswith(".pickle"):
        print "Reading from pickle file"
        f = open(workspace, 'r')
        m = pickle.load(f)
        f.close()
    else:
        #m = YieldsTable.latexfitresults(workspace, regions_list, samples_list, 'obsData')
        m = latexfitresults(workspace, regions_list, samples_list)

        with open(output_name.replace('.tex', '.pickle'), 'w') as f:
            pickle.dump(m, f)

    regions_names = [
        region.replace("_cuts", "").replace('_', '\_') for region in m['names']
    ]

    field_names = [
        table_name,
    ] + regions_names
    align = [
        'l',
    ] + ['r' for i in regions_names]

    samples_list_decoded = []
    for isam, sample in enumerate(samples_list):
        sampleName = getName(sample)
        samples_list_decoded.append(sampleName)

    samples_list = samples_list_decoded

    tablel = LatexTable(field_names, align=align, env=True)

    #  number of observed events
    if unblind:
        row = [
            'Observed events',
        ] + ['%d' % n for n in m['nobs']]
    else:
        row = [
            'Observed events',
        ] + ['-' for n in m['nobs']]

    tablel.add_row(row)
    tablel.add_line()

    # Total fitted (after fit) number of events
    # if the N_fit - N_error extends below 0, make the error physical, meaning extend to 0
    rowl = [
        'Expected SM events',
    ]

    for index, n in enumerate(m['TOTAL_FITTED_bkg_events']):

        if (n - m['TOTAL_FITTED_bkg_events_err'][index]) > 0.:
            rowl.append('$%.2f \pm %.2f$' %
                        (n, m['TOTAL_FITTED_bkg_events_err'][index]))
        else:
            rowl.append('$%.2f_{-%.2f}^{+%.2f}$' %
                        (n, n, m['TOTAL_FITTED_bkg_events_err'][index]))

    tablel.add_row(rowl)
    tablel.add_line()

    map_listofkeys = m.keys()

    # After fit number of events per sample (if the N_fit-N_error extends below 0, make the error physical, meaning extend to 0)
    for sample in samples_list:
        for name in map_listofkeys:

            rowl = []

            if not "Fitted_events_" in name:
                continue

            sample_name = name.replace("Fitted_events_", "")
            if sample_name != sample:
                continue

            rowl.append('%s' % labels_latex_dict.get(
                sample_name, sample_name).replace('_', '\_'))

            for index, n in enumerate(m[name]):

                if ((n - m['Fitted_err_' + sample][index]) >
                        0.) or not abs(n) > 0.00001:
                    rowl.append('$%.2f \\pm %.2f$' %
                                (n, m['Fitted_err_' + sample][index]))
                else:
                    rowl.append('$%.2f_{-%.2f}^{+%.2f}$' %
                                (n, n, m['Fitted_err_' + sample][index]))

            tablel.add_row(rowl)

    tablel.add_line()

    # Total expected (before fit) number of events
    if show_before_fit:

        # if the N_fit - N_error extends below 0, make the error physical, meaning extend to 0
        rowl = [
            'Before fit SM events',
        ]

        total_before = {}
        purity_before = {}

        for index, n in enumerate(m['TOTAL_MC_EXP_BKG_events']):

            reg_name = regions_names[index]

            if cr_dict and reg_name in cr_dict:
                total_before[reg_name] = n

            rowl.append('$%.2f$' % n)

        tablel.add_row(rowl)
        tablel.add_line()

        map_listofkeys = m.keys()

        # Expected number of events per sample (if the N_fit - N_error extends below 0, make the error physical, meaning extend to 0)
        for sample in samples_list:

            for name in map_listofkeys:

                rowl = []

                if "MC_exp_events_" in name and sample in name:

                    sample_name = name.replace("MC_exp_events_", "")

                    if sample_name != sample:
                        continue

                    rowl.append('Before fit %s' % labels_latex_dict.get(
                        sample_name, sample_name).replace('_', '\_'))

                    for index, n in enumerate(m[name]):
                        reg_name = regions_names[index]
                        if cr_dict and reg_name in cr_dict and sample == cr_dict[
                                reg_name]:
                            purity_before[reg_name] = n

                        rowl.append('$%.2f$' % n)

                    tablel.add_row(rowl)

        tablel.add_line()

    if show_cr_info and normalization_factors is not None:

        tablel.add_row(['' for i in range(len(regions_names) + 1)])
        tablel.add_line()

        # purity
        rowl = [
            'Background purity',
        ]

        for region in regions_names:

            try:
                purity = int(
                    round(purity_before[region] / total_before[region] * 100.))
                rowl.append('$%i\%%$' % purity)
            except:
                rowl.append('-')

        tablel.add_row(rowl)
        tablel.add_line()

        # normalization
        rowl = [
            'Normalization factor ($\mu$)',
        ]
        for region in regions_names:
            try:
                rowl.append('$%.2f \pm %.2f$' % normalization_factors[region])
            except:
                rowl.append('-')

        tablel.add_row(rowl)
        tablel.add_line()

    tablel.save_tex(output_name)