Example #1
 def setPhysicsOptions(self, physOptions):
     '''Receive a list of strings with the physics options from command line'''
     for po in physOptions:
         self.opts.parse(po)
     if self.opts.inclusive:
         self.categories = set(
             ['Inc_nolead', 'Inc_nosub', 'Inc_leadtag', 'Inc_subtag'])
     if self.opts.lightConstantsJson:
         self.constants = prettyjson.loads(
             open(self.opts.lightConstantsJson, 'r').read())
         if not self.opts.fitLightEff:
             self.pars.remove('SLightE')
             self.pars.remove('LLightE')
             self.pars.extend([
                 'mc_lead_light_eff', 'mc_sub_light_eff',
                 self.constants['light_SF']['nuisance_name']
             ])
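
A minimal sketch of the 'key=value' option-string convention this hook consumes; the _Options class below is a hypothetical stand-in for the real options object, not part of the original:

class _Options(object):
    def __init__(self):
        self.inclusive = False
        self.lightConstantsJson = ''
    def parse(self, po):
        #options arrive as plain 'key=value' strings from the command line
        key, _, value = po.partition('=')
        setattr(self, key, value == 'True' if value in ('True', 'False') else value)

opts = _Options()
for po in ['inclusive=True', 'lightConstantsJson=light_constants.json']:
    opts.parse(po)
assert opts.inclusive and opts.lightConstantsJson == 'light_constants.json'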
Example #2
import os
import URAnalysis.Utilities.prettyjson as prettyjson
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('wp')
args = parser.parse_args()

jobid = os.environ['jobid']
wpdir = 'plots/%s/ctageff/mass_discriminant/%s' % (jobid, args.wp)
datacard = open('%s/datacard.txt' % wpdir).readlines()

toys_dir = '%s/toys' % wpdir
if not os.path.isdir(toys_dir):
    os.mkdir(toys_dir)

mceffs = prettyjson.loads(open('plots/%s/ctageff/info.json' % jobid).read())
json = {
    'mceffs': mceffs[args.wp],
}
new_card = []
sig_norm = None
#fix datacard text
for line in datacard:
    if line.startswith('rate '):
        tokens = line.split()[1:]
        for tok in tokens:
            if tok != '0.00000' and tok != '1.00000':
                line = line.replace(tok, '1.00000')
    elif ' param ' in line:
        if line.startswith('signal_norm'):
            sig_norm = float(line.split()[2])
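
For illustration, a self-contained run of the rate-line normalization above on a made-up datacard line (the numbers are hypothetical):

#hypothetical datacard 'rate' line: every non-trivial yield is reset to 1.00000
line = 'rate  12.34567 0.00000 1.00000 8.76543\n'
for tok in line.split()[1:]:
    if tok != '0.00000' and tok != '1.00000':
        line = line.replace(tok, '1.00000')
assert line == 'rate  1.00000 0.00000 1.00000 1.00000\n'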
Example #3
 def setPhysicsOptions(self, physOptions):
     '''Receive a list of strings with the physics options from command line'''
     for po in physOptions:
         self.opts.parse(po)
     self.constants = prettyjson.loads(open(self.opts.constants).read())
Example #4
   return ret

parser = ArgumentParser()
parser.add_argument('varname')

args = parser.parse_args()
input_dir = 'plots/%s/ttxsec/%s' % (os.environ['jobid'], args.varname)

mlfit_file = io.root_open(
   '%s/%s.mlfit.root' % (input_dir, args.varname)
   )
datacard_file = io.root_open(
   '%s/%s.root' % (input_dir, args.varname)
) #contains data
binning_file = prettyjson.loads(
   open('%s/%s.binning.json' % (input_dir, args.varname)).read()
)

def ordering(histo):
   multiplier = 1.
   if histo.title.startswith('tt_wrong'):
      multiplier = 20.
   elif histo.title.startswith('tt_right'):
      multiplier = 100000.
   return multiplier*histo.Integral()

postfit_shapes = mlfit_file.shapes_fit_s
categories = [i.name for i in postfit_shapes.keys()]
regex = re.compile(r'^(?P<base_category>[A-Za-z0-9]+)_(?P<njets>\d+)Jets$')
plotter = BasePlotter(
   '%s/postfit' % input_dir,
Example #5
from glob import glob
import URAnalysis.Utilities.prettyjson as prettyjson
from uncertainties import ufloat

def convert(jm):
	ret = {}
	#complete the truncated loop: turn each [value, error] pair into a ufloat
	for key, vals in jm.iteritems():
		ret[key] = ufloat(*vals)
	return ret

jmap = prettyjson.loads(open('yields.json').read())
#convert to ufloats
for sam in jmap:
	for cat in jmap[sam]:
		vals = jmap[sam][cat]
		jmap[sam][cat] = ufloat(*vals)
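
The ufloat(*vals) idiom above assumes each leaf in yields.json is a [value, error] pair; a self-contained illustration with made-up numbers:

from uncertainties import ufloat

vals = [120.0, 11.0]  #hypothetical yield: [central value, uncertainty]
y = ufloat(*vals)     #same as ufloat(120.0, 11.0)
print y               #120.0+/-11.0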
Example #6
      rbin.error = pbin.error
   return ret

parser = ArgumentParser()
parser.add_argument('varname')

args = parser.parse_args()
input_dir = 'plots/%s/ttxsec/%s' % (os.environ['jobid'], args.varname)

datacard = datacard.load('%s/%s.txt' % (input_dir, args.varname))
shapes = io.root_open(
   '%s/%s.root' % (input_dir, args.varname)
) #contains data

sig_yields = prettyjson.loads(
   open('%s/%s.json' % (input_dir, args.varname)).read()
   )

## binning_file = prettyjson.loads(
##    open('%s/%s.binning.json' % (input_dir, args.varname)).read()
## )

categories = datacard.bins
signals = set(datacard.signals)
regex = re.compile(r'^(?P<base_category>[A-Za-z0-9]+)_(?P<njets>\d+)Jets$')

#this ordering is cosmetic, so nothing should break if it fails
def ordering(histo):
   multiplier = 1.
   if histo.title == 'tt_wrong':
      multiplier = 20.
Example #7
import URAnalysis.Utilities.prettyjson as prettyjson
import URAnalysis.Utilities.das as das
from argparse import ArgumentParser
import shutil
from copy import deepcopy
from fnmatch import fnmatch

parser = ArgumentParser(description=__doc__)
parser.add_argument('json')
parser.add_argument('newProd')
parser.add_argument('--valid', action='store_true', help='require the dataset to be valid in DAS; otherwise any status is accepted')
args = parser.parse_args()

shutil.copyfile(args.json, '%s.bak' % args.json)
json = prettyjson.loads(open(args.json).read())

for sample in json:
	das_name = sample['DBSName']
	if sample['name'].startswith('data'):
		print 'This is data! (%s) Skipping..' % das_name
		continue
	elif das_name == "TO BE SET":
		print "The sample %s has no associated DAS entry! Skipping..." % sample['name']
		continue
	elif das_name.startswith('TO BE UPDATED -- '):
		das_name = das_name.replace('TO BE UPDATED -- ', '')
	_, prim_dset, prod, tier = tuple(das_name.split('/'))

	if fnmatch(prod, args.newProd):
		print "%s already matches! Skipping..." % sample['name']
Example #8
def run_module(**kwargs):
   args = Struct(**kwargs)
   redundant_binning = {}
   with open(args.binning) as bins:
      redundant_binning = prettyjson.loads(bins.read())

   #group binning to merge jet categories
   grouping = re.compile(r'^(?P<base_category>[A-Za-z0-9]+)_\d+Jets$')
   binning = {}
   for var, categories in redundant_binning.iteritems():
      if var not in binning: binning[var] = {}
      for category, bin_info in categories.iteritems():
         m = grouping.match(category)
         if not m:
            raise ValueError('Category name %s did not match the regex!' % category)
         base = m.group('base_category')
         if base not in binning[var]:
            binning[var][base] = copy.deepcopy(bin_info)
         else:
            #make sure that all jet categories have the same bin edges
            assert(binning[var][base] == bin_info)

   for info in binning.itervalues():
      edges = set(
         i['low_edge'] for i in info.itervalues()
         )
      edges.update(
         set(
            i['up_edge'] for i in info.itervalues()
            )
         )
      edges = sorted(list(edges))
      info['edges'] = edges

   prefit_norms = {}
   with io.root_open(args.input_shape) as shapes:
      for key in shapes.keys():
         obj = key.ReadObj()
         if not obj.InheritsFrom('TDirectory'): continue
         
         for hkey in obj.GetListOfKeys():
            hist = hkey.ReadObj()
            if not hist.InheritsFrom('TH1'): continue
            
            err = ROOT.Double()
            integral = hist.IntegralAndError(
               1,
               hist.GetNbinsX(),
               err
               )
            val_id = uuid.uuid4().hex
            val =  ROOT.RooRealVar(val_id, val_id, integral)
            val.setError(err)
            prefit_norms['%s/%s' % (obj.GetName(), hist.GetName())] = val
               

   with io.root_open(args.fitresult) as results:
      dirs = ['']
      if args.toys:
         dirs = [i.GetName() for i in results.GetListOfKeys() if i.GetName().startswith('toy_')]

      postfit_table = Table('Bin:%7s', 'Category:%10s', 'Sample:%20s', 'Yield:%5.1f', 'Error:%5.1f')
      postfit_norms = [(i.name, i.value, i.error) for i in results.norm_fit_s]
      postfit_norms.sort(key=lambda x: x[0])
      for name, val, err in postfit_norms:
         bincat, sample = tuple(name.split('/'))
         bin, category = tuple(bincat.split('_'))
         postfit_table.add_line(bin, category, sample, val, err)
      postfit_table.add_separator()
      with open(args.out.replace('.root','.raw_txt'), 'w') as out:
         out.write(postfit_table.__repr__())
      
      with io.root_open(args.out, 'recreate') as output:
         is_prefit_done = False
         for dirname in dirs:
            input_dir = results.Get(dirname) if dirname else results
            if not hasattr(input_dir, 'fit_s'):
               continue

            fit_result = input_dir.fit_s
            pars = asrootpy(fit_result.floatParsFinal())
            prefit_pars = asrootpy(fit_result.floatParsInit())
            tdir = output
            if dirname: 
               tdir = output.mkdir(dirname)
               tdir.cd()
            hcorr = asrootpy(fit_result.correlationHist())
            par_names = set([i.name for i in pars])
            yield_par_names = filter(lambda x: '_FullYield_' in x, par_names)
            hcorr.Write()
            for observable, info in binning.iteritems():
               var_dir = tdir.mkdir(observable)
               var_dir.cd()
               hists = {}
               hists_prefit = {}
               for rvar_name in yield_par_names:
                  category, sample = tuple(rvar_name.split('_FullYield_'))
                  if category not in info: continue
                  if sample not in hists:
                     hists[sample] = plotting.Hist(
                        info['edges'],
                        name = sample
                        )
                     if not is_prefit_done:
                        hists_prefit[sample] = plotting.Hist(
                           info['edges'],
                           name = sample
                           )
                  idx = info[category]['idx']+1
                  hists[sample][idx].value = pars[rvar_name].value
                  error = pars[rvar_name].error
                  hists[sample][idx].error = max(abs(i) for i in error) if isinstance(error, tuple) else error #get max of asym error

                  if not is_prefit_done:
                     hists_prefit[sample][idx].value = prefit_pars[rvar_name].value
                  ## Pre-fit floating parameters have no uncertainties
                  ## hists_prefit[sample][idx].error = max(prefit_pars[rvar_name].error)
                  logging.debug(
                     'Assigning label %s to bin %i for %s/%s' % (rvar_name, idx, category, sample)
                     )
                  hists[sample].xaxis.SetBinLabel(idx, rvar_name)

               for h in hists.itervalues():
                  logging.debug( h.Write() )

               if not is_prefit_done:
                  is_prefit_done = True
                  output.mkdir('prefit').cd()
                  for h in hists_prefit.itervalues():
                     logging.debug( h.Write() )
Example #9
    if jrank == 'leading': passing.append('lead_tagged')
    else: passing.append('sublead_tagged')

    n_total = sum(jmap[i][jrank][qtype] for i in total)
    n_passing = sum(jmap[i][jrank][qtype] for i in passing)
    return n_passing / n_total


jobid = os.environ['jobid']
jsons = glob('plots/%s/btageff/mass_discriminant/*/*/*/flavors_rightw.json' %
             jobid)

json_ufloat = lambda d, n: ufloat(d[n], d['%s_err' % n])

summary = prettyjson.loads(
    open('plots/%s/btageff/summary.json' % jobid).read())
wpoints = summary["mass_discriminant"]['working_points']

jmaps = {}
for jfile in jsons:
    wpoint, category, jetrank = tuple(jfile.split('/')[-4:-1])
    if wpoint not in jmaps: jmaps[wpoint] = {}
    if category not in jmaps[wpoint]: jmaps[wpoint][category] = {}

    jmaps[wpoint][category][jetrank] = prettyjson.loads(open(jfile).read())

#filter out incomplete (service) working points
categories = ['sublead_tagged', 'lead_tagged', 'both_untagged', 'both_tagged']
to_rm = [i for i in jmaps if set(jmaps[i].keys()) != set(categories)]
Example #10

def unfolding_toy_diagnostics(indir, variable):

    plotter = BasePlotter(defaults={
        'clone': False,
        'name_canvas': True,
        'show_title': True,
        'save': {
            'png': True,
            'pdf': False
        }
    }, )
    styles = {
        'dots': {
            'linestyle': 0,
            'markerstyle': 21,
            'markercolor': 1
        },
        'compare': {
            'linestyle': [1, 0],
            'markerstyle': [0, 21],
            'markercolor': [2, 1],
            'linecolor': [2, 1],
            'drawstyle': ['hist', 'pe'],
            'legendstyle': ['l', 'p']
        }
    }

    xaxislabel = set_pretty_label(variable)

    true_distribution = None

    curdir = os.getcwd()
    os.chdir(indir)
    toydirs = get_immediate_subdirectories(".")

    methods = []
    pulls_lists = {}
    pull_means_lists = {}
    pull_mean_errors_lists = {}
    pull_sums_lists = {}
    pull_sigmas_lists = {}
    pull_sigma_errors_lists = {}
    deltas_lists = {}
    delta_means_lists = {}
    delta_mean_errors_lists = {}
    delta_sigmas_lists = {}
    delta_sigma_errors_lists = {}
    ratio_sums_lists = {}
    nneg_bins_lists = {}
    unfoldeds_lists = {}
    unfolded_sigmas_lists = {}
    taus_lists = {}

    histos_created = False
    lists_created = False
    idir = 0
    true_distro = None
    #loop over toys
    for directory in toydirs:
        if not directory.startswith('toy_'): continue
        os.chdir(directory)
        log.debug('Inspecting toy %s' % directory)
        idir = idir + 1
        i = 0
        if not os.path.isfile("result_unfolding.root"):
            raise ValueError('root file not found in %s' % os.getcwd())
        with io.root_open("result_unfolding.root") as inputfile:
            log.debug('Iteration %s over the file' % i)
            i = i + 1
            if not methods:
                keys = [i.name for i in inputfile.keys()]
                for key in keys:
                    if hasattr(getattr(inputfile, key), "hdata_unfolded"):
                        methods.append(key)

            unfolded_hists = [
                inputfile.get('%s/hdata_unfolded' % i) for i in methods
            ]
            unfolded_wps_hists = [
                inputfile.get('%s/hdata_unfolded_ps_corrected' % i)
                for i in methods
            ]
            for unf, unfps, method in zip(unfolded_hists, unfolded_wps_hists,
                                          methods):
                unf.name = method
                unfps.name = method
            if true_distro is None:
                true_distribution = inputfile.true_distribution
                ROOT.TH1.AddDirectory(False)
                true_distro = true_distribution.Clone()
            taus = prettyjson.loads(inputfile.best_taus.GetTitle())
            if len(taus_lists) == 0:
                taus_lists = dict((i, []) for i in taus)
            for i, t in taus.iteritems():
                taus_lists[i].append(t)

            for histo in unfolded_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    for ibin in range(1, nbins + 1):
                        outname = "pull_" + name + "_bin" + str(ibin)
                        pulls_lists[outname] = []
                        outname = "delta_" + name + "_bin" + str(ibin)
                        deltas_lists[outname] = []
                        outname = "unfolded_" + name + "_bin" + str(ibin)
                        unfoldeds_lists[outname] = []
                        unfolded_sigmas_lists[outname] = []
                    outname = "pull_" + name
                    pull_means_lists[outname] = {}
                    pull_mean_errors_lists[outname] = {}
                    pull_sigmas_lists[outname] = {}
                    pull_sigma_errors_lists[outname] = {}

                    outname = "delta_" + name
                    delta_means_lists[outname] = {}
                    delta_mean_errors_lists[outname] = {}
                    delta_sigmas_lists[outname] = {}
                    delta_sigma_errors_lists[outname] = {}

                for ibin in range(1, nbins + 1):
                    outname = "pull_" + name + "_bin" + str(ibin)
                    unfolded_bin_content = histo.GetBinContent(ibin)
                    unfolded_bin_error = histo.GetBinError(ibin)
                    true_bin_content = true_distro.GetBinContent(ibin)
                    true_bin_error = true_distro.GetBinError(ibin)
                    total_bin_error = math.sqrt(unfolded_bin_error**2)  #only the unfolded uncertainty enters the pull; the true-bin error is ignored
                    if (total_bin_error != 0):
                        pull = (unfolded_bin_content -
                                true_bin_content) / total_bin_error
                    else:
                        pull = 9999
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, pull %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, pull))
                    pulls_lists[outname].append(pull)
                    outname = "delta_" + name + "_bin" + str(ibin)
                    delta = unfolded_bin_content - true_bin_content
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, delta %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, delta))
                    deltas_lists[outname].append(delta)
                    outname = "unfolded_" + name + "_bin" + str(ibin)
                    unfoldeds_lists[outname].append(unfolded_bin_content)
                    unfolded_sigmas_lists[outname].append(unfolded_bin_error)

            nneg_bins_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("nneg_bins")
            ]
            nneg_bins_hists = [asrootpy(i.ReadObj()) for i in nneg_bins_hists]
            for histo in nneg_bins_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    nneg_bins_lists[outname] = []
                outname = name
                nneg_bins_lists[outname].append(histo.GetBinContent(1))

            pull_sums_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("sum_of_pulls")
            ]
            pull_sums_hists = [asrootpy(i.ReadObj()) for i in pull_sums_hists]
            for histo in pull_sums_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    pull_sums_lists[outname] = []
                outname = name
                pull_sums_lists[outname].append(histo.GetBinContent(1))

            ratio_sums_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("sum_of_ratios")
            ]
            ratio_sums_hists = [
                asrootpy(i.ReadObj()) for i in ratio_sums_hists
            ]
            for histo in ratio_sums_hists:
                #create ratio/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    ratio_sums_lists[outname] = []
                outname = name
                ratio_sums_lists[outname].append(histo.GetBinContent(1))

            #after the first iteration on the file all the lists are created
            lists_created = True

        os.chdir("..")

    #create histograms
    #histo containers
    taus = {}
    for name, vals in taus_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if val_min == val_max:
            if tau_nbins % 2:  #if odd
                val_min, val_max = val_min - 0.01, val_min + 0.01
            else:
                brange = 0.02
                bwidth = brange / tau_nbins
                val_min, val_max = val_min - 0.01 + bwidth / 2., val_min + 0.01 + bwidth / 2.
        title = '#tau choice - %s ;#tau;N_{toys}' % (name)
        histo = Hist(tau_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        taus[name] = histo

    pulls = {}
    for name, vals in pulls_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        abs_max = max(abs(val_min), abs(val_max))
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Pulls - %s - %s ;Pull;N_{toys}' % (binno, method)
        histo = Hist(pull_nbins, -abs_max, abs_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        pulls[name] = histo

    deltas = {}
    for name, vals in deltas_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Deltas - %s - %s ;Delta;N_{toys}' % (binno, method)
        histo = Hist(delta_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        deltas[name] = histo

    unfoldeds = {}
    for name, vals in unfoldeds_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Unfoldeds - %s - %s ;Unfolded;N_{toys}' % (binno, method)
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        unfoldeds[name] = histo

    nneg_bins = {}
    for name, vals in nneg_bins_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0 if val_min > 0 else val_min - 1
        val_max = max(vals)
        val_max = 0 if val_max < 0 else val_max + 1
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            set_trace()
            _, method, _ = tuple(name.split('_'))
        title = 'N of negative bins - %s ;N. neg bins;N_{toys}' % method
        histo = Hist(int(val_max - val_min + 1),
                     val_min,
                     val_max,
                     name=name,
                     title=title)
        for val in vals:
            histo.Fill(val)
        nneg_bins[name] = histo

    pull_sums = {}
    for name, vals in pull_sums_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Pull sums - %s ;#Sigma(pull)/N_{bins};N_{toys}' % method
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        pull_sums[name] = histo

    ratio_sums = {}
    for name, vals in ratio_sums_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Ratio sums - %s;#Sigma(ratio)/N_{bins};N_{toys}' % method
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        ratio_sums[name] = histo

    unfolded_sigmas = {}
    for name, vals in unfolded_sigmas_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Unfolded uncertainties - %s - %s ;Uncertainty;N_{toys}' % (
            binno, method)
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        unfolded_sigmas[name] = histo

    for name, histo in pulls.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        pull_means_lists[general_name][idx] = mean
        pull_mean_errors_lists[general_name][idx] = meanError
        pull_sigmas_lists[general_name][idx] = sigma
        pull_sigma_errors_lists[general_name][idx] = sigmaError

    for name, histo in deltas.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        delta_means_lists[general_name][idx] = mean
        delta_mean_errors_lists[general_name][idx] = meanError
        delta_sigmas_lists[general_name][idx] = sigma
        delta_sigma_errors_lists[general_name][idx] = sigmaError

    outfile = rootpy.io.File("unfolding_diagnostics.root", "RECREATE")
    outfile.cd()

    pull_means = {}
    pull_sigmas = {}
    pull_means_summary = {}
    pull_sigmas_summary = {}
    delta_means = {}
    delta_sigmas = {}
    delta_means_summary = {}
    delta_sigmas_summary = {}

    for outname, pmeans in pull_means_lists.iteritems():
        outname_mean = outname + "_mean"
        outtitle = "Pull means - " + outname + ";Pull mean; N_{toys}"
        pull_mean_min = min(pmeans.values())
        pull_mean_max = max(pmeans.values())
        pull_mean_newmin = pull_mean_min - (pull_mean_max -
                                            pull_mean_min) * 0.5
        pull_mean_newmax = pull_mean_max + (pull_mean_max -
                                            pull_mean_min) * 0.5
        pull_means[outname] = plotting.Hist(pull_mean_nbins,
                                            pull_mean_newmin,
                                            pull_mean_newmax,
                                            name=outname_mean,
                                            title=outtitle)

        outname_mean_summary = outname + "_mean_summary"
        outtitle_mean_summary = "Pull mean summary - " + outname
        histocloned = true_distro.Clone(outname_mean_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Pull mean'
        histocloned.title = outtitle_mean_summary
        pull_means_summary[outname] = histocloned

        for idx, pmean in pmeans.iteritems():
            pull_means[outname].Fill(pmean)
            histocloned[idx].value = pmean
            histocloned[idx].error = pull_mean_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(pmeans.values()),
                                       max(pmeans.values()))

    for outname, psigmas in pull_sigmas_lists.iteritems():
        outname_sigma = outname + "_sigma"
        outtitle_sigma = "Pull #sigma's - " + outname + ";Pull #sigma; N_{toys}"
        pull_sigma_min = min(psigmas.values())
        pull_sigma_max = max(psigmas.values())
        pull_sigma_newmin = pull_sigma_min - (pull_sigma_max -
                                              pull_sigma_min) * 0.5
        pull_sigma_newmax = pull_sigma_max + (pull_sigma_max -
                                              pull_sigma_min) * 0.5
        pull_sigmas[outname] = plotting.Hist(pull_sigma_nbins,
                                             pull_sigma_newmin,
                                             pull_sigma_newmax,
                                             name=outname_sigma,
                                             title=outtitle_sigma)

        outname_sigma_summary = outname + "_sigma_summary"
        outtitle_sigma_summary = "Pull #sigma summary - " + outname
        histocloned = true_distro.Clone(outname_sigma_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Pull #sigma'
        histocloned.title = outtitle_sigma_summary
        pull_sigmas_summary[outname] = histocloned

        for idx, psigma in psigmas.iteritems():
            pull_sigmas[outname].Fill(psigma)
            histocloned[idx].value = psigma
            histocloned[idx].error = pull_sigma_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(psigmas.values()),
                                       max(psigmas.values()))

    for outname, dmeans in delta_means_lists.iteritems():
        outname_mean = outname + "_mean"
        outtitle = "Delta means - " + outname + ";Delta mean; N_{toys}"
        delta_mean_min = min(dmeans.values())
        delta_mean_max = max(dmeans.values())
        delta_mean_newmin = delta_mean_min - (delta_mean_max -
                                              delta_mean_min) * 0.5
        delta_mean_newmax = delta_mean_max + (delta_mean_max -
                                              delta_mean_min) * 0.5
        delta_means[outname] = plotting.Hist(delta_mean_nbins,
                                             delta_mean_newmin,
                                             delta_mean_newmax,
                                             name=outname_mean,
                                             title=outtitle)

        outname_mean_summary = outname + "_mean_summary"
        outtitle_mean_summary = "Delta mean summary - " + outname
        histocloned = true_distro.Clone(outname_mean_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Delta mean'
        histocloned.title = outtitle_mean_summary
        delta_means_summary[outname] = histocloned

        for idx, dmean in dmeans.iteritems():
            delta_means[outname].Fill(dmean)
            histocloned[idx].value = dmean
            histocloned[idx].error = delta_mean_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(dmeans.values()),
                                       max(dmeans.values()))

    for outname, dsigmas in delta_sigmas_lists.iteritems():
        outname_sigma = outname + "_sigma"
        outtitle_sigma = "Delta #sigma's - " + outname + ";Delta #sigma; N_{toys}"
        delta_sigma_min = min(dsigmas.values())
        delta_sigma_max = max(dsigmas.values())
        delta_sigma_newmin = delta_sigma_min - (delta_sigma_max -
                                                delta_sigma_min) * 0.5
        delta_sigma_newmax = delta_sigma_max + (delta_sigma_max -
                                                delta_sigma_min) * 0.5
        delta_sigmas[outname] = plotting.Hist(delta_sigma_nbins,
                                              delta_sigma_newmin,
                                              delta_sigma_newmax,
                                              name=outname_sigma,
                                              title=outtitle_sigma)

        outname_sigma_summary = outname + "_sigma_summary"
        outtitle_sigma_summary = "Delta #sigma summary - " + outname
        histocloned = true_distro.Clone(outname_sigma_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Delta #sigma'
        histocloned.title = outtitle_sigma_summary
        delta_sigmas_summary[outname] = histocloned

        for idx, dsigma in dsigmas.iteritems():
            delta_sigmas[outname].Fill(dsigma)
            histocloned[idx].value = dsigma
            histocloned[idx].error = delta_sigma_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(dsigmas.values()),
                                       max(dsigmas.values()))

    unfolded_summary = {}
    unfolded_average = {}
    unfolded_envelope = {}
    for name, histo in unfoldeds.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        if general_name not in unfolded_summary:
            histo = true_distro.Clone("%s_unfolded_summary" % general_name)
            outtitle_unfolded_summary = "Unfolded summary - " + general_name
            histo.Reset()
            histo.xaxis.title = xaxislabel
            histo.yaxis.title = 'N_{events}'
            histo.title = outtitle_unfolded_summary
            unfolded_summary[general_name] = histo

            unfolded_envelope[general_name] = histo.Clone(
                "%s_unfolded_envelope" % general_name)
            unfolded_average[general_name] = histo.Clone(
                "%s_unfolded_average" % general_name)

        unfolded_summary[general_name][idx].value = mean
        unfolded_summary[general_name][idx].error = meanError

        unfolded_envelope[general_name][idx].value = mean
        unfolded_envelope[general_name][idx].error = sigma

        unfolded_average[general_name][idx].value = mean
        unfolded_average[general_name][idx].error = \
           unfolded_sigmas['%s_bin%i' % (general_name, idx)].GetMean()

    plotter.set_subdir('taus')
    for name, histo in taus.iteritems():
        #canvas = plotter.create_and_write_canvas_single(0, 21, 1, False, False, histo, write=False)
        plotter.canvas.cd()
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)

        info = plotter.make_text_box(
            'mode #tau = %.5f' % histo[histo.GetMaximumBin()].x.center,
            position=(plotter.pad.GetLeftMargin(), plotter.pad.GetTopMargin(),
                      0.3, 0.025))
        info.Draw()

        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('pulls')
    for name, histo in pulls.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_means.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()
    for name, histo in pull_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()

    plotter.set_subdir('pull_summaries')
    for name, histo in pull_means_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        line = ROOT.TLine(histo.GetBinLowEdge(1), 0,
                          histo.GetBinLowEdge(histo.GetNbinsX() + 1), 0)
        line.Draw("same")
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_sigmas_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        line = ROOT.TLine(histo.GetBinLowEdge(1), 1,
                          histo.GetBinLowEdge(histo.GetNbinsX() + 1), 1)
        line.Draw("same")
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('deltas')
    for name, histo in deltas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in delta_means.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()
    for name, histo in delta_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()

    plotter.set_subdir('delta_summaries')
    for name, histo in delta_means_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in delta_sigmas_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolding_unc')
    for name, histo in unfolded_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded')
    for name, histo in unfoldeds.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_summaries')
    for name, histo in unfolded_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    for name, histo in unfolded_summary.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_average')
    for name, histo in unfolded_average.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        #set_trace()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_envelope')
    for name, histo in unfolded_envelope.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('figures_of_merit')
    for name, histo in nneg_bins.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_sums.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in ratio_sums.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    outfile.close()
    os.chdir(curdir)
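
For reference, the two per-bin figures of merit filled in the toy loop above reduce to the following; the function names are descriptive, not from the original:

def pull(unfolded, unfolded_err, true):
    #residual in units of the unfolded uncertainty; the loop above
    #falls back to a 9999 sentinel when the error is zero
    return (unfolded - true) / unfolded_err if unfolded_err else 9999

def delta(unfolded, true):
    #plain residual between unfolded and true bin content
    return unfolded - true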
Example #11
parser.add_argument(
    '--show',
    default='exp0',
    help='what should be shown',
    choices=['exp0', 'exp+1', 'exp+2', 'exp-1', 'exp-2', 'obs'])
parser.add_argument('--out', default='comparison')
args = parser.parse_args()

colors = ['black', '#4169e1', '#cc2c44', '#008000', '#ff9a00', '#b000ff']
if len(colors) < len(args.limits):
    raise RuntimeError('I have more limits than colors to display them!')

limits = []
for info in args.limits:
    fname, label = tuple(info.split(':'))
    jmap = prettyjson.loads(open(fname).read())
    graph = Graph(len(jmap.keys()))
    points = [(float(p), float(vals[args.show]))
              for p, vals in jmap.iteritems()]
    points.sort()
    for idx, info in enumerate(points):
        p, val = info
        graph.SetPoint(idx, p, val)
    graph.title = label
    limits.append(graph)

outdir = os.path.dirname(args.out)
base = os.path.basename(args.out)
plotter = BasePlotter(outdir)
plotter.overlay(
    limits,
Example #12
	def __init__(self, mode, lumi=None):
		'Inits the plotter, mode is either "electrons" or "muons" and identifies what will be plotted'
		lumi = lumi if lumi > 0 else None
		filtering = lambda x: not os.path.basename(x).startswith('data') or \
			 (os.path.basename(x).startswith('data') and mode[:-1] in os.path.basename(x).lower())
		self.tt_to_use = 'ttJets'
		self.tt_shifted = {
			'mtop_up'   : 'ttJets_mtopup',
			'mtop_down' : 'ttJets_mtopdown',
			'scale_up'   : 'ttJets_scaleup', 
			'scale_down' : 'ttJets_scaledown',
			}
		jobid = os.environ['jobid']
		files = glob.glob('results/%s/htt_simple/*.root' % jobid)
		files = filter(filtering, files)
		logging.debug('files found %s' % files.__repr__())
		lumis = glob.glob('inputs/%s/*.lumi' % jobid)
		lumis = filter(filtering, lumis)
		logging.debug('lumi files found %s' % lumis.__repr__())
		self.tt_lhe_weights = prettyjson.loads(
			open('inputs/%s/ttJets.weights.json' % jobid).read()
			)
		outdir= 'plots/%s/htt/%s' % (jobid, mode)
		super(HTTPlotter, self).__init__(
			files, lumis, outdir, styles, None, lumi
			#defaults = {'save' : {'png' : True, 'pdf' : False}}
			)

		#select only mode subdir
		for info in self.views.itervalues():
			info['view'] = views.SubdirectoryView(info['view'], mode)
			info['unweighted_view'] = views.SubdirectoryView(info['unweighted_view'], mode)
		self.views['data']['view'] = urviews.BlindView(
			self.views['data']['view'],
			r'\w+/tight/MTHigh/(?:(?:m_tt)|(?:.+_ctstar)|(?:cdelta_ld)|(?:hframe_ctheta_d))'
			)

		self.defaults = {
			'blurb' : [13, self.views['data']['intlumi']]
			}
		self.jobid = jobid

		self.views['ttJets_preselection'] = self.views['ttJets']

		self.views['ttJets_right'] = {
			'view' : self.create_tt_subsample(
				['right'],
				't#bar{t} matched',
				ROOT.kOrange + 9
				)
			}
		self.views['ttJets_matchable'] = {
			'view' : self.create_tt_subsample(
				['matchable'], #,  'right_th', 'wrong'],
				't#bar{t} matchable',
				ROOT.kRed - 1
				)
			}
		self.views['ttJets_unmatchable'] = {
			'view' : self.create_tt_subsample(
				['unmatchable'],#  'right_th', 'wrong'],
				't#bar{t} unmatchable',
				ROOT.kMagenta - 2
				)
			}

		self.views['ttJets_other'] = {
			'view' : self.create_tt_subsample(
				['noslep'], 
				'Other t#bar{t}',
				ROOT.kCyan - 1
				)
			}

		##
		## General plotting views
		##
		self.views['ttJets_generic'] = {
			'view' : views.TitleView(
				views.StyleView(
					views.SumView(
						self.views['ttJets_other'      ]['view'],
						self.views['ttJets_unmatchable']['view'],
						self.views['ttJets_matchable'  ]['view'],
						self.views['ttJets_right'      ]['view'],						
						),
					fillcolor = ROOT.kOrange + 1
					),
				't#bar{t}'
				)
			}

		self.views['EWK'] = {
			'view' : views.TitleView(
				views.StyleView(
					views.SumView(*[self.get_view(i) for i in ['[WZ][WZ]', '[WZ]Jets', 'tt[WZ]*']]),
					fillcolor = ROOT.kGreen + 1
					),
					'EW'
				)
			}

		self.views['AllButTT'] = {
			'view' : views.TitleView(
				views.StyleView(
					views.SumView(
						self.get_view('EWK'),
						self.get_view('QCD*'),
						self.get_view('single*'),
						),
					fillcolor = ROOT.kGray
					),
				'Other'
				)
			}

		##
		## signal sub-samples
		## 
		added_samples = []
		for sample in self.views.keys():
			if sample.startswith('AtoTT_'): 
				raise ValueError("I did not implement it yet remember to swap the H and A")
			if sample.startswith('HtoTT_'):# or sample.startswith('AtoTT_'):
				_, mass, width, pI = tuple(sample.split('_'))				
				samtype = 'int' if pI == 'Int' else 'sgn'
				if pI == 'Int':
					sub_name = 'ggA_neg-%s-%s-%s' % (samtype, width, mass)
					self.views[sub_name] = {
						'view' : views.ScaleView(
							self.create_subsample(sample, ['negative'], '%s negative' % sample, color='#9999CC'),
							-1
							)
						}
					added_samples.append(sub_name)
				sub_name = 'ggA_pos-%s-%s-%s' % (samtype, width, mass)
				self.views[sub_name] = {
					'view' : self.create_subsample(sample, ['positive'], '%s positive' % sample, color='#9999CC')
					}
				added_samples.append(sub_name)

		self.generic_mcs = [
			'QCD*',
			'EWK',
			'single*',			
			'ttJets_generic',
			]

		self.mc_samples = self.generic_mcs
		self.split_mcs = [
			'AllButTT',
			'ttJets_other',
			'ttJets_unmatchable',
			'ttJets_matchable',
			'ttJets_right',
			]

		self.card_names = {
			#synced
			'QCDmujets' if mode == 'muons' else 'QCDejets' : ['QCD*'],
			'TT' : ['ttJets_other', 'ttJets_unmatchable', 'ttJets_matchable', 'ttJets_right'],
			'VV' : ['[WZ][WZ]'],
			'TTV': ['tt[WZ]*'],
			'WJets'	: ['W[0-9]Jets'],
			'ZJets'	: ['ZJets'],
			'tChannel' : ['single*_[st]channel'],
			'tWChannel' : ['single*_tW'],
			'data_obs'	: ['data']
			}
		self.card_names.update({i : [i] for i in added_samples})

		self.systematics = {
			#JES JER
			'CMS_scale_j_13TeV' : {
				'samples' : ['.*'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'jes_up'),
				'-' : lambda x: x.replace('nosys', 'jes_down'),
				'value' : 1.00,
				},
			'CMS_res_j_13TeV' : {
				'samples' : ['.*'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'jer_up'),
				'-' : lambda x: x.replace('nosys', 'jer_down'),
				'constants' : ('jer_down', 'jer_up'),
				'value' : 1.00,				
				},
			#BTAG
			'CMS_eff_b_13TeV' : {
				'samples' : ['.*'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'beff_up'),
				'-' : lambda x: x.replace('nosys', 'beff_down'),
				'value' : 1.00,
				},
			'CMS_fake_b_13TeV' : {
				'samples' : ['.*'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'bfake_up'),
				'-' : lambda x: x.replace('nosys', 'bfake_down'),
				'value' : 1.00,
				},
			#SCALE/PS
			'QCDscaleMERenorm_TT' : {
				'samples' : ['TT$'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'renorm_up'),
				'-' : lambda x: x.replace('nosys', 'renorm_down'),
				'value' : 1.00,
				'scales' : (1./self.tt_lhe_weights['3'], 1./self.tt_lhe_weights['6']),
				},
			'QCDscaleMEFactor_TT' : {
				'samples' : ['TT$'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'factor_up'),
				'-' : lambda x: x.replace('nosys', 'factor_down'),
				'value' : 1.00,
				'scales' : (1./self.tt_lhe_weights['1'], 1./self.tt_lhe_weights['2']),
				},
			'QCDscaleMERenormFactor_TT' : {
				'samples' : ['TT$'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'renfactor_up'),
				'-' : lambda x: x.replace('nosys', 'renfactor_down'),
				'value' : 1.00,
				'scales' : (1./self.tt_lhe_weights['4'], 1./self.tt_lhe_weights['8']),
				},
			'QCDscalePS_TT' : {
				'samples' : ['TT$'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'scale_up'),
				'-' : lambda x: x.replace('nosys', 'scale_down'),
				'value' : 1.00,
				},
			#TMass
			'TMass' : {
				'samples' : ['TT$'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'mtop_up'),
				'-' : lambda x: x.replace('nosys', 'mtop_down'),
				'value' : 1.00,
				},
			#OTHER
			'CMS_pileup' : {
				'samples' : ['.*'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'pu_up'),
				'-' : lambda x: x.replace('nosys', 'pu_down'),
				},
			'CMS_METunclustered_13TeV' : {
				'samples' : ['.*'],
				'categories' : ['.*'],
				'type' : 'shape',
				'+' : lambda x: x.replace('nosys', 'met_up'),
				'-' : lambda x: x.replace('nosys', 'met_down'),
				},
			'pdf' : {
				'samples' : ['TT$'],
				'categories' : ['.*'],
				'type' : 'pdf',
				'pdftype' : 'nnpdf'
				},

			}
		self.card = None
		self.binning = {
			}
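
A standalone sketch of the '+'/'-' convention in the systematics table above: each entry maps a nominal histogram path to its shifted variant. The path below is hypothetical:

systematic = {
	'+' : lambda x: x.replace('nosys', 'jes_up'),
	'-' : lambda x: x.replace('nosys', 'jes_down'),
	}
print systematic['+']('muons/nosys/m_tt')   #muons/jes_up/m_tt
print systematic['-']('muons/nosys/m_tt')   #muons/jes_down/m_tt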
Example #13
#! /bin/env python

import URAnalysis.Utilities.prettyjson as prettyjson
from pprint import pprint
import argparse
import os
from fnmatch import fnmatch

parser = argparse.ArgumentParser()
parser.add_argument('query', help='shell-style glob to match sample names (uses fnmatch)')

args = parser.parse_args()

samples_path = os.path.join(os.environ['URA_PROJECT'], 'samples.json')
if not os.path.isfile(samples_path):
    raise RuntimeError('%s not found' % samples_path)

samples = prettyjson.loads(open(samples_path).read())

for sample in samples:
    if fnmatch(sample['name'], args.query):
        pprint(sample)
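
Since the lookup goes through fnmatch, the query is a shell-style glob rather than a regex; a quick illustration with hypothetical sample names:

from fnmatch import fnmatch

print fnmatch('ttJets_mtopup', 'ttJets*')    #True
print fnmatch('data_SingleMu', 'ttJets*')    #False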
Example #14
    def doParametersOfInterest(self):
        """Create POI and other parameters, and define the POI set."""
        with open(self.opts.yieldsJson) as jfile:
            yields_json = prettyjson.loads(jfile.read())
        pois = []
        vars_created = []  #this is just to make sure we don't have name clashes
        categories = self.DC.list_of_bins()
        #group categories according to bin number
        #form a dict with {base category name : [category names]}
        groups = {}
        for category in categories:
            m = self.regex.match(category)
            if not m:
                raise ValueError('Category name %s did not match the regex!' %
                                 category)
            base = m.group('base_category')
            njets = int(m.group('njets'))
            if base not in groups:
                groups[base] = []
            groups[base].append(category)

        #loop over the base groups, for each of them build
        #  -- ncategories - 1 POI as contribution ratios
        #  -- 1 POI with the total number of events
        #  -- the last ratio is (1 - sum(other ratios)); to keep it from going
        #     negative it is linked to the jet category w/ the most events
        for base, cats in groups.iteritems():
            single_category = (len(cats) == 1)
            for signal in self.DC.signals:
                #make global SF, our REAL POI
                global_sf = '%s_FullYield_%s' % (base, signal)
                total_yield = sum(yields_json[i][signal] for i in cats)
                self.modelBuilder.doVar(
                    '%s[%f,0,%f]' %
                    (global_sf, total_yield, total_yield *
                     4))  #range between 0 and 4 times the total yield
                pois.append(global_sf)
                vars_created.append(global_sf)
                self.available_vars.add(global_sf)
                #if there is only one category use directly the total yield
                #nothing more to do here
                if (single_category): continue

                #sort categories according to the yields of this signal
                yields_by_category = [(i, yields_json[i][signal])
                                      for i in cats]
                yields_by_category.sort(key=lambda x: x[1])
                #make categories ratios (all but last)
                ratio_names = []
                for full_category, category_yield in yields_by_category[:-1]:
                    cat_ratio = '%sYieldRatio_%s' % (full_category, signal)
                    start_val = category_yield / total_yield
                    self.modelBuilder.doVar('%s[%.3f,0,1]' %
                                            (cat_ratio, start_val))
                    pois.append(cat_ratio)
                    ratio_names.append(cat_ratio)
                    vars_created.append(cat_ratio)
                    self.available_vars.add(cat_ratio)
                #make last ratio, does not create a POI
                full_category, _ = yields_by_category[-1]
                cat_ratio = '%sYieldRatio_%s' % (full_category, signal)
                self.modelBuilder.factory_(
                    'expr::{NAME}("1-{IDXS}", {RATIO_NAMES})'.format(
                        NAME=cat_ratio,
                        IDXS='-'.join(
                            ['@%i' % i for i in range(len(ratio_names))]),
                        RATIO_NAMES=','.join(ratio_names)))
                vars_created.append(cat_ratio)
                self.available_vars.add(cat_ratio)

                for full_category, _ in yields_by_category:
                    cat_sf = '%sYield_%s' % (full_category, signal)
                    cat_ratio = '%sYieldRatio_%s' % (full_category, signal)
                    self.modelBuilder.factory_(
                        'expr::{NAME}("@0*@1", {GLOBAL}, {CATEGORY})'.format(
                            NAME=cat_sf, CATEGORY=cat_ratio, GLOBAL=global_sf))
                    vars_created.append(cat_sf)
                    self.available_vars.add(cat_sf)
        assert (len(vars_created) == len(set(vars_created)))
        self.modelBuilder.doSet('POI', ','.join(pois))
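
A worked numeric sketch of the ratio parameterization built above, with made-up yields for one base category split over three jet bins:

yields = {'cat_1Jets': 10.0, 'cat_2Jets': 30.0, 'cat_3Jets': 60.0}  #hypothetical
total = sum(yields.values())  #100.0 -> the FullYield POI
ordered = sorted(yields.items(), key=lambda x: x[1])
ratios = [y / total for _, y in ordered[:-1]]  #free ratio POIs: 0.1, 0.3
last = 1 - sum(ratios)  #constrained ratio: 0.6, tied to the highest-yield bin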
Example #15
args = parser.parse_args()

if len(args.inputs) == 1:
   shutil.copy(args.inputs[0], args.output)
else:
   nevts = 0
   nweighted = 0
   sumw = None
   partial = args.output.replace('.json', '.part.json')
   tmp = args.output.replace('.json', '.tmp.json')
   tmp2 = args.output.replace('.json', '.tmp2.json')
   with open(partial, 'w') as p:
      p.write(prettyjson.dumps({}))

   for jin in args.inputs:
      jmap = prettyjson.loads(open(jin).read())
      nevts += jmap['events']
      nweighted += jmap['weightedEvents']
      if 'sum_weights' in jmap:
         if sumw is None:
            sumw = [i for i in jmap['sum_weights']]
         else:
            if len(jmap['sum_weights']) != len(sumw):
               raise ValueError(
                  'I got a vector of size %d and'
                  ' I was expecting it %d long' %
                  (len(jmap['sum_weights']), len(sumw)))
            for i in range(len(sumw)):
               sumw[i] += jmap['sum_weights'][i]

      with open(tmp, 'w') as t:
         t.write(prettyjson.dumps(jmap['lumimap']))
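Since the merge logic above is easy to get wrong, here is a standalone sketch of the element-wise sum_weights accumulation, using plain lists and made-up values instead of the json maps:

# Standalone sketch of the element-wise sum_weights merge, with plain lists
# and hypothetical values instead of the json maps read above.
def merge_sum_weights(acc, incoming):
    if acc is None:
        return list(incoming)          # first file seen: copy the vector
    if len(incoming) != len(acc):
        raise ValueError('size mismatch: %d vs %d' % (len(incoming), len(acc)))
    return [a + b for a, b in zip(acc, incoming)]

sumw = None
for vec in ([1.0, 2.0], [0.5, 0.5]):
    sumw = merge_sum_weights(sumw, vec)
assert sumw == [1.5, 2.5]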
Example #16
def run_module(**kwargs):
    args = Struct(**kwargs)
    if not args.name:
        args.name = args.var
    results = [prettyjson.loads(open(i).read())[-1] for i in args.results]
    #set_trace()
    results.sort(key=lambda x: x['median'])

    nevts_graph = plotting.Graph(len(results))
    upbound_graph = plotting.Graph(len(results))
    max_unc = 0.
    bound_range = args.vrange  #results[-1]['up_edge'] - results[0]['up_edge']
    step = results[1]['up_edge'] - results[0]['up_edge']
    #bound_range += step
    bound_min = results[0]['up_edge'] - step

    for idx, info in enumerate(results):
        nevts_graph.SetPoint(idx, info['median'],
                             info["one_sigma"]["relative"])
        upbound_graph.SetPoint(idx, info['up_edge'],
                               info["one_sigma"]["relative"])
        if info["one_sigma"]["relative"] > max_unc:
            max_unc = info["one_sigma"]["relative"]

    canvas = plotting.Canvas()
    nevts_graph.Draw('APL')
    nevts_graph.GetXaxis().SetTitle('average number of events')
    nevts_graph.GetYaxis().SetTitle('relative fit uncertainty')
    canvas.SaveAs(os.path.join(args.outdir, 'nevts_%s.png' % args.name))

    tf1 = Plotter.parse_formula(
        'scale / (x - shift) + offset',
        'scale[1,0,10000],shift[%.2f,%.2f,%.2f],offset[0, 0, 1]' %
        (bound_min, bound_min - 2 * step, bound_min + step))
    # ROOT.TF1('ret', '[0]/(x - [1])', -3, 1000)
    tf1.SetRange(0, 1000)
    tf1.SetLineColor(ROOT.EColor.kAzure)
    tf1.SetLineWidth(3)
    result = upbound_graph.Fit(tf1, 'MES')  #WL

    scale = tf1.GetParameter('scale')
    shift = tf1.GetParameter('shift')
    offset = tf1.GetParameter('offset')

    upbound_graph.Draw('APL')
    upbound_graph.GetXaxis().SetTitle('upper bin edge')
    upbound_graph.GetYaxis().SetTitle('relative fit uncertainty')
    if args.fullrange:
        upbound_graph.GetYaxis().SetRangeUser(offset, max_unc * 1.2)
    upbound_graph.GetXaxis().SetRangeUser(shift, (shift + bound_range) * 1.2)

    # normalized distance from the curve's asymptotic corner (shift, offset);
    # both coordinates are squared so this is a proper Euclidean metric
    delta = lambda x, y: ((x - shift) / bound_range)**2 + (
        (y - offset) / (max_unc - offset))**2
    points = ((bound_min + step) + i * (bound_range / 100.)
              for i in xrange(100))
    math_best_x = min(points, key=lambda x: delta(x, tf1.Eval(x)))
    math_best_y = tf1.Eval(math_best_x)

    ## math_best_x = math.sqrt(scale*bound_range*(max_unc-offset))+shift
    ## math_best_y = tf1.Eval(math_best_x) #max_unc*math.sqrt(scale)+offset
    best = min(results, key=lambda x: abs(x['up_edge'] - math_best_x))

    upbound_best = plotting.Graph(1)
    upbound_best.SetPoint(0, best['up_edge'], best["one_sigma"]["relative"])
    upbound_best.markerstyle = 29
    upbound_best.markersize = 3
    upbound_best.markercolor = 2
    upbound_best.Draw('P same')

    print math_best_x, math_best_y
    print best['up_edge'], best["one_sigma"]["relative"]

    math_best = plotting.Graph(1)
    math_best.SetPoint(0, math_best_x, math_best_y)
    math_best.markerstyle = 29
    math_best.markersize = 3
    math_best.markercolor = ROOT.EColor.kAzure
    math_best.Draw('P same')

    canvas.SaveAs(os.path.join(args.outdir, 'upbound_%s.png' % args.name))
    json = {'best': best['up_edge'], 'unc': best["one_sigma"]["relative"]}
    with open(os.path.join(args.outdir, '%s.json' % args.name), 'w') as jfile:
        jfile.write(prettyjson.dumps(json))
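To make the selection criterion explicit, here is a self-contained toy version with hypothetical fit parameters: scan the fitted scale/(x - shift) + offset curve and keep the point closest, in range-normalized coordinates, to the corner (shift, offset) the curve approaches asymptotically.

# Toy version of the "best upper edge" selection above; all numbers are
# hypothetical stand-ins for the fitted parameters and scan range.
scale, shift, offset = 5.0, 10.0, 0.02
bound_range, max_unc = 100.0, 0.30
curve = lambda x: scale / (x - shift) + offset
dist = lambda x: ((x - shift) / bound_range)**2 + \
    ((curve(x) - offset) / (max_unc - offset))**2
points = [shift + 1.0 + i * (bound_range / 100.) for i in range(100)]
best_x = min(points, key=dist)   # elbow of the uncertainty-vs-edge trend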
Example #17
import URAnalysis.Utilities.prettyjson as prettyjson
import os
from uncertainties import ufloat
from pdb import set_trace
import math
from URAnalysis.Utilities.quad import quad
import URAnalysis.Utilities.tables as tables

json_ufloat = lambda d, n: ufloat(d[n], d['%s_err' % n])

jobid = os.environ['jobid']
summary = prettyjson.loads(
    open('plots/%s/ctageff/summary.json' % jobid).read())
summary = summary["mass_discriminant"]
wpoints = summary['working_points']

mc_effs = prettyjson.loads(
    open('plots/%s/ctageff/mc_effs.json' % jobid).read())

#set_trace()
#set_trace()
nevts = json_ufloat(summary, 'nevts')
lcfrac = json_ufloat(summary, 'leadCFrac')  #leading-jet charm fraction
scfrac = json_ufloat(summary, 'subCFrac')  #sub-leading-jet charm fraction
llfrac = 1 - lcfrac - scfrac  #all-light fraction

factor = lambda leff, ceff, cfrac: ceff * cfrac + leff * (1 - cfrac)
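For intuition, a hypothetical evaluation of the factor above, which is the inclusive tag rate of a mixed charm/light sample; all numbers are made up:

# Hypothetical numbers: 20% charm efficiency, 1% light mistag and a 30% charm
# fraction give an inclusive tag rate of 0.2*0.3 + 0.01*(1 - 0.3) = 0.067
assert abs(factor(leff=0.01, ceff=0.2, cfrac=0.3) - 0.067) < 1e-12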
Example #18
import sys
import os
import logging
from argparse import ArgumentParser
import URAnalysis.Utilities.prettyjson as prettyjson

logging.basicConfig(level=logging.WARNING, stream=sys.stderr)

__doc__ = 'computes the equivalent luminosity of an MC sample'

parser = ArgumentParser(description=__doc__)
parser.add_argument('metaJson', type=str,
                    help='json file containing the '
                    'meta-information for the sample')
parser.add_argument('samples', type=str,
                    help='json file containing the '
                    'samples definitions')
args = parser.parse_args()

sample_name = os.path.basename(args.metaJson).split('.')[0]
meta_info = prettyjson.loads(open(args.metaJson).read())
samples = prettyjson.loads(open(args.samples).read())

xsection = -1
for sample in samples:
   if sample['name'] == sample_name:
      xsection = sample['xsection']
      break

if xsection == -1:
   logging.warning("Sample %s not found!" % sample_name)

print meta_info['weightedEvents']/float(xsection)
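With hypothetical numbers: a sample with 1e6 weighted events and a 50 pb cross section corresponds to an equivalent luminosity of 20000/pb; the units follow whatever the sample definition uses for xsection.

# hypothetical check: 1e6 weighted events over a 50 pb cross section
assert 1.0e6 / 50.0 == 20000.0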


Example #19
parser.add_argument('--sample-def', dest='sample_def', type=str,
                    help='json file containing the samples definition ')
parser.add_argument('--crab', dest='crab', type=int,
                    default=0, help='Version of crab to use')
parser.add_argument('--local', dest='local', action='store_true',
                    default=False, help='Submit to local (NOT SUPPORTED YET)')

args = parser.parse_args()

if not (bool(args.crab) or args.local):
   raise ValueError('You did not specify how you want to run! (crab2/3 or local)')

if not os.path.isfile(args.sample_def):
   raise ValueError('file %s does not exist' % args.sample_def)
   
all_samples = [Struct(**i) for i in prettyjson.loads(open(args.sample_def).read())]
to_submit = reduce(
   lambda x,y: x+y, 
   [fnselect(all_samples, pattern, key=lambda x: x.name) for pattern in args.samples],
   []
)
#remove duplicate samples selected by multiple patterns

to_submit = set(to_submit)
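The select-then-deduplicate step above is worth a self-contained sketch; here fnmatch.filter stands in for the repo's fnselect helper (an assumption), and the sample names are made up:

# Self-contained sketch of the pattern selection and de-duplication above,
# using fnmatch.filter in place of the repo's fnselect helper (assumption).
import fnmatch
names = ['TTJets', 'WJets', 'DYJets', 'TTJets_scaleup']   # hypothetical
patterns = ['TT*', '*Jets']
selected = reduce(
   lambda x, y: x + y,
   [fnmatch.filter(names, p) for p in patterns],
   [])
selected = set(selected)   # 'TTJets' matched twice, kept once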

jobs = [
   Job(
      'make_pat_and_ntuples.py',
      args.jobid,
      sample.name,
      sample.DBSName,
Example #20
import os
import ROOT
from argparse import ArgumentParser
from rootpy.plotting import Hist
import URAnalysis.Utilities.prettyjson as prettyjson

ROOT.gStyle.SetOptStat(0)

parser = ArgumentParser()
parser.add_argument('bin_directory', help='where all the trial bins are stored')
parser.add_argument('binning', nargs='+', help='input json files')
parser.add_argument('-o', dest='out', help='output filename', default='out.png')

args = parser.parse_args()
npoints = len(args.binning) - 1
input_dirs = ['_'.join(args.binning[:idx]) for idx in range(2, len(args.binning)+1)]
inputs = [os.path.join(args.bin_directory, i, 'toys/shapes/tt_right.json') for i in input_dirs]

jsons = []
for i in inputs:
   jsons.append(
      prettyjson.loads(open(i).read()) 
      )
jsons.sort(key=lambda x: len(x))
max_bins = len(jsons[-1])

colors = ['#FFCC66', '#2aa198', '#9999CC', '#5555ab', 
          '#aaaad5', '#ab5555', '#d5aaaa', '#668db3',
          '#6699ff', '#ff8066', '#a12a33', '#2aa15d',
          '#a15d2a', '#000000', '#a206f4', '#06f4a2',
          '#f406cf', '#cff406']
hists = [Hist(max_bins, 0.5, max_bins+0.5, title='Bin %i' % (i+1)) for i in range(max_bins)]
low_edge = args.binning[0]
for hist, color, up_edge in zip(hists, colors, args.binning[1:]):
   hist.markercolor = color
   hist.fillcolor = color
   hist.title += ' [%s, %s]' % (low_edge, up_edge)
Example #21
#! /bin/env python
__doc__ = 'simple script to read a json file and dump part of it into a new json file'

import URAnalysis.Utilities.prettyjson as prettyjson
from argparse import ArgumentParser

parser = ArgumentParser(description=__doc__)
parser.add_argument('input', metavar='input.json', type=str, help='input json')
parser.add_argument('output',
                    metavar='output.json',
                    type=str,
                    help='Json output file name')
parser.add_argument(
    'toget',
    metavar='this:that',
    type=str,
    help='colon-separated list of things to get, means json[this][that]')
#TODO: further syntax may be implemented to support list slicing and regex matching

args = parser.parse_args()
json = prettyjson.loads(open(args.input).read())
chain = args.toget.split(':')
to_get = json
for i in chain:
    to_get = to_get[i]

with open(args.output, 'w') as out:
    out.write(prettyjson.dumps(to_get))
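A hypothetical invocation, assuming the snippet is saved as json_get.py (the file name and json contents are illustrative, not from the source):

# hypothetical usage, assuming this snippet is saved as json_get.py and
# summary.json contains something like {"fit": {"median": 1.02, ...}}:
#   python json_get.py summary.json median.json fit:median
# this walks json['fit']['median'] and writes that sub-object to median.json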
Example #22
externals = glob(
        os.path.join(swdir, 'inputs', 'data', '*.*')
)

#external inputs, jobid specific: PU reweighting, TTSolver training (prob.root), etc.
externals_jobid_specific = glob(
        os.path.join(swdir, 'inputs', jobid, 'INPUT', '*.*')
)
transferfiles = [exe_cfg]+externals+externals_jobid_specific

transferfiles_config = ', '.join(transferfiles)

filesperjob = 10
splitting = None
if os.path.isfile(args.splitting):
  splitting = prettyjson.loads(open(args.splitting).read())
else:
  filesperjob = int(args.splitting)

for sf in samplefiles:
  infile = os.path.join(inputdir, sf)
  sample = os.path.basename(sf).split('.txt')[0]
  if splitting:
    filesperjob = get_best_style(sample, splitting)
  if not any(i.match(sample) for i in filters): continue
  jobpath = os.path.join(
          jobdir, 
          sample
          )
  os.mkdir(jobpath)
  infiledes = open(infile, 'r')