示例#1
0
import os

# Default configuration: (ROOT file path, label) pairs and the output path.
file_names = [
   #
   # CvsL
   #
   ('CvsL_76_17Jan/ROCs/5565121efc_ROCs.root', '7.6 Optimization'),
   ('test/ROCs.root', 'separate testing'),
   #
   # CvsB (currently disabled)
   #
   # ('CvsB_76_18Jan/ROCs/82d2a6148a_ROCs.root', '7.5 Optimization'),
   # ('CvsB_76_18Jan/ROCs/ff481cb37f_ROCs.root', '7.6 Optimization'),
]
output = 'test/test.png'

# A JSON configuration passed on the command line replaces the defaults.
# Only the last entry of args.input is read.
if args.input:
   with open(args.input[-1]) as conf_file:
      jconf = prettyjson.loads(conf_file.read())
   # JSON has no tuples, so the pairs come back as lists; restore tuples.
   file_names = [tuple(i) for i in jconf['file_names']]
   output = jconf['output']
   graph_path = jconf['graph_path']

# Dump the configuration in JSON for bookkeeping.
# NOTE(review): graph_path is only assigned above when args.input is set;
# it has to be defined earlier in the script for the default case -- confirm.
jconf = {
   'file_names' : file_names,
   'output' : output,
   'graph_path' : graph_path,
}

# Strip only the final extension. Splitting on the first '.' would turn
# paths such as './plots/roc.png' or '../roc.png' into an empty base name.
jout = os.path.splitext(output)[0]
with open('%s.json' % jout, 'w') as out:
   out.write(prettyjson.dumps(jconf))
示例#2
0
def processNtuple(infile_name, outfile_name, variables, sample, 
                  flav_weight=False, pteta_weight=False, cat_weight=False,
                  tag=''):  
  """Copy selected variables of one input tree into a new training tree.

  The input file name must look like ``<path>/<prefix>_<category>_<FLAVOR>.root``.
  ``<category>`` is the name of the tree read from the input file and
  ``<FLAVOR>`` selects the directory used inside the weights file.

  Args:
    infile_name: path of the input ROOT file (see naming rule above).
    outfile_name: path of the output ROOT file; it is recreated.
    variables: mapping of output branch name to an info dict with keys
      'type' (branch type code, looked up case-insensitively as i/l/f),
      'default', and either 'var' (attribute of the input entry, with an
      optional 'idx' selecting one element) or 'fcn' plus 'args' (name of
      a module-level function called as ``fcn(entry, *args)``).
    sample: sample name used to locate ``data/<sample>_weights.root``
      (relative to the current working directory).
    flav_weight: if true, fill ``flavour_weight`` with the per-flavour weight.
    pteta_weight: if true, fill ``kinematic_weight`` from the 'kin'
      histogram, looked up by (jetPt, |jetEta|).
    cat_weight: if true, fill ``slcategory_weight`` from the 'bias'
      histogram, looked up by (jetPt, |jetEta|).
    tag: label echoed in the final log message.

  When any weight flag is set, ``total_weight`` holds the product of the
  requested weights.

  Raises:
    ValueError: if infile_name does not match the expected pattern.
    Exception: any error raised while evaluating a variable is logged
      together with the variable name and then re-raised.
  """
  log.debug("processing %s --> %s" % (infile_name, outfile_name))
  type_dict = {
    'i' : int,
    'l' : long,
    'f' : float
    }
  
  # Raw string: the backslashes are meant for the regex engine.
  fname_regex = re.compile(r'[a-zA-Z_0-9\/]*\/?[a-zA-Z_0-9]+_(?P<category>[a-zA-Z]+)_(?P<flavor>[A-Z]+)\.root')
  match = fname_regex.match(infile_name)

  if not match:
    raise ValueError("Could not match the regex to the file %s" % infile_name)
  flavor = match.group('flavor')
  full_category = match.group('category')
  weight_tfile = None
  flav_dir = None
  tfile_category = ''
  weights = None
  flavor_weight = 1.
  category_weights = None
  try:
    if pteta_weight or flav_weight or cat_weight:
      weight_tfile = io.root_open('data/%s_weights.root' % sample)
      flav_dir = weight_tfile.Get(flavor)
      categories = [i.name for i in flav_dir.keys()]
      #match existing categories to this one, which might be a subset of general category stored in the root file
      tfile_category = [i for i in categories if i in full_category][0] 

    if pteta_weight:    
      weights = flav_dir.Get('%s/kin' % tfile_category)

    if flav_weight:
      flavor_weight = prettyjson.loads(
        weight_tfile.flavour_weights.String().Data()
        )[flavor]

    #put bias weights
    if cat_weight:
      category_weights = flav_dir.Get('%s/bias' % tfile_category)
  
    with io.root_open(outfile_name, 'recreate') as outfile:
      outtree = Tree('tree', title='c-tagging training tree')
      branches_def = dict((name, info['type']) for name, info in variables.iteritems())
      if pteta_weight:
        branches_def['kinematic_weight'] = 'F'
        branches_def['total_weight'] = 'F'
      if flav_weight:
        branches_def['flavour_weight'] = 'F'
        branches_def['total_weight'] = 'F'
      if cat_weight:
        branches_def['slcategory_weight'] = 'F'
        branches_def['total_weight'] = 'F'
        
      outtree.create_branches(
        branches_def
        )
      with io.root_open(infile_name) as infile:
        intree = infile.Get(full_category)
        for e_idx, entry in enumerate(intree):
          if e_idx % 1000 == 0:
            log.debug("processing entry: %i" % e_idx)
          for name, info in variables.iteritems():
            # The default stays in place when the source attribute is
            # missing or the requested index is out of range.
            value = info['default']
            try:
              if 'var' in info and hasattr(entry, info['var']):
                var = getattr(entry, info['var'])
                vtype = type_dict[info['type'].lower()]
                if 'idx' in info:
                  if var.size() > info['idx']:
                    value = vtype(var[info['idx']])
                else:
                  value = vtype(var)
              elif 'fcn' in info:
                vtype = type_dict[info['type'].lower()]
                fcn = globals()[info['fcn']]
                value = vtype(fcn(entry, *info['args']))
            except Exception:
              # Fail loudly instead of opening an interactive debugger,
              # which would block unattended or parallel jobs.
              log.error("something went wrong processing variable %s (entry %i)" % (name, e_idx))
              raise
            
            #if value is nan, then set to default (maybe better if you skip the whole jet)
            value = info['default'] if math.isnan(value) else value
            setattr(outtree, name, value)
          total_weight = 1.
          if pteta_weight:
            bin_idx = weights.FindFixBin(entry.jetPt, abs(entry.jetEta))
            outtree.kinematic_weight = weights[bin_idx].value
            total_weight *= weights[bin_idx].value
          if flav_weight:
            outtree.flavour_weight = flavor_weight
            total_weight *= flavor_weight
          if cat_weight:
            bin_idx = category_weights.FindFixBin(entry.jetPt, abs(entry.jetEta))          
            outtree.slcategory_weight = category_weights[bin_idx].value
            total_weight *= category_weights[bin_idx].value
          if 'total_weight' in branches_def:
            outtree.total_weight = total_weight
          outtree.Fill()
  finally:
    # The weight histograms belong to this file, so it is closed only
    # after the event loop has finished (or failed).
    if weight_tfile is not None:
      weight_tfile.Close()
  log.info("processing done [%s]" % tag)
def processNtuple(infile_name, outfile_name, sample, 
                  flav_weight=False, pteta_weight=False, cat_weight=False,
                  tag=''):  
  """Copy an ntuple and append per-entry weight branches to the copy.

  The input file name must look like ``<path>/<prefix>_<category>_<FLAVOR>.root``.
  ``<FLAVOR>`` selects the directory used inside the weights file and
  ``<category>`` is matched against the categories stored there.

  The input file is copied to ``outfile_name``; the tree named ``ttree`` in
  the copy is opened in update mode and gains four float branches:
  ``weight_etaPt``, ``weight_category``, ``weight_flavour`` and ``weight``
  (product of the requested weights, 1 when none is requested).  Branches
  of weights that were not requested are filled with 0.

  Args:
    infile_name: path of the input ROOT file (see naming rule above).
    outfile_name: destination path of the copy that gets updated.
    sample: sample name used to locate ``../data_trees/<sample>_weights.root``
      (relative to the current working directory).
    flav_weight: if true, apply the per-flavour weight.
    pteta_weight: if true, apply the 'kin' histogram weight looked up by
      (Jet_pt, |Jet_eta|).
    cat_weight: if true, apply the 'bias' histogram weight looked up by
      (Jet_pt, |Jet_eta|).
    tag: accepted for interface compatibility; not used in this function.

  Raises:
    ValueError: if infile_name does not match the expected pattern.
  """
  log.debug("processing %s --> %s" % (infile_name, outfile_name))
  # Wall-clock reference for the "Total time" report at the end.
  total_start = time.time()
  proc_name = multiprocessing.current_process().name
  
  # Raw string: the backslashes are meant for the regex engine.
  fname_regex = re.compile(r'[a-zA-Z_0-9\/]*\/?[a-zA-Z_0-9]+_(?P<category>[a-zA-Z]+)_(?P<flavor>[A-Z]+)\.root')
  match = fname_regex.match(infile_name)

  if not match:
    raise ValueError("Could not match the regex to the file %s" % infile_name)
  flavor = match.group('flavor')
  full_category = match.group('category')
  weight_tfile = None
  flav_dir = None
  tfile_category = ''
  weights = None
  flavor_weight = 1.
  category_weights = None
  try:
    if pteta_weight or flav_weight or cat_weight:
      weight_tfile = io.root_open('../data_trees/%s_weights.root' % sample)
      flav_dir = weight_tfile.Get(flavor)
      categories = [i.name for i in flav_dir.keys()]
      #match existing categories to this one, which might be a subset of general category stored in the root file
      tfile_category = [i for i in categories if i in full_category][0] 

    if pteta_weight:    
      weights = flav_dir.Get('%s/kin' % tfile_category)
    if flav_weight:
      flavor_weight = prettyjson.loads(
        weight_tfile.flavour_weights.String().Data()
        )[flavor]
    #put bias weights
    if cat_weight:
      category_weights = flav_dir.Get('%s/bias' % tfile_category)
  
    print("Starting to process %s" %infile_name)
  
    # make copy of input ntuple to be safe and work with that
    print("copying %s to %s" %(infile_name, outfile_name))
    shutil.copy2(infile_name, outfile_name)
  
    # retrieve the ntuple of interest (from the copy, not the original)
    inFile = TFile.Open( outfile_name, "update" )
    try:
      inTreeName = "ttree"
      myTree = inFile.Get( inTreeName )
  
      #create new branches
      weight_etaPt = array( "f", [ 0. ] )
      weight_category = array( "f", [ 0. ] )
      weight_flavour = array( "f", [ 0. ] )
      weight = array( "f", [ 0. ] )

      b_weight_etaPt = myTree.Branch( "weight_etaPt", weight_etaPt, 'weight_etaPt/F' )
      b_weight_category = myTree.Branch( "weight_category", weight_category, 'weight_category/F' )
      b_weight_flavour = myTree.Branch( "weight_flavour", weight_flavour, 'weight_flavour/F' )
      b_weight = myTree.Branch( "weight", weight, 'weight/F' )
      # connect branches needed for weight calculation
      # assumes Jet_pt / Jet_eta are single-float branches -- TODO confirm
      Jet_pt = array( "f", [ 0. ] )
      Jet_eta = array( "f", [ 0. ] )
      myTree.SetBranchAddress( 'Jet_pt', Jet_pt )
      myTree.SetBranchAddress( 'Jet_eta', Jet_eta )

      ### actual loop ###
      entries = myTree.GetEntriesFast()
      print("%s: Starting event loop" %(proc_name))
      reportEveryNevents = 50000
      startTime = time.time()
      for ientry in xrange(entries):
        # load the entry into the connected buffers
        myTree.GetEntry(ientry)
  
        # timing: report progress and rate over the last batch of entries
        if ientry % reportEveryNevents == 0 and ientry != 0:
          print("%s: Progress: %3.1f%%" %(proc_name, float(ientry)/(entries)*100))
          endTime = time.time()
          deltaTime = endTime - startTime
          rate = float(reportEveryNevents)/deltaTime
          print("%s: current rate: %5.2f Hz" %(proc_name, rate))
          startTime = time.time()
    
        # obtain the different weights
        weight[0] = 1.
        if pteta_weight:
          bin_idx = weights.FindFixBin(Jet_pt[0], abs(Jet_eta[0]))
          weight_etaPt[0] = weights[bin_idx].value
          weight[0] *= weights[bin_idx].value
        if flav_weight:
          weight_flavour[0] = flavor_weight
          weight[0] *= flavor_weight
        if cat_weight:
          bin_idx = category_weights.FindFixBin(Jet_pt[0], abs(Jet_eta[0]))          
          weight_category[0] = category_weights[bin_idx].value
          weight[0] *= category_weights[bin_idx].value
        # and fill only the new branches (the existing ones are untouched)
        b_weight_etaPt.Fill()
        b_weight_category.Fill()
        b_weight_flavour.Fill()
        b_weight.Fill()
    
      inFile.Write()
    finally:
      # Release the updated file even when the loop fails.
      if inFile:
        inFile.Close()
  finally:
    # The weight histograms belong to this file, so it is closed only
    # after the event loop has finished (or failed).
    if weight_tfile is not None:
      weight_tfile.Close()
  # Elapsed wall-clock time of this call.
  print("%s: Total time: %5.2f s" %(proc_name, time.time() - total_start))
示例#4
0
from argparse import ArgumentParser
import rootpy.io as io
from rootpy.plotting import Hist2D
import prettyjson
import glob
import re
import rootpy
from pdb import set_trace
import copy
import binning 
import ROOT
log = rootpy.log["/compute_weights"]
log.setLevel(rootpy.log.INFO)

# Yield tables, nested as flavour -> category -> bin -> yield.
# Paths are relative to the current working directory.
with open('data/qcd_yields.json') as yields_file:
    qcd_yields = prettyjson.loads(yields_file.read())
with open('data/ttjets_yields.json') as yields_file:
    ttj_yields = prettyjson.loads(yields_file.read())

# Independent copies sharing the layout of the QCD table.
# NOTE(review): presumably overwritten with computed values further down;
# that code is not visible here -- confirm.
biased_qcd = copy.deepcopy(qcd_yields)

cat_weight = copy.deepcopy(qcd_yields)
bin_weight = copy.deepcopy(qcd_yields)

# Key sets are taken from the first flavour/category, i.e. every flavour is
# assumed to carry the same categories and bins.  Indexing the result of
# keys() relies on Python 2, where it returns a list.
flavours = qcd_yields.keys()
categories = qcd_yields[flavours[0]].keys()
bins = qcd_yields[flavours[0]][categories[0]].keys()

# Sum of every leaf value of the QCD table.
total_yield = sum( k for i in qcd_yields.itervalues() for j in i.itervalues() for k in j.itervalues() )


flav_weights = {}
from argparse import ArgumentParser
import rootpy.io as io
from rootpy.plotting import Hist2D
import prettyjson
import glob
import re
import rootpy
from pdb import set_trace
import copy
import binning 
import ROOT
log = rootpy.log["/compute_weights"]
log.setLevel(rootpy.log.INFO)

# Yield tables, nested as flavour -> category -> bin -> yield.
# Paths are relative to the current working directory.
with open('../data_trees/qcd_yields.json') as yields_file:
    qcd_yields = prettyjson.loads(yields_file.read())
with open('../data_trees/ttjets_yields.json') as yields_file:
    ttj_yields = prettyjson.loads(yields_file.read())

# Independent copies sharing the layout of the QCD table.
# NOTE(review): presumably overwritten with computed values further down;
# that code is not visible here -- confirm.
biased_qcd = copy.deepcopy(qcd_yields)

cat_weight = copy.deepcopy(qcd_yields)
bin_weight = copy.deepcopy(qcd_yields)

# Key sets are taken from the first flavour/category, i.e. every flavour is
# assumed to carry the same categories and bins.  Indexing the result of
# keys() relies on Python 2, where it returns a list.
flavours = qcd_yields.keys()
categories = qcd_yields[flavours[0]].keys()
bins = qcd_yields[flavours[0]][categories[0]].keys()

# Sum of every leaf value of the QCD table.
total_yield = sum( k for i in qcd_yields.itervalues() for j in i.itervalues() for k in j.itervalues() )


flav_weights = {}
示例#6
0
def processNtuple(infile_name,
                  outfile_name,
                  variables,
                  sample,
                  flav_weight=False,
                  pteta_weight=False,
                  cat_weight=False,
                  tag=''):
    """Copy selected variables of one input tree into a new training tree.

    The input file name must look like
    ``<path>/<prefix>_<category>_<FLAVOR>.root``.  ``<category>`` is the name
    of the tree read from the input file and ``<FLAVOR>`` selects the
    directory used inside the weights file.

    Args:
        infile_name: path of the input ROOT file (see naming rule above).
        outfile_name: path of the output ROOT file; it is recreated.
        variables: mapping of output branch name to an info dict with keys
            'type' (branch type code, looked up case-insensitively as
            i/l/f), 'default', and either 'var' (attribute of the input
            entry, with an optional 'idx' selecting one element) or 'fcn'
            plus 'args' (name of a module-level function called as
            ``fcn(entry, *args)``).
        sample: sample name used to locate ``data/<sample>_weights.root``
            (relative to the current working directory).
        flav_weight: if true, fill ``flavour_weight`` with the per-flavour
            weight.
        pteta_weight: if true, fill ``kinematic_weight`` from the 'kin'
            histogram, looked up by (jetPt, |jetEta|).
        cat_weight: if true, fill ``slcategory_weight`` from the 'bias'
            histogram, looked up by (jetPt, |jetEta|).
        tag: label echoed in the final log message.

    When any weight flag is set, ``total_weight`` holds the product of the
    requested weights.

    Raises:
        ValueError: if infile_name does not match the expected pattern.
        Exception: any error raised while evaluating a variable is logged
            together with the variable name and then re-raised.
    """
    log.debug("processing %s --> %s" % (infile_name, outfile_name))
    type_dict = {'i': int, 'l': long, 'f': float}

    # Raw string: the backslashes are meant for the regex engine.
    fname_regex = re.compile(
        r'[a-zA-Z_0-9\/]*\/?[a-zA-Z_0-9]+_(?P<category>[a-zA-Z]+)_(?P<flavor>[A-Z]+)\.root'
    )
    match = fname_regex.match(infile_name)

    if not match:
        raise ValueError("Could not match the regex to the file %s" %
                         infile_name)
    flavor = match.group('flavor')
    full_category = match.group('category')
    weight_tfile = None
    flav_dir = None
    tfile_category = ''
    weights = None
    flavor_weight = 1.
    category_weights = None
    try:
        if pteta_weight or flav_weight or cat_weight:
            weight_tfile = io.root_open('data/%s_weights.root' % sample)
            flav_dir = weight_tfile.Get(flavor)
            categories = [i.name for i in flav_dir.keys()]
            #match existing categories to this one, which might be a subset of general category stored in the root file
            tfile_category = [i for i in categories if i in full_category][0]

        if pteta_weight:
            weights = flav_dir.Get('%s/kin' % tfile_category)

        if flav_weight:
            flavor_weight = prettyjson.loads(
                weight_tfile.flavour_weights.String().Data())[flavor]

        #put bias weights
        if cat_weight:
            category_weights = flav_dir.Get('%s/bias' % tfile_category)

        with io.root_open(outfile_name, 'recreate') as outfile:
            outtree = Tree('tree', title='c-tagging training tree')
            branches_def = dict(
                (name, info['type']) for name, info in variables.iteritems())
            if pteta_weight:
                branches_def['kinematic_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            if flav_weight:
                branches_def['flavour_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            if cat_weight:
                branches_def['slcategory_weight'] = 'F'
                branches_def['total_weight'] = 'F'

            outtree.create_branches(branches_def)
            with io.root_open(infile_name) as infile:
                intree = infile.Get(full_category)
                for e_idx, entry in enumerate(intree):
                    if e_idx % 1000 == 0:
                        log.debug("processing entry: %i" % e_idx)
                    for name, info in variables.iteritems():
                        # The default stays in place when the source
                        # attribute is missing or the requested index is
                        # out of range.
                        value = info['default']
                        try:
                            if 'var' in info and hasattr(entry, info['var']):
                                var = getattr(entry, info['var'])
                                vtype = type_dict[info['type'].lower()]
                                if 'idx' in info:
                                    if var.size() > info['idx']:
                                        value = vtype(var[info['idx']])
                                else:
                                    value = vtype(var)
                            elif 'fcn' in info:
                                vtype = type_dict[info['type'].lower()]
                                fcn = globals()[info['fcn']]
                                value = vtype(fcn(entry, *info['args']))
                        except Exception:
                            # Fail loudly instead of opening an interactive
                            # debugger, which would block unattended or
                            # parallel jobs.
                            log.error(
                                "something went wrong processing variable %s (entry %i)"
                                % (name, e_idx))
                            raise

                        #if value is nan, then set to default (maybe better if you skip the whole jet)
                        value = info['default'] if math.isnan(value) else value
                        setattr(outtree, name, value)
                    total_weight = 1.
                    if pteta_weight:
                        bin_idx = weights.FindFixBin(entry.jetPt,
                                                     abs(entry.jetEta))
                        outtree.kinematic_weight = weights[bin_idx].value
                        total_weight *= weights[bin_idx].value
                    if flav_weight:
                        outtree.flavour_weight = flavor_weight
                        total_weight *= flavor_weight
                    if cat_weight:
                        bin_idx = category_weights.FindFixBin(
                            entry.jetPt, abs(entry.jetEta))
                        outtree.slcategory_weight = category_weights[bin_idx].value
                        total_weight *= category_weights[bin_idx].value
                    if 'total_weight' in branches_def:
                        outtree.total_weight = total_weight
                    outtree.Fill()
    finally:
        # The weight histograms belong to this file, so it is closed only
        # after the event loop has finished (or failed).
        if weight_tfile is not None:
            weight_tfile.Close()
    log.info("processing done [%s]" % tag)
示例#7
0
import os

# Default configuration: (ROOT file path, label) pairs and the output path.
file_names = [
    #
    # CvsL
    #
    ('CvsL_76_17Jan/ROCs/5565121efc_ROCs.root', '7.6 Optimization'),
    ('test/ROCs.root', 'separate testing'),
    #
    # CvsB (currently disabled)
    #
    # ('CvsB_76_18Jan/ROCs/82d2a6148a_ROCs.root', '7.5 Optimization'),
    # ('CvsB_76_18Jan/ROCs/ff481cb37f_ROCs.root', '7.6 Optimization'),
]
output = 'test/test.png'

# A JSON configuration passed on the command line replaces the defaults.
# Only the last entry of args.input is read.
if args.input:
    with open(args.input[-1]) as conf_file:
        jconf = prettyjson.loads(conf_file.read())
    # JSON has no tuples, so the pairs come back as lists; restore tuples.
    file_names = [tuple(i) for i in jconf['file_names']]
    output = jconf['output']
    graph_path = jconf['graph_path']

# Dump the configuration in JSON for bookkeeping.
# NOTE(review): graph_path is only assigned above when args.input is set;
# it has to be defined earlier in the script for the default case -- confirm.
jconf = {
    'file_names': file_names,
    'output': output,
    'graph_path': graph_path,
}

# Strip only the final extension. Splitting on the first '.' would turn
# paths such as './plots/roc.png' or '../roc.png' into an empty base name.
jout = os.path.splitext(output)[0]
with open('%s.json' % jout, 'w') as out:
    out.write(prettyjson.dumps(jconf))