Пример #1
0
def get_metadata(input_dir, tree, debug=0):
    """Return a {file_name: n_events} index for the ntuple files in input_dir.

    If a 'metadata.json' index already exists in input_dir (on EOS), it is
    copied to a unique local name, loaded and returned.  Otherwise every
    file in the directory is opened once to count its events, and the
    resulting index is written to 'metadata.json' and copied back to
    input_dir for later runs.

    Args:
        input_dir: EOS directory containing the ntuple files to index.
        tree: name of the TTree to open in each file.
        debug: verbosity level; > 2 prints per-file event counts.
    """
    json_name = 'metadata.json'
    file_metadata = {}
    # Look for an existing index file in the input directory.
    json_files = listFiles(input_dir, match=json_name)
    if len(json_files) == 0:
        print 'no metadata file {} in input dir: {}'.format(
            json_name, input_dir)
        print 'Will now index files...'
        files = listFiles(input_dir)
        print '# of files: {}'.format(len(files))

        # Open every file once just to count its events.
        for idx, file_name in enumerate(files):
            ntuple = HGCalNtuple([file_name], tree)
            nevents = ntuple.nevents()
            file_metadata[file_name] = nevents
            if debug > 2:
                print ' [{}] file: {} # events: {}'.format(
                    idx, file_name, nevents)

        # Persist the index locally, then publish it next to the data.
        with open(json_name, 'w') as fp:
            json.dump(file_metadata, fp)
        copy_to_eos(file_name=json_name,
                    target_dir=input_dir,
                    target_file_name=json_name)
    else:
        print 'dir already indexed, will read metadata...'
        # Use a unique local file name so concurrent jobs do not clash.
        unique_filename = '{}.json'.format(uuid.uuid4())
        copy_from_eos(input_dir=input_dir,
                      file_name=json_name,
                      target_file_name=unique_filename)
        with open(unique_filename, 'r') as fp:
            file_metadata = json.load(fp)
        os.remove(unique_filename)

    return file_metadata
Пример #2
0
def main():
    ntuple = HGCalNtuple(
        "/Users/clange/CERNBox/partGun_PDGid211_x120_E80.0To80.0_NTUP_9.root")

    tot_nevents = 0
    tot_genpart = 0
    tot_rechit = 0
    tot_rechit_raw = 0
    tot_cluster2d = 0
    tot_multiclus = 0
    tot_simcluster = 0
    tot_pfcluster = 0
    tot_calopart = 0
    tot_track = 0

    for event in ntuple:
        # print "Event", event.entry()
        tot_nevents += 1
        genParts = event.genParticles()
        tot_genpart += len(genParts)
        recHits = event.recHits()
        tot_rechit += len(recHits)
        if (ntuple.hasRawRecHits()):
            recHitsRaw = event.recHits("rechit_raw")
            tot_rechit_raw += len(recHitsRaw)
        layerClusters = event.layerClusters()
        tot_cluster2d += len(layerClusters)
        multiClusters = event.multiClusters()
        tot_multiclus += len(multiClusters)
        simClusters = event.simClusters()
        tot_simcluster += len(simClusters)
        pfClusters = event.pfClusters()
        tot_pfcluster += len(pfClusters)
        pfClusters = event.pfClusters()
        tot_pfcluster += len(pfClusters)
        caloParts = event.caloParticles()
        tot_calopart += len(caloParts)
        tracks = event.tracks()
        tot_track += len(tracks)

        # for genPart in genParts:
        #     print tot_nevents, "genPart pt:", genPart.pt()

    print "Processed %d events" % tot_nevents
    print "On average %f generator particles" % (float(tot_genpart) /
                                                 tot_nevents)
    print "On average %f reconstructed hits" % (float(tot_rechit) /
                                                tot_nevents)
    print "On average %f raw reconstructed hits" % (float(tot_rechit_raw) /
                                                    tot_nevents)
    print "On average %f layer clusters" % (float(tot_cluster2d) / tot_nevents)
    print "On average %f multi-clusters" % (float(tot_multiclus) / tot_nevents)
    print "On average %f sim-clusters" % (float(tot_simcluster) / tot_nevents)
    print "On average %f PF clusters" % (float(tot_pfcluster) / tot_nevents)
    print "On average %f calo particles" % (float(tot_calopart) / tot_nevents)
    print "On average %f tracks" % (float(tot_track) / tot_nevents)
def main():
    """Run the scale/resolution calibration on a hard-coded photon sample.

    The commented-out lines are alternative energy points and the pion
    sample variants; a different configuration is selected by swapping
    which GEN_eng/ntuple pair is active.
    """

    relFractionE = 0.001
    # set sample/tree - for pions
    # NOTE(review): this pion pid is immediately overridden by the photon
    # block below; it is kept only as a template for switching samples.
    pidSelected = 211
    #    GEN_eng = 5.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E5_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E5.0To5.0_NTUP.root")
    #    GEN_eng = 20.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E20_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E20.0To20.0_NTUP.root")
    #    GEN_eng = 50.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E50_cmssw93X_withPRs_20170809/NTUP/partGun_PDGid211_x100_E50.0To50.0_NTUP.root")
    #    GEN_eng = 100.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E100_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E100.0To100.0_NTUP.root")
    #    GEN_eng = 300.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E300_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E300.0To300.0_NTUP.root")

    # set sample/tree - for photons
    pidSelected = 22
    GEN_eng = 5.
    ntuple = HGCalNtuple(
        "/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E5_cmssw93X_withPRs_20170809/NTUP/partGun_PDGid22_x100_E5.0To5.0_NTUP.root"
    )
    #    GEN_eng = 20.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E20_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E20.0To20.0_NTUP.root")
    #    GEN_eng = 50.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E50_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E50.0To50.0_NTUP.root")
    #    GEN_eng = 100.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E100_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E100.0To100.0_NTUP.root")
    #    GEN_eng = 300.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E300_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E300.0To300.0_NTUP.root")
    #    ntuple = HGCalNtuple("../data/_SingleGammaPt100Eta1p6_2p8_PhaseIITDRFall17DR-noPUFEVT_93X_upgrade2023_realistic_v2-v1_GEN-SIM-RECO/NTUP/_SingleGammaPt100Eta1p6_2p8_PhaseIITDRFall17DR-noPUFEVT_93X_upgrade2023_realistic_v2-v1_GEN-SIM-RECO_NTUP_1.root")

    # Run the calibration with the selected sample configuration.
    runCalibrationScaleResolution(pidSelected, GEN_eng, ntuple, relFractionE)
Пример #4
0
def main():
    """Run the scale/resolution calibration on a hard-coded photon sample.

    Duplicate of the previous example; the commented-out lines are
    alternative energy points and pion sample variants, selected by
    swapping which GEN_eng/ntuple pair is active.
    """

    relFractionE = 0.001
    # set sample/tree - for pions
    # NOTE(review): this pion pid is immediately overridden by the photon
    # block below; it is kept only as a template for switching samples.
    pidSelected = 211
    #    GEN_eng = 5.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E5_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E5.0To5.0_NTUP.root")
    #    GEN_eng = 20.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E20_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E20.0To20.0_NTUP.root")
    #    GEN_eng = 50.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E50_cmssw93X_withPRs_20170809/NTUP/partGun_PDGid211_x100_E50.0To50.0_NTUP.root")
    #    GEN_eng = 100.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E100_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E100.0To100.0_NTUP.root")
    #    GEN_eng = 300.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E300_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E300.0To300.0_NTUP.root")

    # set sample/tree - for photons
    pidSelected = 22
    GEN_eng = 5.
    ntuple = HGCalNtuple(
        "/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E5_cmssw93X_withPRs_20170809/NTUP/partGun_PDGid22_x100_E5.0To5.0_NTUP.root"
    )
    #    GEN_eng = 20.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E20_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E20.0To20.0_NTUP.root")
    #    GEN_eng = 50.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E50_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E50.0To50.0_NTUP.root")
    #    GEN_eng = 100.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E100_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E100.0To100.0_NTUP.root")
    #    GEN_eng = 300.
    #    ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid22_E300_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid22_x100_E300.0To300.0_NTUP.root")

    # Run the calibration with the selected sample configuration.
    runCalibrationScaleResolution(pidSelected, GEN_eng, ntuple, relFractionE)
Пример #5
0
def main():
    global opt, args

    usage = ('usage: %prog [options]\n' + '%prog -h for help')
    parser = optparse.OptionParser(usage)

    # input options
    parser.add_option(
        '',
        '--files',
        dest='fileString',
        type='string',
        default=
        '/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomPtGunProducer_SinglePiPt2Eta1p6_2p8_Fall17DR-NoPUFEVT_clange_20180129/NTUP/partGun_PDGid211_x60_Pt2.0To2.0_NTUP_6.root',
        help='comma-separated file list')
    parser.add_option('',
                      '--gunType',
                      dest='gunType',
                      type='string',
                      default='pt',
                      help='pt or e')
    parser.add_option('',
                      '--pid',
                      dest='pid',
                      type='int',
                      default=211,
                      help='pdgId int')
    parser.add_option('',
                      '--genValue',
                      dest='genValue',
                      type='float',
                      default=25,
                      help='generated pT or energy')

    # store options and arguments as global variables
    global opt, args
    (opt, args) = parser.parse_args()

    print "files:", opt.fileString
    print "gunType:", opt.gunType
    print "pid:", opt.pid
    print "GEN_engpt:", opt.genValue

    # set sample/tree - for photons
    gun_type = opt.gunType
    pidSelected = opt.pid
    GEN_engpt = opt.genValue

    fileList = opt.fileString.split(",")

    for fileName in fileList:
        ntuple = HGCalNtuple(opt.fileString)

        for event in ntuple:
            if (event.entry() > 11):
                break
            SACEvt = SACevent(event, 60)
            SACEvt.Print()
Пример #6
0
def main():
    """Read the HGCal ntuple named on the command line and print average
    per-event multiplicities for the first `maxEvents` events.

    Usage: script.py <input_file.root>
    """
    inFile = sys.argv[1]
    ntuple = HGCalNtuple(inFile)

    # Stop after this many events.
    maxEvents = 10

    # Running totals over all processed events.
    tot_nevents = 0
    tot_genpart = 0
    tot_rechit = 0
    tot_cluster2d = 0
    tot_multiclus = 0
    tot_simcluster = 0
    tot_pfcluster = 0
    tot_calopart = 0
    tot_track = 0

    for event in ntuple:
        if event.entry() >= maxEvents:
            break
        print("Event", event.entry() + 1)
        tot_nevents += 1
        genParts = event.genParticles()
        tot_genpart += len(genParts)
        recHits = event.recHits()
        tot_rechit += len(recHits)
        layerClusters = event.layerClusters()
        tot_cluster2d += len(layerClusters)
        multiClusters = event.multiClusters()
        tot_multiclus += len(multiClusters)
        simClusters = event.simClusters()
        tot_simcluster += len(simClusters)
        # BUG FIX: this fetch/count pair was duplicated, double-counting
        # tot_pfcluster and inflating the printed PF-cluster average by 2x.
        pfClusters = event.pfClusters()
        tot_pfcluster += len(pfClusters)
        caloParts = event.caloParticles()
        tot_calopart += len(caloParts)
        tracks = event.tracks()
        tot_track += len(tracks)

    print("Processed %d events" % tot_nevents)
    print("On average %f generator particles" %
          (float(tot_genpart) / tot_nevents))
    print("On average %f reconstructed hits" %
          (float(tot_rechit) / tot_nevents))
    print("On average %f layer clusters" %
          (float(tot_cluster2d) / tot_nevents))
    print("On average %f multi-clusters" %
          (float(tot_multiclus) / tot_nevents))
    print("On average %f sim-clusters" % (float(tot_simcluster) / tot_nevents))
    print("On average %f PF clusters" % (float(tot_pfcluster) / tot_nevents))
    print("On average %f calo particles" % (float(tot_calopart) / tot_nevents))
    print("On average %f tracks" % (float(tot_track) / tot_nevents))
Пример #7
0
def main():
    """Benchmark three ways of summing trigger-cell energies (numpy array,
    raw ROOT branch, CMG-style dataframe) on the first two entries of a
    hard-coded PU200 electron-gun sample, printing timing and checksums.
    """
    # ============================================
    # configuration bit
    maxEvents = 100
    debug = 1
    input_base_dir = '/Users/cerminar/cernbox/hgcal/CMSSW932/'
    #input_sample_dir = 'FlatRandomEGunProducer_EleGunE50_1p7_2p8_PU0_20171005/NTUP/'
    #output_filename = 'histos_EleE50_PU0.root'

    # input_sample_dir = 'FlatRandomEGunProducer_EleGunE50_1p7_2p8_PU50_20171005/NTUP/'
    # output_filename = 'histos_EleE50_PU50.root'

    input_sample_dir = 'FlatRandomEGunProducer_EleGunE50_1p7_2p8_PU200_20171005/NTUP/'
    output_filename = 'histos_EleE50_PU200.root'

    # ============================================

    input_files = listFiles(os.path.join(input_base_dir, input_sample_dir))
    print('- dir {} contains {} files.'.format(input_sample_dir,
                                               len(input_files)))

    chain = getChain('hgcalTriggerNtuplizer/HGCalTriggerNtuple', input_files)
    print('- created TChain containing {} events'.format(chain.GetEntries()))

    ntuple = HGCalNtuple(input_files,
                         tree='hgcalTriggerNtuplizer/HGCalTriggerNtuple')

    # Output file and histogram containers (booked here; not filled in the
    # benchmark loop below).
    output = ROOT.TFile(output_filename, "RECREATE")
    output.cd()
    hgen = histos.GenPartHistos('h_genAll')
    htc = histos.TCHistos('h_tcAll')
    h2dcl = histos.ClusterHistos('h_clAll')
    h3dcl = histos.Cluster3DHistos('h_cl3dAll')

    import time
    import numpy as np

    # Sum the elements of `narray` into `buff` with a plain Python loop
    # (numpy-array input).
    def loopNP(narray, buff):
        for e in range(0, len(narray)):
            buff += narray[e]
        return buff

    # Identical loop, kept separate on purpose so the raw ROOT branch is
    # timed with the same code path as the numpy array.
    def loopROOT(narray, buff):
        for e in range(0, len(narray)):
            buff += narray[e]
        return buff

    # CMG dataframe input: loops over the 'energy' column, then discards
    # that result and returns the vectorized np.sum instead.
    def loopCMG(narray, buff):
        for row in narray['energy']:
            buff += row

        buff = np.sum(narray['energy'])

        #             # print

        #         for e in range(0, len(narray._dataframe)):
        #             buff += narray._dataframe.loc[e].tc_energy
        return buff

    for ientry, entry in enumerate(chain):

        print(ientry)
        # Benchmark only the first two entries.
        if ientry == 2:
            break
        # NOTE(review): time.clock() is deprecated and removed in Python
        # 3.8; time.perf_counter() is the modern replacement.
        start = time.clock()
        narray_id = np.array(entry.tc_id)
        narray_subdet = np.array(entry.tc_subdet)
        narray_zside = np.array(entry.tc_zside)
        narray_wafer = np.array(entry.tc_wafer)
        narray_wafertype = np.array(entry.tc_wafertype)
        narray_energy = np.array(entry.tc_energy)
        narray_eta = np.array(entry.tc_eta)
        narray_phi = np.array(entry.tc_phi)
        narray_z = np.array(entry.tc_z)
        narray_cell = np.array(entry.tc_cell)
        narray_data = np.array(entry.tc_data)
        narray_layer = np.array(entry.tc_layer)

        # Time the numpy-array loop (timing includes only the loop; the
        # conversions above are charged to this measurement's start).
        buff = 0
        buff = loopNP(narray_energy, buff)
        end = time.clock()
        print("LEN NP: {}".format(len(narray_energy)))
        print("PERF Numpy: {}".format(end - start))
        print("SUM: {}".format(buff))

        # Time the raw ROOT-branch loop.
        buff = 0
        print("LEN PY: {}".format(len(entry.tc_energy)))
        start = time.clock()
        buff = loopROOT(entry.tc_energy, buff)
        end = time.clock()
        print("PERF py: {}".format(end - start))
        print("SUM: {}".format(buff))

        buff = 0

        # Time the CMG dataframe access (includes building the dataframe).
        start = time.clock()
        event = Event(chain, ientry)
        triggerCells = event.getDataFrame(prefix='tc')
        buff = loopCMG(triggerCells, buff)
        end = time.clock()
        print("LEN CMG: {}".format(len(triggerCells.energy)))
        print("PERF CMG: {}".format(end - start))
        print("SUM: {}".format(buff))

    sys.exit(0)
Пример #8
0
def analyze(params, batch_idx=0):
    """Geometry / tower-map debugging driver.

    Loads the trigger-cell and cell geometry trees, exercises TowerMaps
    grid extrapolation on a few detector layers, then calls sys.exit(0).
    Everything after that exit (file listing, event loop, clustering,
    display) is currently dead code kept for reference.

    Args:
        params: job configuration (input dirs, debug level, clusterize
            flag, ...).
        batch_idx: unused here; kept for interface compatibility with the
            batch-processing entry points.
    """
    print(params)
    doAlternative = False

    debug = int(params.debug)
    pool = Pool(5)

    tc_geom_df = pd.DataFrame()
    cell_geom_df = pd.DataFrame()
    geom_file = params.input_base_dir + '/geom/test_triggergeom_v1.root'
    print 'Loading the geometry...'
    tc_geom_tree = HGCalNtuple([geom_file],
                               tree='hgcaltriggergeomtester/TreeTriggerCells')
    tc_geom_df = convertGeomTreeToDF(tc_geom_tree._tree)
    # Derived coordinates: cylindrical radius and pseudorapidity.
    tc_geom_df['radius'] = np.sqrt(tc_geom_df['x']**2 + tc_geom_df['y']**2)
    tc_geom_df['eta'] = np.arcsinh(tc_geom_df.z / tc_geom_df.radius)

    cell_geom_tree = HGCalNtuple([geom_file],
                                 tree='hgcaltriggergeomtester/TreeCells')
    cell_geom_df = convertGeomTreeToDF(cell_geom_tree._tree)

    debugPrintOut(debug,
                  'Cell geometry',
                  toCount=cell_geom_df,
                  toPrint=cell_geom_df.iloc[:3])
    debugPrintOut(debug,
                  'TC geometry',
                  toCount=tc_geom_df,
                  toPrint=tc_geom_df.iloc[:3])

    # NOTE(review): trigger_cell_geom is given the tree, not the dataframe
    # (tc_geom_df) — confirm this is what EventDisplayManager expects.
    display = EventDisplayManager(cell_geom=cell_geom_df,
                                  trigger_cell_geom=tc_geom_tree)
    # for index, tc_geom in tc_geom_df.iterrows():
    #     tc_geom.max_dist_neigh = np.max(tc_geom.neighbor_distance)

    algos = ['DEF', 'DBS']
    particles = [
        Particle('ele', PID.electron),
        Particle('photon', PID.photon),
        Particle('pion', PID.pion),
        Particle('pizero', PID.pizero)
    ]

    # NOTE(review): tc_layer is a filtered slice of tc_geom_df; assigning
    # a column to it triggers pandas' SettingWithCopyWarning — verify a
    # .copy() is not needed here.
    tc_layer = tc_geom_df[(tc_geom_df.eta < 0) & (tc_geom_df.layer == 1)]
    tc_layer['energy'] = tc_layer.id * 1000
    # Reference tower grids for the negative (gridM) and positive (gridP)
    # z sides.
    gridM = Grid(x_nbins=68,
                 x_min=-170.,
                 x_max=170.,
                 y_nbins=68,
                 y_min=-170.,
                 y_max=170.,
                 z=-320.755005)

    gridP = Grid(x_nbins=68,
                 x_min=-170.,
                 x_max=170.,
                 y_nbins=68,
                 y_min=-170.,
                 y_max=170.,
                 z=320.755005)

    towerMap = TowerMaps(refGridPlus=gridP, refGridMinus=gridM)

    #display.displayTriggerCells(1, tc_layer)
    #display.displayTowers(1, -1, 3, 1, gridM)

    # Probe the tower-map extrapolation on layer 1 of each subdetector
    # (EE=3, FH=4, BH=5) and on BH layer 12; results are only printed.
    tc_layer_1_EE = tc_geom_df[(tc_geom_df.eta < 0) & (tc_geom_df.layer == 1) &
                               (tc_geom_df.subdet == 3)]
    print tc_layer_1_EE.iloc[1]
    gridEE_m_l1 = towerMap.extrapolateXY(tc_layer_1_EE.iloc[1].z)
    print gridEE_m_l1.getCorners(34, 34)
    gridEE_m_l1.getBinCenter(65, 34)
    gridEE_m_l1.getBinCenter(64, 34)
    gridEE_m_l1.getBinCenter(42, 34)
    gridEE_m_l1.getBinCenter(43, 34)
    tc_layer_1_FH = tc_geom_df[(tc_geom_df.eta < 0) & (tc_geom_df.layer == 1) &
                               (tc_geom_df.subdet == 4)]
    print tc_layer_1_FH.iloc[1]
    gridFH_m_l1 = towerMap.extrapolateXY(tc_layer_1_FH.iloc[1].z)
    gridFH_m_l1.getBinCenter(65, 34)
    gridFH_m_l1.getBinCenter(64, 34)
    gridFH_m_l1.getBinCenter(42, 34)
    gridFH_m_l1.getBinCenter(43, 34)

    print gridFH_m_l1.getCorners(34, 34)
    tc_layer_1_BH = tc_geom_df[(tc_geom_df.eta < 0) & (tc_geom_df.layer == 1) &
                               (tc_geom_df.subdet == 5)]
    print tc_layer_1_BH.iloc[1]
    gridBH_m_l1 = towerMap.extrapolateXY(tc_layer_1_BH.iloc[1].z)
    print gridBH_m_l1.getCorners(34, 34)
    gridBH_m_l1.getBinCenter(65, 34)
    gridBH_m_l1.getBinCenter(64, 34)
    gridBH_m_l1.getBinCenter(42, 34)
    gridBH_m_l1.getBinCenter(43, 34)

    tc_layer_12_BH = tc_geom_df[(tc_geom_df.eta < 0) & (tc_geom_df.layer == 12)
                                & (tc_geom_df.subdet == 5)]
    print tc_layer_12_BH.iloc[1]
    gridBH_m_l12 = towerMap.extrapolateXY(tc_layer_12_BH.iloc[1].z)
    print gridBH_m_l12.getCorners(34, 34)

    #display.show(1)

    # NOTE(review): unconditional exit — all code below this point is
    # dead and kept only for reference.
    sys.exit(0)

    input_files = listFiles(
        os.path.join(params.input_base_dir, params.input_sample_dir))
    print('- dir {} contains {} files.'.format(params.input_sample_dir,
                                               len(input_files)))

    ntuple = HGCalNtuple(input_files,
                         tree='hgcalTriggerNtuplizer/HGCalTriggerNtuple')
    print('- created TChain containing {} events'.format(ntuple.nevents()))

    event_n = 0
    # Interactive event-inspection loop, currently disabled (while False).
    while (False):
        event_n = input('Enter event number (-1 to quit): ')
        print 'Processing event: {}'.format(event_n)
        if event_n == -1:
            break

        event = ntuple.getEvent(event_n)
        print("--- Event {}, @ {}".format(event.entry(),
                                          datetime.datetime.now()))
        print('    run: {}, lumi: {}, event: {}'.format(
            event.run(), event.lumi(), event.event()))

        genParts = event.getDataFrame(prefix='gen')
        if len(genParts[(genParts.eta > 1.7) & (genParts.eta < 2.5)]) == 0:
            print "No particles in interesting era range"
            continue

        genParticles = event.getDataFrame(prefix='genpart')
        genParticles['pdgid'] = genParticles.pid
        hgcDigis = event.getDataFrame(prefix='hgcdigi')
        triggerCells = event.getDataFrame(prefix='tc')
        # this is not needed anymore in recent versions of the ntuples
        # tcsWithPos = pd.merge(triggerCells, tc_geom_df[['id', 'x', 'y']], on='id')
        triggerClusters = event.getDataFrame(prefix='cl')
        triggerClusters['ncells'] = [len(x) for x in triggerClusters.cells]
        # Older ntuples lack cluster x/y: recompute them from z, eta, phi
        # using the trigger-cell geometry.
        if 'x' not in triggerClusters.columns:
            triggerClusters = pd.merge(triggerClusters,
                                       tc_geom_df[['z', 'id']],
                                       on='id')
            triggerClusters['R'] = triggerClusters.z / np.sinh(
                triggerClusters.eta)
            triggerClusters['x'] = triggerClusters.R * np.cos(
                triggerClusters.phi)
            triggerClusters['y'] = triggerClusters.R * np.sin(
                triggerClusters.phi)

        trigger3DClusters = event.getDataFrame(prefix='cl3d')
        trigger3DClusters['nclu'] = [
            len(x) for x in trigger3DClusters.clusters
        ]
        triggerClustersGEO = pd.DataFrame()
        trigger3DClustersGEO = pd.DataFrame()
        triggerClustersDBS = pd.DataFrame()
        trigger3DClustersDBS = pd.DataFrame()
        trigger3DClustersDBSp = pd.DataFrame()

        debugPrintOut(debug, 'gen parts', toCount=genParts, toPrint=genParts)
        debugPrintOut(debug,
                      'gen particles',
                      toCount=genParticles,
                      toPrint=genParticles)
        # [['eta', 'phi', 'pt', 'energy', 'mother', 'gen', 'pid', 'pdgid', 'reachedEE']]
        debugPrintOut(debug,
                      'digis',
                      toCount=hgcDigis,
                      toPrint=hgcDigis.iloc[:3])
        debugPrintOut(debug,
                      'Trigger Cells',
                      toCount=triggerCells,
                      toPrint=triggerCells.iloc[:3])
        debugPrintOut(debug,
                      '2D clusters',
                      toCount=triggerClusters,
                      toPrint=triggerClusters.iloc[:3])
        debugPrintOut(debug,
                      '3D clusters',
                      toCount=trigger3DClusters,
                      toPrint=trigger3DClusters.iloc[:3])

        # DBSCAN reclustering, currently disabled by the `and False`.
        if params.clusterize and False:
            # Now build DBSCAN 2D clusters
            for zside in [-1, 1]:
                arg = [(layer, zside, triggerCells) for layer in range(0, 29)]
                results = pool.map(clAlgo.buildDBSCANClustersUnpack, arg)
                for clres in results:
                    triggerClustersDBS = triggerClustersDBS.append(
                        clres, ignore_index=True)

            debugPrintOut(debug,
                          'DBS 2D clusters',
                          toCount=triggerClustersDBS,
                          toPrint=triggerClustersDBS.iloc[:3])

            trigger3DClustersDBS = build3DClusters(
                'DBS', clAlgo.build3DClustersEtaPhi, triggerClustersDBS, pool,
                debug)
            trigger3DClustersDBSp = build3DClusters('DBSp',
                                                    clAlgo.build3DClustersProj,
                                                    triggerClustersDBS, pool,
                                                    debug)

        display.displayTriggerCells(event_n, triggerCells)
        display.displayClusters(event_n, triggerClusters, triggerCells)
        display.displayGenParticle(
            event_n, genParticles[(genParticles.gen > 0)
                                  & (genParticles.pid == PID.photon) &
                                  (genParticles.reachedEE == 2) &
                                  (np.abs(genParticles.eta) < 2.8) &
                                  (np.abs(genParticles.eta) > 1.7)])
        display.show(event_n)
Пример #9
0
def analyze(params, batch_idx=0):
    """Main event-loop driver.

    Selects the input files and event range (interactive mode when
    params.events_per_job == -1, otherwise a per-batch slice), books the
    histograms of all configured plotters, loops over the events filling
    them, and writes everything to params.output_filename.

    Args:
        params: job configuration (input dirs, maxEvents, events_per_job,
            plotters, weight_file, output_filename, debug).
        batch_idx: index of this batch job, used to pick its event slice.
    """
    print(params)
    debug = int(params.debug)

    tc_geom_df = pd.DataFrame()
    tc_rod_bins = pd.DataFrame()
    # Optional geometry loading, currently disabled (if False).
    if False:
        # read the geometry dump
        geom_file = os.path.join(params.input_base_dir,
                                 'geom/test_triggergeom.root')
        tc_geom_tree = HGCalNtuple(
            [geom_file], tree='hgcaltriggergeomtester/TreeTriggerCells')
        tc_geom_tree.setCache(learn_events=100)
        print('read TC GEOM tree with # events: {}'.format(
            tc_geom_tree.nevents()))
        tc_geom_df = convertGeomTreeToDF(tc_geom_tree._tree)
        tc_geom_df['radius'] = np.sqrt(tc_geom_df['x']**2 + tc_geom_df['y']**2)
        tc_geom_df['eta'] = np.arcsinh(tc_geom_df.z / tc_geom_df.radius)

        # Optional rod-bin mapping, also disabled.
        if False:
            tc_rod_bins = pd.read_csv(
                filepath_or_buffer='data/TCmapping_v2.txt',
                sep=' ',
                names=['id', 'rod_x', 'rod_y'],
                index_col=False)
            tc_rod_bins['rod_bin'] = tc_rod_bins.apply(
                func=lambda cell: (int(cell.rod_x), int(cell.rod_y)), axis=1)

            tc_geom_df = pd.merge(tc_geom_df, tc_rod_bins, on='id')

        if debug == -4:
            tc_geom_tree.PrintCacheStats()
        print('...done')

    tree_name = 'hgcalTriggerNtuplizer/HGCalTriggerNtuple'
    input_files = []
    range_ev = (0, params.maxEvents)

    # events_per_job == -1 means interactive (single-job) processing;
    # otherwise this is one slice of a batch submission.
    if params.events_per_job == -1:
        print 'This is interactive processing...'
        input_files = fm.get_files_for_processing(
            input_dir=os.path.join(params.input_base_dir,
                                   params.input_sample_dir),
            tree=tree_name,
            nev_toprocess=params.maxEvents,
            debug=debug)
    else:
        print 'This is batch processing...'
        input_files, range_ev = fm.get_files_and_events_for_batchprocessing(
            input_dir=os.path.join(params.input_base_dir,
                                   params.input_sample_dir),
            tree=tree_name,
            nev_toprocess=params.maxEvents,
            nev_perjob=params.events_per_job,
            batch_id=batch_idx,
            debug=debug)

    # print ('- dir {} contains {} files.'.format(params.input_sample_dir, len(input_files)))
    print '- will read {} files from dir {}:'.format(len(input_files),
                                                     params.input_sample_dir)
    for file_name in input_files:
        print '        - {}'.format(file_name)

    ntuple = HGCalNtuple(input_files, tree=tree_name)
    # Interactive mode with maxEvents == -1 means "process everything".
    if params.events_per_job == -1:
        if params.maxEvents == -1:
            range_ev = (0, ntuple.nevents())

    print('- created TChain containing {} events'.format(ntuple.nevents()))
    print('- reading from event: {} to event {}'.format(
        range_ev[0], range_ev[1]))

    ntuple.setCache(learn_events=1, entry_range=range_ev)
    output = ROOT.TFile(params.output_filename, "RECREATE")
    output.cd()

    # Optional geometry histograms, disabled together with the geometry
    # loading above.
    if False:
        hTCGeom = histos.GeomHistos('hTCGeom')
        hTCGeom.fill(tc_geom_df[(np.abs(tc_geom_df.eta) > 1.65)
                                & (np.abs(tc_geom_df.eta) < 2.85)])

    # instantiate all the plotters
    plotter_collection = []
    plotter_collection.extend(params.plotters)
    print plotter_collection

    # -------------------------------------------------------
    # book histos
    for plotter in plotter_collection:
        plotter.book_histos()

    # -------------------------------------------------------
    # event loop
    ev_manager = collections.EventManager()

    if params.weight_file is not None:
        ev_manager.read_weight_file(params.weight_file)

    nev = 0
    for evt_idx in range(range_ev[0], range_ev[1] + 1):
        # print(evt_idx)
        event = ntuple.getEvent(evt_idx)
        if (params.maxEvents != -1 and nev >= params.maxEvents):
            break
        # Progress printout: every event when debugging, else every 100th.
        if debug >= 2 or event.entry() % 100 == 0:
            print("--- Event {}, @ {}".format(event.entry(),
                                              datetime.datetime.now()))
            print('    run: {}, lumi: {}, event: {}'.format(
                event.run(), event.lumi(), event.event()))

        nev += 1

        # Any failure while reading/filling aborts the job with a
        # distinctive exit code after dumping diagnostics.
        try:
            ev_manager.read(event, debug)

            puInfo = event.getPUInfo()
            debugPrintOut(debug, 'PU', toCount=puInfo, toPrint=puInfo)

            for plotter in plotter_collection:
                #print plotter
                plotter.fill_histos(debug=debug)

        except Exception as inst:
            print("[EXCEPTION OCCURRED:] --- Event {}, @ {}".format(
                event.entry(), datetime.datetime.now()))
            print('                       run: {}, lumi: {}, event: {}'.format(
                event.run(), event.lumi(), event.event()))
            print(str(inst))
            print("Unexpected error:", sys.exc_info()[0])
            traceback.print_exc()
            sys.exit(200)

    print("Processed {} events/{} TOT events".format(nev, ntuple.nevents()))
    print("Writing histos to file {}".format(params.output_filename))

    # Report ROOT I/O statistics for the last file read.
    lastfile = ntuple.tree().GetFile()
    print 'Read bytes: {}, # of transaction: {}'.format(
        lastfile.GetBytesRead(), lastfile.GetReadCalls())
    if debug == -4:
        ntuple.PrintCacheStats()

    output.cd()
    hm = histos.HistoManager()
    hm.writeHistos()

    output.Close()

    return
Пример #10
0
def main():
    """Compare stand-alone (re-run) HGCal imaging clustering with RECO.

    Opens a fixed CMSSW 9_3_0 NTUP file and, for each event whose entry is
    in the module-level ``allowedRangeEvents``:

    * reads raw rec-hits, sim clusters, 2D layer clusters and multi-clusters;
    * re-runs the stand-alone ``HGCalImagingAlgo`` (configured by the
      module-level ``ecut``, ``deltac``, ``multiclusterRadii``,
      ``minClusters`` and ``dependSensor``) on the raw rec-hits;
    * compares the re-run clusters with the sim-associated hits and with the
      RECO-level 2D/multi-clusters, printing summaries when
      ``verbosityLevel`` >= 1 and filling per-event histograms when
      ``verbosityLevel`` >= 2.

    All histograms accumulated in ``histDict`` are finally printed/saved
    under ``outDir`` via ``histPrintSaveAll``.
    """
    # init output stuff
    outDir = "testReClusteringExample"
    if not os.path.exists(outDir): os.makedirs(outDir)
    histDict = {}

    # get sample/tree
    # please give an CMSSW930 NTUP root file.
    #########################################
    ntuple = HGCalNtuple(
        "root://eoscms.cern.ch//eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomEGunProducer_pdgid211_E20_cmssw93X_withPRs_20170817/NTUP/partGun_PDGid211_x100_E20.0To20.0_NTUP_1.root"
    )  # CMSSW_9_3_0_pre3 with some pre4 PRs on top
    #ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomPtGunProducer_predragm_PDGid22_nPart1_Pt20to100_Eta2p3to2p5_cmssw921_20170605/NTUP/partGun_PDGid22_x400_Pt20.0To100.0_NTUP_1.root") # cmssw921 with all recent fixes as of June 12
    #ntuple = HGCalNtuple("/eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/FlatRandomPtGunProducer_predragm_PDGid22_id211_id11_id15_id130_nPart1_Pt20to100_Eta2p3to2p5_cmssw921_20170606/NTUP/partGun_PDGid22_id211_id11_id15_id130_x400_Pt20.0To100.0_NTUP_1.root")# cmssw921 with all recent fixes as of June 12

    # prepare some lists for comparisons
    multiClusters_nClust2DDiff = []
    tot_nClust2D_reco = []
    clusters2D_eng_reco = []
    tot_nClust2D_rerun = []
    clusters2D_eng_rerun = []
    clusters2DMultiSelected_eng_rerun = []

    # start event loop
    for event in ntuple:
        if (not event.entry() in allowedRangeEvents):
            continue  # checking external condition
        if (verbosityLevel >= 1): print("\nCurrent event: ", event.entry())

        # get collections of raw rechits, sim clusters, 2D clusters, multi clusters, etc.
        recHitsRaw = event.recHits()
        simClusters = event.simClusters()
        layerClusters = event.layerClusters()
        multiClusters = event.multiClusters()

        # get flat list of rechist associated to sim-cluster hits
        rHitsSimAssoc = getRecHitsSimAssoc(recHitsRaw, simClusters)
        # get flat list of raw rechits which satisfy treshold condition
        # (recHitAboveTreshold returns a tuple; element [1] is the pass flag)
        rHitsCleaned = [
            rechit for rechit in recHitsRaw
            if recHitAboveTreshold(rechit, ecut, dependSensor)[1]
        ]

        ### Imaging algo run at RECO step (CMSSW)
        # get flat list of all clusters 2D produced with algo at RECO step (CMSSW)
        clusters2DList_reco = [cls2D for cls2D in layerClusters]
        # get flat list of all multi-clusters produced with algo at RECO step (CMSSW)
        multiClustersList_reco = [
            multiCluster for multiCluster in multiClusters
        ]

        ### Imaging algo run as stand-alone (python)
        # instantiate the stand-alone clustering implemented in HGCalImagingAlgo
        # NOTE(review): re-instantiated for every event — could be hoisted out
        # of the loop if the algo keeps no per-event state; confirm before moving.
        HGCalAlgo = HGCalImagingAlgo(ecut=ecut,
                                     deltac=deltac,
                                     multiclusterRadii=multiclusterRadii,
                                     minClusters=minClusters,
                                     dependSensor=dependSensor,
                                     verbosityLevel=0)
        # produce 2D clusters with stand-alone algo, out of all raw rechits
        clusters2D_rerun = HGCalAlgo.makeClusters(
            recHitsRaw)  # nested list of "hexels", per layer, per 2D cluster
        # produce multi-clusters with stand-alone algo, out of all 2D clusters
        multiClustersList_rerun = HGCalAlgo.make3DClusters(
            clusters2D_rerun
        )  # flat list of multi-clusters (as basic clusters)

        # get for testing: flat list of 2D clustered, and flat list of clustered non-halo "hexeles" (from stand-alone algo)
        clusters2DList_rerun = HGCalAlgo.getClusters(
            clusters2D_rerun,
            verbosityLevel=0)  # flat list of 2D clusters (as basic clusters)
        hexelsClustered_rerun = [
            iNode for bClust in clusters2DList_rerun
            for iNode in bClust.thisCluster if not iNode.isHalo
        ]  # flat list of clustered "hexeles", without the "halo" hexels

        ### Produce some basic histograms for each event (2D/3D view of associated sim-clusters, selected rec-hits, etc.)
        if (verbosityLevel >= 2):
            # histograming of rechist associated to sim-cluster hits
            histDict = histRecHitsSimAssoc(rHitsSimAssoc,
                                           event.entry(),
                                           histDict,
                                           tag="rHitsSimAssoc_",
                                           zoomed=False)
            # histograming of raw rechist (with ecut cleaning)
            histDict = histRecHits(rHitsCleaned,
                                   event.entry(),
                                   histDict,
                                   tag="rHitsCleaned_",
                                   zoomed=True)
            # histograming of clustered hexels
            histDict = histHexelsClustered(hexelsClustered_rerun,
                                           event.entry(),
                                           histDict,
                                           tag="clustHex_",
                                           zoomed=False)

        ### Compare stand-alone clustering and sim-clusters
        rHitsSimAssocDID = [
            rechit.detid() for simClus in rHitsSimAssoc for rechit in simClus
        ]  # list of detids for sim-associated rehits (with ecut cleaning)
        rHitsClustdDID = [iNode.detid for iNode in hexelsClustered_rerun
                          ]  # list of detids for clustered hexels
        # print some info if requested
        if (verbosityLevel >= 1):
            print("num of rechits associated with sim-clusters : ",
                  len(rHitsSimAssocDID))
            print("num of rechits clustered with imaging algo. : ",
                  len(rHitsClustdDID))
            print("num of clustered not found in sim-associated:",
                  len(list(set(rHitsClustdDID) - set(rHitsSimAssocDID))))
            print("num of sim-associated not found in clustered:",
                  len(list(set(rHitsSimAssocDID) - set(rHitsClustdDID))))

        ### Compare stand-alone and reco-level clustering
        # flatten the 2D clusters attached to the re-run multi-clusters
        clusters2DListMultiSelected_rerun = [
            cls for multiCluster in multiClustersList_rerun
            for cls in multiCluster.thisCluster
        ]
        # print more details if requested
        if (verbosityLevel >= 1):
            # NOTE(review): `ls` (sorted index lists) is computed twice below
            # but never used — presumably a leftover from earlier debugging.
            ls = sorted(
                range(len(clusters2DListMultiSelected_rerun)),
                key=lambda k: clusters2DListMultiSelected_rerun[k].thisCluster[
                    0].layer,
                reverse=False)  # indices sorted by increasing layer number
            for index in range(len(multiClustersList_rerun)):
                print("Multi-cluster (RE-RUN) index: ", index,
                      ", No. of 2D-clusters = ",
                      len(multiClustersList_rerun[index].thisCluster),
                      ", Energy  = ", multiClustersList_rerun[index].energy,
                      ", Phi = ", multiClustersList_rerun[index].phi,
                      ", Eta = ", multiClustersList_rerun[index].eta, ", z = ",
                      multiClustersList_rerun[index].z)
            ls = sorted(
                range(len(clusters2DList_reco)),
                key=lambda k: clusters2DList_reco[k].layer(),
                reverse=False)  # indices sorted by increasing layer number
            for index in range(len(multiClustersList_reco)):
                print("Multi-cluster (RECO) index: ", index,
                      ", No. of 2D-clusters = ",
                      len(multiClustersList_reco[index].cluster2d()),
                      ", Energy  = ", multiClustersList_reco[index].energy(),
                      ", Phi = ", multiClustersList_reco[index].phi(),
                      ", Eta = ", multiClustersList_reco[index].eta(),
                      ", z = ", multiClustersList_reco[index].z())
            print("num of clusters2D @reco : ", len(clusters2DList_reco))
            print("num of clusters2D re-run: ",
                  len(clusters2DListMultiSelected_rerun))
            print("num of multi-cluster @reco : ", len(multiClustersList_reco))
            print("num of multi-cluster re-run: ",
                  len(multiClustersList_rerun))

        ### Produce some basic histograms with general info (one per sample)
        if (verbosityLevel >= 2):
            # relative diff. in number of 2D clusters (re-run vs. reco)
            multiClusters_nClust2DDiff.append(100 * float(
                len(clusters2DListMultiSelected_rerun) -
                len(clusters2DList_reco)) / float(len(clusters2DList_reco)))
            # number of 2D clusters from algo at re-run step
            tot_nClust2D_rerun.append(len(clusters2DListMultiSelected_rerun))
            clusters2D_eng_rerun.extend([
                clusters2DList_rerun[k].energy
                for k in range(0, len(clusters2DList_rerun))
            ])  # eng re-run
            # number of 2D clusters from algo at RECO step
            tot_nClust2D_reco.append(len(clusters2DList_reco))
            clusters2D_eng_reco.extend([
                clusters2DList_reco[k].energy()
                for k in range(0, len(clusters2DList_reco))
            ])  # eng reco

    # histograms - re-run vs. reco, 2D clusters counting
    histDict = histValue1D(multiClusters_nClust2DDiff,
                           histDict,
                           tag="MultClust_nCl2DRelDiff_RerunReco",
                           title="Rerun vs. Reco: rel. diff. Num(2D clusters)",
                           axunit="#deltaN_{cl.2D}[%]",
                           binsRangeList=[200, -10, 10],
                           ayunit="N(events)")
    histDict = histValue1D(tot_nClust2D_rerun,
                           histDict,
                           tag="tot_nClust2D_rerun",
                           title="Rerun: total Num(2D clusters)",
                           axunit="N_{cl.2D}",
                           binsRangeList=[100, 0, 1000],
                           ayunit="total occurences")
    histDict = histValue1D(tot_nClust2D_reco,
                           histDict,
                           tag="tot_nClust2D_reco",
                           title="Reco: total Num(2D clusters)",
                           axunit="N_{cl.2D}",
                           binsRangeList=[100, 0, 1000],
                           ayunit="total occurences")
    # histograms - 2D clusters energy spectra
    histDict = histValue1D(clusters2D_eng_rerun,
                           histDict,
                           tag="Clust2D_Eng_Rerun",
                           title="Rerun E(all 2D clusters)",
                           axunit="#E_{cl.2D}[GeV]",
                           binsRangeList=[1000, 0, 5],
                           ayunit="N(2D clusters)")
    histDict = histValue1D(clusters2D_eng_reco,
                           histDict,
                           tag="Clust2D_Eng_Reco",
                           title="Reco E(all 2D clusters)",
                           axunit="#E_{cl.2D}[GeV]",
                           binsRangeList=[1000, 0, 5],
                           ayunit="N(2D clusters)")
    # print/save histograms
    histPrintSaveAll(histDict, outDir)
Пример #11
0
def main():
    """Run the reference-vs-object resolution analysis over a list of ntuples.

    Parses command-line options (input file list, gun type, pdgId, generated
    pT/energy, tag, reference and object collection names), loops over the
    input files filling ``histDict`` via ``eventLoop``, and writes the
    per-(eta, phi)-bin histograms of every populated bin to a ROOT file
    named after the gun configuration.

    Side effects: sets the module-level ``opt``/``args`` (read by helpers
    such as ``eventLoop``), prints a summary of the options and the elapsed
    time, and creates one output ROOT file in the working directory.
    """
    # options are exposed as module-level globals so helper functions can
    # read them
    global opt, args

    usage = ('usage: %prog [options]\n' + '%prog -h for help')
    parser = optparse.OptionParser(usage)

    # input options (default: PU200 single-pion sample; a noPU variant was
    # previously used as the default)
    parser.add_option(
        '',
        '--files',
        dest='fileString',
        type='string',
        default=
        'root://eoscms.cern.ch//eos/cms/store/cmst3/group/hgcal/CMG_studies/Production/_SinglePiPt50Eta1p6_2p8_PhaseIITDRFall17DR-PU200FEVT_93X_upgrade2023_realistic_v2-v1_GEN-SIM-RECO/NTUP/_SinglePiPt50Eta1p6_2p8_PhaseIITDRFall17DR-PU200FEVT_93X_upgrade2023_realistic_v2-v1_GEN-SIM-RECO_NTUP_2.root',
        help='comma-separated file list')
    parser.add_option('',
                      '--gunType',
                      dest='gunType',
                      type='string',
                      default='pt',
                      help='pt or e')
    parser.add_option('',
                      '--pid',
                      dest='pid',
                      type='int',
                      default=211,
                      help='pdgId int')
    parser.add_option('',
                      '--genValue',
                      dest='genValue',
                      type='int',
                      default=50,
                      help='generated pT or energy')
    parser.add_option('',
                      '--tag',
                      dest='tag',
                      type='string',
                      default='noPU',
                      help='some tag, best used for PU and other info')
    parser.add_option('',
                      '--ref',
                      dest='refName',
                      type='string',
                      default='genpart',
                      help='reference collection')
    parser.add_option('',
                      '--obj',
                      dest='objName',
                      type='string',
                      default='pfcluster',
                      help='object of interest collection')

    # store options and arguments in the module-level globals
    (opt, args) = parser.parse_args()

    print("files:", opt.fileString)
    print("gunType:", opt.gunType)
    print("pid:", opt.pid)
    print("GEN_engpt:", opt.genValue)
    print("refName:", opt.refName)
    print("objName:", opt.objName)

    # unpack the configuration into locals
    gun_type = opt.gunType
    pidSelected = opt.pid
    GEN_engpt = opt.genValue
    tag = opt.tag
    refName = opt.refName
    objName = opt.objName

    histDict = {}
    fileList = opt.fileString.split(",")

    start_time = timeit.default_timer()

    for fileName in fileList:
        # BUGFIX: open each file of the comma-separated list individually;
        # the original re-opened the whole `opt.fileString` every iteration,
        # so multi-file lists were never actually processed file by file.
        ntuple = HGCalNtuple(fileName)
        eventLoop(ntuple, refName, objName, gun_type, pidSelected, GEN_engpt,
                  histDict)

    f = ROOT.TFile(
        "{}_{}_{}GeV_{}_{}_{}.root".format(gun_type, pidSelected, GEN_engpt,
                                           refName, objName, tag), "recreate")
    # write out the histograms of every populated (eta, phi) bin; the
    # prefix list preserves the original write order
    histPrefixes = [
        "ref_Energy", "ref_Pt", "obj_Energy", "obj_Pt", "obj_dEoverE",
        "obj_dPtoverPt", "obj_dE", "obj_dPt", "obj_EoverERef",
        "obj_PtoverPtRef"
    ]
    for etaBinName in etaBins:
        for phiBinName in phiBins:
            binHists = histDict[etaBinName][phiBinName]
            suffix = "_eta" + etaBinName + "_phi" + phiBinName
            # a bin is considered populated when its reference-energy
            # histogram exists
            if "ref_Energy" + suffix in binHists:
                for prefix in histPrefixes:
                    binHists[prefix + suffix].Write()

    f.Write()
    f.Close()
    elapsed = timeit.default_timer() - start_time
    print("Time:", elapsed)
Пример #12
0
def main():
    """Compare re-run imaging-algo clusters to sim clusters, ntuple by ntuple.

    For each ntuple number in [``minNtuple``, ``maxNtuple``] (module-level
    configuration) and each event whose generated particles all reached the
    EE, this:

    * reads the raw rec-hit and sim-cluster data frames;
    * re-runs the stand-alone clustering (``getRecClustersFromImagingAlgo``)
      and associates rec hits to the resulting hexels;
    * for every layer in [``minLayer``, ``maxLayer``) compares each re-run
      2D cluster against the sim clusters — all pairs with non-zero energy
      fill ``energyComparisonHist``, while sim clusters whose bounding-box
      centre falls within the (``clusterAcceptScale``-scaled) rec-cluster
      circle contribute to the summed energy in
      ``energyComparisonOverlapHist``;
    * saves both per-event histograms under ``outDir`` and prints a summary
      of the re-run multi-clusters and per-step timings.
    """
    if not os.path.exists(outDir): os.makedirs(outDir)

    for ntupleNumber in range(minNtuple, maxNtuple + 1):
        print("\nCurrent ntup: ", ntupleNumber)

        ntuple = HGCalNtuple(inputPath + "{}.root".format(ntupleNumber))

        # start event loop
        for event in ntuple:
            startEvent = time.time()
            eventID = event.entry()
            startEvent = time.time()

            print("\nCurrent event: ", eventID)

            # check if particles reached EE; skip the event if any did not
            genParticles = event.genParticles()
            skipEvent = False
            for particle in genParticles:
                if not particle.reachedEE():
                    #          print("particle didn't reach EE -- skipping the event!!")
                    skipEvent = True
                    break
            if skipEvent: continue

            # one output directory per (ntuple, event)
            eventDir = outDir + "/ntup{}/event{}".format(ntupleNumber, eventID)
            if not os.path.exists(eventDir): os.makedirs(eventDir)

            # get raw rec hits
            print("\n\npreparing raw recHits...", end='')
            start = time.time()
            recHitsRaw = event.getDataFrame("rechit")
            end = time.time()
            print(" done (", end - start, " s)")

            # get simulated hits associated with a cluster
            print("preparing simulated hits and clusters...", end='')
            start = time.time()
            simClusters = event.getDataFrame("simcluster")
            simHitsPerClusterArray = getHitsPerCluster(recHitsRaw, simClusters)
            end = time.time()
            print(" done (", end - start, " s)")

            # re-run clustering with HGCalAlgo, save to file
            print("running clustering algorithm...", end='')
            start = time.time()
            recClusters, rec3Dclusters = getRecClustersFromImagingAlgo(
                recHitsRaw)
            end = time.time()
            print(" done (", end - start, " s)")

            # recClusters -> array of hexel objects
            print("looking for hits associated with hexels...", end='')
            start = time.time()
            recHitsPerClusterArray = getRecHitsPerHexel(
                recHitsRaw, recClusters)
            end = time.time()
            print(" done (", end - start, " s)")

            # perform final analysis, fill in histograms and save to files
            print("\nGenerating final hists...")
            start = time.time()
            energyComparisonHist = ROOT.TH2D("energy comparison",
                                             "energy comparison", 100, 0, 100,
                                             100, 0, 100)
            energyComparisonOverlapHist = ROOT.TH2D(
                "energy comparison overlap.", "energy comparison overlap.",
                100, 0, 100, 100, 0, 100)

            for layer in range(minLayer, maxLayer):
                #        print("layer:",layer)
                for recClusterIndex, recCluster in enumerate(
                        recHitsPerClusterArray):
                    #          print("rec cluster:",recCluster)

                    recHitsInLayerInCluster = recCluster[getLayerMask(
                        recCluster, layer)]

                    # bounding box of the rec cluster's hits in this layer;
                    # centre and half-extent define the matching circle
                    recEnergy = recHitsInLayerInCluster["energy"].sum()
                    xMaxRec = recHitsInLayerInCluster["x"].max()
                    xMinRec = recHitsInLayerInCluster["x"].min()
                    yMaxRec = recHitsInLayerInCluster["y"].max()
                    yMinRec = recHitsInLayerInCluster["y"].min()

                    recClusterX = xMinRec + (xMaxRec - xMinRec) / 2.
                    recClusterY = yMinRec + (yMaxRec - yMinRec) / 2.
                    recClusterR = max((xMaxRec - xMinRec) / 2.,
                                      (yMaxRec - yMinRec) / 2.)

                    # sim energy matched to this rec cluster in this layer
                    assocSimEnergy = 0

                    for simClusterIndex, simCluster in enumerate(
                            simHitsPerClusterArray):
                        #            print("sim cluster:",simCluster)

                        simHitsInLayerInCluster = simCluster[getLayerMask(
                            simCluster, layer)]

                        simEnergy = simHitsInLayerInCluster["energy"].sum()
                        xMaxSim = simHitsInLayerInCluster["x"].max()
                        xMinSim = simHitsInLayerInCluster["x"].min()
                        yMaxSim = simHitsInLayerInCluster["y"].max()
                        yMinSim = simHitsInLayerInCluster["y"].min()

                        simClusterX = xMinSim + (xMaxSim - xMinSim) / 2.
                        simClusterY = yMinSim + (yMaxSim - yMinSim) / 2.
                        simClusterR = max((xMaxSim - xMinSim) / 2.,
                                          (yMaxSim - yMinSim) / 2.)

                        # all-pairs comparison (no geometric matching)
                        if recEnergy * simEnergy != 0:
                            energyComparisonHist.Fill(recEnergy, simEnergy)


#              if circlesOverlap(recClusterX,recClusterY,recClusterR,simClusterX,simClusterY,simClusterR):
#                energyComparisonOverlapHist.Fill(recEnergy,simEnergy)

                        # geometric matching: sim-cluster centre inside the
                        # (scaled) rec-cluster circle
                        if pointWithinCircle(simClusterX, simClusterY,
                                             recClusterX, recClusterY,
                                             recClusterR, clusterAcceptScale):
                            #            if circlesOverlap(recClusterX,recClusterY,recClusterR,simClusterX,simClusterY,simClusterR,clusterAcceptScale):
                            assocSimEnergy += simEnergy

                    if recEnergy * assocSimEnergy != 0:
                        energyComparisonOverlapHist.Fill(
                            recEnergy, assocSimEnergy)

            energyComparisonHist.SaveAs(
                "{}/energyComparisonHist.root".format(eventDir))
            energyComparisonOverlapHist.SaveAs(
                "{}/energyComparisonOverlapHist.root".format(eventDir))
            end = time.time()
            print(" done (", end - start, " s)")

            # summary of the re-run 3D (multi-)clusters
            for index in range(len(rec3Dclusters)):
                print(
                    f"Multi-cluster (RE-RUN) index: {index}",
                    f", No. of 2D-clusters = {len(rec3Dclusters[index].thisCluster)}",
                    f", Energy  = {rec3Dclusters[index].energy:.2f}",
                    f", Phi = {rec3Dclusters[index].phi:.2f}",
                    f", Eta = {rec3Dclusters[index].eta:.2f}",
                    f", z = {rec3Dclusters[index].z:.2f}")

            endEvent = time.time()
            print("Total event processing time: ", endEvent - startEvent, " s")
Пример #13
0
def analyze(params, batch_idx=0):
    print str(params)

    debug = int(params.debug)
    # pool = Pool(5)
    n_phi_bins = 72
    n_eta_bins = 18

    phi_bins = np.linspace(-1 * math.pi, math.pi, n_phi_bins + 1)
    eta_bins = np.linspace(1.479, 3.0, n_eta_bins + 1)
    # eta_bins = np.linspace(1.41, 3.1, n_eta_bins+1)

    eta_bin_size = eta_bins[1] - eta_bins[0]
    eta_bin_first = eta_bins[0] + eta_bin_size / 2
    phi_bin_size = phi_bins[1] - phi_bins[0]
    phi_bin_first = phi_bins[0] + phi_bin_size / 2

    print '-- Eta bin size: {}, first bin center: {}, # bins: {}'.format(
        eta_bin_size, eta_bin_first,
        len(eta_bins) - 1)
    print '    {}'.format(eta_bins)

    print '-- Phi bin size: {}, first bin center: {}, # bins: {}'.format(
        phi_bin_size, phi_bin_first,
        len(phi_bins) - 1)
    print '    {}'.format(phi_bins)

    tc_geom_df = pd.DataFrame()
    cell_geom_df = pd.DataFrame()
    geom_file = os.path.join(params.input_base_dir,
                             'geom/test_triggergeom.root')

    # geom_file = params.input_base_dir+'/geom/test_triggergeom_v1.root'
    print 'Loading the geometry...'
    tc_geom_tree = HGCalNtuple([geom_file],
                               tree='hgcaltriggergeomtester/TreeTriggerCells')
    tc_geom_tree.setCache(learn_events=100)

    tc_geom_df = convertGeomTreeToDF(tc_geom_tree._tree)
    tc_geom_df['radius'] = np.sqrt(tc_geom_df['x']**2 + tc_geom_df['y']**2)
    tc_geom_df['eta'] = np.arcsinh(tc_geom_df.z / tc_geom_df.radius)
    tc_geom_df['phi'] = np.arctan2(tc_geom_df.y, tc_geom_df.x)

    # cell_geom_tree = HGCalNtuple([geom_file], tree='hgcaltriggergeomtester/TreeCells')
    # cell_geom_tree.setCache(learn_events=100)
    # cell_geom_df = convertGeomTreeToDF(cell_geom_tree._tree)
    #
    # bhcell_geom_tree = HGCalNtuple([geom_file], tree='hgcaltriggergeomtester/TreeCellsBH')
    # bhcell_geom_tree.setCache(learn_events=100)
    # bhcell_geom_df = convertGeomTreeToDF(bhcell_geom_tree._tree)
    print '...done'

    # display_mgr = display.EventDisplayManager(cell_geom=cell_geom_df,
    #                                           trigger_cell_geom=tc_geom_tree)

    # tc_geom_df['cell'] = hgcdetid.v_cell(tc_geom_df.id)

    sel_cells = tc_geom_df[tc_geom_df.subdet.isin([3, 4]) & ~(
        (tc_geom_df.subdet == 3) & (tc_geom_df.layer % 2 == 0))].copy()

    # some checks on the selection:
    #
    # print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
    # print sel_cells[(sel_cells.subdet == 3) & (sel_cells.layer % 2 != 0)]
    # print '#################################'
    # print sel_cells[(sel_cells.subdet == 3) & (sel_cells.layer % 2 == 0)]
    #
    print sel_cells.subdet.unique()
    print sel_cells[sel_cells.subdet == 3].layer.unique()

    sel_cells['wafertype'] = 1
    # FIXME: in the current implementation of the TriggerGeometry all TCs have wafertype = 1
    # here we need to know which modules have 120um thinckness to map them to the 6 HGCROC layouts
    # we crudely assume that this is true for EE and FH modules with radius < 70cm
    sel_cells['wafertype'] = hgcdetid.v_settype_on_radius(sel_cells['radius'])

    def map_wafertype_majority(data):
        # print data
        # counts = data['wafertype'].value_counts()
        # # if len(counts) > 1:
        # wafertype = counts.index[0]
        # data.loc[data.index, 'wafertype'] = wafertype
        data['wafertype'] = data['wafertype'].value_counts().index[0]
        return data

    # now we actually correct the wafertype for the rest of the module based on majority logic
    print 'Starting wafertype mapping on majority logic: {}'.format(
        datetime.datetime.now())
    sel_cells = sel_cells.groupby(['subdet', 'zside', 'layer',
                                   'wafer']).apply(map_wafertype_majority)
    print '...done: {}'.format(datetime.datetime.now())

    # we now assign the hgcroc value
    # tc_geom_df['sector'] = hgcdetid.v_module_sector(tc_geom_df.id)
    # tc_geom_df['subcell'] = tc_geom_df.cell - tc_geom_df.sector*16
    sel_cells['hgcroc'] = hgcdetid.v_hgcroc_big(sel_cells.id)
    sel_cells.loc[sel_cells.wafertype == -1,
                  ('hgcroc')] = hgcdetid.v_hgcroc_small(
                      sel_cells.loc[sel_cells.wafertype == -1, ('id')])

    sel_cells['eta_bin'] = np.digitize(np.fabs(sel_cells.eta), eta_bins) - 1
    # tc_geom_df['eta_bin_c'] = np.digitize(np.fabs(tc_geom_df.eta), eta_bins)-1
    sel_cells['phi_bin'] = np.digitize(sel_cells.phi, phi_bins) - 1

    # deal with rounding effects on pi
    sel_cells.loc[sel_cells.phi_bin == n_phi_bins,
                  ('phi_bin')] = n_phi_bins - 1
    sel_cells.loc[sel_cells.phi_bin == -1, ('phi_bin')] = 0
    # deal with the fact that some of the cells hactually have eta outside the bin range
    sel_cells.loc[sel_cells.eta_bin == n_eta_bins,
                  ('eta_bin')] = n_eta_bins - 1
    sel_cells.loc[sel_cells.eta_bin == -1, ('eta_bin')] = 0

    tc_overflow = sel_cells[(sel_cells.eta_bin < 0) |
                            (sel_cells.eta_bin > n_eta_bins - 1) |
                            (sel_cells.phi_bin < 0) |
                            (sel_cells.phi_bin > n_phi_bins - 1)][[
                                'id', 'eta', 'phi', 'eta_bin', 'phi_bin'
                            ]]
    if not tc_overflow.empty:
        print 'ERROR: some of the TCs have a bin outside the allowed range'
        print tc_overflow

    # This needs to be fixed after all the rounding has been take care of
    sel_cells['tt_bin'] = sel_cells.apply(
        func=lambda cell: (int(cell.eta_bin), int(cell.phi_bin)), axis=1)
    sel_cells['hgcroc_tt_bin'] = sel_cells['tt_bin']
    sel_cells['wafer_tt_bin'] = sel_cells['tt_bin']

    # temp_bins = pd.Series()

    # now we assign all hgcrocs or modules to the same tower on a majority logic on the TCs belonging to them
    def map_hgcroctt_majority(data):
        tt_bin = data['tt_bin'].value_counts().index[0]
        data['hgcroc_tt_bin'] = pd.Series([tt_bin for x in data.index],
                                          index=data.index)

        return data
        # counts = data['tt_bin'].value_counts()
        # if len(counts) > 1:
        #     tt_bin = counts.index[0]
        #     data.loc[data.index, 'hgcroc_tt_bin'] = pd.Series([tt_bin for x in data.index],
        #                                                       index=data.index)

    print 'Starting hgcroc mapping to TT on majority logic: {}'.format(
        datetime.datetime.now())
    sel_cells = sel_cells.groupby(
        ['subdet', 'zside', 'layer', 'wafer',
         'hgcroc']).apply(map_hgcroctt_majority)
    print '...done: {}'.format(datetime.datetime.now())

    def map_wafertt_majority(data):
        tt_bin = data['tt_bin'].value_counts().index[0]
        data['wafer_tt_bin'] = pd.Series([tt_bin for x in data.index],
                                         index=data.index)
        return data
        # counts = data['tt_bin'].value_counts()
        # if len(counts) > 1:
        #     tt_bin = counts.index[0]
        #     data.loc[data.index, 'hgcroc_tt_bin'] = pd.Series([tt_bin for x in data.index],
        #                                                       index=data.index)

    print 'Starting wafer mapping to TT on majority logic: {}'.format(
        datetime.datetime.now())
    sel_cells = sel_cells.groupby(['subdet', 'zside', 'layer',
                                   'wafer']).apply(map_wafertt_majority)
    print '...done: {}'.format(datetime.datetime.now())

    def dump_mapping(tc_map, field, file_name):
        tower_tc_mapping = pd.DataFrame(
            columns=['id', 'towerbin_x', 'towerbin_y'], dtype=np.int64)

        tower_tc_mapping.id = tc_map.id
        tower_tc_mapping.towerbin_x = tc_map.apply(
            func=(lambda x: x[field][0]), axis=1)
        tower_tc_mapping.towerbin_y = tc_map.apply(
            func=(lambda x: x[field][1]), axis=1)

        tower_tc_mapping.to_csv(file_name,
                                sep=' ',
                                float_format='%.0f',
                                header=False,
                                index=False)
        return tower_tc_mapping

    hgcroc_sel_cells = dump_mapping(
        sel_cells,
        field='hgcroc_tt_bin',
        file_name='TCmapping_hgcroc_eta-phi_v2.txt')
    wafer_sel_cells = dump_mapping(sel_cells,
                                   field='wafer_tt_bin',
                                   file_name='TCmapping_wafer_eta-phi_v2.txt')

    # Hard-coded list of TC detids to cross-check against the mapping,
    # stored as fixed-width byte strings ('|S10'). Presumably collected
    # from an earlier validation run -- TODO confirm provenance.
    missing = np.array([
        '1780744193', '1780744194', '1780744195', '1780744196', '1780744197',
        '1780744198', '1780744199', '1780744200', '1780744201', '1780744202',
        '1780744203', '1780744204', '1780744205', '1780744206', '1780744207',
        '1780744208', '1780744209', '1780744210', '1780744211', '1780744212',
        '1780744213', '1780744214', '1780744215', '1780744216', '1780744217',
        '1780744218', '1780744219', '1780744220', '1780744221', '1780744222',
        '1780744223', '1780744224', '1780744225', '1780744226', '1780744227',
        '1780744228', '1780744229', '1780744230', '1780744231', '1780744232',
        '1780744233', '1780744234', '1780744235', '1780744236', '1780744237',
        '1780744238', '1780744239', '1780744240', '1780744241', '1780744242',
        '1780744243', '1780744244', '1780744245', '1780744246', '1780744247',
        '1780744248', '1780744249', '1780744250', '1780744251', '1780744252',
        '1780744253', '1780744254', '1780744255', '1780744256', '1780744257',
        '1780744258', '1780744259', '1780744260', '1780744261', '1780744262',
        '1780744263', '1780744264', '1797521409', '1797521410', '1797521411',
        '1797521412', '1797521413', '1797521414', '1797521415', '1797521416',
        '1797521417', '1797521418', '1797521419', '1797521420', '1797521421',
        '1797521422', '1797521423', '1797521424', '1797521425', '1797521426',
        '1797521427', '1797521428', '1797521429', '1797521430', '1797521431',
        '1797521432', '1797521433', '1797521434', '1797521435', '1797521436',
        '1797521437', '1797521438', '1797521439', '1797521440', '1797521441',
        '1797521442', '1797521443', '1797521444', '1797521445', '1797521446',
        '1797521447', '1797521448', '1797521449', '1797521450', '1797521451',
        '1797521452', '1797521453', '1797521454', '1797521455', '1797521456',
        '1797521457', '1797521458', '1797521459', '1797521460', '1797521461',
        '1797521462', '1797521463', '1797521464', '1797521465', '1797521466',
        '1797521467', '1797521468', '1797521469', '1797521470', '1797521471',
        '1797521472', '1797521473', '1797521474', '1797521475', '1797521476',
        '1797521477', '1797521478', '1797521479', '1797521480'
    ],
                       dtype='|S10')

    # Consistency summary: how many TCs exist vs. how many got mapped,
    # and how many distinct tower bins each mapping produced.
    print sel_cells[sel_cells.id.isin(missing)]
    print "# of TCs = {}".format(len(tc_geom_df.id.unique()))
    print "# of TCs mapped to TT (hgcroc) = {}".format(
        len(hgcroc_sel_cells.id.unique()))
    print "# of bins (hgcroc) = {}".format(
        len(sel_cells.hgcroc_tt_bin.unique()))
    print "# of TCs mapped to TT (wafer) = {}".format(
        len(wafer_sel_cells.id.unique()))
    print "# of bins (wafer) = {}".format(len(sel_cells.wafer_tt_bin.unique()))

    # Early exit: everything after this line (up to the next def) is
    # unreachable debug scaffolding.
    sys.exit(0)

    # NOTE(review): this whole section is DEAD CODE -- the sys.exit(0)
    # above always fires first. Kept as debugging scaffolding for wafer /
    # cell / TC geometry inspection.
    # tc_ids_all = pd.DataFrame(columns=['wf', 'wtf', 'hgcroc'])
    # results = []
    #
    #
    #
    #
    # for index, tc in tc_geom_df.iterrows():
    #     if index % 1000 == 0:
    #         print 'TC: {}'.format(index)
    #     detid = HGCalDetId(tc.id)
    #
    #     tc_ids = pd.DataFrame(columns=['wf', 'wtf', 'hgcroc'])
    #     tc_ids['wf'] = detid.wafer()
    #     tc_ids['wft'] = detid.waferType()
    #     tc_ids['hgcroc'] = detid.hgcroc()
    #     results.append(tc_ids)
    # tc_ids_all = pd.concatenate(results)
    #
    # for index, cell in cell_geom_df.iterrows():
    #     if index % 1000 == 0:
    #         print 'Cell: {}'.format(index)
    #     detid = HGCalDetId(cell.tc_id)
    #     cell['wf'] = detid.wafer()
    #     cell['wft'] = detid.waferType()
    #     cell['hgcroc'] = detid.hgcroc()

    # Select one wafer of each type (+1 / -1) on layer 1, z < 0, to eyeball
    # the cell and TC content side by side.
    cell_sel_type_p1 = cell_geom_df[(cell_geom_df.wafertype == 1)
                                    & (cell_geom_df.layer == 1) &
                                    (cell_geom_df.wafer == 180) &
                                    (cell_geom_df.zside == -1)]
    cell_sel_type_m1 = cell_geom_df[(cell_geom_df.wafertype == -1)
                                    & (cell_geom_df.layer == 1) &
                                    (cell_geom_df.wafer == 101) &
                                    (cell_geom_df.zside == -1)]
    tc_sel_p1 = tc_geom_df[(tc_geom_df.subdet == 3) & (tc_geom_df.layer == 1) &
                           # (tc_geom_df.wafer == 101) &
                           (tc_geom_df.wafertype == -1) &
                           (tc_geom_df.zside == -1)]

    print '--------------------------------------------------------------------'
    print cell_sel_type_p1[['id', 'layer', 'subdet', 'zside', 'wafer', 'cell']]
    print '---------------------------------------------------------------------'
    print cell_sel_type_m1
    #
    #
    # tc_geom_df['wf'] = tc_geom_df.apply(compute_wafer, axis=1)
    # cell_geom_df['wf'] = cell_geom_df.apply(compute_wafer, axis=1)
    # tc_geom_df['wft'] = tc_geom_df.apply(compute_waferType, axis=1)
    # cell_geom_df['wft'] = cell_geom_df.apply(compute_waferType, axis=1)
    #
    print '---------------------------------------------------------------------'
    debugPrintOut(debug,
                  'Cell geometry',
                  toCount=cell_geom_df,
                  toPrint=cell_geom_df.iloc[:3])
    print '---------------------------------------------------------------------'
    debugPrintOut(debug,
                  'BH geometry',
                  toCount=bhcell_geom_df,
                  toPrint=bhcell_geom_df.iloc[:3])
    print '---------------------------------------------------------------------'
    debugPrintOut(debug,
                  'TC geometry',
                  toCount=tc_geom_df,
                  toPrint=tc_geom_df.iloc[:3])

    # NOTE(review): these assign to slices of cell_geom_df / tc_geom_df and
    # would trigger pandas SettingWithCopy warnings if ever reached.
    cell_sel_type_p1['color'] = cell_sel_type_p1.cell
    cell_sel_type_m1['color'] = cell_sel_type_m1.cell
    tc_sel_p1['energy'] = tc_sel_p1.hgcroc

    print '---------------------------------------------------------------------'
    print tc_sel_p1

    # display_mgr.displayCells(event=1, cells=cell_sel_type_p1)
    # display_mgr.displayCells(event=1, cells=cell_sel_type_m1)

    # display_mgr.displayTriggerCells(event=1, tcs=tc_sel_p1)
    # display_mgr.show(event=1)

    sys.exit(0)
def analyze(params, batch_idx=0):
    print(params)
    debug = int(params.debug)
    pool = Pool(5)

    tc_geom_df = pd.DataFrame()
    tc_rod_bins = pd.DataFrame()
    if False:
        # read the geometry dump
        geom_file = os.path.join(params.input_base_dir,
                                 'geom/test_triggergeom.root')
        tc_geom_tree = HGCalNtuple(
            [geom_file], tree='hgcaltriggergeomtester/TreeTriggerCells')
        tc_geom_tree.setCache(learn_events=100)
        print('read TC GEOM tree with # events: {}'.format(
            tc_geom_tree.nevents()))
        tc_geom_df = convertGeomTreeToDF(tc_geom_tree._tree)
        tc_geom_df['radius'] = np.sqrt(tc_geom_df['x']**2 + tc_geom_df['y']**2)
        tc_geom_df['eta'] = np.arcsinh(tc_geom_df.z / tc_geom_df.radius)

        if False:
            tc_rod_bins = pd.read_csv(
                filepath_or_buffer='data/TCmapping_v2.txt',
                sep=' ',
                names=['id', 'rod_x', 'rod_y'],
                index_col=False)
            tc_rod_bins['rod_bin'] = tc_rod_bins.apply(
                func=lambda cell: (int(cell.rod_x), int(cell.rod_y)), axis=1)

            tc_geom_df = pd.merge(tc_geom_df, tc_rod_bins, on='id')

        if debug == -4:
            tc_geom_tree.PrintCacheStats()
        print('...done')

    tree_name = 'hgcalTriggerNtuplizer/HGCalTriggerNtuple'
    input_files = []
    range_ev = (0, params.maxEvents)

    if params.events_per_job == -1:
        print 'This is interactive processing...'
        input_files = fm.get_files_for_processing(
            input_dir=os.path.join(params.input_base_dir,
                                   params.input_sample_dir),
            tree=tree_name,
            nev_toprocess=params.maxEvents,
            debug=debug)
    else:
        print 'This is batch processing...'
        input_files, range_ev = fm.get_files_and_events_for_batchprocessing(
            input_dir=os.path.join(params.input_base_dir,
                                   params.input_sample_dir),
            tree=tree_name,
            nev_toprocess=params.maxEvents,
            nev_perjob=params.events_per_job,
            batch_id=batch_idx,
            debug=debug)

    # print ('- dir {} contains {} files.'.format(params.input_sample_dir, len(input_files)))
    print '- will read {} files from dir {}:'.format(len(input_files),
                                                     params.input_sample_dir)
    for file_name in input_files:
        print '        - {}'.format(file_name)

    ntuple = HGCalNtuple(input_files, tree=tree_name)
    if params.events_per_job == -1:
        if params.maxEvents == -1:
            range_ev = (0, ntuple.nevents())

    print('- created TChain containing {} events'.format(ntuple.nevents()))
    print('- reading from event: {} to event {}'.format(
        range_ev[0], range_ev[1]))

    ntuple.setCache(learn_events=1, entry_range=range_ev)
    output = ROOT.TFile(params.output_filename, "RECREATE")
    output.cd()

    if False:
        hTCGeom = histos.GeomHistos('hTCGeom')
        hTCGeom.fill(tc_geom_df[(np.abs(tc_geom_df.eta) > 1.65)
                                & (np.abs(tc_geom_df.eta) < 2.85)])

    # instantiate all the plotters
    nev = 0
    for evt_idx in range(range_ev[0], range_ev[1] + 1):
        # print(evt_idx)
        event = ntuple.getEvent(evt_idx)
        if (params.maxEvents != -1 and nev >= params.maxEvents):
            break
        if debug >= 2 or event.entry() % 100 == 0:
            print("--- Event {}, @ {}".format(event.entry(),
                                              datetime.datetime.now()))
            print('    run: {}, lumi: {}, event: {}'.format(
                event.run(), event.lumi(), event.event()))

        nev += 1

        # get the interesting data-frames
        triggerCells = event.getDataFrame(prefix='tc')
        clusters = event.getDataFrame(prefix='hmVRcl3d')
        clusters_truth = event.getDataFrame(prefix='cl3dtruth')
        gen_info = event.getDataFrame(prefix='gen')
        gen_particles = event.getDataFrame(prefix='genpart')

        puInfo = event.getPUInfo()

        debugPrintOut(debug, 'PU', toCount=puInfo, toPrint=puInfo)

        debugPrintOut(debug,
                      'Trigger Cells',
                      toCount=triggerCells,
                      toPrint=triggerCells.iloc[:3])

        # print gen_particles.columns
        print gen_particles[['pid', 'eta', 'phi', 'pt', 'mother', 'gen']]

        def find_gen_particle(cluster, triggerCells):
            return triggerCells[triggerCells.id.isin(
                cluster.clusters)].genparticle.unique()[0]

        def find_cluster_components(cluster, triggerCells):
            return triggerCells[triggerCells.id.isin(cluster.clusters)]

        # for index, cluster in clusters_truth.iterrows():
        #     print cluster
        #     print 'corresponding gen particle: {}'.format(find_gen_particle(cluster, triggerCells))

        if not clusters_truth.empty:
            clusters_truth['genparticle'] = clusters_truth.apply(
                func=lambda cl: find_gen_particle(cl, triggerCells), axis=1)

        print clusters_truth

        best_match_indexes = {}
        best_match_indexes_truth = {}
        if not clusters.empty:
            best_match_indexes, allmatches = utils.match_etaphi(
                gen_particles[['eta', 'phi']],
                clusters[['eta', 'phi']],
                clusters['pt'],
                deltaR=0.1)

        if not clusters_truth.empty:
            best_match_indexes_truth, allmatches_truth = utils.match_etaphi(
                gen_particles[['eta', 'phi']],
                clusters_truth[['eta', 'phi']],
                clusters_truth['pt'],
                deltaR=0.1)

        for idx, gen_particle in gen_particles[
            (abs(gen_particles.eta) > 1.5)
                & (abs(gen_particles.eta) < 2.4)].iterrows():
            if idx in best_match_indexes.keys():
                print '-----------------------'
                print gen_particle
                matched3DCluster = clusters.loc[[best_match_indexes[idx]]]
                print 'RECO cluster:'
                print matched3DCluster
                print matched3DCluster.clusters
                print find_cluster_components(matched3DCluster.iloc[0],
                                              triggerCells)
                response = matched3DCluster.pt / gen_particle.pt

            if idx in best_match_indexes_truth.keys():
                matched3DCluster_truth = clusters_truth.loc[[
                    best_match_indexes_truth[idx]
                ]]
                print 'True cluster:'
                print matched3DCluster_truth
                print find_cluster_components(matched3DCluster_truth.iloc[0],
                                              triggerCells)
                response_truth = matched3DCluster_truth.pt / gen_particle.pt

        # print '# towers eta >0 {}'.format(len(triggerTowers[triggerTowers.eta > 0]))
        # print '# towers eta <0 {}'.format(len(triggerTowers[triggerTowers.eta < 0]))

    print("Processed {} events/{} TOT events".format(nev, ntuple.nevents()))
    print("Writing histos to file {}".format(params.output_filename))

    lastfile = ntuple.tree().GetFile()
    print 'Read bytes: {}, # of transaction: {}'.format(
        lastfile.GetBytesRead(), lastfile.GetReadCalls())
    if debug == -4:
        ntuple.PrintCacheStats()

    return
Пример #15
0
def main():
    """Loop over the configured samples and fill layer-cluster histograms.

    For each sample: creates an output directory and ROOT file, iterates
    its ntuple files up to ``maxEvents`` (a module-level global -- not
    visible in this chunk, as are ``rangeFolders``, ``getHists``,
    ``SampleManager`` and ``HGCalHelpers``), fills per-event display
    histograms from the layer clusters, and saves everything via
    HGCalHelpers.saveHistograms.
    """

    samples2Run = [
        'FlatRandomPtGunProducer_SinglePion_35GeV_20170523',
        'FlatRandomPtGunProducer_SinglePhoton_35GeV_20170523',
        'RelValTTbar_14TeV_CMSSW_9_1_0_pre3-PU25ns_91X_upgrade2023_realistic_v1_D13PU200-v2_GEN-SIM-RECO'
    ]

    sampleManager = SampleManager()
    for sampleName in samples2Run:
        sample = sampleManager.getSample(sampleName)
        print "Sample {} has {} files".format(sampleName,
                                              len(sample.getFiles()))

        outDir = sampleName
        HGCalHelpers.createOutputDir(outDir)

        # A canvas is only needed when image files are produced.
        rootOnly = False
        imgType = 'png'
        canvas = None
        if not rootOnly:
            canvas = ROOT.TCanvas(outDir, outDir, 600, 600)

        outFile = ROOT.TFile.Open("{}.root".format(sampleName), "RECREATE")

        histDict = getHists()

        # Event counter shared across all files of this sample.
        currentEvent = 0

        for inFile in sample.getFiles():
            if maxEvents > 0:
                if currentEvent > maxEvents:
                    break

            print inFile
            ntuple = HGCalNtuple(inFile)
            for event in ntuple:
                currentEvent += 1
                if (currentEvent % 10 == 0):
                    print "Event", currentEvent, "of", maxEvents
                if maxEvents > 0:
                    if currentEvent > maxEvents:
                        break

                # # multi clusters
                # multiClusters = event.multiClusters()
                # multiClusterCounter = {}
                # for currentRange in rangeFolders:
                #     multiClusterCounter[currentRange] = 0
                # for multiCluster in multiClusters:
                #     if (multiCluster.z() < 0):
                #         multiClusterCounter['minus_z'] += 1
                #         histDict['{}_{}_eta'.format('MultiClus', currentRange)].Fill(abs(multiCluster.eta()))
                #         histDict['{}_{}_pt'.format('MultiClus', currentRange)].Fill(multiCluster.pt())
                #         histDict['{}_{}_nclus'.format('MultiClus', currentRange)].Fill(len(multiCluster.cluster2d()))
                #         if (1.6 < abs(multiCluster.eta()) < 2.6):
                #             multiClusterCounter['minus_z_eta'] += 1
                #             histDict['{}_{}_eta'.format('MultiClus', currentRange)].Fill(abs(multiCluster.eta()))
                #             histDict['{}_{}_pt'.format('MultiClus', currentRange)].Fill(multiCluster.pt())
                #             histDict['{}_{}_nclus'.format('MultiClus', currentRange)].Fill(len(multiCluster.cluster2d()))
                #         # print multiCluster.pt()
                #     else:
                #         multiClusterCounter['plus_z'] += 1
                #         histDict['{}_{}_eta'.format('MultiClus', currentRange)].Fill(abs(multiCluster.eta()))
                #         histDict['{}_{}_pt'.format('MultiClus', currentRange)].Fill(multiCluster.pt())
                #         histDict['{}_{}_nclus'.format('MultiClus', currentRange)].Fill(len(multiCluster.cluster2d()))
                #         if (1.6 < abs(multiCluster.eta()) < 2.6):
                #             multiClusterCounter['plus_z_eta'] += 1
                #             histDict['{}_{}_eta'.format('MultiClus', currentRange)].Fill(abs(multiCluster.eta()))
                #             histDict['{}_{}_pt'.format('MultiClus', currentRange)].Fill(multiCluster.pt())
                #             histDict['{}_{}_nclus'.format('MultiClus', currentRange)].Fill(len(multiCluster.cluster2d()))

                # layer clusters
                layerClusters = event.layerClusters()
                recHits = event.recHits()
                # Per-layer counters, one list per range folder and per
                # seed-hit thickness (100/200/300/other); index 0 is the
                # overall counter, 1..52 the individual layers.
                # NOTE(review): currently only filled with zeros -- the
                # filling code below is commented out.
                layerClusterCounter = {}
                for currentRange in rangeFolders:
                    layerClusterCounter[currentRange] = []
                    layerClusterCounter["100_" + currentRange] = []
                    layerClusterCounter["200_" + currentRange] = []
                    layerClusterCounter["300_" + currentRange] = []
                    layerClusterCounter["other_" + currentRange] = []
                    for layer in range(0, 53):
                        # use index zero as overall counter
                        layerClusterCounter[currentRange].append(0)
                        layerClusterCounter["100_" + currentRange].append(0)
                        layerClusterCounter["200_" + currentRange].append(0)
                        layerClusterCounter["300_" + currentRange].append(0)
                        layerClusterCounter["other_" + currentRange].append(0)
                for layerCluster in layerClusters:
                    # if recHits[layerCluster.rechitSeed()].flags() != 0:
                    #     print recHits[layerCluster.rechitSeed()].flags(), layerCluster.pt(), len(layerCluster.rechits())
                    # Fill the per-event display histogram: layer vs phi vs
                    # eta, weighted by the cluster pt.
                    histDict["eventDisplay_{}".format(currentEvent)].Fill(
                        layerCluster.layer(), layerCluster.phi(),
                        layerCluster.eta(), layerCluster.pt())
                    # print layerCluster.rechitSeed()
                    # seedHitThickness = int(recHits[layerCluster.rechitSeed()].thickness())
                    # # print "seedHitThickness", seedHitThickness
                    # histStringsForFilling = []
                    # if (layerCluster.z() < 0):
                    #     histStringsForFilling.append('minus_z')
                    #     histStringsForFilling.append(thicknessDict[seedHitThickness] + 'minus_z')
                    #     if (1.6 < abs(layerCluster.eta()) < 2.6):
                    #         histStringsForFilling.append('minus_z_eta')
                    #         histStringsForFilling.append(thicknessDict[seedHitThickness] + 'minus_z_eta')
                    # else:
                    #     histStringsForFilling.append('plus_z')
                    #     histStringsForFilling.append(thicknessDict[seedHitThickness] + 'plus_z')
                    #     if (1.6 < abs(layerCluster.eta()) < 2.6):
                    #         histStringsForFilling.append('plus_z_eta')
                    #         histStringsForFilling.append(thicknessDict[seedHitThickness] + 'plus_z_eta')
                    # for histString in histStringsForFilling:
                    #     layerClusterCounter[histString][0] += 1
                    #     layerClusterCounter[histString][layerCluster.layer()] += 1
                    #     histDict['{}_{}_eta'.format('LayerClus', histString)].Fill(abs(layerCluster.eta()))
                    #     histDict['{0}_{1}_{2:0>2}_eta'.format('LayerClus', histString, layerCluster.layer())].Fill(abs(layerCluster.eta()))
                    #     histDict['{}_{}_pt'.format('LayerClus', histString)].Fill(layerCluster.pt())
                    #     histDict['{0}_{1}_{2:0>2}_pt'.format('LayerClus', histString, layerCluster.layer())].Fill(layerCluster.pt())
                    #     histDict['{}_{}_nhitCore'.format('LayerClus', histString)].Fill(layerCluster.nhitCore())
                    #     histDict['{0}_{1}_{2:0>2}_nhitCore'.format('LayerClus', histString, layerCluster.layer())].Fill(layerCluster.nhitCore())
                    #     histDict['{}_{}_nhitAll'.format('LayerClus', histString)].Fill(layerCluster.nhitAll())
                    #     histDict['{0}_{1}_{2:0>2}_nhitAll'.format('LayerClus', histString, layerCluster.layer())].Fill(layerCluster.nhitAll())

                # fill counting histograms
                # for currentRange in rangeFolders:
                #     for layer in range(0, 53):
                #         # if (currentEvent == 1):
                #         #     print currentRange, layer, layerClusterCounter[currentRange][layer]
                #         if (layer == 0):
                #             histDict['{}_{}_mult'.format('LayerClus', currentRange)].Fill(layerClusterCounter[currentRange][layer])
                #             histDict['{}_{}_mult'.format('MultiClus', currentRange)].Fill(multiClusterCounter[currentRange])
                #         else:
                #             histDict['{0}_{1}_{2:0>2}_mult'.format('LayerClus', currentRange, layer)].Fill(layerClusterCounter[currentRange][layer])

        # Save canvases/images and the ROOT file for this sample.
        outFile.cd()
        # for key, value in histDict.items():
        #     if value.GetEntries() != 0:
        #         value.Scale(1./maxEvents)
        HGCalHelpers.saveHistograms(histDict,
                                    canvas,
                                    outDir,
                                    imgType,
                                    logScale=False,
                                    rootOnly=rootOnly)
        outFile.Write()
        outFile.Close()