def test_slice_assign():
    """Full-slice assignment from a list of values and from another hist's bins."""
    hist = Hist(10, 0, 1)
    nbins = len(hist)
    hist[:] = list(range(nbins))
    for hist_bin, expected in zip(hist, range(nbins)):
        assert hist_bin.value == expected

    clone = hist.Clone()
    # reverse bins
    hist[:] = clone[::-1]
    for hist_bin, clone_bin in zip(hist, clone[::-1]):
        assert hist_bin.value == clone_bin.value
def test_ravel():
    """Ravel a 3x4 Hist2D and check the flattened values and errors."""
    hist = Hist2D(3, 0, 1, 4, 0, 1)
    # give every bin a distinct value/error equal to its iteration index
    for index, cell in enumerate(hist.bins()):
        cell.value = index
        cell.error = index
    flat = hist.ravel()
    expected = list(range(12))
    assert_equal(list(flat.y()), expected)
    assert_equal(list(flat.yerrh()), expected)
def test_overall_efficiency():
    """Compare overall_efficiency of a 20-bin and a 1-bin Efficiency.

    For each statistic option 0..7 both objects receive the same 1000 random
    (passed, x) fills; entries 0, 1 and 2 of overall_efficiency(overflow=True)
    must then be almost equal.
    """
    for stat_op in range(0, 8):
        binned = Efficiency(Hist(20, -3, 3), Hist(20, -3, 3))
        single = Efficiency(Hist(1, -3, 3), Hist(1, -3, 3))
        binned.SetStatisticOption(stat_op)
        single.SetStatisticOption(stat_op)
        for _ in range(1000):
            x = gauss(0, 3.6)
            passed = uniform(0, 1) > 0.5
            binned.Fill(passed, x)
            single.Fill(passed, x)
        for component in (0, 1, 2):
            assert_almost_equal(
                binned.overall_efficiency(overflow=True)[component],
                single.overall_efficiency(overflow=True)[component])
def test_edgesl():
    """Check Hist.xedgesl for a histogram built from the edges [1, 2, 3, 4]."""
    h = Hist([1, 2, 3, 4])
    neg_inf = float('-inf')
    assert_equal(list(h.xedgesl()), [1, 2, 3])
    assert_equal(list(h.xedgesl(overflow=True)), [neg_inf, 1, 2, 3, 4])
    # single-index lookups at both ends of the index range
    for index, expected in ((0, neg_inf), (-1, 4), (4, 4)):
        assert_equal(h.xedgesl(index), expected)
    # wrap around
    assert_equal(h.xedgesl(5), neg_inf)
    for i in range(1, h.nbins()):
        assert_equal(h.xedgesl(i), i)
def test_edgesh():
    """Check Hist.xedgesh for a histogram built from the edges [1, 2, 3, 4]."""
    h = Hist([1, 2, 3, 4])
    pos_inf = float('inf')
    assert_equal(list(h.xedgesh()), [2, 3, 4])
    assert_equal(list(h.xedgesh(overflow=True)), [1, 2, 3, 4, pos_inf])
    # single-index lookups at both ends of the index range
    for index, expected in ((0, 1), (-1, pos_inf), (4, pos_inf)):
        assert_equal(h.xedgesh(index), expected)
    # wrap around
    assert_equal(h.xedgesh(5), 1)
    for i in range(1, h.nbins()):
        assert_equal(h.xedgesh(i), i + 1)
def test_edges():
    """Check Hist.xedges for a histogram built from the edges [1, 2, 3, 4]."""
    h = Hist([1, 2, 3, 4])
    neg_inf, pos_inf = float('-inf'), float('inf')
    assert_equal(list(h.xedges()), [1, 2, 3, 4])
    assert_equal(list(h.xedges(overflow=True)),
                 [neg_inf, 1, 2, 3, 4, pos_inf])
    # single-index lookups at both ends of the index range
    for index, expected in ((0, neg_inf), (-1, pos_inf), (5, pos_inf)):
        assert_equal(h.xedges(index), expected)
    # wrap around
    assert_equal(h.xedges(6), neg_inf)
    for i in range(1, h.nbins() + 1):
        assert_equal(h.xedges(i), i)
def test_edges():
    """Verify the edges returned by Hist.xedges, with and without overflow."""
    h = Hist([1, 2, 3, 4])
    minus_inf = float('-inf')
    plus_inf = float('inf')

    assert_equal(list(h.xedges()), [1, 2, 3, 4])
    with_overflow = list(h.xedges(overflow=True))
    assert_equal(with_overflow, [minus_inf, 1, 2, 3, 4, plus_inf])

    assert_equal(h.xedges(0), minus_inf)
    assert_equal(h.xedges(-1), plus_inf)
    assert_equal(h.xedges(5), plus_inf)
    # wrap around
    assert_equal(h.xedges(6), minus_inf)

    last_index = h.nbins() + 1
    for i in range(1, last_index):
        assert_equal(h.xedges(i), i)
def test_overall_efficiency():
    """Overall efficiency must not depend on the binning of the histograms.

    A 20-bin and a 1-bin Efficiency over the same range are filled identically
    for every statistic option 0..7, then entries 0, 1 and 2 of
    overall_efficiency(overflow=True) are compared.
    """
    for stat_op in range(0, 8):
        fine = Efficiency(Hist(20, -3, 3), Hist(20, -3, 3))
        coarse = Efficiency(Hist(1, -3, 3), Hist(1, -3, 3))
        for eff in (fine, coarse):
            eff.SetStatisticOption(stat_op)

        for _ in range(1000):
            x = gauss(0, 3.6)
            w = uniform(0, 1)
            passed = w > 0.5
            fine.Fill(passed, x)
            coarse.Fill(passed, x)

        for idx in range(3):
            assert_almost_equal(
                fine.overall_efficiency(overflow=True)[idx],
                coarse.overall_efficiency(overflow=True)[idx])
def get_weights(y_train, pt_train, options):
    """Compute per-candidate training weights from LV/PU pT shapes.

    Two normalised pT histograms are built, one for candidates labelled 1
    (LV) and one for everything else (PU).  The per-bin weight is the ratio
    LV/PU, set to 1 where either histogram is empty and clamped to
    [0.2, 5.0].  LV candidates get weight 1; every other candidate gets the
    weight of the pT bin it falls into.

    Parameters
    ----------
    y_train : object exposing ``.values``; labels, compared against 1.
    pt_train : object exposing ``.values``; pT per candidate, same order as
        ``y_train``.
    options : object with a ``train_pt_cut`` attribute, used as the lower
        histogram edge (the upper edge is fixed at 100).

    Returns
    -------
    numpy.ndarray of float, one weight per entry of ``y_train``.
    """
    print('getting weights...')
    Nbins = 100
    LV = rt.TH1D("LV", "LV", Nbins, options.train_pt_cut, 100.)
    PU = rt.TH1D("PU", "PU", Nbins, options.train_pt_cut, 100.)
    W = rt.TH1D("W", "W", Nbins, options.train_pt_cut, 100.)
    y_train = y_train.values
    pt_train = pt_train.values

    # Fill each candidate's own pT (the original indexed with an undefined i0).
    for label, pt in zip(y_train, pt_train):
        if label == 1:
            LV.Fill(pt)
        else:
            PU.Fill(pt)

    # Normalise to unit area; skip an empty histogram instead of dividing by 0.
    for shape in (LV, PU):
        integral = shape.Integral()
        if integral != 0.:
            shape.Scale(1. / integral)
    plot_th1([LV, PU])

    # make weight in each pT cat, making sure it isn't too big or too small
    weightUpper = 5.0
    weightLower = 0.2
    for ibin in range(1, LV.GetNbinsX() + 1):
        lv_content = LV.GetBinContent(ibin)
        pu_content = PU.GetBinContent(ibin)
        if pu_content == 0. or lv_content == 0.:
            weight = 1.
        else:
            weight = float(lv_content) / float(pu_content)
            weight = min(max(weight, weightLower), weightUpper)
        W.SetBinContent(ibin, weight)
    W.Print("all")

    # apply weights on training PU samples; LV candidates keep weight 1
    # NOTE(review): pT values outside the histogram range land in the
    # under/overflow bins of W, which are never set here - confirm that the
    # resulting bin content is the intended weight for such candidates.
    candWeights = np.ones(len(y_train), dtype=float)
    for index, (label, pt) in enumerate(zip(y_train, pt_train)):
        if label != 1:
            candWeights[index] = W.GetBinContent(W.FindBin(pt))
    return candWeights
def get_data(self):
    """Load the 'Events' dataset and split it into training and test samples.

    Reads ``options.infile + '.h5'``, uses column 'LV' as the label and all
    columns except 'ecalE', 'genE', 'LV', 'energy' and 'pt' as features.
    A row goes to the training sample when a uniform random draw is below 0.8
    AND its pt exceeds ``options.train_pt_cut``; every other row goes to the
    test sample.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, num_vars, eta_test, pt_test,
        eta_train, pt_train)`` where ``num_vars`` is the number of feature
        columns.
    """
    print('Build dataset')
    # Copy the events into memory so the file can be closed straight away
    # (the original never closed it).
    with h5py.File(options.infile + '.h5', 'r') as lFile:
        lArr = np.array(lFile.get('Events')[:])
    df = pd.DataFrame(data=lArr)

    y = df['LV']
    x = df.drop(columns=['ecalE', 'genE', 'LV', 'energy', 'pt'])
    eta = df['eta']
    pt = df['pt']
    num_vars = len(x.columns)

    # Vectorised form of the former per-row loop: random 80% AND above pT cut.
    msk = np.random.rand(len(df)) < 0.8
    msk2 = msk & (pt.values > options.train_pt_cut)

    y_train, x_train = y[msk2], x[msk2]
    y_test, x_test = y[~msk2], x[~msk2]
    print(x_test)

    # propagate eta, pt for ROC curves, weights
    eta_train, pt_train = eta[msk2], pt[msk2]
    eta_test, pt_test = eta[~msk2], pt[~msk2]

    print('Train on %i PF candidates (satisfying pT > %f) and use %i for validation.' % (
        len(y_train), options.train_pt_cut, len(y_test)))
    return x_train, x_test, y_train, y_test, num_vars, eta_test, pt_test, eta_train, pt_train
# create a simple 1D histogram with 10 constant-width bins between 0 and 1
h_simple = Hist(10, 0, 1)
print(h_simple.name)

# If the name is not specified, a UUID is used so that ROOT never complains
# about two histograms having the same name.
# Alternatively you can specify the name (and the title or any other style
# attributes) in the constructor:
h_simple = Hist(
    10, -4, 12,
    name='my hist',
    title='Some Data',
    drawstyle='hist',
    legendstyle='F',
    fillstyle='/')

# fill the histogram
for i in range(1000):
    # all ROOT CamelCase methods are aliased by equivalent snake_case methods
    # so you can call fill() instead of Fill()
    sample = random.gauss(4, 3)
    h_simple.Fill(sample)

# easily set visual attributes
h_simple.linecolor = 'blue'
h_simple.fillcolor = 'green'
h_simple.fillstyle = '/'

# attributes may be accessed in the same way
print(h_simple.name)
print(h_simple.title)
print(h_simple.markersize)

# plot
def train(self):
    """Train the model, optionally draw ROC curves, and export the result.

    Steps, in order:
      1. get the train/test split from ``self.get_data()``;
      2. build the model with ``self.build_model(num_vars)``;
      3. fit a StandardScaler on the training features and apply it to BOTH
         the training and the test features (the original only scaled the
         training set, so predictions were made on unscaled inputs);
      4. fit the model, with per-candidate sample weights from
         ``get_weights`` when ``options.use_weights`` is set;
      5. when ``options.makeroc`` is set, call ``make_roc_curve`` for every
         (pT, eta) bin (a single inclusive bin when ``options.inc`` is set);
      6. write the frozen graph, the JSON description and the weights.
    """
    (x_train, x_test, y_train, y_test, num_vars,
     eta_test, pt_test, eta_train, pt_train) = self.get_data()
    Nbatch = 500
    Nepoch = 10
    # Single definition of the output directory used for callbacks and exports.
    output_dir = '/uscms_data/d3/jkrupa/pf_studies/CMSSW_10_5_0_pre2/src/CaloTrigNN/CaloNtupler/test/h5_files'

    model = self.build_model(num_vars)
    model.summary()

    scaler = StandardScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    # The test features must go through the same transformation as the
    # training features before being passed to predict().
    x_test = scaler.transform(x_test)

    callbacks = all_callbacks(
        stop_patience=1000,
        lr_factor=0.5,
        lr_patience=10,
        lr_epsilon=0.000001,
        lr_cooldown=2,
        lr_minimum=0.0000001,
        outputDir=output_dir)

    print('Fit model...')
    fit_kwargs = dict(epochs=Nepoch,
                      batch_size=Nbatch,
                      callbacks=callbacks.callbacks,
                      validation_split=0.0)
    if options.use_weights:
        fit_kwargs['sample_weight'] = get_weights(y_train, pt_train, options)
    history = model.fit(x_train, y_train, **fit_kwargs)

    # https://hackernoon.com/simple-guide-on-how-to-generate-roc-plot-for-keras-classifier-2ecc6c73115a
    y_pred = model.predict(x_test).ravel()

    # inclusive
    fpr, tpr, thresholds = roc_curve(y_test, y_pred)

    # kinematic binning
    if options.inc:
        lpT = [1., 10000.0]
        leta = [1.7, 3.0]
    else:
        lpT = [1., 5., 10., 20., 10000.0]
        leta = [1.7, 2.0, 2.5, 3.0]

    if options.makeroc:
        for pt_lo, pt_hi in zip(lpT[:-1], lpT[1:]):
            for eta_lo, eta_hi in zip(leta[:-1], leta[1:]):
                make_roc_curve(y_pred, y_test, eta_test, pt_test,
                               pt_lo, pt_hi, eta_lo, eta_hi, options)

    frozen_graph = freeze_session(
        K.get_session(),
        output_names=[out.op.name for out in model.outputs])
    tf.train.write_graph(frozen_graph, "h5_files", "tf_model.pb", as_text=False)

    print_model_to_json(model, output_dir + '/model.json')
    model.save_weights(output_dir + '/dense_model_weights.h5')
    json_string = model.to_json()
from rootpy.plotting import Hist, Canvas, Legend, set_style
from rootpy.plotting.contrib.quantiles import qqgraph
from rootpy.extern.six.moves import range

set_style('ATLAS')

# canvas split into two pads side by side
c = Canvas(width=1200, height=600)
c.Divide(2, 1, 1e-3, 1e-3)

rand = ROOT.TRandom3()

# two histograms with identical binning, filled from Gaussians of
# different width
h1 = Hist(
    100, -5, 5,
    name="h1",
    title="Histogram 1",
    linecolor='red',
    legendstyle='l')
h2 = Hist(
    100, -5, 5,
    name="h2",
    title="Histogram 2",
    linecolor='blue',
    legendstyle='l')

for ievt in range(10000):
    h1.Fill(rand.Gaus(0, 0.8))
    h2.Fill(rand.Gaus(0, 1))

# first pad: both histograms overlaid, with a legend
pad = c.cd(1)
h1.Draw('hist')
h2.Draw('hist same')
leg = Legend(
    [h1, h2],
    pad=pad,
    leftmargin=0.5,
    topmargin=0.11,
    rightmargin=0.05,
    textsize=20)
leg.Draw()

# switch to the second pad
pad = c.cd(2)
def test_slice_assign_error():
    """Slice assignment of (value, error) pairs sets both attributes per bin."""
    hist = Hist(10, 0, 1)
    nbins = len(hist)
    pairs = [(i, i / 2.) for i in range(nbins)]
    hist[:] = pairs
    assert all(
        hist_bin.value == index
        for hist_bin, index in zip(hist, range(nbins)))
    assert all(
        hist_bin.error == index / 2.
        for hist_bin, index in zip(hist, range(nbins)))
def test_slice_assign_bad():
    """Assign one more value than ``len(hist)`` to the full slice.

    NOTE(review): presumably this is expected to raise and is wrapped by a
    decorator outside this view - confirm.
    """
    hist = Hist(10, 0, 1)
    one_too_many = len(hist) + 1
    hist[:] = range(one_too_many)
def load_analysis(self, inputs, res_T):
    """Select Z(mu mu) H(b b) candidates from a Delphes file and export them.

    For each event with at least two muons and at least two b-tagged jets,
    the two leading muons and two leading b-jets (by pT) are combined into
    Z and H candidates.  Events whose b-jet pair mass lies strictly between
    95 and 135 are written to a tree, which is saved as a ROOT file and as a
    space-separated CSV file; both are then moved under ``500GeV_res``.

    Parameters
    ----------
    inputs : str
        Path of the Delphes ROOT file; its last path component is the key
        into ``res_T``.
    res_T : mapping
        Maps the file name to a sequence whose first three entries are
        written to the 'cHW', 'tcHW' and 'xsec' branches.

    Notes
    -----
    'PT_j1' / 'PT_j2' hold -999 when the event has no leading / sub-leading
    non-b-tagged jet.  (The original set this sentinel and then always
    overwrote it with the pT of an empty four-vector.)
    """

    def _pt(candidate):
        # Transverse momentum of a Delphes candidate, used as sort key.
        return candidate.P4().Pt()

    def _four_vector(candidate):
        # Independent TLorentzVector copy of the candidate's four-momentum,
        # safe to boost in place.
        momentum = candidate.P4()
        vector = ROOT.TLorentzVector()
        vector.SetPtEtaPhiE(momentum.Pt(), momentum.Eta(),
                            momentum.Phi(), momentum.E())
        return vector

    parameters = res_T[inputs.split('/')[-1]]
    cHW = parameters[0]
    tcHW = parameters[1]
    xsec = parameters[2]

    # Create chain of root trees
    chain1 = ROOT.TChain("Delphes")
    chain1.Add(inputs)

    # Create object of class ExRootTreeReader
    treeReader = ROOT.ExRootTreeReader(chain1)
    numberOfEntries = treeReader.GetEntries()

    # create new root file
    tree_name = "cHW_{}_tcHW_{}".format(cHW, tcHW)
    root_name = 'cHW_{}_tcHW_{}.root'.format(cHW, tcHW)
    csv_name = 'cHW_{}_tcHW_{}.csv'.format(cHW, tcHW)

    # The context manager closes the output file even if the loop fails.
    with root_open(root_name, "recreate"):
        tree = Tree(tree_name)
        tree.create_branches({
            'PT_l1': 'F',
            'PT_l2': 'F',
            'PT_ll': 'F',
            'Cos_lZ': 'F',
            'DPHI_ll': 'F',
            'PT_j1': 'F',
            'PT_j2': 'F',
            'PT_b1': 'F',
            'PT_b2': 'F',
            'Eta_H': 'F',
            'phi_H': 'F',
            'M_H': 'F',
            'Cos_Hb1': 'F',
            'PT_H': 'F',
            'PT_ZH': 'F',
            'M_Z': 'F',
            'M_ZH': 'F',
            'cHW': 'F',
            'tcHW': 'F',
            'xsec': 'F'
        })

        # Get pointers to branches used in this analysis.  The Electron,
        # Photon and MissingET requests are kept so the reader is set up
        # exactly as before, although only Jet and Muon are read below.
        branchJet = treeReader.UseBranch("Jet")
        treeReader.UseBranch("Electron")
        branchMuon = treeReader.UseBranch("Muon")
        treeReader.UseBranch("Photon")
        treeReader.UseBranch("MissingET")

        # Loop over all events
        for entry in range(numberOfEntries):
            # Load selected branches with data from specified event
            treeReader.ReadEntry(entry)

            muons = [branchMuon.At(n) for n in range(branchMuon.GetEntries())]
            if len(muons) < 2:
                continue
            muons.sort(key=_pt, reverse=True)

            bjets, ljets = [], []
            for n in range(branchJet.GetEntries()):
                jet = branchJet.At(n)
                if jet.BTag == 1:
                    bjets.append(jet)
                else:
                    ljets.append(jet)
            if len(bjets) < 2:
                continue
            bjets.sort(key=_pt, reverse=True)
            ljets.sort(key=_pt, reverse=True)

            Muon1 = _four_vector(muons[0])
            Muon2 = _four_vector(muons[1])
            bjato1 = _four_vector(bjets[0])
            bjato2 = _four_vector(bjets[1])

            Z = Muon1 + Muon2
            H = bjato1 + bjato2
            if not 95 < H.M() < 135:
                continue
            ZH = Z + H

            # Lab-frame quantities; these must be read before the boosts
            # below modify Muon1 and bjato1 in place.
            tree.PT_l1 = Muon1.Pt()
            tree.PT_l2 = Muon2.Pt()
            tree.PT_ll = Z.Pt()
            tree.PT_b1 = bjato1.Pt()
            tree.PT_b2 = bjato2.Pt()
            tree.PT_j1 = _pt(ljets[0]) if len(ljets) > 0 else -999
            tree.PT_j2 = _pt(ljets[1]) if len(ljets) > 1 else -999
            tree.phi_H = H.Phi()
            tree.PT_ZH = ZH.Pt()
            tree.M_ZH = ZH.M()
            tree.PT_H = H.Pt()
            tree.Eta_H = H.Eta()
            tree.M_H = H.M()
            tree.M_Z = Z.M()
            tree.DPHI_ll = np.abs(Muon1.DeltaPhi(Muon2))

            # boosted objects: angle between the leading muon, boosted by
            # minus the Z boost vector, and the Z boost direction
            Zboost = Z.BoostVector()
            v = Zboost.Unit()
            Muon1.Boost(-Zboost)

            # same construction for the leading b-jet and the H candidate
            Hboost = H.BoostVector()
            ang = Hboost.Unit()
            bjato1.Boost(-Hboost)

            tree.Cos_Hb1 = np.cos(bjato1.Angle(ang))
            tree.Cos_lZ = np.cos(Muon1.Angle(v))

            tree.cHW = cHW
            tree.tcHW = tcHW
            tree.xsec = xsec
            tree.Fill()

        tree.write()

    # create the csv output
    to_convert = root2array(root_name, tree_name)
    df_conv = pd.DataFrame(to_convert)
    df_conv.to_csv(csv_name, index=False, header=df_conv.keys(), mode='w', sep=' ')

    # move everything; create each target directory on its own so that a
    # pre-existing '500GeV_res' without sub-directories still works
    for directory in ('500GeV_res/roots', '500GeV_res/csv'):
        if not os.path.isdir(directory):
            os.makedirs(directory)
    shutil.move(root_name, '500GeV_res/roots')
    shutil.move(csv_name, '500GeV_res/csv')
from rootpy.extern.six.moves import range

# Collect extra "--name=value" command-line arguments as style parameters.
kwargs = {}
try:
    for arg in extra:
        # Split on the first '=' only, so that values may contain '='.
        name, value = arg.lstrip('-').split('=', 1)
        kwargs[name] = value
except ValueError:
    print("specify style parameters with --name=value")

try:
    style = get_style(args.style, **kwargs)
except ValueError:
    print('Invalid style: `{0}`. Using the `ATLAS` style.'.format(args.style))
    style = get_style('ATLAS')

# Use styles as context managers. The selected style will only apply
# within the following context:
with style:
    c = Canvas()
    hpx = Hist(100, -4, 4, name="hpx", title="This is the px distribution")
    # generate some random data
    ROOT.gRandom.SetSeed()
    for i in range(25000):
        hpx.Fill(ROOT.gRandom.Gaus())
    hpx.GetXaxis().SetTitle("random variable [unit]")
    hpx.GetYaxis().SetTitle("#frac{dN}{dr} [unit^{-1}]")
    hpx.SetMaximum(1000.)
    hpx.Draw()
    wait()
from random import gauss
from rootpy.io import root_open
from rootpy.tree import Tree, TreeChain
from rootpy.plotting import Hist
from rootpy.extern.six.moves import range

# Make two files, each with a Tree called "test"
print("Creating test tree in chaintest1.root")
f = root_open("chaintest1.root", "recreate")

tree = Tree("test")
branches = {
    'x': 'F',
    'y': 'F',
    'z': 'F',
    'i': 'I'}
tree.create_branches(branches)

# 10000 entries: three Gaussian-distributed floats plus the entry index
for i in range(10000):
    tree.x = gauss(.5, 1.)
    tree.y = gauss(.3, 2.)
    tree.z = gauss(13., 42.)
    tree.i = i
    tree.fill()

# Make a histogram of x when y > 1
hist1 = Hist(100, -10, 10, name='hist1')
tree.Draw('x', 'y > 1', hist=hist1)
hist1.SetDirectory(0)  # memory resident
selected_entries = hist1.Integral()
print("The first tree has {0:f} entries where y > 1".format(selected_entries))

tree.write()
f.close()
# two histograms with identical binning, filled from Gaussians of
# different width
h1 = Hist(
    100, -5, 5,
    name="h1",
    title="Histogram 1",
    linecolor='red',
    legendstyle='l')
h2 = Hist(
    100, -5, 5,
    name="h2",
    title="Histogram 2",
    linecolor='blue',
    legendstyle='l')

for ievt in range(10000):
    h1.Fill(rand.Gaus(0, 0.8))
    h2.Fill(rand.Gaus(0, 1))

# draw both histograms overlaid on the first pad, with a legend
pad = c.cd(1)
h1.Draw('hist')
h2.Draw('hist same')
leg = Legend(
    [h1, h2],
    pad=pad,
    leftmargin=0.5,
    topmargin=0.11,
    rightmargin=0.05,
    textsize=20)
leg.Draw()
'M_H': 'F', 'MT_W': 'F', 'Cos_Hb1': 'F', 'PT_H': 'F', }) # Get pointers to branches used in this analysis branchJet = treeReader.UseBranch("Jet") branchElectron = treeReader.UseBranch("Electron") branchMuon = treeReader.UseBranch("Muon") branchPhoton = treeReader.UseBranch("Photon") branchMET = treeReader.UseBranch("MissingET") #################################################################### # Loop over all events for entry in range(0, numberOfEntries): # Load selected branches with data from specified event treeReader.ReadEntry(entry) ########################################################################################################## eletrons = sorted(branchElectron, key=lambda Electron: Electron.PT, reverse=True) missing = sorted(branchMET, key=lambda MisingET: MisingET.MET, reverse=True) elec1 = eletrons[0] eletron1 = ROOT.TLorentzVector() eletron1.SetPtEtaPhiE(elec1.PT,elec1.Eta,elec1.Phi,elec1.P4().E()) met = ROOT.TLorentzVector() met.SetPtEtaPhiE(missing[0].P4().Pt(),missing[0].P4().Eta(),missing[0].P4().Phi(),missing[0].P4().E()) bjato1 = ROOT.TLorentzVector() bjato2 = ROOT.TLorentzVector() jato1 = ROOT.TLorentzVector() jato2 = ROOT.TLorentzVector() ####################################################################################
ROOT.gROOT.SetBatch()
set_style('ATLAS')

# fixed seeds for both random number sources
np.random.seed(0)
random.seed(0)


# create an example TTree dataset
class Sample(TreeModel):
    # two float columns, x and y
    x = FloatCol()
    y = FloatCol()


with root_open('sample.root', 'recreate'):
    # generate toy data in a TTree
    tree = Tree('sample', model=Sample)
    for i in range(1000):
        tree.x = gauss(0, 1)
        tree.y = gauss(0, 1)
        tree.Fill()
    tree.write()

# read in the TTree as a NumPy array
array = root2array('sample.root', 'sample')

# remove the animation left over from a previous run, if any
if os.path.exists('bootstrap.gif'):
    os.remove('bootstrap.gif')

canvas = Canvas(width=500, height=400)
hist = Hist2D(
    10, -3, 3,
    10, -3, 3,
    drawstyle='LEGO2')

output = root_open('bootstrap.root', 'recreate')