def dlProgress(count, blockSize, totalSize):
    """Report the progress of a block-wise download.

    The signature matches the ``reporthook`` callback accepted by
    ``urllib``'s ``urlretrieve``.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block in bytes.
        totalSize: Total size in bytes; any non-positive value (for example
            ``-1``) means the size is unknown.
    """
    downloaded = count * blockSize
    if totalSize > 0:
        # The last block is normally only partly filled, so the raw ratio
        # can overshoot 100 %; clamp it.
        percent = min(100, int(downloaded * 100 / totalSize))
        utils.printProgress(percent)
    else:
        # Unknown or zero size: no percentage can be computed (dividing by
        # zero would raise), so show the byte count instead.
        sys.stdout.write("\r%s bytes downloaded" % downloaded)
        sys.stdout.flush()
def get_challenges(self):
    """Return challenge information, from the local cache or the CTFd API.

    If ``challenges.json`` already exists under ``self.ctfd.loc`` the user
    is asked whether to refresh it; answering ``n``/``no`` (any case)
    returns the cached JSON content unchanged. Otherwise the challenge
    list is requested from ``<base_url>/api/v1/challenges``, each
    challenge is queried individually for its files, description, hints
    and solves, and the result is saved via ``utils.save_json_into_file``.

    Returns:
        The cached JSON content, or a dict mapping challenge id to its
        collected fields, or ``False`` when the listing request does not
        answer with HTTP 200.
    """
    print("Get challenge meta info")
    save_dir = self.ctfd.loc
    save_path = save_dir / "challenges.json"

    if save_path.exists():
        print("Challenge are already loaded once")
        print("Do you need to update it?(y/n)")
        choice = input("> ")
        # Accept "n", "N", "no", " n " ... instead of only a literal "n".
        if choice.strip().lower() in ("n", "no"):
            with open(str(save_path)) as challenges_json:
                return json.load(challenges_json)

    if self.session == '':
        self.login()

    challenges_endpoint = self.ctfd.base_url + "/api/v1/challenges"
    cookies = {"session": self.session}
    resp = requests.get(challenges_endpoint, cookies=cookies)
    if resp.status_code != 200:
        print("Something wrong...")
        print(resp.text)
        return False

    print("Done.")
    challenges = json.loads(resp.text)['data']

    print("Processing...")
    processed = {}
    for chall in challenges:
        chall['name'] = chall['name'].replace(' ', '_').lower()
        chall['category'] = chall['category'].replace(' ', '_').lower()
        # Drop fields that are not stored; pop() tolerates responses in
        # which a field is absent, where `del` raised KeyError.
        for unwanted in ('template', 'script', 'type'):
            chall.pop(unwanted, None)
        processed[chall['id']] = {
            str(key): value for key, value in chall.items() if key != 'id'
        }
    print("Done")

    print("Get challenge specific info.(You may need to wait)")
    challs_total = len(processed)
    for position, chall_id in enumerate(processed, start=1):
        utils.printProgress(position, challs_total, 'Progress:',
                            '{}/{}'.format(position, challs_total), 1, 50)
        resp = requests.get(challenges_endpoint + '/' + str(chall_id),
                            cookies=cookies)
        chall_info = json.loads(resp.text)['data']
        entry = processed[chall_id]
        entry['files'] = chall_info['files']
        entry['description'] = chall_info['description']
        entry['hint'] = chall_info['hints']
        entry['solves'] = chall_info['solves']
    print("Done")

    print("Save challenges info into file")
    utils.save_json_into_file(processed, save_dir, "challenges.json", "new")
    print("Done")
    return processed
def Script(self):
    """Dump the memory ranges of the attached session to ``self.Directory``.

    Loads a small agent script into ``self.session`` that exposes
    ``enumerateRanges`` and ``readMemory`` over RPC, enumerates the ranges
    matching ``self.Perms`` and hands each one to ``dumper``. Ranges larger
    than ``self.Max_Size`` are dumped in chunks through ``dumper.splitter``;
    when ``self.Max_Size`` is ``None`` no size limit is applied. Finally
    ``self.String()`` is run if ``self.Strings`` is truthy.
    """
    print("Starting Memory dump...")
    script = self.session.create_script(
        """'use strict'; rpc.exports = { enumerateRanges: function (prot) { return Process.enumerateRangesSync(prot); }, readMemory: function (address, size) { return Memory.readByteArray(ptr(address), size); } }; """)
    script.on("message", utils.on_message)
    script.load()
    agent = script.exports

    print(self.Perms)
    ranges = agent.enumerate_ranges(self.Perms)

    # Previously the limit was only bound when Max_Size was set, so a None
    # value ended in UnboundLocalError at the first size comparison.
    max_size = self.Max_Size
    total = len(ranges)

    # Performing the memory dump
    for done, region in enumerate(ranges, start=1):
        base = region["base"]
        size = region["size"]
        logging.debug("Base Address: " + str(base))
        logging.debug("")
        logging.debug("Size: " + str(size))
        if max_size is not None and size > max_size:
            logging.debug("Too big, splitting the dump into chunks")
            self.mem_access_viol = dumper.splitter(
                agent, base, size, max_size, self.mem_access_viol,
                self.Directory)
        else:
            self.mem_access_viol = dumper.dump_to_file(
                agent, base, size, self.mem_access_viol, self.Directory)
        # Count split regions as well, otherwise the bar never reaches 100 %.
        utils.printProgress(done, total, prefix='Progress:',
                            suffix='Complete', bar=50)
    print("")

    if self.Strings:
        self.String()
def String(self):
    """Run ``utils.strings`` on every entry of ``self.Directory``.

    A progress bar is refreshed after each entry and ``Finished!`` is
    printed at the end.
    """
    entries = os.listdir(self.Directory)
    total = len(entries)
    for done, entry in enumerate(entries, start=1):
        utils.strings(entry, self.Directory)
        utils.printProgress(done, total, prefix='Progress:',
                            suffix='Complete', bar=50)
    print("Finished!")
def packages():
    """Return the cached list of search packages, building it on first use.

    When ``CraftCore.cache.availablePackages`` is empty, every entry of
    ``CraftPackageObject.installables()`` is wrapped in ``SeachPackage``
    and appended to the cache while a percentage progress is printed.
    """
    if CraftCore.cache.availablePackages:
        return CraftCore.cache.availablePackages

    CraftCore.cache.availablePackages = []
    CraftCore.log.info("Updating search cache:")
    total = len(CraftPackageObject.installables())
    for installable in CraftPackageObject.installables():
        CraftCore.cache.availablePackages.append(SeachPackage(installable))
        done = len(CraftCore.cache.availablePackages)
        utils.printProgress(int(done / total * 100))
    utils.printProgress(100)
    CraftCore.log.info("")
    return CraftCore.cache.availablePackages
base = range["base"] size = range["size"] logging.debug("Base Address: " + str(base)) logging.debug("") logging.debug("Size: " + str(size)) if size > MAX_SIZE: logging.debug("Too big, splitting the dump into chunks") mem_access_viol = dumper.splitter(agent, base, size, MAX_SIZE, mem_access_viol, DIRECTORY) continue mem_access_viol = dumper.dump_to_file(agent, base, size, mem_access_viol, DIRECTORY) i += 1 utils.printProgress(i, l, prefix='Progress:', suffix='Complete', bar=50) print("") # Run Strings if selected if STRINGS: files = os.listdir(DIRECTORY) i = 0 l = len(files) print("Running strings on all files:") for f1 in files: utils.strings(f1, DIRECTORY) i += 1 utils.printProgress(i, l, prefix='Progress:',
def _isSameChamber(tree, iseg, itrig):
    """Return True when segment ``iseg`` and TwinMux primitive ``itrig`` have equal wheel, sector and station."""
    # The comparison must be an equality test: combining the two values
    # with `and` only checks that both are non-zero.
    return (tree.seg_wheel[iseg] == tree.ltTwinMuxOut_wheel[itrig]
            and tree.seg_sector[iseg] == tree.ltTwinMuxOut_sector[itrig]
            and tree.seg_station[iseg] == tree.ltTwinMuxOut_station[itrig])


def calculateEfficiency(args):  # (inputDir, inFile, outDir, multiProcess)
    """Fill segment-vs-primitive histograms for one ntuple file and write them out.

    Args:
        args: Sequence ``(inputDir, inFile, outDir, multiProcess)``. The tree
            ``dtNtupleProducer/DTTREE`` is read from ``inputDir/inFile`` and
            the histograms are written to ``outDir/efficiency/inFile``. When
            ``multiProcess`` is falsy a progress bar and an efficiency
            summary are printed.

    The ``h_Efficiency`` histogram has four labelled bins: denominator and
    numerator of the inclusive efficiency, then denominator and numerator
    of the "RPC only" efficiency.
    """
    inputDir, inFile, outDir, multiProcess = args[0], args[1], args[2], args[3]

    effDir = outDir + '/efficiency'
    if not os.path.exists(effDir):
        os.makedirs(effDir)
    f_out = TFile.Open(effDir + '/' + inFile, 'recreate')

    DTTREE = TChain('dtNtupleProducer/DTTREE')
    DTTREE.Add(inputDir + '/' + inFile)

    f_out.cd()

    # One dPhi and one dPhiB histogram per RPC bit value 0..2.
    listDPhi = []
    listDPhiB = []
    for RPCbit in range(0, 3):
        tmp = TH1D('h_SegVsPrimitive_dPhi_RPCbit' + str(RPCbit),
                   'DT Segment vs TwinMux Primitive dPhi', 100, -2000.0, 2000.0)
        tmp.GetXaxis().SetTitle('#Delta phi_{Seg,Pri}')
        tmp.GetYaxis().SetTitle('Entries, RPCbit = ' + str(RPCbit))
        tmp.Sumw2()
        listDPhi.append(tmp)

        tmp = TH1D('h_SegVsPrimitive_dPhiB_RPCbit' + str(RPCbit),
                   'DT Segment vs TwinMux Primitive dPhiB', 100, -2000.0, 2000.0)
        tmp.GetXaxis().SetTitle('#Delta phiB_{Seg,Pri}')
        tmp.GetYaxis().SetTitle('Entries, RPCbit = ' + str(RPCbit))
        tmp.Sumw2()
        listDPhiB.append(tmp)

    h_Efficiency = TH1D('h_Efficiency', 'The efficiency of trigger primitives', 4, 0, 4)
    h_Efficiency.GetXaxis().SetTitle("")
    h_Efficiency.GetXaxis().SetBinLabel(1, 'Deno(inclusive)')
    h_Efficiency.GetXaxis().SetBinLabel(2, 'Nume(inclusive)')
    h_Efficiency.GetXaxis().SetBinLabel(3, 'Deno(RPC only)')
    h_Efficiency.GetXaxis().SetBinLabel(4, 'Nume(RPC only)')

    # Query the entry count once instead of on every iteration.
    nEntries = DTTREE.GetEntries()
    # xrange: this module is Python 2 code.
    for ievent in xrange(nEntries):
        if not multiProcess:
            utils.printProgress(ievent, nEntries, 'Progress: ', 'Complete', 1, 25)
        DTTREE.GetEntry(ievent)

        for iseg in range(DTTREE.seg_nSegments):
            # Keep only segments with |t0| < 6.
            if DTTREE.seg_phi_t0[iseg] <= -6.0 or DTTREE.seg_phi_t0[iseg] >= 6.0:
                continue

            isMatchWithRPCbit01 = False
            for itrig in range(DTTREE.ltTwinMuxOut_nTrigs):
                isSameDetector = _isSameChamber(DTTREE, iseg, itrig)
                isBX0 = DTTREE.ltTwinMuxOut_BX[itrig] == 0
                if not (isSameDetector and isBX0):
                    continue

                # NOTE(review): only three histograms are booked, so an
                # rpcBit value above 2 would raise IndexError here.
                RPCbit = DTTREE.ltTwinMuxOut_rpcBit[itrig]
                deltaPhi = DTTREE.seg_posGlb_phi[iseg] - DTTREE.ltTwinMuxOut_phi[itrig]
                deltaPhiB = (DTTREE.seg_dirGlb_phi[iseg] - DTTREE.seg_posGlb_phi[iseg]) - DTTREE.ltTwinMuxOut_phiB[itrig]
                listDPhi[RPCbit].Fill(deltaPhi)
                listDPhiB[RPCbit].Fill(deltaPhiB)

                if DTTREE.seg_station[iseg] <= 2:
                    h_Efficiency.Fill(0, 1)
                    if DTTREE.ltTwinMuxOut_rpcBit[itrig] < 2:
                        h_Efficiency.Fill(1, 1)
                        isMatchWithRPCbit01 = True

            # "RPC only": segments in stations 1-2 without an rpcBit 0/1
            # primitive at BX 0; the numerator counts those that still have
            # any primitive in the same chamber.
            if not isMatchWithRPCbit01 and DTTREE.seg_station[iseg] <= 2:
                h_Efficiency.Fill(2, 1)
                for itrig in range(DTTREE.ltTwinMuxOut_nTrigs):
                    if _isSameChamber(DTTREE, iseg, itrig):
                        h_Efficiency.Fill(3, 1)
                        break

    if not multiProcess:
        denoInclusive = float(h_Efficiency.GetBinContent(1))
        numeInclusive = float(h_Efficiency.GetBinContent(2))
        denoRPConly = float(h_Efficiency.GetBinContent(3))
        numeRPConly = float(h_Efficiency.GetBinContent(4))
        # An empty denominator used to raise ZeroDivisionError; report NaN.
        effInclusive = numeInclusive / denoInclusive if denoInclusive else float('nan')
        effRPConly = numeRPConly / denoRPConly if denoRPConly else float('nan')

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\n==== Inclusive ====')
        print('Nume: ' + str(h_Efficiency.GetBinContent(2)) + ', Deno: ' + str(h_Efficiency.GetBinContent(1)) + ', Efficiency: ' + str(effInclusive))
        print('==== RPC only ====')
        print('Nume: ' + str(h_Efficiency.GetBinContent(4)) + ', Deno: ' + str(h_Efficiency.GetBinContent(3)) + ', Efficiency: ' + str(effRPConly))

    f_out.Write()
    f_out.Close()
def process(self):
    """Build the pairwise molecule graphs and save them to ``self.processed_paths[0]``.

    Does nothing when ``Decagon-<datatype>-multi.pt`` already exists in
    ``self.processed_dir``. Otherwise every ``(smiles1, smiles2)`` key of
    ``self.json_load[self.datatype]`` is converted to one graph: the atom
    features of both molecules are concatenated, the bond indices of the
    second molecule are offset by the atom count of the first, and the
    label array is stored as ``y``. Pairs that cannot be parsed, contain
    an empty molecule, or have no bonds are skipped.
    """
    cache_file = os.path.join(
        self.processed_dir, 'Decagon-{}-multi.pt'.format(self.datatype))
    if osp.exists(cache_file):
        return

    data_list = []

    # >>> Obtain One-Hot Encoding for Side-Effects
    # The JSON keys are string representations of (smiles1, smiles2) tuples.
    json_dict = {
        literal_eval(k): v
        for k, v in self.json_load[self.datatype].items()
    }
    total = len(json_dict)
    progress_prefix = '{} dataset preparation: '.format(self.datatype)

    for idx, ((smiles1, smiles2), side_effects) in enumerate(json_dict.items()):
        printProgress(idx + 1, total, progress_prefix, ' ', 2, 50)
        mol1 = MolFromSmiles(smiles1)
        mol2 = MolFromSmiles(smiles2)
        label = np.array(side_effects)

        if mol1 is None or mol2 is None:
            # Report the SMILES strings: the parsed objects are either None
            # or an opaque Mol instance and do not identify the pair.
            print("There is a missing drug from the pair (%s,%s)" %
                  (smiles1, smiles2))
            continue

        # >>> Get pairwise graph G1, G2
        c1_size = mol1.GetNumAtoms()
        c2_size = mol2.GetNumAtoms()
        if c1_size == 0 or c2_size == 0:
            print("There is a size error from pair (%s,%s)" %
                  (smiles1, smiles2))
            continue

        features, edges = [], []
        for mol in (mol1, mol2):
            for atom in mol.GetAtoms():
                feature = atom_features(atom)
                features.append(feature / sum(feature))  # normalize

        for bond in mol1.GetBonds():
            edges.append([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])
        # Atoms of the second molecule are numbered after those of the first.
        for bond in mol2.GetBonds():
            edges.append([
                bond.GetBeginAtomIdx() + c1_size,
                bond.GetEndAtomIdx() + c1_size
            ])
        if not edges:
            continue

        # to_directed() yields both directions of every bond.
        G = nx.Graph(edges).to_directed()
        edge_index = [[e1, e2] for e1, e2 in G.edges]

        GraphSiameseData = DATA.Data(
            x=torch.Tensor(features),
            edge_index=torch.LongTensor(edge_index).transpose(1, 0),
            y=torch.Tensor(label).view(1, -1))
        GraphSiameseData.__setitem__('c1_size', torch.LongTensor([c1_size]))
        GraphSiameseData.__setitem__('c2_size', torch.LongTensor([c2_size]))
        data_list.append(GraphSiameseData)

    if self.pre_filter is not None:
        data_list = [data for data in data_list if self.pre_filter(data)]
    if self.pre_transform is not None:
        data_list = [self.pre_transform(data) for data in data_list]

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
hdp = HDP(T, K, D, graph.n, eta, alpha, gamma, kappa, tau, scale, adding_noise) log_file = open(f_log, "w") log_file.write( "iteration time doc.count score word.count unseen.score unseen.word.count\n" ) max_iter_per_epoch = np.ceil(D / batchsize) total_doc_count = 0 total_time = 0 doc_seen = set() print("stochastic variational inference...") for epoch in range(epochs): iter = 0 printProgress(iter, max_iter_per_epoch, prefix='epoch %s' % int(epoch + 1), suffix='complete', barLength=50) while iter < max_iter_per_epoch: iter += 1 t0 = time.clock() # Sample the documents. ids = random.sample(range(D), batchsize) docs = [train_corpus.docs[id] for id in ids] unseen_ids = set( [i for (i, id) in enumerate(ids) if id not in doc_seen]) if len(unseen_ids) != 0: doc_seen.update([id for id in ids]) total_doc_count += batchsize
def _setHistStyle(hist, xtitle, ytitle):
    """Set both axis titles of ``hist``, call ``Sumw2()`` and return it."""
    hist.GetXaxis().SetTitle(xtitle)
    hist.GetYaxis().SetTitle(ytitle)
    hist.Sumw2()
    return hist


def _foldOverflow1D(hist, nbins):
    """Add the content of the overflow bin ``nbins + 1`` to bin ``nbins``."""
    hist.AddBinContent(nbins, hist.GetBinContent(nbins + 1))


def _foldOverflow2D(hist, nx, ny):
    """Add the overflow row, column and corner of a 2D histogram to its last visible bins."""
    for ix in range(1, nx + 1):
        hist.SetBinContent(
            ix, ny, hist.GetBinContent(ix, ny) + hist.GetBinContent(ix, ny + 1))
    for iy in range(1, ny + 1):
        hist.SetBinContent(
            nx, iy, hist.GetBinContent(nx, iy) + hist.GetBinContent(nx + 1, iy))
    hist.SetBinContent(
        nx, ny, hist.GetBinContent(nx + 1, ny + 1) + hist.GetBinContent(nx, ny))


def ana(inputDir, process, outputDir, sys='', flag1=False):
    """Apply the trained model to the selected events and book the result histograms.

    Args:
        inputDir: Path prefix of the input HDF file; ``inputDir + ".h5"`` is read.
        process: Sample name; anything after a ``__`` separator is dropped.
        outputDir: Directory under which ``<modelfile>/hist_*.root`` is created.
        sys: Suffix appended to the output file and histogram names.
            (The parameter name shadows the ``sys`` module inside this function.)
        flag1: When true the output file is named ``hist_closure.root``.

    The model location is read from ``var.txt`` in the working directory
    (keys ``ver``, ``configDir``, ``weightDir``, ``modelfile``). For every
    event the candidate with the highest ``signal`` value is kept and
    compared with the generated additional b jets; for ``ttbb`` samples
    the matching ratios are printed and appended to ``ratio.txt``.
    """
    if '__' in process:
        process = process.split('__')[0]
    print("Process: " + process + "\n")
    print("Systematics: " + sys + "\n")
    print('Save:' + process + sys + '\n')

    timer = ROOT.TStopwatch()
    timer.Start()

    # Each line of var.txt is "<key> <value>".
    ver = ""
    configDir = ""
    weightDir = ""
    modelfile = ""
    with open('var.txt', 'r') as f:
        for line in f:
            tmp = line.split()
            if 'ver' in tmp:
                ver = tmp[1]
            if 'configDir' in tmp:
                configDir = tmp[1]
            if 'weightDir' in tmp:
                weightDir = tmp[1]
            if 'modelfile' in tmp:
                modelfile = tmp[1]

    histDir = outputDir + '/' + modelfile
    if not os.path.exists(histDir):
        os.makedirs(histDir)

    closureTest = flag1
    ttbb = 'ttbb' in process

    muon_ch = 0
    electron_ch = 1
    nChannel = 2

    selEvent = pd.read_hdf(inputDir + ".h5")

    # should get the number automatically
    countMatchable = True
    if countMatchable:
        df = pd.read_hdf("./array/array_train_ttbb.h5")
        df = df.filter(['signal', 'event', 'dR'], axis=1)
        df = df.query('signal > 0')
        df.reset_index(drop=True, inplace=True)
        nMatchable = len(df.index)
        print(nMatchable)
        with open('matchable.txt', 'w') as f_tmp:
            f_tmp.write(str(nMatchable))
            f_tmp.write(str(df))

    if closureTest:
        f_out = ROOT.TFile(histDir + '/hist_closure.root', 'recreate')
    elif sys == '':
        f_out = ROOT.TFile(histDir + '/hist_' + process + '.root', 'recreate')
    else:
        f_out = ROOT.TFile(histDir + '/hist_' + process + sys + '.root',
                           'recreate')
    # NOTE(review): f_out is never written or closed in this function, and
    # the event loop below fills no histogram - confirm this is intended.

    # Reconstructed quantities use uniform bins, generated ones variable bins.
    nbins_reco_addjets_dr = 12
    reco_addjets_dr_min = 0.4
    reco_addjets_dr_max = 4.0

    nbins_reco_addjets_mass = 12
    reco_addjets_m_min = 0
    reco_addjets_m_max = 400

    nbins_gen_addjets_dr = 4
    gen_addjets_dr_width = [0.4, 0.6, 1.0, 2.0, 4.0]

    nbins_gen_addjets_mass = 4
    gen_addjets_mass_width = [0.0, 60.0, 100.0, 170.0, 400.0]

    # Histograms for unfolding, one per channel
    h_gen_addbjets_deltaR_nosel = [None] * nChannel
    h_gen_addbjets_invMass_nosel = [None] * nChannel
    h_njets = [None] * nChannel
    h_nbjets = [None] * nChannel
    h_reco_addjets_deltaR = [None] * nChannel
    h_reco_addjets_invMass = [None] * nChannel
    h_gen_addbjets_deltaR = [None] * nChannel
    h_gen_addbjets_invMass = [None] * nChannel
    h_respMatrix_deltaR = [None] * nChannel
    h_respMatrix_invMass = [None] * nChannel

    # Histograms of DNN input variables
    varlist = ut.getVarlist()
    xlabel = ut.getHistXlabel()
    h_hist = [[None] * len(varlist) for _ in range(nChannel)]

    for iChannel in range(nChannel):
        ch = '_Ch' + str(iChannel)

        for i, var in enumerate(varlist):
            histRange = ut.getHistRange(var)
            h_hist[iChannel][i] = _setHistStyle(
                ROOT.TH1D('keras_h_' + var + ch + '_S3' + sys, '',
                          int(histRange[0]), float(histRange[1]),
                          float(histRange[2])),
                xlabel[var], "Entries")

        h_gen_addbjets_deltaR_nosel[iChannel] = _setHistStyle(
            ROOT.TH1D("h_gentop_GenAddbJetDeltaR" + ch + "_nosel" + sys, "",
                      nbins_gen_addjets_dr, array('d', gen_addjets_dr_width)),
            "#DeltaR_{b#bar{b}}", "Entries")

        h_gen_addbjets_invMass_nosel[iChannel] = _setHistStyle(
            ROOT.TH1D("h_gentop_GenAddbJetInvMass" + ch + "_nosel" + sys, "",
                      nbins_gen_addjets_mass,
                      array('d', gen_addjets_mass_width)),
            "m_{b#bar{b}}(GeV)", "Entries")

        h_njets[iChannel] = _setHistStyle(
            ROOT.TH1D("h_keras_nJets" + ch + "_S3" + sys, "", 10, 0, 10),
            "Jet multiplicity", "Entries")

        h_nbjets[iChannel] = _setHistStyle(
            ROOT.TH1D("h_keras_nbJets" + ch + "_S3" + sys, "", 10, 0, 10),
            "bJet multiplicity", "Entries")

        h_reco_addjets_deltaR[iChannel] = _setHistStyle(
            ROOT.TH1D("h_keras_RecoAddbJetDeltaR" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_dr, reco_addjets_dr_min,
                      reco_addjets_dr_max),
            "#DeltaR_{b#bar{b}}", "Entries")

        h_reco_addjets_invMass[iChannel] = _setHistStyle(
            ROOT.TH1D("h_keras_RecoAddbJetInvMass" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_mass, reco_addjets_m_min,
                      reco_addjets_m_max),
            "m_{b#bar{b}}(GeV)", "Entries")

        h_gen_addbjets_deltaR[iChannel] = _setHistStyle(
            ROOT.TH1D("h_gentop_GenAddbJetDeltaR" + ch + "_S3" + sys, "",
                      nbins_gen_addjets_dr, array('d', gen_addjets_dr_width)),
            "#DeltaR_{b#bar{b}}", "Entries")

        h_gen_addbjets_invMass[iChannel] = _setHistStyle(
            ROOT.TH1D("h_gentop_GenAddbJetInvMass" + ch + "_S3" + sys, "",
                      nbins_gen_addjets_mass,
                      array('d', gen_addjets_mass_width)),
            "m_{b#bar{b}}(GeV)", "Entries")

        h_respMatrix_deltaR[iChannel] = _setHistStyle(
            ROOT.TH2D("h_keras_ResponseMatrixDeltaR" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_dr, reco_addjets_dr_min,
                      reco_addjets_dr_max,
                      nbins_gen_addjets_dr, array('d', gen_addjets_dr_width)),
            "Reco. #DeltaR_{b#bar{b}}", "Gen. #DeltaR_{b#bar{b}}")

        h_respMatrix_invMass[iChannel] = _setHistStyle(
            ROOT.TH2D("h_keras_ResponseMatrixInvMass" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_mass, reco_addjets_m_min,
                      reco_addjets_m_max,
                      nbins_gen_addjets_mass,
                      array('d', gen_addjets_mass_width)),
            "Reco. m_{b#bar{b}}(GeV)", "Gen. m_{b#bar{b}}(GeV)")

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("\nLoad modelfile : " + str(modelfile))
    model = load_model(configDir + weightDir + ver + '/' + modelfile)
    model.summary()

    jetCombi = selEvent.filter(varlist)
    scaler = StandardScaler()
    pred = pd.DataFrame([])
    # `!=` instead of `is not`: identity comparison with an int literal is
    # implementation-dependent and a SyntaxWarning on recent Python.
    if len(jetCombi) != 0:
        inputset = np.array(jetCombi)
        inputset_sc = scaler.fit_transform(inputset)
        pred = model.predict(inputset_sc, batch_size=2000)

    pred = pd.DataFrame(pred, columns=['background', 'signal'])
    print(pred)

    selEvent = pd.concat([selEvent, pred], axis=1)
    # Keep, for every event, the row(s) with the highest 'signal' value.
    idx = selEvent.groupby(['event'])['signal'].transform(max) == selEvent['signal']
    selEvent = selEvent[idx]
    selEvent.reset_index(drop=True, inplace=True)

    print("\n Fill Hist")
    nEvents = 0
    nEvt_isMatch_DNN = 0
    nSelected = len(selEvent.index)
    for index, event in selEvent.iterrows():
        ut.printProgress(index, nSelected, 'Progress: ', 'Complete', 1, 25)

        passmuon = event['channel'] == 0
        passelectron = event['channel'] == 1

        gen_addbjet1 = TLorentzVector()
        gen_addbjet2 = TLorentzVector()
        gen_addbjet1.SetPtEtaPhiE(event['addbjet1_pt'], event['addbjet1_eta'],
                                  event['addbjet1_phi'], event['addbjet1_e'])
        gen_addbjet2.SetPtEtaPhiE(event['addbjet2_pt'], event['addbjet2_eta'],
                                  event['addbjet2_phi'], event['addbjet2_e'])

        # additional bjets from DNN
        reco_addbjet1 = TLorentzVector(0, 0, 0, 0)
        reco_addbjet2 = TLorentzVector(0, 0, 0, 0)
        reco_addbjet1.SetPtEtaPhiE(event['pt1'], event['eta1'],
                                   event['phi1'], event['e1'])
        reco_addbjet2.SetPtEtaPhiE(event['pt2'], event['eta2'],
                                   event['phi2'], event['e2'])

        # NOTE(review): the quantities below and `passchannel` are computed
        # but not used anywhere in this function.
        gen_dR = gen_addbjet1.DeltaR(gen_addbjet2)
        gen_M = (gen_addbjet1 + gen_addbjet2).M()
        reco_dR = reco_addbjet1.DeltaR(reco_addbjet2)
        reco_M = (reco_addbjet1 + reco_addbjet2).M()

        # matching ratio: both reconstructed jets within dR < 0.5 of the two
        # generated jets, in either pairing
        isMatch_DNN = (reco_addbjet1.DeltaR(gen_addbjet1) < 0.5
                       and reco_addbjet2.DeltaR(gen_addbjet2) < 0.5) or (
                           reco_addbjet1.DeltaR(gen_addbjet2) < 0.5
                           and reco_addbjet2.DeltaR(gen_addbjet1) < 0.5)

        if passmuon and not passelectron:
            passchannel = muon_ch
        elif passelectron and not passmuon:
            passchannel = electron_ch
        else:
            print("Error!")

        if not isMatch_DNN and event['signal'] == 1:
            print("here, strange")
        if isMatch_DNN:
            nEvt_isMatch_DNN += 1
        nEvents += 1

    if ttbb:
        # Both ratios default to 0 so that an empty selection no longer
        # leaves matching_DNN_able undefined when the summary is built.
        matching_DNN = 0.0
        matching_DNN_able = 0.0
        if nEvents != 0:
            matching_DNN = float(nEvt_isMatch_DNN) / float(nEvents)
        if nMatchable != 0:
            matching_DNN_able = float(nEvt_isMatch_DNN) / float(nMatchable)
        string_nmatch_matchable = "Matching ratio with matchable events from DNN : " + str(
            matching_DNN_able) + "(" + str(nEvt_isMatch_DNN) + "/" + str(
                nMatchable) + ")"
        string_nmatch_final = "Matching ratio with step 3 events from DNN : " + str(
            matching_DNN) + "(" + str(nEvt_isMatch_DNN) + "/" + str(
                nEvents) + ")"
        print(string_nmatch_matchable)
        print(string_nmatch_final)
        with open('ratio.txt', 'a') as f_ratio:
            f_ratio.write("\n" + modelfile + "\n")
            f_ratio.write(string_nmatch_matchable + "\n")
            f_ratio.write(string_nmatch_final)

    # Fold the overflow into the last visible bin, then clear under/overflow.
    for iChannel in range(nChannel):
        _foldOverflow1D(h_njets[iChannel], 10)
        _foldOverflow1D(h_nbjets[iChannel], 10)
        _foldOverflow1D(h_reco_addjets_deltaR[iChannel], nbins_reco_addjets_dr)
        _foldOverflow1D(h_reco_addjets_invMass[iChannel], nbins_reco_addjets_mass)
        _foldOverflow1D(h_gen_addbjets_deltaR[iChannel], nbins_gen_addjets_dr)
        _foldOverflow1D(h_gen_addbjets_invMass[iChannel], nbins_gen_addjets_mass)
        _foldOverflow1D(h_gen_addbjets_deltaR_nosel[iChannel], nbins_gen_addjets_dr)
        _foldOverflow1D(h_gen_addbjets_invMass_nosel[iChannel], nbins_gen_addjets_mass)

        for index, value in enumerate(varlist):
            # Same int() conversion as when the histogram was booked.
            nbins = int(ut.getHistRange(value)[0])
            _foldOverflow1D(h_hist[iChannel][index], nbins)
            h_hist[iChannel][index].ClearUnderflowAndOverflow()

        _foldOverflow2D(h_respMatrix_deltaR[iChannel],
                        nbins_reco_addjets_dr, nbins_gen_addjets_dr)
        _foldOverflow2D(h_respMatrix_invMass[iChannel],
                        nbins_reco_addjets_mass, nbins_gen_addjets_mass)

        for hist in (h_njets[iChannel], h_nbjets[iChannel],
                     h_reco_addjets_deltaR[iChannel],
                     h_reco_addjets_invMass[iChannel],
                     h_gen_addbjets_deltaR[iChannel],
                     h_gen_addbjets_invMass[iChannel],
                     h_gen_addbjets_deltaR_nosel[iChannel],
                     h_gen_addbjets_invMass_nosel[iChannel],
                     h_respMatrix_deltaR[iChannel],
                     h_respMatrix_invMass[iChannel]):
            hist.ClearUnderflowAndOverflow()

    keras.backend.clear_session()

    timer.Stop()
    realtime = timer.RealTime()
    cputime = timer.CpuTime()
    # Format before printing: calling .format() on the result of print()
    # fails under Python 3.
    print("Real Time : {0:6.2f} seconds, CPU Time : {1:6.2f} seconds".format(
        realtime, cputime))
l = len(Memories)

# Performing the memory dump: regions larger than MAX_SIZE are written in
# chunks by dumper.splitter, all others in one piece by dumper.dump_to_file.
for memory in Memories:
    base = memory.base_address
    logging.debug("Base Address: " + str(hex(base)))
    logging.debug("")
    size = memory.size
    logging.debug("Size: " + str(size))
    if size > MAX_SIZE:
        logging.debug("Too big, splitting the dump into chunks")
        mem_access_viol = dumper.splitter(session, base, size, MAX_SIZE,
                                          mem_access_viol, DIRECTORY)
    else:
        mem_access_viol = dumper.dump_to_file(session, base, size,
                                              mem_access_viol, DIRECTORY)
    # Count split regions too, otherwise the bar never reaches 100 %.
    i += 1
    utils.printProgress(i, l, prefix='Progress:', suffix='Complete', bar=50)
# print("") emits the blank line on Python 2 and 3 alike; a bare `print`
# does nothing under Python 3.
print("")

# Run Strings if selected
if STRINGS:
    files = os.listdir(DIRECTORY)
    i = 0
    l = len(files)
    print("Running strings on all files:")
    for f1 in files:
        utils.strings(f1, DIRECTORY)
        i += 1
        utils.printProgress(i, l, prefix='Progress:', suffix='Complete',
                            bar=50)
print("Finished!")
# raw_input: this script targets Python 2.
raw_input('Press Enter to exit...')
# Ordered (substring, short name) pairs used to normalise the sample name.
# The first substring found in the sample name wins, so the order matters
# (e.g. 'ZJets_M10to50' must be tested before 'ZJets_M50').
_ANA_PROCESS_NAMES = (
    ('ttbb', 'ttbb'),
    ('ttbj', 'ttbj'),
    ('ttcc', 'ttcc'),
    ('ttLF', 'ttLF'),
    ('ttother', 'ttother'),
    ('PythiaBkg', 'ttbkg'),
    ('ttHbb', 'ttH'),
    ('ttW', 'ttW'),
    ('ttZ', 'ttZ'),
    ('t_Powheg', 'tchannel'),
    ('tbar_Powheg', 'tbarchannel'),
    ('tW_Powheg', 'tWchannel'),
    ('tbarW_Powheg', 'tbarWchannel'),
    ('WJets', 'wjets'),
    ('ZJets_M10to50', 'zjets10to50'),
    ('ZJets_M50', 'zjets'),
    ('WW', 'ww'),
    ('WZ', 'wz'),
    ('ZZ', 'zz'),
)

# CSV shape systematics: (tag searched in the systematic name, sign, index
# into event['jet_SF_CSV_30']). The applied weight is SF[0] + sign * SF[index].
#   bottom flavor jets : lf (light flavor contamination), hfstats1/hfstats2
#   light flavor jets  : hf (heavy flavor contamination), lfstats1/lfstats2
#   charm flavor jets  : cferr1/cferr2
_ANA_CSV_SHAPE_SYSTS = (
    ('lfup', 1, 3), ('lfdown', -1, 4),
    ('hfup', 1, 5), ('hfdown', -1, 6),
    ('hfstat1up', 1, 7), ('hfstat1down', -1, 8),
    ('hfstat2up', 1, 9), ('hfstat2down', -1, 10),
    ('lfstat1up', 1, 11), ('lfstat1down', -1, 12),
    ('lfstat2up', 1, 13), ('lfstat2down', -1, 14),
    ('cferr1up', 1, 15), ('cferr1down', -1, 16),
    ('cferr2up', 1, 17), ('cferr2down', -1, 18),
)


def _ana_variation_offset(syst, up_tag, down_tag):
    """Return 1 if up_tag is in syst, 2 if down_tag is, else 0 (nominal)."""
    if up_tag in syst:
        return 1
    if down_tag in syst:
        return 2
    return 0


def _ana_csv_shape_weight(syst, csv_sf):
    """Return the CSV shape weight selected by the systematic name syst."""
    for tag, sign, idx in _ANA_CSV_SHAPE_SYSTS:
        if tag in syst:
            return csv_sf[0] + sign * csv_sf[idx]
    return csv_sf[0]


def _ana_book(hist, xtitle, ytitle):
    """Set the axis titles of hist, enable Sumw2 and return hist."""
    hist.GetXaxis().SetTitle(xtitle)
    hist.GetYaxis().SetTitle(ytitle)
    hist.Sumw2()
    return hist


def _ana_fold_overflow_1d(hist, nbins):
    """Add the overflow bin (nbins+1) to bin nbins, then clear under/overflow."""
    hist.AddBinContent(nbins, hist.GetBinContent(nbins + 1))
    hist.ClearUnderflowAndOverflow()


def _ana_fold_overflow_2d(hist, nx, ny):
    """Fold the upper overflow row/column/corner of a 2D histogram inward."""
    # Overflow in Y goes into the last Y bin of each X bin.
    for ix in range(1, nx + 1):
        tmp = hist.GetBinContent(ix, ny) + hist.GetBinContent(ix, ny + 1)
        hist.SetBinContent(ix, ny, tmp)
    # Overflow in X goes into the last X bin of each Y bin.
    for iy in range(1, ny + 1):
        tmp = hist.GetBinContent(nx, iy) + hist.GetBinContent(nx + 1, iy)
        hist.SetBinContent(nx, iy, tmp)
    # Finally the (overflow, overflow) corner goes into the last visible bin.
    tmp = hist.GetBinContent(nx + 1, ny + 1) + hist.GetBinContent(nx, ny)
    hist.SetBinContent(nx, ny, tmp)
    hist.ClearUnderflowAndOverflow()


def ana(inputDir, process, outputDir, sys='', flag1=False):
    """Select the best jet pair per event with the DNN and fill histograms.

    Reads every HDF file in inputDir, keeps rows passing the jet selection,
    scores them with the Keras model named in 'var.txt', keeps for each event
    the row(s) with the highest 'signal' score and fills reco/gen histograms
    and response matrices that are written to
    outputDir/<modelfile>/hist_<process><sys>.root (hist_closure.root when
    flag1 is true).

    Args:
        inputDir: directory containing the HDF arrays to read.
        process: sample name; anything after '__' is dropped. The stripped
            name is also the ntuple file name.
        outputDir: base directory for the output ROOT file.
        sys: systematic suffix appended to histogram/file names and used to
            pick the weight variations. NOTE: this parameter shadows the
            standard `sys` module inside the function; it is kept for
            backward compatibility with existing callers.
        flag1: when true run the closure test (odd rows fill the
            distributions, even rows fill the response matrices).
    """
    if '__' in process:
        process = process.split('__')[0]
    print("Process: "+process+"\n")
    print("Systematics: "+sys+"\n")

    # Keep the stripped sample name: it is the ntuple file name.
    ntuple = process
    for pattern, short_name in _ANA_PROCESS_NAMES:
        if pattern in process:
            process = short_name
            break
    if 'Filter' in inputDir:
        process = 'ttbbFilter'+process
    print('Save:'+process+sys+'\n')

    timer = ROOT.TStopwatch()
    timer.Start()

    # 'var.txt' holds whitespace separated "<key> <value>" lines.
    ver = ""
    configDir = ""
    weightDir = ""
    modelfile = ""
    with open('var.txt', 'r') as f:
        for line in f:
            tmp = line.split()
            if 'ver' in tmp:
                ver = tmp[1]
            if 'configDir' in tmp:
                configDir = tmp[1]
            if 'weightDir' in tmp:
                weightDir = tmp[1]
            if 'modelfile' in tmp:
                modelfile = tmp[1]

    if not os.path.exists(outputDir+'/'+modelfile):
        os.makedirs(outputDir+'/'+modelfile)

    closureTest = flag1
    data = 'Data' in process
    ttbb = 'ttbb' in process

    muon_ch = 0
    electron_ch = 1
    jet_CSV = 0.9535
    nChannel = 2
    ntuple_path = '/data/users/seohyun/ntuple/hep2017/v808/nosplit/'

    print("\nMerge arrays")
    str_query = 'csv1 > '+str(jet_CSV)+' and csv2 > '+str(jet_CSV)+' and njets >= 6 and nbjets >= 3'
    selEvent = pd.DataFrame([])
    max_nevt_num = 0
    for item in os.listdir(inputDir):
        df = pd.read_hdf(inputDir+'/'+item)
        last = 0
        if df.size != 0:
            last = int(df.tail(1)['event'])+1
        # Shift the event numbers so they stay unique across files.
        df['event'] = df['event'] + max_nevt_num
        df = df.query(str_query)
        selEvent = pd.concat([selEvent, df], axis=0)
        max_nevt_num += last
    selEvent.reset_index(drop=True, inplace=True)

    nMatchable = 4864  # ttbbFilter nMatchable: 5557
    countMatchable = False
    if countMatchable:
        df = pd.read_hdf("array/array_ttbb.h5")
        df = df.filter(['signal', 'event', 'dR'], axis=1)
        df = df.query('signal > 0')
        df.reset_index(drop=True, inplace=True)
        nMatchable = len(df.index)
        print(nMatchable)
        with open('matchable.txt', 'w') as f_tmp:
            f_tmp.write(str(nMatchable))
            f_tmp.write(str(df))

    if closureTest:
        f_out = ROOT.TFile(outputDir+'/'+modelfile+'/hist_closure.root', 'recreate')
    elif sys == '':
        f_out = ROOT.TFile(outputDir+'/'+modelfile+'/hist_'+process+'.root', 'recreate')
    else:
        f_out = ROOT.TFile(outputDir+'/'+modelfile+'/hist_'+process+sys+'.root', 'recreate')

    # Reco axes use uniform binning, gen axes use variable bin edges.
    nbins_reco_addjets_dr = 12
    reco_addjets_dr_min = 0.4
    reco_addjets_dr_max = 4.0

    nbins_reco_addjets_mass = 12
    reco_addjets_m_min = 0
    reco_addjets_m_max = 400

    nbins_gen_addjets_dr = 4
    gen_addjets_dr_width = [0.4, 0.6, 1.0, 2.0, 4.0]

    nbins_gen_addjets_mass = 4
    gen_addjets_mass_width = [0.0, 60.0, 100.0, 170.0, 400.0]

    gen_dr_edges = array('d', gen_addjets_dr_width)
    gen_mass_edges = array('d', gen_addjets_mass_width)

    dr_title = "#DeltaR_{b#bar{b}}"
    mass_title = "m_{b#bar{b}}(GeV)"

    # Histograms for unfolding (one per channel)
    h_gen_addbjets_deltaR_nosel = [None] * nChannel
    h_gen_addbjets_invMass_nosel = [None] * nChannel
    h_njets = [None] * nChannel
    h_nbjets = [None] * nChannel
    h_reco_addjets_deltaR = [None] * nChannel
    h_reco_addjets_invMass = [None] * nChannel
    h_gen_addbjets_deltaR = [None] * nChannel
    h_gen_addbjets_invMass = [None] * nChannel
    h_respMatrix_deltaR = [None] * nChannel
    h_respMatrix_invMass = [None] * nChannel

    # Histograms of DNN input variables
    varlist = ut.getVarlist()
    xlabel = ut.getHistXlabel()
    h_hist = [[None] * len(varlist) for _ in range(nChannel)]

    for iChannel in range(nChannel):
        ch = str(iChannel)

        for iVar, var in enumerate(varlist):
            histRange = ut.getHistRange(var)
            h_hist[iChannel][iVar] = _ana_book(
                ROOT.TH1D('keras_h_' + var + '_Ch' + ch + '_S3' + sys, '',
                          int(histRange[0]), float(histRange[1]), float(histRange[2])),
                xlabel[var], "Entries")

        h_gen_addbjets_deltaR_nosel[iChannel] = _ana_book(
            ROOT.TH1D("h_gentop_GenAddbJetDeltaR_Ch" + ch + "_nosel" + sys, "",
                      nbins_gen_addjets_dr, gen_dr_edges),
            dr_title, "Entries")
        h_gen_addbjets_invMass_nosel[iChannel] = _ana_book(
            ROOT.TH1D("h_gentop_GenAddbJetInvMass_Ch" + ch + "_nosel" + sys, "",
                      nbins_gen_addjets_mass, gen_mass_edges),
            mass_title, "Entries")

        h_njets[iChannel] = _ana_book(
            ROOT.TH1D("h_keras_nJets_Ch" + ch + "_S3" + sys, "", 10, 0, 10),
            "Jet multiplicity", "Entries")
        h_nbjets[iChannel] = _ana_book(
            ROOT.TH1D("h_keras_nbJets_Ch" + ch + "_S3" + sys, "", 10, 0, 10),
            "bJet multiplicity", "Entries")

        h_reco_addjets_deltaR[iChannel] = _ana_book(
            ROOT.TH1D("h_keras_RecoAddbJetDeltaR_Ch" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_dr, reco_addjets_dr_min, reco_addjets_dr_max),
            dr_title, "Entries")
        h_reco_addjets_invMass[iChannel] = _ana_book(
            ROOT.TH1D("h_keras_RecoAddbJetInvMass_Ch" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_mass, reco_addjets_m_min, reco_addjets_m_max),
            mass_title, "Entries")

        h_gen_addbjets_deltaR[iChannel] = _ana_book(
            ROOT.TH1D("h_gentop_GenAddbJetDeltaR_Ch" + ch + "_S3" + sys, "",
                      nbins_gen_addjets_dr, gen_dr_edges),
            dr_title, "Entries")
        h_gen_addbjets_invMass[iChannel] = _ana_book(
            ROOT.TH1D("h_gentop_GenAddbJetInvMass_Ch" + ch + "_S3" + sys, "",
                      nbins_gen_addjets_mass, gen_mass_edges),
            mass_title, "Entries")

        h_respMatrix_deltaR[iChannel] = _ana_book(
            ROOT.TH2D("h_keras_ResponseMatrixDeltaR_Ch" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_dr, reco_addjets_dr_min, reco_addjets_dr_max,
                      nbins_gen_addjets_dr, gen_dr_edges),
            "Reco. #DeltaR_{b#bar{b}}", "Gen. #DeltaR_{b#bar{b}}")
        h_respMatrix_invMass[iChannel] = _ana_book(
            ROOT.TH2D("h_keras_ResponseMatrixInvMass_Ch" + ch + "_S3" + sys, "",
                      nbins_reco_addjets_mass, reco_addjets_m_min, reco_addjets_m_max,
                      nbins_gen_addjets_mass, gen_mass_edges),
            "Reco. m_{b#bar{b}}(GeV)", "Gen. m_{b#bar{b}}(GeV)")

    # Generator-level distributions before any selection (ttbb only).
    if ttbb:
        genchain = TChain("ttbbLepJets/gentree")
        genchain.Add(ntuple_path+ntuple+".root")

        print("GENTREE RUN")
        nGenEntries = genchain.GetEntries()
        for i in xrange(nGenEntries):
            ut.printProgress(i, nGenEntries, 'Progress:', 'Complete', 1, 50)
            genchain.GetEntry(i)
            addbjet1 = TLorentzVector()
            addbjet2 = TLorentzVector()
            addbjet1.SetPtEtaPhiE(genchain.addbjet1_pt, genchain.addbjet1_eta,
                                  genchain.addbjet1_phi, genchain.addbjet1_e)
            addbjet2.SetPtEtaPhiE(genchain.addbjet2_pt, genchain.addbjet2_eta,
                                  genchain.addbjet2_phi, genchain.addbjet2_e)

            gendR = addbjet1.DeltaR(addbjet2)
            genM = (addbjet1+addbjet2).M()

            if genchain.genchannel == muon_ch:
                h_gen_addbjets_deltaR_nosel[muon_ch].Fill(gendR, genchain.genweight)
                h_gen_addbjets_invMass_nosel[muon_ch].Fill(genM, genchain.genweight)
            elif genchain.genchannel == electron_ch:
                h_gen_addbjets_deltaR_nosel[electron_ch].Fill(gendR, genchain.genweight)
                h_gen_addbjets_invMass_nosel[electron_ch].Fill(genM, genchain.genweight)
            else:
                print("Error")

    print("\nLoad modelfile : "+str(modelfile))
    model = load_model(configDir+weightDir+ver+'/'+modelfile)
    model.summary()
    varlist = ut.getVarlist()
    jetCombi = selEvent.filter(varlist)
    scaler = StandardScaler()
    pred = pd.DataFrame([])
    # Compare by value: `is not 0` relied on CPython's small-int caching.
    if len(jetCombi) != 0:
        inputset = np.array(jetCombi)
        inputset_sc = scaler.fit_transform(inputset)
        pred = model.predict(inputset_sc, batch_size=2000)

    pred = pd.DataFrame(pred, columns=['background', 'signal'])
    selEvent = pd.concat([selEvent, pred], axis=1)
    # Keep, for every event, the row(s) with the highest signal score.
    idx = selEvent.groupby(['event'])['signal'].transform(max) == selEvent['signal']
    selEvent = selEvent[idx]
    selEvent.reset_index(drop=True, inplace=True)

    print("\n Fill Hist")
    nEvents = 0
    nEvt_isMatch_DNN = 0
    nSelected = len(selEvent.index)
    is_ttbar_input = 'TT' in inputDir or 'tt' in inputDir
    for index, event in selEvent.iterrows():
        ut.printProgress(index, nSelected, 'Progress: ', 'Complete', 1, 25)

        passmuon = event['channel'] == 0
        passelectron = event['channel'] == 1

        njets = event['njets']
        nbjets = event['nbjets']

        gen_addbjet1 = TLorentzVector()
        gen_addbjet2 = TLorentzVector()
        gen_addbjet1.SetPtEtaPhiE(event['addbjet1_pt'], event['addbjet1_eta'],
                                  event['addbjet1_phi'], event['addbjet1_e'])
        gen_addbjet2.SetPtEtaPhiE(event['addbjet2_pt'], event['addbjet2_eta'],
                                  event['addbjet2_phi'], event['addbjet2_e'])
        gen_dR = gen_addbjet1.DeltaR(gen_addbjet2)
        gen_M = (gen_addbjet1+gen_addbjet2).M()

        # additional bjets from DNN
        reco_addbjet1 = TLorentzVector(0, 0, 0, 0)
        reco_addbjet2 = TLorentzVector(0, 0, 0, 0)
        reco_addbjet1.SetPtEtaPhiE(event['pt1'], event['eta1'], event['phi1'], event['e1'])
        reco_addbjet2.SetPtEtaPhiE(event['pt2'], event['eta2'], event['phi2'], event['e2'])
        reco_dR = reco_addbjet1.DeltaR(reco_addbjet2)
        reco_M = (reco_addbjet1+reco_addbjet2).M()

        eventweight = 1.0
        if not data:
            eventweight *= event['genWeight']

            # PUWeight: [0] nominal, [1] up, [2] down
            eventweight *= event['PUWeight'][_ana_variation_offset(sys, 'puup', 'pudown')]

            # lepton_SF: [0]~[2] ID/Iso(/Reco), [3]~[5] Trigger;
            # within each triplet the order is nominal, up, down.
            if passmuon:
                lep = 'mu'
            elif passelectron:
                lep = 'el'
            else:
                lep = None
            if lep is not None:
                eventweight *= event['lepton_SF'][
                    _ana_variation_offset(sys, lep+'sfup', lep+'sfdown')]
                eventweight *= event['lepton_SF'][
                    3 + _ana_variation_offset(sys, lep+'trgup', lep+'trgdown')]

            # Scale Weight(ME)
            # [0] = muF up, [1] = muF down, [2] = muR up, [3] = muR up && muF up
            # [4] = muR down, [5] = muR down && muF down
            # NOTE(review): the original comment for [5] read
            # "muF down && muF down"; assumed to be a typo -- confirm.
            if is_ttbar_input:
                for iScale in range(6):
                    if 'scale' + str(iScale) in sys:
                        eventweight *= event['scaleweight'][iScale]
                        break

            # CSV Shape
            eventweight *= _ana_csv_shape_weight(sys, event['jet_SF_CSV_30'])

            if is_ttbar_input:
                if 'pdfup' in sys:
                    eventweight *= event['pdfweight'][101]
                elif 'pdfdown' in sys:
                    eventweight *= event['pdfweight'][100]

        # matching ratio: both reco jets within dR < 0.5 of the gen jets,
        # in either pairing
        isMatch_DNN = ((reco_addbjet1.DeltaR(gen_addbjet1) < 0.5 and
                        reco_addbjet2.DeltaR(gen_addbjet2) < 0.5) or
                       (reco_addbjet1.DeltaR(gen_addbjet2) < 0.5 and
                        reco_addbjet2.DeltaR(gen_addbjet1) < 0.5))

        passchannel = -999
        if passmuon and not passelectron:
            passchannel = muon_ch
        elif passelectron and not passmuon:
            passchannel = electron_ch
        else:
            # NOTE(review): passchannel stays -999 here, so the histogram
            # indexing below raises IndexError for such an event.
            print("Error!")

        if isMatch_DNN:
            nEvt_isMatch_DNN += 1
        nEvents += 1

        if closureTest:
            # Odd rows act as pseudo-data, even rows build the response matrix.
            if index % 2 == 1:
                h_njets[passchannel].Fill(njets, eventweight)
                h_nbjets[passchannel].Fill(nbjets, eventweight)
                h_reco_addjets_deltaR[passchannel].Fill(reco_dR, eventweight)
                h_reco_addjets_invMass[passchannel].Fill(reco_M, eventweight)
                h_gen_addbjets_deltaR[passchannel].Fill(gen_dR, eventweight)
                h_gen_addbjets_invMass[passchannel].Fill(gen_M, eventweight)
            else:
                h_respMatrix_deltaR[passchannel].Fill(reco_dR, gen_dR, eventweight)
                h_respMatrix_invMass[passchannel].Fill(reco_M, gen_M, eventweight)
        else:
            h_njets[passchannel].Fill(njets, eventweight)
            h_nbjets[passchannel].Fill(nbjets, eventweight)
            h_reco_addjets_deltaR[passchannel].Fill(reco_dR, eventweight)
            h_reco_addjets_invMass[passchannel].Fill(reco_M, eventweight)
            # Separate loop variable: the original reused `index` here and
            # shadowed the event-loop index.
            for iVar, var in enumerate(varlist):
                h_hist[passchannel][iVar].Fill(event[var], eventweight)
            if ttbb:
                h_gen_addbjets_deltaR[passchannel].Fill(gen_dR, eventweight)
                h_gen_addbjets_invMass[passchannel].Fill(gen_M, eventweight)
                h_respMatrix_deltaR[passchannel].Fill(reco_dR, gen_dR, eventweight)
                h_respMatrix_invMass[passchannel].Fill(reco_M, gen_M, eventweight)

    if ttbb:
        # Both ratios default to 0.0 so the report below cannot hit an
        # unbound name when no event was selected.
        matching_DNN = 0.0
        matching_DNN_able = 0.0
        if nEvents != 0:
            matching_DNN_able = float(nEvt_isMatch_DNN) / float(nMatchable)
            matching_DNN = float(nEvt_isMatch_DNN) / float(nEvents)
        print("\nMatching ratio with matchable events from DNN : "+str(matching_DNN_able)+"("+str(nEvt_isMatch_DNN)+"/"+str(nMatchable)+")")
        print("Matching ratio with step 3 events from DNN : "+str(matching_DNN)+"("+str(nEvt_isMatch_DNN)+"/"+str(nEvents)+")")
        with open('ratio.txt', 'a') as f_ratio:
            f_ratio.write(modelfile)
            f_ratio.write("\nMatching ratio with matchable events from DNN: "+str(matching_DNN_able)+"("+str(nEvt_isMatch_DNN)+"/"+str(nMatchable)+")\n")

    # Move the upper overflow content into the last visible bin(s), then
    # clear the under/overflow bins.
    for iChannel in range(nChannel):
        _ana_fold_overflow_1d(h_njets[iChannel], 10)
        _ana_fold_overflow_1d(h_nbjets[iChannel], 10)
        _ana_fold_overflow_1d(h_reco_addjets_deltaR[iChannel], nbins_reco_addjets_dr)
        _ana_fold_overflow_1d(h_reco_addjets_invMass[iChannel], nbins_reco_addjets_mass)
        _ana_fold_overflow_1d(h_gen_addbjets_deltaR[iChannel], nbins_gen_addjets_dr)
        _ana_fold_overflow_1d(h_gen_addbjets_invMass[iChannel], nbins_gen_addjets_mass)
        _ana_fold_overflow_1d(h_gen_addbjets_deltaR_nosel[iChannel], nbins_gen_addjets_dr)
        _ana_fold_overflow_1d(h_gen_addbjets_invMass_nosel[iChannel], nbins_gen_addjets_mass)

        for iVar, var in enumerate(varlist):
            tmp = ut.getHistRange(var)
            _ana_fold_overflow_1d(h_hist[iChannel][iVar], tmp[0])

        _ana_fold_overflow_2d(h_respMatrix_deltaR[iChannel],
                              nbins_reco_addjets_dr, nbins_gen_addjets_dr)
        _ana_fold_overflow_2d(h_respMatrix_invMass[iChannel],
                              nbins_reco_addjets_mass, nbins_gen_addjets_mass)

    # Copy the bookkeeping histograms from the ntuple into the output file.
    f_ntuple = TFile.Open(os.path.join(ntuple_path, ntuple+'.root'), 'read')
    h_eventinfo = f_ntuple.Get("ttbbLepJets/EventInfo")
    h_scaleweight = f_ntuple.Get("ttbbLepJets/ScaleWeights")

    f_out.cd()
    h_eventinfo.Write()
    h_scaleweight.Write()
    f_out.Write()
    f_out.Close()

    keras.backend.clear_session()

    timer.Stop()
    realtime = timer.RealTime()
    cputime = timer.CpuTime()
    # Format first, then print: `print(...).format(...)` only worked through
    # Python 2 print-statement parsing.
    print("Real Time : {0:6.2f} seconds, CPU Time : {1:6.2f} seconds".format(realtime, cputime))
import pandas as pd
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, as_completed
import utils

# Screen the symbols listed in data/sp500.csv with utils.isRaisingN(symbol, 3)
# and write the ones that pass to data/good_stocks.csv.
if __name__ == '__main__':
    good_stocks = []

    # Load stock symbols from csv file (first column, no header row)
    df = pd.read_csv('data/sp500.csv', header=None)
    symbols = df[0].values

    # Progress is reported as "finished tasks / submitted tasks". The total
    # comes from the data instead of a hard-coded 505, and the counter starts
    # at 0 so the first completed task is reported as 1 (it used to start at
    # 1 and be incremented before the first report).
    total = len(symbols)
    done = 0

    num_cpu = mp.cpu_count()
    with ThreadPoolExecutor(max_workers=num_cpu) as executor:
        # Start the task and mark each future with its parameter
        futures = {
            executor.submit(utils.isRaisingN, symbol, 3): symbol
            for symbol in symbols
        }
        for f in as_completed(futures):
            symbol = futures[f]
            done += 1
            utils.printProgress(done, total, prefix='Progress:', decimals=0)
            if f.result():
                good_stocks.append(symbol)

    df = pd.DataFrame(data=good_stocks)
    df.to_csv('data/good_stocks.csv', header=False)
def run(self):
    """Scrape images and landmark tables for every subject of every collection.

    For each collection the subject ids are read from
    ./subjects_info/<collection>.csv. For each subject, all images found on
    the quick-view page are downloaded into ./data/<collection>/<subject>/
    and the landmark table is saved there as landmarks.json and
    landmarks.csv. The browser is closed and the process exits with status 0
    when everything is done.
    """
    # Open Chrome
    self.open()

    # File name in subjects_info dir
    collection_names = [
        'bolton', 'Burlington', 'Denver', 'Fels', 'Forsyth', 'Iowa',
        'Mathews', 'Michigan', 'Oregon'
    ]
    # collection_id is in url
    collection_ids = [
        'CASEBolton', 'UTBurlington', 'UOKDenver', 'WSUFels', 'Forsyth',
        'UIOWAGrowth', 'UOPMathews', 'UMICHGrowth', 'UOGrowth'
    ]
    # Subject ids of these collections are zero-padded to a fixed width;
    # zfill() never truncates, so longer ids are kept unchanged.
    id_widths = {'Mathews': 3, 'Michigan': 5, 'Oregon': 3}

    def download_img(url, directory, subject_id, img_idx):
        # Download the image passed in as `url`. The previous version read
        # the enclosing loop variable `img_url` instead, so every thread
        # fetched the last image of the page.
        urllib.request.urlretrieve(
            url, directory + subject_id + "_" + str(img_idx) + ".jpg")

    # Loop for each file in subjects_info dir
    for collection_name, collection_id in zip(collection_names, collection_ids):
        # Getting subject ids from csv file.
        # This part also can be automated as well.
        width = id_widths.get(collection_name, 0)
        subject_ids = []
        with open('./subjects_info/' + collection_name + '.csv',
                  newline='') as csvfile:
            subjects_info = csv.reader(csvfile, delimiter=',', quotechar='|')
            next(subjects_info, None)  # skip the header row
            for row in subjects_info:
                # Skip blank lines and rows without a subject id.
                if row and row[0]:
                    subject_ids.append(row[0].zfill(width))

        # Getting data from each subject
        n_subjects = len(subject_ids)
        for s_idx, subject_id in enumerate(subject_ids):
            printProgress(s_idx + 1, n_subjects,
                          collection_name + ' 조회중:', '완료', 2, 50)

            # Saving dir. Created up front so that the landmark files can be
            # written even when the subject has no images.
            directory = './data/' + collection_name + '/' + subject_id + "/"
            os.makedirs(directory, exist_ok=True)

            # Setting image url
            url = 'http://www.aaoflegacycollection.org/aaof_collectionQuickView.html?collectionID=' + collection_id + '&subjectID=' + subject_id
            self.driver.get(url)

            # Getting Image Elements and their src attributes
            table_elements = self.driver.find_elements_by_xpath(
                "//table/tbody/tr/td/a/img")
            target_urls = [e.get_attribute('src') for e in table_elements]

            # One download thread per image
            threads = [
                threading.Thread(target=download_img,
                                 args=(img_url, directory, subject_id, img_idx))
                for img_idx, img_url in enumerate(target_urls)
            ]
            for thread in threads:
                # Start Threads
                thread.start()
            for thread in threads:
                # Wait for threads to be finished
                thread.join()

            # Getting Position
            url = 'http://www.aaoflegacycollection.org/aaof_LMTableDisplay.html?collectionID=' + collection_id + '&subjectID=' + subject_id
            self.driver.get(url)

            # Get table elements
            table_header = self.driver.find_elements_by_xpath(
                "//div[@id='tabs-L0']/div[@id='data0_wrapper']/div[@class='dataTables_scroll']/div[@class='dataTables_scrollBody']/table/thead/tr/th"
            )

            # Get Table Header(Column): strip the sort icon markup and all
            # spaces from each header cell
            landmark_headers = []
            for th in table_header:
                _target = th.find_element_by_xpath(
                    "./div/div").get_attribute('innerHTML').replace(
                        '<span class="DataTables_sort_icon"></span>',
                        "").strip().replace(" ", "")
                landmark_headers.append(_target)

            # Get Table row (actual data locates)
            table_rows = self.driver.find_elements_by_xpath(
                "//div[@id='tabs-L0']/div[@id='data0_wrapper']/div[@class='dataTables_scroll']/div[@class='dataTables_scrollBody']/table/tbody/tr"
            )

            # Saving Data in json and csv
            subject_data = {}
            subject_data_csv = []
            for r in table_rows:
                # Everything before the first <td> is dropped; the first
                # cell is the landmark name, the rest are its values.
                cells = [
                    td.replace("</td>", "")
                    for td in r.get_attribute('innerHTML').split(
                        '<td class=" dt-body-right">')[1:]
                ]
                if cells:
                    landmark = cells[0]
                subject_data[landmark] = [
                    {landmark_headers[i]: td_text}
                    for i, td_text in enumerate(cells) if i > 0
                ]
                subject_data_csv.append(cells)

            with open(directory + 'landmarks.json', 'w') as outfile:
                json.dump(subject_data, outfile)

            # newline='' is what the csv module requires for writer files;
            # without it Windows output gets an extra blank line per row.
            with open(directory + "landmarks.csv", "w", newline='') as f:
                writer = csv.writer(f)
                writer.writerows([landmark_headers])
                writer.writerows(subject_data_csv)

    # Closing Chrome
    self.close()
    exit(0)
def process(self):
    """Build the pairwise drug graphs and save them to processed_paths[0].

    Does nothing if 'Decagon-<datatype>.pt' already exists in
    self.processed_dir. Otherwise every (SMILES1, SMILES2, side effect) row
    of the negative and positive CSV files becomes one graph holding the
    atoms and bonds of both molecules, labelled 0 (negative) or 1 (positive)
    and tagged with a one-hot side-effect vector.
    """
    if osp.exists(
            os.path.join(self.processed_dir,
                         'Decagon-{}.pt'.format(self.datatype))):
        return

    data_list = []

    # >>> Obtain One-Hot Encoding for Side-Effects
    with open(self.total_data_dir, 'r', encoding='utf-8') as f:
        target_list = [line[-1] for line in csv.reader(f)]

    label_encoder = LabelEncoder()
    label_encoder.fit(target_list)  # assigns one integer class id per side effect
    label_list = label_encoder.transform(target_list)
    num_classes = len(label_encoder.classes_)
    # side effect name -> integer class id
    target_dict = dict(zip(target_list, label_list))

    for label_idx, mode in enumerate(['negative', 'positive']):
        # negative will be 0, positive will be 1
        pair_list, se_list = [], []
        with open(osp.join(self.dataset_dir,
                           'Decagon-{}-{}.csv'.format(mode, self.datatype)),
                  'r', encoding='utf-8') as f:
            for line in csv.reader(f):
                se_list.append(line[-1])
                pair_list.append(line[:-1])

        total = len(pair_list)
        progress_prefix = '{} dataset preparation: '.format(self.datatype)
        for idx, (smiles_pair, se) in enumerate(zip(pair_list, se_list)):
            smiles1, smiles2 = smiles_pair

            side_effect = [0] * num_classes
            side_effect[target_dict[se]] = 1

            printProgress(idx + 1, total, progress_prefix, ' ', 2, 50)

            mol1 = MolFromSmiles(smiles1)
            mol2 = MolFromSmiles(smiles2)
            label = [int(label_idx)]

            # Report the SMILES strings: the parsed molecule objects
            # (None on failure) printed before did not identify the pair.
            if mol1 is None or mol2 is None:
                print("There is a missing drug from the pair (%s,%s)" %
                      (smiles1, smiles2))
                continue

            ######################################################################
            # >>> Get pairwise graph G1, G2
            c1_size = mol1.GetNumAtoms()
            c2_size = mol2.GetNumAtoms()
            if c1_size == 0 or c2_size == 0:
                print("There is a size error from pair (%s,%s)" %
                      (smiles1, smiles2))
                continue

            # Node features of both molecules, each normalized to sum to 1.
            features = []
            for mol in (mol1, mol2):
                for atom in mol.GetAtoms():
                    feature = atom_features(atom)
                    features.append(feature / sum(feature))  # normalize

            # Bonds of the second molecule are shifted by c1_size so both
            # molecules share one node index space.
            edges = []
            for mol, offset in ((mol1, 0), (mol2, c1_size)):
                for bond in mol.GetBonds():
                    edges.append([
                        bond.GetBeginAtomIdx() + offset,
                        bond.GetEndAtomIdx() + offset
                    ])
            if len(edges) == 0:
                continue

            G = nx.Graph(edges).to_directed()
            edge_index = [[e1, e2] for e1, e2 in G.edges]

            GraphSiameseData = DATA.Data(
                x=torch.Tensor(features),
                edge_index=torch.LongTensor(edge_index).transpose(1, 0),
                y=torch.Tensor(label).view(-1, 1))
            GraphSiameseData.__setitem__('c1_size',
                                         torch.LongTensor([c1_size]))
            GraphSiameseData.__setitem__('c2_size',
                                         torch.LongTensor([c2_size]))
            GraphSiameseData.__setitem__(
                'side_effect',
                torch.Tensor(side_effect).view(1, -1))
            data_list.append(GraphSiameseData)
            ###########################################################################

    if self.pre_filter is not None:
        data_list = [data for data in data_list if self.pre_filter(data)]
    if self.pre_transform is not None:
        data_list = [self.pre_transform(data) for data in data_list]

    # check this function
    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def _bookCountHist(name, title, xTitle, nBins):
    """Book a TH1D with nBins unit-wide bins labelled '0', '1', ..."""
    hist = TH1D(name, title, nBins, 0, nBins)
    xAxis = hist.GetXaxis()
    xAxis.SetTitle(xTitle)
    for iLabel in range(nBins):
        xAxis.SetBinLabel(iLabel + 1, str(iLabel))
    hist.GetYaxis().SetTitle('Entries')
    hist.Sumw2()
    return hist


def _bookSegmentHist(name, title, nBins):
    """Book a TH1D with nBins unit-wide bins for per-chamber segment sums."""
    hist = TH1D(name, title, nBins, 0, nBins)
    hist.GetYaxis().SetTitle('Number of Segments')
    hist.Sumw2()
    return hist


def _bookDataEmulHist(variable, bitName, nBins, low, high):
    """Book a square TH2D (x = data, y = emulator) for one ltTwinMuxOut variable."""
    name = 'h2_ltTwinMuxOut_' + variable + '_' + bitName
    hist = TH2D(name, name, nBins, low, high, nBins, low, high)
    hist.GetXaxis().SetTitle('Data.ltTwinMuxOut_' + variable)
    hist.GetYaxis().SetTitle('Emul.ltTwinMuxOut_' + variable)
    hist.Sumw2()
    return hist


def _warnEventMismatch(ievt, inFile, what, dataValue, emulValue, eventNumber):
    """Print the warning block for an entry whose data/emulator ids differ."""
    print("[WARNING] Entry: " + str(ievt) + " " + what + " are different")
    print("==== File name: " + str(inFile) + " ====")
    print("---- Data: " + str(dataValue) + " ----")
    print("---- Emul: " + str(emulValue) + " ----")
    print("Skip event number" + str(eventNumber))


def _isSameTrigger(data, emul, i, j):
    """Tell whether data trigger i and emulator trigger j are the same primitive.

    Both must have is2nd == 0 and agree on rpcBit, station, wheel, sector,
    BX and quality.
    """
    return (data.ltTwinMuxOut_rpcBit[i] == emul.ltTwinMuxOut_rpcBit[j]
            and data.ltTwinMuxOut_is2nd[i] == 0
            and emul.ltTwinMuxOut_is2nd[j] == 0
            and data.ltTwinMuxOut_station[i] == emul.ltTwinMuxOut_station[j]
            and data.ltTwinMuxOut_wheel[i] == emul.ltTwinMuxOut_wheel[j]
            and data.ltTwinMuxOut_sector[i] == emul.ltTwinMuxOut_sector[j]
            and data.ltTwinMuxOut_BX[i] == emul.ltTwinMuxOut_BX[j]
            and data.ltTwinMuxOut_quality[i] == emul.ltTwinMuxOut_quality[j])


def _countTriggers(tree, nSegment, h_nRPCbit):
    """Add the triggers of the current entry to nSegment and fill h_nRPCbit.

    nSegment is indexed as [station][wheel][sector][BX].
    """
    for i in range(tree.ltTwinMuxOut_nTrigs):
        # NOTE(review): a negative index would silently wrap around in a
        # Python list -- confirm the branch ranges match these offsets.
        iStation_idx = tree.ltTwinMuxOut_station[i] - 1
        iWheel_idx = tree.ltTwinMuxOut_wheel[i] + 2
        iSector_idx = tree.ltTwinMuxOut_sector[i] - 1
        iBX_idx = tree.ltTwinMuxOut_BX[i] + 4
        nSegment[iStation_idx][iWheel_idx][iSector_idx][iBX_idx] += 1
        h_nRPCbit.Fill(tree.ltTwinMuxOut_rpcBit[i])


def compareDataEmul(args):
    """Compare ltTwinMuxOut trigger primitives between data and emulator.

    Args:
        args: sequence ``(dataDir, emulDir, inFile, outDir, multiProcess)``.
            ``inFile`` is the data ntuple name inside ``dataDir``; its last
            five characters are stripped to build the other file names.  The
            emulator ntuple is ``<inFile[:-5]>_Emulator.root`` inside
            ``emulDir``, except for names containing ``'2018D_'`` which use a
            fixed emulator file.  When ``multiProcess`` is true the progress
            bar is not printed.

    The two 'dtNtupleProducer/DTTREE' trees are read entry by entry.  Entries
    whose run or event numbers differ are reported and skipped.  Histograms
    are written to ``<outDir>/comparison/<inFile[:-5]>_Comparison.root``.
    """
    dataDir = args[0]
    emulDir = args[1]
    inFile = args[2]
    outDir = args[3]
    multiProcess = args[4]

    # branches of ltTwinMuxOut
    #   nTrigs
    #   wheel, sector, station, BX
    #   quality, rpcBit, is2nd
    #   phi, phiB, posLoc_x, dirLoc_phi

    # Lazy range on Python 2 (xrange) as well as Python 3 (range).
    try:
        entryRange = xrange
    except NameError:
        entryRange = range

    utils.setChamberName()

    # Create the output directory without a check-then-create race: several
    # worker processes may get here at the same time.
    comparisonDir = outDir + '/comparison'
    try:
        os.makedirs(comparisonDir)
    except OSError:
        if not os.path.isdir(comparisonDir):
            raise

    f_out = TFile.Open(
        comparisonDir + '/' + inFile[:-5] + '_Comparison.root', 'recreate')

    DTTREE_data = TChain('dtNtupleProducer/DTTREE')
    DTTREE_data.Add(dataDir + '/' + inFile)
    if '2018D_' in inFile:
        emulFile = 'DTDPGNtuple_10_3_3_ZMuSkim_2018D_Emulator_nearWheel.root'
    else:
        emulFile = inFile[:-5] + '_Emulator.root'
    DTTREE_emul = TChain('dtNtupleProducer/DTTREE')
    DTTREE_emul.Add(emulDir + '/' + emulFile)

    # Histograms booked from here on are attached to the output file.
    f_out.cd()

    h_nRPCbitData = _bookCountHist('h_nRPCbitData', 'RPC bit', 'RPC bit', 3)
    h_nRPCbitEmul = _bookCountHist('h_nRPCbitEmul', 'RPC bit', 'RPC bit', 3)

    h_nSegmentData = _bookSegmentHist('h_nSegmentData', 'Number of Segments',
                                      240)
    h_nSegmentEmul = _bookSegmentHist('h_nSegmentEmul', 'Number of Segments',
                                      240)

    # One histogram per station, plus bin labels for every chamber.
    h_nSegmentData_RB = []
    h_nSegmentEmul_RB = []
    iBin = 1
    for i in range(utils.nStation):
        rbTag = 'RB' + str(i + 1)
        h_dataRB = _bookSegmentHist('h_nSegmentData_' + rbTag,
                                    'Number of Segments in ' + rbTag, 60)
        h_emulRB = _bookSegmentHist('h_nSegmentEmul_' + rbTag,
                                    'Number of Segments in ' + rbTag, 60)
        h_nSegmentData_RB.append(h_dataRB)
        h_nSegmentEmul_RB.append(h_emulRB)

        iBin2 = 1
        for j in range(utils.nWheel):
            for k in range(utils.nSector):
                str_name2 = utils.name_Wheel[j] + '_' + utils.name_Sector[k]
                str_name = rbTag + '_' + str_name2
                h_nSegmentData.GetXaxis().SetBinLabel(iBin, str_name)
                h_nSegmentEmul.GetXaxis().SetBinLabel(iBin, str_name)
                h_dataRB.GetXaxis().SetBinLabel(iBin2, str_name2)
                h_emulRB.GetXaxis().SetBinLabel(iBin2, str_name2)
                iBin += 1
                iBin2 += 1

    h_nSegmentPerChamberData = _bookCountHist(
        "h_nSegmentPerChamberData", "Number of Segments per Chamber",
        'nSegments/chamber', 5)
    h_nSegmentPerChamberEmul = _bookCountHist(
        "h_nSegmentPerChamberEmul", "Number of Segments per Chamber",
        'nSegments/chamber', 5)

    # Data-vs-emulator 2D histograms, one per entry of utils.name_RPCbit.
    h2_ltTwinMuxOut_phi = []
    h2_ltTwinMuxOut_phiB = []
    h2_ltTwinMuxOut_posLoc_x = []
    h2_ltTwinMuxOut_dirLoc_phi = []
    for bitName in utils.name_RPCbit:
        h2_ltTwinMuxOut_phi.append(
            _bookDataEmulHist('phi', bitName, 20, -2000.0, 2000.0))
        h2_ltTwinMuxOut_phiB.append(
            _bookDataEmulHist('phiB', bitName, 20, -450.0, 450.0))
        h2_ltTwinMuxOut_posLoc_x.append(
            _bookDataEmulHist('posLoc_x', bitName, 20, -350.0, 350.0))
        h2_ltTwinMuxOut_dirLoc_phi.append(
            _bookDataEmulHist('dirLoc_phi', bitName, 20, -90.0, 90.0))

    # (histogram list, branch name) pairs filled for every matched trigger.
    matchHists = (
        (h2_ltTwinMuxOut_phi, 'ltTwinMuxOut_phi'),
        (h2_ltTwinMuxOut_phiB, 'ltTwinMuxOut_phiB'),
        (h2_ltTwinMuxOut_posLoc_x, 'ltTwinMuxOut_posLoc_x'),
        (h2_ltTwinMuxOut_dirLoc_phi, 'ltTwinMuxOut_dirLoc_phi'),
    )

    nEntries = DTTREE_data.GetEntries()
    for ievt in entryRange(nEntries):
        if not multiProcess:
            utils.printProgress(ievt, nEntries, 'Progress: ', 'Complete', 1,
                                25)

        DTTREE_data.GetEntry(ievt)
        DTTREE_emul.GetEntry(ievt)

        # Both trees must describe the same event, otherwise skip the entry.
        if DTTREE_data.event_runNumber != DTTREE_emul.event_runNumber:
            _warnEventMismatch(ievt, inFile, 'run numbers',
                               DTTREE_data.event_runNumber,
                               DTTREE_emul.event_runNumber,
                               DTTREE_data.event_eventNumber)
            continue
        if DTTREE_data.event_eventNumber != DTTREE_emul.event_eventNumber:
            _warnEventMismatch(ievt, inFile, 'event numbers',
                               DTTREE_data.event_eventNumber,
                               DTTREE_emul.event_eventNumber,
                               DTTREE_data.event_eventNumber)
            continue

        # Per-event counters: nSegment[station][wheel][sector][BX]
        nSegmentData = [[[[0 for i in range(utils.nBX)]
                          for j in range(utils.nSector)]
                         for k in range(utils.nWheel)]
                        for l in range(utils.nStation)]
        nSegmentEmul = [[[[0 for i in range(utils.nBX)]
                          for j in range(utils.nSector)]
                         for k in range(utils.nWheel)]
                        for l in range(utils.nStation)]

        # Pair every data trigger with every emulator trigger and keep the
        # pairs that describe the same primitive.
        for i in range(DTTREE_data.ltTwinMuxOut_station.size()):
            for j in range(DTTREE_emul.ltTwinMuxOut_station.size()):
                if not _isSameTrigger(DTTREE_data, DTTREE_emul, i, j):
                    continue

                # Index 0 is filled for every matched pair; indices 1..3
                # are filled for rpcBit 0..2 respectively.
                histIndices = [0]
                rpcBit = DTTREE_data.ltTwinMuxOut_rpcBit[i]
                if rpcBit in (0, 1, 2):
                    histIndices.append(int(rpcBit) + 1)

                for histList, branchName in matchHists:
                    dataValue = getattr(DTTREE_data, branchName)[i]
                    emulValue = getattr(DTTREE_emul, branchName)[j]
                    for histIdx in histIndices:
                        histList[histIdx].Fill(dataValue, emulValue)

        _countTriggers(DTTREE_data, nSegmentData, h_nRPCbitData)
        _countTriggers(DTTREE_emul, nSegmentEmul, h_nRPCbitEmul)

        # Fill the per-chamber summaries.  binNum runs over all chambers,
        # binNum2 restarts at every station.
        binNum = 0
        for i in range(utils.nStation):
            binNum2 = 0
            for j in range(utils.nWheel):
                for k in range(utils.nSector):
                    tmp1 = 0.0
                    tmp2 = 0.0
                    for l in range(utils.nBX):
                        tmp1 += nSegmentData[i][j][k][l]
                        tmp2 += nSegmentEmul[i][j][k][l]
                        h_nSegmentPerChamberData.Fill(nSegmentData[i][j][k][l])
                        h_nSegmentPerChamberEmul.Fill(nSegmentEmul[i][j][k][l])
                    # The summed count is used as the fill weight.
                    h_nSegmentData.Fill(binNum, tmp1)
                    h_nSegmentEmul.Fill(binNum, tmp2)
                    h_nSegmentData_RB[i].Fill(binNum2, tmp1)
                    h_nSegmentEmul_RB[i].Fill(binNum2, tmp2)
                    binNum += 1
                    binNum2 += 1

    # Copies of the per-chamber histograms with the first bin (label '0')
    # cleared.
    h_nSegmentPerChamberData2 = h_nSegmentPerChamberData.Clone()
    h_nSegmentPerChamberData2.SetName(h_nSegmentPerChamberData.GetName() +
                                      '_exceptSeg0')
    h_nSegmentPerChamberData2.SetBinContent(1, 0)
    h_nSegmentPerChamberEmul2 = h_nSegmentPerChamberEmul.Clone()
    h_nSegmentPerChamberEmul2.SetName(h_nSegmentPerChamberEmul.GetName() +
                                      '_exceptSeg0')
    h_nSegmentPerChamberEmul2.SetBinContent(1, 0)

    f_out.Write()
    f_out.Close()