def test():
    """Iterating a file list: a missing file raises, but a non-matching glob is skipped."""
    first = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root")
    second = skhep_testdata.data_path("uproot-sample-6.18.00-uncompressed.root")
    missing = first.replace(".root", "-DOES-NOT-EXIST.root")
    missing_glob = first.replace(".root", "-DOES-NOT-EXIST-*.root")

    # Huge step size => exactly one chunk per file.
    chunks = list(uproot.iterate([first, second], step_size="1 TB", library="np"))
    assert len(chunks) == 2

    # An explicitly named file that does not exist must raise.
    with pytest.raises(uproot._util._FileNotFoundError):
        list(uproot.iterate([first, second, missing], library="np"))

    # A glob pattern with no matches is silently ignored.
    chunks = list(uproot.iterate([first, second, missing_glob], step_size="1 TB", library="np"))
    assert len(chunks) == 2
def __init__(self, path: Union[str, List[str]], treepath: str, data_branches: List[str], target_branch: str, num_workers: int = 1, ) -> None:
    """Load every chunk of the given tree(s) into in-memory examples.

    Args:
        path: ROOT file path or list of paths.
        treepath: Name of the tree inside each file.
        data_branches: Branches used as model inputs.
        target_branch: Branch used as the training target.
        num_workers: Number of decompression/interpretation workers
            forwarded to uproot.iterate.
    """
    self.data_branches = data_branches
    self.target_branch = target_branch
    files = self._to_files(path, treepath)
    branches = data_branches + [target_branch]
    tree_iter = uproot.iterate(files, expressions=branches, library='np',
                               num_workers=num_workers)
    total = self._get_total_entries(files)
    pbar = tqdm.tqdm(tree_iter)
    self._examples = []

    def print_progress():
        # Processed count is measured in examples, total in tree entries.
        processed = len(self._examples)
        pbar.set_description(f'Total = {total:d}, Processed: {processed:d}'
                             f' ({100 * processed / total:.2f} %)')

    print_progress()
    # Bugfix: iterate through the tqdm wrapper so the progress bar actually
    # advances; the original looped over the raw iterator, which consumed the
    # chunks behind tqdm's back and left the bar frozen at 0.
    for chunk in pbar:
        self._examples += self._process(chunk)
        print_progress()
def test():
    """An empty tree must be iterable without raising (regression for issue 335)."""
    chunks = uproot.iterate(
        skhep_testdata.data_path("uproot-issue335.root") + ":empty_tree",
        ["var"],
        library="np",
    )
    for _ in chunks:
        pass
def __init__(self, file_list, variables, cuts="", step_size=10000, nbins=50, label="Default"):
    """Set up a chunked histogram filler over a list of ROOT files.

    Args:
        file_list: Files (or file:tree specifiers) passed to uproot.iterate.
        variables: Branch names to read and histogram.
        cuts: Selection string (currently stored implicitly; not applied here).
        step_size: Entries per iteration chunk.
        nbins: Number of histogram bins per variable.
        label: Display label for this dataset.
    """
    # Bugfix: the original passed the undefined name `variables_list` as
    # filter_name, which raises NameError at the first next() on the
    # iterator; the `variables` parameter is clearly what was intended
    # (it is used for every other structure below) — confirm no module-level
    # `variables_list` was relied upon.
    self._iterator = uproot.iterate(file_list, filter_name=variables, library="np")
    self._nbins = nbins
    self.hist_dict = dict.fromkeys(variables)
    self._variables = variables
    self.label = label
    # Per-variable [min, max] accumulator, initialised to zeros.
    self._var_max_min_dict = dict.fromkeys(variables)
    for variable in self._var_max_min_dict:
        self._var_max_min_dict[variable] = np.zeros(2, dtype='float32')
    self._step_size = step_size
    self._file_list = file_list
def test_function_iterate_pandas():
    """Check global entry bookkeeping while iterating many sample files with library='pd'."""
    pandas = pytest.importorskip("pandas")
    versions = [
        "5.23.02", "5.24.00", "5.25.02", "5.26.00", "5.27.02", "5.28.00",
        "5.29.02", "5.30.00", "6.08.04", "6.10.05", "6.14.00", "6.16.00",
        "6.18.00", "6.20.04",
    ]
    files = [
        skhep_testdata.data_path("uproot-sample-{0}-uncompressed.root".format(x)) + ":sample"
        for x in versions
    ]
    offset = 0
    for arrays, report in uproot.iterate(files, "i8", report=True, library="pd"):
        assert arrays["i8"].values[:5].tolist() == [-15, -14, -13, -12, -11]
        # The pandas index and the report must both continue the global count.
        assert arrays.index.values[0] == offset
        assert report.global_entry_start == offset
        assert report.global_entry_stop == offset + len(arrays["i8"])
        offset += len(arrays["i8"])
def iterate(self, progressbar=False, n_files=None, use_cache=True, **kwargs):
    """
    Wrapper around uproot.iterate:
    - Gets a progress bar option
    - Possibility to limit number of files
    - Can use a class cache variable
    """
    if use_cache:
        if not len(self.cache):
            logger.warning('use_cache was True but cache is empty for %s', self)
        iterator = iter(self.cache)
        total = len(self.cache)
    else:
        # Optionally restrict to the first n_files root files.
        rootfiles = self.rootfiles[:n_files] if n_files else self.rootfiles[:]
        iterator = uproot.iterate(rootfiles, self.treename, **kwargs)
        total = len(rootfiles)
        if progressbar:
            logger.info('Iterating over %s rootfiles for %s', total, self)
    if progressbar:
        if svjflatanalysis.HAS_TQDM:
            iterator = svjflatanalysis.tqdm(
                iterator, total=total,
                desc='arrays' if use_cache else 'root files')
        else:
            logger.error('tqdm could not be imported, progressbars are disabled')
    for arrays in iterator:
        yield arrays
def getData(fnames="", treeName="Events", chunks=False):
    """Read the configured collections from fnames, either chunked or lazily.

    Branch names come from the module-level `branches` mapping
    (collection -> attribute list), flattened to "collection_attr".
    """
    branchlist = [
        collection + "_" + attr
        for collection, attrs in branches.items()
        for attr in attrs
    ]
    source = fnames + ":" + treeName
    if chunks:
        return uproot.iterate(source, branchlist)
    return uproot.lazy(source, branchlist)
def process_sample(sample_name, sample_path, is_sig, is_mc, channel, camp=None):
    """Read one ntuple sample in 200 MB chunks, tag each chunk, return the list of DataFrames."""
    print(f"Processing: {sample_name}")
    sample_dfs = list()
    chunk_iter = uproot.iterate(
        f"{sample_path}:ntup",
        feature_list,
        cut=f"(ll_m >= 200) & ({channel} == 1)",
        library="pd",
        step_size="200 MB",
    )
    for df in chunk_iter:
        # Live one-line RAM report (overwritten in place via \r).
        mem_available = psutil.virtual_memory().available / GB
        mem_total = psutil.virtual_memory().total / GB
        print(
            f"RAM usage {mem_available:.02f} / {mem_total:.02f} GB",
            end="\r",
            flush=True,
        )
        # Required tags, then extra tags usable for later pre-training cuts.
        df = df.assign(sample_name=sample_name, is_sig=is_sig, is_mc=is_mc, camp=camp)
        sample_dfs.append(df)
    # Clear the in-place progress line.
    sys.stdout.write("\033[K")
    return sample_dfs
def loadDF(filenames, columns=None, tree="ntp1", preselection=None):
    """Load the given tree(s) into a single pandas DataFrame.

    :param filenames: input ROOT files (must be non-empty)
    :param columns: branches to read (None = all)
    :param tree: tree name inside each file
    :param preselection: Function that Determines which rows to keep.
        Receives the loaded dataframe and returns a boolean mask, e.g.

            def cutEtaMass(df):
                return (df.eta_Mass > 1.5) & (df.eta_Mass < 3)

    :return: concatenated DataFrame of all (optionally filtered) chunks
    :raises ValueError: if filenames is empty
    """
    if not filenames:
        raise ValueError("filenames is empty")
    frames = []
    for frame in uproot.iterate(filenames, tree, columns,
                                outputtype=pd.DataFrame,
                                namedecode="utf-8"):
        if preselection:
            frame = frame[preselection(frame)]
        frames.append(frame)
    # Single concat at the end avoids quadratic copying.
    return pd.concat(frames)
def test():
    """Smoke test: iterating the same file twice with library='pd' must not raise."""
    files = [skhep_testdata.data_path("uproot-HZZ-uncompressed.root") + ":events"] * 2
    for _ in uproot.iterate(files, ["Muon_Px", "Jet_Px", "MET_px"], library="pd"):
        pass
def getTrackEfficiency(inQAname, outfilename):
    # Compute the LMD track reconstruction efficiency from a QA file and save
    # a histogram of ThetaRec for successfully reconstructed tracks.
    # uproot.iterate will produce a dict with JaggedArrays, so we can create an empty dict and append each iteration
    try:
        # open the root trees in a TChain-like manner
        print(f'reading file {str(inQAname)}')
        # NOTE(review): clean/recStatus are re-assigned on every chunk, so only
        # the LAST chunk survives the loop — presumably each input is a single
        # chunk; confirm before trusting the efficiency number.
        for array in uproot.iterate(
                str(inQAname), 'pndsim',
                [b'LMDTrackQ.fTrkRecStatus', b'LMDTrackQ.fThetarec']):
            clean, recStatus = np.array(cleanArray(array))
            print(f'clean: {clean}')
    except Exception as e:
        # Best-effort: report the failure and bail out without plotting.
        print(f'exception!\n{e}')
        print(f'is kill')
        return
    # RecStatus == 0 marks a successfully reconstructed track.
    maskStatGood = ((recStatus == 0))
    maskStatBAd = ((recStatus != 0))
    good = clean[maskStatGood]
    bad = clean[maskStatBAd]
    print(f'len: good:{len(good)}, bad:{len(bad)}')
    # Efficiency in percent; assumes at least one track was found.
    eff = len(good) * 100 / (len(good) + len(bad))
    plt.hist(good, bins=50, range=(0.002, 0.01))
    plt.suptitle(f'ThetaRec (for RecStatus=0)\nTrack Efficiency: {eff:.1f}%')
    #plt.yscale('log')
    # plt.show()
    plt.savefig(outfilename)
    plt.close()
def process_entry(selection, observable, sample, is_MC=True):
    """Histogram `observable` over all files of `sample` after applying `selection`.

    Returns (counts, edges) as plain lists; MC events are weighted by
    weight * xs_weight * lumi, data events by 1.
    """
    samplename = os.path.basename(sample.path)
    files = glob.glob(os.path.join(sample.path, '*.root*'))
    # MC trees carry a '_Nom' suffix.
    treepath = samplename + '_Nom' if is_MC else samplename
    wanted_branches = [
        'jet_pt', 'jet_eta', 'jet_phi', 'jet_e',
        'weight', 'xs_weight',
        'n_*'
    ]
    all_counts = None
    for chunk in uproot.iterate(files, branches=wanted_branches, treepath=treepath):
        table = awkward.array.table.Table(chunk)
        oldlen = len(table)
        table = table[selection(table)]
        values = observable(table)
        if is_MC:
            weights = table['weight'] * table['xs_weight'] * sample.lumi
        else:
            weights = np.ones_like(values)
        counts, edges = np.histogram(values, bins=observable.binning, weights=weights)
        # Accumulate across chunks.
        all_counts = counts if all_counts is None else all_counts + counts
    return all_counts.tolist(), edges.tolist()
def uproot_generator():
    """Yield each chunk as a dense array stacked over the requested branches (closure over path/treepath/branches/entrysteps/kwargs)."""
    for chunk in uproot.iterate(path=path, treepath=treepath,
                                branches=branches,
                                entrysteps=entrysteps, **kwargs):
        yield np.array([chunk[branch] for branch in branches])
def get_from_path(path, arrays):
    """Return an uproot iterator over the nEXOevents tree of every file matching `path`."""
    matched_files = glob(path, recursive=True)
    # One entry per step; a size string such as '500 MB' would batch instead.
    return uproot.iterate(matched_files, b'nEXOevents', arrays, entrysteps=1)
def get_MET(rflist):
    """Collect the PuppiMissingET.MET branch from every chunk of the given files."""
    branches = ['PuppiMissingET.MET']
    print('Reading ', branches, ' from your root files')
    MET = []
    for arrays in tqdm(up.iterate(rflist, branches)):
        MET.append(arrays[b"PuppiMissingET.MET"])
    print('Done')
    return MET
def reset_dataloader(self):
    # Rewind the loader: zero the chunk counter and rebuild the awkward-array
    # batch generator with the configured branches, cut and batch size.
    self._current_index = 0
    self._batches_generator = uproot.iterate(
        self.files, filter_name=self._variables_list, cut=self.cut, library='ak',
        step_size=self.specific_batch_size)
    # Free memory still held by the previous generator's chunks.
    gc.collect()
def extract_batches(input_files, tree_name, variables=None):
    """Build an uproot.iterate generator over `file:tree` specifiers for the given branches."""
    logger.info(
        "Creating generator using uproot.iterate for input files {}".format(
            input_files))
    specifiers = ["{}:{}".format(fl, tree_name) for fl in input_files]
    return uproot.iterate(specifiers, expressions=variables)
def test_tree_iterator4(self):
    """Iterate two copies of foriter2.root with various step sizes and check
    every chunk against the expected word list.

    Refactor: the six copy-pasted step-size stanzas of the original are
    collapsed into one data-driven loop; the sequence of iterate() calls and
    assertions is unchanged.
    """
    words2 = [
        b"zero", b"one", b"two", b"three", b"four", b"five", b"six",
        b"seven", b"eight", b"nine", b"ten", b"eleven", b"twelve",
        b"thirteen", b"fourteen", b"fifteen", b"sixteen", b"seventeen",
        b"eighteen", b"ninteen", b"twenty", b"twenty-one", b"twenty-two",
        b"twenty-three", b"twenty-four", b"twenty-five", b"twenty-six",
        b"twenty-seven", b"twenty-eight", b"twenty-nine", b"thirty"
    ]
    paths = ["tests/foriter2.root", "tests/foriter2.root"]

    # one big array
    for arrays in uproot.iterate(paths, "foriter2", 1000):
        self.assertEqual(arrays[b"data"].tolist(), words2)

    # step equal to the basket size (6), smaller (3, 4), larger (12, 10),
    # and the singleton case (1); the cursor wraps at each file boundary.
    for step in (6, 3, 4, 12, 10, 1):
        i = 0
        for arrays in uproot.iterate(paths, "foriter2", step):
            self.assertEqual(arrays[b"data"].tolist(), words2[i:i + step])
            i += step
            if i > 30:
                i = 0
def build_by_file_pattern(self, file_pattern, tree_path, branches, namedecode):
    """Iterate all files matching the pattern, printing per-chunk provenance (path, file, entry range, size)."""
    chunk_iter = uproot.iterate(
        file_pattern, tree_path, branches,
        reportpath=True, reportfile=True, reportentries=True,
        namedecode=namedecode)
    for path, file, start, stop, arrays in chunk_iter:
        print(path, file, start, stop, len(arrays))
def test_function_iterate_pandas_2():
    """Brace-glob file spec with library='pd': the MultiIndex must continue across files."""
    pandas = pytest.importorskip("pandas")
    files = skhep_testdata.data_path("uproot-HZZ.root").replace(
        "HZZ", "HZZ-{uncompressed,zlib,lz4}")
    offset = 0
    chunk_iter = uproot.iterate({files: "events"}, "Muon_Px",
                                report=True, library="pd")
    for arrays, report in chunk_iter:
        assert arrays["Muon_Px"].index.values[0] == (offset, 0)
        offset += report.tree.num_entries
def next_batch(self):
    """Return the next (batch, labels) pair, transparently restarting the
    underlying generator when it is exhausted.

    Returns:
        Tuple of the batch and an array of this loader's class label, one
        entry per event in the batch.
    """
    try:
        batch = next(self._batches_generator)
    except StopIteration:
        # Exhausted: rebuild the generator and retry.
        # Bugfix: pass library='ak' so the rebuilt generator matches the one
        # created by reset_dataloader; the original omitted it here, silently
        # switching batch type to uproot's default library after one epoch.
        self._batches_generator = uproot.iterate(
            self.files,
            filter_name=self._variables_list,
            cut=self.cut,
            library='ak',
            step_size=self.specific_batch_size)
        return self.next_batch()
    self._current_index += 1
    return batch, np.ones(len(batch)) * self.class_label
def test_function_iterate_pandas_2():
    """Explicit file:tree list with library='pd': the MultiIndex must continue across files."""
    pandas = pytest.importorskip("pandas")
    suffixes = ["", "-uncompressed", "-zlib", "-lz4"]
    files = [
        skhep_testdata.data_path("uproot-HZZ{0}.root".format(s)) + ":events"
        for s in suffixes
    ]
    offset = 0
    for arrays, report in uproot.iterate(files, "Muon_Px", report=True, library="pd"):
        assert arrays["Muon_Px"].index.values[0] == (offset, 0)
        offset += report.tree.num_entries
def test_function_iterate():
    """Wildcard file spec with library='np': reports must track the global entry count."""
    files = skhep_testdata.data_path(
        "uproot-sample-6.20.04-uncompressed.root").replace("6.20.04", "*")
    offset = 0
    chunk_iter = uproot.iterate({files: "sample"}, "i8", report=True, library="np")
    for arrays, report in chunk_iter:
        assert arrays["i8"][:5].tolist() == [-15, -14, -13, -12, -11]
        assert report.global_entry_start == offset
        assert report.global_entry_stop == offset + len(arrays["i8"])
        offset += len(arrays["i8"])
def test_iterate():
    """File/tree specification forms: missing tree raises unless allow_missing, and all spec shapes work."""
    # No tree name at all: must raise.
    with pytest.raises(ValueError):
        for _ in uproot.iterate(skhep_testdata.data_path("uproot-issue63.root")):
            pass

    # Wrong tree name with allow_missing: yields nothing instead of raising.
    missing = uproot.iterate(
        {skhep_testdata.data_path("uproot-issue63.root"): "blah"},
        allow_missing=True,
    )
    assert len(list(missing)) == 0

    files = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root").replace(
        "6.16.00", "*"
    )
    # All equivalent spec shapes: plain glob, dict, list, list-of-dict.
    for spec in (files, {files: "sample"}, [files], [{files: "sample"}]):
        for _ in uproot.iterate(spec, "Ai8"):
            pass
def run_query(input_filenames=None, tree_name=None, branches=None):
    # Auto-generated (func_adl-style) query: stream events from the input
    # files, apply the trilepton selection encoded in `b`, project out the
    # MVA3lCERN_weight_ttH column with `a`, and concatenate chunk results.
    # NOTE(review): machine-generated — do not hand-edit the lambdas below.
    import awkward, uproot
    # `a`: project each (already filtered) event table down to the single
    # column MVA3lCERN_weight_ttH, wrapped in an awkward Table; every access
    # is written `event.X if hasattr(event, 'X') else event['X']` so it works
    # for both attribute- and mapping-style tables.
    a = (lambda event: (awkward.Table if hasattr(awkward, 'Table') else awkward['Table'])((event.MVA3lCERN_weight_ttH if hasattr(event, 'MVA3lCERN_weight_ttH') else event['MVA3lCERN_weight_ttH'])))
    # `b`: boolean event mask implementing the selection — lepton kinematics
    # and multiplicity, Z-mass vetoes, b-tagging, MVA weight cuts and the
    # tight-lepton definitions (presumably the ttH 3l CERN region — confirm
    # against the generating query).
    b = (lambda event: event[(((((((((((((((((((((((((((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) == 0)) & (abs((event.total_charge if hasattr(event, 'total_charge') else event['total_charge'])) == 1)) & ((event.nJets_OR_T if hasattr(event, 'nJets_OR_T') else event['nJets_OR_T']) >= 2)) & ((event.nJets_OR_T_MV2c10_70 if hasattr(event, 'nJets_OR_T_MV2c10_70') else event['nJets_OR_T_MV2c10_70']) > 0)) & ((event.lep_Pt_1 if hasattr(event, 'lep_Pt_1') else event['lep_Pt_1']) > 15000.0)) & ((event.lep_Pt_2 if hasattr(event, 'lep_Pt_2') else event['lep_Pt_2']) > 15000.0)) & ((event.lep_isolationFixedCutLoose_0 if hasattr(event, 'lep_isolationFixedCutLoose_0') else event['lep_isolationFixedCutLoose_0']) > 0)) & (abs(((event.Mlll012 if hasattr(event, 'Mlll012') else event['Mlll012']) - 91200.0)) > 10000.0)) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1']))) | ((event.Mll01 if hasattr(event, 'Mll01') else event['Mll01']) > 12000.0))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) | ((event.Mll02 if hasattr(event, 'Mll02') else event['Mll02']) > 12000.0))) & (((((abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 13) & ((event.lep_isMedium_0 if hasattr(event, 'lep_isMedium_0') else event['lep_isMedium_0']) > 0)) | (abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 11)) & (((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 11) & (abs((event.lep_Eta_1 if hasattr(event, 'lep_Eta_1') else event['lep_Eta_1'])) < 2.0)) | ((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 13) & ((event.lep_isMedium_1 if hasattr(event, 'lep_isMedium_1') else event['lep_isMedium_1']) > 0)))) & (((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 11) & (abs((event.lep_Eta_2 if hasattr(event, 'lep_Eta_2') else event['lep_Eta_2'])) < 2.0)) | ((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 13) & ((event.lep_isMedium_2 if hasattr(event, 'lep_isMedium_2') else event['lep_isMedium_2']) > 0))))) & ((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) * abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) != 169) & ((event.DRll12 if hasattr(event, 'DRll12') else event['DRll12']) > 0.5)) | ((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) * abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) == 169))) & ((event.Mll12 if hasattr(event, 'Mll12') else event['Mll12']) > 12000.0)) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1']))) | (abs(((event.Mll01 if hasattr(event, 'Mll01') else event['Mll01']) - 91200.0)) > 10000.0))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) | (abs(((event.Mll02 if hasattr(event, 'Mll02') else event['Mll02']) - 91200.0)) > 10000.0))) & ((event.MVA3lCERN_weight_ttH if hasattr(event, 'MVA3lCERN_weight_ttH') else event['MVA3lCERN_weight_ttH']) > (-1))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1']))) | (abs(((event.Mll01 if hasattr(event, 'Mll01') else event['Mll01']) - 91200.0)) > 10000.0))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) | (abs(((event.Mll02 if hasattr(event, 'Mll02') else event['Mll02']) - 91200.0)) > 10000.0))) & ((event.MVA3lCERN_weight_ttH if hasattr(event, 'MVA3lCERN_weight_ttH') else event['MVA3lCERN_weight_ttH']) > 0.3)) & ((event.MVA3lCERN_weight_ttW if hasattr(event, 'MVA3lCERN_weight_ttW') else event['MVA3lCERN_weight_ttW']) < 0.75)) & ((event.MVA3lCERN_weight_ttZ if hasattr(event, 'MVA3lCERN_weight_ttZ') else event['MVA3lCERN_weight_ttZ']) < 0.75)) & ((event.MVA3lCERN_weight_VV if hasattr(event, 'MVA3lCERN_weight_VV') else event['MVA3lCERN_weight_VV']) < 0.75)) & ((event.MVA3lCERN_weight_ttbar if hasattr(event, 'MVA3lCERN_weight_ttbar') else event['MVA3lCERN_weight_ttbar']) < 0.3)) & ((((((((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) * (event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) > 0)) & ((event.lep_isQMisID_1 if hasattr(event, 'lep_isQMisID_1') else event['lep_isQMisID_1']) == 0)) & ((event.lep_isQMisID_0 if hasattr(event, 'lep_isQMisID_0') else event['lep_isQMisID_0']) == 0)) | ((((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.lep_isQMisID_2 if hasattr(event, 'lep_isQMisID_2') else event['lep_isQMisID_2']) == 0)) & ((event.lep_isQMisID_1 if hasattr(event, 'lep_isQMisID_1') else event['lep_isQMisID_1']) == 0))) | (((event.quadlep_type if hasattr(event, 'quadlep_type') else event['quadlep_type']) > 0) & ((event.FSF_4L_tot if hasattr(event, 'FSF_4L_tot') else event['FSF_4L_tot']) == 1))) & (((((((((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) & (((((abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 13) & ((event.lep_isMedium_0 if hasattr(event, 'lep_isMedium_0') else event['lep_isMedium_0']) > 0)) & ((event.lep_isolationFixedCutLoose_0 if hasattr(event, 'lep_isolationFixedCutLoose_0') else event['lep_isolationFixedCutLoose_0']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_0 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_0') else event['lep_promptLeptonVeto_TagWeight_0']) < (-0.5))) | ((((((abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 11) & ((event.lep_isolationFixedCutLoose_0 if hasattr(event, 'lep_isolationFixedCutLoose_0') else event['lep_isolationFixedCutLoose_0']) > 0)) & ((event.lep_isTightLH_0 if hasattr(event, 'lep_isTightLH_0') else event['lep_isTightLH_0']) > 0)) & ((event.lep_chargeIDBDTTight_0 if hasattr(event, 'lep_chargeIDBDTTight_0') else event['lep_chargeIDBDTTight_0']) > 0.7)) & ((event.lep_ambiguityType_0 if hasattr(event, 'lep_ambiguityType_0') else event['lep_ambiguityType_0']) == 0)) & ((event.lep_promptLeptonVeto_TagWeight_0 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_0') else event['lep_promptLeptonVeto_TagWeight_0']) < (-0.7))))) & (((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 13) & ((event.lep_isMedium_1 if hasattr(event, 'lep_isMedium_1') else event['lep_isMedium_1']) > 0)) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else event['lep_promptLeptonVeto_TagWeight_1']) < (-0.5))) | ((((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 11) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_isTightLH_1 if hasattr(event, 'lep_isTightLH_1') else event['lep_isTightLH_1']) > 0)) & ((event.lep_chargeIDBDTTight_1 if hasattr(event, 'lep_chargeIDBDTTight_1') else event['lep_chargeIDBDTTight_1']) > 0.7)) & ((event.lep_ambiguityType_1 if hasattr(event, 'lep_ambiguityType_1') else event['lep_ambiguityType_1']) == 0)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else event['lep_promptLeptonVeto_TagWeight_1']) < (-0.7))))) | ((((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) == 0)) & (((((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 13) & ((event.lep_isolationFixedCutLoose_2 if hasattr(event, 'lep_isolationFixedCutLoose_2') else event['lep_isolationFixedCutLoose_2']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_2 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_2') else event['lep_promptLeptonVeto_TagWeight_2']) < (-0.5))) | ((((((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 11) & ((event.lep_isolationFixedCutLoose_2 if hasattr(event, 'lep_isolationFixedCutLoose_2') else event['lep_isolationFixedCutLoose_2']) > 0)) & ((event.lep_isTightLH_2 if hasattr(event, 'lep_isTightLH_2') else event['lep_isTightLH_2']) > 0)) & ((event.lep_chargeIDBDTTight_2 if hasattr(event, 'lep_chargeIDBDTTight_2') else event['lep_chargeIDBDTTight_2']) > 0.7)) & ((event.lep_promptLeptonVeto_TagWeight_2 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_2') else event['lep_promptLeptonVeto_TagWeight_2']) < (-0.7))) & ((event.lep_ambiguityType_2 if hasattr(event, 'lep_ambiguityType_2') else event['lep_ambiguityType_2']) == 0))) & ((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 13) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else event['lep_promptLeptonVeto_TagWeight_1']) < (-0.5))) | ((((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 11) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_isTightLH_1 if hasattr(event, 'lep_isTightLH_1') else event['lep_isTightLH_1']) > 0)) & ((event.lep_chargeIDBDTTight_1 if hasattr(event, 'lep_chargeIDBDTTight_1') else event['lep_chargeIDBDTTight_1']) > 0.7)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else event['lep_promptLeptonVeto_TagWeight_1']) < (-0.7))) & ((event.lep_ambiguityType_1 if hasattr(event, 'lep_ambiguityType_1') else event['lep_ambiguityType_1']) == 0)))))) | (((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) > 1))) | ((((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) | ((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0)) == 0)) | ((event.quadlep_type if hasattr(event, 'quadlep_type') else event['quadlep_type']) > 0)) | (((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) > 0))))) | ((event.is1L2Tau if hasattr(event, 'is1L2Tau') else event['is1L2Tau']) > 0))])
    # Accumulator table; column '0' starts empty and grows by concatenation.
    out = awkward.Table()
    out['0'] =[]
    # Stream in 50k-entry chunks; branch names decoded to str for awkward.
    for i in uproot.iterate(input_filenames,tree_name,branches=branches,namedecode="utf-8",entrysteps=50000, reportentries=False):
        out = awkward.concatenate([out, (a)((b)(awkward.Table(i)))])
    # for i in uproot.iterate(input_filenames,tree_name,branches=branches,namedecode="utf-8",entrysteps=10000, reportentries=True):
    #     print("Entry range: ", i[0], i[1])
    #     out = awkward.concatenate([out, (a)((b)(awkward.Table(i[2])))])
    return out
def get_event(): current_path = '' for path, data in uproot.iterate(paths, 'clusters', branches, reportpath=True): if path != current_path: print 'Opened file', path processed_paths_list.append(path) for ievt in range(data[branches[0]].shape[0]): if report_ievt: print path, ievt yield tuple(data[b][ievt] for b in branches)
def __init__(self, name):
    """Load every CRTtree chunk from the given files and record run metadata (event count, first/last timestamps)."""
    self.n_files = len(name)
    self.trees = list(uproot.iterate(name, "CRTtree"))
    self.n_events = sum(len(tree[b'muon_flag']) for tree in self.trees)
    # Run time span from the first and last timestamps seen.
    self.t_first = self.trees[0][b'tstamp'][0]
    self.t_last = self.trees[-1][b'tstamp'][-1]
    self.delta_t = self.t_last - self.t_first
    dc.the_run.set_run_infos(self.t_first, self.t_last, self.n_events, self.n_files)
def test_function_iterate_pandas():
    """Wildcard file spec with library='pd': index and report must track the global entry count."""
    pandas = pytest.importorskip("pandas")
    files = skhep_testdata.data_path(
        "uproot-sample-6.20.04-uncompressed.root").replace("6.20.04", "*")
    offset = 0
    chunk_iter = uproot.iterate({files: "sample"}, "i8", report=True, library="pd")
    for arrays, report in chunk_iter:
        assert arrays["i8"].values[:5].tolist() == [-15, -14, -13, -12, -11]
        assert arrays.index.values[0] == offset
        assert report.global_entry_start == offset
        assert report.global_entry_stop == offset + len(arrays["i8"])
        offset += len(arrays["i8"])
def iterate(self, progressbar=True, n_files=None, **kwargs):
    """Yield uproot chunks for this dataset's files, optionally limited to n_files and wrapped in a tqdm bar.

    Defaults to reading only the soft-drop mass branch; any kwarg overrides
    the defaults before being forwarded to uproot.iterate.
    """
    selected = self.rootfiles[:n_files] if n_files else self.rootfiles[:]
    options = {
        'branches': [b'JetsAK15_softDropMass'],
    }
    options.update(kwargs)
    chunk_iter = uproot.iterate(selected, self.treename, **options)
    if progressbar:
        chunk_iter = tqdm(chunk_iter, total=len(selected),
                          desc='files in {0}'.format(self.shortname))
    for elements in chunk_iter:
        yield elements
def test_tree_iterator3(self):
    """Iterate two copies of foriter.root with various step sizes and check
    every chunk against the expected integer sequence 0..45.

    Refactor: the six copy-pasted step-size stanzas of the original are
    collapsed into one data-driven loop; the sequence of iterate() calls and
    assertions is unchanged.
    """
    source = list(range(46))
    paths = ["tests/foriter.root", "tests/foriter.root"]

    # one big array
    for arrays in uproot.iterate(paths, "foriter", 1000):
        self.assertEqual(arrays[b"data"].tolist(), source)

    # step equal to the basket size (6), smaller (3, 4), larger (12, 10),
    # and the singleton case (1); the cursor wraps at each file boundary.
    for step in (6, 3, 4, 12, 10, 1):
        i = 0
        for arrays in uproot.iterate(paths, "foriter", step):
            self.assertEqual(arrays[b"data"].tolist(), source[i:i + step])
            i += step
            if i > 45:
                i = 0