def test():
    """Check uproot.iterate file-list handling: good files, a missing file,
    and a wildcard pattern that matches nothing."""
    sample_616 = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root")
    sample_618 = skhep_testdata.data_path("uproot-sample-6.18.00-uncompressed.root")
    missing = sample_616.replace(".root", "-DOES-NOT-EXIST.root")
    empty_glob = sample_616.replace(".root", "-DOES-NOT-EXIST-*.root")

    # A huge step_size yields exactly one chunk per file.
    chunks = list(uproot.iterate([sample_616, sample_618], step_size="1 TB",
                                 library="np"))
    assert len(chunks) == 2

    # A concrete (non-glob) path that does not exist must raise.
    with pytest.raises(uproot._util._FileNotFoundError):
        list(uproot.iterate([sample_616, sample_618, missing], library="np"))

    # A glob pattern with no matches is skipped rather than raising.
    chunks = list(uproot.iterate([sample_616, sample_618, empty_glob],
                                 step_size="1 TB", library="np"))
    assert len(chunks) == 2
示例#2
0
    def __init__(self,
                 path: Union[str, List[str]],
                 treepath: str,
                 data_branches: List[str],
                 target_branch: str,
                 num_workers: int = 1,
    ) -> None:
        """
        Args:
            path: ROOT file path(s) to read.
            treepath: Name of the TTree inside each file.
            data_branches: Branches used as input features.
            target_branch: Branch used as the training target.
            num_workers: Number of workers forwarded to uproot.iterate.
        """
        self.data_branches = data_branches
        self.target_branch = target_branch

        files = self._to_files(path, treepath)
        branches = data_branches + [target_branch]

        tree_iter = uproot.iterate(files, expressions=branches, library='np',
                                   num_workers=num_workers)

        total = self._get_total_entries(files)
        pbar = tqdm.tqdm(tree_iter)
        self._examples = []

        def print_progress():
            # NOTE(review): assumes total > 0; an empty dataset would divide
            # by zero here — confirm callers never pass empty files.
            processed = len(self._examples)
            pbar.set_description(f'Total = {total:d}, Processed: {processed:d}'
                                 f' ({100 * processed / total:.2f} %)')

        print_progress()
        # BUG FIX: iterate over the tqdm wrapper (not the raw generator) so
        # the progress bar actually advances as chunks are processed.
        for chunk in pbar:
            self._examples += self._process(chunk)
            print_progress()
def test():
    """Iterating over an empty TTree should yield no chunks and not raise."""
    source = skhep_testdata.data_path("uproot-issue335.root") + ":empty_tree"
    for _arrays in uproot.iterate(source, ["var"], library="np"):
        pass
示例#4
0
    def __init__(self,
                 file_list,
                 variables,
                 cuts="",
                 step_size=10000,
                 nbins=50,
                 label="Default"):
        """
        Args:
            file_list: ROOT file(s) handed to uproot.iterate.
            variables: Branch names to read and histogram.
            cuts: Selection string (currently unused by the iterator).
            step_size: Number of entries per iteration chunk (stored only).
            nbins: Number of histogram bins per variable.
            label: Human-readable label for this dataset.
        """
        # BUG FIX: the original passed `filter_name=variables_list`, an
        # undefined name (NameError at runtime); the parameter is `variables`.
        self._iterator = uproot.iterate(file_list,
                                        filter_name=variables,
                                        library="np")

        self._nbins = nbins
        self.hist_dict = dict.fromkeys(variables)
        self._variables = variables
        self.label = label
        # Per-variable [min, max] accumulator, initialised to zeros.
        self._var_max_min_dict = {
            variable: np.zeros(2, dtype='float32') for variable in variables
        }
        self._step_size = step_size
        self._file_list = file_list
示例#5
0
def test_function_iterate_pandas():
    """Iterate many sample versions with report=True and check the pandas
    index and the report's global entry range stay in sync."""
    pandas = pytest.importorskip("pandas")
    versions = [
        "5.23.02", "5.24.00", "5.25.02", "5.26.00", "5.27.02", "5.28.00",
        "5.29.02", "5.30.00", "6.08.04", "6.10.05", "6.14.00", "6.16.00",
        "6.18.00", "6.20.04",
    ]
    files = [
        skhep_testdata.data_path(
            "uproot-sample-{0}-uncompressed.root".format(version)) + ":sample"
        for version in versions
    ]
    entries_seen = 0
    for arrays, report in uproot.iterate(files, "i8", report=True, library="pd"):
        chunk_len = len(arrays["i8"])
        assert arrays["i8"].values[:5].tolist() == [-15, -14, -13, -12, -11]
        assert arrays.index.values[0] == entries_seen
        assert report.global_entry_start == entries_seen
        assert report.global_entry_stop == entries_seen + chunk_len
        entries_seen += chunk_len
示例#6
0
 def iterate(self, progressbar=False, n_files=None, use_cache=True, **kwargs):
     """
     Wrapper around uproot.iterate:
     - Gets a progress bar option
     - Possibility to limit number of files
     - Can use a class cache variable

     Yields one arrays-chunk at a time, either replayed from ``self.cache``
     or freshly read from ``self.rootfiles`` via uproot.iterate.
     """
     if use_cache:
         if not len(self.cache):
             logger.warning('use_cache was True but cache is empty for %s', self)
         # logger.debug('Using cache')
         iterator = iter(self.cache)
         total = len(self.cache)
     else:
         # Allow reading only the first n_files root files
         rootfiles = self.rootfiles[:]
         if n_files: rootfiles = rootfiles[:n_files]
         # rootfiles = [ r + ':' + self.treename for r in rootfiles ]
         iterator = uproot.iterate(rootfiles, self.treename, **kwargs)
         total = len(rootfiles)
         if progressbar: logger.info('Iterating over %s rootfiles for %s', total, self)
     if progressbar:
         if not svjflatanalysis.HAS_TQDM:
             logger.error('tqdm could not be imported, progressbars are disabled')
         else:
             # NOTE(review): when reading files, `total` counts files, not
             # yielded chunks — the bar total is only an estimate.
             iterator = svjflatanalysis.tqdm(iterator, total=total, desc='arrays' if use_cache else 'root files')
     for arrays in iterator:
         yield arrays
示例#7
0
def getData(fnames="", treeName="Events", chunks=False):
    """Build the flat branch list from the module-level `branches` mapping and
    read the requested tree either chunk-wise (uproot.iterate) or lazily."""
    branchlist = [
        collection + "_" + attr
        for collection, attrs in branches.items()
        for attr in attrs
    ]
    source = fnames + ":" + treeName
    if chunks:
        ldmx_dict = uproot.iterate(source, branchlist)
    else:
        ldmx_dict = uproot.lazy(source, branchlist)
    return ldmx_dict
def process_sample(sample_name, sample_path, is_sig, is_mc, channel, camp=None):
    """Read the `ntup` tree from sample_path in 200 MB chunks, tag each chunk
    with sample metadata, and return the list of tagged DataFrames."""
    print(f"Processing: {sample_name}")
    sample_dfs = []
    chunk_iter = uproot.iterate(
        f"{sample_path}:ntup",
        feature_list,
        cut=f"(ll_m >= 200) & ({channel} == 1)",
        library="pd",
        step_size="200 MB",
    )
    for chunk_pd in chunk_iter:
        # In-place RAM usage report (overwritten each chunk via \r).
        mem_available = psutil.virtual_memory().available / GB
        mem_total = psutil.virtual_memory().total / GB
        print(
            f"RAM usage {mem_available:.02f} / {mem_total:.02f} GB",
            end="\r",
            flush=True,
        )
        # Tag every row with its origin so samples can be mixed later;
        # `camp` allows extra pre-training cuts based on these tags.
        chunk_pd = chunk_pd.assign(
            sample_name=sample_name,  # required
            is_sig=is_sig,  # required
            is_mc=is_mc,  # required
            camp=camp,
        )
        sample_dfs.append(chunk_pd)
    sys.stdout.write("\033[K")  # clear the in-place RAM-usage line
    return sample_dfs
示例#9
0
def loadDF(filenames, columns=None, tree="ntp1", preselection=None):
    """Load ROOT files into a single pandas DataFrame (uproot3 API).

    :param filenames: File(s) passed to uproot.iterate; must be non-empty.
    :param columns: Branches to read (None reads everything).
    :param tree: Name of the tree to read.
    :param preselection: Function that Determines which rows to keep.
                         The function receives the loaded dataframe and
                         returns a list of True or False.
                         Example: def cutEtaMass(df):
                                    return (df.eta_Mass > 1.5) & (df.eta_Mass < 3)
    :return: Concatenated DataFrame of all (optionally filtered) chunks.
    """
    if not filenames:
        raise ValueError("filenames is empty")

    chunks = []
    for chunk in uproot.iterate(filenames,
                                tree,
                                columns,
                                outputtype=pd.DataFrame,
                                namedecode="utf-8"):
        # Apply the row filter chunk-by-chunk to keep memory bounded.
        if preselection:
            chunk = chunk[preselection(chunk)]
        chunks.append(chunk)

    # Single concatenation at the end avoids quadratic copying.
    return pd.concat(chunks)
def test():
    """Reading the same file twice through uproot.iterate with library="pd"
    should work for jagged (Muon_Px, Jet_Px) and flat (MET_px) branches."""
    source = skhep_testdata.data_path("uproot-HZZ-uncompressed.root") + ":events"
    wanted = ["Muon_Px", "Jet_Px", "MET_px"]
    for _arrays in uproot.iterate([source, source], wanted, library="pd"):
        pass
示例#11
0
def getTrackEfficiency(inQAname, outfilename):
    """Read the 'pndsim' tree from inQAname, compute the track reconstruction
    efficiency from the LMDTrackQ branches, and save a ThetaRec histogram
    (RecStatus == 0 tracks only) to outfilename."""

    # uproot.iterate will produce a dict with JaggedArrays, so we can create an empty dict and append each iteration
    clean = recStatus = None
    try:
        # open the root trees in a TChain-like manner
        print(f'reading file {str(inQAname)}')
        # NOTE(review): each iteration overwrites clean/recStatus, so only the
        # last chunk survives — confirm the input always fits in one chunk.
        for array in uproot.iterate(
                str(inQAname), 'pndsim',
            [b'LMDTrackQ.fTrkRecStatus', b'LMDTrackQ.fThetarec']):
            clean, recStatus = np.array(cleanArray(array))
            print(f'clean: {clean}')

    except Exception as e:
        print(f'exception!\n{e}')
        print('is kill')
        return

    # BUG FIX: guard against an empty iteration, which previously raised a
    # NameError on `recStatus` below instead of failing gracefully.
    if recStatus is None:
        print('no data read, nothing to plot')
        return

    # Efficiency = reconstructed (status 0) / all tracks, in percent.
    maskStatGood = ((recStatus == 0))
    maskStatBAd = ((recStatus != 0))
    good = clean[maskStatGood]
    bad = clean[maskStatBAd]

    print(f'len: good:{len(good)}, bad:{len(bad)}')
    eff = len(good) * 100 / (len(good) + len(bad))

    plt.hist(good, bins=50, range=(0.002, 0.01))
    plt.suptitle(f'ThetaRec (for RecStatus=0)\nTrack Efficiency: {eff:.1f}%')
    #plt.yscale('log')
    # plt.show()
    plt.savefig(outfilename)
    plt.close()
def process_entry(selection,observable, sample, is_MC = True):
    """Histogram `observable` over all chunks of a sample's ROOT files.

    selection: callable(table) -> boolean mask of rows to keep.
    observable: callable(table) -> values to histogram; must expose `.binning`.
    sample: object with `.path` (directory of *.root* files) and `.lumi`.
    is_MC: when True, weight entries by weight * xs_weight * lumi; otherwise
           use unit weights.

    Returns (counts, edges) as plain lists.
    NOTE(review): if uproot.iterate yields no chunks, `edges` is never bound
    and `all_counts` stays None, so the final return raises — confirm inputs
    are never empty.
    """
    samplename = os.path.basename(sample.path)
    files = glob.glob(os.path.join(sample.path,'*.root*'))

    all_counts = None

    # MC trees carry a '_Nom' (nominal) suffix on the tree name.
    treepath = samplename + '_Nom' if is_MC else samplename


    for chunk in uproot.iterate(
        files,
        branches = [
            'jet_pt','jet_eta','jet_phi','jet_e',
            'weight','xs_weight',
            'n_*'
        ],
        treepath = treepath
    ):
        table = awkward.array.table.Table(chunk)
        oldlen = len(table)

        # Keep only rows passing the selection mask.
        table = table[selection(table)]

        obs = observable(table)

        if is_MC:
            weights = table['weight'] * table['xs_weight'] * sample.lumi
        else:
            weights = np.ones_like(obs)

        # Accumulate histogram counts across chunks (edges are identical
        # every time since the binning is fixed by the observable).
        counts, edges = np.histogram(obs, bins = observable.binning, weights = weights)
        all_counts = all_counts + counts if all_counts is not None else counts

    return all_counts.tolist(), edges.tolist()
示例#13
0
 def uproot_generator():
     """Yield one 2-D numpy array per chunk, shaped
     (len(branches), entries_in_chunk).

     Reads `path`/`treepath`/`branches`/`entrysteps`/`kwargs` from the
     enclosing scope (uproot3-style keyword arguments).
     """
     for data in uproot.iterate(path=path,
                                treepath=treepath,
                                branches=branches,
                                entrysteps=entrysteps,
                                **kwargs):
         # Stack the requested branches into a single array per chunk.
         data = np.array([data[branch] for branch in branches])
         yield data
示例#14
0
def get_from_path(path, arrays):
    """Glob `path` recursively and return a uproot iterator over the
    b'nEXOevents' tree, reading `arrays` one entry at a time."""
    matched_files = glob(path, recursive=True)
    return uproot.iterate(
        matched_files,
        b'nEXOevents',
        arrays,
        entrysteps=1,
    )
示例#15
0
def get_MET(rflist):
    """Collect the PuppiMissingET.MET branch from every chunk of `rflist`,
    returning one array per chunk."""
    branches = ['PuppiMissingET.MET']
    print('Reading ', branches, ' from your root files')
    MET = [chunk[b"PuppiMissingET.MET"]
           for chunk in tqdm(up.iterate(rflist, branches))]
    print('Done')
    return MET
示例#16
0
 def reset_dataloader(self):
     """Restart batch iteration from the beginning.

     Resets the running batch index and re-creates the uproot.iterate
     generator over ``self.files`` (awkward-array chunks of
     ``self.specific_batch_size`` entries, restricted to
     ``self._variables_list`` and filtered by ``self.cut``).
     """
     self._current_index = 0
     self._batches_generator = uproot.iterate(
         self.files,
         filter_name=self._variables_list,
         cut=self.cut,
         library='ak',
         step_size=self.specific_batch_size)
     # Reclaim memory still referenced by the previous generator.
     gc.collect()
示例#17
0
def extract_batches(input_files, tree_name, variables=None):
    """Return a uproot.iterate generator over `tree_name` in each of
    `input_files`, optionally restricted to the given branch expressions."""
    logger.info(
        "Creating generator using uproot.iterate for input files {}".format(
            input_files))

    # uproot's "file:tree" spec, one entry per input file.
    sources = ["{}:{}".format(fl, tree_name) for fl in input_files]
    return uproot.iterate(sources, expressions=variables)
示例#18
0
    def test_tree_iterator4(self):
        """Iterate two copies of foriter2.root at several entry-step sizes
        and check every chunk against the expected word list."""
        words2 = [
            b"zero", b"one", b"two", b"three", b"four", b"five", b"six",
            b"seven", b"eight", b"nine", b"ten", b"eleven", b"twelve",
            b"thirteen", b"fourteen", b"fifteen", b"sixteen", b"seventeen",
            b"eighteen", b"ninteen", b"twenty", b"twenty-one", b"twenty-two",
            b"twenty-three", b"twenty-four", b"twenty-five", b"twenty-six",
            b"twenty-seven", b"twenty-eight", b"twenty-nine", b"thirty"
        ]
        files = ["tests/foriter2.root", "tests/foriter2.root"]

        # one big array: a step larger than the whole tree yields it whole
        for arrays in uproot.iterate(files, "foriter2", 1000):
            self.assertEqual(arrays[b"data"].tolist(), words2)

        # basket-sized (6), smaller (3, 4), larger (12, 10), and singleton
        # (1) step sizes; the expected slice wraps at the file boundary (30)
        for step in (6, 3, 4, 12, 10, 1):
            i = 0
            for arrays in uproot.iterate(files, "foriter2", step):
                self.assertEqual(arrays[b"data"].tolist(), words2[i:i + step])
                i += step
                if i > 30: i = 0
示例#19
0
 def build_by_file_pattern(self, file_pattern, tree_path, branches,
                           namedecode):
     """Iterate `tree_path` over files matching `file_pattern` and print one
     progress line per chunk.

     With reportpath/reportfile/reportentries all enabled (uproot3 API),
     each yielded tuple is (path, file, entry_start, entry_stop, arrays).
     """
     for path, file, start, stop, arrays in uproot.iterate(
             file_pattern,
             tree_path,
             branches,
             reportpath=True,
             reportfile=True,
             reportentries=True,
             namedecode=namedecode):
         print(path, file, start, stop, len(arrays))
示例#20
0
def test_function_iterate_pandas_2():
    """Iterate a brace-expanded set of HZZ files with report=True and check
    that each file's pandas MultiIndex resumes at the running entry offset."""
    pandas = pytest.importorskip("pandas")
    files = skhep_testdata.data_path("uproot-HZZ.root").replace(
        "HZZ", "HZZ-{uncompressed,zlib,lz4}")
    offset = 0
    for arrays, report in uproot.iterate({files: "events"},
                                         "Muon_Px",
                                         report=True,
                                         library="pd"):
        assert arrays["Muon_Px"].index.values[0] == (offset, 0)
        offset += report.tree.num_entries
示例#21
0
 def next_batch(self):
     """Return the next (batch, labels) pair, restarting the generator when
     the current pass over the files is exhausted.

     On StopIteration the uproot.iterate generator is rebuilt and the call
     recurses once to fetch the first batch of the new pass.
     NOTE(review): if the files yield no batches at all, this recursion
     never terminates — confirm inputs are non-empty.
     """
     try:
         batch = next(self._batches_generator)
     except StopIteration:
         # NOTE(review): no explicit `library=` is passed when rebuilding —
         # confirm uproot's default matches the originally built generator.
         self._batches_generator = uproot.iterate(
             self.files,
             filter_name=self._variables_list,
             cut=self.cut,
             step_size=self.specific_batch_size)
         return self.next_batch()
     self._current_index += 1
     # Labels: one copy of this dataset's class label per batch entry.
     return batch, np.ones(len(batch)) * self.class_label
示例#22
0
def test_function_iterate_pandas_2():
    """Iterate four differently-compressed HZZ files and verify that each
    file's pandas MultiIndex starts at the running entry offset."""
    pandas = pytest.importorskip("pandas")
    names = [
        "uproot-HZZ.root",
        "uproot-HZZ-uncompressed.root",
        "uproot-HZZ-zlib.root",
        "uproot-HZZ-lz4.root",
    ]
    files = [skhep_testdata.data_path(name) + ":events" for name in names]
    offset = 0
    for arrays, report in uproot.iterate(files, "Muon_Px", report=True, library="pd"):
        assert arrays["Muon_Px"].index.values[0] == (offset, 0)
        offset += report.tree.num_entries
示例#23
0
def test_function_iterate():
    """Iterate all sample versions (wildcard) with report=True and check the
    report's global entry range tracks the entries seen so far."""
    files = skhep_testdata.data_path(
        "uproot-sample-6.20.04-uncompressed.root").replace("6.20.04", "*")
    seen = 0
    for arrays, report in uproot.iterate({files: "sample"},
                                         "i8",
                                         report=True,
                                         library="np"):
        chunk = arrays["i8"]
        assert chunk[:5].tolist() == [-15, -14, -13, -12, -11]
        assert report.global_entry_start == seen
        assert report.global_entry_stop == seen + len(chunk)
        seen += len(chunk)
def test_iterate():
    """Exercise uproot.iterate input forms: a file spec with no tree name,
    allow_missing, and the four equivalent ways to spell a wildcard spec."""
    with pytest.raises(ValueError):
        for arrays in uproot.iterate(skhep_testdata.data_path("uproot-issue63.root")):
            pass

    # A nonexistent tree with allow_missing=True yields nothing, not an error.
    missing_tree = uproot.iterate(
        {skhep_testdata.data_path("uproot-issue63.root"): "blah"},
        allow_missing=True,
    )
    assert len(list(missing_tree)) == 0

    files = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root").replace(
        "6.16.00", "*"
    )

    # All four spellings of the same wildcarded spec must be accepted.
    for spec in (files, {files: "sample"}, [files], [{files: "sample"}]):
        for arrays in uproot.iterate(spec, "Ai8"):
            pass
def run_query(input_filenames=None, tree_name=None, branches=None):
    """Chunk-read `tree_name` from `input_filenames` (uproot3 API), apply a
    generated event selection, and return an awkward Table containing the
    selected events' MVA3lCERN_weight_ttH values.

    NOTE: `a` (projection) and `b` (selection) below are machine-generated
    lambdas (func_adl-style output); do not edit them by hand.
    """
    import awkward, uproot    
    # Projection: wrap the MVA3lCERN_weight_ttH column in an awkward Table.
    a = (lambda event: (awkward.Table if hasattr(awkward, 'Table') else awkward['Table'])((event.MVA3lCERN_weight_ttH if hasattr(event, 'MVA3lCERN_weight_ttH') else event['MVA3lCERN_weight_ttH'])))
    # Selection: boolean mask encoding the full trilepton signal-region cuts.
    b = (lambda event: event[(((((((((((((((((((((((((((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) == 0)) & (abs((event.total_charge if hasattr(event, 'total_charge') else event['total_charge'])) == 1)) & ((event.nJets_OR_T if hasattr(event, 'nJets_OR_T') else event['nJets_OR_T']) >= 2)) & ((event.nJets_OR_T_MV2c10_70 if hasattr(event, 'nJets_OR_T_MV2c10_70') else event['nJets_OR_T_MV2c10_70']) > 0)) & ((event.lep_Pt_1 if hasattr(event, 'lep_Pt_1') else event['lep_Pt_1']) > 15000.0)) & ((event.lep_Pt_2 if hasattr(event, 'lep_Pt_2') else event['lep_Pt_2']) > 15000.0)) & ((event.lep_isolationFixedCutLoose_0 if hasattr(event, 'lep_isolationFixedCutLoose_0') else event['lep_isolationFixedCutLoose_0']) > 0)) & (abs(((event.Mlll012 if hasattr(event, 'Mlll012') else event['Mlll012']) - 91200.0)) > 10000.0)) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1']))) | ((event.Mll01 if hasattr(event, 'Mll01') else event['Mll01']) > 12000.0))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) | ((event.Mll02 if hasattr(event, 'Mll02') else event['Mll02']) > 12000.0))) & (((((abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 13) & ((event.lep_isMedium_0 if hasattr(event, 'lep_isMedium_0') else event['lep_isMedium_0']) > 0)) | (abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 11)) & (((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 11) & (abs((event.lep_Eta_1 if hasattr(event, 'lep_Eta_1') else event['lep_Eta_1'])) < 2.0)) | ((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 13) & ((event.lep_isMedium_1 if hasattr(event, 'lep_isMedium_1') else 
event['lep_isMedium_1']) > 0)))) & (((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 11) & (abs((event.lep_Eta_2 if hasattr(event, 'lep_Eta_2') else event['lep_Eta_2'])) < 2.0)) | ((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 13) & ((event.lep_isMedium_2 if hasattr(event, 'lep_isMedium_2') else event['lep_isMedium_2']) > 0))))) & ((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) * abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) != 169) & ((event.DRll12 if hasattr(event, 'DRll12') else event['DRll12']) > 0.5)) | ((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) * abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) == 169))) & ((event.Mll12 if hasattr(event, 'Mll12') else event['Mll12']) > 12000.0)) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1']))) | (abs(((event.Mll01 if hasattr(event, 'Mll01') else event['Mll01']) - 91200.0)) > 10000.0))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) | (abs(((event.Mll02 if hasattr(event, 'Mll02') else event['Mll02']) - 91200.0)) > 10000.0))) & ((event.MVA3lCERN_weight_ttH if hasattr(event, 'MVA3lCERN_weight_ttH') else event['MVA3lCERN_weight_ttH']) > (-1))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1']))) | (abs(((event.Mll01 if hasattr(event, 'Mll01') else event['Mll01']) - 91200.0)) > 10000.0))) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) != (-(event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2']))) | (abs(((event.Mll02 if hasattr(event, 'Mll02') else event['Mll02']) - 91200.0)) > 10000.0))) & 
((event.MVA3lCERN_weight_ttH if hasattr(event, 'MVA3lCERN_weight_ttH') else event['MVA3lCERN_weight_ttH']) > 0.3)) & ((event.MVA3lCERN_weight_ttW if hasattr(event, 'MVA3lCERN_weight_ttW') else event['MVA3lCERN_weight_ttW']) < 0.75)) & ((event.MVA3lCERN_weight_ttZ if hasattr(event, 'MVA3lCERN_weight_ttZ') else event['MVA3lCERN_weight_ttZ']) < 0.75)) & ((event.MVA3lCERN_weight_VV if hasattr(event, 'MVA3lCERN_weight_VV') else event['MVA3lCERN_weight_VV']) < 0.75)) & ((event.MVA3lCERN_weight_ttbar if hasattr(event, 'MVA3lCERN_weight_ttbar') else event['MVA3lCERN_weight_ttbar']) < 0.3)) & ((((((((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) & (((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0']) * (event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) > 0)) & ((event.lep_isQMisID_1 if hasattr(event, 'lep_isQMisID_1') else event['lep_isQMisID_1']) == 0)) & ((event.lep_isQMisID_0 if hasattr(event, 'lep_isQMisID_0') else event['lep_isQMisID_0']) == 0)) | ((((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.lep_isQMisID_2 if hasattr(event, 'lep_isQMisID_2') else event['lep_isQMisID_2']) == 0)) & ((event.lep_isQMisID_1 if hasattr(event, 'lep_isQMisID_1') else event['lep_isQMisID_1']) == 0))) | (((event.quadlep_type if hasattr(event, 'quadlep_type') else event['quadlep_type']) > 0) & ((event.FSF_4L_tot if hasattr(event, 'FSF_4L_tot') else event['FSF_4L_tot']) == 1))) & (((((((((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) & (((((abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 13) & ((event.lep_isMedium_0 if hasattr(event, 'lep_isMedium_0') else event['lep_isMedium_0']) > 0)) & ((event.lep_isolationFixedCutLoose_0 if hasattr(event, 'lep_isolationFixedCutLoose_0') else event['lep_isolationFixedCutLoose_0']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_0 if hasattr(event, 
'lep_promptLeptonVeto_TagWeight_0') else event['lep_promptLeptonVeto_TagWeight_0']) < (-0.5))) | ((((((abs((event.lep_ID_0 if hasattr(event, 'lep_ID_0') else event['lep_ID_0'])) == 11) & ((event.lep_isolationFixedCutLoose_0 if hasattr(event, 'lep_isolationFixedCutLoose_0') else event['lep_isolationFixedCutLoose_0']) > 0)) & ((event.lep_isTightLH_0 if hasattr(event, 'lep_isTightLH_0') else event['lep_isTightLH_0']) > 0)) & ((event.lep_chargeIDBDTTight_0 if hasattr(event, 'lep_chargeIDBDTTight_0') else event['lep_chargeIDBDTTight_0']) > 0.7)) & ((event.lep_ambiguityType_0 if hasattr(event, 'lep_ambiguityType_0') else event['lep_ambiguityType_0']) == 0)) & ((event.lep_promptLeptonVeto_TagWeight_0 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_0') else event['lep_promptLeptonVeto_TagWeight_0']) < (-0.7))))) & (((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 13) & ((event.lep_isMedium_1 if hasattr(event, 'lep_isMedium_1') else event['lep_isMedium_1']) > 0)) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else event['lep_promptLeptonVeto_TagWeight_1']) < (-0.5))) | ((((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 11) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_isTightLH_1 if hasattr(event, 'lep_isTightLH_1') else event['lep_isTightLH_1']) > 0)) & ((event.lep_chargeIDBDTTight_1 if hasattr(event, 'lep_chargeIDBDTTight_1') else event['lep_chargeIDBDTTight_1']) > 0.7)) & ((event.lep_ambiguityType_1 if hasattr(event, 'lep_ambiguityType_1') else event['lep_ambiguityType_1']) == 0)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else 
event['lep_promptLeptonVeto_TagWeight_1']) < (-0.7))))) | ((((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) == 0)) & (((((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 13) & ((event.lep_isolationFixedCutLoose_2 if hasattr(event, 'lep_isolationFixedCutLoose_2') else event['lep_isolationFixedCutLoose_2']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_2 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_2') else event['lep_promptLeptonVeto_TagWeight_2']) < (-0.5))) | ((((((abs((event.lep_ID_2 if hasattr(event, 'lep_ID_2') else event['lep_ID_2'])) == 11) & ((event.lep_isolationFixedCutLoose_2 if hasattr(event, 'lep_isolationFixedCutLoose_2') else event['lep_isolationFixedCutLoose_2']) > 0)) & ((event.lep_isTightLH_2 if hasattr(event, 'lep_isTightLH_2') else event['lep_isTightLH_2']) > 0)) & ((event.lep_chargeIDBDTTight_2 if hasattr(event, 'lep_chargeIDBDTTight_2') else event['lep_chargeIDBDTTight_2']) > 0.7)) & ((event.lep_promptLeptonVeto_TagWeight_2 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_2') else event['lep_promptLeptonVeto_TagWeight_2']) < (-0.7))) & ((event.lep_ambiguityType_2 if hasattr(event, 'lep_ambiguityType_2') else event['lep_ambiguityType_2']) == 0))) & ((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 13) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else event['lep_promptLeptonVeto_TagWeight_1']) < (-0.5))) | ((((((abs((event.lep_ID_1 if hasattr(event, 'lep_ID_1') else event['lep_ID_1'])) == 11) & ((event.lep_isolationFixedCutLoose_1 if hasattr(event, 'lep_isolationFixedCutLoose_1') else event['lep_isolationFixedCutLoose_1']) > 0)) & ((event.lep_isTightLH_1 if 
hasattr(event, 'lep_isTightLH_1') else event['lep_isTightLH_1']) > 0)) & ((event.lep_chargeIDBDTTight_1 if hasattr(event, 'lep_chargeIDBDTTight_1') else event['lep_chargeIDBDTTight_1']) > 0.7)) & ((event.lep_promptLeptonVeto_TagWeight_1 if hasattr(event, 'lep_promptLeptonVeto_TagWeight_1') else event['lep_promptLeptonVeto_TagWeight_1']) < (-0.7))) & ((event.lep_ambiguityType_1 if hasattr(event, 'lep_ambiguityType_1') else event['lep_ambiguityType_1']) == 0)))))) | (((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) > 1))) | ((((event.dilep_type if hasattr(event, 'dilep_type') else event['dilep_type']) > 0) | ((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0)) == 0)) | ((event.quadlep_type if hasattr(event, 'quadlep_type') else event['quadlep_type']) > 0)) | (((event.trilep_type if hasattr(event, 'trilep_type') else event['trilep_type']) > 0) & ((event.nTaus_OR_Pt25 if hasattr(event, 'nTaus_OR_Pt25') else event['nTaus_OR_Pt25']) > 0))))) | ((event.is1L2Tau if hasattr(event, 'is1L2Tau') else event['is1L2Tau']) > 0))])
    # Accumulate selected rows chunk-by-chunk (50k entries at a time).
    out = awkward.Table()
    out['0'] =[]
    for i in uproot.iterate(input_filenames,tree_name,branches=branches,namedecode="utf-8",entrysteps=50000, reportentries=False):
        out = awkward.concatenate([out, (a)((b)(awkward.Table(i)))])
        
    # for i in uproot.iterate(input_filenames,tree_name,branches=branches,namedecode="utf-8",entrysteps=10000, reportentries=True):
    #     print("Entry range: ", i[0], i[1])
    #     out = awkward.concatenate([out, (a)((b)(awkward.Table(i[2])))])
    
    return out
示例#26
0
    def get_event():
        """Generator yielding one event tuple per entry.

        Reads the 'clusters' tree from `paths` (uproot3 reportpath mode),
        logging and recording each newly opened file, then yields a tuple
        with one value per requested branch for every event.

        NOTE: Python 2 print statements — this fragment predates Python 3.
        NOTE(review): `current_path` is never updated after the comparison,
        so every chunk is logged/appended as a "new" file — confirm whether
        `current_path = path` was intended inside the if-branch.
        """
        current_path = ''
        for path, data in uproot.iterate(paths,
                                         'clusters',
                                         branches,
                                         reportpath=True):
            if path != current_path:
                print 'Opened file', path
                processed_paths_list.append(path)

            for ievt in range(data[branches[0]].shape[0]):
                if report_ievt:
                    print path, ievt
                yield tuple(data[b][ievt] for b in branches)
示例#27
0
    def __init__(self, name):
        """Load every CRTtree chunk from the given files and register run
        metadata (event count, first/last timestamps) with the run record."""
        self.n_files = len(name)

        # Materialize all chunks up front.
        self.trees = list(uproot.iterate(name, "CRTtree"))
        self.n_events = sum(len(chunk[b'muon_flag']) for chunk in self.trees)

        # Time span covered by the data: first timestamp of the first chunk
        # through the last timestamp of the last chunk.
        self.t_first = self.trees[0][b'tstamp'][0]
        self.t_last = self.trees[-1][b'tstamp'][-1]

        self.delta_t = self.t_last - self.t_first

        dc.the_run.set_run_infos(self.t_first, self.t_last, self.n_events,
                                 self.n_files)
示例#28
0
def test_function_iterate_pandas():
    """Iterate every sample version (wildcard) as pandas and check the index
    and the report's entry range advance together."""
    pandas = pytest.importorskip("pandas")
    files = skhep_testdata.data_path(
        "uproot-sample-6.20.04-uncompressed.root").replace("6.20.04", "*")
    entries_seen = 0
    for arrays, report in uproot.iterate({files: "sample"},
                                         "i8",
                                         report=True,
                                         library="pd"):
        chunk_len = len(arrays["i8"])
        assert arrays["i8"].values[:5].tolist() == [-15, -14, -13, -12, -11]
        assert arrays.index.values[0] == entries_seen
        assert report.global_entry_start == entries_seen
        assert report.global_entry_stop == entries_seen + chunk_len
        entries_seen += chunk_len
示例#29
0
 def iterate(self, progressbar=True, n_files=None, **kwargs):
     """Yield uproot chunks for this sample's root files.

     progressbar: wrap the iterator in tqdm (bar total = number of files).
     n_files: if given, read only the first n_files root files.
     kwargs: forwarded to uproot.iterate, overriding the default branches.
     """
     selected = self.rootfiles[:n_files] if n_files else self.rootfiles[:]
     options = {
         'branches' : [b'JetsAK15_softDropMass'],
         # 'reportpath' : True,
         # 'reportfile' : True,
         # 'reportentries' : True
         }
     options.update(kwargs)
     chunk_iter = uproot.iterate(selected, self.treename, **options)
     if progressbar:
         chunk_iter = tqdm(chunk_iter, total=len(selected),
                           desc='files in {0}'.format(self.shortname))
     for chunk in chunk_iter:
         yield chunk
示例#30
0
    def test_tree_iterator3(self):
        """Iterate two copies of foriter.root at several entry-step sizes and
        check every chunk against the expected 0..45 sequence."""
        source = list(range(46))
        files = ["tests/foriter.root", "tests/foriter.root"]

        # one big array: a step larger than the whole tree yields it whole
        for arrays in uproot.iterate(files, "foriter", 1000):
            self.assertEqual(arrays[b"data"].tolist(), source)

        # basket-sized (6), smaller (3, 4), larger (12, 10), and singleton
        # (1) step sizes; the expected slice wraps at the file boundary (45)
        for step in (6, 3, 4, 12, 10, 1):
            i = 0
            for arrays in uproot.iterate(files, "foriter", step):
                self.assertEqual(arrays[b"data"].tolist(), source[i:i + step])
                i += step
                if i > 45: i = 0