Exemplo n.º 1
0
    def from_root(
        cls,
        file,
        treepath="/Events",
        entry_start=None,
        entry_stop=None,
        runtime_cache=None,
        persistent_cache=None,
        schemaclass=NanoAODSchema,
        metadata=None,
        uproot_options=None,
        access_log=None,
        iteritems_options=None,
    ):
        """Quickly build NanoEvents from a root file

        Parameters
        ----------
            file : str or uproot.reading.ReadOnlyDirectory
                The filename or already opened file using e.g. ``uproot.open()``
            treepath : str, optional
                Name of the tree to read in the file
            entry_start : int, optional
                Start at this entry offset in the tree (default 0). ``None`` and
                negative values are treated as 0.
            entry_stop : int, optional
                Stop at this entry offset in the tree (default end of tree). ``None``
                and values beyond the number of entries in the tree are clamped to
                the end of the tree.
            runtime_cache : dict, optional
                A dict-like interface to a cache object. This cache is expected to last the
                duration of the program only, and will be used to hold references to materialized
                awkward arrays, etc.
            persistent_cache : dict, optional
                A dict-like interface to a cache object. Only bare numpy arrays will be placed in this cache,
                using globally-unique keys.
            schemaclass : BaseSchema
                A schema class deriving from `BaseSchema` and implementing the desired view of the file
            metadata : dict, optional
                Arbitrary metadata to add to the `base.NanoEvents` object
            uproot_options : dict, optional
                Any options to pass to ``uproot.open``; the same dict is also handed
                to the ``TrivialUprootOpener`` backing the source mapping.
                Defaults to no options.
            access_log : list, optional
                Pass a list instance to record which branches were lazily accessed by this instance
            iteritems_options : dict, optional
                Forwarded as ``iteritems_options`` to the mapping's
                ``_extract_base_form`` call that builds the base form.
                Defaults to no options.

        Returns
        -------
            The result of ``cls._from_mapping`` for the selected entry range.

        Raises
        ------
            RuntimeError
                If ``file`` is an uproot3 ``ROOTDirectory``.
            TypeError
                If ``file`` is neither a string nor a supported directory object.
        """
        # Build fresh dicts per call instead of relying on mutable default
        # arguments: a shared default would leak any mutation made downstream
        # (e.g. by the opener that receives uproot_options) into later calls.
        if uproot_options is None:
            uproot_options = {}
        if iteritems_options is None:
            iteritems_options = {}

        if isinstance(file, str):
            tree = uproot.open(file, **uproot_options)[treepath]
        elif isinstance(file, uproot.reading.ReadOnlyDirectory):
            tree = file[treepath]
        elif "<class 'uproot.rootio.ROOTDirectory'>" == str(type(file)):
            # Compared by type name rather than isinstance, so the uproot3
            # class itself does not need to be referenced here.
            raise RuntimeError(
                "The file instance (%r) is an uproot3 type, but this module is only compatible with uproot4 or higher"
                % (file,))
        else:
            raise TypeError("Invalid file type (%s)" % (str(type(file))))

        # Clamp the requested entry range to what the tree actually holds.
        if entry_start is None or entry_start < 0:
            entry_start = 0
        if entry_stop is None or entry_stop > tree.num_entries:
            entry_stop = tree.num_entries

        # (file uuid, tree path, entry range) identifies this partition.
        partition_key = (
            str(tree.file.uuid),
            tree.object_path,
            "{0}-{1}".format(entry_start, entry_stop),
        )
        # Map the file uuid back to its path for the opener.
        uuidpfn = {partition_key[0]: tree.file.file_path}
        mapping = UprootSourceMapping(
            TrivialUprootOpener(uuidpfn, uproot_options),
            cache={},
            access_log=access_log,
        )
        # Register the already-opened tree with the mapping under its uuid/path.
        mapping.preload_column_source(partition_key[0], partition_key[1], tree)

        base_form = mapping._extract_base_form(
            tree, iteritems_options=iteritems_options)

        return cls._from_mapping(
            mapping,
            partition_key,
            base_form,
            runtime_cache,
            persistent_cache,
            schemaclass,
            metadata,
        )
Exemplo n.º 2
0
    def from_file(
        cls,
        file,
        treepath="/Events",
        entry_start=None,
        entry_stop=None,
        runtime_cache=None,
        persistent_cache=None,
        schemaclass=NanoAODSchema,
        metadata=None,
    ):
        """Quickly build NanoEvents from a file

        Parameters
        ----------
            file : str or uproot4.reading.ReadOnlyDirectory
                The filename or already opened file using e.g. ``uproot4.open()``
            treepath : str, optional
                Name of the tree to read in the file
            entry_start : int, optional
                Start at this entry offset in the tree (default 0). ``None`` and
                negative values are treated as 0.
            entry_stop : int, optional
                Stop at this entry offset in the tree (default end of tree). ``None``
                and values beyond the number of entries in the tree are clamped to
                the end of the tree.
            runtime_cache : dict, optional
                A dict-like interface to a cache object. This cache is expected to last the
                duration of the program only, and will be used to hold references to materialized
                awkward1 arrays, etc.
            persistent_cache : dict, optional
                A dict-like interface to a cache object. Only bare numpy arrays will be placed in this cache,
                using globally-unique keys.
            schemaclass : BaseSchema
                A schema class deriving from `BaseSchema` and implementing the desired view of the file
            metadata : dict, optional
                Arbitrary metadata to add to the `base.NanoEvents` object

        Returns
        -------
            An instance of ``cls`` built from the schema, the source mapping and
            the partition key, using ``runtime_cache`` as its cache.

        Raises
        ------
            RuntimeError
                If ``schemaclass`` is not a subclass of `BaseSchema`.
            TypeError
                If ``file`` is neither a string nor a
                ``uproot4.reading.ReadOnlyDirectory``.
        """
        if not issubclass(schemaclass, BaseSchema):
            raise RuntimeError("Invalid schema type")
        if isinstance(file, str):
            tree = uproot4.open(file + ":" + treepath)
        elif isinstance(file, uproot4.reading.ReadOnlyDirectory):
            tree = file[treepath]
        else:
            # Fail early with a clear message; without this branch `tree`
            # would be unbound and the next statement would raise an
            # UnboundLocalError unrelated to the real problem.
            raise TypeError("Invalid file type (%s)" % (str(type(file))))

        # Clamp the requested entry range to what the tree actually holds.
        if entry_start is None or entry_start < 0:
            entry_start = 0
        if entry_stop is None or entry_stop > tree.num_entries:
            entry_stop = tree.num_entries

        # (file uuid, tree path, entry range) identifies this partition.
        partition_tuple = (
            str(tree.file.uuid),
            tree.object_path,
            "{0}-{1}".format(entry_start, entry_stop),
        )
        # Map the file uuid back to its path for the source mapping.
        uuidpfn = {partition_tuple[0]: tree.file.file_path}
        mapping = UprootSourceMapping(uuidpfn)
        # Register the already-opened tree with the mapping under its uuid/path.
        mapping.preload_tree(partition_tuple[0], partition_tuple[1], tree)
        if persistent_cache is not None:
            # Layer the persistent cache in front of the uproot-backed mapping.
            mapping = CachedMapping(persistent_cache, mapping)

        base_form = cls._extract_base_form(tree)
        if metadata is not None:
            base_form["parameters"]["metadata"] = metadata
        schema = schemaclass(base_form)
        return cls(schema,
                   mapping,
                   tuple_to_key(partition_tuple),
                   cache=runtime_cache)