Exemplo n.º 1
0
    def load(cls, path, *args, **kwargs):
        """
        Load the object stored at ``path``, choosing the reader by suffix.

        ``path`` is first passed through ``get_path`` (defined elsewhere in
        this file); the result must support ``str.endswith``.

        :param path:
            Location of the input, as accepted by ``get_path``.
        :param args:
            Extra positional arguments forwarded to the selected reader.
        :param kwargs:
            Extra keyword arguments forwarded to the selected reader.
        :return:
            The result of ``NanoEventsFactory.from_root`` for ``.root``
            paths, of ``NanoEventsFactory.from_parquet`` for ``.parquet``
            paths, and of ``coffea.util.load`` for everything else.
        """
        resolved = get_path(path)

        # Suffix -> name of the NanoEventsFactory constructor that reads it.
        factory_readers = (
            (".root", "from_root"),
            (".parquet", "from_parquet"),
        )
        for suffix, reader_name in factory_readers:
            if resolved.endswith(suffix):
                # Imported lazily so coffea is only needed when actually used.
                from coffea.nanoevents import NanoEventsFactory
                reader = getattr(NanoEventsFactory, reader_name)
                return reader(resolved, *args, **kwargs)

        # Anything else is treated as a .coffea file.
        from coffea.util import load as coffea_load
        return coffea_load(resolved, *args, **kwargs)
Exemplo n.º 2
0
def run_coffea_processor(
    events_url: str, tree_name: Optional[str], proc, data_type, meta_data
):
    """
    Process a single file via a coffea processor on the remote node.

    :param events_url:
        Location of the input file. It is converted with ``str()`` and
        handed to ``NanoEventsFactory`` as the ``file`` argument.
    :param tree_name:
        The tree in the ROOT file to use for our data. Required when
        ``data_type`` is ``"root"``; not used for ``"parquet"``.
    :param proc:
        Analysis function to execute. It is called with the loaded events
        as its only argument.
    :param data_type:
        Format of the input data: ``"root"`` or ``"parquet"``.
    :param meta_data:
        Metadata to attach to the events; passed to ``dict()``. A
        ``filename`` entry holding ``str(events_url)`` is added, replacing
        any existing ``filename`` entry.
    :return:
        Whatever ``proc`` returns for the loaded events.
    :raises AssertionError:
        If ``data_type`` is ``"root"`` and ``tree_name`` is ``None``.
    :raises Exception:
        If ``data_type`` is neither ``"root"`` nor ``"parquet"``.
    """
    # Validate before the imports below so a bad request fails fast.
    if data_type not in ("root", "parquet"):
        raise Exception(f"Unknown stream data type of {data_type} - cannot process.")
    if data_type == "root" and tree_name is None:
        # Raised explicitly (instead of using ``assert``) so the check still
        # runs under ``python -O``; the exception type is kept for callers.
        raise AssertionError("tree_name is required when data_type is 'root'")

    # Since we execute remotely, explicitly include everything we need.
    from coffea.nanoevents import NanoEventsFactory
    from coffea.nanoevents.schemas.schema import auto_schema

    source = str(events_url)
    metadata = dict(meta_data, filename=source)

    if data_type == "root":
        factory = NanoEventsFactory.from_root(
            file=source,
            treepath=f"/{tree_name}",
            schemaclass=auto_schema,
            metadata=metadata,
        )
    else:
        # Only "parquet" can reach this branch after the validation above.
        factory = NanoEventsFactory.from_parquet(
            file=source,
            treepath="/",
            schemaclass=auto_schema,
            metadata=metadata,
        )

    return proc(factory.events())