def load(cls, path, *args, **kwargs):
    """Load a file based on its extension: ROOT and parquet files are opened
    through NanoEventsFactory; anything else is assumed to be a .coffea file
    and is read back with coffea.util.load."""
    path = get_path(path)
    if path.endswith(".root"):
        from coffea.nanoevents import NanoEventsFactory

        return NanoEventsFactory.from_root(path, *args, **kwargs)
    if path.endswith(".parquet"):
        from coffea.nanoevents import NanoEventsFactory

        return NanoEventsFactory.from_parquet(path, *args, **kwargs)
    # .coffea
    from coffea.util import load

    return load(path, *args, **kwargs)
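# A hedged usage sketch (not part of the original source): assuming `load` is
# bound as a classmethod on a loader class, named here hypothetically
# `FileLoader`, the extension-based dispatch above would be used along these
# lines:
#
#     factory = FileLoader.load("events.root", treepath="/Events")  # NanoEventsFactory
#     events = factory.events()
#     saved = FileLoader.load("results.coffea")  # whatever coffea.util.load returns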
def run_coffea_processor(
    events_url: str, tree_name: Optional[str], proc, data_type, meta_data
):
    """
    Process a single file from a tree via a coffea processor on the remote node.

    :param events_url: a URL to a ROOT file that uproot4 can open
    :param tree_name: The tree in the ROOT file to use for our data. Can be None
        if the data isn't a ROOT tree!
    :param proc: Analysis function to execute. Must take the events array as its
        only argument and return the analysis result.
    :param data_type: The format of the data ("root" or "parquet")
    :param meta_data: Metadata dictionary attached to the events (the filename
        is added to it automatically)
    :return: The result of `proc` (e.g. a populated accumulator)
    """
    # Since we execute remotely, explicitly include everything we need.
    from coffea.nanoevents import NanoEventsFactory
    from coffea.nanoevents.schemas.schema import auto_schema

    if data_type == "root":
        # Use NanoEvents to build a 4-vector
        assert tree_name is not None
        events = NanoEventsFactory.from_root(
            file=str(events_url),
            treepath=f"/{tree_name}",
            schemaclass=auto_schema,
            metadata=dict(meta_data, filename=str(events_url)),
        ).events()
    elif data_type == "parquet":
        events = NanoEventsFactory.from_parquet(
            file=str(events_url),
            treepath="/",
            schemaclass=auto_schema,
            metadata=dict(meta_data, filename=str(events_url)),
        ).events()
    else:
        raise Exception(f"Unknown stream data type of {data_type} - cannot process.")

    return proc(events)
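# A hedged usage sketch (illustrative only, not part of the original module).
# It assumes a trivial analysis function that just counts events, a
# hypothetical local test file, and that coffea/uproot are installed on the
# worker that runs this.
if __name__ == "__main__":

    def _count_events(events):
        # `proc` receives the NanoEvents array and returns whatever the analysis needs.
        return {"n_events": len(events)}

    result = run_coffea_processor(
        events_url="tests/sample.root",  # hypothetical test file
        tree_name="Events",              # assumed tree name
        proc=_count_events,
        data_type="root",
        meta_data={"dataset": "demo"},   # hypothetical metadata
    )
    print(result)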