Пример #1
0
def get_mergeable_values(starting_node: ROOT.RDF.RNode, range_id: int,
                         computation_graph_callable: Callable[[ROOT.RDF.RNode, int], List], optimized: bool) -> List:
    """
    Run the computation graph callable and collect its mergeable values.

    Args:
        starting_node: Node handed to ``computation_graph_callable`` as its
            first argument.
        range_id: Identifier handed to ``computation_graph_callable`` as its
            second argument.
        computation_graph_callable: Callable invoked as
            ``computation_graph_callable(starting_node, range_id)``. When
            ``optimized`` is true its return value is unpacked into two
            iterables (results and the type of each result); otherwise it is
            iterated directly as a collection of results.
        optimized: Selects which of the two return conventions above is
            expected from the callable.

    Returns:
        A list with one entry per result. In the optimized case, results
        that are not ``ROOT.RDF.RResultHandle`` instances are passed through
        unchanged.
    """
    if not optimized:
        # The callable's results are converted one by one through the
        # Utils helper.
        return [
            Utils.get_mergeablevalue(ptr)
            for ptr in computation_graph_callable(starting_node, range_id)
        ]

    # The callable returns the action results together with the type of
    # each result.
    handles, value_types = computation_graph_callable(starting_node, range_id)

    mergeables = []
    for handle, value_type in zip(handles, value_types):
        if isinstance(handle, ROOT.RDF.RResultHandle):
            # The handle is type-erased: instantiate GetResultPtr with the
            # value type to obtain the result pointer before extracting
            # its mergeable value.
            typed_ptr = handle.GetResultPtr[value_type]()
            mergeables.append(ROOT.ROOT.Detail.RDF.GetMergeableValue(typed_ptr))
        else:
            mergeables.append(handle)

    return mergeables
Пример #2
0
        def mapper(current_range):
            """
            Run the computation graph over one range and collect its results.

            Args:
                current_range (Range): A Range named tuple, representing the
                    range of entries to be processed, their input files and
                    information about friend trees. It is passed to
                    ``build_rdf_from_range`` and its ``id`` attribute is
                    forwarded to the computation graph callable.

            Returns:
                list: The (mergeable) values obtained from the results of
                the computation graph callable, one entry per result.
            """
            # Graphics are not needed inside a distributed task, so ROOT is
            # switched to batch mode.
            ROOT.gROOT.SetBatch(True)
            # Thread safety is enabled for the whole mapper: this function
            # releases the GIL before calling into C++ to run the event
            # loop, so two tasks could invoke the C++ interpreter at the
            # same time (e.g. with Dask multi-threaded or multi-process
            # workers).
            ROOT.EnableThreadSafety()

            # NOTE: header declaration (Utils.declare_headers) is not done
            # here; whether it belongs in Dist or in subclasses is still
            # undecided.
            # Prepare the worker runtime environment.
            initialization()

            # Build the RDataFrame for this mapper task, based on the type
            # of the head node.
            rdf = build_rdf_from_range(current_range)

            if not optimized:
                # The callable's results are converted one by one through
                # the Utils helper.
                return [
                    Utils.get_mergeablevalue(ptr)
                    for ptr in computation_graph_callable(rdf, current_range.id)
                ]

            # The callable builds and runs the graph on this ranged dataset,
            # returning the action results together with their types.
            handles, value_types = computation_graph_callable(
                rdf, current_range.id)

            mergeables = []
            for handle, value_type in zip(handles, value_types):
                if isinstance(handle, ROOT.RDF.RResultHandle):
                    # The handle is type-erased: instantiate GetResultPtr
                    # with the value type to obtain the result pointer.
                    typed_ptr = handle.GetResultPtr[value_type]()
                    mergeables.append(
                        ROOT.ROOT.Detail.RDF.GetMergeableValue(typed_ptr))
                else:
                    mergeables.append(handle)

            return mergeables