Пример #1
0
        def __init__(self, *args):
            """
            Set up a head node with a test backend and a proxy wrapping it.

            All positional arguments are forwarded to `HeadNode.get_headnode`
            after a leading `None`.
            """
            # `None` is passed as `npartitions`; the tests change it as needed.
            node = HeadNode.get_headnode(None, *args)
            self.headnode = node

            # Attach the test backend before the proxy is built, keeping the
            # original order of operations.
            node.backend = DistRDataFrameInterface.TestBackend()

            self.headproxy = Proxy.TransformationProxy(node)
Пример #2
0
 def make_dataframe(self, *args, **kwargs):
     """
     Build a distributed RDataFrame that can send its computations to a
     Dask cluster.

     Positional arguments are forwarded unchanged to
     `HeadNode.get_headnode`. The only keyword argument consulted is
     `npartitions`; any other keyword argument is not forwarded.
     """
     # Number of partitions for this dataframe: the user-supplied
     # `npartitions` optional argument, or None when it was not given.
     requested_partitions = kwargs.pop("npartitions", None)
     return DataFrame.RDataFrame(
         HeadNode.get_headnode(self, requested_partitions, *args))
Пример #3
0
 def make_dataframe(self, *args, **kwargs):
     """
     Creates an instance of distributed RDataFrame that can send computations
     to a Dask cluster.

     Positional arguments are forwarded unchanged to
     `HeadNode.get_headnode`. The only keyword argument consulted is
     `npartitions`; any other keyword argument is not forwarded.
     """
     # Set the number of partitions for this dataframe, one of the following:
     # 1. User-supplied `npartitions` optional argument (forwarded as given,
     #    including an explicit None)
     # 2. An educated guess according to the backend, using the backend's
     #    `optimize_npartitions` function
     # NOTE(review): the original comment also listed a fallback of 2;
     # presumably that is applied inside `optimize_npartitions` -- confirm.
     #
     # `optimize_npartitions` is only called when the user did not supply a
     # value. Passing it as the default of `dict.pop` would evaluate it on
     # every call, even when its result is discarded.
     if "npartitions" in kwargs:
         npartitions = kwargs.pop("npartitions")
     else:
         npartitions = self.optimize_npartitions()
     headnode = HeadNode.get_headnode(self, npartitions, *args)
     return DataFrame.RDataFrame(headnode)
Пример #4
0
    def test_count_result_invariance(self):
        """
        Check that the number of entries counted in the dataset is the same
        for every number of partitions tried.

        A dependence on the partitioning could have happened if we used
        TEntryList to restrict processing on a certain range of entries of
        the TChain in a distributed task, but the changes in
        https://github.com/root-project/root/commit/77bd5aa82e9544811e0d5fce197ab87c739c2e23
        were not implemented yet.
        """
        tree_name = "entries"
        # The same file name is listed five times.
        input_files = ["1cluster_20entries.root" for _ in range(5)]
        expected_entries = 100

        for npartitions in (1, 2, 3, 4, 5):
            # A fresh head node and backend are created for every partitioning.
            node = HeadNode.get_headnode(npartitions, tree_name, input_files)
            test_backend = DistRDataFrameInvariants.TestBackend()
            dataframe = DataFrame.RDataFrame(node, test_backend)
            counted = dataframe.Count().GetValue()
            self.assertEqual(counted, expected_entries)
Пример #5
0
def create_dummy_headnode(*args):
    """
    Return a dummy head node instance for use in the tests.

    Two leading `None` placeholders are passed to `HeadNode.get_headnode`,
    followed by the given positional arguments. One of the placeholders
    stands for `npartitions`, which the tests modify according to their
    needs. NOTE(review): the other is presumably the backend -- confirm
    against `HeadNode.get_headnode`.
    """
    dummy_headnode = HeadNode.get_headnode(None, None, *args)
    return dummy_headnode