def __init__(self, *args):
    """
    Build the head node and its transformation proxy for the tests.

    The head node is created with ``None`` as its first argument
    (`npartitions`); individual tests are expected to overwrite it as
    needed. A ``TestBackend`` instance is attached to the head node, and
    the node is then wrapped in a ``TransformationProxy``.

    Args:
        *args: Forwarded unchanged to ``HeadNode.get_headnode`` after the
            leading ``None``.
    """
    node = HeadNode.get_headnode(None, *args)
    self.headnode = node
    node.backend = DistRDataFrameInterface.TestBackend()
    self.headproxy = Proxy.TransformationProxy(node)
def make_dataframe(self, *args, **kwargs):
    """
    Build a distributed RDataFrame whose computations can be sent to a
    Dask cluster.

    The number of partitions is taken from the optional ``npartitions``
    keyword argument; when the caller does not supply it, ``None`` is
    forwarded to the head node factory.

    Args:
        *args: Positional arguments forwarded to ``HeadNode.get_headnode``.
        **kwargs: Only ``npartitions`` is read here; any other keyword
            argument is left untouched and not forwarded.

    Returns:
        The ``DataFrame.RDataFrame`` wrapping the newly created head node.
    """
    requested_partitions = kwargs.pop("npartitions", None)
    return DataFrame.RDataFrame(
        HeadNode.get_headnode(self, requested_partitions, *args)
    )
def make_dataframe(self, *args, **kwargs):
    """
    Creates an instance of distributed RDataFrame that can send
    computations to a Dask cluster.

    The number of partitions for this dataframe is chosen as follows:

    1. The user-supplied ``npartitions`` keyword argument, if present
       (an explicit ``None`` is forwarded as-is).
    2. Otherwise, the value returned by this backend's
       ``optimize_npartitions`` method. Any further fallback (the
       original comment mentions a default of 2) is presumably handled
       inside ``optimize_npartitions`` -- confirm there.

    Args:
        *args: Positional arguments forwarded to ``HeadNode.get_headnode``.
        **kwargs: Only ``npartitions`` is read here; any other keyword
            argument is left untouched and not forwarded.

    Returns:
        The ``DataFrame.RDataFrame`` wrapping the newly created head node.
    """
    # Only ask the backend for a guess when the user did not choose a value.
    # Passing `self.optimize_npartitions()` as the default of `dict.pop`
    # would evaluate it unconditionally, even when its result is discarded.
    if "npartitions" in kwargs:
        npartitions = kwargs.pop("npartitions")
    else:
        npartitions = self.optimize_npartitions()

    headnode = HeadNode.get_headnode(self, npartitions, *args)
    return DataFrame.RDataFrame(headnode)
def test_count_result_invariance(self):
    """
    Check that the entry count of the dataset is independent of the
    number of partitions.

    A dependency could have appeared if TEntryList were used to restrict
    a distributed task to a certain range of entries of the TChain
    without the changes from
    https://github.com/root-project/root/commit/77bd5aa82e9544811e0d5fce197ab87c739c2e23
    being in place.
    """
    tree = "entries"
    # The same file is listed five times; the expected total of 100 below
    # presumably corresponds to 5 files x 20 entries each.
    files = ["1cluster_20entries.root"] * 5

    for nparts in range(1, 6):
        node = HeadNode.get_headnode(nparts, tree, files)
        dataframe = DataFrame.RDataFrame(
            node, DistRDataFrameInvariants.TestBackend()
        )
        total_entries = dataframe.Count().GetValue()
        self.assertEqual(total_entries, 100)
def create_dummy_headnode(*args):
    """
    Return a placeholder head node for use in the tests.

    Two leading ``None`` values are passed to ``HeadNode.get_headnode``
    ahead of ``*args``. One of them stands for `npartitions`, which the
    tests set later according to their needs; the other is presumably the
    backend -- confirm against ``get_headnode``'s signature.

    Args:
        *args: Remaining positional arguments forwarded to
            ``HeadNode.get_headnode``.
    """
    dummy_node = HeadNode.get_headnode(None, None, *args)
    return dummy_node