def test_nentries_not_multipleOf_npartitions(self):
    """
    Balanced ranges when the number of entries is not a multiple of the
    number of partitions.

    Two entry counts (10 and 9) are split over four partitions. In both
    expected outputs the leading ranges hold one entry more than the
    trailing ones.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)
    headnode.npartitions = 4

    # 10 / 4 = 2.5 -> fractional part of (nentries / npartitions) >= 0.5
    expected_for_ten = [(0, 3), (3, 6), (6, 8), (8, 10)]
    # 9 / 4 = 2.25 -> fractional part of (nentries / npartitions) < 0.5
    expected_for_nine = [(0, 3), (3, 5), (5, 7), (7, 9)]

    observed_for_ten = rangesToTuples(builder._get_balanced_ranges(10))
    observed_for_nine = rangesToTuples(builder._get_balanced_ranges(9))

    self.assertListEqual(observed_for_ten, expected_for_ten)
    self.assertListEqual(observed_for_nine, expected_for_nine)
def test_clustered_ranges_with_many_clusters_many_partitions(self):
    """
    Check that _get_clustered_ranges yields clustered ranges that are as
    even as possible when the number of partitions is the largest possible
    one, i.e. the number of clusters.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)
    headnode.npartitions = 1000

    clustered = builder._get_clustered_ranges(
        "myTree", ["backend/1000clusters.root"])
    observed = rangesToTuples(clustered)

    # One unit-length range per partition: (0, 1), (1, 2), ..., (999, 1000)
    expected = [(first, first + 1) for first in range(1000)]

    self.assertListEqual(observed, expected)
def test_nentries_multipleOf_npartitions(self):
    """
    Balanced ranges when the number of entries is an exact multiple of the
    number of partitions.

    Covers 10 entries over 5 partitions and 100 entries over 10 partitions;
    every expected range has the same length.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)

    # 10 entries, 5 partitions -> ranges of 2 entries each
    headnode.npartitions = 5
    observed_small = rangesToTuples(builder._get_balanced_ranges(10))

    # 100 entries, 10 partitions -> ranges of 10 entries each
    headnode.npartitions = 10
    observed_large = rangesToTuples(builder._get_balanced_ranges(100))

    expected_small = [(lo, lo + 2) for lo in range(0, 10, 2)]
    expected_large = [(lo, lo + 10) for lo in range(0, 100, 10)]

    self.assertListEqual(observed_small, expected_small)
    self.assertListEqual(observed_large, expected_large)
def test_warning_when_npartitions_greater_than_clusters(self):
    """
    Check that _get_clustered_ranges raises a warning when the number of
    partitions is bigger than the number of clusters in the dataset.

    Two partitions are requested, while the expected result is the single
    range (0, 10). The call must both return that range and emit at least
    one UserWarning.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)

    treename = "TotemNtuple"
    filelist = ["backend/Slimmed_ntuple.root"]
    headnode.npartitions = 2

    ranges_reqd = [(0, 10)]

    with warnings.catch_warnings(record=True) as caught:
        # Make sure the warning is always recorded. With the default
        # filters a warning that was already emitted from the same location
        # (e.g. by another test) would be dropped and never reach `caught`.
        warnings.simplefilter("always")

        # Trigger warning
        crs = builder._get_clustered_ranges(treename, filelist)
        ranges = rangesToTuples(crs)

    # Verify ranges
    self.assertListEqual(ranges, ranges_reqd)

    # Verify warning. Any recorded entry may be the one of interest, since
    # the "always" filter can also let unrelated warnings through. A
    # unittest assertion is used so the check survives `python -O`.
    self.assertTrue(
        any(issubclass(entry.category, UserWarning) for entry in caught),
        "Expected a UserWarning to be emitted by _get_clustered_ranges")
def _(distrdf_node: HeadNode, previous_rdf_node: Any, range_id: int) -> List:
    """
    Handle the initial state of the computation_graph_generator function.

    Args:
        distrdf_node: The DistRDF head node of the graph.
        previous_rdf_node: Some kind of ROOT::RDataFrame; it is stored on
            the head node so that it can serve as input for the next
            recursive state.
        range_id: Not used by this implementation.

    Returns:
        An empty list.
    """
    # Bind the lifetime of the RDataFrame object to that of the head node.
    distrdf_node.pyroot_node = previous_rdf_node

    return []
def test_nentries_greater_than_npartitions(self):
    """
    Balanced ranges when the number of entries is smaller than the number
    of partitions.

    Note: despite the name of this test, the scenario exercised is
    5 entries with 7 requested partitions. The expected output is one
    single-entry range per entry.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)

    entry_count = 5
    headnode.npartitions = 7  # more partitions than entries

    observed = rangesToTuples(builder._get_balanced_ranges(entry_count))
    expected = [(entry, entry + 1) for entry in range(entry_count)]

    self.assertListEqual(observed, expected)
def test_clustered_ranges_with_one_cluster(self):
    """
    Check that _get_clustered_ranges produces exactly one range when the
    dataset holds a single cluster and one partition is requested.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)
    headnode.npartitions = 1

    clustered = builder._get_clustered_ranges(
        "TotemNtuple", ["backend/Slimmed_ntuple.root"])
    observed = rangesToTuples(clustered)

    # The single expected range spans entries 0 to 10
    self.assertListEqual(observed, [(0, 10)])
def test_buildranges_with_balanced_ranges(self):
    """
    Check that build_ranges yields balanced ranges when no clusters are
    involved.

    The head node is created from an entry count of 50 and split into 16
    partitions: the expected output is two ranges of 4 entries followed by
    fourteen ranges of 3 entries.
    """
    headnode = HeadNode(50)
    builder = RangesBuilder(headnode)
    headnode.npartitions = 16

    observed = rangesToTuples(builder.build_ranges())

    # (0, 4), (4, 8), then (8, 11), (11, 14), ..., (47, 50)
    expected = [(0, 4), (4, 8)]
    expected.extend((lo, lo + 3) for lo in range(8, 50, 3))

    self.assertListEqual(observed, expected)
def test_clustered_ranges_with_many_clusters_four_partitions(self):
    """
    Check that _get_clustered_ranges yields clustered ranges that are as
    even as possible when four partitions are requested.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)
    headnode.npartitions = 4

    clustered = builder._get_clustered_ranges(
        "myTree", ["backend/1000clusters.root"])
    observed = rangesToTuples(clustered)

    # Four ranges of 250 entries each
    expected = [(0, 250), (250, 500), (500, 750), (750, 1000)]

    self.assertListEqual(observed, expected)
def test_clustered_ranges_with_two_clusters_two_partitions(self):
    """
    Check that _get_clustered_ranges honours cluster boundaries, even when
    doing so leads to ranges with very different numbers of entries.
    """
    headnode = HeadNode(1)
    builder = RangesBuilder(headnode)
    headnode.npartitions = 2

    clustered = builder._get_clustered_ranges(
        "myTree", ["backend/2clusters.root"])
    observed = rangesToTuples(clustered)

    # Uneven split: 777 entries in the first range, 223 in the second
    expected = [(0, 777), (777, 1000)]

    self.assertListEqual(observed, expected)
def test_friend_info_with_ttree(self):
    """
    Check that FriendInfo correctly stores information about the friend
    trees.

    A parent chain ("T" in treeparent.root) gets a friend chain ("TF" in
    treefriend.root) attached; the FriendInfo retrieved from the head node
    must report exactly that friend name and file name.

    The .root files are removed in a `finally` clause so that they do not
    linger on disk when a setup step or an assertion fails.
    """
    # Parent Tree
    base_tree_name = "T"
    base_tree_filename = "treeparent.root"

    # Friend Tree
    friend_tree_name = "TF"
    friend_tree_filename = "treefriend.root"

    try:
        # NOTE(review): these helpers are presumably what writes the two
        # .root files named above - confirm against their definitions.
        self.create_parent_tree()
        self.create_friend_tree()

        basetree = ROOT.TChain(base_tree_name)
        basetree.Add(base_tree_filename)

        friendtree = ROOT.TChain(friend_tree_name)
        friendtree.Add(friend_tree_filename)

        # Add friendTree to the parent
        basetree.AddFriend(friendtree)

        # Instantiate head node of the graph with the base TTree
        headnode = HeadNode(basetree)

        # Retrieve FriendInfo instance
        friend_info = headnode._get_friend_info()

        # Check that FriendInfo has non-empty lists
        self.assertTrue(friend_info)

        # Check that the two lists with treenames and filenames are
        # populated as expected.
        self.assertListEqual(friend_info.friend_names, [friend_tree_name])
        self.assertListEqual(friend_info.friend_file_names,
                             [[friend_tree_filename]])
    finally:
        # Remove unnecessary .root files. Guard on existence so that a
        # failure before a file was written is not masked by a
        # FileNotFoundError raised from the cleanup itself.
        for leftover in (base_tree_filename, friend_tree_filename):
            if os.path.exists(leftover):
                os.remove(leftover)
def test_buildranges_with_clustered_ranges(self):
    """
    Check that build_ranges yields clustered ranges when the dataset
    contains clusters.

    The head node is built from the "myTree" tree in
    backend/1000clusters.root and 1000 partitions are requested; one
    unit-length range per partition is expected.
    """
    headnode = HeadNode("myTree", "backend/1000clusters.root")
    builder = RangesBuilder(headnode)
    headnode.npartitions = 1000

    observed = rangesToTuples(builder.build_ranges())

    # (0, 1), (1, 2), ..., (999, 1000)
    expected = [(first, first + 1) for first in range(1000)]

    self.assertListEqual(observed, expected)