示例#1
0
    def test_nentries_not_multipleOf_npartitions(self):
        """
        Balanced ranges when the number of entries is not a multiple of the
        number of partitions.

        Two entry counts are split over four partitions: 10 entries
        (10 / 4 = 2.5) and 9 entries (9 / 4 = 2.25). In both expected
        outputs the leading ranges are the ones holding the extra entry.

        """
        head = HeadNode(1)
        ranges_builder = RangesBuilder(head)
        head.npartitions = 4

        # Fractional part of (nentries / npartitions) >= 0.5
        expected_for_10 = [(0, 3), (3, 6), (6, 8), (8, 10)]
        got_for_10 = rangesToTuples(ranges_builder._get_balanced_ranges(10))

        # Fractional part of (nentries / npartitions) < 0.5
        expected_for_9 = [(0, 3), (3, 5), (5, 7), (7, 9)]
        got_for_9 = rangesToTuples(ranges_builder._get_balanced_ranges(9))

        self.assertListEqual(got_for_10, expected_for_10)
        self.assertListEqual(got_for_9, expected_for_9)
示例#2
0
    def test_clustered_ranges_with_many_clusters_many_partitions(self):
        """
        Clustered ranges when the number of partitions equals the maximum
        possible one (the number of clusters).

        1000 partitions are requested and 1000 consecutive single-entry
        ranges covering [0, 1000) are expected.

        """
        head = HeadNode(1)
        ranges_builder = RangesBuilder(head)
        head.npartitions = 1000

        clustered = ranges_builder._get_clustered_ranges(
            "myTree", ["backend/1000clusters.root"])
        observed = rangesToTuples(clustered)

        # One (i, i + 1) pair per entry: (0, 1), (1, 2), ..., (999, 1000)
        expected = [(first, first + 1) for first in range(1000)]

        self.assertListEqual(observed, expected)
示例#3
0
    def test_nentries_multipleOf_npartitions(self):
        """
        Balanced ranges when the number of entries is a multiple of the
        number of partitions.

        Covers 10 entries in 5 partitions and 100 entries in 10 partitions;
        every expected range holds the same number of entries.

        """
        head = HeadNode(1)
        ranges_builder = RangesBuilder(head)

        # 10 entries over 5 partitions
        head.npartitions = 5
        got_small = rangesToTuples(ranges_builder._get_balanced_ranges(10))

        # 100 entries over 10 partitions
        head.npartitions = 10
        got_large = rangesToTuples(ranges_builder._get_balanced_ranges(100))

        # (0, 2), (2, 4), ..., (8, 10)
        expected_small = [(lo, lo + 2) for lo in range(0, 10, 2)]
        # (0, 10), (10, 20), ..., (90, 100)
        expected_large = [(lo, lo + 10) for lo in range(0, 100, 10)]

        self.assertListEqual(got_small, expected_small)
        self.assertListEqual(got_large, expected_large)
示例#4
0
    def test_warning_when_npartitions_greater_than_clusters(self):
        """
        Check that _get_clustered_ranges raises a warning when the number of
        partitions is bigger than the number of clusters in the dataset.

        Two partitions are requested; a single range (0, 10) is expected
        back, and the last recorded warning must be a UserWarning.

        """

        headnode = HeadNode(1)
        builder = RangesBuilder(headnode)

        treename = "TotemNtuple"
        filelist = ["backend/Slimmed_ntuple.root"]
        headnode.npartitions = 2

        ranges_reqd = [(0, 10)]

        with warnings.catch_warnings(record=True) as w:
            # Record every warning regardless of the filters active in the
            # process; with an "ignore" filter in place `w` would stay empty.
            warnings.simplefilter("always")

            # Trigger warning
            crs = builder._get_clustered_ranges(treename, filelist)
            ranges = rangesToTuples(crs)

        # Verify ranges
        self.assertListEqual(ranges, ranges_reqd)

        # Verify warning. Check that something was recorded first, so that a
        # missing warning is reported as a test failure and not an IndexError.
        self.assertTrue(w, "no warning was emitted")
        self.assertTrue(issubclass(w[-1].category, UserWarning))
示例#5
0
def _(distrdf_node: HeadNode, previous_rdf_node: Any, range_id: int) -> List:
    """
    Initial state of the computation_graph_generator function.

    'previous_rdf_node' is some kind of ROOT::RDataFrame. It is stored on the
    DistRDF head node so that the lifetimes of both objects are bound
    together and the next recursive state has an input to work on.

    'range_id' is not used by this implementation. An empty list is returned.
    """
    distrdf_node.pyroot_node = previous_rdf_node

    # Nothing else is produced for this state.
    nothing_to_return: List = []
    return nothing_to_return
示例#6
0
    def test_nentries_greater_than_npartitions(self):
        """
        Balanced ranges when the number of entries is smaller than the
        number of partitions.

        Seven partitions are requested for five entries and five
        single-entry ranges are expected. Note that, despite the method
        name, the number of entries is the smaller quantity here.

        """
        head = HeadNode(1)
        ranges_builder = RangesBuilder(head)

        total_entries = 5
        head.npartitions = 7  # more partitions than entries

        observed = rangesToTuples(
            ranges_builder._get_balanced_ranges(total_entries))

        # (0, 1), (1, 2), (2, 3), (3, 4), (4, 5)
        expected = [(entry, entry + 1) for entry in range(total_entries)]

        self.assertListEqual(observed, expected)
示例#7
0
    def test_clustered_ranges_with_one_cluster(self):
        """
        Clustered ranges for a dataset with a single cluster and a single
        requested partition: exactly one range, (0, 10), is expected.

        """
        head = HeadNode(1)
        ranges_builder = RangesBuilder(head)
        head.npartitions = 1

        expected = [(0, 10)]

        clustered = ranges_builder._get_clustered_ranges(
            "TotemNtuple", ["backend/Slimmed_ntuple.root"])
        observed = rangesToTuples(clustered)

        self.assertListEqual(observed, expected)
示例#8
0
    def test_buildranges_with_balanced_ranges(self):
        """
        build_ranges on a head node built from a plain number of entries,
        i.e. with no clusters involved, must produce balanced ranges.

        50 entries are split into 16 partitions: the expected output is two
        ranges of 4 entries followed by fourteen ranges of 3 entries.

        """
        head = HeadNode(50)
        ranges_builder = RangesBuilder(head)
        head.npartitions = 16

        observed = rangesToTuples(ranges_builder.build_ranges())

        # 50 = 2 * 4 + 14 * 3
        wider_ranges = [(0, 4), (4, 8)]
        narrower_ranges = [(lo, lo + 3) for lo in range(8, 50, 3)]
        expected = wider_ranges + narrower_ranges

        self.assertListEqual(observed, expected)
示例#9
0
    def test_clustered_ranges_with_many_clusters_four_partitions(self):
        """
        Clustered ranges must be as equal as possible when four partitions
        are requested: four ranges of 250 entries each are expected.

        """
        head = HeadNode(1)
        ranges_builder = RangesBuilder(head)
        head.npartitions = 4

        clustered = ranges_builder._get_clustered_ranges(
            "myTree", ["backend/1000clusters.root"])
        observed = rangesToTuples(clustered)

        # (0, 250), (250, 500), (500, 750), (750, 1000)
        expected = [(lo, lo + 250) for lo in range(0, 1000, 250)]

        self.assertListEqual(observed, expected)
示例#10
0
    def test_clustered_ranges_with_two_clusters_two_partitions(self):
        """
        Clustered ranges must respect the cluster boundaries, even when that
        yields ranges with very different numbers of entries.

        Two partitions are requested and the expected ranges are (0, 777)
        and (777, 1000).

        """
        head = HeadNode(1)
        ranges_builder = RangesBuilder(head)
        head.npartitions = 2

        expected = [(0, 777), (777, 1000)]

        clustered = ranges_builder._get_clustered_ranges(
            "myTree", ["backend/2clusters.root"])
        observed = rangesToTuples(clustered)

        self.assertListEqual(observed, expected)
示例#11
0
    def test_friend_info_with_ttree(self):
        """
        Check that FriendInfo correctly stores information about the friend
        trees.

        The .root files used by the test are removed in a `finally` clause,
        so they do not stay on disk when one of the assertions fails.
        """
        self.create_parent_tree()
        self.create_friend_tree()

        # Parent Tree
        base_tree_name = "T"
        base_tree_filename = "treeparent.root"

        # Friend Tree
        friend_tree_name = "TF"
        friend_tree_filename = "treefriend.root"

        try:
            basetree = ROOT.TChain(base_tree_name)
            basetree.Add(base_tree_filename)

            friendtree = ROOT.TChain(friend_tree_name)
            friendtree.Add(friend_tree_filename)

            # Add friendTree to the parent
            basetree.AddFriend(friendtree)

            # Instantiate head node of the graph with the base TTree
            headnode = HeadNode(basetree)

            # Retrieve FriendInfo instance
            friend_info = headnode._get_friend_info()

            # Check that FriendInfo has non-empty lists
            self.assertTrue(friend_info)

            # Check that the two lists with treenames and filenames are
            # populated as expected.
            self.assertListEqual(friend_info.friend_names, [friend_tree_name])
            self.assertListEqual(friend_info.friend_file_names,
                                 [[friend_tree_filename]])
        finally:
            # Remove unnecessary .root files, whether the checks above
            # passed or not.
            os.remove(base_tree_filename)
            os.remove(friend_tree_filename)
示例#12
0
    def test_buildranges_with_clustered_ranges(self):
        """
        build_ranges on a head node built from a tree name and a file name
        must produce clustered ranges.

        1000 partitions are requested and 1000 consecutive single-entry
        ranges covering [0, 1000) are expected.

        """
        head = HeadNode("myTree", "backend/1000clusters.root")
        ranges_builder = RangesBuilder(head)
        head.npartitions = 1000

        observed = rangesToTuples(ranges_builder.build_ranges())

        # One (i, i + 1) pair per entry: (0, 1), (1, 2), ..., (999, 1000)
        expected = [(first, first + 1) for first in range(1000)]

        self.assertListEqual(observed, expected)