示例#1
0
    def test_incorrect_args(self):
        """HeadNode construction raises TypeError for unsupported arguments"""
        # Two-argument form with an incorrect first argument
        self.assertRaises(TypeError, Node.HeadNode, 10, "file.root")

        # Three-argument form with an incorrect third argument
        self.assertRaises(
            TypeError, Node.HeadNode, "treename", "file.root", "column1")

        # Constructor called without any argument
        self.assertRaises(TypeError, Node.HeadNode)
示例#2
0
 def test_kwargs_read(self):
     """Keyword arguments passed to an operation end up in its kwargs."""
     head = Node.HeadNode(1)
     head.backend = TestBackend()
     proxy = Proxy.TransformationProxy(head)
     defined = proxy.Define(1, "b", a="1", b=2)
     expected_kwargs = {"a": "1", "b": 2}
     self.assertEqual(defined.operation.kwargs, expected_kwargs)
示例#3
0
    def test_node_pickle(self):
        """
        Pickle and un-pickle graph nodes, then check the type of the
        restored objects and compare them with the originals.
        """
        import pickle

        # Build a small graph starting from a head node
        head = Node.HeadNode(1)
        head.backend = TestBackend()
        root = Proxy.TransformationProxy(head)
        define_proxy = root.Define("a", b="c")  # First child of the head
        define_count = define_proxy.Count()  # noqa: avoid PEP8 F841
        filter_proxy = root.Filter("b")
        filter_count = filter_proxy.Count()  # noqa: avoid PEP8 F841

        # Serialize the nodes. Both `root` and `filter_proxy` are
        # TransformationProxy objects, so the wrapped node is what is pickled.
        head_bytes = pickle.dumps(root.proxied_node)
        filter_bytes = pickle.dumps(filter_proxy.proxied_node)

        # Restore the nodes from their serialized form
        restored_head = pickle.loads(head_bytes)
        restored_filter = pickle.loads(filter_bytes)

        self.assertIsInstance(restored_head, type(root.proxied_node))
        self.assertIsInstance(restored_filter, type(filter_proxy.proxied_node))
        # NOTE(review): the first comparison receives the proxy itself while
        # the second receives the proxied node - confirm this is intended.
        self.assertGraphs(root, restored_head)
        self.assertGraphs(filter_proxy.proxied_node, restored_filter)
示例#4
0
 def test_args_read(self):
     """Positional arguments passed to an operation end up in its args."""
     head = Node.HeadNode(1)
     head.backend = TestBackend()
     proxy = Proxy.TransformationProxy(head)
     defined = proxy.Define(1, "b", a="1", b=2)
     expected_args = [1, "b"]
     self.assertEqual(defined.operation.args, expected_args)
示例#5
0
    def test_mapper_from_graph(self):
        """
        A simple test case to check the working of mapper.

        Builds a graph containing two ``Count`` nodes, runs the callable
        produced by ``ComputationGraphGenerator`` on a mock object and checks
        the order recorded by the mock, the action nodes reported by the
        generator and the values returned by the callable.
        """
        # A mock RDF object
        t = ComputationGraphGeneratorTest.Temp()

        # Head node
        hn = Node.HeadNode(1)
        hn.backend = ComputationGraphGeneratorTest.TestBackend()
        node = Proxy.TransformationProxy(hn)
        # Set of operations to build the graph
        n1 = node.Define()
        n2 = node.Filter().Filter()
        n4 = n2.Count()
        n5 = n1.Count()
        # n6 is never read again; it is only kept bound to a local name
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        # Generate and execute the mapper
        generator = ComputationGraphGenerator.ComputationGraphGenerator(
            node.proxied_node)
        mapper_func = generator.get_callable()
        values = mapper_func(t)
        nodes = generator.get_action_nodes()

        # Expected content of t.ord_list after the mapper has run.
        # NOTE(review): the numbers presumably encode the operation type
        # (1 = Define, 2 = Filter, 3 = Count) - confirm against Temp.
        reqd_order = [1, 3, 2, 2, 3, 2]

        self.assertEqual(t.ord_list, reqd_order)
        # Both Count nodes are expected as action nodes, n5 before n4
        self.assertListEqual(nodes, [n5.proxied_node, n4.proxied_node])
        # One returned value per action node; each is expected to be the mock
        self.assertListEqual(values, [t, t])
示例#6
0
    def test_get_state(self):
        """
        Check the dictionaries returned by ``__getstate__`` for a head node
        and for its single child node.
        """
        head = Node.HeadNode(1)
        head.backend = TestBackend()
        root = Proxy.TransformationProxy(head)
        child = root.Define("a", b="c")  # Only child of the head node

        # Expected state of the head node
        expected_head_state = {"children": [child.proxied_node]}
        # Expected state of the child node
        expected_child_state = dict(
            operation_name="Define",
            operation_args=["a"],
            operation_kwargs={"b": "c"},
            children=[],
        )

        # `root` and `child` are TransformationProxy objects; the state
        # dictionaries are requested from the nodes they wrap.
        head_state = root.proxied_node.__getstate__()
        self.assertDictEqual(head_state, expected_head_state)
        child_state = child.proxied_node.__getstate__()
        self.assertDictEqual(child_state, expected_child_state)
示例#7
0
 def test_attr_read(self):
     """Looking up an operation on the proxy records the operation name."""
     head = Node.HeadNode(1)
     head.backend = TestBackend()
     proxy = Proxy.TransformationProxy(head)
     # Only the attribute lookup matters here; the result is never called.
     define_attr = proxy.Define  # noqa: avoid PEP8 F841
     self.assertEqual(proxy._new_op_name, "Define")
示例#8
0
    def test_dfs_graph_with_computed_values_pruning(self):
        """
        Test case to check that computed values in action nodes get
        pruned.

        Builds a graph with two ``Count`` nodes, assigns a value to one of
        them and compares the order returned by ``DfsTest.traverse`` with the
        expected one.
        """
        # Head node
        hn = Node.HeadNode(1)
        hn.backend = TestBackend()
        node = Proxy.TransformationProxy(hn)

        # Graph nodes
        n1 = node.Define()
        n2 = node.Filter()
        n3 = n2.Filter()
        n4 = n3.Count()  # noqa: avoid PEP8 F841
        n5 = n1.Filter()
        n6 = n5.Count()
        n7 = node.Filter()

        # This is to make sure action nodes with
        # already computed values are pruned.
        n6.proxied_node.value = 1
        # This is to make sure that transformation
        # leaf nodes with value (possibly set intentionally)
        # don't get pruned.
        # NOTE(review): unlike n6 above, the value is assigned on the proxy
        # object and not on n7.proxied_node - confirm this is intended.
        n7.value = 1  # noqa: avoid PEP8 F841

        obtained_order = DfsTest.traverse(node=node.get_head())

        # The node 'n6' will be pruned. Hence,
        # there's only one '3' in this list.
        # NOTE(review): the numbers presumably encode the operation type
        # (1 = Define, 2 = Filter, 3 = Count) - confirm against DfsTest.
        reqd_order = [1, 2, 2, 2, 3, 2]

        self.assertEqual(obtained_order, reqd_order)
示例#9
0
    def test_dfs_graph_with_parent_pruning(self):
        """
        Test case to check that parent nodes with no user references don't
        get pruned.

        The local name of a node that still has children is rebound to None
        and the order returned by ``DfsTest.traverse`` is expected to still
        contain every node of the graph.
        """
        # Head node
        hn = Node.HeadNode(1)
        hn.backend = TestBackend()
        node = Proxy.TransformationProxy(hn)

        # Graph nodes
        n1 = node.Define()
        n2 = node.Filter()
        n3 = n2.Filter()
        # n4, n5 and n6 are never read again; they are only kept bound to
        # local names.
        n4 = n3.Count()  # noqa: avoid PEP8 F841
        n5 = n1.Filter()  # noqa: avoid PEP8 F841
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        # Remove references from n2 (which shouldn't affect the graph)
        n2 = None

        obtained_order = DfsTest.traverse(node=node.get_head())

        # NOTE(review): the numbers presumably encode the operation type
        # (1 = Define, 2 = Filter, 3 = Count) - confirm against DfsTest.
        reqd_order = [1, 2, 2, 2, 3, 2]
        # Removing references from n2 will not prune any node
        # because n2 still has children

        self.assertEqual(obtained_order, reqd_order)
示例#10
0
    def test_dfs_graph_with_recursive_pruning(self):
        """
        Test case to check that nodes in a DistRDF graph with no user references
        and no children get pruned recursively.

        The local names of a ``Count`` node and of its two ancestors are
        rebound to None; the order returned by ``DfsTest.traverse`` is then
        expected to contain only the remaining three nodes.
        """
        # Head node
        hn = Node.HeadNode(1)
        hn.backend = TestBackend()
        node = Proxy.TransformationProxy(hn)

        # Graph nodes
        n1 = node.Define()
        n2 = node.Filter()
        n3 = n2.Filter()
        n4 = n3.Count()  # noqa: avoid PEP8 F841
        # n5 and n6 are never read again; they are only kept bound to local
        # names.
        n5 = n1.Filter()  # noqa: avoid PEP8 F841
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        # Remove references from n4 and its parent nodes
        n4 = n3 = n2 = None  # noqa: avoid PEP8 F841

        obtained_order = DfsTest.traverse(node=node.get_head())

        # NOTE(review): the numbers presumably encode the operation type
        # (1 = Define, 2 = Filter) - confirm against DfsTest.
        reqd_order = [1, 2, 2]

        self.assertEqual(obtained_order, reqd_order)
示例#11
0
    def test_dfs_graph_with_pruning_transformations(self):
        """
        Test case to check that transformation nodes with no children and
        no user references get pruned.

        The local name ``n5`` first holds a ``Filter`` node and is then
        rebound to a ``Count`` node created from the same parent; the order
        returned by ``DfsTest.traverse`` is compared with the expected one.
        """
        # Head node
        hn = Node.HeadNode(1)
        hn.backend = TestBackend()
        node = Proxy.TransformationProxy(hn)

        # Graph nodes
        n1 = node.Define()
        n2 = node.Filter()
        n3 = n2.Filter()
        n4 = n3.Count()  # noqa: avoid PEP8 F841
        n5 = n1.Filter()  # noqa: avoid PEP8 F841
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        # Transformation pruning, n5 was earlier a transformation node
        # (rebinding the name drops the only local reference to the Filter)
        n5 = n1.Count()  # noqa: avoid PEP8 F841

        obtained_order = DfsTest.traverse(node=node.get_head())

        # NOTE(review): the numbers presumably encode the operation type
        # (1 = Define, 2 = Filter, 3 = Count) - confirm against DfsTest.
        reqd_order = [1, 3, 2, 2, 3, 2]

        self.assertEqual(obtained_order, reqd_order)
示例#12
0
 def test_transformation_proxy_return(self):
     """A transformation call returns a proxy that wraps a Node object."""
     head = Node.HeadNode(1)
     head.backend = TestBackend()
     proxy = Proxy.TransformationProxy(head)
     result = proxy.Define(1)
     # The returned object is itself a proxy ...
     self.assertIsInstance(result, Proxy.TransformationProxy)
     # ... and the object it wraps is a graph node.
     self.assertIsInstance(result.proxied_node, Node.Node)
示例#13
0
        def __init__(self, *args):
            """Create a head node from ``args`` and wrap it in a proxy."""
            head = Node.HeadNode(*args)
            self.headnode = head

            # Use DistRDataFrameInterface.TestBackend as the node's backend
            head.backend = DistRDataFrameInterface.TestBackend()

            self.headproxy = Proxy.TransformationProxy(head)
示例#14
0
    def test_three_args_with_single_file(self):
        """Check HeadNode.args for tree name + one input file + branches"""
        branch_names = ["branch1", "branch2"]

        # The same branch names stored in a ROOT C++ vector of strings
        branch_vec = ROOT.std.vector("string")()
        for name in branch_names:
            branch_vec.push_back(name)

        # Third argument given as a Python list
        head_from_list = Node.HeadNode("treename", "file.root", branch_names)

        # Third argument given as a ROOT C++ vector
        head_from_vec = Node.HeadNode("treename", "file.root", branch_vec)

        self.assertArgs(
            head_from_list.args, ["treename", "file.root", branch_names])
        self.assertArgs(
            head_from_vec.args, ["treename", "file.root", branch_vec])
示例#15
0
    def test_num_entries_two_args_case(self):
        """
        Check the number of entries reported by a HeadNode built from two
        arguments: a tree name plus the input file given as a string, as a
        Python list and as a ROOT C++ vector.
        """
        # fill_tree is defined elsewhere; presumably it writes a tree with
        # 1111 entries to "data.root" - verify against its definition.
        self.fill_tree(1111)
        file_vec = ROOT.std.vector("string")()
        file_vec.push_back("data.root")

        # One HeadNode per supported form of the second argument
        head_from_str = Node.HeadNode("tree", "data.root")
        head_from_list = Node.HeadNode("tree", ["data.root"])
        head_from_vec = Node.HeadNode("tree", file_vec)

        for head in (head_from_str, head_from_list, head_from_vec):
            self.assertEqual(head.get_num_entries(), 1111)
示例#16
0
    def test_num_entries_single_arg_case(self):
        """
        Check the number of entries reported by a HeadNode built from a
        single integer argument.
        """
        entries = 123
        head = Node.HeadNode(entries)  # Create HeadNode instance

        self.assertEqual(head.get_num_entries(), entries)
示例#17
0
    def test_two_args(self):
        """Check HeadNode.args for tree name + input file(s) in three forms"""
        file_names = ["file1.root", "file2.root"]

        # The same file names stored in a ROOT C++ vector of strings
        file_vec = ROOT.std.vector("string")()
        for name in file_names:
            file_vec.push_back(name)

        # Second argument given as a string
        head_from_str = Node.HeadNode("treename", "file.root")

        # Second argument given as a Python list
        head_from_list = Node.HeadNode("treename", file_names)

        # Second argument given as a ROOT C++ vector
        head_from_vec = Node.HeadNode("treename", file_vec)

        self.assertArgs(head_from_str.args, ["treename", "file.root"])
        self.assertArgs(head_from_list.args, ["treename", file_names])
        self.assertArgs(head_from_vec.args, ["treename", file_vec])
示例#18
0
    def test_num_entries_three_args_case(self):
        """
        Check the number of entries reported by a HeadNode built from three
        arguments: tree name, file name and a selection of branches given
        either as a Python list or as a ROOT C++ vector.
        """
        # fill_tree is defined elsewhere; presumably it writes a tree with
        # 1234 entries to "data.root" - verify against its definition.
        self.fill_tree(1234)
        vec_b1 = ROOT.std.vector("string")()
        vec_b2 = ROOT.std.vector("string")()
        vec_b1.push_back("b1")
        vec_b2.push_back("b2")

        # One HeadNode per branch selection, created in this order
        branch_specs = (["b1"], ["b2"], vec_b1, vec_b2)
        heads = [
            Node.HeadNode("tree", "data.root", spec) for spec in branch_specs
        ]

        for head in heads:
            self.assertEqual(head.get_num_entries(), 1234)
示例#19
0
    def test_inmemory_tree(self):
        """Requesting input files of a HeadNode on an in-memory tree fails"""
        mem_tree = ROOT.TTree("tree", "Tree in memory")
        buf = array("i", [0])
        mem_tree.Branch("x", buf, "x/I")
        # Fill the tree with the values 0..99 through the one-element buffer
        for value in range(100):
            buf[0] = value
            mem_tree.Fill()

        head = Node.HeadNode(mem_tree)
        # Trees with no associated files are not supported
        self.assertRaises(RuntimeError, head.get_inputfiles)
示例#20
0
    def test_three_args_with_multiple_files(self):
        """Check HeadNode.args for tree name + list of files + branches"""
        branch_names = ["branch1", "branch2"]
        file_names = ["file1.root", "file2.root"]

        # File names stored in a ROOT C++ vector of strings
        file_vec = ROOT.std.vector("string")()
        for name in file_names:
            file_vec.push_back(name)

        # Branch names stored in a ROOT C++ vector of strings
        branch_vec = ROOT.std.vector("string")()
        for name in branch_names:
            branch_vec.push_back(name)

        # Files as Python list, branches as Python list
        head_list_list = Node.HeadNode("treename", file_names, branch_names)

        # Files as Python list, branches as ROOT C++ vector
        head_list_vec = Node.HeadNode("treename", file_names, branch_vec)

        # Files as ROOT C++ vector, branches as Python list
        head_vec_list = Node.HeadNode("treename", file_vec, branch_names)

        # Files and branches both as ROOT C++ vectors
        head_vec_vec = Node.HeadNode("treename", file_vec, branch_vec)

        self.assertArgs(
            head_list_list.args, ["treename", file_names, branch_names])
        self.assertArgs(
            head_list_vec.args, ["treename", file_names, branch_vec])
        self.assertArgs(
            head_vec_list.args, ["treename", file_vec, branch_names])
        self.assertArgs(
            head_vec_vec.args, ["treename", file_vec, branch_vec])
示例#21
0
    def test_num_entries_with_ttree_arg(self):
        """
        Check the number of entries reported by a HeadNode built directly
        from a TTree.
        """
        size = 4
        mem_tree = ROOT.TTree("tree", "test")  # Tree under test
        vec = ROOT.std.vector("int")(size)  # Vector of `size` elements
        mem_tree.Branch("vectorb", vec)  # Branch holding the vector

        # One Fill() call per vector element, after setting that element to 1
        for index in range(size):
            vec[index] = 1
            mem_tree.Fill()

        head = Node.HeadNode(mem_tree)

        self.assertEqual(head.get_num_entries(), size)
示例#22
0
    def test_set_state(self):
        """
        Check that ``__setstate__`` fills in the operation and the children
        of a head node and of a plain node from their state dictionaries.
        """
        # Head node wrapped in a proxy
        head = Node.HeadNode(1)
        head.backend = TestBackend()
        root = Proxy.TransformationProxy(head)

        # Plain node created without arguments, also wrapped in a proxy
        bare_node = Node.Node(None, None)
        bare_node.backend = TestBackend()
        child = Proxy.TransformationProxy(bare_node)

        # States to be loaded into the two nodes
        head_state = {"children": [child]}
        child_state = dict(
            operation_name="Define",
            operation_args=["a"],
            operation_kwargs={"b": "c"},
            children=[],
        )

        # Load the states into the proxied nodes
        root.proxied_node.__setstate__(head_state)
        child.proxied_node.__setstate__(child_state)

        # Head node: no operation, children taken from the state
        obtained_head = [root.operation, root.children]
        expected_head = [None, head_state["children"]]
        self.assertListEqual(obtained_head, expected_head)

        # Child node: operation name, args and kwargs plus children
        obtained_child = [
            child.operation.name,
            child.operation.args,
            child.operation.kwargs,
            child.children,
        ]
        expected_child = [
            child_state["operation_name"],
            child_state["operation_args"],
            child_state["operation_kwargs"],
            child_state["children"],
        ]
        self.assertListEqual(obtained_child, expected_child)
示例#23
0
    def test_dfs_graph_without_pruning(self):
        """
        Test case to check that node pruning does not occur if every node either
        has children or some user references.

        Every node created here stays bound to a local name, and the order
        returned by ``DfsTest.traverse`` is expected to contain all of them.
        """
        # Head node
        hn = Node.HeadNode(1)
        hn.backend = TestBackend()
        node = Proxy.TransformationProxy(hn)

        # Graph nodes
        n1 = node.Define()
        n2 = node.Filter()
        n3 = n2.Filter()
        # n4, n5 and n6 are never read again; they are only kept bound to
        # local names.
        n4 = n3.Count()  # noqa: avoid PEP8 F841
        n5 = n1.Count()  # noqa: avoid PEP8 F841
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        obtained_order = DfsTest.traverse(node=node.get_head())

        # NOTE(review): the numbers presumably encode the operation type
        # (1 = Define, 2 = Filter, 3 = Count) - confirm against DfsTest.
        reqd_order = [1, 3, 2, 2, 3, 2]

        self.assertEqual(obtained_order, reqd_order)
示例#24
0
    def test_mapper_with_pruning(self):
        """
        A test case to check that the mapper works even in the case of
        pruning.

        The local name ``n5`` first holds a ``Count`` node and is then rebound
        to a ``Filter`` node; afterwards only ``n4`` is expected among the
        action nodes reported by the generator.
        """
        # A mock RDF object
        t = ComputationGraphGeneratorTest.Temp()

        # Head node
        hn = Node.HeadNode(1)
        hn.backend = ComputationGraphGeneratorTest.TestBackend()
        node = Proxy.TransformationProxy(hn)

        # Set of operations to build the graph
        n1 = node.Define()
        n2 = node.Filter().Filter()
        n4 = n2.Count()
        n5 = n1.Count()
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        # Reason for pruning (change of reference)
        n5 = n1.Filter()  # noqa: avoid PEP8 F841

        # Generate and execute the mapper
        generator = ComputationGraphGenerator.ComputationGraphGenerator(
            node.proxied_node)
        mapper_func = generator.get_callable()
        values = mapper_func(t)
        nodes = generator.get_action_nodes()

        # Expected content of t.ord_list after the mapper has run.
        # NOTE(review): the numbers presumably encode the operation type
        # (1 = Define, 2 = Filter, 3 = Count) - confirm against Temp.
        reqd_order = [1, 2, 2, 2, 3, 2]

        self.assertEqual(t.ord_list, reqd_order)
        # Only the Count node still bound to n4 is expected as action node
        self.assertListEqual(nodes, [n4.proxied_node])
        self.assertListEqual(values, [t])
示例#25
0
 def make_dataframe(self, *args, **kwargs):
     """
     Create a ``DataFrame.RDataFrame`` tied to this object.

     ``args`` are forwarded to ``Node.HeadNode``. The resulting head node,
     ``self`` and ``kwargs`` are then passed to ``DataFrame.RDataFrame``,
     whose result is returned.
     """
     head = Node.HeadNode(*args)
     dataframe = DataFrame.RDataFrame(head, self, **kwargs)
     return dataframe
示例#26
0
    def test_integer_arg(self):
        """Check HeadNode.args for the constructor taking a number of entries"""
        entries = 10
        head = Node.HeadNode(entries)

        self.assertListEqual(head.args, [entries])