def test_find_node_by_name_not_found(self):
        """Expect NoNodeFoundException for a name held only by a grandchild."""
        root = ET.Element("outer")
        child = ET.SubElement(root, "inner_tag", attrib={"name": "test_attrib"})
        # The only element named "out_of_scope" is nested below a child of
        # the root rather than being a direct child itself.
        ET.SubElement(child, "in_inner_tag", attrib={"name": "out_of_scope"})
        tree = ET.ElementTree(root)

        search_root = tree.getroot()
        with self.assertRaises(xml_utils.NoNodeFoundException):
            xml_utils.find_node_by_name(search_root, "out_of_scope")
    def test_find_node_by_name_multiple(self):
        """Expect MultipleNodeFoundException when two siblings share a name."""
        root = ET.Element("outer")
        # Two direct children with different tags but the same "name" value.
        for tag in ("inner_tag", "other_inner_tag"):
            ET.SubElement(root, tag, attrib={"name": "test_attrib"})
        tree = ET.ElementTree(root)

        search_root = tree.getroot()
        with self.assertRaises(xml_utils.MultipleNodeFoundException):
            xml_utils.find_node_by_name(search_root, "test_attrib")
    def test_find_node_by_name(self):
        """A uniquely named direct child is the element the lookup returns."""
        root = ET.Element("outer")
        expected = ET.SubElement(
            root, "inner_tag", attrib={"name": "test_attrib"}
        )
        tree = ET.ElementTree(root)

        actual = xml_utils.find_node_by_name(tree.getroot(), "test_attrib")

        self.assertEqual(expected, actual)
示例#4
0
    def parse_fork_node(self, root, fork_node):
        """
        Register an Oozie fork node and parse every path that it starts.

        The fork is wrapped in a DummyMapper-backed ParsedActionNode and
        stored in ``self.workflow.nodes`` under the fork's name. Each child of
        ``fork_node`` whose tag contains "path" points, through its "start"
        attribute, at a node that is looked up in ``root``. That node is
        recorded as a downstream of the fork and passed to
        ``self.parse_node``.

        Only well-formed workflows, in which all paths end at the join node,
        are expected to parse correctly.

        ``root`` is the element searched for the path targets. A target whose
        name is still missing from ``self.workflow.nodes`` after it has been
        parsed is removed from ``root``. ``fork_node`` must carry a "name"
        attribute.
        """
        fork_name = fork_node.attrib["name"]
        mapper = DummyMapper(
            oozie_node=fork_node,
            name=fork_name,
            dag_name=self.workflow.dag_name,
        )
        fork_parsed = ParsedActionNode(mapper)

        mapper.on_parse_node()

        logging.info(f"Parsed {mapper.name} as Fork Node.")

        # Resolve each "path" child to the node named by its "start"
        # attribute; these are the downstream tasks that can run in parallel.
        paths = [
            xml_utils.find_node_by_name(root, child.attrib["start"])
            for child in fork_node
            if "path" in child.tag
        ]

        self.workflow.nodes[fork_name] = fork_parsed

        for path in paths:
            fork_parsed.add_downstream_node_name(path.attrib["name"])
            logging.info(
                f"Added {mapper.name}'s downstream: {path.attrib['name']}")

            # The targets are expected to be action nodes, but nothing here
            # enforces that.
            self.parse_node(root, path)

            # Targets that parse_node registered stay in the tree; any that
            # were not registered are dropped from root.
            if path.attrib["name"] in self.workflow.nodes:
                continue
            root.remove(path)