Пример #1
0
 def _parse_oozie_node(self):
     """
     Read the resource manager, name node and script entries of the node.

     The text of the ``resource-manager``, ``name-node`` and ``script``
     children of ``self.oozie_node`` is fetched first. Each text is then
     passed through ``el_utils.replace_el_with_var`` (with ``self.params``,
     unquoted) and stored on ``self``. Finally the configuration and the
     params of the node are parsed.
     """

     def _resolve(text):
         # Same substitution settings for every value read from the node.
         return el_utils.replace_el_with_var(text, params=self.params, quote=False)

     # All three lookups are completed before any attribute is assigned.
     raw_resource_manager, raw_name_node, raw_script = (
         self.oozie_node.find(tag).text for tag in ("resource-manager", "name-node", "script")
     )

     self.resource_manager = _resolve(raw_resource_manager)
     self.name_node = _resolve(raw_name_node)
     self.script_file_name = _resolve(raw_script)

     self._parse_config()
     self._parse_params()
Пример #2
0
 def _parse_oozie_node(self):
     """
     Read the resource manager, name node and shell command of the node.

     The ``resource-manager`` and ``name-node`` texts go through
     ``el_utils.replace_el_with_var`` (with ``self.params``, unquoted).
     After the configuration is parsed, the command line is assembled from
     the ``exec`` text followed by the text of every ``argument`` child,
     separated by single spaces, and converted with
     ``el_utils.convert_el_to_jinja`` into ``self.bash_command``.
     """
     raw_resource_manager = self.oozie_node.find("resource-manager").text
     raw_name_node = self.oozie_node.find("name-node").text

     self.resource_manager = el_utils.replace_el_with_var(
         raw_resource_manager, params=self.params, quote=False
     )
     self.name_node = el_utils.replace_el_with_var(raw_name_node, params=self.params, quote=False)

     self._parse_config()

     exec_node = self.oozie_node.find("exec")
     argument_nodes = self.oozie_node.findall("argument")

     # Executable first, then its arguments in document order.
     command_parts = [exec_node.text]
     command_parts.extend(argument.text for argument in argument_nodes)

     self.bash_command = el_utils.convert_el_to_jinja(" ".join(command_parts), quote=False)
Пример #3
0
    def test_replace_el_with_var_var_quote(self):
        """An EL variable found in params is replaced by its value, in single quotes."""
        hostname_params = {"hostname": "*****@*****.**"}

        actual = el_utils.replace_el_with_var("${hostname}", hostname_params, quote=True)

        self.assertEqual(actual, "'*****@*****.**'")
Пример #4
0
    def test_replace_el_with_var_var_quote(self):
        """
        An EL variable found in params is replaced by its value and, with
        quote=True, the result is wrapped in single quotes.
        """
        hostname = '*****@*****.**'
        params = {'hostname': hostname}
        el_var = '${hostname}'
        # Build the expectation from the very value placed in params so the
        # fixture and the expected string cannot disagree.
        expected = "'" + hostname + "'"

        replaced = el_utils.replace_el_with_var(el_var, params, quote=True)
        self.assertEqual(replaced, expected)
Пример #5
0
 def parse_prepare_node(oozie_node: ET.Element, params: Dict[str, str]) -> Tuple[List[str], List[str]]:
     """
     Collect the paths listed in the prepare section of ``oozie_node``.

     <prepare>
         <delete path="[PATH]"/>
         ...
         <mkdir path="[PATH]"/>
         ...
     </prepare>

     Every child of the first prepare node has its ``path`` attribute passed
     through ``el_utils.replace_el_with_var`` (unquoted). Everything up to
     and including the first "//" is dropped when present, then everything
     up to the next "/" is dropped and a leading "/" is put back.

     Returns a ``(delete_paths, mkdir_paths)`` tuple. Children tagged
     ``delete`` go to the first list, all other children to the second.
     Both lists are empty when no prepare node is found.
     """
     to_delete: List[str] = []
     to_create: List[str] = []

     found = xml_utils.find_nodes_by_tag(oozie_node, "prepare")
     if not found:
         return to_delete, to_create

     # If there exists a prepare node, there will only be one, according
     # to oozie xml schema
     for child in found[0]:
         path = el_utils.replace_el_with_var(child.attrib["path"], params=params, quote=False)
         if "//" in path:
             # Removing the hdfs:// or similar part
             path = path.partition("//")[2]
         # Removing the 'localhost:8082/' part
         remainder = path.split("/", maxsplit=1)[1]
         path = "/" + remainder

         bucket = to_delete if child.tag == "delete" else to_create
         bucket.append(path)

     return to_delete, to_create
Пример #6
0
    def test_replace_el_with_var_func_quote(self):
        """EL function calls are not replaced; with quote=True they are only quoted."""
        el_function = '${concat("abc", "def")}'

        # No params are supplied: nothing in the expression is substituted.
        actual = el_utils.replace_el_with_var(el_function, {}, quote=True)

        self.assertEqual(actual, '\'${concat("abc", "def")}\'')
Пример #7
0
 def _parse_params(self):
     """
     Collect the ``param`` nodes of ``self.oozie_node`` into ``self.params_dict``.

     The text of each param node is passed through
     ``el_utils.replace_el_with_var`` (with ``self.params``, unquoted) and
     then split into key and value at the first "=", so the value itself
     may contain further "=" characters.

     ``self.params_dict`` is only (re)assigned when at least one param node
     is found; otherwise it is left untouched.

     Raises:
         ValueError: if a param text contains no "=" at all.
     """
     param_nodes = xml_utils.find_nodes_by_tag(self.oozie_node, "param")
     if not param_nodes:
         return

     self.params_dict = {}
     for node in param_nodes:
         param = el_utils.replace_el_with_var(node.text, params=self.params, quote=False)
         # Split on the first "=" only; an unbounded split breaks the
         # two-name unpacking for values such as "URL=http://host/?a=b".
         key, value = param.split("=", 1)
         self.params_dict[key] = value
Пример #8
0
 def _parse_config(self):
     """
     Copy the properties of the ``configuration`` child into ``self.properties``.

     For every ``property`` node found under the configuration node, the
     text of its ``name`` child becomes the key and the text of its
     ``value`` child, passed through ``el_utils.replace_el_with_var`` (with
     ``self.params``, unquoted), becomes the value. Nothing happens when
     there is no configuration node or it has no property nodes.
     """
     config = self.oozie_node.find("configuration")
     # Compare with None explicitly: the truth value of an ElementTree
     # element reflects whether it has children, not whether it was found,
     # and testing it is deprecated.
     # NOTE(review): assumes self.oozie_node is an xml.etree.ElementTree
     # Element - confirm against the caller.
     if config is None:
         return

     property_nodes = xml_utils.find_nodes_by_tag(config, "property")
     if not property_nodes:
         return

     for node in property_nodes:
         name = node.find("name").text
         value = el_utils.replace_el_with_var(
             node.find("value").text,
             params=self.params,
             quote=False)
         self.properties[name] = value
Пример #9
0
    def _test_and_set(root, tag, default=None, params=None, quote=False):
        """
        Return the EL-substituted text of the first ``tag`` node under ``root``.

        If a node with the given tag exists under ``root``, its text is
        passed through ``el_utils.replace_el_with_var`` with the given
        ``params`` and ``quote`` and the result is returned. If there is
        more than one such node, only the first one found is used. If no
        node with that tag is found, ``default`` is returned.

        :param root: node that is searched for ``tag``.
        :param tag: tag name to look for.
        :param default: value returned when no node with ``tag`` is found.
        :param params: mapping handed to ``replace_el_with_var``; an empty
            dict is used when omitted.
        :param quote: forwarded to ``replace_el_with_var``.
        """
        # Create the empty mapping per call instead of sharing one mutable
        # default object between all calls.
        if params is None:
            params = {}

        var = xml_utils.find_nodes_by_tag(root, tag)
        if not var:
            return default

        # Only check the first one
        return el_utils.replace_el_with_var(var[0].text,
                                            params=params,
                                            quote=quote)
Пример #10
0
 def _parse_oozie_node(self):
     """
     Locate the sub-workflow application named by the node and convert it.

     The ``app-path`` text is passed through
     ``el_utils.replace_el_with_var`` (with ``self.params``, unquoted); the
     part after "examples/" is joined onto ``EXAMPLES_PATH`` to form the
     input directory. After the configuration is parsed, an
     ``OozieSubworkflowConverter`` is created for that directory and run.
     """
     raw_app_path = self.oozie_node.find("app-path").text
     resolved_app_path = el_utils.replace_el_with_var(
         raw_app_path, params=self.params, quote=False
     )
     # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
     # TODO: but for now we assume app is in "examples"
     app_path = os.path.join(EXAMPLES_PATH, resolved_app_path.split("examples/")[1])
     logging.info(f"Converting subworkflow from {app_path}")

     self._parse_config()

     subworkflow_converter = OozieSubworkflowConverter(
         input_directory_path=app_path,
         output_directory_path=self.output_directory_path,
         start_days_ago=0,
         action_mapper=self.action_mapper,
         control_mapper=self.control_mapper,
         dag_name=f"{self.dag_name}.{self.task_id}",
     )
     subworkflow_converter.convert()
Пример #11
0
    def _parse_oozie_node(self, oozie_node: ET.Element):
        """
        Reset the Spark settings on ``self`` and fill them from ``oozie_node``.

        Property values specified in the configuration element override
        values specified in the job-xml file.
        """

        def _quoted(tag, default):
            # Every single-valued setting is read with the same params and quoting.
            return self.test_and_set(oozie_node, tag, default, params=self.params, quote=True)

        # Start from a clean set of defaults.
        self.application = ""
        self.conf = {}
        self.conn_id = "spark_default"
        self.files = None
        self.py_files = None
        self.driver_classpath = None
        self.jars = None
        self.java_class = None
        self.packages = None
        self.exclude_packages = None
        self.repositories = None
        self.total_executor_cores = None
        self.executor_cores = None
        self.executor_memory = None
        self.driver_memory = None
        self.keytab = None
        self.principal = None
        self.spark_name = "airflow-spark"
        self.num_executors = None
        self.application_args = []
        self.env_vars = None
        self.verbose = False

        # Prepare nodes
        self.delete_paths = []
        self.mkdir_paths = []

        prepare = xml_utils.find_nodes_by_tag(oozie_node, "prepare")
        if prepare:
            # If there exists a prepare node, there will only be one, according
            # to oozie xml schema
            self.delete_paths, self.mkdir_paths = self.parse_prepare_node(prepare[0])

        # master url, deploy mode,
        self.application = _quoted("jar", "''")
        self.spark_name = _quoted("name", "'airflow-spark'")
        self.java_class = _quoted("class", None)

        configuration_nodes = xml_utils.find_nodes_by_tag(oozie_node, "configuration")
        job_xml_nodes = xml_utils.find_nodes_by_tag(oozie_node, "job-xml")

        # job-xml files are merged first ...
        for job_xml_node in job_xml_nodes:
            job_xml_root = ET.parse(job_xml_node.text).getroot()
            self.conf = {**self.conf, **self.parse_spark_config(job_xml_root)}

        # ... so that the inline configuration is applied on top of them.
        if configuration_nodes:
            self.conf = {**self.conf, **self.parse_spark_config(configuration_nodes[0])}

        spark_opts_nodes = xml_utils.find_nodes_by_tag(oozie_node, "spark-opts")
        if spark_opts_nodes:
            self.update_class_spark_opts(spark_opts_nodes[0])

        for arg_node in xml_utils.find_nodes_by_tag(oozie_node, "arg"):
            self.application_args.append(
                el_utils.replace_el_with_var(arg_node.text, self.params, quote=False)
            )