def _extract_java_data(self):
    """Extract the Java action settings from the Oozie node.

    Populates ``self.java_opts``, ``self.main_class`` and ``self.args``.

    JVM options are appended to ``self.java_opts`` in this order:

    1. the ``mapred.child.java.opts`` property, if present in ``props.merged``;
    2. the ``mapreduce.map.java.opts`` property, if present in ``props.merged``;
    3. the text of the ``TAG_JAVA_OPTS`` element when it is non-empty,
       otherwise the values of every ``TAG_JAVA_OPT`` element.

    Option strings are split on runs of whitespace, so repeated spaces, tabs
    or line breaks inside the XML/property text never produce empty option
    tokens.
    """
    root = self.oozie_node
    props = self.props

    # Property-based options come first, in this fixed order.
    for property_name in ("mapred.child.java.opts", "mapreduce.map.java.opts"):
        if property_name in props.merged:
            self.java_opts.extend(props.merged[property_name].split())

    self.main_class = xml_utils.get_tag_el_text(root=root, tag=TAG_MAIN_CLASS, props=props)

    # A single combined options element takes precedence over the
    # one-option-per-element form.
    java_opts_string = xml_utils.get_tag_el_text(root=root, tag=TAG_JAVA_OPTS, props=props)
    if java_opts_string:
        self.java_opts.extend(java_opts_string.split())
    else:
        self.java_opts.extend(get_tags_el_array_from_text(root=root, tag=TAG_JAVA_OPT, props=props))

    self.args = get_tags_el_array_from_text(root=root, tag=TAG_ARG, props=props)
def __extract_email_data(self):
    """Copy the e-mail fields of the Oozie node onto instance attributes.

    Sets ``to_addr``, ``cc_addr``, ``bcc_addr``, ``subject`` and ``body``
    from the ``to``, ``cc``, ``bcc``, ``subject`` and ``body`` tags.
    """
    root = self.oozie_node
    # (attribute name, XML tag) pairs, handled in this order.
    attribute_to_tag = (
        ("to_addr", "to"),
        ("cc_addr", "cc"),
        ("bcc_addr", "bcc"),
        ("subject", "subject"),
        ("body", "body"),
    )
    for attribute_name, tag_name in attribute_to_tag:
        setattr(self, attribute_name, xml_utils.get_tag_el_text(root=root, tag=tag_name))
def _parse_oozie_node(self):
    """Read resource manager, name node, script, params, files and archives.

    All values are taken from ``self.oozie_node`` (or from the file/archive
    extractors) and stored on the instance.
    """
    node = self.oozie_node

    self.resource_manager = get_tag_el_text(node, TAG_RESOURCE)
    self.name_node = get_tag_el_text(node, TAG_NAME)
    self.script_file_name = get_tag_el_text(node, TAG_SCRIPT)
    self.params_dict = extract_param_values_from_action_node(node)

    # Each extractor returns a pair; keep both halves.
    files, hdfs_files = self.file_extractor.parse_node()
    self.files, self.hdfs_files = files, hdfs_files

    archives, hdfs_archives = self.archive_extractor.parse_node()
    self.archives, self.hdfs_archives = archives, hdfs_archives
def on_parse_node(self):
    """Parse the git action fields after the parent class has parsed the node.

    Sets ``git_uri``, ``git_branch`` and ``key_path``; ``destination_path``
    is only assigned when the destination URI tag yields a non-empty value.
    """
    super().on_parse_node()
    node = self.oozie_node
    props = self.props

    self.git_uri = get_tag_el_text(node, TAG_GIT_URI, props=props)
    self.git_branch = get_tag_el_text(node, TAG_BRANCH, props=props)

    destination_uri = get_tag_el_text(node, tag=TAG_DESTINATION_URI, props=props)
    if destination_uri:
        # Only the path component of the URI is kept.
        self.destination_path = urlparse(destination_uri).path

    key_path_uri = get_tag_el_text(node, tag=TAG_KEY_PATH, props=props)
    if key_path_uri:
        self.key_path = urlparse(key_path_uri).path
    else:
        self.key_path = None
def _parse_oozie_node(self):
    """Build the bash and pig commands from the node's command and arguments.

    The command text and its arguments are joined with single spaces, passed
    through ``el_parser.translate`` (unquoted) and stored as
    ``self.bash_command``; ``self.pig_command`` wraps it in ``sh``.
    """
    node = self.oozie_node

    self.resource_manager = get_tag_el_text(node, TAG_RESOURCE)
    self.name_node = get_tag_el_text(node, TAG_NAME)

    executable = get_tag_el_text(node, TAG_CMD)
    arguments = get_tags_el_array_from_text(node, TAG_ARG)
    command_line = " ".join([executable, *arguments])

    self.bash_command = el_parser.translate(command_line, quote=False)
    self.pig_command = f"sh {self.bash_command}"
def _parse_oozie_node(self):
    """Build the bash and pig commands from the node's command and arguments.

    Tag values are read with ``self.props``. The command text and its
    arguments are joined with single spaces, converted with
    ``convert_el_to_jinja`` (unquoted) and stored as ``self.bash_command``;
    ``self.pig_command`` wraps it in ``sh``.
    """
    node = self.oozie_node

    self.resource_manager = get_tag_el_text(node, TAG_RESOURCE, self.props)
    self.name_node = get_tag_el_text(node, TAG_NAME, self.props)

    executable = get_tag_el_text(node, TAG_CMD, self.props)
    arguments = get_tags_el_array_from_text(node, TAG_ARG, self.props)
    command_line = " ".join([executable, *arguments])

    self.bash_command = convert_el_to_jinja(command_line, quote=False)
    self.pig_command = f"sh {self.bash_command}"
def on_parse_node(self):
    """Assemble ``self.bash_command`` for the git action.

    Reads the git URI, branch, destination URI and key-path URI from the
    Oozie node (using ``self.params``), reduces the two URIs to their path
    component and hands everything to ``prepare_git_command``.
    """
    node = self.oozie_node

    def _uri_path(tag):
        """Return the path component of the URI stored under *tag*."""
        # NOTE(review): there is no guard for a missing URI here - urlparse is
        # applied to whatever get_tag_el_text returns; confirm the helper
        # never yields None for these tags.
        return urlparse(get_tag_el_text(node, tag, self.params)).path

    repository = get_tag_el_text(node, TAG_GIT_URI, self.params)
    branch = get_tag_el_text(node, TAG_BRANCH, self.params)
    target_path = _uri_path(TAG_DESTINATION_URI)
    ssh_key_path = _uri_path(TAG_KEY_PATH)

    self.bash_command = prepare_git_command(
        git_uri=repository,
        git_branch=branch,
        destination_path=target_path,
        key_path=ssh_key_path,
    )
def on_parse_node(self):
    """Parse the git action fields after the parent class has parsed the node.

    Sets ``git_uri``, ``git_branch`` and ``key_path``; ``destination_path``
    is only assigned when the destination URI tag yields a non-empty value.
    URIs are passed through ``normalize_path`` with ``translated=True``.
    """
    super().on_parse_node()
    node = self.oozie_node

    self.git_uri = get_tag_el_text(node, TAG_GIT_URI)
    self.git_branch = get_tag_el_text(node, TAG_BRANCH)

    destination_uri = get_tag_el_text(node, tag=TAG_DESTINATION_URI)
    if destination_uri:
        self.destination_path = normalize_path(destination_uri, props=self.props, translated=True)

    key_path_uri = get_tag_el_text(node, tag=TAG_KEY_PATH)
    if not key_path_uri:
        self.key_path = None
    else:
        self.key_path = normalize_path(key_path_uri, props=self.props, translated=True)
def on_parse_node(self):
    """Parse name node, parameters, HDFS files and HDFS archives.

    Runs the parent implementation first; only the second element returned by
    each extractor is stored.
    """
    super().on_parse_node()
    node = self.oozie_node

    self.name_node = get_tag_el_text(node, "name-node")
    self.params_dict = extract_param_values_from_action_node(node)

    # The first element of each extractor result is intentionally discarded.
    self.hdfs_files = self.file_extractor.parse_node()[1]
    self.hdfs_archives = self.archive_extractor.parse_node()[1]
def on_parse_node(self):
    """Parse the Spark action: files, jar/class, job name, options and args.

    When both a jar and a class are present, the jar is moved into
    ``self.dataproc_jars`` and ``self.java_jar`` is reset to ``None``.
    """
    super().on_parse_node()
    node = self.oozie_node

    _, self.hdfs_files = self.file_extractor.parse_node()
    _, self.hdfs_archives = self.archive_extractor.parse_node()

    self.java_jar = get_tag_el_text(node, tag=SPARK_TAG_JAR)
    self.java_class = get_tag_el_text(node, tag=SPARK_TAG_CLASS)
    if self.java_jar and self.java_class:
        self.dataproc_jars = [self.java_jar]
        self.java_jar = None

    self.job_name = get_tag_el_text(node, tag=SPARK_TAG_JOB_NAME)

    # Only the first options node, if any, is parsed.
    opts_nodes = xml_utils.find_nodes_by_tag(node, SPARK_TAG_OPTS)
    if opts_nodes:
        parsed_opts = self._parse_spark_opts(opts_nodes[0])
        self.spark_opts.update(parsed_opts)

    self.application_args = xml_utils.get_tags_el_array_from_text(node, tag=SPARK_TAG_ARG)
def get_command(self) -> str:
    """Return the SSH command line converted with ``convert_el_to_jinja``.

    The command text is followed by each argument, shell-quoted with
    ``shlex.quote`` and separated by single spaces.

    Raises:
        Exception: if the command tag is missing or empty.
    """
    node = self.oozie_node
    command_text = xml_utils.get_tag_el_text(node, TAG_CMD, self.props)
    raw_arguments = xml_utils.get_tags_el_array_from_text(node, TAG_ARG, self.props)

    if not command_text:
        raise Exception("Missing or empty command node in SSH action {}".format(self.oozie_node))

    quoted_arguments = map(shlex.quote, raw_arguments)
    command_line = " ".join([command_text, *quoted_arguments])
    return el_utils.convert_el_to_jinja(command_line)
def on_parse_node(self):
    """Parse config, query/script, variables, HDFS files and HDFS archives.

    Exactly one of the query and script elements must be provided.

    Raises:
        ParseException: if neither or both of the two elements are set.
    """
    super().on_parse_node()
    self._parse_config()

    node = self.oozie_node
    self.query = get_tag_el_text(node, TAG_QUERY)
    self.script = get_tag_el_text(node, TAG_SCRIPT)

    has_query = bool(self.query)
    has_script = bool(self.script)
    if not (has_query or has_script):
        raise ParseException(
            f"Action Configuration does not include {TAG_SCRIPT} or {TAG_QUERY} element"
        )
    if has_query and has_script:
        raise ParseException(
            f"Action Configuration include {TAG_SCRIPT} and {TAG_QUERY} element. "
            f"Only one can be set at the same time."
        )

    self.variables = extract_param_values_from_action_node(node)

    # The first element of each extractor result is intentionally discarded.
    self.hdfs_files = self.file_extractor.parse_node()[1]
    self.hdfs_archives = self.archive_extractor.parse_node()[1]
def get_command(self) -> str:
    """Return the SSH command line translated by ``el_parser`` with quoting.

    The command text is followed by each argument, shell-quoted with
    ``shlex.quote`` and separated by single spaces.

    Raises:
        Exception: if the command tag is missing or empty.
    """
    node = self.oozie_node
    command_text = xml_utils.get_tag_el_text(node, TAG_CMD)
    raw_arguments = xml_utils.get_tags_el_array_from_text(node, TAG_ARG)

    if not command_text:
        raise Exception(f"Missing or empty command node in SSH action {self.oozie_node}")

    quoted_arguments = map(shlex.quote, raw_arguments)
    command_line = " ".join([command_text, *quoted_arguments])
    return el_parser.translate(command_line, quote=True)
def on_parse_node(self):
    """Parse the Spark action: files, jar/class, job name, options and args.

    Tag values are read with ``self.props``. When both a jar and a class are
    present, the jar is moved into ``self.dataproc_jars`` and
    ``self.java_jar`` is reset to ``None``. Application arguments are
    appended to the existing ``self.application_args``.
    """
    super().on_parse_node()
    node = self.oozie_node

    _, self.hdfs_files = self.file_extractor.parse_node()
    _, self.hdfs_archives = self.archive_extractor.parse_node()

    self.java_jar = get_tag_el_text(node, props=self.props, tag=SPARK_TAG_JAR)
    self.java_class = get_tag_el_text(node, props=self.props, tag=SPARK_TAG_CLASS)
    if self.java_jar and self.java_class:
        self.dataproc_jars = [self.java_jar]
        self.java_jar = None

    self.job_name = get_tag_el_text(node, props=self.props, tag=SPARK_TAG_JOB_NAME)

    # Only the first options node, if any, is parsed.
    opts_nodes = xml_utils.find_nodes_by_tag(node, SPARK_TAG_OPTS)
    if opts_nodes:
        parsed_opts = self._parse_spark_opts(opts_nodes[0])
        self.spark_opts.update(parsed_opts)

    arg_nodes = xml_utils.find_nodes_by_tag(node, SPARK_TAG_ARGS)
    self.application_args.extend(
        el_utils.replace_el_with_var(arg_node.text, self.props, quote=False)
        for arg_node in arg_nodes
    )
def _parse_oozie_node(self):
    """Convert the referenced sub-workflow application.

    The application name is the part of the app-path tag after its last
    ``/``. An ``OozieConverter`` is then run in sub-workflow mode on the
    directory of that name under ``EXAMPLES_PATH``.
    """
    declared_app_path = xml_utils.get_tag_el_text(self.oozie_node, TAG_APP)
    self.app_name = declared_app_path.rpartition("/")[-1]

    # TODO: hacky: we should calculate it deriving from input_directory_path and comparing app-path
    # TODO: but for now we assume app is in "examples"
    app_path = os.path.join(EXAMPLES_PATH, self.app_name)
    logging.info(f"Converting subworkflow from {app_path}")

    subworkflow_converter = OozieConverter(
        input_directory_path=app_path,
        output_directory_path=self.output_directory_path,
        renderer=self.renderer,
        action_mapper=self.action_mapper,
        dag_name=self.app_name,
        initial_props=self.get_child_props(),
        transformers=self.transformers,
    )
    subworkflow_converter.convert(as_subworkflow=True)