def __init__(
    self,
    oozie_node: Element,
    name: str,
    dag_name: str,
    input_directory_path: str,
    output_directory_path: str,
    props: PropertySet,
    action_mapper: Dict[str, Type[ActionMapper]],
    renderer: BaseRenderer,
    transformers: List[BaseWorkflowTransformer] = None,
    **kwargs,
):
    """Initialise the mapper and parse ``oozie_node`` straight away.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name; also stored as ``task_id``.
    :param dag_name: Name of the DAG, forwarded to the base class and stored.
    :param input_directory_path: Input directory, forwarded to the base class
        and stored on the instance.
    :param output_directory_path: Output directory, stored on the instance.
    :param props: Property set forwarded to ``ActionMapper.__init__``.
    :param action_mapper: Mapping of string keys to ``ActionMapper`` subclasses,
        stored as-is.
    :param renderer: Renderer instance, stored as-is.
    :param transformers: Optional list of workflow transformers; ``None`` (or
        any falsy value) becomes a new empty list.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        dag_name=dag_name,
        props=props,
        input_directory_path=input_directory_path,
        **kwargs,
    )

    self.task_id = name

    self.input_directory_path = input_directory_path
    self.output_directory_path = output_directory_path
    self.dag_name = dag_name

    self.action_mapper = action_mapper
    self.renderer = renderer
    self.transformers = transformers if transformers else []

    # Kept as the final step; presumably it reads the attributes assigned above.
    self._parse_oozie_node()
def __init__(
    self,
    oozie_node: Element,
    name: str,
    dag_name: str,
    input_directory_path: str,
    output_directory_path: str,
    action_mapper: Dict[str, Type[ActionMapper]],
    control_mapper: Dict[str, Type[BaseMapper]],
    trigger_rule=TriggerRule.ALL_SUCCESS,
    params=None,
    **kwargs,
):
    """Initialise the mapper and parse ``oozie_node`` straight away.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name; also stored as ``task_id``.
    :param dag_name: Name of the DAG, stored on the instance.
    :param input_directory_path: Input directory, stored on the instance.
    :param output_directory_path: Output directory, stored on the instance.
    :param action_mapper: Mapping of string keys to ``ActionMapper`` subclasses.
    :param control_mapper: Mapping of string keys to ``BaseMapper`` subclasses.
    :param trigger_rule: Trigger rule forwarded to the base class and stored;
        defaults to ``TriggerRule.ALL_SUCCESS``.
    :param params: Optional parameter dictionary; ``None`` becomes a new
        empty dict.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        trigger_rule=trigger_rule,
        **kwargs,
    )

    # Only ``None`` is replaced; a caller-supplied dict is kept by reference.
    self.params = {} if params is None else params
    self.task_id = name
    self.trigger_rule = trigger_rule
    self.properties: Dict[str, str] = {}

    self.input_directory_path = input_directory_path
    self.output_directory_path = output_directory_path
    self.dag_name = dag_name

    self.action_mapper = action_mapper
    self.control_mapper = control_mapper

    # Kept as the final step; presumably it reads the attributes assigned above.
    self._parse_oozie_node()
def __init__(
    self,
    oozie_node: ET.Element,
    name: str,
    trigger_rule: str = TriggerRule.ALL_SUCCESS,
    params: Dict[str, str] = None,
    **kwargs,
):
    """Initialise the mapper with empty defaults for every parsed field.

    :param oozie_node: XML element forwarded to the base class and to both
        extractors.
    :param name: Mapper name, forwarded to the base class.
    :param trigger_rule: Trigger rule forwarded to the base class and stored;
        defaults to ``TriggerRule.ALL_SUCCESS``.
    :param params: Optional parameter dictionary; ``None`` (or any falsy
        value) becomes a new empty dict.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    # The base constructor is invoked positionally, exactly as before.
    ActionMapper.__init__(self, oozie_node, name, trigger_rule, **kwargs)

    self.params = params if params else {}
    self.trigger_rule = trigger_rule

    # Empty defaults; nothing in this constructor fills them in.
    self.java_class = ""
    self.java_jar = ""
    self.job_name = None
    self.jars = []
    self.properties = {}
    self.application_args = []

    # Both extractors receive the normalised ``self.params``.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, params=self.params)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, params=self.params)

    self.prepare_command = None
    self.hdfs_files = []
    self.hdfs_archives = []
    self.dataproc_jars = []
def __init__(
    self,
    oozie_node: Element,
    name: str,
    dag_name: str,
    props: PropertySet,
    jar_files: List[str],
    **kwargs,
):
    """Initialise the mapper, its extractors and its jar-file bookkeeping.

    :param oozie_node: XML element forwarded to the base class and to both
        extractors.
    :param name: Mapper name, forwarded to the base class.
    :param dag_name: Name of the DAG, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param jar_files: List of jar files; a falsy value becomes a new empty list.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        dag_name=dag_name,
        name=name,
        props=props,
        **kwargs,
    )

    # The extractors read ``self.props``, which is available after the base constructor.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    # Placeholders; this constructor leaves them unset.
    self.main_class: Optional[str] = None
    self.java_opts: List[str] = []
    self.args: Optional[List[str]] = None
    self.hdfs_files: Optional[List[str]] = None
    self.hdfs_archives: Optional[List[str]] = None

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)

    self.jar_files: List[str] = jar_files or []
    self.jar_files_in_hdfs: List[str] = []
    # Presumably fills ``jar_files_in_hdfs`` from ``jar_files`` - confirm in the helper.
    self._get_jar_files_in_hdfs_full_paths()
def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
    """Initialise the mapper with unset git fields and a prepare extension.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        props=props,
        **kwargs,
    )

    # Every field starts as ``None``; this constructor does no parsing.
    self.git_uri: Optional[str] = None
    self.git_branch: Optional[str] = None
    self.destination_path: Optional[str] = None

    self.key_path_uri: Optional[str] = None
    self.key_path: Optional[str] = None

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
    """Initialise the mapper, parse the node, then attach a prepare extension.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        props=props,
        **kwargs,
    )

    # Parsing happens before the prepare extension exists; the order is preserved.
    self._parse_oozie_node()

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(self, oozie_node: Element, name: str, dag_name: str, props: PropertySet, **kwargs):
    """Initialise the mapper with extractors and unset parsed fields.

    :param oozie_node: XML element forwarded to the base class and to both
        extractors.
    :param name: Mapper name, forwarded to the base class.
    :param dag_name: Name of the DAG, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        dag_name=dag_name,
        props=props,
        **kwargs,
    )

    self.params_dict: Dict[str, str] = {}

    # The extractors read ``self.props``, which is available after the base constructor.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    # Placeholders; this constructor leaves them unset.
    self.name_node: Optional[str] = None
    self.hdfs_files: Optional[List[str]] = None
    self.hdfs_archives: Optional[List[str]] = None

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(
    self,
    oozie_node: ET.Element,
    name: str,
    trigger_rule: str = TriggerRule.ALL_SUCCESS,
    params: Dict[str, str] = None,
    **kwargs,
):
    """Initialise the mapper, storing its parameters and trigger rule.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name, forwarded to the base class.
    :param trigger_rule: Trigger rule forwarded to the base class and stored;
        defaults to ``TriggerRule.ALL_SUCCESS``.
    :param params: Optional parameter dictionary; ``None`` becomes a new
        empty dict.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    # The base constructor is invoked positionally, exactly as before.
    ActionMapper.__init__(self, oozie_node, name, trigger_rule, **kwargs)

    # Only ``None`` is replaced; a caller-supplied dict is kept by reference.
    self.params = {} if params is None else params
    self.trigger_rule = trigger_rule
def __init__(self, oozie_node: Element, name: str, dag_name: str, props: PropertySet, **kwargs):
    """Initialise the mapper with every e-mail field unset.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name, forwarded to the base class.
    :param dag_name: Name of the DAG, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        dag_name=dag_name,
        name=name,
        props=props,
        **kwargs,
    )

    # The ``_addr`` suffix satisfies Pylint's 3-letter minimum variable length;
    # ``bcc_addr`` carries it too for consistency.
    self.to_addr: Optional[str] = None
    self.cc_addr: Optional[str] = None
    self.bcc_addr: Optional[str] = None

    self.subject: Optional[str] = None
    self.body: Optional[str] = None
def __init__(
    self, oozie_node: Element, name: str, props: PropertySet, template: str = "ssh.tpl", **kwargs
):
    """Initialise the mapper and resolve the command, user and host.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param template: Template file name stored on the instance; defaults to
        ``"ssh.tpl"``.
    :param kwargs: Extra keyword arguments passed through to the base class.
    :raises ValueError: If the host key returned by ``get_host_key()`` does
        not contain an ``@`` separating user from host.
    """
    ActionMapper.__init__(self, oozie_node=oozie_node, name=name, props=props, **kwargs)
    self.template = template
    self.command = self.get_command()

    host_key = self.get_host_key()
    # Since Airflow separates user and host, we can't use jinja templating.
    # We must check if it is in job_properties.
    user_host = host_key.split("@")
    if len(user_host) < 2:
        # Without this guard the lookup below fails with an opaque
        # "list index out of range" that does not name the offending value.
        raise ValueError(
            "Invalid host {!r} for action {!r}: expected the format [USER]@[HOST]".format(host_key, name)
        )
    self.user = user_host[0]
    self.host = user_host[1]
def __init__(self, oozie_node: ET.Element, name: str, props: PropertySet, **kwargs):
    """Initialise the mapper with extractors and empty parsed fields.

    :param oozie_node: XML element forwarded to the base class and to both
        extractors.
    :param name: Mapper name, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        props=props,
        **kwargs,
    )

    # Scalar fields start unset.
    self.java_class: Optional[str] = None
    self.java_jar: Optional[str] = None
    self.job_name: Optional[str] = None

    self.jars: List[str] = []
    self.application_args: List[str] = []

    # The extractors read ``self.props``, which is available after the base constructor.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    self.hdfs_files: List[str] = []
    self.hdfs_archives: List[str] = []
    self.dataproc_jars: List[str] = []
    self.spark_opts: Dict[str, str] = {}

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
    """Initialise the mapper, parse the node, then attach a prepare extension.

    :param oozie_node: XML element forwarded to the base class and to both
        extractors.
    :param name: Mapper name, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        props=props,
        **kwargs,
    )

    self.params_dict: Dict[str, str] = {}

    # The extractors read ``self.props``, which is available after the base constructor.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    # Parsing runs after the extractors exist and before the prepare
    # extension is created; the order is preserved.
    self._parse_oozie_node()

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(
    self,
    oozie_node: Element,
    name: str,
    trigger_rule: str = TriggerRule.ALL_SUCCESS,
    params=None,
    **kwargs,
):
    """Initialise the mapper and its extractors, then parse ``oozie_node``.

    :param oozie_node: XML element forwarded to the base class and to both
        extractors.
    :param name: Mapper name, forwarded to the base class.
    :param trigger_rule: Trigger rule forwarded to the base class and stored;
        defaults to ``TriggerRule.ALL_SUCCESS``.
    :param params: Optional parameter dictionary; ``None`` becomes a new
        empty dict.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        trigger_rule=trigger_rule,
        **kwargs,
    )

    # Only ``None`` is replaced; a caller-supplied dict is kept by reference.
    effective_params = {} if params is None else params
    self.params = effective_params
    self.trigger_rule = trigger_rule

    self.properties = {}
    self.params_dict = {}

    # Both extractors receive the same normalised parameter dictionary.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, params=effective_params)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, params=effective_params)

    # Kept as the final step; presumably it reads the attributes assigned above.
    self._parse_oozie_node()
def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
    """Initialise the mapper with unset parsed fields and both extractors.

    :param oozie_node: XML element forwarded to the base class and to both
        extractors.
    :param name: Mapper name, forwarded to the base class.
    :param props: Property set forwarded to the base class.
    :param kwargs: Extra keyword arguments passed through to the base class.
    """
    ActionMapper.__init__(
        self,
        oozie_node=oozie_node,
        name=name,
        props=props,
        **kwargs,
    )

    # Placeholders; this constructor leaves them unset.
    self.variables: Optional[Dict[str, str]] = None
    self.query: Optional[str] = None
    self.script: Optional[str] = None

    self.hdfs_files: Optional[List[str]] = None
    self.hdfs_archives: Optional[List[str]] = None

    # The extractors read ``self.props``, which is available after the base constructor.
    self.file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
    self.archive_extractor = ArchiveExtractor(oozie_node=oozie_node, props=self.props)

    self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
def __init__(
    self,
    oozie_node: Element,
    name: str,
    trigger_rule: str = TriggerRule.ALL_SUCCESS,
    params: Dict[str, str] = None,
    template: str = "ssh.tpl",
    **kwargs,
):
    """Initialise the mapper and parse the command, user and host from the node.

    The command is built from the ``<command>`` child and every ``<args>``
    child, each shell-quoted and joined with spaces, then converted with
    ``el_utils.convert_el_to_jinja``. The user and host come from the
    ``<host>`` child, after an optional lookup in ``params``.

    :param oozie_node: XML element forwarded to ``ActionMapper.__init__``.
    :param name: Mapper name, forwarded to the base class.
    :param trigger_rule: Trigger rule forwarded to the base class; defaults
        to ``TriggerRule.ALL_SUCCESS``.
    :param params: Optional dictionary used to resolve the host key; ``None``
        is treated as an empty dict.
    :param template: Template file name stored on the instance; defaults to
        ``"ssh.tpl"``.
    :param kwargs: Extra keyword arguments passed through to the base class.
    :raises Exception: If the ``<command>`` node is missing or empty, or the
        ``<host>`` node is missing.
    :raises ValueError: If the resolved host value does not contain an ``@``
        separating user from host.
    """
    ActionMapper.__init__(self, oozie_node=oozie_node, name=name, trigger_rule=trigger_rule, **kwargs)

    if params is None:
        params = {}

    self.template = template

    cmd_node = self.oozie_node.find("command")
    arg_nodes = self.oozie_node.findall("args")
    if cmd_node is None or not cmd_node.text:
        raise Exception("Missing or empty command node in SSH action {}".format(self.oozie_node))

    cmd = cmd_node.text
    # An <args> element without text contributes an empty (quoted) argument.
    args = (x.text if x.text else "" for x in arg_nodes)
    cmd = " ".join(shlex.quote(x) for x in [cmd, *args])
    self.command = el_utils.convert_el_to_jinja(cmd, quote=True)

    host = self.oozie_node.find("host")
    if host is None:
        raise Exception("Missing host node in SSH action: {}".format(self.oozie_node))

    host_key = el_utils.strip_el(host.text)
    # The <host> node is formatted like [USER]@[HOST].
    if host_key in params:
        host_key = params[host_key]

    # Since Airflow separates user and host, we can't use jinja templating.
    # We must check if it is in params.
    user_host = host_key.split("@")
    if len(user_host) < 2:
        # Without this guard the lookup below fails with an opaque
        # "list index out of range" that does not name the offending value.
        raise ValueError(
            "Invalid host {!r} in SSH action {}: expected the format [USER]@[HOST]".format(
                host_key, self.oozie_node
            )
        )
    self.user = user_host[0]
    self.host = user_host[1]
def test_prepend_task_empty_relations(self):
    """Prepending to one task with no relations returns both tasks and one new relation."""
    first = Task(task_id=TEST_MAPPER_NAME + "_1", template_name="pig.tpl")
    second = Task(task_id=TEST_MAPPER_NAME + "_2", template_name="pig.tpl")

    tasks, relations = ActionMapper.prepend_task(
        task_to_prepend=first,
        tasks=[second],
        relations=[],
    )

    # The prepended task comes first in the returned list.
    self.assertEqual([first, second], tasks)

    # A single relation links the prepended task to the original one.
    expected_relations = [Relation(from_task_id="mapper_name_1", to_task_id="mapper_name_2")]
    self.assertEqual(expected_relations, relations)
def test_prepend_task_no_tasks(self):
    """Prepending to an empty task list raises ``IndexError``."""
    lone_task = Task(task_id=TEST_MAPPER_NAME + "_1", template_name="pig.tpl")

    with self.assertRaises(IndexError):
        ActionMapper.prepend_task(
            task_to_prepend=lone_task,
            tasks=[],
            relations=[],
        )