Пример #1
0
 def __init__(
     self,
     oozie_node: ET.Element,
     name: str,
     trigger_rule: str = TriggerRule.ALL_SUCCESS,
     params: Dict[str, str] = None,
     **kwargs,
 ):
     """Initialise the mapper state for one Oozie action node.

     :param oozie_node: XML element handed to ``ActionMapper`` and to both
         extractors.
     :param name: name forwarded to ``ActionMapper``.
     :param trigger_rule: forwarded to ``ActionMapper`` and also stored on
         this instance.
     :param params: parameter mapping; a falsy value (including ``None``)
         is replaced by a new empty dict.
     :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
     """
     ActionMapper.__init__(self, oozie_node, name, trigger_rule, **kwargs)
     self.params = params if params else {}
     self.trigger_rule = trigger_rule

     # Placeholders only: this constructor does not populate them.
     self.java_class = ""
     self.java_jar = ""
     self.job_name = None
     self.jars = []
     self.properties = {}
     self.application_args = []

     # Both extractors are built from the same node and the same params.
     self.file_extractor = FileExtractor(
         oozie_node=oozie_node, params=self.params
     )
     self.archive_extractor = ArchiveExtractor(
         oozie_node=oozie_node, params=self.params
     )

     self.prepare_command = None
     self.hdfs_files = []
     self.hdfs_archives = []
     self.dataproc_jars = []
Пример #2
0
    def test_replace_el(self):
        # Given
        self.job_properties["var1"] = "value1"
        self.job_properties["var2"] = "value2"
        # language=XML
        node_str = """
<pig>
    <file>/path/with/el/${var1}</file>
    <file>/path/with/el/${var2}</file>
    <file>/path/with/two/els/${var1}/${var2}</file>
</pig>
        """
        oozie_node = ET.fromstring(node_str)
        file_extractor = FileExtractor(oozie_node=oozie_node, props=self.props)
        # When
        file_extractor.parse_node()
        # Then
        self.assertEqual(
            [
                "hdfs:///path/with/el/{{var1}}",
                "hdfs:///path/with/el/{{var2}}",
                "hdfs:///path/with/two/els/{{var1}}/{{var2}}",
            ],
            file_extractor.hdfs_files,
        )
Пример #3
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     dag_name: str,
     props: PropertySet,
     jar_files: List[str],
     **kwargs,
 ):
     """Initialise the mapper and resolve the HDFS paths of its jar files.

     :param oozie_node: XML element handed to ``ActionMapper`` and to both
         extractors.
     :param name: name forwarded to ``ActionMapper``.
     :param dag_name: DAG name forwarded to ``ActionMapper``.
     :param props: property set forwarded to ``ActionMapper``; the
         extractors receive ``self.props``.
     :param jar_files: jar file names; a falsy value is replaced by a new
         empty list.
     :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         dag_name=dag_name,
         name=name,
         props=props,
         **kwargs
     )
     self.file_extractor = FileExtractor(
         oozie_node=oozie_node, props=self.props
     )
     self.archive_extractor = ArchiveExtractor(
         oozie_node=oozie_node, props=self.props
     )

     # Unset until filled in elsewhere; not populated by this constructor.
     self.main_class: Optional[str] = None
     self.java_opts: List[str] = []
     self.args: Optional[List[str]] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None

     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)

     self.jar_files: List[str] = jar_files or []
     self.jar_files_in_hdfs: List[str] = []
     # Called last, after jar_files and jar_files_in_hdfs exist.
     self._get_jar_files_in_hdfs_full_paths()
Пример #4
0
 def test_add_absolute_file(self):
     # Adding "/test_file" must keep the raw path in files and yield
     # "hdfs:///test_file" in hdfs_files.
     # Given
     fake_node = Element("fake")
     extractor = FileExtractor(oozie_node=fake_node, params=self.default_params)
     # When
     extractor.add_file("/test_file")
     # Then
     expected_files = ["/test_file"]
     expected_hdfs_files = ["hdfs:///test_file"]
     self.assertEqual(extractor.files, expected_files)
     self.assertEqual(extractor.hdfs_files, expected_hdfs_files)
Пример #5
0
 def test_add_relative_file(self):
     # Adding "test_file" (no leading slash) must keep the raw path in
     # files and yield the expected full hdfs:// path in hdfs_files.
     # Given
     fake_node = Element("fake")
     extractor = FileExtractor(oozie_node=fake_node, params=self.default_params)
     # When
     extractor.add_file("test_file")
     # Then
     expected_files = ["test_file"]
     expected_hdfs_files = ["hdfs:///user/pig/examples/pig_test_node/test_file"]
     self.assertEqual(extractor.files, expected_files)
     self.assertEqual(extractor.hdfs_files, expected_hdfs_files)
Пример #6
0
 def test_add_file_extra_hash(self):
     # add_file() must raise for a path that contains two '#' characters,
     # and the exception text must match the expected message.
     # Given
     extractor = FileExtractor(
         oozie_node=Element("fake"), params=self.default_params
     )
     bad_path = "/test_file#4rarear#"
     # When
     with self.assertRaises(Exception) as raised:
         extractor.add_file(bad_path)
     # Then
     expected_message = "There should be maximum one '#' in the path /test_file#4rarear#"
     self.assertEqual(expected_message, str(raised.exception))
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     dag_name: str,
     props: PropertySet,
     **kwargs
 ):
     """Initialise the mapper with empty parse results and its helpers.

     :param oozie_node: XML element handed to ``ActionMapper`` and to both
         extractors.
     :param name: name forwarded to ``ActionMapper``.
     :param dag_name: DAG name forwarded to ``ActionMapper``.
     :param props: property set forwarded to ``ActionMapper``; the
         extractors receive ``self.props``.
     :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         dag_name=dag_name,
         props=props,
         **kwargs
     )
     self.params_dict: Dict[str, str] = {}
     self.file_extractor = FileExtractor(
         oozie_node=oozie_node,
         props=self.props,
     )
     self.archive_extractor = ArchiveExtractor(
         oozie_node=oozie_node,
         props=self.props,
     )
     # Left as None here; this constructor does not populate them.
     self.name_node: Optional[str] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None
     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(
         self
     )
Пример #8
0
 def __init__(
     self,
     oozie_node: ET.Element,
     name: str,
     props: PropertySet,
     **kwargs
 ):
     """Initialise the mapper with empty defaults and its helper objects.

     :param oozie_node: XML element handed to ``ActionMapper`` and to both
         extractors.
     :param name: name forwarded to ``ActionMapper``.
     :param props: property set forwarded to ``ActionMapper``; the
         extractors receive ``self.props``.
     :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         props=props,
         **kwargs
     )

     # Scalar values are unset and collections are empty at construction.
     self.java_class: Optional[str] = None
     self.java_jar: Optional[str] = None
     self.job_name: Optional[str] = None
     self.jars: List[str] = []
     self.application_args: List[str] = []

     self.file_extractor = FileExtractor(
         oozie_node=oozie_node,
         props=self.props,
     )
     self.archive_extractor = ArchiveExtractor(
         oozie_node=oozie_node,
         props=self.props,
     )

     self.hdfs_files: List[str] = []
     self.hdfs_archives: List[str] = []
     self.dataproc_jars: List[str] = []
     self.spark_opts: Dict[str, str] = {}
     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(
         self
     )
Пример #9
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     props: PropertySet,
     **kwargs
 ):
     """Initialise the mapper, parse the node, then attach the extension.

     :param oozie_node: XML element handed to ``ActionMapper`` and to both
         extractors.
     :param name: name forwarded to ``ActionMapper``.
     :param props: property set forwarded to ``ActionMapper``; the
         extractors receive ``self.props``.
     :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
     """
     ActionMapper.__init__(
         self, oozie_node=oozie_node, name=name, props=props, **kwargs
     )
     self.params_dict: Dict[str, str] = {}
     self.file_extractor = FileExtractor(
         oozie_node=oozie_node, props=self.props
     )
     self.archive_extractor = ArchiveExtractor(
         oozie_node=oozie_node, props=self.props
     )
     # The node is parsed before the prepare extension is created.
     self._parse_oozie_node()
     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
Пример #10
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     props: PropertySet,
     **kwargs
 ):
     """Initialise the mapper with unset fields and its helper objects.

     :param oozie_node: XML element handed to ``ActionMapper`` and to both
         extractors.
     :param name: name forwarded to ``ActionMapper``.
     :param props: property set forwarded to ``ActionMapper``; the
         extractors receive ``self.props``.
     :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
     """
     ActionMapper.__init__(
         self, oozie_node=oozie_node, name=name, props=props, **kwargs
     )

     # All of these start as None; this constructor does not fill them in.
     self.variables: Optional[Dict[str, str]] = None
     self.query: Optional[str] = None
     self.script: Optional[str] = None
     self.hdfs_files: Optional[List[str]] = None
     self.hdfs_archives: Optional[List[str]] = None

     self.file_extractor = FileExtractor(
         oozie_node=oozie_node, props=self.props
     )
     self.archive_extractor = ArchiveExtractor(
         oozie_node=oozie_node, props=self.props
     )
     self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)
Пример #11
0
 def __init__(
     self,
     oozie_node: Element,
     name: str,
     trigger_rule: str = TriggerRule.ALL_SUCCESS,
     params=None,
     **kwargs,
 ):
     """Initialise the mapper and immediately parse the Oozie node.

     :param oozie_node: XML element handed to ``ActionMapper`` and to both
         extractors.
     :param name: name forwarded to ``ActionMapper``.
     :param trigger_rule: forwarded to ``ActionMapper`` and also stored on
         this instance.
     :param params: parameter mapping; ``None`` is replaced by a new empty
         dict, which is then shared with both extractors.
     :param kwargs: extra keyword arguments forwarded to ``ActionMapper``.
     """
     ActionMapper.__init__(
         self,
         oozie_node=oozie_node,
         name=name,
         trigger_rule=trigger_rule,
         **kwargs
     )
     # Only None triggers the default; any other value is used as given.
     params = {} if params is None else params
     self.params = params
     self.trigger_rule = trigger_rule
     self.properties = {}
     self.params_dict = {}
     self.file_extractor = FileExtractor(
         oozie_node=oozie_node,
         params=params,
     )
     self.archive_extractor = ArchiveExtractor(
         oozie_node=oozie_node,
         params=params,
     )
     # Parsing runs last, once every attribute above exists.
     self._parse_oozie_node()
Пример #12
0
 def test_add_multiple_files(self):
     # Three successive add_file() calls must be reflected, in call order,
     # in both files and hdfs_files.
     # Given
     extractor = FileExtractor(oozie_node=Element("fake"), props=self.props)
     # When
     extractor.add_file("/test_file")
     extractor.add_file("test_file2")
     extractor.add_file("/test_file3")
     # Then
     expected_files = ["/test_file", "test_file2", "/test_file3"]
     expected_hdfs_files = [
         "hdfs:///test_file",
         "hdfs:///user/pig/examples/pig_test_node/test_file2",
         "hdfs:///test_file3",
     ]
     self.assertEqual(extractor.files, expected_files)
     self.assertEqual(extractor.hdfs_files, expected_hdfs_files)
Пример #13
0
 def test_add_hash_files(self):
     # Paths carrying a single '#suffix' must keep that suffix in both
     # files and hdfs_files.
     # Given
     extractor = FileExtractor(
         oozie_node=Element("fake"), params=self.default_params
     )
     # When
     extractor.add_file("/test_file#test3_link")
     extractor.add_file("test_file2#test_link")
     extractor.add_file("/test_file3")
     # Then
     expected_files = [
         "/test_file#test3_link",
         "test_file2#test_link",
         "/test_file3",
     ]
     expected_hdfs_files = [
         "hdfs:///test_file#test3_link",
         "hdfs:///user/pig/examples/pig_test_node/test_file2#test_link",
         "hdfs:///test_file3",
     ]
     self.assertEqual(extractor.files, expected_files)
     self.assertEqual(extractor.hdfs_files, expected_hdfs_files)
Пример #14
0
    def test_replace_el(self):
        # Given
        params = {"var1": "value1", "var2": "value2", **self.default_params}
        # language=XML
        node_str = """
<pig>
    <file>/path/with/el/${var1}</file>
    <file>/path/with/el/${var2}</file>
    <file>/path/with/two/els/${var1}/${var2}</file>
</pig>
        """
        oozie_node = ET.fromstring(node_str)
        file_extractor = FileExtractor(oozie_node=oozie_node, params=params)
        # When
        file_extractor.parse_node()
        # Then
        self.assertEqual(
            file_extractor.hdfs_files,
            [
                "hdfs:///path/with/el/value1",
                "hdfs:///path/with/el/value2",
                "hdfs:///path/with/two/els/value1/value2",
            ],
        )