示例#1
0
    def test_extraction_with_empty_query_result(self) -> None:
        """
        Test that extract() yields None when the extractor is initialised
        against a patched-out SQLAlchemy connection and a mocked filesystem.
        """
        config_dict = {
            f'extractor.sqlalchemy.{SQLAlchemyExtractor.CONN_STRING}':
            'TEST_CONNECTION',
            f'filesystem.{FileSystem.DASK_FILE_SYSTEM}': MagicMock()
        }
        conf = ConfigFactory.from_dict(config_dict)
        # Replace the connection factory with a mock for the duration of the
        # test so init() does not call the real _get_connection.
        with patch.object(SQLAlchemyExtractor, '_get_connection'):
            extractor = HiveTableLastUpdatedExtractor()
            extractor.init(conf)

            # Identity check against None (not ==), matching the other tests
            # in this file.
            self.assertIsNone(extractor.extract())
    def test_extraction_with_partition_table_result(self) -> None:
        """
        Test extraction when only the partitioned-table extractor has rows.

        The partitioned mock hands back two rows and then None; the
        non-partitioned mock always returns None. Each row is expected to
        surface as a TableLastUpdated record, followed by None.
        """
        config_dict = {
            f'filesystem.{FileSystem.DASK_FILE_SYSTEM}': MagicMock()
        }
        conf = ConfigFactory.from_dict(config_dict)

        pt_alchemy_extractor_instance = MagicMock()
        non_pt_alchemy_extractor_instance = MagicMock()
        with patch.object(HiveTableLastUpdatedExtractor, '_get_partitioned_table_sql_alchemy_extractor',
                          return_value=pt_alchemy_extractor_instance),\
            patch.object(HiveTableLastUpdatedExtractor, '_get_non_partitioned_table_sql_alchemy_extractor',
                         return_value=non_pt_alchemy_extractor_instance):
            # A Mock whose side_effect is a list raises StopIteration once the
            # list is exhausted. End the sequence with an explicit None so a
            # further extract() call reports "no more rows" by returning None,
            # the same way the non-partitioned mock below does.
            pt_alchemy_extractor_instance.extract = MagicMock(
                side_effect=[{
                    'schema': 'foo_schema',
                    'table_name': 'table_1',
                    'last_updated_time': 1
                }, {
                    'schema': 'foo_schema',
                    'table_name': 'table_2',
                    'last_updated_time': 2
                }, None])

            non_pt_alchemy_extractor_instance.extract = MagicMock(
                return_value=None)

            extractor = HiveTableLastUpdatedExtractor()
            extractor.init(conf)

            # Records are compared through repr() of the actual and expected
            # objects rather than by direct equality.
            result = extractor.extract()
            expected = TableLastUpdated(schema='foo_schema',
                                        table_name='table_1',
                                        last_updated_time_epoch=1,
                                        db='hive',
                                        cluster='gold')
            self.assertEqual(repr(result), repr(expected))

            result = extractor.extract()
            expected = TableLastUpdated(schema='foo_schema',
                                        table_name='table_2',
                                        last_updated_time_epoch=2,
                                        db='hive',
                                        cluster='gold')
            self.assertEqual(repr(result), repr(expected))

            # Both underlying extractors are drained at this point.
            self.assertIsNone(extractor.extract())
示例#3
0
    def test_extraction(self) -> None:
        """
        Test extraction for a non-partitioned table whose last-updated time
        is derived from mocked filesystem metadata.

        The expected epoch (1542168723) is the timestamp of the newer of the
        two mocked files.
        """
        older_mtime = datetime(2018, 8, 14, 4, 12, 3, tzinfo=UTC)
        newer_mtime = datetime(2018, 11, 14, 4, 12, 3, tzinfo=UTC)

        # Fake filesystem: listing returns two paths, every path is reported
        # as a file, and info() hands out one metadata record per call.
        mock_fs = MagicMock()
        mock_fs.ls = MagicMock(return_value=['/foo/bar', '/foo/baz'])
        mock_fs.is_file = MagicMock(return_value=True)
        mock_fs.info = MagicMock(side_effect=[
            FileMetadata(path='/foo/bar', last_updated=older_mtime, size=15093),
            FileMetadata(path='/foo/baz', last_updated=newer_mtime, size=15094),
        ])

        # The partitioned-table extractor never has anything to return.
        partitioned_mock = MagicMock()
        partitioned_mock.extract = MagicMock(return_value=None)

        # The non-partitioned extractor serves a single table row.
        # NOTE(review): null_iterator is defined outside this block; presumably
        # it keeps returning None after the listed rows - confirm.
        table_rows = [
            {
                'schema': 'foo_schema',
                'table_name': 'table_1',
                'location': '/foo/bar'
            },
        ]
        non_partitioned_mock = MagicMock()
        non_partitioned_mock.extract = MagicMock(
            side_effect=null_iterator(table_rows))

        with patch.object(HiveTableLastUpdatedExtractor,
                          '_get_partitioned_table_sql_alchemy_extractor',
                          return_value=partitioned_mock), \
            patch.object(HiveTableLastUpdatedExtractor,
                         '_get_non_partitioned_table_sql_alchemy_extractor',
                         return_value=non_partitioned_mock), \
            patch.object(HiveTableLastUpdatedExtractor,
                         '_get_filesystem', return_value=mock_fs):
            extractor = HiveTableLastUpdatedExtractor()
            extractor.init(ConfigFactory.from_dict({}))

            actual = extractor.extract()
            wanted = TableLastUpdated(schema='foo_schema',
                                      table_name='table_1',
                                      last_updated_time_epoch=1542168723,
                                      db='hive',
                                      cluster='gold')
            self.assertEqual(repr(actual), repr(wanted))

            # Nothing further to extract after the single row.
            self.assertIsNone(extractor.extract())