def test_extraction_with_database_specified(self) -> None:
    """
    Check that the extracted record's db equals self.database_key
    """
    # Single row handed back by the mocked connection's execute().
    rows = [{
        'schema': 'test_schema',
        'table_name': 'test_table',
        'last_updated_time': 1000,
        'cluster': 'MY_CLUSTER',
    }]
    fake_connection = MagicMock()
    fake_connection.execute = MagicMock(return_value=rows)

    with patch.object(SQLAlchemyExtractor, '_get_connection',
                      return_value=fake_connection):
        extractor = SnowflakeTableLastUpdatedExtractor()
        extractor.init(self.conf)
        first_record = extractor.extract()

        wanted = TableLastUpdated(schema='test_schema',
                                  table_name='test_table',
                                  last_updated_time_epoch=1000,
                                  db=self.database_key,
                                  cluster='MY_CLUSTER')
        self.assertEqual(repr(wanted), repr(first_record))
        # Only one row was supplied, so the next call must yield nothing.
        self.assertIsNone(extractor.extract())
def test_extraction_with_single_result(self) -> None:
    """
    Extraction using the cluster read from self.conf, with exactly one
    table row returned by the mocked connection
    """
    cluster_conf_key = 'extractor.snowflake_table_last_updated.{}'.format(
        SnowflakeTableLastUpdatedExtractor.CLUSTER_KEY)
    rows = [{
        'schema': 'test_schema',
        'table_name': 'test_table',
        'last_updated_time': 1000,
        'cluster': self.conf[cluster_conf_key],
    }]
    fake_connection = MagicMock()
    fake_connection.execute = MagicMock(return_value=rows)

    with patch.object(SQLAlchemyExtractor, '_get_connection',
                      return_value=fake_connection):
        extractor = SnowflakeTableLastUpdatedExtractor()
        extractor.init(self.conf)
        first_record = extractor.extract()

        wanted = TableLastUpdated(schema='test_schema',
                                  table_name='test_table',
                                  last_updated_time_epoch=1000,
                                  db='snowflake',
                                  cluster='MY_CLUSTER')
        self.assertEqual(repr(wanted), repr(first_record))
        # The single row is consumed; a further extract() returns None.
        self.assertIsNone(extractor.extract())
def test_extraction_with_partition_table_result(self) -> None:
    """
    Test extraction when only the partitioned-table extractor returns rows.

    The partitioned-table SQLAlchemy extractor is mocked to return two rows
    and the non-partitioned one to return None. Each row is expected back as
    a TableLastUpdated with db 'hive' and cluster 'gold', followed by None.
    """
    config_dict = {
        'filesystem.{}'.format(FileSystem.DASK_FILE_SYSTEM): MagicMock()
    }
    conf = ConfigFactory.from_dict(config_dict)

    pt_alchemy_extractor_instance = MagicMock()
    non_pt_alchemy_extractor_instance = MagicMock()
    with patch.object(HiveTableLastUpdatedExtractor,
                      '_get_partitioned_table_sql_alchemy_extractor',
                      return_value=pt_alchemy_extractor_instance), \
            patch.object(HiveTableLastUpdatedExtractor,
                         '_get_non_partitioned_table_sql_alchemy_extractor',
                         return_value=non_pt_alchemy_extractor_instance):
        # side_effect with a list hands out one row per extract() call.
        pt_alchemy_extractor_instance.extract = MagicMock(
            side_effect=[{
                'schema': 'foo_schema',
                'table_name': 'table_1',
                'last_updated_time': 1
            }, {
                'schema': 'foo_schema',
                'table_name': 'table_2',
                'last_updated_time': 2
            }])
        non_pt_alchemy_extractor_instance.extract = MagicMock(
            return_value=None)

        extractor = HiveTableLastUpdatedExtractor()
        extractor.init(conf)

        result = extractor.extract()
        expected = TableLastUpdated(schema='foo_schema',
                                    table_name='table_1',
                                    last_updated_time_epoch=1,
                                    db='hive',
                                    cluster='gold')
        self.assertEqual(repr(result), repr(expected))

        result = extractor.extract()
        expected = TableLastUpdated(schema='foo_schema',
                                    table_name='table_2',
                                    last_updated_time_epoch=2,
                                    db='hive',
                                    cluster='gold')
        self.assertEqual(repr(result), repr(expected))

        # Both mocked rows are consumed; nothing further is returned.
        self.assertIsNone(extractor.extract())
def test_extraction(self) -> None:
    """
    Extraction of a non-partitioned table whose timestamp is read from
    mocked filesystem metadata; expects one record, then None
    """
    old_datetime = datetime(2018, 8, 14, 4, 12, 3, tzinfo=UTC)
    new_datetime = datetime(2018, 11, 14, 4, 12, 3, tzinfo=UTC)

    # Mocked filesystem: two files listed, one metadata entry per info() call.
    file_infos = [
        FileMetadata(path='/foo/bar', last_updated=old_datetime, size=15093),
        FileMetadata(path='/foo/baz', last_updated=new_datetime, size=15094),
    ]
    fs = MagicMock()
    fs.ls = MagicMock(return_value=['/foo/bar', '/foo/baz'])
    fs.is_file = MagicMock(return_value=True)
    fs.info = MagicMock(side_effect=file_infos)

    pt_extractor = MagicMock()
    non_pt_extractor = MagicMock()

    patch_pt = patch.object(HiveTableLastUpdatedExtractor,
                            '_get_partitioned_table_sql_alchemy_extractor',
                            return_value=pt_extractor)
    patch_non_pt = patch.object(
        HiveTableLastUpdatedExtractor,
        '_get_non_partitioned_table_sql_alchemy_extractor',
        return_value=non_pt_extractor)
    patch_fs = patch.object(HiveTableLastUpdatedExtractor,
                            '_get_filesystem',
                            return_value=fs)

    with patch_pt, patch_non_pt, patch_fs:
        pt_extractor.extract = MagicMock(return_value=None)
        non_pt_rows = [
            {
                'schema': 'foo_schema',
                'table_name': 'table_1',
                'location': '/foo/bar'
            },
        ]
        non_pt_extractor.extract = MagicMock(
            side_effect=null_iterator(non_pt_rows))

        extractor = HiveTableLastUpdatedExtractor()
        extractor.init(ConfigFactory.from_dict({}))

        result = extractor.extract()
        expected = TableLastUpdated(schema='foo_schema',
                                    table_name='table_1',
                                    last_updated_time_epoch=1542168723,
                                    db='hive',
                                    cluster='gold')
        self.assertEqual(repr(result), repr(expected))
        self.assertIsNone(extractor.extract())
def test_extraction_with_multiple_result(self) -> None:
    """
    Extraction using the cluster read from self.conf, with three table
    rows returned by the mocked connection
    """
    default_cluster = self.conf[
        'extractor.snowflake_table_last_updated.{}'.format(
            SnowflakeTableLastUpdatedExtractor.CLUSTER_KEY)]

    # (schema, table_name, last_updated_time) in the order they are returned.
    cases = [
        ('test_schema1', 'test_table1', 1000),
        ('test_schema1', 'test_table2', 2000),
        ('test_schema2', 'test_table3', 3000),
    ]
    rows = [
        {
            'schema': schema,
            'table_name': table_name,
            'last_updated_time': updated_at,
            'cluster': default_cluster,
        }
        for schema, table_name, updated_at in cases
    ]
    fake_connection = MagicMock()
    fake_connection.execute = MagicMock(return_value=rows)

    with patch.object(SQLAlchemyExtractor, '_get_connection',
                      return_value=fake_connection):
        extractor = SnowflakeTableLastUpdatedExtractor()
        extractor.init(self.conf)

        for schema, table_name, updated_at in cases:
            expected = TableLastUpdated(schema=schema,
                                        table_name=table_name,
                                        last_updated_time_epoch=updated_at,
                                        db='snowflake',
                                        cluster='MY_CLUSTER')
            self.assertEqual(repr(expected), repr(extractor.extract()))

        # All three rows are consumed; a further extract() returns None.
        self.assertIsNone(extractor.extract())