def _get_config(self):
    """Build a ``config.Config`` when a user config path is available.

    Both the user config path and the connector config path are obtained
    from the instance's own lookup helpers, in that order.

    Returns:
        A ``config.Config`` created from the two paths, or ``None`` when
        the user config path is falsy.
    """
    user_path = self._get_user_config_path()
    connector_path = self._get_connector_config_path()
    if not user_path:
        return None
    return config.Config(user_path, connector_path)
def test_scrape_metadata_with_credentials_overriding_base_metadata_query_should_return_objects(  # noqa: E501
        self, normalize):
    # Scrapes with a default query while the config is loaded from
    # 'base_metadata_query_ingest_cfg.yaml', then checks that the statement
    # executed on the fake cursor is the override query, not the default.
    # ``normalize`` is injected from outside this block (presumably by a
    # ``mock.patch`` decorator) and stubbed to return canned metadata.
    canned_metadata = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(
            self.__MODULE_PATH, 'base_metadata_query_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))
    normalize.return_value = canned_metadata
    scraper = test_utils.FakeScraper()

    default_query = 'SELECT * from default_db'
    override_query = 'SELECT * from db'
    connection_args = {'host': 'localhost', 'port': 1234}

    schemas_metadata = scraper.scrape({},
                                      connection_args=connection_args,
                                      query=default_query,
                                      config=loaded_config)

    # First positional argument of the most recent ``execute`` call.
    executed_statement = scraper.cur.execute.call_args[0][0]
    self.assertEqual(override_query, executed_statement)
    self.assertEqual(1, len(schemas_metadata))
def test_scrape_metadata_with_user_config_should_return_objects(
        self, to_metadata_dict,
        get_exact_table_names_from_dataframe):  # noqa
    # Calls ``get_metadata`` with a user config loaded from
    # '../test_data/ingest_cfg.yaml' and expects a result of length 1.
    # Both extra parameters are stubs injected from outside this block
    # (presumably by ``mock.patch`` decorators).
    to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    get_exact_table_names_from_dataframe.return_value = [
        "schema0.table0", "schema1.table1"
    ]

    scraper = test_utils.FakeScraper()

    tests_dir = os.path.dirname(os.path.abspath(__file__))
    user_config = config.Config(
        os.path.join(tests_dir, '../test_data/ingest_cfg.yaml'))

    metadata_definition = utils.Utils.get_metadata_def_obj(
        self.__MODULE_PATH)
    connection_args = {'host': 'localhost', 'port': 1234}

    schemas_metadata = scraper.get_metadata(metadata_definition,
                                            connection_args=connection_args,
                                            user_config=user_config)

    self.assertEqual(1, len(schemas_metadata))
def test_config_should_retrieve_sql_objects(self, yaml_load):
    # The stubbed YAML lists 'functions' as enabled and 'stored_procedures'
    # as disabled. The loaded config is expected to expose exactly one SQL
    # object entry, 'functions', carrying non-None query and metadata
    # definition values. ``yaml_load`` is injected from outside this block
    # (presumably by a ``mock.patch`` decorator).
    enabled_by_name = (('functions', True), ('stored_procedures', False))
    yaml_load.return_value = {
        constants.SQL_OBJECTS_KEY: [{
            constants.SQL_OBJECT_ITEM_NAME: name,
            constants.SQL_OBJECT_ITEM_ENABLED_FLAG: enabled,
        } for name, enabled in enabled_by_name]
    }

    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'sql_objects_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))

    self.assertEqual(1, len(loaded_config.sql_objects_config))

    functions_config = loaded_config.sql_objects_config['functions']
    self.assertEqual('functions',
                     functions_config[constants.SQL_OBJECT_ITEM_NAME])
    for required_key in (constants.SQL_OBJECT_ITEM_QUERY_KEY,
                         constants.SQL_OBJECT_ITEM_METADATA_DEF_KEY):
        self.assertIsNotNone(functions_config[required_key])
def test_scrape_metadata_with_enrich_metadata_user_config_and_no_enricher_should_raise_error(  # noqa:E501
        self, to_metadata_dict, _):  # noqa
    # Loads '../test_data/enrich_metadata_ingest_cfg.yaml' and expects
    # ``FakeScraper.get_metadata`` to raise NotImplementedError.
    # ``to_metadata_dict`` and ``_`` are injected from outside this block
    # (presumably by ``mock.patch`` decorators); ``_`` is unused here.
    to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')

    tests_dir = os.path.dirname(os.path.abspath(__file__))
    user_config = config.Config(
        os.path.join(tests_dir,
                     '../test_data/enrich_metadata_ingest_cfg.yaml'))

    metadata_definition = utils.Utils.get_metadata_def_obj(
        self.__MODULE_PATH)
    scraper = test_utils.FakeScraper()

    with self.assertRaises(NotImplementedError):
        scraper.get_metadata(metadata_definition,
                             connection_args={
                                 'host': 'localhost',
                                 'port': 1234
                             },
                             user_config=user_config)
def test_scrape_metadata_with_enrich_metadata_config_and_no_enricher_should_succeed(  # noqa:E501
        self, normalize, _):  # noqa
    # Runs ``FakeScraper.scrape`` with a config loaded from
    # '../test_data/enrich_metadata_ingest_cfg.yaml'. There are no explicit
    # assertions: the test passes as long as the call does not raise.
    # ``normalize`` and ``_`` are injected from outside this block
    # (presumably by ``mock.patch`` decorators); ``_`` is unused here.
    normalize.return_value = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')

    tests_dir = os.path.dirname(os.path.abspath(__file__))
    user_config_path = os.path.join(
        tests_dir, '../test_data/enrich_metadata_ingest_cfg.yaml')
    connector_config_path = utils.Utils.get_test_config_path(
        self.__MODULE_PATH)
    loaded_config = config.Config(user_config_path, connector_config_path)

    metadata_definition = utils.Utils.get_metadata_def_obj(
        self.__MODULE_PATH)
    scraper = test_utils.FakeScraper()
    connection_args = {'host': 'localhost', 'port': 1234}

    scraper.scrape(metadata_definition,
                   connection_args=connection_args,
                   config=loaded_config)
def test_config_should_not_deliver_options_not_chosen_by_user(
        self, yaml_load):
    # With the stubbed YAML enabling the refresh option and disabling the
    # row-count option, ``get_chosen_metadata_options`` is expected to
    # return an empty list. ``yaml_load`` is injected from outside this
    # block (presumably by a ``mock.patch`` decorator).
    stubbed_options = {
        config_constants.REFRESH_OPTION: True,
        config_constants.ROW_COUNT_OPTION: False
    }
    yaml_load.return_value = stubbed_options

    user_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'ingest_cfg.yaml'))

    chosen_options = user_config.get_chosen_metadata_options()
    self.assertEqual([], chosen_options)
def test_scrape_metadata_with_multiple_sql_objects_config_should_return_objects(  # noqa: E501
        self, sql_objects_normalize, base_normalize,
        get_exact_table_names_from_dataframe):  # noqa
    # Scrapes with 'sql_objects_ingest_cfg.yaml' and expects the result to
    # hold both 'schemas' and 'sql_objects', with the 'functions' and
    # 'stored_procedures' entries equal to the stubbed normalizer outputs.
    # The three extra parameters are stubs injected from outside this block
    # (presumably by ``mock.patch`` decorators).
    base_metadata = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    base_normalize.return_value = base_metadata

    functions_metadata = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'normalized_sql_objects.json')
    stored_procedure_metadata = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'normalized_sql_objects_stored_procedure.json')
    # Successive calls to the SQL-objects normalizer yield these in order.
    sql_objects_normalize.side_effect = [
        functions_metadata, stored_procedure_metadata
    ]

    get_exact_table_names_from_dataframe.return_value = [
        "schema0.table0", "schema1.table1"
    ]

    scraper = test_utils.FakeScraper()
    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'sql_objects_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))
    metadata_definition = utils.Utils.get_metadata_def_obj(
        self.__MODULE_PATH)
    connection_args = {'host': 'localhost', 'port': 1234}

    scraped_metadata = scraper.scrape(metadata_definition,
                                      connection_args=connection_args,
                                      config=loaded_config)

    self.assertEqual(2, len(scraped_metadata))
    for top_level_key in ('schemas', 'sql_objects'):
        self.assertIn(top_level_key, scraped_metadata)
    self.assertDictEqual(base_metadata, scraped_metadata)

    scraped_sql_objects = scraped_metadata['sql_objects']
    self.assertDictEqual(functions_metadata,
                         scraped_sql_objects['functions'])
    self.assertDictEqual(stored_procedure_metadata,
                         scraped_sql_objects['stored_procedures'])
def test_config_should_retrieve_base_metadata_query(self, yaml_load):
    # The stubbed YAML names 'my_override_query.sql' as the base metadata
    # query file; the loaded config is expected to expose
    # 'SELECT * from db' as ``base_metadata_query``. ``yaml_load`` is
    # injected from outside this block (presumably by a ``mock.patch``
    # decorator).
    yaml_load.return_value = {
        constants.BASE_METADATA_QUERY_FILENAME: 'my_override_query.sql'
    }

    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(
            self.__MODULE_PATH, 'base_metadata_query_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))

    expected_query = 'SELECT * from db'
    self.assertEqual(expected_query, loaded_config.base_metadata_query)
def test_config_should_deliver_options_chosen_by_user(self, yaml_load):
    # With both the refresh and row-count options enabled in the stubbed
    # YAML, ``get_chosen_metadata_options`` is expected to return only the
    # row-count option. ``yaml_load`` is injected from outside this block
    # (presumably by a ``mock.patch`` decorator).
    stubbed_options = {
        constants.REFRESH_OPTION: True,
        constants.ROW_COUNT_OPTION: True
    }
    yaml_load.return_value = stubbed_options

    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))

    chosen_options = loaded_config.get_chosen_metadata_options()
    self.assertEqual([constants.ROW_COUNT_OPTION], chosen_options)
def test_optional_metadata_should_not_be_pulled_with_empty_config(
        self, get_optional_queries, to_metadata_dict):
    # Calls ``get_metadata`` with a config loaded from
    # 'empty_ingest_cfg.yaml' and checks that the result has length 1 and
    # that ``get_optional_queries`` was never called. Both extra parameters
    # are stubs injected from outside this block (presumably by
    # ``mock.patch`` decorators).
    empty_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'empty_ingest_cfg.yaml'))
    scraper = test_utils.FakeScraper()
    to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    connection_args = {'host': 'localhost', 'port': 1234}

    schemas_metadata = scraper.get_metadata({},
                                            connection_args=connection_args,
                                            user_config=empty_config)

    self.assertEqual(1, len(schemas_metadata))
    self.assertEqual(0, get_optional_queries.call_count)
def test_scrape_metadata_with_csv_and_sql_objects_should_return_base_metadata(  # noqa: E501
        self, sql_objects_normalize, base_normalize,
        get_exact_table_names_from_dataframe):  # noqa
    # Scrapes from 'rdbms_full_dump.csv' with 'sql_objects_ingest_cfg.yaml'
    # loaded and expects only the base metadata back: a 'schemas' key and
    # no 'sql_objects' key. The three extra parameters are stubs injected
    # from outside this block (presumably by ``mock.patch`` decorators).
    base_metadata = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    base_normalize.return_value = base_metadata

    sql_objects_normalize.return_value = \
        utils.Utils.convert_json_to_object(self.__MODULE_PATH,
                                           'normalized_sql_objects.json')

    get_exact_table_names_from_dataframe.return_value = [
        "schema0.table0", "schema1.table1"
    ]

    scraper = test_utils.FakeScraper()
    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'sql_objects_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))
    metadata_definition = utils.Utils.get_metadata_def_obj(
        self.__MODULE_PATH)
    csv_dump_path = utils.Utils.get_resolved_file_name(
        self.__MODULE_PATH, 'rdbms_full_dump.csv')

    scraped_metadata = scraper.scrape(metadata_definition,
                                      csv_path=csv_dump_path,
                                      config=loaded_config)

    self.assertEqual(1, len(scraped_metadata))
    self.assertIn('schemas', scraped_metadata)
    self.assertNotIn('sql_objects', scraped_metadata)
    self.assertDictEqual(base_metadata, scraped_metadata)
def test_metadata_should_not_be_updated_with_empty_config(
        self, get_refresh_metadata_queries, normalize):
    # Scrapes with a config loaded from 'empty_ingest_cfg.yaml' and checks
    # that the result has length 1 and that
    # ``get_refresh_metadata_queries`` was never called. Both extra
    # parameters are stubs injected from outside this block (presumably by
    # ``mock.patch`` decorators).
    empty_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'empty_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))
    scraper = test_utils.FakeScraper()
    normalize.return_value = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    connection_args = {'host': 'localhost', 'port': 1234}

    schemas_metadata = scraper.scrape({},
                                      connection_args=connection_args,
                                      config=empty_config)

    self.assertEqual(1, len(schemas_metadata))
    self.assertEqual(0, get_refresh_metadata_queries.call_count)
def test_scrape_metadata_with_enrich_metadata_config_should_return_objects(  # noqa:E501
        self, normalize, get_exact_table_names_from_dataframe):  # noqa
    # Scrapes through ``FakeScraperWithMetadataEnricher`` with
    # '../test_data/enrich_metadata_ingest_cfg.yaml' loaded, then inspects
    # the dataframe handed to ``normalize``: the entries at index 0 of its
    # 'schema_name' and 'table_name' columns must start with 'mycompany'.
    # Both extra parameters are stubs injected from outside this block
    # (presumably by ``mock.patch`` decorators).
    normalize.return_value = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    get_exact_table_names_from_dataframe.return_value = [
        "schema0.table0", "schema1.table1"
    ]

    scraper = test_utils.FakeScraperWithMetadataEnricher()

    tests_dir = os.path.dirname(os.path.abspath(__file__))
    user_config_path = os.path.join(
        tests_dir, '../test_data/enrich_metadata_ingest_cfg.yaml')
    connector_config_path = utils.Utils.get_test_config_path(
        self.__MODULE_PATH)
    loaded_config = config.Config(user_config_path, connector_config_path)

    metadata_definition = utils.Utils.get_metadata_def_obj(
        self.__MODULE_PATH)
    connection_args = {'host': 'localhost', 'port': 1234}

    schemas_metadata = scraper.scrape(metadata_definition,
                                      connection_args=connection_args,
                                      config=loaded_config)

    self.assertEqual(1, len(schemas_metadata))

    # Positional args of the first ``normalize`` call; unpacking requires
    # exactly two of them.
    normalized_dataframe, _unused_definition = \
        normalize.call_args_list[0][0]
    for column in ('schema_name', 'table_name'):
        self.assertTrue(
            normalized_dataframe[column][0].startswith('mycompany'))
def test_config_no_files_should_not_retrieve_sql_objects(self, yaml_load):
    # The stubbed YAML enables 'functions_xpto' and disables
    # 'stored_procedures'; the loaded config is expected to expose no SQL
    # object entries at all. ``yaml_load`` is injected from outside this
    # block (presumably by a ``mock.patch`` decorator).
    enabled_by_name = (('functions_xpto', True),
                       ('stored_procedures', False))
    yaml_load.return_value = {
        constants.SQL_OBJECTS_KEY: [{
            constants.SQL_OBJECT_ITEM_NAME: name,
            constants.SQL_OBJECT_ITEM_ENABLED_FLAG: enabled,
        } for name, enabled in enabled_by_name]
    }

    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'sql_objects_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))

    self.assertEqual(0, len(loaded_config.sql_objects_config))
def test_synchronize_metadata_with_sql_config_should_not_raise_error(  # noqa: E501
        self, process_entries_length_metric,
        process_metadata_payload_bytes_metric, process_elapsed_time_metric,
        delete_obsolete_metadata, ingest_metadata, make_base_entries,
        make_sql_objects_entries, scrape):
    # Runs a ``DataCatalogSynchronizer`` built with monitoring enabled and
    # a config loaded from 'sql_objects_ingest_cfg.yaml', then verifies how
    # many times each collaborator was called and that the connection's
    # database name was copied into the metadata definition. All extra
    # parameters are stubs injected from outside this block (presumably by
    # ``mock.patch`` decorators).
    make_base_entries.return_value = [({}, [])]
    make_sql_objects_entries.return_value = []

    loaded_config = config.Config(
        utils.Utils.get_resolved_file_name(self.__MODULE_PATH,
                                           'sql_objects_ingest_cfg.yaml'),
        utils.Utils.get_test_config_path(self.__MODULE_PATH))

    synchronizer = datacatalog_synchronizer.DataCatalogSynchronizer(
        DatacatalogSynchronizerTestCase.__PROJECT_ID,
        DatacatalogSynchronizerTestCase.__LOCATION_ID,
        DatacatalogSynchronizerTestCase.__ENTRY_GROUP_ID,
        DatacatalogSynchronizerTestCase.__HOST,
        utils.Utils.get_metadata_def_obj(self.__MODULE_PATH),
        test_utils.FakeScraper, {'database': 'test_db'},
        enable_monitoring=True,
        config=loaded_config)
    synchronizer.run()

    self.assertEqual(scrape.call_count, 1)

    # Reads the synchronizer's name-mangled private attribute directly.
    metadata_definition = \
        synchronizer._DataCatalogSynchronizer__metadata_definition
    self.assertEqual(metadata_definition['database_name'], 'test_db')

    # Checked in this order so the first mismatch reported stays the same.
    expected_call_counts = (
        (make_base_entries, 1),
        (ingest_metadata, 2),
        (delete_obsolete_metadata, 1),
        (process_entries_length_metric, 1),
        (process_metadata_payload_bytes_metric, 1),
        (process_elapsed_time_metric, 1),
    )
    for patched, expected_count in expected_call_counts:
        self.assertEqual(patched.call_count, expected_count)
def test_scrape_metadata_with_csv_and_user_config_should_return_objects(
        self, to_metadata_dict,
        get_exact_table_names_from_dataframe):  # noqa
    # Calls ``get_metadata`` on ``FakeScraperWithMetadataEnricher`` using
    # 'rdbms_full_dump.csv' and a user config loaded from
    # '../test_data/enrich_metadata_ingest_cfg.yaml', then inspects the
    # dataframe handed to ``to_metadata_dict``: the entries at index 0 of
    # its 'schema_name' and 'table_name' columns must start with
    # 'mycompany'. Both extra parameters are stubs injected from outside
    # this block (presumably by ``mock.patch`` decorators).
    to_metadata_dict.return_value = utils.Utils.convert_json_to_object(
        self.__MODULE_PATH, 'metadata.json')
    get_exact_table_names_from_dataframe.return_value = [
        "schema0.table0", "schema1.table1"
    ]

    scraper = test_utils.FakeScraperWithMetadataEnricher()

    tests_dir = os.path.dirname(os.path.abspath(__file__))
    user_config = config.Config(
        os.path.join(tests_dir,
                     '../test_data/enrich_metadata_ingest_cfg.yaml'))

    metadata_definition = utils.Utils.get_metadata_def_obj(
        self.__MODULE_PATH)
    csv_dump_path = utils.Utils.get_resolved_file_name(
        self.__MODULE_PATH, 'rdbms_full_dump.csv')

    schemas_metadata = scraper.get_metadata(metadata_definition,
                                            csv_path=csv_dump_path,
                                            user_config=user_config)

    self.assertEqual(1, len(schemas_metadata))

    # Positional args of the first ``to_metadata_dict`` call; unpacking
    # requires exactly two of them.
    enriched_dataframe, _unused_definition = \
        to_metadata_dict.call_args_list[0][0]
    for column in ('schema_name', 'table_name'):
        self.assertTrue(
            enriched_dataframe[column][0].startswith('mycompany'))