def test_empty_dataset(self, mock_build):
    """Expect ``extract()`` to return ``None`` for a client mocked with ONE_DATASET and NO_TABLES."""
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, NO_TABLES, None)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    # Nothing should come back from the very first extract() call.
    self.assertIsNone(bq_extractor.extract())
def test_can_handle_datasets(self, mock_build):
    """Expect ``extract()`` to return ``None`` for a client mocked with NO_DATASETS."""
    mock_build.return_value = MockBigQueryClient(NO_DATASETS, None, None)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    first_record = bq_extractor.extract()
    self.assertIsNone(first_record)
def test_view(self, mock_build):
    """Expect a ``TableMetadata`` flagged as a view when the client is mocked with ONE_VIEW / VIEW_DATA."""
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_VIEW, VIEW_DATA)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    view_record = bq_extractor.extract()
    self.assertIsInstance(view_record, TableMetadata)
    self.assertEqual(view_record.is_view, True)
def test_empty_dataset(self, mock_datacatalogue, mock_bigquery):
    """Expect ``extract()`` to return ``None`` for a BigQuery client mocked with ONE_DATASET and NO_TABLES."""
    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, NO_TABLES, None)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    self.assertIsNone(bq_extractor.extract())
def test_view(self, mock_datacatalogue, mock_bigquery):
    """Expect a ``TableMetadata`` flagged as a view when BigQuery is mocked with ONE_VIEW / VIEW_DATA."""
    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_VIEW, VIEW_DATA)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    view_record = bq_extractor.extract()
    self.assertIsInstance(view_record, TableMetadata)
    self.assertEqual(view_record.is_view, True)
def test_table_without_tags(self, mock_datacatalogue, mock_bigquery):
    """Expect the extracted record's ``tags`` to equal ``None`` when the catalog client is mocked with NO_TAGS."""
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, NO_TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    table_record = bq_extractor.extract()
    self.assertEqual(table_record.tags, None)
def test_table_without_columns(self, mock_build):
    """Check the extracted record's fields when the client is mocked with ONE_TABLE / NO_COLS.

    The record is expected to carry the identifying fields below, an empty
    description, an empty column list, and ``is_view`` equal to ``False``.
    """
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NO_COLS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)
    table_record = bq_extractor.extract()

    # (attribute, expected value) pairs, checked in this exact order.
    expectations = (
        ('database', 'bigquery'),
        ('cluster', 'your-project-here'),
        ('schema', 'fdgdfgh'),
        ('name', 'nested_recs'),
        ('description', ""),
        ('columns', []),
        ('is_view', False),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(table_record, attribute), expected)
def test_accepts_dataset_filter_by_label(self, mock_build):
    """Expect a ``TableMetadata`` result when FILTER_KEY is configured as ``label.key:value``."""
    key_template = 'extractor.bigquery_table_metadata.{}'
    settings = {
        key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY): 'your-project-here',
        key_template.format(BigQueryMetadataExtractor.FILTER_KEY): 'label.key:value',
    }
    filtered_conf = ConfigFactory.from_dict(settings)

    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=filtered_conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    self.assertIsInstance(bq_extractor.extract(), TableMetadata)
def test_table_part_of_table_date_range(self, mock_build):
    """Expect exactly one record, named ``date_range_``, when the client is mocked with TABLE_DATE_RANGE."""
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, TABLE_DATE_RANGE, TABLE_DATA)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    records_seen = 0
    current = bq_extractor.extract()
    # Remember the first record's name before draining the extractor.
    first_name = current.name
    while current:
        records_seen += 1
        current = bq_extractor.extract()

    self.assertEqual(records_seen, 1)
    self.assertEqual(first_name, 'date_range_')
def test_table_with_nested_records(self, mock_build):
    """Check name and type of the first three columns when the client is mocked with NESTED_DATA."""
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NESTED_DATA)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)
    table_record = bq_extractor.extract()

    # Expected (name, type) per column position, checked in order.
    expected_columns = (
        ('nested', 'RECORD'),
        ('nested.nested2', 'RECORD'),
        ('nested.nested2.ahah', 'STRING'),
    )
    for position, (expected_name, expected_type) in enumerate(expected_columns):
        column = table_record.columns[position]
        self.assertEqual(column.name, expected_name)
        self.assertEqual(column.type, expected_type)
def test_keypath_and_pagesize_can_be_set(self, mock_build):
    """Expect ``init()`` to raise ``FileNotFoundError`` when KEY_PATH_KEY is set to ``/tmp/doesnotexist``."""
    key_template = 'extractor.bigquery_table_metadata.{}'
    settings = {
        key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY): 'your-project-here',
        key_template.format(BigQueryMetadataExtractor.PAGE_SIZE_KEY): 200,
        key_template.format(BigQueryMetadataExtractor.KEY_PATH_KEY): '/tmp/doesnotexist',
    }
    keyfile_conf = ConfigFactory.from_dict(settings)

    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)

    bq_extractor = BigQueryMetadataExtractor()
    with self.assertRaises(FileNotFoundError):
        # Both the scoping call and init() stay inside the assertRaises block.
        scoped_conf = Scoped.get_scoped_conf(
            conf=keyfile_conf,
            scope=bq_extractor.get_scope(),
        )
        bq_extractor.init(scoped_conf)
def test_table_without_columns(self, mock_datacatalogue, mock_bigquery):
    """Check the extracted record's fields when BigQuery is mocked with ONE_TABLE / NO_COLS.

    The record is expected to carry the identifying fields below, an empty
    description, an empty column list, and ``is_view`` equal to ``False``.
    """
    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NO_COLS)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)
    table_record = bq_extractor.extract()

    # Identity of the table.
    self.assertEqual(table_record.database, "bigquery")
    self.assertEqual(table_record.cluster, "your-project-here")
    self.assertEqual(table_record.schema, "fdgdfgh")
    self.assertEqual(table_record.name, "nested_recs")

    # Content of the table.
    self.assertEqual(table_record.description, "")
    self.assertEqual(table_record.columns, [])
    self.assertEqual(table_record.is_view, False)
def test_normal_table(self, mock_build):
    """Check the record fields and the first column when the client is mocked with ONE_TABLE / TABLE_DATA."""
    mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)
    table_record = bq_extractor.extract()

    # Table-level fields.
    self.assertEqual(table_record.database, 'bigquery')
    self.assertEqual(table_record.cluster, 'your-project-here')
    self.assertEqual(table_record.schema, 'fdgdfgh')
    self.assertEqual(table_record.name, 'nested_recs')
    self.assertEqual(table_record.description, "")

    # First column.
    leading_column = table_record.columns[0]
    self.assertEqual(leading_column.name, 'test')
    self.assertEqual(leading_column.type, 'STRING')
    self.assertEqual(leading_column.description, 'some_description')

    self.assertEqual(table_record.is_view, False)
def test_accepts_dataset_filter_by_label(self, mock_datacatalogue, mock_bigquery):
    """Expect a ``TableMetadata`` result when FILTER_KEY is configured as ``label.key:value``."""
    key_template = "extractor.bigquery_table_metadata.{}"
    settings = {
        key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY): "your-project-here",
        key_template.format(BigQueryMetadataExtractor.FILTER_KEY): "label.key:value",
    }
    filtered_conf = ConfigFactory.from_dict(settings)

    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=filtered_conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    self.assertIsInstance(bq_extractor.extract(), TableMetadata)
def test_table_part_of_table_date_range(self, mock_datacatalogue, mock_bigquery):
    """Expect exactly one record, named ``date_range_``, when BigQuery is mocked with TABLE_DATE_RANGE."""
    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, TABLE_DATE_RANGE, TABLE_DATA)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)

    records_seen = 0
    current = bq_extractor.extract()
    # Remember the first record's name before draining the extractor.
    first_name = current.name
    while current:
        records_seen += 1
        current = bq_extractor.extract()

    self.assertEqual(records_seen, 1)
    self.assertEqual(first_name, "date_range_")
def test_keypath_and_pagesize_can_be_set(self, mock_datacatalogue, mock_bigquery):
    """Expect ``init()`` to raise ``FileNotFoundError`` when KEY_PATH_KEY is set to ``/tmp/doesnotexist``."""
    key_template = "extractor.bigquery_table_metadata.{}"
    settings = {
        key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY): "your-project-here",
        key_template.format(BigQueryMetadataExtractor.PAGE_SIZE_KEY): 200,
        key_template.format(BigQueryMetadataExtractor.KEY_PATH_KEY): "/tmp/doesnotexist",
    }
    keyfile_conf = ConfigFactory.from_dict(settings)

    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    with self.assertRaises(FileNotFoundError):
        # Both the scoping call and init() stay inside the assertRaises block.
        scoped_conf = Scoped.get_scoped_conf(
            conf=keyfile_conf,
            scope=bq_extractor.get_scope(),
        )
        bq_extractor.init(scoped_conf)
def test_table_with_nested_records(self, mock_datacatalogue, mock_bigquery):
    """Check name and type of the first three columns when BigQuery is mocked with NESTED_DATA."""
    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NESTED_DATA)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)
    table_record = bq_extractor.extract()

    # Top-level record column.
    outer = table_record.columns[0]
    self.assertEqual(outer.name, "nested")
    self.assertEqual(outer.type, "RECORD")

    # Record nested one level down; its name is dot-qualified.
    middle = table_record.columns[1]
    self.assertEqual(middle.name, "nested.nested2")
    self.assertEqual(middle.type, "RECORD")

    # Leaf column two levels down.
    leaf = table_record.columns[2]
    self.assertEqual(leaf.name, "nested.nested2.ahah")
    self.assertEqual(leaf.type, "STRING")
def test_normal_table(self, mock_datacatalogue, mock_bigquery):
    """Check record fields, tags, labels and column tags for ONE_TABLE / TABLE_DATA with ENTRY / TAGS mocks.

    Verifies the table-level fields, the table-level tag dict, the labels,
    the first column (which is expected to have no tags) and the tag dict
    attached to the fourth column.
    """
    # Expected payloads are spelled out up front so the assertions stay short.
    tag_template = "projects/your-project-here/locations/europe-west2/tagTemplates/demo_tag"
    expected_table_tags = {
        "name": "projects/your-project-here/locations/us/entryGroups/@bigquery/entries/cHJvamVjdHMvd2hhbGUtZGV2LTI5NDgxMi9kYXRhc2V0cy90ZXN0aW5nL3RhYmxlcy90YWJsZTE/tags/CXy_PbcgFLIaW",
        "template": tag_template,
        "fields": {
            "demo2": {"displayName": "demo2", "boolValue": "true"},
            "demo1": {"displayName": "demo1", "stringValue": "test1", "order": 1},
        },
        "templateDisplayName": "demo-tag",
    }
    expected_labels = {
        "label_1": "test_label_1",
        "label_2": "test_label_2",
    }
    expected_column_tags = {
        "name": "projects/your-project-here/locations/us/entryGroups/@bigquery/entries/cHJvamVjdHMvd2hhbGUtZGV2LTI5NDgxMi9kYXRhc2V0cy90ZXN0aW5nL3RhYmxlcy90YWJsZTE/tags/CYPqPyHt4oku",
        "template": tag_template,
        "fields": {
            "demo1": {"displayName": "demo1", "stringValue": "test1", "order": 1},
            "demo2": {"displayName": "demo2", "boolValue": "true"},
        },
        "column": "test4",
        "templateDisplayName": "demo-tag",
    }

    # Wire up both mocked services before the extractor is created.
    mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
    mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(ENTRY, TAGS)

    bq_extractor = BigQueryMetadataExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=self.conf,
        scope=bq_extractor.get_scope(),
    )
    bq_extractor.init(scoped_conf)
    table_record = bq_extractor.extract()

    # Table-level fields.
    self.assertEqual(table_record.database, "bigquery")
    self.assertEqual(table_record.cluster, "your-project-here")
    self.assertEqual(table_record.schema, "fdgdfgh")
    self.assertEqual(table_record.name, "nested_recs")
    self.assertEqual(table_record.description, "")
    self.assertEqual(table_record.is_view, False)
    self.assertEqual(table_record.tags, expected_table_tags)
    self.assertEqual(table_record.labels, expected_labels)

    # First column: basic attributes, and no tags.
    leading_column = table_record.columns[0]
    self.assertEqual(leading_column.name, "test")
    self.assertEqual(leading_column.type, "STRING")
    self.assertEqual(leading_column.description, "some_description")
    self.assertEqual(leading_column.tags, None)

    # Fourth column: carries the column-scoped tag.
    tagged_column = table_record.columns[3]
    self.assertEqual(tagged_column.tags, expected_column_tags)