def test_empty_dataset(self, mock_build):
     """Extractor fed the ONE_DATASET / NO_TABLES fixtures returns None from extract()."""
     mock_build.return_value = MockBigQueryClient(ONE_DATASET, NO_TABLES, None)
     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)
     # The very first extract() call is expected to come back empty.
     self.assertIsNone(bq_extractor.extract())
 def test_can_handle_datasets(self, mock_build):
     """Extractor fed the NO_DATASETS fixture returns None from extract()."""
     mock_build.return_value = MockBigQueryClient(NO_DATASETS, None, None)
     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)
     # The very first extract() call is expected to come back empty.
     self.assertIsNone(bq_extractor.extract())
 def test_view(self, mock_build):
     """Extracting the ONE_VIEW / VIEW_DATA fixtures yields a TableMetadata with is_view set."""
     mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_VIEW, VIEW_DATA)
     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)

     extracted = bq_extractor.extract()
     self.assertIsInstance(extracted, TableMetadata)
     self.assertEqual(extracted.is_view, True)
Пример #4
0
 def test_empty_dataset(self, mock_datacatalogue, mock_bigquery):
     """Extractor fed the ONE_DATASET / NO_TABLES fixtures returns None from extract()."""
     mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, NO_TABLES, None)
     catalog_client = MockDataCatalogClient(ENTRY, TAGS)
     mock_datacatalogue.DataCatalogClient.return_value = catalog_client

     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)
     # The very first extract() call is expected to come back empty.
     self.assertIsNone(bq_extractor.extract())
Пример #5
0
 def test_view(self, mock_datacatalogue, mock_bigquery):
     """Extracting the ONE_VIEW / VIEW_DATA fixtures yields a TableMetadata with is_view set."""
     mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_VIEW, VIEW_DATA)
     catalog_client = MockDataCatalogClient(ENTRY, TAGS)
     mock_datacatalogue.DataCatalogClient.return_value = catalog_client

     bq_extractor = BigQueryMetadataExtractor()
     scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                          scope=bq_extractor.get_scope())
     bq_extractor.init(scoped_conf)

     extracted = bq_extractor.extract()
     self.assertIsInstance(extracted, TableMetadata)
     self.assertEqual(extracted.is_view, True)
Пример #6
0
    def test_table_without_tags(self, mock_datacatalogue, mock_bigquery):
        """With the NO_TAGS Data Catalog fixture, the extracted record carries no tags.

        The BigQuery client is mocked with the ONE_TABLE / TABLE_DATA fixtures
        and the Data Catalog client with ENTRY / NO_TAGS; the first extracted
        record's ``tags`` attribute must then be None.
        """
        mock_bigquery.return_value = MockBigQueryClient(
            ONE_DATASET, ONE_TABLE, TABLE_DATA)
        mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(
            ENTRY, NO_TAGS)
        extractor = BigQueryMetadataExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=self.conf,
                                   scope=extractor.get_scope()))
        result = extractor.extract()

        # Identity check: the contract is "no tags at all", not "something
        # that happens to compare equal to None".
        self.assertIsNone(result.tags)
    def test_table_without_columns(self, mock_build):
        """Extracting the NO_COLS fixture yields table metadata with an empty column list."""
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NO_COLS)
        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        table = bq_extractor.extract()

        # Identity of the table.
        self.assertEqual(table.database, 'bigquery')
        self.assertEqual(table.cluster, 'your-project-here')
        self.assertEqual(table.schema, 'fdgdfgh')
        self.assertEqual(table.name, 'nested_recs')
        # Content of the table.
        self.assertEqual(table.description, "")
        self.assertEqual(table.columns, [])
        self.assertEqual(table.is_view, False)
    def test_accepts_dataset_filter_by_label(self, mock_build):
        """An extractor configured with FILTER_KEY='label.key:value' still extracts a TableMetadata."""
        key_template = 'extractor.bigquery_table_metadata.{}'
        conf = ConfigFactory.from_dict({
            key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY):
                'your-project-here',
            key_template.format(BigQueryMetadataExtractor.FILTER_KEY):
                'label.key:value',
        })

        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=conf,
                                             scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        self.assertIsInstance(bq_extractor.extract(), TableMetadata)
    def test_table_part_of_table_date_range(self, mock_build):
        """The TABLE_DATE_RANGE fixture must produce exactly one record named 'date_range_'.

        The extractor is drained by calling ``extract()`` until it returns a
        falsy value; the number of records seen and the name of the first one
        are then checked.
        """
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, TABLE_DATE_RANGE, TABLE_DATA)
        extractor = BigQueryMetadataExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=self.conf,
                                              scope=extractor.get_scope()))

        result = extractor.extract()
        # Fail with a clear assertion (instead of an AttributeError on
        # ``result.name``) if the extractor yields nothing at all.
        self.assertIsNotNone(result)
        table_name = result.name

        # Drain the extractor, counting every record it hands out.
        count = 0
        while result:
            count += 1
            result = extractor.extract()

        self.assertEqual(count, 1)
        self.assertEqual(table_name, 'date_range_')
    def test_table_with_nested_records(self, mock_build):
        """Columns extracted from the NESTED_DATA fixture use dotted names for nested fields."""
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NESTED_DATA)
        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)
        table = bq_extractor.extract()

        # (name, type) expected at column positions 0, 1 and 2.
        expected_columns = (
            ('nested', 'RECORD'),
            ('nested.nested2', 'RECORD'),
            ('nested.nested2.ahah', 'STRING'),
        )
        for position, (col_name, col_type) in enumerate(expected_columns):
            column = table.columns[position]
            self.assertEqual(column.name, col_name)
            self.assertEqual(column.type, col_type)
    def test_keypath_and_pagesize_can_be_set(self, mock_build):
        """init() raises FileNotFoundError when KEY_PATH_KEY points at '/tmp/doesnotexist'.

        PAGE_SIZE_KEY is set to 200 in the same configuration.
        """
        key_template = 'extractor.bigquery_table_metadata.{}'
        conf = ConfigFactory.from_dict({
            key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY):
                'your-project-here',
            key_template.format(BigQueryMetadataExtractor.PAGE_SIZE_KEY):
                200,
            key_template.format(BigQueryMetadataExtractor.KEY_PATH_KEY):
                '/tmp/doesnotexist',
        })

        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        bq_extractor = BigQueryMetadataExtractor()

        # Both the scoping and the init call stay inside the assertRaises block.
        with self.assertRaises(FileNotFoundError):
            scoped_conf = Scoped.get_scoped_conf(conf=conf,
                                                 scope=bq_extractor.get_scope())
            bq_extractor.init(scoped_conf)
Пример #12
0
    def test_table_without_columns(self, mock_datacatalogue, mock_bigquery):
        """Extracting the NO_COLS fixture yields table metadata with an empty column list."""
        mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NO_COLS)
        catalog_client = MockDataCatalogClient(ENTRY, TAGS)
        mock_datacatalogue.DataCatalogClient.return_value = catalog_client

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        table = bq_extractor.extract()

        # Identity of the table.
        self.assertEqual(table.database, "bigquery")
        self.assertEqual(table.cluster, "your-project-here")
        self.assertEqual(table.schema, "fdgdfgh")
        self.assertEqual(table.name, "nested_recs")
        # Content of the table.
        self.assertEqual(table.description, "")
        self.assertEqual(table.columns, [])
        self.assertEqual(table.is_view, False)
    def test_normal_table(self, mock_build):
        """Extracting the ONE_TABLE / TABLE_DATA fixtures yields the expected table and first column."""
        mock_build.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        table = bq_extractor.extract()

        # Table-level attributes.
        self.assertEqual(table.database, 'bigquery')
        self.assertEqual(table.cluster, 'your-project-here')
        self.assertEqual(table.schema, 'fdgdfgh')
        self.assertEqual(table.name, 'nested_recs')
        self.assertEqual(table.description, "")

        # Attributes of the column at position 0.
        leading_column = table.columns[0]
        self.assertEqual(leading_column.name, 'test')
        self.assertEqual(leading_column.type, 'STRING')
        self.assertEqual(leading_column.description, 'some_description')

        self.assertEqual(table.is_view, False)
Пример #14
0
    def test_accepts_dataset_filter_by_label(self, mock_datacatalogue,
                                             mock_bigquery):
        """An extractor configured with FILTER_KEY="label.key:value" still extracts a TableMetadata."""
        key_template = "extractor.bigquery_table_metadata.{}"
        conf = ConfigFactory.from_dict({
            key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY):
                "your-project-here",
            key_template.format(BigQueryMetadataExtractor.FILTER_KEY):
                "label.key:value",
        })

        mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        catalog_client = MockDataCatalogClient(ENTRY, TAGS)
        mock_datacatalogue.DataCatalogClient.return_value = catalog_client

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=conf,
                                             scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)

        self.assertIsInstance(bq_extractor.extract(), TableMetadata)
Пример #15
0
    def test_table_part_of_table_date_range(self, mock_datacatalogue,
                                            mock_bigquery):
        """The TABLE_DATE_RANGE fixture must produce exactly one record named "date_range_".

        The extractor is drained by calling ``extract()`` until it returns a
        falsy value; the number of records seen and the name of the first one
        are then checked.
        """
        mock_bigquery.return_value = MockBigQueryClient(
            ONE_DATASET, TABLE_DATE_RANGE, TABLE_DATA)
        mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(
            ENTRY, TAGS)
        extractor = BigQueryMetadataExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=self.conf,
                                   scope=extractor.get_scope()))

        result = extractor.extract()
        # Fail with a clear assertion (instead of an AttributeError on
        # ``result.name``) if the extractor yields nothing at all.
        self.assertIsNotNone(result)
        table_name = result.name

        # Drain the extractor, counting every record it hands out.
        count = 0
        while result:
            count += 1
            result = extractor.extract()

        self.assertEqual(count, 1)
        self.assertEqual(table_name, "date_range_")
Пример #16
0
    def test_keypath_and_pagesize_can_be_set(self, mock_datacatalogue,
                                             mock_bigquery):
        """init() raises FileNotFoundError when KEY_PATH_KEY points at "/tmp/doesnotexist".

        PAGE_SIZE_KEY is set to 200 in the same configuration.
        """
        key_template = "extractor.bigquery_table_metadata.{}"
        conf = ConfigFactory.from_dict({
            key_template.format(BigQueryMetadataExtractor.PROJECT_ID_KEY):
                "your-project-here",
            key_template.format(BigQueryMetadataExtractor.PAGE_SIZE_KEY):
                200,
            key_template.format(BigQueryMetadataExtractor.KEY_PATH_KEY):
                "/tmp/doesnotexist",
        })

        mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, TABLE_DATA)
        catalog_client = MockDataCatalogClient(ENTRY, TAGS)
        mock_datacatalogue.DataCatalogClient.return_value = catalog_client
        bq_extractor = BigQueryMetadataExtractor()

        # Both the scoping and the init call stay inside the assertRaises block.
        with self.assertRaises(FileNotFoundError):
            scoped_conf = Scoped.get_scoped_conf(conf=conf,
                                                 scope=bq_extractor.get_scope())
            bq_extractor.init(scoped_conf)
Пример #17
0
    def test_table_with_nested_records(self, mock_datacatalogue,
                                       mock_bigquery):
        """Columns extracted from the NESTED_DATA fixture use dotted names for nested fields."""
        mock_bigquery.return_value = MockBigQueryClient(ONE_DATASET, ONE_TABLE, NESTED_DATA)
        catalog_client = MockDataCatalogClient(ENTRY, TAGS)
        mock_datacatalogue.DataCatalogClient.return_value = catalog_client

        bq_extractor = BigQueryMetadataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=bq_extractor.get_scope())
        bq_extractor.init(scoped_conf)
        table = bq_extractor.extract()

        # (name, type) expected at column positions 0, 1 and 2.
        expected_columns = (
            ("nested", "RECORD"),
            ("nested.nested2", "RECORD"),
            ("nested.nested2.ahah", "STRING"),
        )
        for position, (col_name, col_type) in enumerate(expected_columns):
            column = table.columns[position]
            self.assertEqual(column.name, col_name)
            self.assertEqual(column.type, col_type)
Пример #18
0
    def test_normal_table(self, mock_datacatalogue, mock_bigquery):
        """Extracting the ONE_TABLE / TABLE_DATA fixtures yields fully populated metadata.

        Checks the table-level attributes, the table-level tags and labels,
        the first column (which must carry no tags) and the tags attached to
        the column at index 3.
        """
        mock_bigquery.return_value = MockBigQueryClient(
            ONE_DATASET, ONE_TABLE, TABLE_DATA)
        mock_datacatalogue.DataCatalogClient.return_value = MockDataCatalogClient(
            ENTRY, TAGS)
        extractor = BigQueryMetadataExtractor()
        extractor.init(
            Scoped.get_scoped_conf(conf=self.conf,
                                   scope=extractor.get_scope()))
        result = extractor.extract()

        # Table-level attributes.
        self.assertEqual(result.database, "bigquery")
        self.assertEqual(result.cluster, "your-project-here")
        self.assertEqual(result.schema, "fdgdfgh")
        self.assertEqual(result.name, "nested_recs")
        self.assertEqual(result.description, "")
        self.assertEqual(result.is_view, False)
        # Table-level tag (no "column" key in the expected value).
        self.assertEqual(
            result.tags,
            {
                "name":
                "projects/your-project-here/locations/us/entryGroups/@bigquery/entries/cHJvamVjdHMvd2hhbGUtZGV2LTI5NDgxMi9kYXRhc2V0cy90ZXN0aW5nL3RhYmxlcy90YWJsZTE/tags/CXy_PbcgFLIaW",
                "template":
                "projects/your-project-here/locations/europe-west2/tagTemplates/demo_tag",
                "fields": {
                    "demo2": {
                        "displayName": "demo2",
                        "boolValue": "true"
                    },
                    "demo1": {
                        "displayName": "demo1",
                        "stringValue": "test1",
                        "order": 1,
                    },
                },
                "templateDisplayName": "demo-tag",
            },
        )
        self.assertEqual(result.labels, {
            "label_1": "test_label_1",
            "label_2": "test_label_2"
        })

        first_col = result.columns[0]
        self.assertEqual(first_col.name, "test")
        self.assertEqual(first_col.type, "STRING")
        self.assertEqual(first_col.description, "some_description")
        # Identity check: the first column must have no tags at all.
        self.assertIsNone(first_col.tags)

        # Column-level tag: the expected value names its column ("test4").
        fourth_col = result.columns[3]
        self.assertEqual(
            fourth_col.tags,
            {
                "name":
                "projects/your-project-here/locations/us/entryGroups/@bigquery/entries/cHJvamVjdHMvd2hhbGUtZGV2LTI5NDgxMi9kYXRhc2V0cy90ZXN0aW5nL3RhYmxlcy90YWJsZTE/tags/CYPqPyHt4oku",
                "template":
                "projects/your-project-here/locations/europe-west2/tagTemplates/demo_tag",
                "fields": {
                    "demo1": {
                        "displayName": "demo1",
                        "stringValue": "test1",
                        "order": 1,
                    },
                    "demo2": {
                        "displayName": "demo2",
                        "boolValue": "true"
                    },
                },
                "column": "test4",
                "templateDisplayName": "demo-tag",
            },
        )