def test_email_filter_counted(self, mock_build: Any) -> None:
    """
    Test that a usage record is still returned when EMAIL_PATTERN is set
    to '.*@test.com.*'
    """
    # Bound ``str.format`` used as a tiny key builder for the scoped config.
    scoped_key = 'extractor.bigquery_table_usage.{}'.format
    config = ConfigFactory.from_dict({
        scoped_key(BigQueryTableUsageExtractor.PROJECT_ID_KEY): 'your-project-here',
        scoped_key(BigQueryTableUsageExtractor.EMAIL_PATTERN): '.*@test.com.*',
    })

    mock_build.return_value = MockLoggingClient(CORRECT_DATA)
    usage_extractor = BigQueryTableUsageExtractor()
    usage_extractor.init(
        Scoped.get_scoped_conf(conf=config, scope=usage_extractor.get_scope()))

    record = usage_extractor.extract()
    assert record is not None
    self.assertIsInstance(record, tuple)

    usage, read_count = record
    self.assertIsInstance(usage, TableColumnUsageTuple)
    self.assertIsInstance(read_count, int)

    # Checked in the same order as the individual field assertions would be.
    expected_fields = (
        ('database', 'bigquery'),
        ('cluster', 'bigquery-public-data'),
        ('schema', 'austin_incidents'),
        ('table', 'incidents_2008'),
        ('email', '*****@*****.**'),
    )
    for field_name, expected in expected_fields:
        self.assertEqual(getattr(usage, field_name), expected)
    self.assertEqual(read_count, 1)
def test_timestamp_pagesize_settings(self, mock_build: Any) -> None:
    """
    Test timestamp and pagesize can be set
    """
    expected_timestamp = '2019-01-01T00:00:00.00Z'
    expected_page_size = 215

    scoped_key = 'extractor.bigquery_table_usage.{}'.format
    config = ConfigFactory.from_dict({
        scoped_key(BigQueryTableUsageExtractor.PROJECT_ID_KEY): 'your-project-here',
        scoped_key(BigQueryTableUsageExtractor.TIMESTAMP_KEY): expected_timestamp,
        scoped_key(BigQueryTableUsageExtractor.PAGE_SIZE_KEY): expected_page_size,
    })

    logging_client = MockLoggingClient(CORRECT_DATA)
    mock_build.return_value = logging_client
    usage_extractor = BigQueryTableUsageExtractor()
    usage_extractor.init(
        Scoped.get_scoped_conf(conf=config, scope=usage_extractor.get_scope()))

    # extract() is never called here: the request recorded on the mock
    # right after init() is what gets inspected.
    _, call_kwargs = logging_client.b.list.call_args
    request_body = call_kwargs['body']
    self.assertEqual(request_body['pageSize'], expected_page_size)
    self.assertIn(expected_timestamp, request_body['filter'])
def test_basic_extraction(self, mock_build: Any) -> None:
    """
    Test Extraction using mock class

    Initialises the extractor with only PROJECT_ID_KEY configured, feeds it
    a MockLoggingClient built from CORRECT_DATA and checks the single
    (TableColumnUsageTuple, int) pair returned by extract().
    """
    # NOTE(review): test_not_counting_referenced_table_belonging_to_different_project
    # in this file uses the same configuration and data but asserts that
    # extract() returns None. Both expectations cannot hold against the same
    # extractor -- confirm which one matches the current behaviour.
    config_dict = {
        'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.PROJECT_ID_KEY):
            'your-project-here',
    }
    conf = ConfigFactory.from_dict(config_dict)

    mock_build.return_value = MockLoggingClient(CORRECT_DATA)
    extractor = BigQueryTableUsageExtractor()
    extractor.init(
        Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope()))
    result = extractor.extract()

    # Same guard the sibling tests use: fail with a clear assertion (and let
    # type checkers narrow the Optional) before unpacking.
    assert result is not None
    self.assertIsInstance(result, tuple)

    (key, value) = result
    self.assertIsInstance(key, TableColumnUsageTuple)
    self.assertIsInstance(value, int)

    self.assertEqual(key.database, 'bigquery')
    self.assertEqual(key.cluster, 'bigquery-public-data')
    self.assertEqual(key.schema, 'austin_incidents')
    self.assertEqual(key.table, 'incidents_2008')
    self.assertEqual(key.email, '*****@*****.**')
    self.assertEqual(value, 1)
def test_key_path(self, mock_build: Any) -> None:
    """
    Test key_path can be used
    """
    with tempfile.NamedTemporaryFile() as key_file:
        # There are many github scanners looking for API / cloud keys, so in
        # order not to get a false positive triggering everywhere, the key is
        # stored base64 encoded. It is decoded into a tempfile as part of
        # this test and then used.
        decoded_key = base64.b64decode(KEYFILE_DATA)
        key_file.write(decoded_key)
        key_file.flush()

        scoped_key = 'extractor.bigquery_table_usage.{}'.format
        config = ConfigFactory.from_dict({
            scoped_key(BigQueryTableUsageExtractor.PROJECT_ID_KEY): 'your-project-here',
            scoped_key(BigQueryTableUsageExtractor.KEY_PATH_KEY): key_file.name,
        })

        mock_build.return_value = MockLoggingClient(CORRECT_DATA)
        usage_extractor = BigQueryTableUsageExtractor()
        usage_extractor.init(
            Scoped.get_scoped_conf(conf=config, scope=usage_extractor.get_scope()))

        # The credentials are read back from the ``http`` keyword argument
        # that the patched ``build`` was called with.
        _, build_kwargs = mock_build.call_args
        credentials = build_kwargs['http'].credentials
        self.assertEqual(credentials.project_id, 'your-project-here')
        self.assertEqual(
            credentials.service_account_email, '*****@*****.**')
def test_counting_referenced_table_belonging_to_different_project(
        self, mock_build: Any) -> None:
    """
    Test result when the referenced table belongs to a project different
    from the PROJECT_ID_KEY of the extractor and
    COUNT_READS_ONLY_FROM_PROJECT_ID_KEY is set to False
    """
    config = ConfigFactory.from_dict({
        f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}':
            'your-project-here',
        f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.COUNT_READS_ONLY_FROM_PROJECT_ID_KEY}':
            False,
    })

    mock_build.return_value = MockLoggingClient(CORRECT_DATA)
    usage_extractor = BigQueryTableUsageExtractor()
    usage_extractor.init(
        Scoped.get_scoped_conf(conf=config, scope=usage_extractor.get_scope()))

    record = usage_extractor.extract()
    assert record is not None
    self.assertIsInstance(record, tuple)

    usage, read_count = record
    self.assertIsInstance(usage, TableColumnUsageTuple)
    self.assertIsInstance(read_count, int)

    # Field checks run in the same order as separate assertEqual calls would.
    expected_fields = (
        ('database', 'bigquery'),
        ('cluster', 'bigquery-public-data'),
        ('schema', 'austin_incidents'),
        ('table', 'incidents_2008'),
        ('email', '*****@*****.**'),
    )
    for field_name, expected in expected_fields:
        self.assertEqual(getattr(usage, field_name), expected)
    self.assertEqual(read_count, 1)
def test_no_entries(self, mock_build: Any) -> None:
    """
    Test that extract() returns None when the mocked client is built from
    NO_ENTRIES
    """
    project_key = 'extractor.bigquery_table_usage.{}'.format(
        BigQueryTableUsageExtractor.PROJECT_ID_KEY)
    config = ConfigFactory.from_dict({project_key: 'your-project-here'})

    mock_build.return_value = MockLoggingClient(NO_ENTRIES)
    usage_extractor = BigQueryTableUsageExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=config, scope=usage_extractor.get_scope())
    usage_extractor.init(scoped_conf)

    self.assertIsNone(usage_extractor.extract())
def test_email_filter_not_counted(self, mock_build: Any) -> None:
    """
    Test that extract() returns None when EMAIL_PATTERN is set to
    'emailFilter'
    """
    scoped_key = 'extractor.bigquery_table_usage.{}'.format
    config = ConfigFactory.from_dict({
        scoped_key(BigQueryTableUsageExtractor.PROJECT_ID_KEY): 'your-project-here',
        scoped_key(BigQueryTableUsageExtractor.EMAIL_PATTERN): 'emailFilter',
    })

    mock_build.return_value = MockLoggingClient(CORRECT_DATA)
    usage_extractor = BigQueryTableUsageExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=config, scope=usage_extractor.get_scope())
    usage_extractor.init(scoped_conf)

    self.assertIsNone(usage_extractor.extract())
def test_failed_jobs_should_not_be_counted(self, mock_build: Any) -> None:
    """
    Test that extract() returns None when the mocked client is built from
    FAILURE and the project id is 'bigquery-public-data'
    """
    # NOTE(review): a second method with this exact name exists further down
    # in this file; if both live in the same class the later definition
    # replaces this one and this test never runs -- confirm and rename one.
    project_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}'
    config = ConfigFactory.from_dict({project_key: 'bigquery-public-data'})

    logging_client = MockLoggingClient(FAILURE)
    mock_build.return_value = logging_client
    usage_extractor = BigQueryTableUsageExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=config, scope=usage_extractor.get_scope())
    usage_extractor.init(scoped_conf)

    self.assertIsNone(usage_extractor.extract())
def test_failed_jobs_should_not_be_counted(self, mock_build: Any) -> None:
    """
    Test that extract() returns None when the mocked client is built from
    FAILURE and the project id is 'your-project-here'
    """
    # NOTE(review): an earlier method in this file has the same name (it uses
    # project id 'bigquery-public-data'). If both are members of the same
    # class, this definition silently shadows the earlier one, so only this
    # variant is collected by the test runner -- confirm and rename one.
    config_dict = {
        'extractor.bigquery_table_usage.{}'.format(BigQueryTableUsageExtractor.PROJECT_ID_KEY):
            'your-project-here',
    }
    conf = ConfigFactory.from_dict(config_dict)

    client = MockLoggingClient(FAILURE)
    mock_build.return_value = client
    extractor = BigQueryTableUsageExtractor()
    extractor.init(
        Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope()))

    result = extractor.extract()
    self.assertIsNone(result)
def test_not_counting_referenced_table_belonging_to_different_project(
        self, mock_build: Any) -> None:
    """
    Test result when the referenced table belongs to a project different
    from the PROJECT_ID_KEY of the extractor
    """
    project_key = f'extractor.bigquery_table_usage.{BigQueryTableUsageExtractor.PROJECT_ID_KEY}'
    config = ConfigFactory.from_dict({project_key: 'your-project-here'})

    mock_build.return_value = MockLoggingClient(CORRECT_DATA)
    usage_extractor = BigQueryTableUsageExtractor()
    scoped_conf = Scoped.get_scoped_conf(
        conf=config, scope=usage_extractor.get_scope())
    usage_extractor.init(scoped_conf)

    record = usage_extractor.extract()
    # Only PROJECT_ID_KEY is configured here; no record is expected.
    assert record is None