def test_valid_table_path(self):
    """Validate a ``project:dataset.table`` path when the dataset lookup succeeds.

    There is no explicit assertion: the test passes as long as
    ``self.options.validate`` returns without raising.
    """
    parsed_args = BigQueryArgs('project:dataset.table')
    bq_client = mock.Mock()
    dataset_ref = bigquery.DatasetReference(
        projectId='project', datasetId='dataset')
    # Stub the dataset lookup so it hands back a dataset object.
    bq_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=dataset_ref)
    self.options.validate(parsed_args, bq_client)
def test_get_or_create_dataset_fetched(self):
    """Expect the requested dataset id back when the stubbed Get call succeeds."""
    bq_client = mock.Mock()
    dataset_ref = bigquery.DatasetReference(
        projectId='project_id', datasetId='dataset_id')
    # The lookup succeeds, so the wrapper has a dataset to return directly.
    bq_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=dataset_ref)
    bq_wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(bq_client)
    fetched = bq_wrapper.get_or_create_dataset('project_id', 'dataset_id')
    self.assertEqual(fetched.datasetReference.datasetId, 'dataset_id')
def test_valid_table_path(self):
    """Validate ``--append`` with a full output table path and a found dataset.

    There is no explicit assertion: the test passes as long as
    ``self._options.validate`` returns without raising.
    """
    cli_flags = ['--append', '--output_table', 'project:dataset.table']
    parsed_args = self._make_args(cli_flags)
    bq_client = mock.Mock()
    dataset_ref = bigquery.DatasetReference(
        projectId='project', datasetId='dataset')
    # Stub the dataset lookup so it hands back a dataset object.
    bq_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=dataset_ref)
    self._options.validate(parsed_args, bq_client)
def test_temporary_dataset_is_unique(self, patched_time_sleep):
    """Expect RuntimeError from create_temporary_dataset when Get finds a dataset.

    Args:
      patched_time_sleep: unused in the body; presumably injected by a
        ``mock.patch`` decorator outside this view (TODO confirm).
    """
    bq_client = mock.Mock()
    dataset_ref = bigquery.DatasetReference(
        projectId='project_id', datasetId='dataset_id')
    # The stubbed lookup always reports that a dataset is present.
    bq_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=dataset_ref)
    bq_wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(bq_client)
    with self.assertRaises(RuntimeError):
        bq_wrapper.create_temporary_dataset('project_id', 'location')
    # Checked after the block: the failure must come after a dataset lookup.
    self.assertTrue(bq_client.datasets.Get.called)
def test_get_or_create_dataset_created(self):
    """Expect the requested dataset id back when Get raises a 404 HttpError."""
    bq_client = mock.Mock()
    # The lookup fails with a 404 status, while the stubbed Insert succeeds.
    not_found = HttpError(response={'status': '404'}, url='', content='')
    bq_client.datasets.Get.side_effect = not_found
    dataset_ref = bigquery.DatasetReference(
        projectId='project_id', datasetId='dataset_id')
    bq_client.datasets.Insert.return_value = bigquery.Dataset(
        datasetReference=dataset_ref)
    bq_wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(bq_client)
    created = bq_wrapper.get_or_create_dataset('project_id', 'dataset_id')
    self.assertEqual(created.datasetReference.datasetId, 'dataset_id')
def get_or_create_dataset(self, project_id, dataset_id):
    """Return the dataset, inserting a new one when the lookup reports 404.

    Args:
      project_id: project id used in both the Get and the Insert request.
      dataset_id: dataset id to look up, and to create if the lookup 404s.

    Returns:
      The result of ``self.client.datasets.Get`` when it succeeds, otherwise
      the response of ``self.client.datasets.Insert``.

    Raises:
      HttpError: re-raised unchanged when the lookup fails with any status
        code other than 404.
    """
    try:
        return self.client.datasets.Get(
            bigquery.BigqueryDatasetsGetRequest(
                projectId=project_id, datasetId=dataset_id))
    except HttpError as exn:
        # Only "not found" means the dataset should be created.
        if exn.status_code != 404:
            raise
        dataset_ref = bigquery.DatasetReference(
            projectId=project_id, datasetId=dataset_id)
        insert_request = bigquery.BigqueryDatasetsInsertRequest(
            projectId=project_id,
            dataset=bigquery.Dataset(datasetReference=dataset_ref))
        # The response is a bigquery.Dataset instance.
        return self.client.datasets.Insert(insert_request)
def test_raise_error_if_dataset_not_exists(self):
    """Cover the found, 404 and 401 outcomes of the dataset existence check.

    A successful lookup must not raise, a 404 HttpError must surface as
    ValueError, and a 401 HttpError must propagate as HttpError.
    """
    bq_client = mock.Mock()

    # Case 1: the lookup succeeds, so the call is expected to return quietly.
    dataset_ref = bigquery.DatasetReference(
        projectId='project', datasetId='dataset')
    bq_client.datasets.Get.return_value = bigquery.Dataset(
        datasetReference=dataset_ref)
    bigquery_util.raise_error_if_dataset_not_exists(
        bq_client, 'project', 'dataset')

    # Case 2: a 404 from the lookup is reported as ValueError.
    bq_client.datasets.Get.side_effect = exceptions.HttpError(
        response={'status': '404'}, url='', content='')
    with self.assertRaises(ValueError):
        bigquery_util.raise_error_if_dataset_not_exists(
            bq_client, 'project', 'dataset')

    # Case 3: any other HTTP failure (here 401) is passed through as-is.
    bq_client.datasets.Get.side_effect = exceptions.HttpError(
        response={'status': '401'}, url='', content='')
    with self.assertRaises(exceptions.HttpError):
        bigquery_util.raise_error_if_dataset_not_exists(
            bq_client, 'project', 'dataset')