def validate(self, parsed_args, client=None):
    """Check that the requested BigQuery output table reference is usable.

    The reference must have the form ``PROJECT:DATASET.TABLE`` and the
    dataset it names must be retrievable through ``client.datasets.Get``.

    Args:
        parsed_args: Parsed command-line arguments; only ``output_table`` is
            read here.
        client: Optional BigQuery client. When falsy, a ``BigqueryV2`` client
            is built from the application default credentials.

    Raises:
        ValueError: If ``output_table`` is missing or malformed, or if the
            dataset lookup fails with HTTP status 404.
        exceptions.HttpError: Any other HTTP error raised by the dataset
            lookup is propagated unchanged.
    """
    output_table = parsed_args.output_table
    # A missing/empty value is reported like any other malformed reference
    # instead of letting re.match() fail with a TypeError on None.
    output_table_re_match = re.match(
        r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
        output_table) if output_table else None
    if not output_table_re_match:
        raise ValueError(
            'Expected a table reference (PROJECT:DATASET.TABLE) '
            'instead of {}.'.format(output_table))

    project_id = output_table_re_match.group('project')
    dataset_id = output_table_re_match.group('dataset')

    # Build the client outside the try block so that the 404 handling below
    # only ever applies to the dataset lookup itself.
    if not client:
        credentials = GoogleCredentials.get_application_default(
        ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
        client = bigquery.BigqueryV2(credentials=credentials)

    try:
        client.datasets.Get(
            bigquery.BigqueryDatasetsGetRequest(
                projectId=project_id, datasetId=dataset_id))
    except exceptions.HttpError as e:
        if e.status_code == 404:
            # Keep the HTTP error attached as the explicit cause.
            raise ValueError('Dataset %s:%s does not exist.' %
                             (project_id, dataset_id)) from e
        # For the rest of the errors, use BigQuery error message.
        raise
def raise_error_if_dataset_not_exists(client, project_id, dataset_id):
    # type: (beam_bigquery.BigqueryV2, str, str) -> None
    """Raise ``ValueError`` when the given dataset cannot be found.

    Issues a ``datasets.Get`` request for ``project_id``/``dataset_id`` on
    ``client``. A response with HTTP status 404 is translated into a
    ``ValueError``; every other ``HttpError`` is re-raised untouched so the
    BigQuery error message reaches the caller.
    """
    try:
        lookup_request = beam_bigquery.BigqueryDatasetsGetRequest(
            projectId=project_id, datasetId=dataset_id)
        client.datasets.Get(lookup_request)
    except exceptions.HttpError as e:
        if e.status_code != 404:
            # For the rest of the errors, use BigQuery error message.
            raise
        raise ValueError('Dataset %s:%s does not exist.' %
                         (project_id, dataset_id)) from e
def clean_up_temporary_dataset(self, project_id):
    """Delete the temporary dataset for ``project_id`` if it exists.

    The dataset is identified through ``self._get_temp_table(project_id)``.
    If looking it up yields HTTP status 404, a warning is logged and nothing
    is deleted; any other ``HttpError`` is re-raised.
    """
    temp_ref = self._get_temp_table(project_id)
    dataset_id = temp_ref.datasetId
    try:
        self.client.datasets.Get(
            bigquery.BigqueryDatasetsGetRequest(
                projectId=project_id, datasetId=dataset_id))
    except HttpError as exn:
        if exn.status_code != 404:
            raise
        # Nothing to clean up when the dataset is already gone.
        logging.warning('Dataset %s:%s does not exist', project_id,
                        dataset_id)
        return
    # NOTE(review): the trailing True is presumably a "delete contents"
    # flag -- confirm against _delete_dataset.
    self._delete_dataset(temp_ref.projectId, dataset_id, True)
def validate(self, parsed_args, client=None):
    # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
    """Validate the BigQuery output options before anything is written.

    Checks, in order:
      1. If no output table is given but an Avro output path is, BigQuery
         output is not requested and validation ends.
      2. ``output_table`` must have the form ``PROJECT:DATASET.TABLE``.
      3. The dataset must be retrievable through ``client.datasets.Get``.
      4. Unless ``append`` is set, ``update_schema_on_append`` must be unset
         and the table must not already exist.

    Args:
        parsed_args: Parsed command-line arguments. Reads ``output_table``,
            ``output_avro_path``, ``append`` and ``update_schema_on_append``.
        client: Optional BigQuery client. When falsy, a ``BigqueryV2`` client
            is built from the application default credentials.

    Raises:
        ValueError: If the table reference is missing or malformed, the
            dataset lookup returns 404, ``update_schema_on_append`` is used
            without ``append``, or the table already exists.
        exceptions.HttpError: Any non-404 HTTP error from the dataset or
            table lookup is propagated unchanged.
    """
    output_table = parsed_args.output_table
    if not output_table and parsed_args.output_avro_path:
        # Writing into BigQuery is not requested; no more BigQuery checks
        # needed.
        return

    # A missing/empty value is reported like any other malformed reference
    # instead of letting re.match() fail with a TypeError on None.
    output_table_re_match = re.match(
        r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
        output_table) if output_table else None
    if not output_table_re_match:
        raise ValueError(
            'Expected a table reference (PROJECT:DATASET.TABLE) '
            'instead of {}.'.format(output_table))

    if not client:
        credentials = GoogleCredentials.get_application_default(
        ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
        client = bigquery.BigqueryV2(credentials=credentials)

    project_id = output_table_re_match.group('project')
    dataset_id = output_table_re_match.group('dataset')
    table_id = output_table_re_match.group('table')

    try:
        client.datasets.Get(
            bigquery.BigqueryDatasetsGetRequest(projectId=project_id,
                                                datasetId=dataset_id))
    except exceptions.HttpError as e:
        if e.status_code == 404:
            # Keep the HTTP error attached as the explicit cause.
            raise ValueError('Dataset %s:%s does not exist.' %
                             (project_id, dataset_id)) from e
        # For the rest of the errors, use BigQuery error message.
        raise

    if parsed_args.append:
        return

    # Ensuring given output table doesn't already exist to avoid overwriting
    # it.
    if parsed_args.update_schema_on_append:
        raise ValueError(
            '--update_schema_on_append requires --append to be '
            'true.')
    try:
        client.tables.Get(
            bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                              datasetId=dataset_id,
                                              tableId=table_id))
    except exceptions.HttpError as e:
        # A 404 is expected here: the output table must not already exist.
        if e.status_code != 404:
            # For the rest of the errors, use BigQuery error message.
            raise
    else:
        # The lookup succeeded, so the table is already there.
        raise ValueError(
            'Table %s:%s.%s already exists, cannot overwrite it.' %
            (project_id, dataset_id, table_id))
def create_temporary_dataset(self, project_id):
    """Create the temporary dataset, refusing to reuse an existing one.

    The dataset id is ``BigQueryWrapper.TEMP_DATASET`` followed by
    ``self._temporary_table_suffix``. If a lookup for that id succeeds and
    ``project_id`` is not ``None``, a ``RuntimeError`` is raised. If the
    lookup fails with HTTP status 404 the dataset is created through
    ``self.get_or_create_dataset``; other ``HttpError``s are re-raised.
    """
    temp_dataset_id = BigQueryWrapper.TEMP_DATASET + self._temporary_table_suffix
    # Check if dataset exists to make sure that the temporary id is unique
    try:
        self.client.datasets.Get(
            bigquery.BigqueryDatasetsGetRequest(
                projectId=project_id, datasetId=temp_dataset_id))
    except HttpError as exn:
        if exn.status_code != 404:
            raise
        logging.warning('Dataset does not exist so we will create it')
        self.get_or_create_dataset(project_id, temp_dataset_id)
    else:
        # Unittests don't pass projectIds so they can be run without error
        if project_id is not None:
            raise RuntimeError(
                'Dataset %s:%s already exists so cannot be used as temporary.'
                % (project_id, temp_dataset_id))
def get_or_create_dataset(self, project_id, dataset_id):
    """Return the named dataset, inserting it first if it does not exist.

    A ``datasets.Get`` request is tried first and its result returned. When
    that fails with HTTP status 404, a new ``bigquery.Dataset`` referencing
    ``project_id``/``dataset_id`` is inserted and the insert response is
    returned. Any other ``HttpError`` is re-raised.
    """
    lookup = bigquery.BigqueryDatasetsGetRequest(
        projectId=project_id, datasetId=dataset_id)
    try:
        # Check if dataset already exists otherwise create it
        return self.client.datasets.Get(lookup)
    except HttpError as exn:
        if exn.status_code != 404:
            raise
        reference = bigquery.DatasetReference(
            projectId=project_id, datasetId=dataset_id)
        insert_request = bigquery.BigqueryDatasetsInsertRequest(
            projectId=project_id,
            dataset=bigquery.Dataset(datasetReference=reference))
        # The response is a bigquery.Dataset instance.
        return self.client.datasets.Insert(insert_request)