示例#1
0
 def _get_table(self, project_id, dataset_id, table_id):
     """Fetch a table through ``self.client``.

     Args:
       project_id, dataset_id, table_id: identifiers placed into the
         ``BigqueryTablesGetRequest``.

     Returns:
       Whatever ``self.client.tables.Get`` returns (a bigquery.Table instance).
     """
     lookup = bigquery.BigqueryTablesGetRequest(
         projectId=project_id, datasetId=dataset_id, tableId=table_id)
     # The service answers with a bigquery.Table instance; hand it back as is.
     return self.client.tables.Get(lookup)
示例#2
0
    def _update_bigquery_schema_on_append(self):
        # type: () -> None
        """Merges ``self._schema`` into the schema of the existing output table.

        Parses ``self._output_table`` as PROJECT:DATASET.TABLE, fetches that
        table and, when the fetch succeeds, replaces its schema with the result
        of ``_get_merged_field_schemas`` applied to the existing fields and
        ``self._schema.fields``, then sends a table update request.

        Raises:
          ValueError: if ``self._output_table`` is not of the form
            PROJECT:DATASET.TABLE.
          RuntimeError: if the table update request fails with an HttpError.
        """
        # TODO (yifangchen): Move the logic into validate().
        output_table_re_match = re.match(
            r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
            self._output_table)
        if not output_table_re_match:
            # re.match returns None on a mismatch; report it clearly instead of
            # failing later with an AttributeError on `.group`.
            raise ValueError(
                'Expected a table reference (PROJECT:DATASET.TABLE) '
                'instead of {}.'.format(self._output_table))
        project_id = output_table_re_match.group('project')
        dataset_id = output_table_re_match.group('dataset')
        table_id = output_table_re_match.group('table')

        credentials = GoogleCredentials.get_application_default(
        ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
        client = bigquery.BigqueryV2(credentials=credentials)
        try:
            existing_table = client.tables.Get(
                bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                                  datasetId=dataset_id,
                                                  tableId=table_id))
        except exceptions.HttpError:
            # If the table does not exist, there is no schema to update.
            # NOTE(review): every HttpError (not only 404) ends up here and is
            # silently treated as "nothing to update".
            return

        new_schema = bigquery.TableSchema()
        new_schema.fields = _get_merged_field_schemas(
            existing_table.schema.fields, self._schema.fields)
        existing_table.schema = new_schema
        try:
            client.tables.Update(
                bigquery.BigqueryTablesUpdateRequest(projectId=project_id,
                                                     datasetId=dataset_id,
                                                     table=existing_table,
                                                     tableId=table_id))
        except exceptions.HttpError as e:
            raise RuntimeError('BigQuery schema update failed: %s' % str(e))
示例#3
0
def _get_schema(input_table):
  # type: (str) -> bigquery_v2.TableSchema
  """Returns the ``schema`` attribute of the table named by `input_table`.

  The reference is split by ``bigquery_util.parse_table_reference`` and the
  table is fetched with a client built from the application default
  credentials, scoped to BigQuery.
  """
  project_id, dataset_id, table_id = bigquery_util.parse_table_reference(
      input_table)

  default_credentials = client.GoogleCredentials.get_application_default()
  scoped_credentials = default_credentials.create_scoped(
      ['https://www.googleapis.com/auth/bigquery'])
  service = bigquery_v2.BigqueryV2(credentials=scoped_credentials)

  lookup = bigquery_v2.BigqueryTablesGetRequest(
      projectId=project_id, datasetId=dataset_id, tableId=table_id)
  return service.tables.Get(lookup).schema
 def side_effect(request):
     """Answers a table lookup request with a 404 or a fixed table.

     A request equal to the lookup of project:dataset.table__sample_info
     raises an HttpError carrying status '404'; any other request gets a Table
     whose reference is project:dataset.table__chr1_part1.
     """
     missing_table_request = bigquery.BigqueryTablesGetRequest(
         projectId='project',
         datasetId='dataset',
         tableId='table__sample_info')
     if request == missing_table_request:
         raise exceptions.HttpError(response={'status': '404'},
                                    url='',
                                    content='')

     existing_reference = bigquery.TableReference(
         projectId='project',
         datasetId='dataset',
         tableId='table__chr1_part1')
     return bigquery.Table(tableReference=existing_reference)
示例#5
0
def table_exist(client, project_id, dataset_id, table_id):
    # type: (beam_bigquery.BigqueryV2, str, str, str) -> bool
    """Reports whether a table lookup through `client` succeeds.

    Returns:
      True when ``client.tables.Get`` succeeds, False when it fails with an
      HttpError whose status code is 404.

    Raises:
      exceptions.HttpError: for any status code other than 404.
    """
    lookup = beam_bigquery.BigqueryTablesGetRequest(
        projectId=project_id, datasetId=dataset_id, tableId=table_id)
    try:
        client.tables.Get(lookup)
    except exceptions.HttpError as e:
        if e.status_code != 404:
            # Anything but "not found" is a real failure; let it propagate.
            raise
        return False
    else:
        return True
示例#6
0
 def validate(self, parsed_args, client=None):
     # type: (argparse.Namespace, bigquery.BigqueryV2) -> None
     """Checks the BigQuery output options before anything is written.

     Args:
       parsed_args: parsed command line arguments. Reads `output_table`,
         `output_avro_path`, `append` and `update_schema_on_append`.
       client: BigQuery client used for the lookups. When not given, one is
         built from the application default credentials.

     Raises:
       ValueError: if neither an output table nor an Avro output path is
         given, if the table reference is not PROJECT:DATASET.TABLE, if the
         dataset does not exist, if --update_schema_on_append is set without
         --append, or if the table already exists while --append is not set.
       exceptions.HttpError: for BigQuery errors other than 404.
     """
     if not parsed_args.output_table:
         if parsed_args.output_avro_path:
             # Writing into BigQuery is not requested; no more BigQuery checks
             # needed.
             return
         # Without this guard a missing (None) table would reach re.match and
         # fail with a TypeError instead of a usable error message.
         raise ValueError(
             'Expected a table reference (PROJECT:DATASET.TABLE) '
             'instead of {}.'.format(parsed_args.output_table))
     output_table_re_match = re.match(
         r'^((?P<project>.+):)(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
         parsed_args.output_table)
     if not output_table_re_match:
         raise ValueError(
             'Expected a table reference (PROJECT:DATASET.TABLE) '
             'instead of {}.'.format(parsed_args.output_table))
     if not client:
         credentials = GoogleCredentials.get_application_default(
         ).create_scoped(['https://www.googleapis.com/auth/bigquery'])
         client = bigquery.BigqueryV2(credentials=credentials)
     project_id = output_table_re_match.group('project')
     dataset_id = output_table_re_match.group('dataset')
     table_id = output_table_re_match.group('table')
     try:
         client.datasets.Get(
             bigquery.BigqueryDatasetsGetRequest(projectId=project_id,
                                                 datasetId=dataset_id))
     except exceptions.HttpError as e:
         if e.status_code == 404:
             raise ValueError('Dataset %s:%s does not exist.' %
                              (project_id, dataset_id))
         else:
             # For the rest of the errors, use BigQuery error message.
             raise
     # Ensuring given output table doesn't already exist to avoid overwriting it.
     if not parsed_args.append:
         if parsed_args.update_schema_on_append:
             raise ValueError(
                 '--update_schema_on_append requires --append to be '
                 'true.')
         try:
             client.tables.Get(
                 bigquery.BigqueryTablesGetRequest(projectId=project_id,
                                                   datasetId=dataset_id,
                                                   tableId=table_id))
             # Not an HttpError, so the handler below lets it propagate.
             raise ValueError(
                 'Table %s:%s.%s already exists, cannot overwrite it.' %
                 (project_id, dataset_id, table_id))
         except exceptions.HttpError as e:
             if e.status_code == 404:
                 # This is expected, output table must not already exist
                 pass
             else:
                 # For the rest of the errors, use BigQuery error message.
                 raise
示例#7
0
  def get_table(self, project_id, dataset_id, table_id):
    """Fetch a table's metadata object through ``self.client``.

    Args:
      project_id, dataset_id, table_id: table lookup parameters

    Returns:
      bigquery.Table instance
    Raises:
      HttpError if lookup failed.
    """
    lookup = bigquery.BigqueryTablesGetRequest(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id)
    return self.client.tables.Get(lookup)
示例#8
0
def raise_error_if_table_exists(client, project_id, dataset_id, table_id):
    # type: (bigquery.BigqueryV2, str, str, str) -> None
    """Fails when the table lookup through `client` succeeds.

    Raises:
      ValueError: if ``client.tables.Get`` succeeds, i.e. the table exists.
      exceptions.HttpError: if the lookup fails with a status other than 404.
    """
    lookup = bigquery.BigqueryTablesGetRequest(
        projectId=project_id, datasetId=dataset_id, tableId=table_id)
    try:
        client.tables.Get(lookup)
    except exceptions.HttpError as e:
        # A 404 is expected, output table must not already exist. For the
        # rest of the errors, use BigQuery error message.
        if e.status_code != 404:
            raise
    else:
        raise ValueError(
            'Table %s:%s.%s already exists, cannot overwrite it.' %
            (project_id, dataset_id, table_id))