Exemplo n.º 1
0
    def _validate_output_tables(self, client, output_table_base_name,
                                sharding_config_path, append, is_main_output):
        """Checks that every expected output table is in the required state.

        The expected tables are the base table id combined with each shard
        suffix taken from the sharding config, plus the sample info table
        when `is_main_output` is set.

        Args:
          client: BigQuery client handed to the `bigquery_util` helpers.
          output_table_base_name: Table reference of the output base table.
            It must be left unchanged by `bigquery_util.get_table_base_name`
            (per the error message: it must not contain the reserved
            suffix separator).
          sharding_config_path: Path given to
            `variant_sharding.VariantSharding`.
          append: If truthy, every expected table must already exist;
            otherwise none of them may exist.
          is_main_output: If truthy, the sample info table is checked too.

        Raises:
          ValueError: If the base name or a shard suffix is altered by
            `bigquery_util.get_table_base_name`, if `append` is set and an
            expected table is missing, or if `append` is not set and an
            expected table already exists.
        """
        stripped_base_name = bigquery_util.get_table_base_name(
            output_table_base_name)
        if output_table_base_name != stripped_base_name:
            base_name_error = (
                'Output table cannot contain "{}". we reserve this '
                'string to mark sharded output tables.')
            raise ValueError(
                base_name_error.format(bigquery_util.TABLE_SUFFIX_SEPARATOR))

        project_id, dataset_id, table_id = bigquery_util.parse_table_reference(
            output_table_base_name)
        # NOTE(review): presumably raises when the dataset is missing; the
        # exception type is defined in bigquery_util, not here.
        bigquery_util.raise_error_if_dataset_not_exists(
            client, project_id, dataset_id)

        expected_tables = []
        if is_main_output:
            sample_info_table = bigquery_util.compose_table_name(
                table_id, SAMPLE_INFO_TABLE_SUFFIX)
            expected_tables.append(sample_info_table)

        sharding = variant_sharding.VariantSharding(sharding_config_path)
        shard_count = sharding.get_num_shards()
        # When the config has no residual shard, the last shard is skipped.
        keep_residual = sharding.should_keep_shard(
            sharding.get_residual_index())
        if not keep_residual:
            shard_count -= 1

        for shard_index in range(shard_count):
            suffix = sharding.get_output_table_suffix(shard_index)
            if suffix != bigquery_util.get_table_base_name(suffix):
                suffix_error = (
                    'Table suffix cannot contain "{}" we reserve this  '
                    'string to mark sharded output tables.')
                raise ValueError(
                    suffix_error.format(bigquery_util.TABLE_SUFFIX_SEPARATOR))
            expected_tables.append(
                bigquery_util.compose_table_name(table_id, suffix))

        # Existence is only checked once all table names have been validated.
        for table_name in expected_tables:
            table_found = bigquery_util.table_exist(
                client, project_id, dataset_id, table_name)
            if append and not table_found:
                raise ValueError(
                    'Table {}:{}.{} does not exist, cannot append to it.'.
                    format(project_id, dataset_id, table_name))
            if table_found and not append:
                overwrite_error = (
                    'Table {}:{}.{} already exists, cannot overwrite it. Please '
                    'set `--append True` if you want to append to it.')
                raise ValueError(
                    overwrite_error.format(project_id, dataset_id, table_name))
    def test_table_exist(self):
        """Covers `bigquery_util.table_exist` for found, 404 and 401 cases.

        A successful `tables.Get` must yield True, an HTTP 404 error must
        yield False, and an HTTP 401 error must propagate to the caller.
        """
        table_ref = ('project', 'dataset', 'table')

        def http_error(status):
            # Build the HttpError the mocked client raises for `status`.
            return exceptions.HttpError(
                response={'status': status}, url='', content='')

        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(
                projectId='project', datasetId='dataset', tableId='table'))
        self.assertEqual(
            bigquery_util.table_exist(client, *table_ref), True)

        # From here on `side_effect` takes precedence over `return_value`.
        client.tables.Get.side_effect = http_error('404')
        self.assertEqual(
            bigquery_util.table_exist(client, *table_ref), False)

        client.tables.Get.side_effect = http_error('401')
        with self.assertRaises(exceptions.HttpError):
            bigquery_util.table_exist(client, *table_ref)
  def validate(self, parsed_args, client=None):
    """Checks that the input table and its sample info table both exist.

    Args:
      parsed_args: Parsed arguments; only `input_table` is read here.
      client: Optional BigQuery client. When falsy, a `bigquery.BigqueryV2`
        client is created from the application default credentials scoped
        to BigQuery.

    Raises:
      ValueError: If the input table does not exist, if its table id does not
        contain exactly one `TABLE_SUFFIX_SEPARATOR`, or if the matching
        sample info table does not exist.
    """
    if not client:
      scopes = ['https://www.googleapis.com/auth/bigquery']
      default_credentials = GoogleCredentials.get_application_default()
      credentials = default_credentials.create_scoped(scopes)
      client = bigquery.BigqueryV2(credentials=credentials)

    project_id, dataset_id, table_id = bigquery_util.parse_table_reference(
        parsed_args.input_table)

    input_table_found = bigquery_util.table_exist(
        client, project_id, dataset_id, table_id)
    if not input_table_found:
      missing_table_error = 'Table {}:{}.{} does not exist.'
      raise ValueError(
          missing_table_error.format(project_id, dataset_id, table_id))

    separator_count = table_id.count(TABLE_SUFFIX_SEPARATOR)
    if separator_count != 1:
      raise ValueError(
          'Input table {} is malformed - exactly one suffix separator "{}" is '
          'required'.format(parsed_args.input_table,
                            TABLE_SUFFIX_SEPARATOR))

    # Everything before the single separator is the base table id.
    separator_position = table_id.find(TABLE_SUFFIX_SEPARATOR)
    base_table_id = table_id[:separator_position]
    sample_table_id = bigquery_util.compose_table_name(
        base_table_id, SAMPLE_INFO_TABLE_SUFFIX)

    sample_table_found = bigquery_util.table_exist(
        client, project_id, dataset_id, sample_table_id)
    if not sample_table_found:
      missing_sample_error = 'Sample table {}:{}.{} does not exist.'
      raise ValueError(
          missing_sample_error.format(project_id, dataset_id, sample_table_id))