Пример #1
0
    def test_rows_are_written(self):
        # Checks that a row passed to the sink writer ends up in a
        # tabledata.InsertAll request addressed to project/dataset/table.
        client = mock.Mock()
        table = bigquery.Table(
            tableReference=bigquery.TableReference(
                projectId='project', datasetId='dataset', tableId='table'),
            schema=bigquery.TableSchema())
        client.tables.Get.return_value = table
        write_disposition = df.io.BigQueryDisposition.WRITE_APPEND

        # An empty insertErrors list on the mocked InsertAll response.
        insert_response = mock.Mock()
        insert_response.insertErrors = []
        client.tabledata.InsertAll.return_value = insert_response

        # Single source of truth for the row: the same dict is written and
        # used to build the expectation, so the two cannot drift apart.
        sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}

        with df.io.BigQuerySink(
                'project:dataset.table',
                write_disposition=write_disposition).writer(client) as writer:
            writer.Write(sample_row)

        # Build the JSON payload expected for sample_row.
        json_object = bigquery.JsonObject()
        # items() (not the Python 2-only iteritems()) keeps this runnable on
        # both Python 2 and Python 3.
        for k, v in sample_row.items():
            json_object.additionalProperties.append(
                bigquery.JsonObject.AdditionalProperty(key=k,
                                                       value=to_json_value(v)))
        expected_rows = [
            bigquery.TableDataInsertAllRequest.RowsValueListEntry(
                insertId='_1',  # First row ID generated with prefix ''
                json=json_object),
        ]

        # assert_called_with compares against the most recent InsertAll call.
        client.tabledata.InsertAll.assert_called_with(
            bigquery.BigqueryTabledataInsertAllRequest(
                projectId='project',
                datasetId='dataset',
                tableId='table',
                tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
                    rows=expected_rows)))
Пример #2
0
 def _create_table(self, project_id, dataset_id, table_id, schema):
   """Issues a tables.Insert request for a new table.

   Args:
     project_id: Project ID used for both the request and the table reference.
     dataset_id: Dataset ID used for both the request and the table reference.
     table_id: Table ID placed in the table reference.
     schema: Schema attached to the table message.

   Returns:
     Whatever self.client.tables.Insert returns for the request.
   """
   reference = bigquery.TableReference(
       projectId=project_id, datasetId=dataset_id, tableId=table_id)
   insert_request = bigquery.BigqueryTablesInsertRequest(
       projectId=project_id,
       datasetId=dataset_id,
       table=bigquery.Table(tableReference=reference, schema=schema))
   # The response is a bigquery.Table instance.
   return self.client.tables.Insert(insert_request)
Пример #3
0
 def test_table_with_write_disposition_append(self):
     # WRITE_APPEND with tables.Get returning a table: opening the writer
     # must call Get, and must call neither Delete nor Insert.
     existing_table = bigquery.Table(
         tableReference=bigquery.TableReference(
             projectId='project', datasetId='dataset', tableId='table'),
         schema=bigquery.TableSchema())
     client = mock.Mock()
     client.tables.Get.return_value = existing_table
     client.tables.Insert.return_value = existing_table

     sink = df.io.BigQuerySink(
         'project:dataset.table',
         write_disposition=df.io.BigQueryDisposition.WRITE_APPEND)
     # Entering and leaving the writer context is all that is exercised.
     with sink.writer(client):
         pass

     tables_api = client.tables
     self.assertTrue(tables_api.Get.called)
     self.assertFalse(tables_api.Delete.called)
     self.assertFalse(tables_api.Insert.called)
Пример #4
0
def _parse_table_reference(table, dataset=None, project=None):
  """Builds a bigquery.TableReference from a string or separate IDs.

  Args:
    table: Either a bigquery.TableReference (returned unchanged, with dataset
      and project ignored), or a string. When dataset is None the string must
      be a complete reference of the form 'DATASET.TABLE' or
      'PROJECT:DATASET.TABLE'; otherwise it is used as the table ID as given.
    dataset: The dataset ID, or None when the table argument carries the
      whole reference.
    project: The project ID. Only used when dataset is given; otherwise the
      project (if any) is taken from the table string.

  Returns:
    A bigquery.TableReference with projectId, datasetId and tableId set.
    projectId is None when the reference string has no 'PROJECT:' prefix.

  Raises:
    ValueError: if dataset is None and the table string does not match the
      expected format.
  """
  # An already-built reference is handed back untouched.
  if isinstance(table, bigquery.TableReference):
    return table

  if dataset is not None:
    # The caller supplied the pieces separately; take them verbatim.
    project_id, dataset_id, table_id = project, dataset, table
  else:
    # Without a dataset argument the table string must be a full reference
    # rather than a bare table name.
    parsed = re.match(
        r'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>\w+)$', table)
    if parsed is None:
      raise ValueError(
          'Expected a table reference (PROJECT:DATASET.TABLE or '
          'DATASET.TABLE) instead of %s.' % table)
    project_id = parsed.group('project')
    dataset_id = parsed.group('dataset')
    table_id = parsed.group('table')

  reference = bigquery.TableReference()
  reference.projectId = project_id
  reference.datasetId = dataset_id
  reference.tableId = table_id
  return reference
Пример #5
0
 def test_no_table_and_create_if_needed(self):
     # tables.Get raises a 404 HttpError and the sink uses CREATE_IF_NEEDED
     # with a schema: opening the writer must call both Get and Insert.
     created_table = bigquery.Table(
         tableReference=bigquery.TableReference(
             projectId='project', datasetId='dataset', tableId='table'),
         schema=bigquery.TableSchema())
     not_found = HttpError(response={'status': '404'}, url='', content='')

     client = mock.Mock()
     client.tables.Get.side_effect = not_found
     client.tables.Insert.return_value = created_table

     sink = df.io.BigQuerySink(
         'project:dataset.table',
         schema='somefield:INTEGER',
         create_disposition=df.io.BigQueryDisposition.CREATE_IF_NEEDED)
     # Entering and leaving the writer context is all that is exercised.
     with sink.writer(client):
         pass

     tables_api = client.tables
     self.assertTrue(tables_api.Get.called)
     self.assertTrue(tables_api.Insert.called)
Пример #6
0
 def test_table_empty_and_write_disposition_empty(self):
     # WRITE_EMPTY with tabledata.List reporting totalRows=0: opening the
     # writer must call Get and List, and must call neither Delete nor
     # Insert.
     existing_table = bigquery.Table(
         tableReference=bigquery.TableReference(
             projectId='project', datasetId='dataset', tableId='table'),
         schema=bigquery.TableSchema())
     no_rows = bigquery.TableDataList(totalRows=0)

     client = mock.Mock()
     client.tables.Get.return_value = existing_table
     client.tabledata.List.return_value = no_rows
     client.tables.Insert.return_value = existing_table

     sink = df.io.BigQuerySink(
         'project:dataset.table',
         write_disposition=df.io.BigQueryDisposition.WRITE_EMPTY)
     # Entering and leaving the writer context is all that is exercised.
     with sink.writer(client):
         pass

     self.assertTrue(client.tables.Get.called)
     self.assertTrue(client.tabledata.List.called)
     self.assertFalse(client.tables.Delete.called)
     self.assertFalse(client.tables.Insert.called)
Пример #7
0
 def test_table_not_empty_and_write_disposition_empty(self):
     # WRITE_EMPTY with tabledata.List reporting totalRows=1: opening the
     # writer must raise RuntimeError with the exact message checked below.
     client = mock.Mock()
     client.tables.Get.return_value = bigquery.Table(
         tableReference=bigquery.TableReference(projectId='project',
                                                datasetId='dataset',
                                                tableId='table'),
         schema=bigquery.TableSchema())
     client.tabledata.List.return_value = bigquery.TableDataList(
         totalRows=1)
     write_disposition = df.io.BigQueryDisposition.WRITE_EMPTY
     with self.assertRaises(RuntimeError) as exn:
         with df.io.BigQuerySink(
                 'project:dataset.table',
                 write_disposition=write_disposition).writer(client):
             pass
     # Compare via str() rather than the `.message` attribute: `.message` is
     # deprecated since Python 2.6 and does not exist on Python 3, while
     # str() yields the same text for a single-argument exception on both.
     self.assertEqual(
         str(exn.exception),
         'Table project:dataset.table is not empty but write disposition is '
         'WRITE_EMPTY.')