def test_dofn_client_no_records(self):
  """No rows means no insert: neither start nor finish of a bundle flushes."""
  client = mock.Mock()
  client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      batch_size=2,
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key=None,
      test_client=client)

  fn.start_bundle()
  # Nothing processed yet, so the batch-size threshold cannot be reached.
  self.assertFalse(client.tabledata.InsertAll.called)

  fn.finish_bundle()
  # With an empty buffer, finish_bundle has nothing to flush either.
  self.assertFalse(client.tabledata.InsertAll.called)
def test_dofn_client_finish_bundle_flush_called(self):
  """A partial batch left over at bundle end is flushed by finish_bundle."""
  client = mock.Mock()
  client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      batch_size=2,
      create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key=None,
      test_client=client)

  fn.start_bundle()
  # Destination is a tuple of (destination, schema) to ensure the table is
  # created.
  fn.process(('project_id:dataset_id.table_id', ({'month': 1}, 'insertid3')))

  self.assertTrue(client.tables.Get.called)
  # A single buffered row is below batch_size, so no eager insert.
  self.assertFalse(client.tabledata.InsertAll.called)

  fn.finish_bundle()
  # finish_bundle must write out the row still sitting in the buffer.
  self.assertTrue(client.tabledata.InsertAll.called)
def test_dofn_client_finish_bundle_flush_called_explicit_table(self):
  """finish_bundle flushes a buffered row when the table is given explicitly.

  Renamed from ``test_dofn_client_finish_bundle_flush_called``: this file
  defined two methods with that name, and the later ``def`` rebinds the
  class attribute, so only one of the two tests was ever discovered and
  run. The rename makes both execute.
  """
  client = mock.Mock()
  client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
  write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
  schema = {
      'fields': [{'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}
  fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema=schema,
      create_disposition=create_disposition,
      write_disposition=write_disposition,
      kms_key=None,
      test_client=client)

  fn.start_bundle()
  fn.process({'month': 1})

  self.assertTrue(client.tables.Get.called)
  # One buffered row is below batch_size=2, so process() does not flush.
  self.assertFalse(client.tabledata.InsertAll.called)

  fn.finish_bundle()
  # The remaining buffered row must be inserted at bundle end.
  self.assertTrue(client.tabledata.InsertAll.called)
def test_dofn_client_process_flush_called(self):
  """Reaching batch_size inside process() triggers an immediate insert."""
  client = mock.Mock()
  client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      batch_size=2,
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key=None,
      test_client=client)

  fn.start_bundle()
  fn.process(('project_id:dataset_id.table_id', ({'month': 1}, 'insertid1')))
  fn.process(('project_id:dataset_id.table_id', ({'month': 2}, 'insertid2')))
  # Two buffered rows hit batch_size, so the batch is written eagerly
  # without waiting for finish_bundle.
  self.assertTrue(client.tabledata.InsertAll.called)
def test_dofn_client_process_performs_batching(self):
  """A row count below batch_size is buffered, not inserted immediately.

  Fix: the mock was passed as ``client=client`` instead of
  ``test_client=client`` (the keyword every sibling test in this file
  uses), so the mock was never injected into the DoFn. Also pass
  ``kms_key=None`` for consistency with the other BigQueryWriteFn
  constructions in this file.
  """
  client = mock.Mock()
  client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
  write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
  fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema='month:INTEGER',
      create_disposition=create_disposition,
      write_disposition=write_disposition,
      kms_key=None,
      test_client=client)

  fn.start_bundle()
  fn.process({'month': 1})

  self.assertTrue(client.tables.Get.called)
  # Only one row is buffered (< batch_size), so no insert has happened yet.
  self.assertFalse(client.tabledata.InsertAll.called)