def load_data(
    self,
    dataframe,
    dataset_id,
    table_id,
    chunksize=None,
    schema=None,
    progress_bar=True,
):
    """Load ``dataframe`` into a table and log progress as chunks complete.

    The work is delegated to ``pandas_gbq.load.load_chunks``, called with
    ``self.client`` and ``self.location``. Each value the returned iterable
    yields is treated as the number of rows still to be loaded.

    Args:
        dataframe: Object to load; ``len(dataframe)`` is used as the total
            row count in the progress messages.
        dataset_id: Forwarded positionally to ``load_chunks``.
        table_id: Forwarded positionally to ``load_chunks``.
        chunksize: Forwarded to ``load_chunks`` as ``chunksize``.
        schema: Forwarded to ``load_chunks`` as ``schema``.
        progress_bar: When truthy (and the module-level ``tqdm`` name is
            truthy), the chunk iterable is wrapped in ``tqdm.tqdm``.

    Exceptions matching ``self.http_error`` are passed to
    ``self.process_http_error`` instead of propagating directly.
    """
    from pandas_gbq import load

    row_count = len(dataframe)
    message = "\r{} out of {} rows loaded."

    try:
        progress = load.load_chunks(
            self.client,
            dataframe,
            dataset_id,
            table_id,
            chunksize=chunksize,
            schema=schema,
            location=self.location,
        )

        # NOTE(review): ``tqdm`` is presumably falsy when the optional
        # dependency is unavailable -- confirm at the module import site.
        use_bar = progress_bar and tqdm
        if use_bar:
            progress = tqdm.tqdm(progress)

        for rows_left in progress:
            rows_done = row_count - rows_left
            logger.info(message.format(rows_done, row_count))
    except self.http_error as err:
        self.process_http_error(err)
def load_data(
    self,
    dataframe,
    dataset_id,
    table_id,
    chunksize=None,
    schema=None,
    progress_bar=True,
):
    """Load ``dataframe`` into a table, logging percentage progress.

    The work is delegated to ``pandas_gbq.load.load_chunks``, called with
    ``self.client``. Each value the returned iterable yields is treated as
    the number of rows still to be loaded and is converted to a
    "percent complete" log message.

    Args:
        dataframe: Object to load; ``len(dataframe)`` is the total row count
            used for the percentage.
        dataset_id: Forwarded positionally to ``load_chunks``.
        table_id: Forwarded positionally to ``load_chunks``.
        chunksize: Forwarded to ``load_chunks`` as ``chunksize``.
        schema: Forwarded to ``load_chunks`` as ``schema``.
        progress_bar: When truthy (and the module-level ``tqdm`` name is
            truthy), the chunk iterable is wrapped in ``tqdm.tqdm``.

    Exceptions matching ``self.http_error`` are passed to
    ``self.process_http_error``; if that call returns, the trailing newline
    is still logged.
    """
    from pandas_gbq import load

    total_rows = len(dataframe)
    logger.info("\n\n")

    try:
        chunks = load.load_chunks(
            self.client,
            dataframe,
            dataset_id,
            table_id,
            chunksize=chunksize,
            schema=schema,
        )
        if progress_bar and tqdm:
            chunks = tqdm.tqdm(chunks)

        for remaining_rows in chunks:
            if total_rows:
                percent = ((total_rows - remaining_rows) * 100) / total_rows
            else:
                # An empty dataframe has nothing left to load; report it as
                # finished instead of dividing by zero.
                percent = 100.0
            logger.info("\rLoad is {0}% Complete".format(percent))
    except self.http_error as ex:
        self.process_http_error(ex)

    logger.info("\n")
def test_load_chunks_omits_policy_tags(
    monkeypatch, mock_bigquery_client, bigquery_has_from_dataframe_with_csv
):
    """Check that ``policyTags`` are dropped from the schema sent to the load job.

    Changing policyTags through a load job is unwanted, since it can cause
    a 403 error.
    See: https://github.com/googleapis/python-bigquery/pull/557
    """
    from google.cloud import bigquery

    # Force the feature flag to the value supplied to this test.
    feature_flag = mock.PropertyMock(
        return_value=bigquery_has_from_dataframe_with_csv
    )
    monkeypatch.setattr(
        type(FEATURES), "bigquery_has_from_dataframe_with_csv", feature_flag
    )

    frame = pandas.DataFrame({"col1": [1, 2, 3]})
    table_ref = bigquery.TableReference.from_string(
        "my-project.my_dataset.my_table"
    )
    tagged_field = {
        "name": "col1",
        "type": "INT64",
        "policyTags": ["tag1", "tag2"],
    }
    schema = {"fields": [tagged_field]}

    # Consume every item produced by load_chunks.
    for _ in load.load_chunks(
        mock_bigquery_client, frame, table_ref, schema=schema
    ):
        pass

    mock_load = load_method(mock_bigquery_client)
    assert mock_load.called

    # call_args is an (args, kwargs) pair; only the keyword arguments matter.
    call_kwargs = mock_load.call_args[1]
    assert "job_config" in call_kwargs

    first_field = call_kwargs["job_config"].schema[0].to_api_repr()
    assert "policyTags" not in first_field
def load_data(
    self,
    dataframe,
    destination_table_ref,
    chunksize=None,
    schema=None,
    progress_bar=True,
    api_method: str = "load_parquet",
    billing_project: Optional[str] = None,
):
    """Load ``dataframe`` into the destination table and log progress.

    The work is delegated to ``pandas_gbq.load.load_chunks``, called with
    ``self.client`` and ``self.location``. Each value the returned iterable
    yields is treated as the number of rows still to be loaded.

    Args:
        dataframe: Object to load; ``len(dataframe)`` is used as the total
            row count in the progress messages.
        destination_table_ref: Forwarded positionally to ``load_chunks``.
        chunksize: Forwarded to ``load_chunks`` as ``chunksize``.
        schema: Forwarded to ``load_chunks`` as ``schema``.
        progress_bar: When truthy (and the module-level ``tqdm`` name is
            truthy), the chunk iterable is wrapped in ``tqdm.tqdm``.
        api_method: Forwarded to ``load_chunks`` as ``api_method``.
        billing_project: Forwarded to ``load_chunks`` as ``billing_project``.

    Exceptions matching ``self.http_error`` are passed to
    ``self.process_http_error`` instead of propagating directly.
    """
    from pandas_gbq import load

    row_count = len(dataframe)
    message = "\r{} out of {} rows loaded."

    try:
        progress = load.load_chunks(
            self.client,
            dataframe,
            destination_table_ref,
            chunksize=chunksize,
            schema=schema,
            location=self.location,
            api_method=api_method,
            billing_project=billing_project,
        )

        # NOTE(review): ``tqdm`` is presumably falsy when the optional
        # dependency is unavailable -- confirm at the module import site.
        use_bar = progress_bar and tqdm
        if use_bar:
            progress = tqdm.tqdm(progress)

        for rows_left in progress:
            rows_done = row_count - rows_left
            logger.info(message.format(rows_done, row_count))
    except self.http_error as err:
        self.process_http_error(err)
def test_load_chunks_with_invalid_api_method():
    """An unrecognised ``api_method`` must make ``load_chunks`` raise ValueError."""
    expected_message = "Got unexpected api_method:"
    bogus_method = "not_a_thing"

    # The positional arguments are irrelevant here, so None stands in for each.
    with pytest.raises(ValueError, match=expected_message):
        load.load_chunks(None, None, None, api_method=bogus_method)