Пример #1
0
    def load_data(
        self,
        dataframe,
        dataset_id,
        table_id,
        chunksize=None,
        schema=None,
        progress_bar=True,
    ):
        """Upload ``dataframe`` through ``load.load_chunks`` and log progress.

        Args:
            dataframe: Data to upload; only ``len(dataframe)`` is read here,
                everything else is delegated to ``load_chunks``.
            dataset_id: Forwarded to ``load_chunks`` unchanged.
            table_id: Forwarded to ``load_chunks`` unchanged.
            chunksize: Forwarded to ``load_chunks`` unchanged.
            schema: Forwarded to ``load_chunks`` unchanged.
            progress_bar: When truthy and the module-level ``tqdm`` name is
                truthy, the chunk iterable is wrapped in ``tqdm.tqdm``.

        Exceptions matching ``self.http_error`` are handed to
        ``self.process_http_error``.
        """
        from pandas_gbq import load as chunk_loader

        total_rows = len(dataframe)
        progress_template = "\r{} out of {} rows loaded."

        try:
            pending = chunk_loader.load_chunks(
                self.client,
                dataframe,
                dataset_id,
                table_id,
                chunksize=chunksize,
                schema=schema,
                location=self.location,
            )
            # Decorate the iterable with a progress bar only when requested
            # and when tqdm could be imported.
            show_bar = progress_bar and tqdm
            progress = tqdm.tqdm(pending) if show_bar else pending
            # Each yielded value is treated as the number of rows still to load.
            for rows_left in progress:
                rows_done = total_rows - rows_left
                logger.info(progress_template.format(rows_done, total_rows))
        except self.http_error as http_exc:
            self.process_http_error(http_exc)
Пример #2
0
    def load_data(self,
                  dataframe,
                  dataset_id,
                  table_id,
                  chunksize=None,
                  schema=None,
                  progress_bar=True):
        """Load ``dataframe`` via ``load.load_chunks`` and log percent complete.

        Args:
            dataframe: Data to upload; only ``len(dataframe)`` is read here,
                everything else is delegated to ``load_chunks``.
            dataset_id: Forwarded to ``load_chunks`` unchanged.
            table_id: Forwarded to ``load_chunks`` unchanged.
            chunksize: Forwarded to ``load_chunks`` unchanged.
            schema: Forwarded to ``load_chunks`` unchanged.
            progress_bar: When truthy and the module-level ``tqdm`` name is
                truthy, the chunk iterable is wrapped in ``tqdm.tqdm``.

        Exceptions matching ``self.http_error`` are handed to
        ``self.process_http_error``.
        """
        from pandas_gbq import load

        total_rows = len(dataframe)
        logger.info("\n\n")

        try:
            chunks = load.load_chunks(self.client,
                                      dataframe,
                                      dataset_id,
                                      table_id,
                                      chunksize=chunksize,
                                      schema=schema)
            if progress_bar and tqdm:
                chunks = tqdm.tqdm(chunks)
            # Each yielded value is treated as the number of rows still to load.
            for remaining_rows in chunks:
                if total_rows:
                    percent_done = (
                        (total_rows - remaining_rows) * 100) / total_rows
                else:
                    # An empty dataframe has nothing left to load; report it
                    # as complete instead of raising ZeroDivisionError.
                    percent_done = 100.0
                logger.info("\rLoad is {0}% Complete".format(percent_done))
        except self.http_error as ex:
            self.process_http_error(ex)

        logger.info("\n")
Пример #3
0
def test_load_chunks_omits_policy_tags(monkeypatch, mock_bigquery_client,
                                       bigquery_has_from_dataframe_with_csv):
    """Check that ``load_chunks`` does not send policyTags in the schema.

    Changing policyTags through a load job is unwanted because it can cause
    a 403 error. See: https://github.com/googleapis/python-bigquery/pull/557
    """
    import google.cloud.bigquery

    # Force the feature flag to the value passed into this test.
    feature_flag = mock.PropertyMock(
        return_value=bigquery_has_from_dataframe_with_csv)
    monkeypatch.setattr(
        type(FEATURES),
        "bigquery_has_from_dataframe_with_csv",
        feature_flag,
    )

    frame = pandas.DataFrame({"col1": [1, 2, 3]})
    table_ref = google.cloud.bigquery.TableReference.from_string(
        "my-project.my_dataset.my_table")
    tagged_field = {
        "name": "col1",
        "type": "INT64",
        "policyTags": ["tag1", "tag2"],
    }
    schema = {"fields": [tagged_field]}

    # Consume the whole result so the load call is actually issued.
    list(load.load_chunks(mock_bigquery_client, frame, table_ref,
                          schema=schema))

    mock_load = load_method(mock_bigquery_client)
    assert mock_load.called
    call_kwargs = mock_load.call_args[1]
    assert "job_config" in call_kwargs
    first_field = call_kwargs["job_config"].schema[0]
    sent_field = first_field.to_api_repr()
    assert "policyTags" not in sent_field
Пример #4
0
    def load_data(
        self,
        dataframe,
        destination_table_ref,
        chunksize=None,
        schema=None,
        progress_bar=True,
        api_method: str = "load_parquet",
        billing_project: Optional[str] = None,
    ):
        """Upload ``dataframe`` through ``load.load_chunks`` and log progress.

        Args:
            dataframe: Data to upload; only ``len(dataframe)`` is read here,
                everything else is delegated to ``load_chunks``.
            destination_table_ref: Forwarded to ``load_chunks`` unchanged.
            chunksize: Forwarded to ``load_chunks`` unchanged.
            schema: Forwarded to ``load_chunks`` unchanged.
            progress_bar: When truthy and the module-level ``tqdm`` name is
                truthy, the chunk iterable is wrapped in ``tqdm.tqdm``.
            api_method: Forwarded to ``load_chunks`` unchanged.
            billing_project: Forwarded to ``load_chunks`` unchanged.

        Exceptions matching ``self.http_error`` are handed to
        ``self.process_http_error``.
        """
        from pandas_gbq import load as chunk_loader

        total_rows = len(dataframe)
        progress_template = "\r{} out of {} rows loaded."

        try:
            pending = chunk_loader.load_chunks(
                self.client,
                dataframe,
                destination_table_ref,
                chunksize=chunksize,
                schema=schema,
                location=self.location,
                api_method=api_method,
                billing_project=billing_project,
            )
            # Decorate the iterable with a progress bar only when requested
            # and when tqdm could be imported.
            show_bar = progress_bar and tqdm
            progress = tqdm.tqdm(pending) if show_bar else pending
            # Each yielded value is treated as the number of rows still to load.
            for rows_left in progress:
                rows_done = total_rows - rows_left
                logger.info(progress_template.format(rows_done, total_rows))
        except self.http_error as http_exc:
            self.process_http_error(http_exc)
Пример #5
0
def test_load_chunks_with_invalid_api_method():
    """Check that an unknown ``api_method`` makes ``load_chunks`` raise."""
    expected_message = "Got unexpected api_method:"
    with pytest.raises(ValueError, match=expected_message):
        load.load_chunks(None, None, None, api_method="not_a_thing")