Example #1
    # Assumes: import logging; from google.cloud import bigquery; and
    # DatasetReference, TableReference, ExtractJobConfig imported from google.cloud.bigquery.
    def export_csv(self,
                   bucket_name: str,
                   bucket_path: str,
                   dataset: str,
                   table: str,
                   sep: str = "\t") -> str:
        """Export a BigQuery table to CSV in Cloud Storage and return the GCS URL."""

        bucket_url = f"gs://{bucket_name}/{self.config.lake_path}/{bucket_path}"

        logging.info(
            f"DataWarehouse.export_csv {dataset}.{table} to {bucket_url} ...")
        client = self._get_client()

        # Resolve the fully qualified table to export.
        dataset_ref = DatasetReference(self.config.gcp_project, dataset)
        to_export = TableReference(dataset_ref, table)

        # Configure the extract job: CSV output with the requested delimiter.
        config = ExtractJobConfig()
        config.field_delimiter = sep
        config.destination_format = bigquery.DestinationFormat.CSV

        extract_job = client.extract_table(to_export,
                                           bucket_url,
                                           job_config=config)
        extract_job.result()  # block until the export job completes

        logging.info(
            f"DataWarehouse.export_csv {dataset}.{table} to {bucket_url} Complete!"
        )

        return bucket_url
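
For context, a call to this method might look like the sketch below. This is a hypothetical usage example: the DataWarehouse constructor and the config attributes it reads (gcp_project, lake_path) are assumptions inferred from the attributes referenced above, not a documented API.

# Hypothetical usage; DataWarehouse construction details are assumed.
warehouse = DataWarehouse(config)  # config is assumed to expose .gcp_project and .lake_path
url = warehouse.export_csv(
    bucket_name="my-bucket",            # placeholder bucket
    bucket_path="exports/users-*.csv",  # a wildcard URI lets BigQuery shard large exports
    dataset="analytics",
    table="users",
    sep=",",
)
print(url)  # gs://my-bucket/<lake_path>/exports/users-*.csv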
Example #2
    def test_begin_w_alternate_client(self):
        from google.cloud.bigquery.dataset import DatasetReference
        from google.cloud.bigquery.job import Compression
        from google.cloud.bigquery.job import DestinationFormat
        from google.cloud.bigquery.job import ExtractJobConfig

        PATH = "/projects/%s/jobs" % (self.PROJECT, )
        RESOURCE = self._make_resource(ended=True)
        EXTRACT_CONFIGURATION = {
            "sourceTable": {
                "projectId": self.PROJECT,
                "datasetId": self.DS_ID,
                "tableId": self.SOURCE_TABLE,
            },
            "destinationUris": [self.DESTINATION_URI],
            "compression": Compression.GZIP,
            "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON,
            "fieldDelimiter": "|",
            "printHeader": False,
        }
        RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION
        # Two clients backed by separate mock connections: the job is created
        # with client1 but begun with client2, to verify the explicit client wins.
        conn1 = _make_connection()
        client1 = _make_client(project=self.PROJECT, connection=conn1)
        conn2 = _make_connection(RESOURCE)
        client2 = _make_client(project=self.PROJECT, connection=conn2)
        source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
        source = source_dataset.table(self.SOURCE_TABLE)
        config = ExtractJobConfig()
        config.compression = Compression.GZIP
        config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON
        config.field_delimiter = "|"
        config.print_header = False
        job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
                             client1, config)
        with mock.patch(
                "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
        ) as final_attributes:
            job._begin(client=client2)

        final_attributes.assert_called_with({"path": PATH}, client2, job)

        # Only the client passed to _begin() should issue the API request.
        conn1.api_request.assert_not_called()
        conn2.api_request.assert_called_once_with(
            method="POST",
            path=PATH,
            data={
                "jobReference": {
                    "projectId": self.PROJECT,
                    "jobId": self.JOB_ID
                },
                "configuration": {
                    "extract": EXTRACT_CONFIGURATION
                },
            },
            timeout=None,
        )
        self._verifyResourceProperties(job, RESOURCE)
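
Stripped of the test harness, the extract configuration exercised above maps onto the public client API roughly as follows. This is a minimal sketch; the project, dataset, table, and bucket names are placeholders, not values from the test.

from google.cloud import bigquery

# Placeholder names; substitute your own project, dataset, table, and bucket.
client = bigquery.Client(project="my-project")
source = bigquery.DatasetReference("my-project", "my_dataset").table("my_table")

config = bigquery.ExtractJobConfig()
config.compression = bigquery.Compression.GZIP
config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON
config.field_delimiter = "|"
config.print_header = False

extract_job = client.extract_table(
    source,
    "gs://my-bucket/export/part-*.json.gz",
    job_config=config,
)
extract_job.result()  # blocks until the extract job finishes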