示例#1
0
def copy_table(dataset_name, table_name, new_table_name, project=None):
    """Copy a table to a new table in the same dataset and wait for the job.

    If no project is specified, then the currently active project is used.

    Args:
        dataset_name: Name of the dataset that holds the source table.
        table_name: Name of the table to copy from.
        new_table_name: Name of the table to copy into.
        project: Optional project handed to ``bigquery.Client``.
    """
    client = bigquery.Client(project=project)
    dataset_ref = client.dataset(dataset_name)

    # Source and destination share one dataset in this sample, but copies
    # across datasets and projects are possible too. Several source tables
    # can also be merged into one destination by passing additional
    # arguments to `copy_table`.
    source_ref = dataset_ref.table(table_name)
    target_ref = dataset_ref.table(new_table_name)

    # The copy is submitted as a job identified by a random UUID.
    copy_job = client.copy_table(str(uuid.uuid4()), target_ref, source_ref)

    # Let the job create the destination table when it is missing.
    copy_job.create_disposition = (
        google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    copy_job.begin()  # Submit the job.
    print('Waiting for job to finish...')
    copy_job.result()  # Blocks until the job has completed.

    print('Table {} copied to {}.'.format(table_name, new_table_name))
def copy_table(dataset_name, table_name, new_table_name, project=None):
    """Copy a table to a new table in the same dataset and wait for the job.

    If no project is specified, then the currently active project is used.

    Args:
        dataset_name: Name of the dataset that holds the source table.
        table_name: Name of the table to copy from.
        new_table_name: Name of the table to copy into.
        project: Optional project handed to ``bigquery.Client``.
    """
    client = bigquery.Client(project=project)
    dataset_ref = client.dataset(dataset_name)
    source_ref = dataset_ref.table(table_name)

    # The destination is placed in the same dataset and project here, but
    # copying across datasets and projects is possible as well. Multiple
    # source tables can also be copied into a single destination table by
    # providing additional arguments to `copy_table`.
    target_ref = dataset_ref.table(new_table_name)

    # Each copy job gets a freshly generated, unique identifier.
    new_job_id = str(uuid.uuid4())
    copy_job = client.copy_table(new_job_id, target_ref, source_ref)

    # Allow the job to create the destination table if it is absent.
    copy_job.create_disposition = (
        google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    copy_job.begin()  # Submit the job.
    print('Waiting for job to finish...')
    copy_job.result()  # Blocks until the job has completed.

    print('Table {} copied to {}.'.format(table_name, new_table_name))
示例#3
0
    def execute(
        self,
        query: str,
        destination_table: Optional[str] = None,
        write_disposition: Optional[
            google.cloud.bigquery.job.WriteDisposition] = None,
    ) -> None:
        """Run ``query`` and block until the query job has finished.

        Args:
            query: SQL text to execute. ``self.dataset`` is used as the
                default dataset of the query job.
            destination_table: Optional table name, resolved inside
                ``self.dataset``, that receives the query result. When
                given, the write disposition defaults to WRITE_TRUNCATE and
                the table is labelled with a ``last_updated`` value once the
                query has finished.
            write_disposition: Optional write disposition; when given it
                overrides the WRITE_TRUNCATE default.
        """
        default_dataset = (
            google.cloud.bigquery.dataset.DatasetReference.from_string(
                self.dataset,
                default_project=self.project,
            ))
        bq_job = google.cloud.bigquery.job

        config_options = {}
        if destination_table:
            config_options["destination"] = default_dataset.table(
                destination_table)
            config_options["write_disposition"] = (
                bq_job.WriteDisposition.WRITE_TRUNCATE)
        if write_disposition:
            # An explicit disposition wins over the truncate default.
            config_options["write_disposition"] = write_disposition

        job_config = bq_job.QueryJobConfig(default_dataset=default_dataset,
                                           **config_options)
        query_job = self.client.query(query, job_config)
        # Only completion matters here; the returned rows are discarded.
        query_job.result(max_results=1)

        if not destination_table:
            return

        # Record on the destination table when it was last written.
        self.add_labels_to_table(
            destination_table,
            {"last_updated": self._current_timestamp_label()},
        )
示例#4
0
    def test_result_invokes_begin(self):
        """result() on a fresh job issues a POST and then a GET request."""
        pending_resource = self._make_resource()
        finished_resource = copy.deepcopy(pending_resource)
        finished_resource["status"] = {"state": "DONE"}

        # The fake connection answers the first request with the pending
        # resource and the second one with the finished resource.
        conn = _make_connection(pending_resource, finished_resource)
        client = _make_client(self.PROJECT)
        client._connection = conn

        job = self._make_one(
            self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
        job.result()

        recorded = conn.api_request.call_args_list
        self.assertEqual(len(recorded), 2)
        first_request, second_request = recorded
        # Index 1 of a recorded call holds its keyword arguments.
        self.assertEqual(first_request[1]["method"], "POST")
        self.assertEqual(second_request[1]["method"], "GET")
示例#5
0
def copyTable(projectFrom, projectTo, datasetFrom, datasetTo, tableName):
    """Copy ``tableName`` from one dataset into another under the same name.

    Args:
        projectFrom: Object whose ``copy_table`` method creates the copy job.
        projectTo: Not used by this function.
        datasetFrom: Dataset providing the source table reference.
        datasetTo: Dataset providing the destination table reference.
        tableName: Name used for both the source and destination table.
    """
    source_ref = datasetFrom.table(tableName)
    target_ref = datasetTo.table(tableName)

    # The copy job is identified by a freshly generated UUID.
    copy_job = projectFrom.copy_table(
        str(uuid.uuid4()), target_ref, source_ref)

    # Let the job create the destination table when it is missing.
    copy_job.create_disposition = (
        google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    copy_job.begin()  # Submit the job.
    print('Waiting for job to finish...')
    copy_job.result()  # Blocks until the job has completed.

    print('Table {} copied to {}.'.format(tableName, tableName))
示例#6
0
def run_sql(sql_script_path: str) -> tuple:
    """Runs a sql load script and returns its rows together with job errors.

    The script is read from disk, passed through ``_populate_table_names``
    and submitted as a query job.

    Args:
        sql_script_path: Path to the sql script file.

    Returns:
        A ``(rows, errors)`` tuple. ``rows`` is a list of the rows produced
        by the job, or an empty list when waiting for the job raised a
        ``GoogleCloudError`` (the error is logged, not re-raised).
        ``errors`` is the value of ``job.errors``.
    """
    client = _get_bq_client()

    with open(sql_script_path, 'r') as f:
        raw_query = f.read()

    query = _populate_table_names(raw_query)

    job = client.query(query)
    try:
        result = job.result()
    except google.cloud.exceptions.GoogleCloudError as e:
        # Best effort: report the failure through the return value instead
        # of propagating it, so callers can inspect ``job.errors``.
        logging.error(e)
        return [], job.errors

    # Materialising the rows happens outside the ``try`` on purpose: errors
    # raised while iterating are not swallowed.
    return list(result), job.errors
示例#7
0
    def test_result(self):
        """result() on a job built from an ended resource returns the job."""
        client = _make_client(project=self.PROJECT)
        ended_resource = self._make_resource(ended=True)
        target_class = self._get_target_class()
        job = target_class.from_api_repr(ended_resource, client)

        self.assertIs(job.result(), job)
示例#8
0
 def execute(self,
             query: str,
             destination_table: Optional[str] = None) -> None:
     """Run ``query`` and block until the query job has finished.

     Args:
         query: SQL text to execute. ``self.dataset`` is used as the
             default dataset of the query job.
         destination_table: Optional table name, resolved inside
             ``self.dataset``, that receives the query result with a
             WRITE_TRUNCATE write disposition.
     """
     default_dataset = (
         google.cloud.bigquery.dataset.DatasetReference.from_string(
             self.dataset,
             default_project=self.project,
         ))
     bq_job = google.cloud.bigquery.job

     config_options = {}
     if destination_table:
         config_options["destination"] = default_dataset.table(
             destination_table)
         config_options["write_disposition"] = (
             bq_job.WriteDisposition.WRITE_TRUNCATE)

     job_config = bq_job.QueryJobConfig(default_dataset=default_dataset,
                                        **config_options)
     query_job = self.client.query(query, job_config)
     # Only completion matters here; the returned rows are discarded.
     query_job.result(max_results=1)
示例#9
0
def _delete_rows(client: bigquery.Client, table_id: str) -> None:
    """Delete all rows in a table.

    Args:
        client: BigQuery client used to submit the DELETE statement.
        table_id: Identifier of the table to empty. It is interpolated
            directly into the SQL text, so it must come from a trusted
            source.

    Raises:
        google.cloud.exceptions.GoogleCloudError: If waiting for the job
            fails; the error is logged and then re-raised.
    """
    query = f'DELETE FROM `{table_id}` WHERE TRUE'
    job = client.query(query)
    try:
        # Wait for completion; the result of a DELETE is not needed.
        job.result()
    except google.cloud.exceptions.GoogleCloudError as e:
        logging.error(e)
        raise
示例#10
0
    def test_result_w_retry_wo_state(self):
        """result(retry=...) retries both the begin and the reload request.

        The fake connection raises NotFound before each successful response,
        and the custom retry predicate accepts every exception.
        """
        common = dict(
            job_id=self.JOB_ID,
            project_id=self.PROJECT,
            location="EU",
            started=True,
        )
        begun_resource = _make_job_resource(**common)
        done_resource = _make_job_resource(ended=True, **common)

        # Response sequence: error, begun job, error, finished job.
        conn = _make_connection(
            exceptions.NotFound("not normally retriable"),
            begun_resource,
            exceptions.NotFound("not normally retriable"),
            done_resource,
        )
        client = _make_client(project=self.PROJECT, connection=conn)
        job_ref = self._job_reference(self.JOB_ID, self.PROJECT, "EU")
        job = self._make_one(job_ref, client)

        # A predicate that always returns True makes every error retriable.
        always_retry = mock.Mock(return_value=True)
        retry_policy = google.api_core.retry.Retry(
            predicate=always_retry,
            initial=0.001,
            maximum=0.001,
            deadline=0.1,
        )
        self.assertIs(job.result(retry=retry_policy), job)

        expected_begin = mock.call(
            method="POST",
            path=f"/projects/{self.PROJECT}/jobs",
            data={
                "jobReference": {
                    "jobId": self.JOB_ID,
                    "projectId": self.PROJECT,
                    "location": "EU",
                }
            },
            timeout=None,
        )
        expected_reload = mock.call(
            method="GET",
            path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
            query_params={"location": "EU"},
            timeout=None,
        )
        # Each request is expected twice: the failed attempt and its retry.
        conn.api_request.assert_has_calls(
            [expected_begin] * 2 + [expected_reload] * 2)
示例#11
0
    def test_result_explicit_w_state(self):
        """result(timeout=...) on a DONE job returns it without API calls."""
        conn = _make_connection()
        client = _make_client(project=self.PROJECT, connection=conn)
        job = self._make_one(self.JOB_ID, client)

        # Go through _set_properties() rather than only editing _properties,
        # so that the result state is set properly.
        properties = job._properties
        properties["status"] = {"state": "DONE"}
        job._set_properties(properties)

        returned = job.result(timeout=1)

        self.assertIs(returned, job)
        conn.api_request.assert_not_called()
示例#12
0
    def test_result_default_wo_state(self):
        """result() with default arguments retries begin and reload requests.

        The fake connection raises a retriable exception before each
        successful response.
        """
        common = dict(
            job_id=self.JOB_ID,
            project_id=self.PROJECT,
            location="US",
            started=True,
        )
        begun_resource = _make_job_resource(**common)
        done_resource = _make_job_resource(ended=True, **common)

        # Response sequence: error, begun job, error, finished job.
        conn = _make_connection(
            _make_retriable_exception(),
            begun_resource,
            _make_retriable_exception(),
            done_resource,
        )
        client = _make_client(project=self.PROJECT, connection=conn)
        job = self._make_one(self.JOB_ID, client)

        self.assertIs(job.result(), job)

        expected_begin = mock.call(
            method="POST",
            path=f"/projects/{self.PROJECT}/jobs",
            data={
                "jobReference": {
                    "jobId": self.JOB_ID,
                    "projectId": self.PROJECT
                }
            },
            timeout=None,
        )
        expected_reload = mock.call(
            method="GET",
            path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
            query_params={"location": "US"},
            timeout=None,
        )
        # Each request is expected twice: the failed attempt and its retry.
        conn.api_request.assert_has_calls(
            [expected_begin] * 2 + [expected_reload] * 2)