def copy_table(dataset_name, table_name, new_table_name, project=None):
    """Copies a table.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    # This sample shows the destination table in the same dataset and project,
    # however, it's possible to copy across datasets and projects. You can
    # also copy multiple source tables into a single destination table by
    # providing additional arguments to `copy_table`.
    destination_table = dataset.table(new_table_name)

    # Create a job to copy the table to the destination table.
    job_id = str(uuid.uuid4())
    job = bigquery_client.copy_table(
        job_id, destination_table, table)

    # Create the table if it doesn't exist.
    job.create_disposition = (
        gcloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED)

    # Start the job.
    job.begin()

    # Wait for the job to finish.
    print('Waiting for job to finish...')
    wait_for_job(job)

    print('Table {} copied to {}.'.format(table_name, new_table_name))
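# Several of the snippets in this section call a wait_for_job() helper that is
# not shown here. A minimal sketch of it, assuming the old job API used above
# (job.reload(), job.state, job.error_result), could look like this:
import time


def wait_for_job(job):
    while True:
        job.reload()  # Refreshes the job state via a GET request.
        if job.state == 'DONE':
            if job.error_result:
                raise RuntimeError(job.errors)
            return
        time.sleep(1)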
Example #2
def handler(event, context):
    rows = []

    for r in event['Records']:
        payload = r['kinesis']['data']
        try:
            data = json.loads(base64.b64decode(payload))
            row = []
            for key in ['time', 'tag', 'value']:
                if key == 'time':
                    row.append(datetime.datetime.fromtimestamp(data[key]))
                else:
                    row.append(data[key])
            rows.append(tuple(row))
        except Exception as e:
            print('Invalid data "{0}": {1}'.format(payload, e))

    if len(rows) == 0:
        return

    kms = boto3.client('kms')
    blob = base64.b64decode(BQ_CREDENTIALS)
    dec = kms.decrypt(CiphertextBlob=blob)
    keyfile_dict = json.loads(dec['Plaintext'])
    credentials = ServiceAccountCredentials.from_json_keyfile_dict(
        keyfile_dict)

    bq = bigquery.Client(credentials=credentials, project=BQ_PROJECT)
    dataset = bq.dataset(BQ_DATASET)
    table = dataset.table(BQ_TABLE)
    table.reload()
    res = table.insert_data(rows)

    print(res)
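# For local experimentation, a Kinesis-style test event matching what this
# handler expects (base64-encoded JSON with 'time', 'tag' and 'value' keys)
# could be built as below. The helper name and values are made up, and
# actually invoking handler() still requires the BQ_* settings, KMS access
# and a reachable BigQuery table.
import base64
import json
import time


def make_test_event(records):
    """Wraps plain dicts in the Kinesis record envelope the handler reads."""
    return {
        'Records': [
            {'kinesis': {'data': base64.b64encode(
                json.dumps(record).encode('utf-8'))}}
            for record in records
        ]
    }


test_event = make_test_event(
    [{'time': time.time(), 'tag': 'sensor-1', 'value': 42.0}])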
Example #3
def async_query(query):
    client = bigquery.Client()
    query_job = client.run_async_query(str(uuid.uuid4()), query)
    query_job.use_legacy_sql = False
    query_job.begin()

    wait_for_job(query_job)

    # Manually construct the QueryResults.
    # TODO: The client library will provide a helper method that does this.
    # https://github.com/GoogleCloudPlatform/gcloud-python/issues/2083
    query_results = bigquery.query.QueryResults('', client)
    query_results._properties['jobReference'] = {
        'jobId': query_job.name,
        'projectId': query_job.project
    }

    # Drain the query results by requesting a page at a time.
    page_token = None

    while True:
        rows, total_rows, page_token = query_results.fetch_data(
            max_results=10, page_token=page_token)

        for row in rows:
            print(row)

        if not page_token:
            break
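# Example invocation against a public dataset; the query itself is arbitrary
# and only illustrative:
#
#     async_query(
#         'SELECT corpus FROM `bigquery-public-data.samples.shakespeare` '
#         'GROUP BY corpus')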
Example #4
def delete_table(dataset_name, table_name, project=None):
    """Deletes a table in a given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    table.delete()

    print('Table {}:{} deleted.'.format(dataset_name, table_name))
Example #5
def test_delete_table(capsys):
    # Create a table to delete
    bigquery_client = bigquery.Client()
    dataset = bigquery_client.dataset(DATASET_ID)
    table = dataset.table('test_delete_table')

    if not table.exists():
        table.schema = [bigquery.SchemaField('id', 'INTEGER')]
        table.create()

    snippets.delete_table(DATASET_ID, table.name)

    assert not table.exists()
Example #6
def export_data_to_gcs(dataset_name, table_name, destination):
    bigquery_client = bigquery.Client()
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)
    job_name = str(uuid.uuid4())

    job = bigquery_client.extract_table_to_storage(job_name, table,
                                                   destination)

    job.begin()

    wait_for_job(job)

    print('Exported {}:{} to {}'.format(dataset_name, table_name, destination))
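# Example invocation; the dataset, table and bucket names are placeholders,
# and the destination must be a gs:// URI the extract job can write to:
#
#     export_data_to_gcs('my_dataset', 'my_table', 'gs://my-bucket/my_table.csv')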
Example #7
def load_data_from_gcs(dataset_name, table_name, source):
    bigquery_client = bigquery.Client()
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)
    job_name = str(uuid.uuid4())

    job = bigquery_client.load_table_from_storage(
        job_name, table, source)

    job.begin()

    wait_for_job(job)

    print('Loaded {} rows into {}:{}.'.format(
        job.output_rows, dataset_name, table_name))
Example #8
def stream_data(dataset_name, table_name, json_data):
    bigquery_client = bigquery.Client()
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)
    data = json.loads(json_data)

    # Reload the table to get the schema.
    table.reload()

    rows = [data]
    errors = table.insert_data(rows)

    if not errors:
        print('Loaded 1 row into {}:{}'.format(dataset_name, table_name))
    else:
        print('Errors:')
        pprint(errors)
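# Example invocation. The insert_data() API used here expects each row as a
# sequence of values in schema-field order, so json_data should be a JSON
# array whose values line up with the table's schema. The values below are
# made up and assume the Name/Age/Weight schema from the create_table()
# snippet later in this section:
#
#     stream_data('my_dataset', 'my_table', '["Gandalf", 2000, 140.0]')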
Example #9
def load_data_from_file(dataset_name, table_name, source_file_name):
    bigquery_client = bigquery.Client()
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    # Reload the table to get the schema.
    table.reload()

    with open(source_file_name, 'rb') as source_file:
        # This example uses CSV, but you can use other formats.
        # See https://cloud.google.com/bigquery/loading-data
        job = table.upload_from_file(source_file, source_format='text/csv')

    job.begin()

    wait_for_job(job)

    print('Loaded {} rows into {}:{}.'.format(job.output_rows, dataset_name,
                                              table_name))
Example #10
def list_rows(dataset_name, table_name, project=None):
    """Prints rows in the given table.

    Will print 25 rows at most for brevity, as tables can contain a large
    number of rows.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)
    table = dataset.table(table_name)

    if not table.exists():
        print('Table {}:{} does not exist.'.format(dataset_name, table_name))
        return

    # Reload the table so that the schema is available.
    table.reload()

    rows = []
    page_token = None

    # Load at most 25 results. You can change this to `while True` and change
    # the max_results argument to load more rows from BigQuery, but note
    # that this can take some time. It's preferred to use a query.
    while len(rows) < 25:
        results, total_rows, page_token = table.fetch_data(
            max_results=25, page_token=page_token)
        rows.extend(results)

        if not page_token:
            break

    # Use format to create a simple table.
    format_string = '{:<16} ' * len(table.schema)

    # Print schema field names
    field_names = [field.name for field in table.schema]
    print(format_string.format(*field_names))

    for row in rows:
        print(format_string.format(*row))
Example #11
def list_datasets(project=None):
    """Lists all datasets in a given project.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)

    datasets = []
    page_token = None

    while True:
        results, page_token = bigquery_client.list_datasets(
            page_token=page_token)
        datasets.extend(results)

        if not page_token:
            break

    for dataset in datasets:
        print(dataset.name)
Example #12
    def _init_bigquery_dataset(self):
        from gcloud import bigquery
        from gcloud.bigquery.dataset import AccessGrant
        DATASET_URI = 'bigquery.googleapis.com/projects/%s/datasets/%s' % (
            Config.CLIENT.project,
            DATASET_NAME,
        )

        # Create the destination dataset, and set up the ACL to allow
        # Stackdriver Logging to write into it.
        bigquery_client = bigquery.Client()
        dataset = bigquery_client.dataset(DATASET_NAME)
        dataset.create()
        self.to_delete.append(dataset)
        dataset.reload()
        grants = dataset.access_grants
        grants.append(
            AccessGrant('WRITER', 'groupByEmail', '*****@*****.**'))
        dataset.access_grants = grants
        dataset.update()
        return DATASET_URI
Example #13
def sync_query(query):
    client = bigquery.Client()
    query_results = client.run_sync_query(query)

    # Use standard SQL syntax for queries.
    # See: https://cloud.google.com/bigquery/sql-reference/
    query_results.use_legacy_sql = False

    query_results.run()

    # Drain the query results by requesting a page at a time.
    page_token = None

    while True:
        rows, total_rows, page_token = query_results.fetch_data(
            max_results=10, page_token=page_token)

        for row in rows:
            print(row)

        if not page_token:
            break
Example #14
def list_tables(dataset_name, project=None):
    """Lists all of the tables in a given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)

    if not dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        return

    tables = []
    page_token = None

    while True:
        results, page_token = dataset.list_tables(page_token=page_token)
        tables.extend(results)

        if not page_token:
            break

    for table in tables:
        print(table.name)
def create_table(dataset_name, table_name, project=None):
    """Creates a simple table in the given dataset.

    If no project is specified, then the currently active project is used.
    """
    bigquery_client = bigquery.Client(project=project)
    dataset = bigquery_client.dataset(dataset_name)

    if not dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        return

    table = dataset.table(table_name)

    # Set the table schema
    table.schema = (
        bigquery.SchemaField('Name', 'STRING'),
        bigquery.SchemaField('Age', 'INTEGER'),
        bigquery.SchemaField('Weight', 'FLOAT'),
    )

    table.create()

    print('Created table {} in dataset {}.'.format(table_name, dataset_name))
def update_table_schema(destination_table, source_vcf, description=None):
  """Updates a BigQuery table with the variants schema using a VCF header.

  Args:
    destination_table: BigQuery table name, PROJECT_ID.DATASET_NAME.TABLE_NAME.
    source_vcf: Path to local or remote (Cloud Storage) VCF or gzipped VCF file.
    description: Optional description for the BigQuery table.

  Raises:
    ValueError: If destination_table cannot be parsed.
  """

  dest_table = tokenize_table_name(destination_table)
  dest_project_id, dest_dataset_name, dest_table_name = dest_table

  # Load the source VCF
  descriptions = Descriptions()
  descriptions.add_from_vcf(source_vcf)

  # Initialize the BQ client
  client = bigquery.Client(project=dest_project_id)

  # Load the destination table
  dest_dataset = client.dataset(dest_dataset_name)
  dest_dataset.reload()

  dest_table = dest_dataset.table(dest_table_name)
  dest_table.reload()

  if description is not None:
    dest_table.patch(description=description[:_MAX_LENGTH])
    if len(description) > _MAX_LENGTH:
      logging.warning(_TRUNCATION_WARNING, 'table description')

  # Set the description on the variant fields and the call fields.
  #
  # The (non-fixed) variant field descriptions come from the ##INFO headers
  # The (non-fixed) call fields descriptions can come from the ##FORMAT headers
  #   as well as the ##INFO headers.

  # Process variant fields
  call_field = None
  for field in dest_table.schema:
    if field.name.lower() in _FIXED_VARIANT_FIELDS:
      field.description = _FIXED_VARIANT_FIELDS[field.name.lower()]
      logging.debug('Variant(fixed): %s: %s', field.name, field.description)

    elif field.name in descriptions.info_fields:
      field.description = descriptions.info_fields[field.name]
      logging.debug('Variant(INFO) %s: %s', field.name, field.description)

    elif field.name.lower() == 'filter':
      field.description = descriptions.filter_description

    if field.name == 'call':
      call_field = field

    if field.description is not None and len(field.description) > _MAX_LENGTH:
      logging.warning(_TRUNCATION_WARNING, field.name)
      field.description = field.description[:_MAX_LENGTH]

  # Process call fields
  for field in call_field.fields:
    if field.name.lower() in _FIXED_CALL_FIELDS:
      field.description = _FIXED_CALL_FIELDS[field.name.lower()]
      logging.debug('Call(fixed): %s: %s', field.name, field.description)

    elif field.name in descriptions.format_fields:
      field.description = descriptions.format_fields[field.name]
      logging.debug('Call(FORMAT) %s: %s', field.name, field.description)

    elif field.name in descriptions.info_fields:
      field.description = descriptions.info_fields[field.name]
      logging.debug('Call(INFO) %s: %s', field.name, field.description)

    elif field.name.lower() == 'filter':
      field.description = descriptions.filter_description

    if field.description is not None and len(field.description) > _MAX_LENGTH:
      logging.warning(_TRUNCATION_WARNING, field.name)
      field.description = field.description[:_MAX_LENGTH]

  logging.info('Updating table %s', dest_table.path)
  dest_table.patch(schema=dest_table.schema)
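# tokenize_table_name() is referenced above but not shown. A minimal sketch,
# assuming it only needs to split the PROJECT_ID.DATASET_NAME.TABLE_NAME form
# described in the docstring:
def tokenize_table_name(table_path):
  """Splits a PROJECT_ID.DATASET_NAME.TABLE_NAME string into its parts."""
  parts = table_path.split('.')
  if len(parts) != 3:
    raise ValueError(
        'Expected PROJECT_ID.DATASET_NAME.TABLE_NAME, got: %r' % table_path)
  return tuple(parts)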
Example #17
TABLE_NAME = 'airport'
BUCKET_NAME = 'satish123'
FILE = 'airport.csv'
SOURCE = 'gs://{}/{}'.format(BUCKET_NAME, FILE)

SCHEMA = [
    bq.SchemaField('name', 'STRING', mode='required'),
    bq.SchemaField('country', 'STRING', mode='required'),
    bq.SchemaField('area_code', 'STRING', mode='required'),
    bq.SchemaField('origin', 'STRING', mode='required')
]

# CREDENTIALS = GoogleCredentials.get_application_default()

client = bq.Client(project=BILLING_PROJECT_ID)


# Dataset
# Check if the dataset exists
def create_datasets(name):
    dataset = client.dataset(name)
    if not dataset.exists():
        dataset.create()
        print("Dataset {} created".format(name))

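# A hypothetical continuation of this example: create the table with SCHEMA in
# a given dataset and start a load job from SOURCE, using the same old client
# API as the load_data_from_gcs() snippet above. The helper name and the
# uuid-based job name are assumptions, not part of the original.
import uuid


def create_table_and_load(dataset_name):
    dataset = client.dataset(dataset_name)
    table = dataset.table(TABLE_NAME, SCHEMA)
    if not table.exists():
        table.create()
    job = client.load_table_from_storage(str(uuid.uuid4()), table, SOURCE)
    job.skip_leading_rows = 1  # assumes the CSV has a header row
    job.begin()
    return job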
Example #18
def setUpModule():
    _helpers.PROJECT = TESTS_PROJECT
    Config.CLIENT = bigquery.Client()
Example #19
  * A query is run against the public dataset,
    bigquery-public-data.samples.natality, selecting only the data of interest
    to the regression, the output of which is stored in the “regression_input”
    table.
  * The output table is moved over the wire to the user's default project via
    the built-in BigQuery Connector for Spark that bridges BigQuery and Cloud
    Dataproc.
"""

from gcloud import bigquery
from gcloud.bigquery import job
from gcloud.bigquery.table import *

# Create a new Google BigQuery client using Google Cloud Platform project
# defaults.
bq = bigquery.Client()

# Create a new BigQuery dataset.
reg_dataset = bq.dataset("natality_regression")
reg_dataset.create()

# In the new BigQuery dataset, create a new table.
table = reg_dataset.table(name="regression_input")
# The table needs a schema before it can be created and accept data.
# We create an ordered list of the columns using SchemaField objects.
schema = []
schema.append(SchemaField("weight_pounds", "float"))
schema.append(SchemaField("mother_age", "integer"))
schema.append(SchemaField("father_age", "integer"))
schema.append(SchemaField("gestation_weeks", "integer"))
schema.append(SchemaField("weight_gain_pounds", "integer"))

import operator
import time

import unittest2

from gcloud import _helpers
from gcloud.environment_vars import TESTS_PROJECT
from gcloud import bigquery


_helpers.PROJECT = TESTS_PROJECT
CLIENT = bigquery.Client()
DATASET_NAME = 'system_tests_%012d' % (1000 * time.time(),)


class TestBigQuery(unittest2.TestCase):

    def setUp(self):
        self.to_delete = []

    def tearDown(self):
        for doomed in self.to_delete:
            doomed.delete()

    def test_create_dataset(self):
        dataset = CLIENT.dataset(DATASET_NAME)
        self.assertFalse(dataset.exists())