Пример #1
0
    def test_list_tables(self):
        """System test: list tables in a freshly created dataset,
        before and after inserting tables."""
        dataset = Config.CLIENT.dataset(DATASET_NAME)
        self.assertFalse(dataset.exists())
        dataset.create()
        self.to_delete.append(dataset)

        # Retrieve tables before any are created for the dataset.
        all_tables, token = dataset.list_tables()
        self.assertEqual(all_tables, [])
        # assertIsNone gives clearer failure output than
        # assertEqual(x, None) / assertTrue(x is None).
        self.assertIsNone(token)

        # Insert some tables to be listed.  The millisecond timestamp
        # keeps names unique across test runs.
        tables_to_create = [
            'new%d' % (1000 * time.time(), ),
            'newer%d' % (1000 * time.time(), ),
            'newest%d' % (1000 * time.time(), ),
        ]
        full_name = bigquery.SchemaField('full_name',
                                         'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        for table_name in tables_to_create:
            table = dataset.table(table_name, schema=[full_name, age])
            table.create()
            # Prepend so tables are cleaned up before their dataset.
            self.to_delete.insert(0, table)

        # Retrieve the tables.
        all_tables, token = dataset.list_tables()
        self.assertIsNone(token)
        created = [
            table for table in all_tables
            if (table.name in tables_to_create
                and table.dataset_name == DATASET_NAME)
        ]
        self.assertEqual(len(created), len(tables_to_create))
Пример #2
0
 def test_update_table(self):
     """System test: appending a NULLABLE column through update()
     round-trips the full schema to the service and back."""
     dataset = Config.CLIENT.dataset(DATASET_NAME)
     self.assertFalse(dataset.exists())
     dataset.create()
     self.to_delete.append(dataset)
     TABLE_NAME = 'test_table'
     initial_schema = [
         bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
         bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
     ]
     table = dataset.table(TABLE_NAME, schema=initial_schema)
     self.assertFalse(table.exists())
     table.create()
     self.to_delete.insert(0, table)
     self.assertTrue(table.exists())
     # Append a new optional field and push the change to the backend.
     new_schema = table.schema
     new_schema.append(
         bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE'))
     table.schema = new_schema
     table.update()
     # Every field of the refreshed schema must match what we sent.
     self.assertEqual(len(table.schema), len(new_schema))
     for got, want in zip(table.schema, new_schema):
         self.assertEqual(got.name, want.name)
         self.assertEqual(got.field_type, want.field_type)
         self.assertEqual(got.mode, want.mode)
Пример #3
0
    def test_update_table(self):
        """System test: append a NULLABLE 'voter' field via update()
        and verify the schema the service reports afterwards."""
        dataset = Config.CLIENT.dataset(DATASET_NAME)
        self.assertFalse(dataset.exists())

        # Dataset creation is rate-limited; a 403 Forbidden from the
        # backend means we hit the quota, so retry after a delay.
        # See: https://cloud.google.com/bigquery/quota-policy
        @Retry(Forbidden, tries=2, delay=30)
        def _create_dataset():
            dataset.create()

        _create_dataset()
        self.to_delete.append(dataset)
        TABLE_NAME = 'test_table'
        table = dataset.table(TABLE_NAME, schema=[
            bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
            bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ])
        self.assertFalse(table.exists())
        table.create()
        self.to_delete.insert(0, table)
        self.assertTrue(table.exists())
        # Push the extra column, then compare the result field-by-field.
        updated = table.schema
        updated.append(
            bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE'))
        table.schema = updated
        table.update()
        self.assertEqual(len(table.schema), len(updated))
        for got, want in zip(table.schema, updated):
            self.assertEqual(got.name, want.name)
            self.assertEqual(got.field_type, want.field_type)
            self.assertEqual(got.mode, want.mode)
Пример #4
0
 def test_create_table(self):
     """System test: a table created with a two-field schema exists
     afterwards and keeps its name."""
     dataset = Config.CLIENT.dataset(DATASET_NAME)
     self.assertFalse(dataset.exists())
     dataset.create()
     self.to_delete.append(dataset)
     TABLE_NAME = 'test_table'
     schema = [
         bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
         bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
     ]
     table = dataset.table(TABLE_NAME, schema=schema)
     self.assertFalse(table.exists())
     table.create()
     self.to_delete.insert(0, table)
     self.assertTrue(table.exists())
     self.assertEqual(table.name, TABLE_NAME)
Пример #5
0
    def test_load_table_then_dump_table(self):
        """System test: stream rows with TIMESTAMP values (one NULL)
        into a new table and read them back once they become visible."""
        import datetime
        from gcloud._helpers import UTC

        NOW_SECONDS = 1448911495.484366
        NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(
            tzinfo=UTC)
        ROWS = [
            ('Phred Phlyntstone', 32, NOW),
            ('Bharney Rhubble', 33, NOW + datetime.timedelta(seconds=10)),
            ('Wylma Phlyntstone', 29, NOW + datetime.timedelta(seconds=20)),
            ('Bhettye Rhubble', 27, None),
        ]
        ROW_IDS = range(len(ROWS))
        dataset = Config.CLIENT.dataset(DATASET_NAME)
        self.assertFalse(dataset.exists())
        dataset.create()
        self.to_delete.append(dataset)
        TABLE_NAME = 'test_table'
        schema = [
            bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
            bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
            bigquery.SchemaField('now', 'TIMESTAMP'),
        ]
        table = dataset.table(TABLE_NAME, schema=schema)
        self.assertFalse(table.exists())
        table.create()
        self.to_delete.insert(0, table)
        self.assertTrue(table.exists())

        errors = table.insert_data(ROWS, ROW_IDS)
        self.assertEqual(len(errors), 0)

        # Streamed rows may take up to ~90 seconds to become readable:
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        rows = ()
        for _ in range(9):
            rows, _, _ = table.fetch_data()
            if rows:
                break
            time.sleep(10)

        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
 def test_patch_table(self):
     """System test: patch() updates friendly_name and description
     and reflects the new values on the table object."""
     dataset = CLIENT.dataset(DATASET_NAME)
     self.assertFalse(dataset.exists())
     dataset.create()
     self.to_delete.append(dataset)
     TABLE_NAME = 'test_table'
     full_name = bigquery.SchemaField('full_name', 'STRING',
                                      mode='REQUIRED')
     age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
     table = dataset.table(TABLE_NAME, schema=[full_name, age])
     self.assertFalse(table.exists())
     table.create()
     self.to_delete.insert(0, table)
     self.assertTrue(table.exists())
     # assertIsNone gives clearer failure output than assertEqual(x, None).
     self.assertIsNone(table.friendly_name)
     self.assertIsNone(table.description)
     table.patch(friendly_name='Friendly', description='Description')
     self.assertEqual(table.friendly_name, 'Friendly')
     self.assertEqual(table.description, 'Description')
Пример #7
0
    def test_load_table_then_dump_table(self):
        """System test: stream four rows into a fresh table, then poll
        fetch_data() until they become visible and compare by age."""
        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        ROW_IDS = range(len(ROWS))
        DATASET_NAME = 'system_tests'
        dataset = CLIENT.dataset(DATASET_NAME)
        self.assertFalse(dataset.exists())
        dataset.create()
        self.to_delete.append(dataset)
        TABLE_NAME = 'test_table'
        schema = [
            bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
            bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        table = dataset.table(TABLE_NAME, schema=schema)
        self.assertFalse(table.exists())
        table.create()
        self.to_delete.insert(0, table)
        self.assertTrue(table.exists())

        errors = table.insert_data(ROWS, ROW_IDS)
        self.assertEqual(len(errors), 0)

        # Streamed rows may take up to ~90 seconds to become readable:
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        rows = ()
        for _ in range(9):
            rows, _, _ = table.fetch_data()
            if rows:
                break
            time.sleep(10)

        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
Пример #8
0
def test_delete_table(capsys):
    """Snippet test: ``snippets.delete_table`` removes an existing table."""
    client = bigquery.Client()
    target = client.dataset(DATASET_ID).table('test_delete_table')

    # Create the table first if a previous run already removed it.
    if not target.exists():
        target.schema = [bigquery.SchemaField('id', 'INTEGER')]
        target.create()

    snippets.delete_table(DATASET_ID, target.name)

    assert not target.exists()
Пример #9
0
def create_table(dataset_name, table_name, project=None):
    """Create a simple three-column table in the given dataset.

    If no project is specified, the currently active project is used.
    Prints a message and returns early when the dataset does not exist.
    """
    client = bigquery.Client(project=project)
    dataset = client.dataset(dataset_name)

    if not dataset.exists():
        print('Dataset {} does not exist.'.format(dataset_name))
        return

    table = dataset.table(table_name)

    # Fixed demo schema: Name / Age / Weight.
    table.schema = (
        bigquery.SchemaField('Name', 'STRING'),
        bigquery.SchemaField('Age', 'INTEGER'),
        bigquery.SchemaField('Weight', 'FLOAT'),
    )

    table.create()

    print('Created table {} in dataset {}.'.format(table_name, dataset_name))
Пример #10
0
    def test_load_table_from_storage_then_dump_table(self):
        """System test: upload a CSV to GCS, load it into a BigQuery
        table with a load job, then fetch the rows and compare."""
        import csv
        import tempfile
        from gcloud.storage import Client as StorageClient
        # Millisecond timestamp keeps bucket/job names unique per run.
        TIMESTAMP = 1000 * time.time()
        BUCKET_NAME = 'bq_load_test_%d' % (TIMESTAMP, )
        BLOB_NAME = 'person_ages.csv'
        GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        s_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = s_client.create_bucket(BUCKET_NAME)
        self.to_delete.append(bucket)

        blob = bucket.blob(BLOB_NAME)

        # Write header + rows to a temp CSV and upload it; rewind=True
        # seeks back to the start of the file before reading.
        with tempfile.TemporaryFile(mode='w+') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(ROWS)
            blob.upload_from_file(csv_file,
                                  rewind=True,
                                  content_type='text/csv')

        # Prepend so the blob is cleaned up before its bucket.
        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(DATASET_NAME)
        dataset.create()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name',
                                         'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        # CREATE_NEVER: table must already exist (created above);
        # skip_leading_rows=1 skips the CSV header;
        # WRITE_EMPTY: fail rather than overwrite existing data.
        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_%d' % (TIMESTAMP, ), table, GS_URL)
        job.create_disposition = 'CREATE_NEVER'
        job.skip_leading_rows = 1
        job.source_format = 'CSV'
        job.write_disposition = 'WRITE_EMPTY'

        job.begin()

        counter = 9  # Allow for 90 seconds of lag.

        # Poll the job until it reports completion or we give up.
        while job.state not in ('DONE', 'done') and counter > 0:
            counter -= 1
            job.reload()
            if job.state not in ('DONE', 'done'):
                time.sleep(10)

        self.assertTrue(job.state in ('DONE', 'done'))

        # Compare loaded rows to the source rows, sorted by age.
        rows, _, _ = table.fetch_data()
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
Пример #11
0
import time
from gcloud import bigquery as bq
#from google.cloud import bigquery as bq
from oauth2client.client import GoogleCredentials

# Configuration
BILLING_PROJECT_ID = 'onyx-cumulus-196507'
DATASET_NAME = 'bigquery123'
TABLE_NAME = 'airport'
BUCKET_NAME = 'satish123'
FILE = 'airport.csv'
# NOTE(review): this literal contains no '{}' placeholders, so the
# .format(BUCKET_NAME, FILE) call below is a no-op; the intent was
# presumably a template like 'gs://{}/{}'.format(BUCKET_NAME, FILE)
# — confirm before relying on BUCKET_NAME/FILE here.
SOURCE = 'https://storage.cloud.google.com/satish123/airport.csv?_ga=2.200274028.-331489596.1519587350&_gac=1.252996475.1519744301.CjwKCAiAoNTUBRBUEiwAWje2ltt6Onlm-oURmJ0zEqOD_dy_wmi_5yUsCdGXFro37ANM_5QjwIFk5RoC4PUQAvD_BwE'.format(
    BUCKET_NAME, FILE)

# Schema for the 'airport' table.
# NOTE(review): BigQuery field modes are conventionally upper-case
# ('REQUIRED'); verify the client accepts the lower-case form used here.
SCHEMA = [
    bq.SchemaField('name', 'STRING', mode='required'),
    bq.SchemaField('country', 'STRING', mode='required'),
    bq.SchemaField('area_code', 'STRING', mode='required'),
    bq.SchemaField('origin', 'STRING', mode='required')
]

# CREDENTIALS = GoogleCredentials.get_application_default()

client = bq.Client(project=BILLING_PROJECT_ID)


# Dataset
# Check if the dataset exists
def create_datasets(name):
    dataset = client.dataset(name)
    try: