    def _set_up_table(self, row_count):
        from google.cloud.spanner import KeySet

        def _row_data(max_index):
            for index in range(max_index):
                yield [
                    index,
                    'First%09d' % (index, ),
                    'Last%09d' % (index, ),
                    'test-%09d@example.com' % (index, )  # placeholder address (redacted in source)
                ]

        keyset = KeySet(all_=True)

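        # Reload the database until _has_all_ddl reports the schema is fully applied.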
        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.transaction() as transaction:
            transaction.delete(self.TABLE, keyset)
            transaction.insert(self.TABLE, self.COLUMNS, _row_data(row_count))

        return session, keyset, transaction.committed
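
# All of these examples lean on retry helpers (RetryInstanceState, RetryResult,
# retry_403) from the shared test_utils package used by the google-cloud system
# tests.  The class below is only an illustrative sketch of what
# RetryInstanceState does -- the real helper's defaults and error handling may
# differ: it wraps a bound method and keeps calling it, with exponential
# backoff, until a predicate evaluated on the method's instance returns True.
import time


class _RetryInstanceStateSketch(object):
    """Hypothetical stand-in for test_utils' RetryInstanceState (sketch only)."""

    def __init__(self, instance_predicate, max_tries=4, delay=1, backoff=2):
        self.instance_predicate = instance_predicate
        self.max_tries = max_tries
        self.delay = delay
        self.backoff = backoff

    def __call__(self, to_wrap):
        def wrapped(*args, **kwargs):
            delay = self.delay
            for _ in range(self.max_tries):
                result = to_wrap(*args, **kwargs)
                # ``to_wrap`` is a bound method (e.g. job.reload); the predicate
                # is checked against the object that method is bound to.
                if self.instance_predicate(to_wrap.__self__):
                    return result
                time.sleep(delay)
                delay *= self.backoff
            raise AssertionError('predicate was never satisfied')
        return wrapped

# Usage mirrors the examples: _RetryInstanceStateSketch(_job_done)(job.reload)()
# re-invokes job.reload() until _job_done(job) is True (or tries are exhausted).
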
    def test_job_cancel(self):
        DATASET_NAME = _make_dataset_name('job_cancel')
        JOB_NAME = 'fetch_' + DATASET_NAME
        TABLE_NAME = 'test_table'
        QUERY = 'SELECT * FROM %s.%s' % (DATASET_NAME, TABLE_NAME)

        dataset = Config.CLIENT.dataset(DATASET_NAME)

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name',
                                         'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        job = Config.CLIENT.run_async_query(JOB_NAME, QUERY)
        job.begin()
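        # Cancellation is not instantaneous; the retry below polls the job until
        # it reaches a terminal state.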
        job.cancel()

        def _job_done(instance):
            return instance.state in ('DONE', 'done')

        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()
    def test_transaction_read_and_insert_or_update_then_commit(self):
        from google.cloud.spanner import KeySet
        keyset = KeySet(all_=True)

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.delete(self.TABLE, keyset)
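            # Clear any existing rows so the transactional read below starts empty.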

        with session.transaction() as transaction:
            rows = list(transaction.read(self.TABLE, self.COLUMNS, keyset))
            self.assertEqual(rows, [])

            transaction.insert_or_update(self.TABLE, self.COLUMNS,
                                         self.ROW_DATA)

            # Inserted rows can't be read until after commit.
            rows = list(transaction.read(self.TABLE, self.COLUMNS, keyset))
            self.assertEqual(rows, [])

        rows = list(session.read(self.TABLE, self.COLUMNS, keyset))
        self._check_row_data(rows)
    def test_fetch_delete_subscription_w_deleted_topic(self):
        from google.cloud.iterator import MethodIterator
        TO_DELETE = 'delete-me' + unique_resource_id('-')
        ORPHANED = 'orphaned' + unique_resource_id('-')
        topic = Config.CLIENT.topic(TO_DELETE)
        topic.create()
        subscription = topic.subscription(ORPHANED)
        subscription.create()
        topic.delete()
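        # Deleting the topic orphans the subscription; the listing below is retried
        # because the change may take a moment to show up.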

        def _fetch():
            return list(MethodIterator(Config.CLIENT.list_subscriptions))

        def _found_orphan(result):
            names = [subscription.name for subscription in result]
            return ORPHANED in names

        retry_until_found_orphan = RetryResult(_found_orphan)
        all_subs = retry_until_found_orphan(_fetch)()

        created = [
            subscription for subscription in all_subs
            if subscription.name == ORPHANED
        ]
        self.assertEqual(len(created), 1)
        orphaned = created[0]

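        # Once the deletion propagates, reloading should show the subscription with
        # no topic attached.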
        def _no_topic(instance):
            return instance.topic is None

        retry_until_no_topic = RetryInstanceState(_no_topic)
        retry_until_no_topic(orphaned.reload)()

        self.assertIsNone(orphaned.topic)
        orphaned.delete()
    def test_load_table_from_local_file_then_dump_table(self):
        import csv
        import tempfile
        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_local_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name',
                                         'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        with tempfile.NamedTemporaryFile(mode='w+') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(ROWS)
            csv_file.flush()

            with open(csv_file.name, 'rb') as csv_read:
                job = table.upload_from_file(
                    csv_read,
                    source_format='CSV',
                    skip_leading_rows=1,
                    create_disposition='CREATE_NEVER',
                    write_disposition='WRITE_EMPTY',
                )
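        # upload_from_file() starts a server-side load job; the retry below polls
        # it until it finishes.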

        def _job_done(instance):
            return instance.state.lower() == 'done'

        # Retry until done.
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        self.assertTrue(_job_done(job))
        self.assertEqual(job.output_rows, len(ROWS))

        rows, _, _ = table.fetch_data()
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
    def test_batch_insert_or_update_then_query(self):

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        with session.batch() as batch:
            batch.insert_or_update(self.TABLE, self.COLUMNS, self.ROW_DATA)

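        # Reading at the batch's commit timestamp makes the rows written above visible.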
        snapshot = session.snapshot(read_timestamp=batch.committed)
        rows = list(snapshot.execute_sql(self.SQL))
        self._check_row_data(rows)
    def test_message_pull_mode_e2e(self):
        import operator
        TOPIC_NAME = 'message-e2e' + unique_resource_id('-')
        topic = Config.CLIENT.topic(TOPIC_NAME,
                                    timestamp_messages=True)
        self.assertFalse(topic.exists())
        topic.create()
        self.to_delete.append(topic)
        SUBSCRIPTION_NAME = 'subscribing-now' + unique_resource_id('-')
        subscription = topic.subscription(SUBSCRIPTION_NAME)
        self.assertFalse(subscription.exists())
        subscription.create()
        self.to_delete.append(subscription)

        MESSAGE_1 = b'MESSAGE ONE'
        MESSAGE_2 = b'MESSAGE TWO'
        EXTRA_1 = 'EXTRA 1'
        EXTRA_2 = 'EXTRA 2'
        topic.publish(MESSAGE_1, extra=EXTRA_1)
        topic.publish(MESSAGE_2, extra=EXTRA_2)
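        # timestamp_messages=True stamps each message at publish time; those
        # timestamps are used below to restore publish order on the pulled messages.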

        class Hoover(object):

            def __init__(self):
                self.received = []

            def done(self, *dummy):
                return len(self.received) == 2

            def suction(self):
                with subscription.auto_ack(max_messages=2) as ack:
                    self.received.extend(ack.values())

        hoover = Hoover()
        retry = RetryInstanceState(hoover.done)
        retry(hoover.suction)()

        message1, message2 = sorted(hoover.received,
                                    key=operator.attrgetter('timestamp'))

        self.assertEqual(message1.data, MESSAGE_1)
        self.assertEqual(message1.attributes['extra'], EXTRA_1)
        self.assertIsNotNone(message1.service_timestamp)

        self.assertEqual(message2.data, MESSAGE_2)
        self.assertEqual(message2.attributes['extra'], EXTRA_2)
        self.assertIsNotNone(message2.service_timestamp)
    def test_batch_insert_then_read(self):
        from google.cloud.spanner import KeySet
        keyset = KeySet(all_=True)

        retry = RetryInstanceState(_has_all_ddl)
        retry(self._db.reload)()

        session = self._db.session()
        session.create()
        self.to_delete.append(session)

        batch = session.batch()
        batch.delete(self.TABLE, keyset)
        batch.insert(self.TABLE, self.COLUMNS, self.ROW_DATA)
        batch.commit()
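        # commit() sets batch.committed to the commit timestamp, which the snapshot
        # below reads at.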

        snapshot = session.snapshot(read_timestamp=batch.committed)
        rows = list(snapshot.read(self.TABLE, self.COLUMNS, keyset))
        self._check_row_data(rows)
    def test_load_table_from_storage_then_dump_table(self):
        import csv
        import tempfile
        from google.cloud.storage import Client as StorageClient
        local_id = unique_resource_id()
        BUCKET_NAME = 'bq_load_test' + local_id
        BLOB_NAME = 'person_ages.csv'
        GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        s_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = s_client.create_bucket(BUCKET_NAME)
        self.to_delete.append(bucket)

        blob = bucket.blob(BLOB_NAME)

        with tempfile.TemporaryFile(mode='w+') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(ROWS)
            blob.upload_from_file(csv_file,
                                  rewind=True,
                                  content_type='text/csv')

        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_gcs_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name',
                                         'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_' + local_id, table, GS_URL)
        job.create_disposition = 'CREATE_NEVER'
        job.skip_leading_rows = 1
        job.source_format = 'CSV'
        job.write_disposition = 'WRITE_EMPTY'

        job.begin()
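        # begin() only submits the load job; the retry below waits for it to finish.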

        def _job_done(instance):
            return instance.state in ('DONE', 'done')

        # Allow for 90 seconds of "warm up" before rows visible.  See:
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        rows, _, _ = table.fetch_data()
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))