Example #1
def test_extract_table(client, to_delete):
    DATASET_ID = 'export_data_dataset_{}'.format(_millis())
    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table_ref = dataset.table('person_ages')
    table = client.create_table(bigquery.Table(table_ref, schema=SCHEMA))
    to_delete.insert(0, table)
    client.create_rows(table, ROWS)

    bucket_name = 'extract_person_ages_job_{}'.format(_millis())
    # [START extract_table]
    from google.cloud.storage import Client as StorageClient

    storage_client = StorageClient()
    bucket = storage_client.create_bucket(bucket_name)  # API request
    destination_blob_name = 'person_ages_out.csv'
    destination = bucket.blob(destination_blob_name)

    destination_uri = 'gs://{}/{}'.format(bucket_name, destination_blob_name)
    extract_job = client.extract_table(table_ref,
                                       destination_uri)  # API request
    extract_job.result(timeout=100)  # Waits for job to complete.

    got = destination.download_as_string().decode('utf-8')  # API request
    assert 'Bharney Rhubble' in got
    # [END extract_table]
    to_delete.append(bucket)
    to_delete.insert(0, destination)
Example #2
    def __init__(
        self,
        application_credentials: Optional[Union[str, os.PathLike]] = None,
        credentials: Optional["Credentials"] = None,
        project: Optional[str] = None,
        storage_client: Optional["StorageClient"] = None,
        local_cache_dir: Optional[Union[str, os.PathLike]] = None,
    ):
        """Class constructor. Sets up a [`Storage
        Client`](https://googleapis.dev/python/storage/latest/client.html).
        Supports the following authentication methods of `Storage Client`.

        - Environment variable `"GOOGLE_APPLICATION_CREDENTIALS"` containing a
          path to a JSON credentials file for a Google service account. See
          [Authenticating as a Service
          Account](https://cloud.google.com/docs/authentication/production).
        - File path to a JSON credentials file for a Google service account.
        - OAuth2 Credentials object and a project name.
        - Instantiated and already authenticated `Storage Client`.

        If multiple methods are used, priority order is reverse of list above
        (later in list takes priority). If no authentication methods are used,
        then the client will be instantiated as anonymous, which will only have
        access to public buckets.

        Args:
            application_credentials (Optional[Union[str, os.PathLike]]): Path to Google service
                account credentials file.
            credentials (Optional[Credentials]): The OAuth2 Credentials to use for this client.
                See documentation for [`StorageClient`](
                https://googleapis.dev/python/storage/latest/client.html).
            project (Optional[str]): The project which the client acts on behalf of. See
                documentation for [`StorageClient`](
                https://googleapis.dev/python/storage/latest/client.html).
            storage_client (Optional[StorageClient]): Instantiated [`StorageClient`](
                https://googleapis.dev/python/storage/latest/client.html).
            local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache
                for downloaded files. If None, will use a temporary directory.
        """
        if application_credentials is None:
            application_credentials = os.getenv(
                "GOOGLE_APPLICATION_CREDENTIALS")

        if storage_client is not None:
            self.client = storage_client
        elif credentials is not None:
            self.client = StorageClient(credentials=credentials,
                                        project=project)
        elif application_credentials is not None:
            self.client = StorageClient.from_service_account_json(
                application_credentials)
        else:
            self.client = StorageClient.create_anonymous_client()

        super().__init__(local_cache_dir=local_cache_dir)
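For illustration, here is a minimal sketch of the four authentication paths listed in the docstring above, exercised directly against google.cloud.storage.Client; the file paths, project name, and Credentials object are placeholder assumptions rather than values from the original code.

import os
from google.cloud.storage import Client as StorageClient

# 1. Environment variable pointing at a service-account JSON key (placeholder path).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
env_client = StorageClient()

# 2. Explicit path to a service-account JSON key (placeholder path).
file_client = StorageClient.from_service_account_json("/path/to/service-account.json")

# 3. An OAuth2 Credentials object plus a project name (both obtained elsewhere).
# creds_client = StorageClient(credentials=credentials, project="my-project")

# 4. No credentials at all: anonymous client, limited to public buckets.
anon_client = StorageClient.create_anonymous_client()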
Example #3
    def _uploadFile(credentials: dict, bucket_name: str, key: str, file_name: str) -> None:
        """
        Uploads a file to cloud storage.
        :param credentials: The Google cloud storage service credentials retrieved from the Kubernetes secret.
        :param bucket_name: The name of the bucket.
        :param key: The key under which to save the file in cloud storage.
        :param file_name: The local file that will be uploaded.
        """
        credentials = ServiceCredentials.from_service_account_info(credentials)
        gcs_client = StorageClient(credentials.project_id, credentials)
        bucket = gcs_client.bucket(bucket_name)
        bucket.blob(key).upload_from_filename(file_name)
        logging.info("Backup uploaded to gcs://%s/%s", bucket_name, key)
Example #4
def _write_csv_to_storage(bucket_name, blob_name, header_row, data_rows):
    import csv

    from google.cloud._testing import _NamedTemporaryFile
    from google.cloud.storage import Client as StorageClient

    storage_client = StorageClient()

    # In the **very** rare case the bucket name is reserved, this
    # fails with a ConnectionError.
    bucket = storage_client.create_bucket(bucket_name)

    blob = bucket.blob(blob_name)

    with _NamedTemporaryFile() as temp:
        with open(temp.name, 'w') as csv_write:
            writer = csv.writer(csv_write)
            writer.writerow(header_row)
            writer.writerows(data_rows)

        with open(temp.name, 'rb') as csv_read:
            blob.upload_from_file(csv_read, content_type='text/csv')

    return bucket, blob
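A hypothetical call to the helper above; the bucket name (which must be globally unique) and the rows are placeholders.

header_row = ('Full Name', 'Age')
data_rows = [('Phred Phlyntstone', 32), ('Bharney Rhubble', 33)]
# Creates the bucket, writes the CSV locally, uploads it, and returns both handles.
bucket, blob = _write_csv_to_storage('my-unique-test-bucket', 'person_ages.csv',
                                     header_row, data_rows)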
Example #5
    def _downloadFile(credentials: dict, bucket_name: str, key: str,
                      file_name: str) -> str:
        """
        Downloads a file from cloud storage.
        :param credentials: The Google cloud storage service credentials retrieved from the Kubernetes secret.
        :param bucket_name: The name of the bucket.
        :param key: The key of the object to download from cloud storage.
        :param file_name: The local path to which the file will be downloaded.
        :return: The location of the downloaded file.
        """
        credentials = ServiceCredentials.from_service_account_info(credentials)
        gcs_client = StorageClient(credentials.project_id, credentials)
        bucket = gcs_client.get_bucket(bucket_name)
        logging.info("Going to download gcs://%s/%s", bucket_name, key)

        bucket.blob(key).download_to_filename(file_name)

        logging.info("Backup gcs://%s/%s downloaded to %s", bucket_name, key,
                     file_name)
        return file_name
Example #6
def test_extract_table(client, to_delete):
    DATASET_ID = 'export_data_dataset_{}'.format(_millis())
    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    table_ref = dataset.table('person_ages')
    to_insert = [
        {'full_name': name, 'age': age}
        for name, age in ROWS
    ]
    rows = [json.dumps(row) for row in to_insert]
    body = six.StringIO('{}\n'.format('\n'.join(rows)))
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = 'NEWLINE_DELIMITED_JSON'
    job_config.schema = SCHEMA
    to_delete.insert(0, table_ref)
    # Load a table using a local JSON file from memory.
    client.load_table_from_file(
        body, table_ref, job_config=job_config).result()

    bucket_name = 'extract_person_ages_job_{}'.format(_millis())
    # [START extract_table]
    from google.cloud.storage import Client as StorageClient

    storage_client = StorageClient()
    bucket = storage_client.create_bucket(bucket_name)  # API request
    destination_blob_name = 'person_ages_out.csv'
    destination = bucket.blob(destination_blob_name)

    destination_uri = 'gs://{}/{}'.format(bucket_name, destination_blob_name)
    extract_job = client.extract_table(
        table_ref, destination_uri)  # API request
    extract_job.result(timeout=100)  # Waits for job to complete.

    got = destination.download_as_string().decode('utf-8')  # API request
    assert 'Bharney Rhubble' in got
    # [END extract_table]
    to_delete.append(bucket)
    to_delete.insert(0, destination)
Example #7
    def _lastBackupFile(credentials: dict, bucket_name: str, key: str) -> str:
        """
        Gets the name of the last backup file in the bucket.
        :param credentials: The Google cloud storage service credentials retrieved from the Kubernetes secret.
        :param bucket_name: The name of the bucket.
        :param key: The prefix of the backups.
        :return: The name of the last backup file (with the prefix stripped), or None if no backups exist.
        """
        credentials = ServiceCredentials.from_service_account_info(credentials)
        gcs_client = StorageClient(credentials.project_id, credentials)
        bucket = gcs_client.get_bucket(bucket_name)
        blobs = bucket.list_blobs(prefix=key)

        last_blob = None
        for blob in blobs:
            logging.info("Found backup file '%s' in bucket '%s'", blob.name,
                         bucket_name)
            if last_blob is None or blob.time_created > last_blob.time_created:
                last_blob = blob

        logging.info("Returning backup file %s",
                     last_blob.name.replace(key, ""))
        return last_blob.name.replace(key, "") if last_blob else None
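Combining the backup helpers from Examples #3, #5, and #7, a hypothetical restore flow could look like the sketch below; the credentials dict, bucket name, and prefix are placeholders for values normally read from a Kubernetes secret and configuration, and the helpers are called as plain functions for brevity.

credentials = {"type": "service_account", "project_id": "my-project"}  # placeholder secret contents
bucket_name = "my-backup-bucket"  # placeholder
prefix = "backups/"  # placeholder

# Find the newest backup under the prefix, then download it into /tmp.
latest = _lastBackupFile(credentials, bucket_name, prefix)
if latest is not None:
    local_path = _downloadFile(credentials, bucket_name, prefix + latest, "/tmp/" + latest)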
Example #8
    def __init__(self, ctx, config=None):
        super().__init__(ctx, config)
        self.bucket = Bucket(StorageClient(), self.bucket)
Example #9
    def test_load_table_from_storage_then_dump_table(self):
        import csv
        import tempfile
        from google.cloud.storage import Client as StorageClient
        local_id = unique_resource_id()
        BUCKET_NAME = 'bq_load_test' + local_id
        BLOB_NAME = 'person_ages.csv'
        GS_URL = 'gs://%s/%s' % (BUCKET_NAME, BLOB_NAME)
        ROWS = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ]
        TABLE_NAME = 'test_table'

        s_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = s_client.create_bucket(BUCKET_NAME)
        self.to_delete.append(bucket)

        blob = bucket.blob(BLOB_NAME)

        with tempfile.TemporaryFile(mode='w+') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(('Full Name', 'Age'))
            writer.writerows(ROWS)
            blob.upload_from_file(csv_file,
                                  rewind=True,
                                  content_type='text/csv')

        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_gcs_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        full_name = bigquery.SchemaField('full_name',
                                         'STRING',
                                         mode='REQUIRED')
        age = bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = dataset.table(TABLE_NAME, schema=[full_name, age])
        table.create()
        self.to_delete.insert(0, table)

        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_' + local_id, table, GS_URL)
        job.create_disposition = 'CREATE_NEVER'
        job.skip_leading_rows = 1
        job.source_format = 'CSV'
        job.write_disposition = 'WRITE_EMPTY'

        job.begin()

        def _job_done(instance):
            return instance.state in ('DONE', 'done')

        # Allow for 90 seconds of "warm up" before rows visible.  See:
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        rows, _, _ = table.fetch_data()
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(rows, key=by_age), sorted(ROWS, key=by_age))
Example #10
    def test_load_table_from_storage_w_autodetect_schema(self):
        from google.cloud._testing import _NamedTemporaryFile
        from google.cloud.storage import Client as StorageClient
        from google.cloud.bigquery import SchemaField

        local_id = unique_resource_id()
        bucket_name = 'bq_load_test' + local_id
        blob_name = 'person_ages.csv'
        gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
        rows = [
            ('Phred Phlyntstone', 32),
            ('Bharney Rhubble', 33),
            ('Wylma Phlyntstone', 29),
            ('Bhettye Rhubble', 27),
        ] * 100  # BigQuery internally uses the first 100 rows to detect schema
        table_name = 'test_table'

        storage_client = StorageClient()

        # In the **very** rare case the bucket name is reserved, this
        # fails with a ConnectionError.
        bucket = storage_client.create_bucket(bucket_name)
        self.to_delete.append(bucket)

        blob = bucket.blob(blob_name)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, 'w') as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(('Full Name', 'Age'))
                writer.writerows(rows)

            with open(temp.name, 'rb') as csv_read:
                blob.upload_from_file(csv_read, content_type='text/csv')

        self.to_delete.insert(0, blob)

        dataset = Config.CLIENT.dataset(
            _make_dataset_name('load_gcs_then_dump'))

        retry_403(dataset.create)()
        self.to_delete.append(dataset)

        table = dataset.table(table_name)
        self.to_delete.insert(0, table)

        job = Config.CLIENT.load_table_from_storage(
            'bq_load_storage_test_' + local_id, table, gs_url)
        job.autodetect = True

        job.begin()

        # Allow for 90 seconds of "warm up" before rows visible.  See
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
        # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
        retry = RetryInstanceState(_job_done, max_tries=8)
        retry(job.reload)()

        table.reload()
        field_name = SchemaField(u'Full_Name', u'string', u'NULLABLE', None,
                                 ())
        field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ())
        self.assertEqual(table.schema, [field_name, field_age])

        actual_rows = self._fetch_single_page(table)
        by_age = operator.itemgetter(1)
        self.assertEqual(sorted(actual_rows, key=by_age),
                         sorted(rows, key=by_age))
Example #11
from time import time
from abc import ABCMeta, abstractmethod
from io import BytesIO
from random import random
from orjson import dumps, OPT_SORT_KEYS

from google.cloud.storage import Client as StorageClient
from PIL import Image
from matplotlib import pyplot as plt
from matplotlib import ticker as tkr
import matplotlib.transforms as mtransforms
from lark import Tree, Token, Transformer, v_args

from assets import static_storage

storage_client = StorageClient()

plt.switch_backend("Agg")
plt.ion()
plt.rcParams["font.family"] = "DejaVu Sans"
plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams["figure.dpi"] = 200.0
plt.rcParams['savefig.facecolor'] = "#131722"


class AbstractProvider(object):
    __metaclass__ = ABCMeta
    bucket = storage_client.get_bucket("nlc-bot-36685.appspot.com")
    stableCoinTickers = [
        "USD", "USDT", "USDC", "DAI", "HUSD", "TUSD", "PAX", "USDK", "USDN",
        "BUSD", "GUSD", "USDS"
Example #12
from google.cloud.firestore import Client as FirestoreClient
from google.cloud.storage import Client as StorageClient
from joblib import Memory
from PIL import Image
from requests import Session
from requests.exceptions import HTTPError

app = Flask(__name__)

firestore = FirestoreClient()

memory = Memory(tempfile.gettempdir(), verbose=0)

requests = Session()

storage = StorageClient()

bucket = storage.get_bucket(os.environ["BUCKET"])

PROFILE_URL = os.environ["PROFILE_URL"]
GAMES_URL = os.environ["GAMES_URL"]
MEDIA_URL = os.environ["MEDIA_URL"]

NO_CONTENT = "", http.HTTPStatus.NO_CONTENT
CAPSULE_WIDTH = 184
CAPSULE_HEIGHT = 69


def get_steam_id(uid):
    logging.info(f"fetching steam_id of the user {uid}")
    try:
Example #13
    def __init__(self):
        """Initiates a connection to GCS using the Google Cloud Storage library."""
        self.client = StorageClient()
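As in the snippet above, the usual pattern is to build a single client and reuse it; here is a minimal sketch that lists the objects in a bucket (the bucket name is a placeholder).

from google.cloud.storage import Client as StorageClient

client = StorageClient()
for blob in client.list_blobs("my-bucket"):  # placeholder bucket name; API request
    print(blob.name)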