Пример #1
0
    def __init__(self, bucket=None):
        """Attach the archive to a storage bucket, creating it if missing.

        Args:
            bucket: Name of the bucket; it is passed to the parent
                initializer and used for the lookup/creation calls.
        """
        super(GoogleStorageArchive, self).__init__(bucket)
        self.client = Client()
        log.info("Archive: gs://%s", bucket)

        self.bucket = self.client.lookup_bucket(bucket)
        if self.bucket is not None:
            # The bucket already exists; nothing else to set up.
            return

        # A ``None`` lookup result is treated as "bucket missing": create it
        # and run ``upgrade()`` on the freshly created bucket.
        self.bucket = self.client.create_bucket(bucket)
        self.upgrade()
Пример #2
0
def setup_bucket(project_id, service_account_key, bucket_name):
    """Create the named bucket unless it already exists.

    Args:
        project_id: First positional argument handed to ``GSClient``.
        service_account_key: Path of the service-account key file used to
            build the credentials (with the module-level ``SCOPES``).
        bucket_name: Name of the bucket to check and, if absent, create.
    """
    client = GSClient(
        project_id,
        service_account.Credentials.from_service_account_file(
            service_account_key, scopes=SCOPES),
    )
    bucket = client.bucket(bucket_name)

    if not bucket.exists():
        bucket.create()
Пример #3
0
def get_bucket(service_account_b64_key, bucket_name, google_project):
    """Return a bucket handle built from a base64-encoded service account key.

    Args:
        service_account_b64_key: Base64 text of the service-account JSON.
            When falsy, no client is created.
        bucket_name: Name of the bucket to return a handle for.
        google_project: Project passed to the storage client.

    Returns:
        The bucket handle, or ``None`` when no key was supplied.
    """
    if not service_account_b64_key:
        return None

    assert bucket_name and google_project, "missing required GCS configurations"
    key_json = base64.decodebytes(service_account_b64_key.encode())
    credentials = service_account.Credentials.from_service_account_info(
        json.loads(key_json))
    storage = Client(project=google_project, credentials=credentials)
    return storage.bucket(bucket_name)
Пример #4
0
    def retrieve_bucket(self):
        """Return bucket used for storing files.

        The project and the service-account JSON document are both read
        through ``self.env``; the JSON is parsed and turned into
        credentials for the storage client.

        Returns:
            Bucket object.
        """
        project = self.env("GOOGLE_PROJECT")
        raw_credentials = self.env("GOOGLE_APPLICATION_CREDENTIALS")
        credentials = service_account.Credentials.from_service_account_info(
            json.loads(raw_credentials))
        storage = Client(project=project, credentials=credentials)
        return storage.bucket("cs4teachers-static")
Пример #5
0
def get_bucket():
    """Return the module-wide bucket handle, creating it on first use.

    The handle is stored in the module global ``_bucket`` so later calls
    reuse it.
    """
    global _bucket
    if _bucket is not None:
        return _bucket

    from google.cloud.storage.client import Client

    # A `None` project behaves differently with the client, so the client
    # is built without the argument when the variable is not set.
    try:
        storage = Client(project=os.environ["GOOGLE_CLOUD_PROJECT"])
    except KeyError:
        storage = Client()

    _bucket = storage.bucket(settings.GOOGLE_STORAGE_BUCKET)
    return _bucket
Пример #6
0
def get_food_recommender(request):
    """Get existing food recommender list.

    The list id is read from the ``listId`` query parameter. When the
    ``ENV`` environment variable equals ``local`` the list is read from a
    JSON file under the system temp directory; otherwise it is read from
    a storage bucket.

    Args:
        request: Object exposing ``args.get`` for query parameters.

    Returns:
        A ``(body, status)`` tuple: status 200 with the decoded JSON under
        ``data``, or status 404 with ``data`` set to ``None``.

    Raises:
        ValueError: If ``listId`` is missing or empty.
    """
    global storage_client

    list_id = request.args.get('listId')
    if not list_id:
        raise ValueError('Query parameter "listId" must be provided')

    # ``found`` is tracked separately because the stored JSON document
    # may itself decode to ``None``.
    found, data = False, None
    if os.getenv('ENV', 'production') == 'local':
        local_file = Path(tempfile.gettempdir()).joinpath(
            'jmyrberg-food-recommender', 'data', f'{list_id}.json')
        if local_file.exists():
            with open(local_file, 'r') as fh:
                data = json.load(fh)
            found = True
    else:
        if not storage_client:
            storage_client = Client()
        bucket_name = os.getenv('FOOD_RECOMMENDER_BUCKET_NAME',
                                'jmyrberg-food-recommender')
        blob = storage_client.bucket(bucket_name).get_blob(
            f'/data/{list_id}.json')
        if blob:
            data = json.loads(blob.download_as_string())
            found = True

    if not found:
        return {
            'status': 'error',
            'message': (f'Food recommender list "{list_id}"'
                        ' could not be found'),
            'data': None
        }, 404

    return {
        'status': 'success',
        'message': 'Food recommender list fetched successfully',
        'data': data
    }, 200
Пример #7
0
    def __init__(self, config=None):
        """Store the configuration and open a handle for each configured bucket.

        Args:
            config: Object exposing a ``buckets`` mapping of alias to bucket
                name. ``newhive.config`` is used when this is falsy.
        """
        self.config = config if config else newhive.config

        # Initialize the Google Cloud Storage connection.
        if self.config.buckets:
            try:
                from google.cloud.storage.client import Client
                self.con = Client()
                self.buckets = {
                    k: self.con.get_bucket(name)
                    for k, name in self.config.buckets.items()
                }
            except Exception:
                # Best effort: a failed import or connection is reported but
                # does not abort construction. ``Exception`` (rather than a
                # bare ``except``) lets KeyboardInterrupt and SystemExit
                # propagate.
                print('google.cloud.storage.client failure')
Пример #8
0
    def test_current(self):
        """``current()`` is None before any push, then the last pushed batch."""
        from google.cloud.storage.client import Client
        client = Client(project='PROJECT', credentials=_Credentials())

        first = self._makeOne(client)
        self.assertIsNone(first.current())

        client._push_batch(first)
        self.assertIs(first.current(), first)

        # Pushing a second batch makes it the current one, even when asked
        # through the first batch.
        second = self._makeOne(client)
        client._push_batch(second)
        self.assertIs(first.current(), second)
Пример #9
0
    def test_create_w_extra_properties(self):
        """Properties set on the bucket are sent in the create request body."""
        from google.cloud.storage.bucket import Bucket
        from google.cloud.storage.client import Client

        bucket_name = "bucket-name"
        project = "PROJECT"
        location = "eu"
        storage_class = "NEARLINE"
        labels = {"color": "red", "flavor": "cherry"}
        cors = [
            {
                "maxAgeSeconds": 60,
                "methods": ["*"],
                "origin": ["https://example.com/frontend"],
                "responseHeader": ["X-Custom-Header"],
            }
        ]
        lifecycle_rules = [
            {"action": {"type": "Delete"}, "condition": {"age": 365}}
        ]
        # Used both as the canned API response and as the expected payload.
        payload = {
            "name": bucket_name,
            "cors": cors,
            "lifecycle": {"rule": lifecycle_rules},
            "location": location,
            "storageClass": storage_class,
            "versioning": {"enabled": True},
            "billing": {"requesterPays": True},
            "labels": labels,
        }

        connection = _make_connection(payload)
        client = Client(project=project)
        client._base_connection = connection

        bucket = Bucket(client=client, name=bucket_name)
        bucket.cors = cors
        bucket.lifecycle_rules = lifecycle_rules
        bucket.storage_class = storage_class
        bucket.versioning_enabled = True
        bucket.requester_pays = True
        bucket.labels = labels
        client.create_bucket(bucket, location=location)

        connection.api_request.assert_called_once_with(
            method="POST",
            path="/b",
            query_params={"project": project},
            data=payload,
            _target_object=bucket,
            timeout=self._get_default_timeout(),
        )
Пример #10
0
class GoogleCloudStorage(FileStoreBase):
    """File store implementation that talks to Google Cloud Storage."""

    def __init__(self, *args, **kwargs):
        """Create the storage client.

        Positional arguments are accepted but not used; keyword arguments
        are forwarded unchanged to ``Client``.
        """
        self.client = Client(**kwargs)

    def create_bucket(self, bucket_name, **kwargs):
        """Create a bucket through the storage client.

        Args:
            bucket_name: Name of the bucket to create.
            **kwargs: Extra keyword arguments forwarded to
                ``Client.create_bucket``.

        Raises:
            Exception: Any error raised by the client is logged with its
                traceback and re-raised unchanged.
        """
        try:
            # The name is passed positionally because the first parameter of
            # ``Client.create_bucket`` is called ``bucket_or_name`` in current
            # google-cloud-storage releases; a ``bucket_name=`` keyword
            # raises TypeError there.
            self.client.create_bucket(bucket_name, **kwargs)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.create_bucket] with bucket_name {} and kwargs {}'.format(bucket_name,
                                                                                                           kwargs))
            # Bare ``raise`` re-raises the active exception as-is.
            raise
Пример #11
0
 def save_json(self, data, blob_name):
     """Serialize ``data`` as JSON and store it under ``blob_name``.

     When the ``ENV`` environment variable equals ``local`` the JSON is
     written to a file below the system temp directory; otherwise it is
     uploaded to the ``HOME_DATA_BUCKET_NAME`` bucket.

     Args:
         data: JSON-serializable object to store.
         blob_name: Relative name of the file/blob to write.
     """
     global storage_client

     if os.getenv('ENV', 'production') != 'local':
         if not storage_client:
             storage_client = Client()
         bucket = storage_client.bucket(HOME_DATA_BUCKET_NAME)
         bucket.blob(blob_name).upload_from_string(
             json.dumps(data), content_type='application/json')
         print(f'Saved into {blob_name}!')
         return

     target = Path(tempfile.gettempdir()) / HOME_DATA_BUCKET_NAME / blob_name
     target.parent.mkdir(exist_ok=True, parents=True)
     with open(target, 'w') as fh:
         json.dump(data, fh)
         print(f'Saved into {str(target)}!')
def download(country_code, svg, pdf):
    """Download the SVG and/or PDF files found for a country code.

    Files are written to ``svgs/<country>.svg`` or ``pdfs/<country>.pdf``
    relative to the current working directory.

    Args:
        country_code: Code embedded in the blob names to match.
        svg: When truthy, download the matching SVG blobs.
        pdf: When truthy, download the matching PDF blobs.
    """
    client = Client.create_anonymous_client()
    # Raw f-string: ``\d`` is not a valid escape in a normal string literal.
    regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_.+"

    def _download(blobs, svg):
        # Resolve the extension up front so the "not found" message can use
        # it; it used to be assigned only inside the loop, which raised
        # UnboundLocalError whenever ``blobs`` was empty.
        extension = "svg" if svg else "pdf"
        if not blobs:
            print(f"Could not find a {extension} file for code {country_code}")
            return

        for blob in blobs:
            fname = f"{extension}s/{get_country(blob)}.{extension}"
            with open(fname, "wb+") as fileobj:
                client.download_blob_to_file(blob, fileobj)

        print(
            f"Download {country_code} {extension} complete. Saved to /{extension}s"
        )

    if svg:
        _download(get(filetype="SVG", regex=regex), True)
    if pdf:
        _download(get(filetype="PDF", regex=regex), False)
Пример #13
0
def get_dataset(
    max_bytes=1024 * 1024,
    dir='data',
):
    """Download the start of every ``full/simplified`` blob into ``dir``.

    Each blob of the ``quickdraw_dataset`` bucket under that prefix is
    downloaded with ``start=0`` and ``end=max_bytes`` into
    ``<dir>/<blob basename>``; the file is then trimmed at its last line
    break so it does not end with a cut-off line.

    Args:
        max_bytes: Value passed as ``end`` to the ranged download.
        dir: Directory (must already exist) receiving the files.
    """
    from google.cloud.storage.client import Client

    def _remove_last_line(file):
        # Scan backwards from the end of the file for a newline and truncate
        # there, dropping the newline and everything after it. The first
        # ``read(1)`` happens at EOF, so the very last character is never
        # inspected.
        file.seek(0, os.SEEK_END)
        offset = file.tell() - 1
        while offset > 0 and file.read(1) != "\n":
            offset -= 1
            file.seek(offset, os.SEEK_SET)
        if offset > 0:
            file.seek(offset, os.SEEK_SET)
            file.truncate()

    anonymous = Client.create_anonymous_client()
    quickdraw = anonymous.get_bucket('quickdraw_dataset')
    for blob in quickdraw.list_blobs(prefix='full/simplified'):
        target = '{}/{}'.format(dir, path.basename(blob.name))
        with open(target, 'wb+') as binary_out:
            blob.download_to_file(binary_out, start=0, end=max_bytes)
        with open(target, 'r+') as text_file:
            _remove_last_line(text_file)
Пример #14
0
    def test_as_context_mgr_wo_error(self):
        """A batch used as a context manager records and finishes its requests."""
        from google.cloud.storage.client import Client
        url = 'http://example.com/api'
        response = _Response()
        response['content-type'] = 'multipart/mixed; boundary="DEADBEEF="'
        http = _HTTP((response, _THREE_PART_MIME_RESPONSE))
        client = Client(project='PROJECT', credentials=_Credentials())
        client._connection._http = http

        self.assertEqual(list(client._batch_stack), [])

        post_target = _MockObject()
        patch_target = _MockObject()
        delete_target = _MockObject()
        with self._makeOne(client) as batch:
            self.assertEqual(list(client._batch_stack), [batch])
            batch._make_request(
                'POST', url, {'foo': 1, 'bar': 2}, target_object=post_target)
            batch._make_request(
                'PATCH', url, {'bar': 3}, target_object=patch_target)
            batch._make_request('DELETE', url, target_object=delete_target)

        self.assertEqual(list(client._batch_stack), [])
        self.assertEqual(len(batch._requests), 3)
        for index, verb in enumerate(('POST', 'PATCH', 'DELETE')):
            self.assertEqual(batch._requests[index][0], verb)
        self.assertEqual(
            batch._target_objects, [post_target, patch_target, delete_target])
        self.assertEqual(post_target._properties, {'foo': 1, 'bar': 2})
        self.assertEqual(patch_target._properties, {'foo': 1, 'bar': 3})
        self.assertEqual(delete_target._properties, '')
Пример #15
0
def get(filetype="SVG", regex=r"\d{4}-\d{2}-\d{2}_.+"):
    """Return the blobs in ``BUCKET`` whose name matches ``<filetype>/<regex>``.

    Args:
        filetype: Leading path component of the blob names to keep.
        regex: Regular expression the rest of the blob name must match,
            anchored at the start by ``re.match``.

    Returns:
        A list of the matching blobs.
    """
    client = Client.create_anonymous_client()
    # Compile once instead of re-resolving the pattern for every blob. The
    # default is a raw string so ``\d`` is not an invalid string escape.
    pattern = re.compile(f"{filetype}/{regex}")
    return [
        blob for blob in client.list_blobs(BUCKET) if pattern.match(blob.name)
    ]
Пример #16
0
 def client(self):
     """Return the storage client, building and caching it on first access."""
     if self._client is not None:
         return self._client

     self._client = Client(
         project=self.project_id, credentials=self.credentials)
     return self._client
def download(country_code):
    """Download the SVG and PDF files dated ``DATE`` for a country code.

    Files are written to ``svgs/<country>.svg`` and ``pdfs/<country>.pdf``
    relative to the current working directory.

    Args:
        country_code: Code embedded in the blob names to match.
    """
    client = Client.create_anonymous_client()

    def _download(blobs, extension):
        download_count = 0

        for blob in blobs:
            # The file name starts with the date; keep only blobs for DATE.
            if blob.name.split("/")[-1].split('_')[0] != DATE:
                continue
            fname = f"{extension}s/{get_country(blob)}.{extension}"
            with open(fname, "wb+") as fileobj:
                client.download_blob_to_file(blob, fileobj)

            print(
                f"Download {country_code} {extension} complete. Saved to /{extension}s"
            )
            download_count += 1

        if download_count == 0:
            print(f"Could not find a {extension} file for code {country_code}")

    # Raw f-string: ``\d`` is not a valid escape in a normal string literal.
    regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_M.+"

    _download(get(filetype="SVG", regex=regex), "svg")
    _download(get(filetype="PDF", regex=regex), "pdf")
Пример #18
0
    def __init__(self, project, cas_url_prefix, credentials=None):
        """Create the storage client and remember the CAS URL prefix.

        Args:
            project: Project passed to ``GSClient``; must not be ``None``.
            cas_url_prefix: URL prefix; one trailing ``/`` is removed
                before it is stored.
            credentials: Optional credentials forwarded to ``GSClient``.
        """
        assert project is not None

        self.buckets = {}
        self.client = GSClient(project, credentials=credentials)
        # Slice instead of indexing so an empty prefix compares as "" rather
        # than raising IndexError.
        if cas_url_prefix[-1:] == "/":
            cas_url_prefix = cas_url_prefix[:-1]
        self.cas_url_prefix = cas_url_prefix
Пример #19
0
 def url(self, name):
     """Return a URL for the stored file ``name``.

     When ``self.expiry_time`` is set, a signed URL valid for that many
     seconds is generated, using a client built from ``self.keyfile_path``
     if that is set. Otherwise the blob's public URL is returned.
     """
     name = self._normalize_name(clean_name(name))
     blob = self._get_blob(self._encode_name(name))
     if not self.expiry_time:
         return blob.public_url

     signing_client = None
     if self.keyfile_path:
         signing_client = Client.from_service_account_json(self.keyfile_path)
     lifetime = datetime.timedelta(seconds=self.expiry_time)
     return blob.generate_signed_url(lifetime, client=signing_client)
Пример #20
0
class GoogleStorage(object):
    """Helper around the Google Cloud Storage buckets named in a config."""

    def __init__(self, config=None):
        """Store the configuration and open a handle for each configured bucket.

        Args:
            config: Object exposing a ``buckets`` mapping of alias to bucket
                name. ``newhive.config`` is used when this is falsy.
        """
        self.config = config if config else newhive.config

        # Initialize the Google Cloud Storage connection.
        if self.config.buckets:
            try:
                from google.cloud.storage.client import Client
                self.con = Client()
                self.buckets = {
                    k: self.con.get_bucket(name)
                    for k, name in self.config.buckets.items()
                }
            except Exception:
                # Best effort: report the failure without aborting
                # construction. ``Exception`` (rather than a bare ``except``)
                # lets KeyboardInterrupt and SystemExit propagate.
                print('google.cloud.storage.client failure')

    def upload_file(self, file, bucket_name, path, name, mimetype, md5=None):
        """Upload a file to ``path`` in the bucket and return its URL.

        Args:
            file: Either a filename (string) or a seekable file object.
            bucket_name: Alias of the bucket in ``self.buckets``.
            path: Blob name to upload to.
            name: Unused.
            mimetype: Content type to set on the blob, if truthy.
            md5: Optional hex MD5 digest; stored base64-encoded on the blob.

        Returns:
            The URL produced by ``self.url(bucket_name, path)``.
        """
        bucket = self.buckets[bucket_name]
        remote = bucket.blob(path)
        if mimetype:
            remote.content_type = mimetype
        remote.cache_control = 'max-age=' + str(86400 * 3650)
        if md5:
            remote.md5_hash = b64encode(b16decode(md5.upper()))

        # ``basestring`` only exists on Python 2; fall back to ``str`` so
        # the filename check does not raise NameError on Python 3.
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str

        if isinstance(file, string_types):
            remote.upload_from_filename(file)
        else:
            file.seek(0)
            remote.upload_from_file(file, num_retries=3)
        return self.url(bucket_name, path)

    def delete_file(self, bucket, path):
        """Delete the blob at ``path``; return whether it existed."""
        bucket = self.buckets[bucket]
        remote = bucket.blob(path)
        if remote.exists():
            remote.delete()
            return True
        return False

    def file_exists(self, bucket, path):
        """Return whether a blob exists at ``path`` in the aliased bucket."""
        bucket = self.buckets[bucket]
        remote = bucket.blob(path)
        return remote.exists()

    def bucket_url(self, bucket='media'):
        """Return the scheme-relative base URL (``//<name>/``) of a bucket."""
        return '//' + self.config.buckets[bucket] + '/'

    def url(self,
            bucket='media',
            key='',
            bucket_name=None,
            http=False,
            secure=False):
        """Return the URL of ``key`` in the aliased bucket.

        Args:
            bucket: Alias of the bucket in the configuration.
            key: Blob name appended to the bucket URL.
            bucket_name: Unused.
            http: Prefix the URL with ``http:``.
            secure: Prefix the URL with ``https:``; wins over ``http``.

        Returns:
            A scheme-relative URL (``//name/key``) unless ``http`` or
            ``secure`` is set.
        """
        url = self.bucket_url(bucket) + key
        # The bucket URL already starts with "//", so the prefix must
        # include the colon; the two flags are mutually exclusive.
        if secure:
            return 'https:' + url
        if http:
            return 'http:' + url
        return url
Пример #21
0
def get_gc_client(project_id=None,
                  key_path=None,
                  keyfile_dict=None,
                  credentials=None,
                  scopes=None):
    """Build a storage client, resolving missing credentials and project.

    Args:
        project_id: Project for the client; ``get_project_id()`` is used
            when this is falsy.
        key_path: Forwarded to ``get_gc_credentials`` when no credentials
            are given.
        keyfile_dict: Forwarded to ``get_gc_credentials`` likewise.
        credentials: Ready-made credentials; used as-is when truthy.
        scopes: Forwarded to ``get_gc_credentials`` likewise.

    Returns:
        The constructed ``Client``.
    """
    if not credentials:
        credentials = get_gc_credentials(
            key_path=key_path, keyfile_dict=keyfile_dict, scopes=scopes)
    if not project_id:
        project_id = get_project_id()
    return Client(project=project_id, credentials=credentials)
Пример #22
0
    def _download_gcs_json(cls, file_url):
        """Download a blob and return its content decoded as UTF-8 text.

        Args:
            file_url: Either a ``gs://`` URI or a URL whose host equals
                ``GCS_HOST``.

        Returns:
            The blob content as a string.

        Raises:
            Exception: If the URL is neither a ``gs://`` URI nor hosted on
                ``GCS_HOST``.
        """
        parsed_url = urlparse(file_url)
        if parsed_url.scheme == 'gs':
            file_uri = file_url
        elif parsed_url.netloc == GCS_HOST:
            # Use the parsed path here; this branch used to reference an
            # undefined ``file_path`` name and raised NameError.
            # Assumes the path looks like "/<bucket>/<object>", so that
            # "gs:/" + path forms "gs://<bucket>/<object>" -- TODO confirm.
            file_uri = "gs:/{}".format(parsed_url.path)
        else:
            raise Exception(
                "Host URL is not a Google Cloud Storage URI: {}".format(
                    file_url))

        gcs_client = GCSClient()
        with BytesIO() as json_in:
            gcs_client.download_blob_to_file(file_uri, json_in)
            json_in.seek(0)
            json_string = json_in.read().decode('utf-8')
        return json_string
Пример #23
0
def get_client(project_id, credentials):
    """Return a cached storage client for the given project and credentials.

    The module global ``_client`` holds ``(project_id, credentials,
    client)``. A new client is built when none is cached or when either
    argument differs from the cached pair; its HTTP session gets a
    ``TimeoutAdapter`` mounted for both URL schemes.
    """
    global _client
    cached = _client[2]
    if cached is not None and (project_id, credentials) == (_client[0],
                                                            _client[1]):
        return cached

    fresh = Client(project=project_id, credentials=credentials)
    session = fresh._http
    adapter = TimeoutAdapter(timeout=GCS_TIMEOUT)
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter)
    _client = (project_id, credentials, fresh)
    return _client[2]
Пример #24
0
def test_anonymous_client_access_to_public_bucket():
    """An anonymous client can list and download from the public bucket."""
    from google.cloud.storage.client import Client

    client = Client.create_anonymous_client()
    bucket = client.bucket(public_bucket)

    list_with_retry = _helpers.retry_429_503(client.list_blobs)
    # Exactly one blob is expected back because of ``max_results=1``.
    (blob,) = list_with_retry(bucket, max_results=1,)

    with tempfile.TemporaryFile() as stream:
        download_with_retry = _helpers.retry_429_503(blob.download_to_file)
        download_with_retry(stream)
Пример #25
0
    def test_as_context_mgr_w_error(self):
        """An exception inside the batch context leaves requests unsent."""
        from google.cloud.storage.batch import _FutureDict
        from google.cloud.storage.client import Client

        url = 'http://example.com/api'
        http = _make_requests_session([])
        connection = _Connection(http=http)
        client = Client(project='PROJECT', credentials=_make_credentials())
        client._base_connection = connection

        self.assertEqual(list(client._batch_stack), [])

        targets = [_MockObject(), _MockObject(), _MockObject()]
        post_target, patch_target, delete_target = targets
        try:
            with self._make_one(client) as batch:
                self.assertEqual(list(client._batch_stack), [batch])
                batch._make_request(
                    'POST', url, {'foo': 1, 'bar': 2},
                    target_object=post_target)
                batch._make_request(
                    'PATCH', url, {'bar': 3}, target_object=patch_target)
                batch._make_request(
                    'DELETE', url, target_object=delete_target)
                raise ValueError()
        except ValueError:
            pass

        http.request.assert_not_called()
        self.assertEqual(list(client._batch_stack), [])
        self.assertEqual(len(batch._requests), 3)
        self.assertEqual(batch._target_objects, targets)
        # Since the context manager fails, finish will not get called and
        # the _properties will still be futures.
        for target in targets:
            self.assertIsInstance(target._properties, _FutureDict)
Пример #26
0
def get_client(project_id, credentials):
    """Return a cached storage client for the given project and credentials.

    The module global ``_client`` holds ``(project_id, credentials,
    client)``; a new client replaces it when none is cached or when
    either argument differs from the cached pair.
    """
    global _client
    cached = _client[2]
    if cached is not None and (project_id, credentials) == (_client[0],
                                                            _client[1]):
        return cached

    fresh = Client(project=project_id, credentials=credentials)
    _client = (project_id, credentials, fresh)
    return fresh
Пример #27
0
    def test_as_context_mgr_wo_error(self):
        """A batch used as a context manager records and finishes its requests."""
        from google.cloud.storage.client import Client

        url = "http://example.com/api"
        response = _make_response(
            content=_THREE_PART_MIME_RESPONSE,
            headers={"content-type": 'multipart/mixed; boundary="DEADBEEF="'},
        )
        http = _make_requests_session([response])
        client = Client(project="PROJECT", credentials=_make_credentials())
        client._http_internal = http

        self.assertEqual(list(client._batch_stack), [])

        post_target = _MockObject()
        patch_target = _MockObject()
        delete_target = _MockObject()

        with self._make_one(client) as batch:
            self.assertEqual(list(client._batch_stack), [batch])
            batch._make_request(
                "POST", url, {"foo": 1, "bar": 2}, target_object=post_target)
            batch._make_request(
                "PATCH", url, {"bar": 3}, target_object=patch_target)
            batch._make_request("DELETE", url, target_object=delete_target)

        self.assertEqual(list(client._batch_stack), [])
        self.assertEqual(len(batch._requests), 3)
        for index, verb in enumerate(("POST", "PATCH", "DELETE")):
            self.assertEqual(batch._requests[index][0], verb)
        self.assertEqual(
            batch._target_objects, [post_target, patch_target, delete_target])
        self.assertEqual(post_target._properties, {"foo": 1, "bar": 2})
        self.assertEqual(patch_target._properties, {"foo": 1, "bar": 3})
        self.assertEqual(delete_target._properties, b"")
Пример #28
0
    def get_previous_history_timestamp(self):
        """Return the stored previous-history timestamp, or ``None`` if absent.

        When the ``ENV`` environment variable equals ``local`` the value is
        read from a file below the system temp directory; otherwise from
        the ``HOME_DATA_BUCKET_NAME`` bucket. The stored document is JSON
        whose first element is converted with ``int``.
        """
        global storage_client

        relative_name = 'nibe/history/previous_timestamp.txt'
        if os.getenv('ENV', 'production') == 'local':
            local_file = (Path(tempfile.gettempdir()) /
                          HOME_DATA_BUCKET_NAME / relative_name)
            if not local_file.exists():
                return None
            with open(local_file, 'r') as fh:
                return int(json.load(fh)[0])

        if not storage_client:
            storage_client = Client()
        blob = storage_client.bucket(HOME_DATA_BUCKET_NAME).get_blob(
            relative_name)
        if not blob:
            return None
        return int(json.loads(blob.download_as_string())[0])
Пример #29
0
    def test_create_hit(self):
        """Creating a bucket by name issues a single POST to ``/b``."""
        from google.cloud.storage.client import Client

        project, bucket_name = "PROJECT", "bucket-name"
        # Used both as the canned API response and as the expected payload.
        payload = {"name": bucket_name}
        connection = _make_connection(payload)
        client = Client(project=project)
        client._base_connection = connection

        created = client.create_bucket(bucket_name)

        connection.api_request.assert_called_once_with(
            method="POST",
            path="/b",
            query_params={"project": project},
            data=payload,
            _target_object=created,
            timeout=self._get_default_timeout(),
        )
Пример #30
0
def test_upload_html_gcs(client: Client):
    """Uploading HTML to a ``gs://`` URL stores a blob with the HTML content type."""
    id_ = str(uuid.uuid4())
    html = "hello"
    mime = "text/html"
    bucket_name = "vcm-ml-scratch"
    blob_name = f"testing/{id_}/index.html"
    url = f"gs://{bucket_name}/{blob_name}"

    upload(html, url, mime)

    blob = client.bucket(bucket_name).get_blob(blob_name)
    # The original comparison had no ``assert`` and therefore checked nothing.
    assert blob is not None
    assert blob.content_type == mime
Пример #31
0
    def _get_client(cls,
                    project_id=None,
                    key_path=None,
                    keyfile_dict=None,
                    credentials=None,
                    scopes=None):
        """Build a storage client, resolving missing credentials and project.

        Credentials default to ``cls.get_credentials(...)`` built from
        ``key_path``, ``keyfile_dict`` and ``scopes``; the project defaults
        to the ``GCP_PROJECT_ID`` configuration value (``None`` if unset).
        """
        if not credentials:
            credentials = cls.get_credentials(
                key_path=key_path, keyfile_dict=keyfile_dict, scopes=scopes)

        if not project_id:
            project_id = config("GCP_PROJECT_ID", default=None)

        return Client(project=project_id, credentials=credentials)
Пример #32
0
    def test_as_context_mgr_w_error(self):
        """An exception inside the batch context leaves requests unsent."""
        from google.cloud.storage.batch import _FutureDict
        from google.cloud.storage.client import Client

        url = 'http://example.com/api'
        http = _HTTP()
        connection = _Connection(http=http)
        client = Client(project='PROJECT', credentials=_make_credentials())
        client._base_connection = connection

        self.assertEqual(list(client._batch_stack), [])

        targets = [_MockObject(), _MockObject(), _MockObject()]
        post_target, patch_target, delete_target = targets
        try:
            with self._make_one(client) as batch:
                self.assertEqual(list(client._batch_stack), [batch])
                batch._make_request(
                    'POST', url, {'foo': 1, 'bar': 2},
                    target_object=post_target)
                batch._make_request(
                    'PATCH', url, {'bar': 3}, target_object=patch_target)
                batch._make_request(
                    'DELETE', url, target_object=delete_target)
                raise ValueError()
        except ValueError:
            pass

        self.assertEqual(list(client._batch_stack), [])
        self.assertEqual(len(http._requests), 0)
        self.assertEqual(len(batch._requests), 3)
        self.assertEqual(batch._target_objects, targets)
        # Since the context manager fails, finish will not get called and
        # the _properties will still be futures.
        for target in targets:
            self.assertIsInstance(target._properties, _FutureDict)
Пример #33
0
 def get_connection(self):
     """Return the storage client, creating it from the JSON key on first use."""
     if self._client is not None:
         return self._client

     self._client = Client.from_service_account_json(
         json_credentials_path=self.credentials)
     return self._client