Example 1
class GoogleCloudStorage(FileStoreBase):
    """File store implementation backed by Google Cloud Storage.

    Every operation logs the failure (with traceback) and re-raises the
    original exception, so callers always see the underlying GCS error.
    """

    def __init__(self, *args, **kwargs):
        # NOTE(review): *args is accepted but ignored; kwargs are passed
        # straight to the google-cloud-storage Client (project,
        # credentials, ...).
        self.client = Client(**kwargs)

    def list_files(self, bucket_name, prefix, **kwargs):
        """Return the names of the blobs in *bucket_name* under *prefix*.

        Extra keyword arguments are forwarded to ``Bucket.list_blobs``
        (and may deliberately override ``prefix``).
        """
        try:
            basic_request = {'prefix': prefix}
            list_files_request = {**basic_request, **kwargs}
            bucket = self.client.get_bucket(bucket_name)
            response_elements = bucket.list_blobs(**list_files_request)
            return [x.name for x in response_elements]
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.list_files] with bucket_name {} and prefix {} and kwargs {}'
                .format(bucket_name, prefix, kwargs))
            # Bare raise keeps the original exception and traceback intact.
            raise

    def delete_object(self, bucket_name, key, **kwargs):
        """Delete the blob *key* from *bucket_name*."""
        try:
            bucket = self.client.get_bucket(bucket_name)
            bucket.delete_blob(key)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.delete_object] with bucket_name {} and key {}'
                .format(bucket_name, key))
            raise

    def put_object(self, bucket_name, key, obj, **kwargs):
        """Upload *obj* (string/bytes payload) to *bucket_name* as *key*."""
        try:
            bucket = self.client.get_bucket(bucket_name)
            blob = Blob(key, bucket)
            blob.upload_from_string(obj)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.put_object] with bucket_name {} and key {}'
                .format(bucket_name, key))
            raise

    def delete_bucket(self, bucket_name):
        """Delete the bucket *bucket_name* itself."""
        try:
            bucket = self.client.get_bucket(bucket_name)
            bucket.delete()
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.delete_bucket] with bucket_name {}'
                .format(bucket_name))
            raise

    def create_bucket(self, bucket_name, **kwargs):
        """Create *bucket_name*; extra kwargs go to ``Client.create_bucket``."""
        try:
            basic_request = {'bucket_name': bucket_name}
            create_bucket_request = {**basic_request, **kwargs}
            self.client.create_bucket(**create_bucket_request)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.create_bucket] with bucket_name {} and kwargs {}'
                .format(bucket_name, kwargs))
            raise
Example 2
    def test_create_w_extra_properties(self):
        """Creating a pre-configured Bucket object sends every extra property."""
        from google.cloud.storage.client import Client
        from google.cloud.storage.bucket import Bucket

        # Fixture values for each optional bucket property under test.
        bucket_name = "bucket-name"
        project = "PROJECT"
        cors = [
            {
                "maxAgeSeconds": 60,
                "methods": ["*"],
                "origin": ["https://example.com/frontend"],
                "responseHeader": ["X-Custom-Header"],
            }
        ]
        lifecycle_rules = [{"action": {"type": "Delete"}, "condition": {"age": 365}}]
        location = "eu"
        labels = {"color": "red", "flavor": "cherry"}
        storage_class = "NEARLINE"
        expected_data = {
            "name": bucket_name,
            "cors": cors,
            "lifecycle": {"rule": lifecycle_rules},
            "location": location,
            "storageClass": storage_class,
            "versioning": {"enabled": True},
            "billing": {"requesterPays": True},
            "labels": labels,
        }

        connection = _make_connection(expected_data)
        client = Client(project=project)
        client._base_connection = connection

        # Populate every extra property on the bucket before creating it.
        bucket = Bucket(client=client, name=bucket_name)
        bucket.cors = cors
        bucket.lifecycle_rules = lifecycle_rules
        bucket.storage_class = storage_class
        bucket.versioning_enabled = True
        bucket.requester_pays = True
        bucket.labels = labels
        client.create_bucket(bucket, location=location)

        # All properties must appear in the single POST /b request payload.
        connection.api_request.assert_called_once_with(
            method="POST",
            path="/b",
            query_params={"project": project},
            data=expected_data,
            _target_object=bucket,
            timeout=self._get_default_timeout(),
        )
Example 3
class GoogleCloudStorage(FileStoreBase):
    """File store implementation backed by Google Cloud Storage."""

    def __init__(self, *args, **kwargs):
        # NOTE(review): *args is accepted but ignored; kwargs are passed
        # straight to the google-cloud-storage Client.
        self.client = Client(**kwargs)

    def create_bucket(self, bucket_name, **kwargs):
        """Create *bucket_name*; extra kwargs go to ``Client.create_bucket``."""
        try:
            basic_request = {
                'bucket_name': bucket_name
            }
            create_bucket_request = {**basic_request, **kwargs}
            self.client.create_bucket(**create_bucket_request)
        except Exception:
            logging.exception(
                'Exception in [GoogleCloudStorage.create_bucket] with bucket_name {} and kwargs {}'.format(bucket_name,
                                                                                                           kwargs))
            # Bare raise keeps the original exception and traceback intact.
            raise
Example 4
    def test_create_hit(self):
        """A plain create_bucket issues one POST /b with just the name."""
        from google.cloud.storage.client import Client

        project = "PROJECT"
        bucket_name = "bucket-name"
        resource = {"name": bucket_name}
        connection = _make_connection(resource)
        client = Client(project=project)
        client._base_connection = connection

        created = client.create_bucket(bucket_name)

        connection.api_request.assert_called_once_with(
            method="POST",
            path="/b",
            query_params={"project": project},
            data=resource,
            _target_object=created,
            timeout=self._get_default_timeout(),
        )
Example 5
    def test_create_w_predefined_acl_valid(self):
        """A valid predefined_acl is forwarded as the predefinedAcl query param."""
        from google.cloud.storage.client import Client

        project = "PROJECT"
        bucket_name = "bucket-name"
        resource = {"name": bucket_name}

        client = Client(project=project)
        connection = _make_connection(resource)
        client._base_connection = connection
        created = client.create_bucket(bucket_name, predefined_acl="publicRead")

        connection.api_request.assert_called_once_with(
            method="POST",
            path="/b",
            query_params={
                "project": project,
                "predefinedAcl": "publicRead"
            },
            data=resource,
            _target_object=created,
        )
Example 6
    def test_create_w_explicit_location(self):
        """An explicit location ends up in the request body and on the bucket."""
        from google.cloud.storage.client import Client

        project = "PROJECT"
        bucket_name = "bucket-name"
        location = "us-central1"
        resource = {"location": location, "name": bucket_name}

        connection = _make_connection(
            resource, "{'location': 'us-central1', 'name': 'bucket-name'}")

        client = Client(project=project)
        client._base_connection = connection

        created = client.create_bucket(bucket_name, location=location)

        connection.api_request.assert_called_once_with(
            method="POST",
            path="/b",
            data=resource,
            _target_object=created,
            query_params={"project": "PROJECT"},
        )
        self.assertEqual(created.location, location)
Example 7
class GoogleStorageArchive(VirtualArchive):
    """Content-addressed file archive stored in a Google Cloud Storage bucket."""

    # Signed-URL / CORS max-age, in seconds.
    # NOTE(review): 84600 looks like a typo for 86400 (24h) — confirm.
    TIMEOUT = 84600

    def __init__(self, bucket=None):
        super(GoogleStorageArchive, self).__init__(bucket)
        self.client = Client()
        log.info("Archive: gs://%s", bucket)

        # Reuse the bucket when it exists; otherwise create it.
        self.bucket = self.client.lookup_bucket(bucket)
        if self.bucket is None:
            self.bucket = self.client.create_bucket(bucket)

        # Allow cross-origin GETs on the range-related response headers.
        cors_rule = {
            "origin": ['*'],
            "method": ['GET'],
            "responseHeader": [
                'Accept-Ranges',
                'Content-Encoding',
                'Content-Length',
                'Content-Range'
            ],
            "maxAgeSeconds": self.TIMEOUT
        }
        self.bucket.cors = [cors_rule]
        self.bucket.update()

    def _locate_blob(self, content_hash):
        """Check if a file with the given hash exists on S3."""
        if content_hash is None:
            return None
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return None
        # At most one blob is expected under the hash prefix.
        blobs = self.bucket.list_blobs(max_results=1, prefix=prefix)
        return next(iter(blobs), None)

    def archive_file(self, file_path, content_hash=None):
        """Store the file located at the given path on S3, based on a path
        made up from its SHA1 content hash."""
        if content_hash is None:
            content_hash = checksum(file_path)

        existing = self._locate_blob(content_hash)
        if existing is None:
            # Canonical object name: <prefix>/data
            target = os.path.join(self._get_prefix(content_hash), 'data')
            Blob(target, self.bucket).upload_from_filename(file_path)
        return content_hash

    def load_file(self, content_hash, file_name=None, temp_path=None):
        """Retrieve a file from S3 storage and put it onto the local file
        system for further processing."""
        blob = self._locate_blob(content_hash)
        if blob is None:
            return None
        local = self._local_path(content_hash, file_name, temp_path)
        blob.download_to_filename(local)
        return local

    def generate_url(self, content_hash, file_name=None, mime_type=None):
        """Return a signed, time-limited download URL, or None when missing."""
        blob = self._locate_blob(content_hash)
        if blob is None:
            return None
        disposition = 'inline; filename=%s' % file_name if file_name else None
        expire = datetime.utcnow() + timedelta(seconds=self.TIMEOUT)
        return blob.generate_signed_url(
            expire, response_type=mime_type, response_disposition=disposition)
Example 8
class GoogleStorageArchive(VirtualArchive):
    """Content-addressed file archive in a Google Cloud Storage bucket.

    Store and load operations are wrapped in a retry/backoff loop so that
    transient GCS failures (the ``FAILURES`` tuple) do not surface to callers.
    """

    # Signed-URL / CORS max-age, in seconds.
    # NOTE(review): 84600 looks like a typo for 86400 (24h) — confirm.
    TIMEOUT = 84600

    def __init__(self, bucket=None):
        super(GoogleStorageArchive, self).__init__(bucket)
        self.client = Client()
        log.info("Archive: gs://%s", bucket)

        # Reuse an existing bucket; only newly-created buckets get the
        # CORS policy applied (unlike the eager variant of this class).
        self.bucket = self.client.lookup_bucket(bucket)
        if self.bucket is None:
            self.bucket = self.client.create_bucket(bucket)
            self.upgrade()

    def upgrade(self):
        """Apply a permissive CORS policy to the bucket (wildcard headers)."""
        # Previously enumerated headers, kept for reference:
        # 'Accept-Ranges',
        # 'Content-Encoding',
        # 'Content-Length',
        # 'Content-Range',
        # 'Cache-Control',
        # 'Content-Language',
        # 'Content-Type',
        # 'Expires',
        # 'Last-Modified',
        # 'Pragma',
        # 'Range',
        # 'Date',
        policy = {
            "origin": ['*'],
            "method": ['GET', 'HEAD', 'OPTIONS'],
            "responseHeader": ['*'],
            "maxAgeSeconds": self.TIMEOUT
        }
        self.bucket.cors = [policy]
        self.bucket.update()

    def _locate_blob(self, content_hash):
        """Check if a file with the given hash exists on S3."""
        if content_hash is None:
            return
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return

        # First, check the standard file name:
        blob = Blob(os.path.join(prefix, 'data'), self.bucket)
        if blob.exists():
            return blob

        # Second, iterate over all file names:
        for blob in self.bucket.list_blobs(max_results=1, prefix=prefix):
            return blob

    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Store the file located at the given path on Google, based on a path
        made up from its SHA1 content hash.

        Returns the content hash on success, or None if the hash could not
        be computed or all retry attempts failed.
        """
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)

        if content_hash is None:
            return

        file_path = ensure_posix_path(file_path)
        for attempt in service_retries():
            try:
                # Dedup check disabled: always re-upload, even when a blob
                # for this hash already exists.
                # blob = self._locate_blob(content_hash)
                # if blob is not None:
                #     return content_hash

                path = os.path.join(self._get_prefix(content_hash), 'data')
                blob = Blob(path, self.bucket)
                blob.upload_from_filename(file_path, content_type=mime_type)
                return content_hash
            except FAILURES:
                log.exception("Store error in GS")
                backoff(failures=attempt)

    def load_file(self, content_hash, file_name=None, temp_path=None):
        """Retrieve a file from Google storage and put it onto the local file
        system for further processing.

        Returns the local path, or None when the blob is missing or every
        retry attempt failed.
        """
        for attempt in service_retries():
            try:
                blob = self._locate_blob(content_hash)
                if blob is not None:
                    path = self._local_path(content_hash, file_name, temp_path)
                    blob.download_to_filename(path)
                    return path
            except FAILURES:
                log.exception("Load error in GS")
                backoff(failures=attempt)

        # Returns None for "persistent error" as well as "file not found" :/
        log.debug("[%s] not found, or the backend is down.", content_hash)

    def generate_url(self, content_hash, file_name=None, mime_type=None):
        """Return a signed, time-limited download URL, or None when missing."""
        blob = self._locate_blob(content_hash)
        if blob is None:
            return
        disposition = None
        if file_name is not None:
            disposition = 'inline; filename=%s' % file_name
        expire = datetime.utcnow() + timedelta(seconds=self.TIMEOUT)
        return blob.generate_signed_url(expire,
                                        response_type=mime_type,
                                        response_disposition=disposition)