Example #1
import os
import logging
from datetime import datetime, timedelta

from google.cloud.storage import Blob, Client

# VirtualArchive and checksum are helpers from the surrounding project;
# their import paths are not shown in the original snippet.

log = logging.getLogger(__name__)


class GoogleStorageArchive(VirtualArchive):
    TIMEOUT = 84600

    def __init__(self, bucket=None):
        super(GoogleStorageArchive, self).__init__(bucket)
        self.client = Client()
        log.info("Archive: gs://%s", bucket)

        self.bucket = self.client.lookup_bucket(bucket)
        if self.bucket is None:
            self.bucket = self.client.create_bucket(bucket)

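        # CORS policy: allow GET from any origin and expose the headers
        # needed for HTTP range requests against signed URLs.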
        policy = {
            "origin": ['*'],
            "method": ['GET'],
            "responseHeader": [
                'Accept-Ranges',
                'Content-Encoding',
                'Content-Length',
                'Content-Range'
            ],
            "maxAgeSeconds": self.TIMEOUT
        }
        self.bucket.cors = [policy]
        self.bucket.update()

    def _locate_blob(self, content_hash):
        """Check if a file with the given hash exists on S3."""
        if content_hash is None:
            return
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return
        for blob in self.bucket.list_blobs(max_results=1, prefix=prefix):
            return blob

    def archive_file(self, file_path, content_hash=None):
        """Store the file located at the given path on S3, based on a path
        made up from its SHA1 content hash."""
        if content_hash is None:
            content_hash = checksum(file_path)

        blob = self._locate_blob(content_hash)
        if blob is None:
            path = os.path.join(self._get_prefix(content_hash), 'data')
            blob = Blob(path, self.bucket)
            blob.upload_from_filename(file_path)
        return content_hash

    def load_file(self, content_hash, file_name=None, temp_path=None):
        """Retrieve a file from S3 storage and put it onto the local file
        system for further processing."""
        blob = self._locate_blob(content_hash)
        if blob is not None:
            path = self._local_path(content_hash, file_name, temp_path)
            blob.download_to_filename(path)
            return path

    def generate_url(self, content_hash, file_name=None, mime_type=None):
        blob = self._locate_blob(content_hash)
        if blob is None:
            return
        disposition = None
        if file_name:
            disposition = 'inline; filename=%s' % file_name
        expire = datetime.utcnow() + timedelta(seconds=self.TIMEOUT)
        return blob.generate_signed_url(expire,
                                        response_type=mime_type,
                                        response_disposition=disposition)
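
A minimal usage sketch for the example above, assuming Google Cloud credentials
are configured (e.g. via GOOGLE_APPLICATION_CREDENTIALS) and that the bucket
name and file path below are placeholders:

# Hypothetical bucket and file, for illustration only.
archive = GoogleStorageArchive(bucket='my-archive-bucket')

# Upload a file; the return value is its SHA1 content hash.
content_hash = archive.archive_file('/tmp/report.pdf')

# Fetch it back to a local path and build a signed, time-limited URL.
local_path = archive.load_file(content_hash, file_name='report.pdf')
url = archive.generate_url(content_hash, file_name='report.pdf',
                           mime_type='application/pdf')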
Example #2
import os
import logging
from datetime import datetime, timedelta

from google.cloud.storage import Blob, Client

# VirtualArchive, checksum, ensure_path, ensure_posix_path, service_retries,
# backoff and FAILURES are helpers from the surrounding project; their import
# paths are not shown in the original snippet.

log = logging.getLogger(__name__)


class GoogleStorageArchive(VirtualArchive):
    TIMEOUT = 84600

    def __init__(self, bucket=None):
        super(GoogleStorageArchive, self).__init__(bucket)
        self.client = Client()
        log.info("Archive: gs://%s", bucket)

        self.bucket = self.client.lookup_bucket(bucket)
        if self.bucket is None:
            self.bucket = self.client.create_bucket(bucket)
            self.upgrade()

    def upgrade(self):
        # 'Accept-Ranges',
        # 'Content-Encoding',
        # 'Content-Length',
        # 'Content-Range',
        # 'Cache-Control',
        # 'Content-Language',
        # 'Content-Type',
        # 'Expires',
        # 'Last-Modified',
        # 'Pragma',
        # 'Range',
        # 'Date',
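        # Wildcard policy instead: any origin, read-only methods, all
        # response headers exposed.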
        policy = {
            "origin": ['*'],
            "method": ['GET', 'HEAD', 'OPTIONS'],
            "responseHeader": ['*'],
            "maxAgeSeconds": self.TIMEOUT
        }
        self.bucket.cors = [policy]
        self.bucket.update()

    def _locate_blob(self, content_hash):
        """Check if a file with the given hash exists on S3."""
        if content_hash is None:
            return
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return

        # First, check the standard file name:
        blob = Blob(os.path.join(prefix, 'data'), self.bucket)
        if blob.exists():
            return blob

        # Second, iterate over all file names:
        for blob in self.bucket.list_blobs(max_results=1, prefix=prefix):
            return blob

    def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Store the file located at the given path on Google, based on a path
        made up from its SHA1 content hash."""
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)

        if content_hash is None:
            return

        file_path = ensure_posix_path(file_path)
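        # Retry the upload on transient service errors, backing off
        # between attempts.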
        for attempt in service_retries():
            try:
                # blob = self._locate_blob(content_hash)
                # if blob is not None:
                #     return content_hash

                path = os.path.join(self._get_prefix(content_hash), 'data')
                blob = Blob(path, self.bucket)
                blob.upload_from_filename(file_path, content_type=mime_type)
                return content_hash
            except FAILURES:
                log.exception("Store error in GS")
                backoff(failures=attempt)

    def load_file(self, content_hash, file_name=None, temp_path=None):
        """Retrieve a file from Google storage and put it onto the local file
        system for further processing."""
        for attempt in service_retries():
            try:
                blob = self._locate_blob(content_hash)
                if blob is not None:
                    path = self._local_path(content_hash, file_name, temp_path)
                    blob.download_to_filename(path)
                    return path
            except FAILURES:
                log.exception("Load error in GS")
                backoff(failures=attempt)

        # Returns None for "persistent error" as well as "file not found" :/
        log.debug("[%s] not found, or the backend is down.", content_hash)

    def generate_url(self, content_hash, file_name=None, mime_type=None):
        blob = self._locate_blob(content_hash)
        if blob is None:
            return
        disposition = None
        if file_name is not None:
            disposition = 'inline; filename=%s' % file_name
        expire = datetime.utcnow() + timedelta(seconds=self.TIMEOUT)
        return blob.generate_signed_url(expire,
                                        response_type=mime_type,
                                        response_disposition=disposition)
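
A similar sketch for the second example, which adds retry handling and an
upgrade() helper for the CORS policy; the bucket name, file path and MIME type
below are placeholders:

# Hypothetical values, for illustration only.
archive = GoogleStorageArchive(bucket='my-archive-bucket')
archive.upgrade()  # re-apply the wildcard CORS policy to an existing bucket

content_hash = archive.archive_file('/tmp/report.pdf',
                                    mime_type='application/pdf')
if content_hash is not None:
    print(archive.generate_url(content_hash, file_name='report.pdf',
                               mime_type='application/pdf'))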