Example #1
class Resizer(object):
    """ Pipeline process which takes an image and resizes using a given image mode """

    def __init__(self, engine):
        self.engine = engine

    def _resize_using_pg(self, image, width, height, mode):
        """ Resize using image mode. """

        blob = pg.Blob(image)
        blob_out = pg.Blob()
        img = pg.Image(blob)
        img.filterType(pg.FilterTypes.LanczosFilter)

        img = process_image_with_mode(img, width, height, mode)

        # Image should be repaged after a crop/resize
        img.page(pg.Geometry(0, 0, 0, 0))
        img.quality(90)  # minimise artifacts but keep size down

        img.write(blob_out, 'JPEG')
        return blob_out.data, img.size().width(), img.size().height()

    @time_on_statsd(statsd_name(), 'resizer')
    @defer.inlineCallbacks
    def process_image(self, payload, **kwargs):
        """ Resizes image to given parameters """

        # If the payload is flagged to skip resizing, serve the original image as-is
        if 'skip_resize' in payload:
            payload['image'] = payload['original_image']
            defer.returnValue(payload)

        data, w, h = yield threads.deferToThread(
            self._resize_using_pg, payload['original_image'], payload['width'],
            payload['height'], payload['mode']
        )

        if settings.DEBUG:
            log.msg(
                "[%s] Resized Image Size %s" % (datetime.now().isoformat(), len(data)),
                logLevel=logging.DEBUG
            )
        payload['image'] = data
        payload['resized_width'] = w
        payload['resized_height'] = h

        defer.returnValue(payload)
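
The pattern worth taking from Resizer is that the CPU-bound pgmagick work never runs on the reactor thread: it is pushed onto the thread pool with threads.deferToThread and the inlineCallbacks coroutine resumes with the result. Below is a minimal, self-contained sketch of that pattern; blocking_resize, resize_payload and the literal payload values are hypothetical stand-ins rather than pipeline code.

from twisted.internet import defer, reactor, threads


def blocking_resize(image, width, height, mode):
    """ Hypothetical stand-in for the pgmagick work done in _resize_using_pg """
    return image, width, height


@defer.inlineCallbacks
def resize_payload(payload):
    # Offload the blocking resize to the reactor thread pool, as
    # Resizer.process_image does, then attach the results to the payload
    data, w, h = yield threads.deferToThread(
        blocking_resize, payload['original_image'],
        payload['width'], payload['height'], payload['mode']
    )
    payload['image'] = data
    payload['resized_width'] = w
    payload['resized_height'] = h
    defer.returnValue(payload)


def _done(payload):
    print('resized to %sx%s' % (payload['resized_width'], payload['resized_height']))
    reactor.stop()


if __name__ == '__main__':
    resize_payload({'original_image': 'raw-bytes', 'width': 100,
                    'height': 80, 'mode': 'crop'}).addCallback(_done)
    reactor.run()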
Example #2
class CacheCheck(object):
    """ Pipeline process which checks if an image already exists in the cache that can
        be resized, rather than hitting S3 """
    def __init__(self, engine):
        self.engine = engine

    def _find_cache_matches(self, file_path):
        """ Look for a file (no resized, original s3 download) in the cache """

        orig_format = '%s.jpeg' % file_path[:-1]
        for f in glob.iglob(orig_format):
            if f == orig_format:
                return f
        return False

    def _read_image(self, file_path):
        """ Read file from filesystem """

        with open(file_path, 'rb') as image:
            return image.read()

    @time_on_statsd(statsd_name(), 'cache_check')
    @defer.inlineCallbacks
    def process_image(self, payload, **kwargs):
        """ Checks the cache for a suitable source image
            This allows S3 to be skipped
        """

        filecache_loc = settings.CACHE_LOCATION
        filefront = payload['image_path'].split('.')[0]
        file_cache = "%s*" % os.path.join(filecache_loc, filefront)

        bigger_cache = self._find_cache_matches(file_cache)

        if not bigger_cache:
            defer.returnValue(payload)

        if settings.DEBUG:
            log.msg('Original file found in cache, skipping S3: %s' % bigger_cache,
                    logLevel=logging.DEBUG)

        data = yield threads.deferToThread(self._read_image, bigger_cache)
        payload['original_image'] = data

        defer.returnValue(payload)
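
CacheCheck's lookup reduces to two steps: derive the stem of the cached original from image_path, then glob for a matching .jpeg and read it back in binary mode. A standalone sketch of that lookup follows; find_cached_original, read_cached_original and the example paths are illustrative names, with cache_dir playing the role of settings.CACHE_LOCATION.

import glob
import os


def find_cached_original(cache_dir, image_path):
    """ Mirror of CacheCheck._find_cache_matches: strip the extension from
        image_path and look for the cached original saved as a .jpeg """
    stem = os.path.join(cache_dir, image_path.split('.')[0])
    expected = '%s.jpeg' % stem
    for match in glob.iglob(expected):
        if match == expected:
            return match
    return None


def read_cached_original(path):
    """ Read the cached bytes in binary mode, as CacheCheck._read_image does """
    with open(path, 'rb') as image:
        return image.read()


# Usage (paths are illustrative):
# hit = find_cached_original('/var/cache/images', 'albums/123/cover.jpg')
# data = read_cached_original(hit) if hit else None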
Example #3
class S3Downloader(object):
    """ Pipeline process which downloads a media file from S3 """

    def __init__(self, engine):
        self.engine = engine
        self.s3conn = boto.connect_s3(
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY
        )
        if not settings.USE_BOTO:
            self.txs3conn = AWSServiceRegion(
                access_key=settings.AWS_ACCESS_KEY_ID,
                secret_key=settings.AWS_SECRET_ACCESS_KEY,
                s3_uri=S3_US[0]['endpoint'],
                # s3_uri='https://s3.amazonaws.com',
            ).get_s3_client()
        self.botobucket = self.s3conn.get_bucket(settings.IMAGES_STORE)

    @defer.inlineCallbacks
    def _get_data_from_s3_tx(self, path):
        """ txAWS GET from S3 """
        image = yield self.txs3conn.get_object(
            settings.IMAGES_STORE,
            str(path),
        )
        defer.returnValue(image)

    def _get_data_from_s3(self, path):
        """ boto GET from S3 """
        key = self.botobucket.get_key(path)
        data = key.get_contents_as_string()
        return data

    @time_on_statsd(statsd_name(), 's3_downloader')
    def process_image(self, payload, **kwargs):
        """ Gets image data from S3.
            This attempts the download settings.S3_ATTEMPTS times and cancels any
            outstanding attempt after settings.S3_ATTEMPTS * settings.S3_TIMEOUT seconds """

        def _create_deferred(timeout=0):
            """ Creates a deferred which will run after a given delay """
            if settings.USE_BOTO:
                dfd = task.deferLater(
                    reactor, timeout,
                    threads.deferToThread, self._get_data_from_s3, payload['image_path']
                )
                return dfd
            else:
                dfd = task.deferLater(
                    reactor, timeout, self._get_data_from_s3_tx, payload['image_path']
                )
                return dfd

        def _s3callback(deferred_list_result):
            """ When one of the requests has completed, cancel the rest.
                fireOnOneCallback fires with a (result, index) tuple """

            for dfd in dfds_list:
                if not dfd.called:
                    dfd.cancel()
            if not deferred_list_result[0]:
                raise NoDataInS3Error()
            payload['original_image'] = deferred_list_result[0]
            return payload

        def _timeout_and_fail(dfds_list):
            """ If none of the deferreds has finished after
                settings.S3_ATTEMPTS * settings.S3_TIMEOUT seconds, cancel any
                still pending; the suppressed cancellations surface as a
                NoDataInS3Error in _s3callback """

            for dfd in dfds_list:
                if not dfd.called:
                    dfd.cancel()

        def _suppress_attempt_error(result):
            """ Swallow any failure from an attempt, including the CancelledError
                raised when it is cancelled; returning None leaves a None result,
                which _s3callback maps to NoDataInS3Error """
            return None

        # Skip S3 if the cache check already supplied the original image
        if 'original_image' in payload:
            return payload

        if settings.DEBUG:
            log.msg(
                "[%s] Starting S3 Download" % datetime.now().isoformat(), logLevel=logging.DEBUG
            )

        # Make a deferred list of download attempts that have their predefined starting
        # times baked into the deferred and return when any deferred has a result.
        # Keep a list of the original deferreds as we cannot access them once in the DeferredList
        dfds_list = []
        for attempt in range(settings.S3_ATTEMPTS):
            dfds_list.append(_create_deferred(timeout=attempt * settings.S3_TIMEOUT))
            dfds_list[-1].addErrback(_suppress_attempt_error)
        dfds = defer.DeferredList(dfds_list, fireOnOneCallback=True)
        dfds.addCallback(_s3callback)

        # Auto cancel requests which don't fire after their max timeout
        reactor.callLater(
            settings.S3_ATTEMPTS*settings.S3_TIMEOUT, _timeout_and_fail, dfds_list
        )

        return dfds
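
The retry strategy in S3Downloader is worth isolating: every attempt is scheduled up front with task.deferLater, each staggered by one timeout interval, and DeferredList(fireOnOneCallback=True) hands back whichever attempt completes first while the rest are cancelled. A runnable sketch of just that race follows; fake_fetch stands in for the S3 GET and the attempt/delay values are arbitrary.

from twisted.internet import defer, reactor, task


def fake_fetch(path):
    """ Hypothetical stand-in for the boto/txAWS S3 GET """
    return 'data-for-%s' % path


def fetch_first_success(path, attempts=3, delay=2.0):
    """ Race several pre-scheduled download attempts and keep the first result """

    def _swallow_cancel(failure):
        # Attempts cancelled after the first success raise CancelledError;
        # trap it (re-raising anything else) and drop the result
        failure.trap(defer.CancelledError)

    # Schedule every attempt up front, each starting a little later than the last
    pending = [task.deferLater(reactor, i * delay, fake_fetch, path)
               for i in range(attempts)]
    for dfd in pending:
        dfd.addErrback(_swallow_cancel)

    dlist = defer.DeferredList(pending, fireOnOneCallback=True)

    def _first(result_and_index):
        # fireOnOneCallback fires the DeferredList with a (result, index) tuple
        result, _ = result_and_index
        for dfd in pending:
            if not dfd.called:
                dfd.cancel()
        return result

    return dlist.addCallback(_first)


def _show(data):
    print(data)
    reactor.stop()


if __name__ == '__main__':
    fetch_first_success('albums/123/cover.jpg').addCallback(_show)
    reactor.run()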
Example #4
class Cacher(object):
    """ Pipeline process which caches original and resized image into local cache """
    def __init__(self, engine):
        self.engine = engine

    @time_on_statsd(statsd_name(), 'cacher')
    @defer.inlineCallbacks
    def process_image(self, payload, **kwargs):
        """ Writes images to the cache """

        filecache_loc = settings.CACHE_LOCATION
        webcache_loc = settings.WEB_CACHE_LOCATION
        cache_filename_parts = payload['image_path'].split('.')
        filefront = cache_filename_parts[0]
        fileend = cache_filename_parts[1]

        original_filename = '%s.%s' % (
            filefront,
            fileend,
        )
        cache_filename = '%s_%sx%s_%s.%s' % (
            filefront,
            payload['width'],
            payload['height'],
            payload['mode'],
            fileend,
        )

        file_cache = os.path.join(filecache_loc, cache_filename)
        web_cache = os.path.join(webcache_loc, cache_filename)

        # Files are normally binned into sub-directories; create them in the cache
        dirs = os.path.dirname(file_cache)
        try:
            os.makedirs(dirs)
        except OSError:
            pass

        if 'skip_resize' in payload:
            # Just save/serve the original image as there is no resized image
            file_cache = os.path.join(filecache_loc, original_filename)
            web_cache = os.path.join(webcache_loc, original_filename)

        # Save the image to be served
        fd = open(file_cache, 'wb')
        fdesc.setNonBlocking(fd.fileno())
        yield fdesc.writeToFD(fd.fileno(), payload['image'])
        fd.close()

        if 'skip_resize' not in payload:
            # If the image being served has been resized, also cache the full-size original
            file_cache = os.path.join(filecache_loc, original_filename)
            fd = open(file_cache, 'wb')
            fdesc.setNonBlocking(fd.fileno())
            yield fdesc.writeToFD(fd.fileno(), payload['original_image'])
            fd.close()

        if settings.DEBUG:
            log.msg("[%s] Cached image location: %s" %
                    (datetime.now().isoformat(), file_cache),
                    logLevel=logging.DEBUG)

        defer.returnValue(web_cache)
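
Cacher's write path boils down to: create the cache sub-directories if they do not exist, then write the image bytes through a file descriptor switched to non-blocking with twisted.internet.fdesc. A stripped-down sketch of that write follows; cache_write and its arguments are illustrative names rather than pipeline API.

import os

from twisted.internet import fdesc


def cache_write(cache_dir, filename, data):
    """ Hypothetical standalone version of the write Cacher performs """
    path = os.path.join(cache_dir, filename)
    try:
        # Files are binned into sub-directories; create them on first use
        os.makedirs(os.path.dirname(path))
    except OSError:
        pass
    with open(path, 'wb') as fd:
        fdesc.setNonBlocking(fd.fileno())
        fdesc.writeToFD(fd.fileno(), data)
    return path


# Usage (paths are illustrative):
# served = cache_write('/var/cache/images', 'albums/123/cover_100x80_crop.jpg', image_bytes)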