class Resizer(object):
    """
    Pipeline process which takes an image and resizes using a given image mode
    """

    def __init__(self, engine):
        self.engine = engine

    def _resize_using_pg(self, image, width, height, mode):
        """ Resize using image mode. """
        blob = pg.Blob(image)
        blob_out = pg.Blob()
        img = pg.Image(blob)
        img.filterType(pg.FilterTypes.LanczosFilter)
        img = process_image_with_mode(img, width, height, mode)
        # Image should be repaged after a crop/resize
        img.page(pg.Geometry(0, 0, 0, 0))
        img.quality(90)  # minimise artifacts but keep size down
        img.write(blob_out, 'JPEG')
        return blob_out.data, img.size().width(), img.size().height()

    @time_on_statsd(statsd_name(), 'resizer')
    @defer.inlineCallbacks
    def process_image(self, payload, **kwargs):
        """ Resizes image to given parameters """
        # If original path given, don't resize
        if 'skip_resize' in payload:
            payload['image'] = payload['original_image']
            defer.returnValue(payload)

        data, w, h = yield threads.deferToThread(
            self._resize_using_pg,
            payload['original_image'],
            payload['width'],
            payload['height'],
            payload['mode']
        )

        if settings.DEBUG:
            log.msg(
                "[%s] Resized Image Size %s" % (datetime.now().isoformat(), len(data)),
                logLevel=logging.DEBUG
            )

        payload['image'] = data
        payload['resized_width'] = w
        payload['resized_height'] = h
        defer.returnValue(payload)
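
# The mode-specific geometry handling lives in process_image_with_mode(),
# defined elsewhere in the codebase. The sketch below is only an illustration
# of what such a helper might look like for two hypothetical modes ('fit' and
# 'crop'), using the same pgmagick calls as _resize_using_pg above; it is not
# the actual implementation and is not used by the pipeline.
def _example_process_image_with_mode(img, width, height, mode):
    """ Illustration only: bounded resize ('fit') or fill-and-centre-crop ('crop') """
    if mode == 'crop':
        src_w = float(img.size().width())
        src_h = float(img.size().height())
        # Scale so the image covers the target box, then crop the middle
        ratio = max(width / src_w, height / src_h)
        new_w = int(round(src_w * ratio))
        new_h = int(round(src_h * ratio))
        img.scale('%dx%d!' % (new_w, new_h))  # '!' forces exact dimensions
        img.crop(pg.Geometry(width, height,
                             max((new_w - width) // 2, 0),
                             max((new_h - height) // 2, 0)))
    else:
        # 'fit': shrink to fit inside width x height, keeping aspect ratio
        img.scale('%dx%d' % (width, height))
    return img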
class CacheCheck(object):
    """
    Pipeline process which checks if an image already exists in the cache
    that can be resized, rather than hitting S3
    """

    def __init__(self, engine):
        self.engine = engine

    def _find_cache_matches(self, file_path):
        """
        Look for the un-resized original S3 download in the cache.
        `file_path` is a glob pattern ending in '*'; the trailing wildcard
        is stripped and replaced with the '.jpeg' extension.
        """
        orig_format = '%s.jpeg' % file_path[:-1]
        for f in glob.iglob(orig_format):
            if f == orig_format:
                return f
        return False

    def _read_image(self, file_path):
        """ Read a file from the filesystem """
        with open(file_path, 'rb') as image:
            return image.read()

    @time_on_statsd(statsd_name(), 'cache_check')
    @defer.inlineCallbacks
    def process_image(self, payload, **kwargs):
        """
        Checks the cache for a suitable source image.
        This allows S3 to be skipped
        """
        filecache_loc = settings.CACHE_LOCATION
        filefront = payload['image_path'].split('.')[0]
        file_cache = "%s*" % os.path.join(filecache_loc, filefront)

        bigger_cache = self._find_cache_matches(file_cache)
        if not bigger_cache:
            defer.returnValue(payload)

        if settings.DEBUG:
            log.msg('Original file found in cache, skipping S3: %s' % bigger_cache,
                    logLevel=logging.DEBUG)

        data = yield threads.deferToThread(self._read_image, bigger_cache)
        payload['original_image'] = data
        defer.returnValue(payload)
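
# Illustrative walk-through of the cache lookup above (hypothetical paths,
# not part of the pipeline): with settings.CACHE_LOCATION set to
# '/var/cache/images' and payload['image_path'] of 'full/abc123.jpeg', the
# glob pattern becomes '/var/cache/images/full/abc123*'; _find_cache_matches()
# strips the trailing wildcard and returns
# '/var/cache/images/full/abc123.jpeg' if that original is already on disk,
# or False otherwise, e.g.:
#
#     check = CacheCheck(engine=None)
#     pattern = os.path.join('/var/cache/images', 'full/abc123') + '*'
#     cached = check._find_cache_matches(pattern)
#     # -> '/var/cache/images/full/abc123.jpeg' or False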
class S3Downloader(object):
    """
    Pipeline process which downloads a media file from S3
    """

    def __init__(self, engine):
        self.engine = engine
        self.s3conn = boto.connect_s3(
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY
        )
        if not settings.USE_BOTO:
            self.txs3conn = AWSServiceRegion(
                access_key=settings.AWS_ACCESS_KEY_ID,
                secret_key=settings.AWS_SECRET_ACCESS_KEY,
                s3_uri=S3_US[0]['endpoint'],
                # s3_uri='https://s3.amazonaws.com',
            ).get_s3_client()
        self.botobucket = self.s3conn.get_bucket(settings.IMAGES_STORE)

    @defer.inlineCallbacks
    def _get_data_from_s3_tx(self, path):
        """ txAWS GET from S3 """
        image = yield self.txs3conn.get_object(
            settings.IMAGES_STORE,
            str(path),
        )
        defer.returnValue(image)

    def _get_data_from_s3(self, path):
        """ boto GET from S3 """
        key = self.botobucket.get_key(path)
        data = key.get_contents_as_string()
        return data

    @time_on_statsd(statsd_name(), 's3_downloader')
    def process_image(self, payload, **kwargs):
        """
        Gets image data from S3. The download is attempted
        settings.S3_ATTEMPTS times, staggered by settings.S3_TIMEOUT,
        and gives up after settings.S3_ATTEMPTS * settings.S3_TIMEOUT
        """
        def _create_deferred(timeout=0):
            """ Creates a deferred which will run after a given delay """
            if settings.USE_BOTO:
                return task.deferLater(
                    reactor,
                    timeout,
                    threads.deferToThread,
                    self._get_data_from_s3,
                    payload['image_path']
                )
            return task.deferLater(
                reactor,
                timeout,
                self._get_data_from_s3_tx,
                payload['image_path']
            )

        def _s3callback(deferred_list_result):
            """ When one of the requests has completed, cancel the rest """
            for dfd in dfds_list:
                if not dfd.called:
                    dfd.cancel()
            if not deferred_list_result[0]:
                raise NoDataInS3Error()
            payload['original_image'] = deferred_list_result[0]
            return payload

        def _timeout_and_fail(dfds_list):
            """
            If none of the deferreds has finished by
            S3_ATTEMPTS * S3_TIMEOUT then cancel them and return an error
            """
            for dfd in dfds_list:
                if not dfd.called:
                    dfd.cancel()

        def _suppress_cancel_error(result):
            """ Swallow the CancelledError raised by cancelled attempts """
            # `result` is a twisted Failure; returning None here converts the
            # error into a plain None result, which _s3callback turns into
            # NoDataInS3Error if no attempt produced data
            if result.check(defer.CancelledError):
                pass

        # Skip if the original already exists from the cache
        if 'original_image' in payload:
            return payload

        if settings.DEBUG:
            log.msg(
                "[%s] Starting S3 Download" % datetime.now().isoformat(),
                logLevel=logging.DEBUG
            )

        # Make a deferred list of download attempts that have their predefined
        # starting times baked into the deferred. Return when any deferred has
        # a successful result. Keep a list of the original deferreds as we
        # cannot access them once inside the DeferredList
        dfds_list = []
        for attempt in range(0, settings.S3_ATTEMPTS):
            dfds_list.append(_create_deferred(timeout=attempt * settings.S3_TIMEOUT))
            dfds_list[-1].addErrback(_suppress_cancel_error)

        dfds = defer.DeferredList(dfds_list, fireOnOneCallback=True)
        dfds.addCallback(_s3callback)

        # Auto cancel requests which don't fire after their max timeout
        reactor.callLater(
            settings.S3_ATTEMPTS * settings.S3_TIMEOUT,
            _timeout_and_fail,
            dfds_list
        )
        return dfds
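
# The staggered-download race above can be hard to follow inline with the S3
# plumbing, so here is a minimal, self-contained sketch of the same idea with
# a hypothetical fetch function and made-up delays; it is not used by the
# pipeline. Like the real code, failed or cancelled attempts are converted to
# quiet None results so only the first real result wins.
def _example_staggered_fetch(fetch, attempts=3, stagger=2.0):
    """
    Illustration only: start `fetch()` every `stagger` seconds, return the
    first result to arrive and cancel the attempts still pending.
    """
    dfds = [task.deferLater(reactor, i * stagger, fetch)
            for i in range(attempts)]

    def _swallow_cancel(failure):
        # Cancelled attempts errback with CancelledError; turn that into a
        # plain None result so the DeferredList only ever sees callbacks
        if failure.check(defer.CancelledError):
            pass

    for dfd in dfds:
        dfd.addErrback(_swallow_cancel)

    def _first_result(result_and_index):
        # fireOnOneCallback delivers (result, index) of the winning attempt
        result, _index = result_and_index
        for dfd in dfds:
            if not dfd.called:
                dfd.cancel()
        return result  # may be None if every attempt was cancelled

    race = defer.DeferredList(dfds, fireOnOneCallback=True)
    race.addCallback(_first_result)
    return race

# e.g. _example_staggered_fetch(lambda: threads.deferToThread(download))
# gives the same "race with staggered starts" behaviour that process_image
# builds by hand above.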
class Cacher(object):
    """
    Pipeline process which caches the original and resized image in the
    local cache
    """

    def __init__(self, engine):
        self.engine = engine

    @time_on_statsd(statsd_name(), 'cacher')
    @defer.inlineCallbacks
    def process_image(self, payload, **kwargs):
        """ Writes images to the cache """
        filecache_loc = settings.CACHE_LOCATION
        webcache_loc = settings.WEB_CACHE_LOCATION

        cache_filename_parts = payload['image_path'].split('.')
        filefront = cache_filename_parts[0]
        fileend = cache_filename_parts[1]

        original_filename = '%s.%s' % (
            filefront,
            fileend,
        )
        cache_filename = '%s_%sx%s_%s.%s' % (
            filefront,
            payload['width'],
            payload['height'],
            payload['mode'],
            fileend,
        )
        file_cache = os.path.join(filecache_loc, cache_filename)
        web_cache = os.path.join(webcache_loc, cache_filename)

        # Files are normally binned into subdirectories, create them in the cache
        dirs = os.path.dirname(file_cache)
        try:
            os.makedirs(dirs)
        except os.error:
            pass

        if 'skip_resize' in payload:
            # Just save/serve the original image as there is no resized image
            file_cache = os.path.join(filecache_loc, original_filename)
            web_cache = os.path.join(webcache_loc, original_filename)

        # Save the image to be served
        fd = open(file_cache, 'wb')
        fdesc.setNonBlocking(fd.fileno())
        yield fdesc.writeToFD(fd.fileno(), payload['image'])
        fd.close()

        if 'skip_resize' not in payload:
            # If the image to be served has been resized, also cache the
            # full size original
            file_cache = os.path.join(filecache_loc, original_filename)
            fd = open(file_cache, 'wb')
            fdesc.setNonBlocking(fd.fileno())
            yield fdesc.writeToFD(fd.fileno(), payload['original_image'])
            fd.close()

        if settings.DEBUG:
            log.msg("[%s] Cached image location: %s" %
                    (datetime.now().isoformat(), file_cache),
                    logLevel=logging.DEBUG)

        defer.returnValue(web_cache)
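
# A small runnable sketch of the cache-naming convention used above; the
# path, dimensions, mode and cache root below are hypothetical example values
# and the helper is not part of the pipeline.
def _example_cache_name(image_path='full/abc123.jpeg', width=320, height=240,
                        mode='crop', cache_root='/var/cache/images'):
    """ Illustration only: build the on-disk name for a resized variant """
    front, ext = image_path.split('.')
    resized = '%s_%sx%s_%s.%s' % (front, width, height, mode, ext)
    return os.path.join(cache_root, resized)
    # -> '/var/cache/images/full/abc123_320x240_crop.jpeg', stored next to
    #    the untouched original '/var/cache/images/full/abc123.jpeg'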