# the decorators are assumptions here: a yield-based Tornado handler
# needs tornado.gen.engine (and web.asynchronous) to drive the Motor calls
@tornado.web.asynchronous
@tornado.gen.engine
def post(self, fileid):
    image = yield motor.Op(
        self.db.images.find_one,
        {'fileid': fileid}
    )
    if not image:
        raise tornado.web.HTTPError(404, "File not found")
    all_tiles = find_all_tiles(
        fileid,
        self.application.settings['static_path']
    )
    count = 0
    q = Queue('low', connection=self.redis)
    years = int(self.get_argument('years', 1))
    # chunk the tile paths into batches so each RQ job stays small
    buckets = []
    bucket = []
    for tile in all_tiles:
        tile_path = tile.replace(
            self.application.settings['static_path'], ''
        )
        if tile_path.startswith('/'):
            tile_path = tile_path[1:]
        bucket.append(tile_path)
        if len(bucket) > 50:
            buckets.append(bucket)
            bucket = []
    if bucket:
        buckets.append(bucket)
    for tile_paths in buckets:
        q.enqueue(
            update_tiles_metadata,
            tile_paths,
            years=years,
        )
        count += 1
    logging.info("Put %d jobs on the AWS update queue" % count)
    # remember when this image was last updated; note the legacy
    # redis-py Redis.setex argument order: (name, value, time)
    self.redis.setex(
        'awsupdated:%s' % fileid,
        time.time(),
        60 * 60 * 24 * 360 * years
    )
    url = self.reverse_url('admin_image', fileid)
    self.redirect(url)
# Same handler as above, but the queue now gets a default_timeout so
# long-running batches aren't considered lost after RQ's default
# 180 seconds.
@tornado.web.asynchronous
@tornado.gen.engine
def post(self, fileid):
    image = yield motor.Op(self.db.images.find_one, {'fileid': fileid})
    if not image:
        raise tornado.web.HTTPError(404, "File not found")
    all_tiles = find_all_tiles(
        fileid,
        self.application.settings['static_path']
    )
    count = 0
    q = Queue('low', connection=self.redis, default_timeout=600)
    years = int(self.get_argument('years', 1))
    buckets = []
    bucket = []
    for tile in all_tiles:
        tile_path = tile.replace(
            self.application.settings['static_path'], ''
        )
        if tile_path.startswith('/'):
            tile_path = tile_path[1:]
        bucket.append(tile_path)
        if len(bucket) > 50:
            buckets.append(bucket)
            bucket = []
    if bucket:
        buckets.append(bucket)
    for tile_paths in buckets:
        q.enqueue(
            update_tiles_metadata,
            tile_paths,
            years=years,
        )
        count += 1
    logging.info("Put %d jobs on the AWS update queue" % count)
    self.redis.setex(
        'awsupdated:%s' % fileid,
        time.time(),
        60 * 60 * 24 * 360 * years
    )
    url = self.reverse_url('admin_image', fileid)
    self.redirect(url)
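The `update_tiles_metadata` function being enqueued isn't reproduced here. A minimal sketch of what such an RQ job could look like, assuming it pushes out the caching headers on each already-uploaded S3 key; the `settings.TILES_BUCKET_ID` name and the exact header values are assumptions:

import time
from email.utils import formatdate

from boto.s3.connection import S3Connection

import settings


def update_tiles_metadata(tile_paths, years=1):
    # hypothetical sketch of the enqueued job: push the Cache-Control/
    # Expires horizon on every tile out by roughly `years` years
    conn = S3Connection(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY)
    bucket = conn.get_bucket(settings.TILES_BUCKET_ID)  # assumed setting
    max_age = 60 * 60 * 24 * 360 * years
    headers = {
        'Cache-Control': 'max-age=%d, public' % max_age,
        'Expires': formatdate(time.time() + max_age, usegmt=True),
    }
    for path in tile_paths:
        key = bucket.get_key(path)
        if key is None:
            continue  # never uploaded; nothing to update
        # boto updates metadata by copying the key onto itself
        key.copy(bucket.name, key.name, metadata=headers,
                 preserve_acl=True)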
import logging
import os
import random
import warnings

import motor
import redis.client
import tornado.gen
from boto.s3.connection import Location, S3Connection
from boto.s3.key import Key
from tornado.ioloop import IOLoop

import settings


@tornado.gen.engine  # the yield-based Motor calls below need this
def upload_all_tiles(fileid, static_path, bucket_id, max_count=0,
                     only_if_no_cdn_domain=False):
    log_file = os.path.join(static_path, 'upload.%s.txt' % fileid)
    conn = S3Connection(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY)
    bucket = conn.lookup(bucket_id) or conn.create_bucket(
        bucket_id, location=Location.EU)
    db_connection = motor.MotorConnection().open_sync()
    db = db_connection[settings.DATABASE_NAME]
    document = yield motor.Op(db.images.find_one, {'fileid': fileid})
    if not document:
        logging.warning("Image %r does not exist" % fileid)
        IOLoop.instance().stop()
        return
    if document.get('cdn_domain'):
        if only_if_no_cdn_domain:
            IOLoop.instance().stop()
            return
        else:
            warnings.warn("%s already has a cdn_domain (%s)" %
                          (fileid, document['cdn_domain']))
    try:
        count = 0
        all_done = True
        all_tiles = list(find_all_tiles(fileid, static_path))
        # shuffle so parallel workers are unlikely to collide
        random.shuffle(all_tiles)
        total = len(all_tiles)
        for each in all_tiles:
            # load which ones we've done every time to prevent
            # parallel workers uploading the same file more than once
            try:
                done = [x.strip() for x in open(log_file) if x.strip()]
            except IOError:
                done = []
            if each not in done:
                done.append(each)
                relative_path = each.replace(static_path, '')
                k = Key(bucket)
                k.key = relative_path
                # docs:
                # http://boto.cloudhackers.com/en/latest/ref/s3.html#boto.s3.\
                # key.Key.set_contents_from_filename
                print "uploading", relative_path,
                try:
                    count_done = set(x.strip() for x in open(log_file))
                except IOError:
                    count_done = []
                print "(%d of %d)" % (len(count_done), total)
                k.set_contents_from_filename(
                    each,
                    # because we sometimes reset and thus might
                    # upload it again
                    replace=False,
                    reduced_redundancy=True)
                k.make_public()
                open(log_file, 'a').write(each + '\n')
                count += 1
                if max_count > 0 and count >= max_count:
                    print "STOPPING @", count
                    all_done = False
                    break
        if all_done:
            data = {'cdn_domain': settings.DEFAULT_CDN_TILER_DOMAIN}
            print "Updating document finally"
            yield motor.Op(db.images.update,
                           {'_id': document['_id']},
                           {'$set': data})
            # invalidate some redis keys
            _redis = redis.client.Redis(settings.REDIS_HOST,
                                        settings.REDIS_PORT)
            lock_key = 'uploading:%s' % fileid
            _redis.delete(lock_key)
            metadata_key = 'metadata:%s' % fileid
            # re-set the metadata with a one-minute expiry; this gives
            # all workers a chance to finish any leftover jobs such as
            # optimizations (legacy setex argument order again)
            data = _redis.get(metadata_key)
            if data:
                _redis.setex(metadata_key, data, 60)
    finally:
        print "# done", count
        IOLoop.instance().stop()
# Same uploader as above, but the replace flag is now configurable and
# every upload gets aggressive cache headers.
@tornado.gen.engine
def upload_all_tiles(fileid, static_path, bucket_id, max_count=0,
                     only_if_no_cdn_domain=False, replace=True):
    log_file = os.path.join(static_path, 'upload.%s.txt' % fileid)
    conn = connect_to_s3()
    bucket = conn.lookup(bucket_id) or conn.create_bucket(
        bucket_id, location=Location.EU)
    db_connection = motor.MotorConnection().open_sync()
    db = db_connection[settings.DATABASE_NAME]
    document = yield motor.Op(db.images.find_one, {'fileid': fileid})
    if not document:
        logging.warning("Image %r does not exist" % fileid)
        IOLoop.instance().stop()
        return
    if document.get('cdn_domain'):
        if only_if_no_cdn_domain:
            IOLoop.instance().stop()
            return
        else:
            warnings.warn("%s already has a cdn_domain (%s)" %
                          (fileid, document['cdn_domain']))
    try:
        count = 0
        all_done = True
        all_tiles = list(find_all_tiles(fileid, static_path))
        random.shuffle(all_tiles)
        total = len(all_tiles)
        aggressive_headers = get_aggressive_headers()
        for each in all_tiles:
            # load which ones we've done every time to prevent
            # parallel workers uploading the same file more than once
            try:
                done = [x.strip() for x in open(log_file) if x.strip()]
            except IOError:
                done = []
            if each not in done:
                done.append(each)
                relative_path = each.replace(static_path, '')
                k = Key(bucket)
                k.key = relative_path
                # docs:
                # http://boto.cloudhackers.com/en/latest/ref/s3.html#boto.s3.\
                # key.Key.set_contents_from_filename
                print "uploading", relative_path,
                try:
                    count_done = set(x.strip() for x in open(log_file))
                except IOError:
                    count_done = []
                print "(%d of %d)" % (len(count_done), total)
                k.set_contents_from_filename(
                    each,
                    replace=replace,
                    reduced_redundancy=True,
                    headers=aggressive_headers,
                )
                k.make_public()
                open(log_file, 'a').write(each + '\n')
                count += 1
                if max_count > 0 and count >= max_count:
                    print "STOPPING @", count
                    all_done = False
                    break
        if all_done:
            data = {'cdn_domain': settings.DEFAULT_CDN_TILER_DOMAIN}
            print "Updating document finally"
            yield motor.Op(db.images.update,
                           {'_id': document['_id']},
                           {'$set': data})
            # invalidate some redis keys
            _redis = redis.client.Redis(settings.REDIS_HOST,
                                        settings.REDIS_PORT)
            lock_key = 'uploading:%s' % fileid
            _redis.delete(lock_key)
            metadata_key = 'metadata:%s' % fileid
            # make it expire in a minute; this gives all workers a chance
            # to finish any leftover jobs such as optimizations
            data = _redis.get(metadata_key)
            if data:
                _redis.setex(metadata_key, data, 60)
    finally:
        print "# done", count
        IOLoop.instance().stop()
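`get_aggressive_headers` and `connect_to_s3` aren't shown in the listing. A minimal sketch of what the headers helper could return, assuming it builds far-future caching headers for tiles that never change once uploaded; the 360-day horizon mirrors the TTL used in the handler above but is an assumption here:

import time
from email.utils import formatdate


def get_aggressive_headers():
    # hypothetical implementation: far-future caching headers for
    # immutable tiles (the 360-day horizon is an assumption)
    max_age = 60 * 60 * 24 * 360
    return {
        'Cache-Control': 'max-age=%d, public' % max_age,
        'Expires': formatdate(time.time() + max_age, usegmt=True),
    }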
def run(fileids):
    # sanity check: print every tile found for each image and assert
    # that it actually exists on disk
    static_path = os.path.join(os.path.abspath(os.curdir), 'static')
    for fileid in fileids:
        for each in find_all_tiles(fileid, static_path):
            print each
            assert os.path.isfile(each), each
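Every listing above leans on `find_all_tiles`, which isn't reproduced here. A sketch under the assumption that the tiles are .png files stored beneath a per-image directory inside the static path; the exact directory layout is an assumption:

import os


def find_all_tiles(fileid, static_path):
    # hypothetical layout: static/tiles/<fileid>/<zoom>/<x>,<y>.png
    root = os.path.join(static_path, 'tiles', fileid)
    for dirpath, __, filenames in os.walk(root):
        for filename in filenames:
            if filename.endswith('.png'):
                yield os.path.join(dirpath, filename)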