def save(self):
    """Persist this object's fields to a Redis hash and register its hash
    in the per-class type set.

    The removed `key = _k(self.hash)` local was dead code: the hash is
    actually written under `self.__get_key()`.
    """
    obj = self.__get_vars()
    del obj['hash']  # the hash is encoded in the Redis key, not stored as a field
    r.hmset(self.__get_key(), obj)
    r.sadd(_k(self.__class__.__name__.lower()), self.hash)  # Add to type-set
def save(self):
    """Write this object's attributes into its Redis hash and index the
    object by type.

    Fix: drop the unused local `key = _k(self.hash)` — nothing read it;
    the actual storage key comes from `self.__get_key()`.
    """
    obj = self.__get_vars()
    del obj["hash"]  # hash lives in the key itself, not in the hash body
    r.hmset(self.__get_key(), obj)
    r.sadd(_k(self.__class__.__name__.lower()), self.hash)  # Add to type-set
def save(self):
    """Serialize this object's fields, store them in Redis, and register
    the object in its type set.

    Fixes: removed the unused local `key = _k(self.hash)` (the write uses
    `self.__get_key()`), and removed the leftover debug
    `print("saving", obj)` statement.
    """
    obj = self.__get_vars()
    del obj['hash']  # hash is part of the Redis key, not a stored field
    # stringify() presumably flattens values into Redis-storable strings —
    # TODO confirm against its definition.
    obj = stringify(obj)
    r.hmset(self.__get_key(), obj)
    r.sadd(_k(self.__class__.__name__.lower()), self.hash)  # Add to type-set
def add_report(self):
    """Record one more abuse report for this file, both on the in-memory
    object and in the Redis counter, and flag it as report-triggered."""
    self.reports = int(self.reports) + 1
    r.hincrby(File.get_key(self.hash), "reports", 1)
    # Any positive report count puts the file in the triggered set.
    if self.reports > 0:
        r.sadd(_k("reports-triggered"), self.hash)
def upload(f, filename):
    """Store an uploaded file and queue it for asynchronous processing.

    Returns an ``(identifier, status)`` pair:
      * ``(None, 420)``       — rate limit exceeded,
      * ``(identifier, 409)`` — the file already exists,
      * ``(identifier, 200)`` — accepted and queued.

    Fix: ``mimetypes.guess_extension`` may return ``None`` for unknown MIME
    types, which used to crash on the ``[1:]`` slice; fall back to "bin".
    """
    if not f.content_type:
        f.content_type = get_mimetype(filename) or "application/octet-stream"

    # Debug mode and whitelisted IPs bypass the upload rate limit.
    ignore_limit = current_app.debug or r.sismember(_k("whitelisted_ips"), get_ip())
    if not ignore_limit:
        rate_limit_update(file_length(f))
        if rate_limit_exceeded():
            return None, 420

    h = get_hash(f)
    identifier = to_id(h)
    if "." not in filename:
        # This is not very scientific, but it works; guard against
        # guess_extension() returning None for unknown content types.
        guessed = mimetypes.guess_extension(f.content_type)
        ext = guessed[1:] if guessed else "bin"
    else:
        ext = extension(filename)
    filename = "%s.%s" % (identifier, ext)
    path = tempfile.NamedTemporaryFile(suffix="." + ext).name  # Fix for imagemagick's silliness

    if os.path.exists(file_storage(filename)):
        if File.exists(identifier):
            return identifier, 409
        else:
            # Delete residual files from storage by creating a dummy File
            dummy = File(original=filename)
            dummy.delete = lambda: None  # nop
            delete_file(dummy)

    f.seek(0)  # Otherwise it'll write a 0-byte file
    f.save(path)

    file_object = File(hash=identifier)
    file_object.compression = os.path.getsize(path)
    file_object.original = filename
    file_object.mimetype = f.content_type
    file_object.ip = secure_ip()

    result = process_file.delay(path, identifier, ignore_limit)
    file_object.taskid = result.id
    file_object.save()
    return identifier, 200
def upload_url(self):
    """Fetch a remote URL and upload it as a file, caching URL -> hash.

    Fix: narrowed the bare ``except:`` to ``except Exception:`` so
    SystemExit/KeyboardInterrupt are no longer swallowed.
    """
    url = request.form['url']
    f = URLFile()
    try:
        success = f.download(url)
    except Exception:
        return {'error': 400}, 400
    if not success:
        return {'error': 404}, 404
    result, status = upload(f, f.filename)
    r.set(_k("url.%s" % url), result)
    return _upload_object(result, status)
def upload(f, filename):
    """Store an uploaded file and queue it for asynchronous processing.

    Returns an ``(identifier, status)`` pair:
      * ``(None, 420)``       — rate limit exceeded,
      * ``(identifier, 409)`` — the file already exists,
      * ``(identifier, 200)`` — accepted and queued.

    Fix: ``mimetypes.guess_extension`` may return ``None`` for unknown MIME
    types, which used to crash on the ``[1:]`` slice; fall back to "bin".
    """
    if not f.content_type:
        f.content_type = get_mimetype(filename) or "application/octet-stream"

    # Debug mode and whitelisted IPs bypass the upload rate limit.
    ignore_limit = current_app.debug or r.sismember(_k("whitelisted_ips"), get_ip())
    if not ignore_limit:
        rate_limit_update(file_length(f))
        if rate_limit_exceeded():
            return None, 420

    h = get_hash(f)
    identifier = to_id(h)
    if "." not in filename:
        # This is not very scientific, but it works; guard against
        # guess_extension() returning None for unknown content types.
        guessed = mimetypes.guess_extension(f.content_type)
        ext = guessed[1:] if guessed else "bin"
    else:
        ext = extension(filename)
    filename = "%s.%s" % (identifier, ext)
    path = tempfile.NamedTemporaryFile(suffix="." + ext).name  # Fix for imagemagick's silliness

    if os.path.exists(file_storage(filename)):
        if File.exists(identifier):
            return identifier, 409
        else:
            # Delete residual files from storage by creating a dummy File
            dummy = File(original=filename)
            dummy.delete = lambda: None  # nop
            delete_file(dummy)

    f.seek(0)  # Otherwise it'll write a 0-byte file
    f.save(path)

    file_object = File(hash=identifier)
    file_object.compression = os.path.getsize(path)
    file_object.original = filename
    file_object.mimetype = f.content_type
    file_object.ip = secure_ip()

    result = process_file.delay(path, identifier, ignore_limit)
    file_object.taskid = result.id
    file_object.save()
    return identifier, 200
def urlinfo(self):
    """Look up cached URL -> hash mappings and return file info per URL.

    Stale cache entries (a cached hash with no backing File) are removed.
    Fixes: ``split(",")`` already returns a one-element list when no comma
    is present, so the ``"," in l`` branch was redundant; also renamed the
    ambiguous local ``l``.
    """
    raw = request.form['list']
    result = {}
    for item in raw.split(","):
        key = _k("url.%s" % item)
        h = r.get(key)
        if not h:
            result[item] = None
            continue
        f = File.from_hash(h)
        if f:
            result[item] = _file_object(f)
        else:
            result[item] = None
            r.delete(key)  # drop the stale URL cache entry
    return result
def upload_url(self):
    """Download a remote URL and store it, rejecting Tor clients,
    oversized downloads, and unreachable URLs."""
    if is_tor():
        return {'error': 420}, 420
    url = request.form['url']
    remote = URLFile()
    try:
        fetched = remote.download(url)
    except FileTooBig:
        return {'error': 413}, 413
    except Exception:
        return {'error': 400}, 400
    if not fetched:
        return {'error': 404}, 404
    result, status = upload(remote, remote.filename)
    # Remember which hash this URL resolved to.
    r.set(_k("url.%s" % url), result)
    return _upload_object(result, status)
def upload_url(self):
    """Download a URL, upload it, and cache the URL -> hash mapping when
    the upload result exposes a hash.

    Fix: narrowed the bare ``except:`` to ``except Exception:`` so
    SystemExit/KeyboardInterrupt are no longer swallowed.
    """
    url = request.form['url']
    f = URLFile()
    try:
        success = f.download(url)
    except Exception:
        return {'error': 400}, 400
    if not success:
        return {'error': 404}, 404
    result = _upload_f(f, f.filename)
    # _upload_f may return a dict or a (dict, status) tuple; extract the
    # hash in either case so the URL cache can be populated.
    h = None
    if isinstance(result, dict) and 'hash' in result:
        h = result['hash']
    elif isinstance(result, tuple) and 'hash' in result[0]:
        h = result[0]['hash']
    if h:
        r.set(_k("url.%s" % url), h)
    return result
from mediacrush.objects import File, RedisObject from mediacrush.database import r, _k from mediacrush.fileutils import file_storage from mediacrush.processing.invocation import Invocation from mediacrush.config import _cfg, _cfgi import sys import json if __name__ == '__main__': files = File.get_all() count = len(files) print "About to process %d files." % count done = 0 errors = [] for f in files: h = f.hash k = _k("file.%s" % h) r.hset(k, "ip", "") print "\n%d/%d files processed, errors:" % (done, count), errors def normalise_processor(processor): if not processor: return None return processor.split("/")[0] if "/" in processor else processor
def klass(cls, hash):
    """Return the first subclass whose type-set contains *hash*, else None."""
    matches = (sub for sub in cls.__subclasses__()
               if r.sismember(_k(sub.__name__.lower()), hash))
    return next(matches, None)
from mediacrush.objects import File
from mediacrush.database import r, _k
from mediacrush.processing.detect import detect
from mediacrush.mimetypes import get_mimetype, extension
from mediacrush.files import file_storage

if __name__ == '__main__':
    # One-off migration: re-detect each file's processor and repair its
    # stored MIME type.
    # Hoisted out of the loop: this mapping is invariant per iteration.
    overrides = {
        'jpe': 'image/jpeg',
        'ogg': 'audio/ogg',
    }
    for h in r.smembers(_k("file")):
        f = File.from_hash(h)
        ext = extension(f.original)
        processor, extra = detect(file_storage(f.original))  # extra is unused here
        f.mimetype = overrides.get(ext, get_mimetype(f.original))
        f.processor = processor
        f.save()
def get_key(cls, hash):
    """Build the namespaced Redis key "<classname>.<hash>" for this class."""
    return _k("%s.%s" % (cls.__name__.lower(), hash))
from mediacrush.worker import process_gif
from mediacrush.database import r, _k
import time
import multiprocessing

if __name__ == '__main__':
    # Long-running consumer: drain the GIF queue into a worker pool,
    # then sleep one second before polling Redis again.
    workers = multiprocessing.Pool()
    while True:
        while r.llen(_k("gifqueue")):
            task = r.lpop(_k("gifqueue"))
            workers.apply_async(process_gif, args=(task,))
        time.sleep(1)
def delete(self):
    """Remove this object from its per-class type set, then delete its hash."""
    type_set = _k(self.__class__.__name__.lower())
    r.srem(type_set, self.hash)
    r.delete(self.__get_key())
# One-off migration script (Python 2 syntax). For every known file, run
# processor detection and store a packed integer "configvector" of the
# processor's flags into the file's Redis hash.
# NOTE(review): the original source was collapsed onto one line; the
# statement nesting below (hset after the except, summary print after the
# loop) is a reconstruction — confirm against version history.
print "About to process %d files." % count
done = 0
errors = []
for f in files:
    h = f.hash
    # Default to 0 so the hset below always has a value, even on error.
    configvector = 0
    try:
        path = file_storage(f.original)
        result = detect(path)
        if result and result['flags']:
            # BitVector presumably packs named flags into an int —
            # TODO confirm against its definition.
            bv = BitVector(flags_per_processor.get(result['type'], []))
            for flag, value in result['flags'].items():
                setattr(bv, flag, value)
            configvector = int(bv)
            print h, result['type'], int(bv)
        done += 1
    except Exception, e:
        errors.append(h)
    # Written unconditionally: failed files get configvector = 0.
    k = _k("file.%s" % h)
    r.hset(k, "configvector", configvector)
print "%d/%d files processed, errors:" % (done, count), errors
def get_key(cls, hash):
    """Compose the namespaced Redis key "<classname>.<hash>" for this class."""
    name = cls.__name__.lower()
    return _k("{}.{}".format(name, hash))