def parse_zipped_j3m(uv_task):
    """Store a document's raw J3M as JSON and queue the J3M pipeline.

    The J3M file is ``uv_task.j3m_name`` when the task carries that
    attribute, otherwise ``j3m_raw.gz`` under the document's base path.
    A gzip payload is inflated first; the result must be JSON.  It is then
    added to the document as the ``j3m_raw.json`` asset and the task is
    routed on to the j3mify, massage, signature and visual-content steps.

    The task is failed when the document or the file is missing, and
    failed with status 412 when the payload is not JSON.
    """
    task_tag = "PARSING ZIPPED J3M"
    start_banner = "\n\n************** %s [START] ******************\n" % task_tag
    error_banner = "\n\n************** %s [ERROR] ******************\n" % task_tag
    end_banner = "\n\n************** %s [END] ******************\n" % task_tag

    print(start_banner)
    print("parsing zipped j3m asset at %s" % uv_task.doc_id)
    uv_task.setStatus(302)

    import os
    from lib.Worker.Models.uv_document import UnveillanceDocument
    from conf import DEBUG
    from vars import ASSET_TAGS

    media = UnveillanceDocument(_id=uv_task.doc_id)
    if media is None:
        print("DOC IS NONE")
        print(error_banner)
        uv_task.fail()
        return

    from conf import ANNEX_DIR

    # An explicit name on the task wins over the default location.
    j3m_name = (
        uv_task.j3m_name
        if hasattr(uv_task, "j3m_name")
        else os.path.join(media.base_path, "j3m_raw.gz")
    )

    if not media.getFile(j3m_name):
        print("NO J3M.GZ at %s" % j3m_name)
        print(error_banner)
        uv_task.fail()
        return

    from cStringIO import StringIO
    from lib.Worker.Utils.funcs import getFileType, unGzipBinary
    from vars import MIME_TYPES

    j3m = media.loadFile(j3m_name)
    j3m_type = getFileType(j3m, as_buffer=True)
    if j3m_type == MIME_TYPES['gzip']:
        j3m = unGzipBinary(j3m)

    # The type check is skipped when there is no content to inspect.
    is_json = (
        j3m is not None
        and getFileType(j3m, as_buffer=True) == MIME_TYPES['json']
    )
    if not is_json:
        # Reports the type detected before any inflation.
        print("THIS IS NOT A J3M (type %s)" % j3m_type)
        print(error_banner)
        uv_task.fail(status=412)
        return

    asset_path = "j3m_raw.json"
    media.addAsset(j3m, asset_path, as_literal=False)

    next_steps = [
        "J3M.j3mify.j3mify",
        "J3M.massage_j3m.massageJ3M",
        "PGP.verify_signature.verifySignature",
        "J3M.verify_visual_content.verifyVisualContent"
    ]
    uv_task.put_next(next_steps)
    uv_task.routeNext(inflate={'j3m_name' : asset_path})
    uv_task.finish()

    print(end_banner)
def pullFromAnnex(uv_task):
    """Check that a document's file is available and optionally re-sync it.

    Loads the document named by ``uv_task.doc_id`` and fails the task when
    the document is missing or ``document.getFile`` finds no content for
    its ``file_name``.  When the task carries a truthy ``attempt_sync``
    attribute, ``sync_file.py`` (under ``BASE_DIR``) is run on the file
    with the configured ``python_home`` interpreter.  The sync runs with
    ``warn_only=True`` and its result is not inspected.
    """
    task_tag = "PULL FROM ANNEX"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("pulling file from document %s from annex" % uv_task.doc_id)
    print(uv_task.emit())
    uv_task.setStatus(302)

    # os.path.join is used below; import locally so the block does not
    # depend on a module-level import.
    import os
    from lib.Worker.Models.uv_document import UnveillanceDocument
    from conf import BASE_DIR, getConfig

    document = UnveillanceDocument(_id=uv_task.doc_id)
    if document is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    if not document.getFile(document.file_name):
        print("NO FILE CONTENT")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    # The original tested hasattr(uv_task, "atttempt_sync") (misspelled),
    # so this branch could never run.
    if getattr(uv_task, "attempt_sync", False):
        print("SHOULD ATTEMPT SYNC AGAIN.")
        from fabric.api import settings, local

        with settings(warn_only=True):
            local("%s %s %s" % (
                getConfig('python_home'),
                os.path.join(BASE_DIR, "sync_file.py"),
                document.file_name))

    uv_task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def evaluateFile(task):
    """Run the first task registered for the MIME type of ``task.file_name``.

    Loads the document named by ``task.doc_id`` and fails the task when the
    document or the file content is missing.  Otherwise the file's MIME
    type is detected from its path under ``ANNEX_DIR``, the first entry of
    ``MIME_TYPE_TASKS[mime_type]`` is used as the ``task_path`` of a new
    ``UnveillanceTask``, this task is recorded as completed on the
    document, and the new task is run.

    The task is failed when no task is registered for the MIME type.
    """
    task_tag = "EVALUATING DOCUMENT (INFORMACAM)"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("image preprocessing at %s" % task.doc_id)
    task.setStatus(302)

    # os.path.join is used below; import locally so the block does not
    # depend on a module-level import.
    import os
    from lib.Worker.Models.uv_document import UnveillanceDocument

    document = UnveillanceDocument(_id=task.doc_id)
    if document is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    if not document.getFile(task.file_name):
        print("NO FILE CONTENT")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    from lib.Worker.Models.uv_task import UnveillanceTask
    from lib.Worker.Utils.funcs import getFileType
    from vars import MIME_TYPE_TASKS
    from conf import ANNEX_DIR

    try:
        mime_type = getFileType(os.path.join(ANNEX_DIR, task.file_name))
        new_task = UnveillanceTask(inflate={
            'task_path' : MIME_TYPE_TASKS[mime_type][0],
            'doc_id' : document._id,
            'file_name' : task.file_name
        })

        document.addCompletedTask(task.task_path)
        new_task.run()
    except (KeyError, IndexError) as e:
        # KeyError: MIME type has no entry; IndexError: entry is empty.
        # The original caught only IndexError, so an unregistered type
        # escaped without the task being marked failed.
        print("NO NEXT TASK: %s" % e)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def decrypt(uv_task):
    """Decrypt a document's PGP file with gpg and route the result.

    The file ``uv_task.pgp_file`` (relative to ``ANNEX_DIR``) is decrypted
    with the ``gpg_pwd`` passphrase and ``gpg_dir`` home directory from the
    secrets store.  Output goes to ``uv_task.save_as`` when the task has
    that attribute, otherwise to ``<pgp_file>.decrypted``.

    When the task has no next step queued, the decrypted file's MIME type
    is looked up in ``MIME_TYPE_TASKS`` and the matching tasks are queued.
    The task is then routed on with ``file_name`` set to the output path.

    The task is failed when the document, the PGP file or the passphrase
    is missing, and failed with status 412 when gpg exits with status 2.
    """
    task_tag = "DECRYPTING"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("decrypting pgp blob for %s" % uv_task.doc_id)
    uv_task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument

    media = UnveillanceDocument(_id=uv_task.doc_id)
    if media is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    if not media.getFile(uv_task.pgp_file):
        print("NO PGP FILE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    from conf import getSecrets

    gpg_pwd = getSecrets("gpg_pwd")
    if gpg_pwd is None:
        err_msg = "NO PASSPHRASE TO DECRYPT"
        print(err_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(message=err_msg)
        return

    gpg_dir = getSecrets("gpg_dir")

    import os
    try:
        from shlex import quote
    except ImportError:
        # Python 2: shlex has no quote(); pipes.quote is the equivalent.
        from pipes import quote
    from fabric.api import local, settings
    from fabric.context_managers import hide
    from conf import ANNEX_DIR

    # save as task.pgp_file.decrypted unless the task names a destination
    if not hasattr(uv_task, "save_as"):
        save_as = "%s.decrypted" % uv_task.pgp_file
    else:
        save_as = uv_task.save_as

    print("\n\n************** %s [INFO] ******************\n" % task_tag)
    print("SAVING DECRYPTED ASSET TO %s IF SUCCESSFUL" % save_as)

    with settings(hide("everything"), warn_only=True):
        # Every interpolated value is shell-quoted: the command goes through
        # a shell, and a passphrase or path with spaces or metacharacters
        # would otherwise break or hijack it.  '"%s" % value' coerces to
        # text exactly as the original format string did.
        # NOTE(review): the passphrase is still passed on the command line;
        # consider --passphrase-fd/--passphrase-file.
        d_cmd = "gpg --yes --no-tty --homedir=%s --passphrase %s --output %s --decrypt %s" % (
            quote("%s" % gpg_dir),
            quote("%s" % gpg_pwd),
            quote(os.path.join(ANNEX_DIR, save_as)),
            quote(os.path.join(ANNEX_DIR, uv_task.pgp_file)),
        )

        decrypted = local(d_cmd)
        print(decrypted.return_code)

    # Drop both references to the passphrase, including the command string.
    del gpg_pwd, d_cmd

    # NOTE(review): only exit status 2 is treated as failure; any other
    # non-zero status falls through as success -- confirm this is intended.
    if decrypted.return_code == 2:
        err_msg = "could not successfully decrypt %s" % uv_task.pgp_file
        print(err_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(status=412, message=err_msg)
        return

    media.addCompletedTask(uv_task.task_path)

    if uv_task.get_next() is None:
        # route according to the mime type of the decrypted file
        from vars import MIME_TYPE_TASKS
        from lib.Worker.Utils.funcs import getFileType

        mime_type = getFileType(os.path.join(ANNEX_DIR, save_as))

        # usable: json (a j3m), zip (a source or a log->batch)
        if mime_type in MIME_TYPE_TASKS:
            print("mime type (%s) usable..." % mime_type)

            try:
                uv_task.put_next(MIME_TYPE_TASKS[mime_type])
            except Exception as e:
                # Best effort: a queuing failure is reported, not fatal.
                print(e)

    uv_task.routeNext(inflate={"file_name": save_as})
    uv_task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def unzipAndEvaluateArchive(uv_task):
    """Unzip a document's archive and route it as a source or a J3M log.

    The archive is ``uv_task.file_name`` when the task has that attribute,
    otherwise the document's own ``file_name``.  It is unzipped (from
    within ``ANNEX_DIR``) into the document's base path and the extracted
    file names are matched against the expected names for each facet:

    * ``source`` -- public key, base image, credentials
    * ``j3mlog`` -- ``log.j3m`` / ``log.j3m.json`` and ``.jpg``/``.mkv``
      media

    Every matching file name is collected into ``assets``.  If the task has
    no next step queued, the first matching facet decides which task chain
    is queued.  The task is then routed on with the ``assets`` list.

    The task is failed when the document or the archive is missing, failed
    with status 412 when listing the extracted files raises, and failed
    when no file matches any facet.
    """
    task_tag = "UNZIPPING FILE"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("unzipping and evaluating %s" % uv_task.doc_id)
    uv_task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument
    from conf import DEBUG

    media = UnveillanceDocument(_id=uv_task.doc_id)
    if media is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    # Local names avoid shadowing the zip/file builtins.
    if hasattr(uv_task, "file_name"):
        archive = uv_task.file_name
    else:
        archive = media.file_name

    if DEBUG:
        print("Zip file here: %s" % archive)

    if archive is None or not media.getFile(archive):
        print("THERE IS NO ZIP HERE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    import os
    import re
    from time import sleep
    # Explicit names instead of the original "from fabric.api import *".
    from fabric.api import local, settings
    from conf import ANNEX_DIR

    this_dir = os.getcwd()
    os.chdir(ANNEX_DIR)
    try:
        with settings(warn_only=True):
            local("unzip -o %s -d %s" % (archive, media.base_path))
            sleep(2)

            try:
                unzipped_files = local(
                    "ls %s" % media.base_path, capture=True).splitlines()
            except Exception as e:
                print(e)
                err_msg = "Could not find any unzipped files in %s" % media.base_path
                print(err_msg)
                print("\n\n************** %s [ERROR] ******************\n" % task_tag)
                uv_task.fail(status=412, message=err_msg)
                return
    finally:
        # Restore the working directory on every path; the original skipped
        # this on the early return above.
        os.chdir(this_dir)

    if DEBUG:
        print("UNZIPPED FILES: \n%s" % unzipped_files)

    ZIPPED_ASSET_EXPECTED_NAMES = {
        'source' : [
            r"publicKey",
            r"baseImage_\d",
            r"credentials"
        ],
        'j3mlog' : [
            r"log.j3m(?:\.json)?",
            r".+\.(?:jpg|mkv)$"
        ]
    }

    assets = []

    for facet, names in ZIPPED_ASSET_EXPECTED_NAMES.items():
        for file_name in unzipped_files:
            if not any(re.match(n, file_name) is not None for n in names):
                continue

            assets.append(file_name)

            # Only the first matching file gets to choose the task chain.
            if uv_task.get_next() is None:
                if facet == "source":
                    uv_task.put_next([
                        "Source.init_source.initSource"
                    ])
                elif facet == "j3mlog":
                    uv_task.put_next([
                        "Log.unpack_j3mlog.unpackJ3MLog",
                        "J3M.j3mify.j3mify",
                        "J3M.massage_j3m.massageJ3M",
                        "PGP.verify_signature.verifySignature",
                        "J3M.verify_visual_content.verifyVisualContent"
                    ])

    media.addCompletedTask(uv_task.task_path)

    if uv_task.get_next() is None:
        print("NO DECERNABLE TASK PATH")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    # could be either a source or a j3mlog at this point.
    uv_task.routeNext(inflate={'assets' : assets})
    uv_task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)