def _j3m_to_geopoint(raw_coords):
    """Return the first two comma-separated numbers of *raw_coords*, swapped.

    *raw_coords* is stringified, its first and last characters (the
    enclosing brackets) are dropped, and the remainder is split on commas.

    Raises ValueError or IndexError when two floats cannot be extracted.
    """
    parts = str(raw_coords)[1:-1].split(",")
    return [float(parts[1]), float(parts[0])]


def _j3m_massage_sensor_capture(captures, debug=False):
    """Rewrite coordinates and hash wifi BSSIDs in each sensor playback, in place.

    Entries that do not have the expected shape are skipped instead of
    aborting the whole task.
    """
    from hashlib import sha1

    if not isinstance(captures, list):
        if debug:
            print("sensorCapture is not a list; skipping")
        return

    parse_errors = (KeyError, TypeError, ValueError, IndexError)

    for playback in captures:
        try:
            sensor = playback['sensorPlayback']
        except (KeyError, TypeError) as e:
            if debug:
                print("no key %s" % e)
            continue

        if not isinstance(sensor, dict):
            continue

        if 'gps_coords' in sensor:
            try:
                if debug:
                    print("REPLACING %s as geopoint" % (sensor['gps_coords'],))
                sensor['gps_coords'] = _j3m_to_geopoint(sensor['gps_coords'])
            except parse_errors as e:
                if debug:
                    print(e)

        if 'regionLocationData' in sensor:
            try:
                region = sensor['regionLocationData']
                if debug:
                    print("REPLACING %s as geopoint" % (region['gps_coords'],))
                region['gps_coords'] = _j3m_to_geopoint(region['gps_coords'])
            except parse_errors as e:
                if debug:
                    print(e)

        networks = sensor.get('visibleWifiNetworks')
        if isinstance(networks, list):
            for network in networks:
                # Handled per network so one bad entry does not stop the
                # remaining networks from getting their hash.
                try:
                    network['bt_hash'] = sha1(network['bssid']).hexdigest()
                except (KeyError, TypeError, ValueError) as e:
                    if debug:
                        print(e)


def _j3m_massage_forms(j3m_data, media, task_tag, debug=False):
    """Apply forms.json mappings to user-appended answers; return searchable text.

    For every associated form whose namespace matches a definition in
    forms.json, coded answer tokens are replaced through the definition's
    ``mapping`` and gzipped, base64-encoded audio answers are stored as 3gp
    assets on *media*.  The values of all remaining (non-audio) answers are
    collected and returned as a list.
    """
    import os
    import json

    from conf import getConfig
    from vars import ASSET_TAGS, MIME_TYPES
    from lib.Core.Utils.funcs import b64decode
    from lib.Worker.Utils.funcs import getFileType, unGzipBinary

    searchable_text = []
    if 'userAppendedData' not in j3m_data:
        return searchable_text

    # Counted across the whole document: a per-form counter made every form
    # write its first clip to "audio_0.3gp", overwriting earlier ones.
    audio_count = 0

    try:
        forms_path = os.path.join(
            getConfig('informacam.forms_root'), "forms.json")
        with open(forms_path, 'rb') as forms_file:
            form_data = json.loads(forms_file.read())['forms']

        for udata in j3m_data['userAppendedData']:
            for a_form in udata['associatedForms']:
                answers = a_form['answerData']
                st_keys = list(answers.keys())

                for form in form_data:
                    if form['namespace'] != a_form['namespace']:
                        continue

                    # Replace coded tokens with their mapped values.  Only
                    # the first occurrence of a code is replaced.
                    for mapping in form.get('mapping') or []:
                        for group, replacements in mapping.items():
                            try:
                                tokens = answers[group].split(" ")
                            except (KeyError, AttributeError) as e:
                                if debug:
                                    print("no key %s" % e)
                                continue

                            for replacement in replacements:
                                for code, value in replacement.items():
                                    if code in tokens:
                                        tokens[tokens.index(code)] = value

                            answers[group] = " ".join(tokens)

                    for audio in form.get('audio_form_data') or []:
                        # Audio answers are binary; keep them out of the
                        # searchable text.
                        st_keys = [k for k in st_keys if k != audio]

                        try:
                            audio_data = b64decode(answers[audio])
                            if audio_data is None:
                                if debug:
                                    print("could not unb64 audio")
                                continue

                            audio_mime = getFileType(
                                audio_data, as_buffer=True)
                            if audio_mime != MIME_TYPES['gzip']:
                                if debug:
                                    print("audio is not gzipped")
                                continue

                            audio_f = "audio_%d.3gp" % audio_count
                            media.addAsset(
                                unGzipBinary(audio_data), audio_f,
                                tags=[ASSET_TAGS['A_3GP']],
                                description="3gp audio file from form")

                            # NOTE(review): the answer points at a .wav name
                            # although no 3gp -> wav conversion is queued
                            # here; confirm where that file is produced.
                            answers[audio] = "audio_%d.wav" % audio_count
                            audio_count += 1
                        except KeyError as e:
                            if debug:
                                print("no key %s" % e)

                # NOTE(review): nesting reconstructed -- answers are
                # collected once per associated form, whether or not a
                # form definition matched.  Confirm against the original.
                searchable_text.extend(answers[key] for key in st_keys)

    except KeyError as e:
        if debug:
            print("no key %s" % e)
    except IOError as e:
        print("\n\n************** %s [WARN] ******************\n" % task_tag)
        if debug:
            print("no forms to go over: %s" % e)
    except ValueError as e:
        print("\n\n************** %s [WARN] ******************\n" % task_tag)
        if debug:
            print("for some reason, forms.json is not legible?\n%s" % e)

    return searchable_text


def massageJ3M(task):
    """Normalise a document's J3M metadata and save it as an InformaCamJ3M.

    The J3M JSON asset named by ``task.j3m_name`` (default "j3m.json") is
    loaded from the document ``task.doc_id`` and then:

    * ``genealogy.dateCreated`` is copied to the document's ``date_created``;
    * ``public_hash`` is set to the sha1 of ``createdOnDevice`` followed by
      the joined ``hashes``;
    * the two entries of the exif location and of sensor ``gps_coords`` are
      swapped, and each visible wifi network receives a ``bt_hash``;
    * form answers are rewritten and audio answers extracted as assets.

    The result is stored as the "j3m.json" asset and as an InformaCamJ3M
    record linked through ``media.j3m_id``, after which the task is routed
    on and finished.  On failure the task is failed (status 412 when the
    asset is not a JSON object) and the function returns early.

    Args:
        task: the task being run; ``doc_id``, ``task_path`` and the optional
            ``j3m_name`` attribute are read from it.
    """
    task_tag = "MASSAGING J3M"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("massaging j3m at %s" % task.doc_id)
    task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument

    from conf import DEBUG

    media = UnveillanceDocument(_id=task.doc_id)
    if media is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    j3m_name = getattr(task, "j3m_name", "j3m.json")

    j3m = media.loadAsset(j3m_name)
    if j3m is None:
        print("J3M IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    from json import loads

    try:
        j3m = loads(j3m)
    except (TypeError, ValueError):
        j3m = None

    # Everything below indexes the J3M by key, so valid JSON that is not an
    # object is rejected the same way as unparsable JSON.
    if not isinstance(j3m, dict):
        print("J3M IS INVALID")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail(status=412)
        return

    try:
        media.date_created = j3m['genealogy']['dateCreated']
        media.saveFields("date_created")
    except KeyError as e:
        print("J3M HAS NO DATE CREATED: %s" % e)
        print("\n\n************** %s [WARN] ******************\n" % task_tag)

    from hashlib import sha1

    try:
        genealogy = j3m['genealogy']
        j3m['public_hash'] = sha1("".join(
            [genealogy['createdOnDevice'], "".join(genealogy['hashes'])]
        )).hexdigest()
    except KeyError as e:
        if DEBUG:
            print("no key %s" % e)

    # Defined up front so a J3M without a 'data' section still reaches the
    # save step below.
    searchable_text = []

    data = j3m.get('data')
    if isinstance(data, dict):
        try:
            location = data['exif']['location']
            data['exif']['location'] = [location[1], location[0]]
        except (KeyError, IndexError, TypeError) as e:
            if DEBUG:
                print("no key %s" % e)

        if 'sensorCapture' in data:
            _j3m_massage_sensor_capture(data['sensorCapture'], debug=DEBUG)
        elif DEBUG:
            print("no key 'sensorCapture'")

        searchable_text = _j3m_massage_forms(
            data, media, task_tag, debug=DEBUG)

    if media.addAsset(j3m, "j3m.json", as_literal=False) is False:
        print("J3M COULD NOT BE ADDED")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    from lib.Worker.Models.ic_j3m import InformaCamJ3M

    j3m['media_id'] = media._id
    if searchable_text:
        j3m['searchable_text'] = searchable_text

    j3m = InformaCamJ3M(inflate=j3m)
    print("\n\n***NEW J3M CREATED***\n\n")
    j3m.save()

    media.j3m_id = j3m._id
    media.save()
    media.addCompletedTask(task.task_path)

    task.routeNext()
    task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def preprocessImage(task):
    """Extract TIFF metadata and embedded J3M data from an image; queue follow-ups.

    Runs ``j3mparser.out`` (from the configured ``jpeg_tools_dir``) on the
    image file.  Output lines before the "Got obscura marker" line are
    stored as the "file_metadata.txt" asset; lines after it are treated as
    the J3M payload.  Depending on what the payload sniffs as (JSON, or
    base64-wrapped PGP / gzip) the matching decrypt / parse / massage tasks
    are queued, and derivatives are queued when the upload restriction
    allows it.

    Args:
        task: the task being run; ``doc_id`` and ``task_path`` are read
            from it, and it is failed when the document or tool directory
            cannot be resolved or the parser cannot be started.
    """
    task_tag = "IMAGE PREPROCESSING"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("image preprocessing at %s" % task.doc_id)
    task.setStatus(302)

    from lib.Worker.Models.ic_image import InformaCamImage

    from conf import DEBUG
    from vars import ASSET_TAGS

    image = InformaCamImage(_id=task.doc_id)
    if image is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    import os
    from conf import getConfig, ANNEX_DIR

    try:
        J3M_DIR = getConfig('jpeg_tools_dir')
    except Exception as e:
        if DEBUG:
            print("NO J3M DIR! %s" % e)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    from subprocess import Popen, PIPE
    from cStringIO import StringIO
    from vars import UPLOAD_RESTRICTION

    # Parser chatter that belongs to neither the TIFF dump nor the J3M.
    ignored_prefixes = (
        "file: ", "Generic APPn ", "Component", "Didn't find ")
    marker_prefix = "Got obscura marker"

    tiff_txt = StringIO()
    ic_j3m_txt = None   # becomes a buffer once the obscura marker is seen

    # Defined before branching: routeNext() needs it even when no J3M is
    # found.
    inflate = {}

    cmd = [
        os.path.join(J3M_DIR, "j3mparser.out"),
        os.path.join(ANNEX_DIR, image.file_name)
    ]

    try:
        p = Popen(cmd, stdout=PIPE, close_fds=True)
    except OSError as e:
        print("COULD NOT RUN J3M PARSER: %s" % e)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    try:
        for raw_line in iter(p.stdout.readline, ""):
            line = raw_line.strip()

            if line.startswith(ignored_prefixes):
                continue

            if line.startswith(marker_prefix):
                if DEBUG:
                    print("\n\nWE ALSO HAVE J3M DATA\n\n")
                ic_j3m_txt = StringIO()
            elif ic_j3m_txt is not None:
                ic_j3m_txt.write(line)
            else:
                tiff_txt.write(line)
    finally:
        p.stdout.close()
        # Reap the child so it does not linger as a zombie.
        p.wait()

    image.addAsset(tiff_txt.getvalue(), "file_metadata.txt",
        description="tiff metadata as per jpeg redaction library")
    tiff_txt.close()

    # None is also what "no restriction recorded" looks like to the check
    # below, so a failed metadata lookup is treated the same way.
    upload_restriction = None

    if ic_j3m_txt is not None:
        from lib.Worker.Utils.funcs import getFileType
        from vars import MIME_TYPES, MIME_TYPE_MAP

        j3m_text = ic_j3m_txt.getvalue()
        ic_j3m_txt.close()

        if getFileType(j3m_text, as_buffer=True) != MIME_TYPES['json']:
            from lib.Core.Utils.funcs import b64decode

            un_b64 = b64decode(j3m_text)
            if un_b64 is not None:
                un_b64_mime_type = getFileType(un_b64, as_buffer=True)

                if un_b64_mime_type in [MIME_TYPES['pgp'], MIME_TYPES['gzip']]:
                    if DEBUG:
                        print("MIME TYPE: %s" % un_b64_mime_type)

                    asset_path = "j3m_raw.%s" % MIME_TYPE_MAP[un_b64_mime_type]
                    image.addAsset(un_b64, asset_path)

                    if DEBUG:
                        print("\n\nPGP KEY FILE PATH: %s\n\n" % asset_path)

                    gz = image.addAsset(None, "j3m_raw.gz",
                        tags=[ASSET_TAGS['OB_M']],
                        description="j3m data extracted from obscura marker")

                    if un_b64_mime_type == MIME_TYPES['pgp']:
                        task.put_next([
                            "PGP.decrypt.decrypt",
                            "J3M.j3mify.parse_zipped_j3m"
                        ])
                        inflate.update({
                            'pgp_file': os.path.join(
                                image.base_path, asset_path),
                            'save_as': gz
                        })
                    else:
                        task.put_next("J3M.j3mify.parse_zipped_j3m")
        else:
            image.addAsset(j3m_text, "j3m_raw.json", as_literal=False)

            task.put_next([
                "J3M.j3mify.j3mify",
                "J3M.massage_j3m.massageJ3M",
                "PGP.verify_signature.verifySignature",
                "J3M.verify_visual_content.verifyVisualContent"
            ])
            inflate.update({'j3m_name': "j3m_raw.json"})

        try:
            upload_restriction = image.getFileMetadata('uv_restriction')
        except Exception as e:
            print("could not get metadata for uv_restriction")
            print(e)
    else:
        print("NO IC J3M TEXT FOUND???")
        print("\n\n************** %s [WARN] ******************\n" % task_tag)
        upload_restriction = UPLOAD_RESTRICTION['for_local_use_only']

    if upload_restriction is None or \
            upload_restriction == UPLOAD_RESTRICTION['no_restriction']:
        task.put_next("Image.make_derivatives.makeDerivatives")

    # NOTE(review): nesting reconstructed -- get_vector is queued
    # unconditionally here; confirm it was not meant to sit under the
    # restriction check above.
    task.put_next("Image.get_vector.get_vector")

    image.addCompletedTask(task.task_path)

    task.routeNext(inflate=inflate)
    task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def evaluateTextFile(task):
    """Store a text document as a ``.pgp`` asset; queue decryption if armored.

    The document's file is loaded and base64-decoded, falling back to the
    raw content when decoding yields nothing.  If the result sniffs as
    neither the PGP nor the wildcard MIME type the task is failed with
    status 412.  Otherwise the payload is saved as "<file_name>.pgp",
    "PGP.decrypt.decrypt" is queued when the payload starts with the PGP
    message header, and the task is routed on and finished.

    Args:
        task: the task being run; ``doc_id`` and ``task_path`` are read
            from it.
    """
    task_tag = "EVALUATING TEXT FILE"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("evaluating text file at %s" % task.doc_id)
    task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument

    from conf import DEBUG
    from vars import ASSET_TAGS

    def bail_out(reason, **fail_args):
        # Shared error exit: announce the reason, then fail the task.
        print(reason)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail(**fail_args)

    media = UnveillanceDocument(_id=task.doc_id)
    if media is None:
        bail_out("DOC IS NONE")
        return

    if not media.queryFile(media.file_name):
        bail_out("NO DOCUMENT CONTENT")
        return

    content = media.loadFile(media.file_name)
    if content is None:
        bail_out("NO DOCUMENT CONTENT")
        return

    from lib.Core.Utils.funcs import b64decode

    # We have removed base 64-ing from the log files, so content that does
    # not decode is used as it is.
    payload = b64decode(content)
    if payload is None:
        payload = content

    from lib.Worker.Utils.funcs import getFileType
    from vars import MIME_TYPES, MIME_TYPE_MAP

    sniffed_mime_type = getFileType(payload, as_buffer=True)
    if DEBUG:
        print("MIME TYPE: %s" % sniffed_mime_type)

    usable_mime_types = (MIME_TYPES['pgp'], MIME_TYPES['wildcard'])
    if sniffed_mime_type not in usable_mime_types:
        err_msg = "MIME TYPE NOT USABLE"
        bail_out(err_msg, status=412, message=err_msg)
        return

    media.addAsset(payload, "%s.pgp" % media.file_name,
        description="un-b64'ed pgp asset")
    media.addCompletedTask(task.task_path)

    message_sentinel = "-----BEGIN PGP MESSAGE-----"
    if payload[:len(message_sentinel)] == message_sentinel:
        task.put_next("PGP.decrypt.decrypt")

    pgp_file = ".data/%s/%s.pgp" % (media._id, media.file_name)
    task.routeNext(inflate={'pgp_file': pgp_file})
    task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def locate_j3m(uv_task):
    """Classify a document's "j3m_raw.txt" asset and queue matching J3M tasks.

    When the asset sniffs as JSON it is renamed to "j3m_raw.json" and the
    j3mify / verify / massage / notarize tasks are queued.  Otherwise it is
    base64-decoded; a PGP payload queues decryption followed by
    ``parse_zipped_j3m``, a gzip payload queues ``parse_zipped_j3m`` only.
    Payloads of any other type queue nothing.

    Args:
        uv_task: the task being run; ``doc_id`` and ``task_path`` are read
            from it, and it is failed when the raw J3M asset cannot be
            loaded.
    """
    task_tag = "PULLING J3M"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("pulling j3m at %s" % uv_task.doc_id)
    uv_task.setStatus(302)

    import os

    from lib.Worker.Models.uv_document import UnveillanceDocument

    from conf import DEBUG, ANNEX_DIR
    from vars import ASSET_TAGS

    media = UnveillanceDocument(_id=uv_task.doc_id)
    if media is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    from lib.Worker.Utils.funcs import getFileType
    from vars import MIME_TYPES, MIME_TYPE_MAP

    ic_j3m_txt = media.loadAsset("j3m_raw.txt")
    if ic_j3m_txt is None:
        # Nothing to classify; fail instead of sniffing None.
        print("J3M IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    ic_j3m_txt_mime_type = getFileType(
        ic_j3m_txt, as_buffer=True, force_json=True)
    inflate = {}

    print("J3M MIME TYPE SNIFFED: %s" % ic_j3m_txt_mime_type)

    if ic_j3m_txt_mime_type != MIME_TYPES['json']:
        from lib.Core.Utils.funcs import b64decode

        un_b64 = b64decode(ic_j3m_txt)
        if un_b64 is not None:
            un_b64_mime_type = getFileType(un_b64, as_buffer=True)

            if un_b64_mime_type in [MIME_TYPES['pgp'], MIME_TYPES['gzip']]:
                if DEBUG:
                    print("MIME TYPE: %s" % un_b64_mime_type)

                asset_path = "j3m_raw.%s" % MIME_TYPE_MAP[un_b64_mime_type]
                media.addAsset(un_b64, asset_path)

                if DEBUG:
                    print("\n\nPGP KEY FILE PATH: %s\n\n" % asset_path)

                gz = media.addAsset(None, "j3m_raw.gz",
                    tags=[ASSET_TAGS['OB_M']],
                    description="j3m data extracted from obscura marker")

                if un_b64_mime_type == MIME_TYPES['pgp']:
                    uv_task.put_next([
                        "PGP.decrypt.decrypt",
                        "J3M.j3mify.parse_zipped_j3m"
                    ])
                    inflate.update({
                        'pgp_file': os.path.join(media.base_path, asset_path),
                        'save_as': gz
                    })
                # Equality, not "in": "in" on a string is a substring test.
                elif un_b64_mime_type == MIME_TYPES['gzip']:
                    uv_task.put_next("J3M.j3mify.parse_zipped_j3m")
    else:
        import shutil

        src_j3m = os.path.join(ANNEX_DIR, media.base_path, "j3m_raw.txt")
        dest_j3m = os.path.join(ANNEX_DIR, media.base_path, "j3m_raw.json")

        # Moved without a shell, so paths containing spaces or shell
        # metacharacters are safe.  A failed move is reported but, as
        # before, does not abort the task.
        try:
            shutil.move(src_j3m, dest_j3m)
        except (IOError, OSError) as e:
            print("could not move %s to %s: %s" % (src_j3m, dest_j3m, e))

        print("PUTTING J3M FROM HERE!!!! WAS JSON! (%s -> %s)" % (
            src_j3m, dest_j3m))

        media.addAsset(None, "j3m_raw.json")

        uv_task.put_next([
            "J3M.j3mify.j3mify",
            "PGP.verify_signature.verifySignature",
            "J3M.massage_j3m.massageJ3M",
            "J3M.verify_visual_content.verifyVisualContent",
            "J3M.notarize.notarize_media"
        ])
        inflate.update({'j3m_name': "j3m_raw.json"})

    media.addCompletedTask(uv_task.task_path)

    uv_task.routeNext(inflate=inflate)
    uv_task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def preprocessVideo(uv_task):
    """Dump a video's J3M attachment with ffmpeg and queue follow-up tasks.

    ffmpeg's ``-dump_attachment`` writes the attachment to the document's
    "j3m_raw.txt" asset.  The payload is sniffed directly and, if it is
    neither PGP nor gzip, once more after base64-decoding.  A PGP payload
    queues ``requestDecrypt``; a gzip payload queues ``j3mify``.
    Derivatives are queued when the upload restriction allows it.

    Args:
        uv_task: the task being run; ``doc_id`` and ``task_path`` are read
            from it, and it is failed when the document, the asset slot or
            the dumped attachment is unavailable.
    """
    task_tag = "PREPROCESSING VIDEO"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("video preprocessing at %s" % uv_task.doc_id)
    uv_task.setStatus(302)

    from lib.Worker.Models.ic_video import InformaCamVideo

    from conf import DEBUG
    from vars import ASSET_TAGS

    video = InformaCamVideo(_id=uv_task.doc_id)
    if video is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    asset_path = video.addAsset(None, "j3m_raw.txt")
    if asset_path is None:
        print("COULD NOT MAKE ASSET")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    import os
    import subprocess

    from lib.Core.Utils.funcs import b64decode
    from conf import ANNEX_DIR

    j3m_attachment = os.path.join(ANNEX_DIR, asset_path)
    cmd = [
        "ffmpeg", "-y", "-dump_attachment:t", j3m_attachment,
        "-i", os.path.join(ANNEX_DIR, video.file_name)
    ]

    # Run as an argument list, without a shell, so file names cannot be
    # interpreted as shell syntax.  The exit status is ignored on purpose;
    # success is judged by whether the attachment file exists afterwards.
    try:
        subprocess.call(cmd)
    except OSError as e:
        print("could not run ffmpeg: %s" % e)

    if not os.path.exists(j3m_attachment):
        print("FFMPEG COULD NOT DO THE THING")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    from lib.Worker.Utils.funcs import getFileType
    from vars import MIME_TYPES, MIME_TYPE_MAP

    next_tasks = []
    inflate = {}

    j3m_content = video.loadAsset("j3m_raw.txt")
    if j3m_content is None:
        print("J3M IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    # The payload may be large or binary; only echo it when debugging.
    if DEBUG:
        print(j3m_content)

    packed_mime_types = [MIME_TYPES["pgp"], MIME_TYPES["gzip"]]

    j3m_content_mime_type = getFileType(j3m_content, as_buffer=True)
    if j3m_content_mime_type not in packed_mime_types:
        # Not recognisable as-is: try again on the base64-decoded payload.
        j3m_content = b64decode(j3m_content)
        if j3m_content is not None:
            j3m_content_mime_type = getFileType(j3m_content, as_buffer=True)

    if j3m_content_mime_type in packed_mime_types:
        asset_path = "j3m_raw.%s" % MIME_TYPE_MAP[j3m_content_mime_type]
        video.addAsset(j3m_content, asset_path)

        if j3m_content_mime_type == MIME_TYPES["pgp"]:
            next_tasks.append("PGP.request_decrypt.requestDecrypt")
            inflate["pgp_file"] = asset_path
        else:
            next_tasks.append("J3M.j3mify.j3mify")
            # NOTE(review): nesting reconstructed -- the tagged
            # "j3m_raw.gz" asset is registered for gzip payloads only;
            # confirm it was not meant for the PGP case as well.
            video.addAsset(
                None, "j3m_raw.gz",
                tags=[ASSET_TAGS["OB_M"]],
                description="j3m data extracted from mkv stream"
            )

    video.addCompletedTask(uv_task.task_path)

    from vars import UPLOAD_RESTRICTION

    # None is also what "no restriction recorded" looks like to the check
    # below, so a failed metadata lookup is treated the same way.
    upload_restriction = None
    try:
        upload_restriction = video.getFileMetadata("uv_restriction")
    except Exception as e:
        print("could not get metadata for uv_restriction")
        print(e)

    if upload_restriction is None or \
            upload_restriction == UPLOAD_RESTRICTION["no_restriction"]:
        next_tasks.append("Video.make_derivatives.makeDerivatives")

    if next_tasks:
        uv_task.put_next(next_tasks)

    uv_task.routeNext(inflate=inflate)
    uv_task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)