예제 #1
0
def evaluateDocument(uv_task):
	task_tag = "DOCUMENT EVALUATION"
	print "\n\n************** %s [START] ******************\n" % task_tag
	uv_task.setStatus(302)
	
	from lib.Worker.Models.uv_document import UnveillanceDocument
	from conf import DEBUG, UUID
	
	document = None
	
	if hasattr(uv_task, "doc_id"):
		if DEBUG:
			print "GETTING A DOCUMENT FROM ID: %s" % uv_task.doc_id
		
		document = UnveillanceDocument(_id=uv_task.doc_id)
	else:
		if DEBUG:
			print "INFLATING NEW DOCUMENT WITH FILE NAME: %s" % uv_task.file_name
		
		document = UnveillanceDocument(inflate={'file_name' : uv_task.file_name})
	
	if document is None:
		print "\n\n************** %s [INVALID] ******************\n" % task_tag
		print "DOCUMENT INVALID (is None)"
		
		uv_task.fail(message="DOCUMUENT INVALID (is none)")
		return
			
	from lib.Worker.Models.uv_task import UnveillanceTask
	from vars import MIME_TYPE_TASKS, MIME_TYPES
	
	document.addCompletedTask(uv_task.task_path)
	uv_task.put_next(uv_task.task_path)
	
	mime_type = document.query_mime_type()

	print "\n\n************** %s [INFO] ******************\n" % task_tag
	print "MIME TYPE: %s" % mime_type
		
	if mime_type in MIME_TYPE_TASKS.keys():
		if DEBUG:
			print "mime type (%s) usable..." % mime_type
			print MIME_TYPE_TASKS[mime_type]

		uv_task.put_next(MIME_TYPE_TASKS[mime_type])
		
	else:
		uv_task.fail(status=412, message="document mime type (%s) not important" % mime_type)
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		return

	inflate = {'doc_id' : document._id}
	if mime_type == MIME_TYPES['symlink']:
		inflate['attempt_sync'] = True
		
	uv_task.routeNext(inflate=inflate)

	uv_task.finish()
	print "\n\n************** %s [END] ******************\n" % task_tag
	
예제 #2
0
def routeNextTask(task, document, task_extras=None):
	if not hasattr(task, 'no_continue') or not task.no_continue:
		next_task_path = None
		
		from lib.Worker.Models.uv_task import UnveillanceTask
		
		if hasattr(task, 'next_task_path'):
			next_task_path = task.next_task_path
		else:
			from vars import MIME_TYPE_TASKS
		
			if document.mime_type in MIME_TYPE_TASKS.keys():
				try:
					next_task_path = MIME_TYPE_TASKS[document.mime_type][1]
				except Exception as e:
					if DEBUG: print e				
		
		if next_task_path is not None:
			inflate = {
				'task_path' : next_task_path,
				'doc_id' : document._id,
				'queue' : task.queue
			}
			
			if task_extras is not None: inflate.update(task_extras)
			
			next_task = UnveillanceTask(inflate=inflate)
			next_task.run()
예제 #3
0
def evaluateText(task):
	task_tag = "TEXT EVALUATION"
	print "\n\n************** %s [START] ******************\n" % task_tag
	print "evaluating text at %s" % task.doc_id
	task.setStatus(302)
	
	from lib.Worker.Models.uv_document import UnveillanceDocument
	from conf import DEBUG
	from vars import MIME_TYPE_TASKS
	
	document = UnveillanceDocument(_id=task.doc_id)	
	"""
		limited choices: json, pgp, or txt
	"""

	if hasattr(task, "text_file"):
		content = document.loadAsset(task.text_file)
	else:
		content = document.loadFile(document.file_name)	
	
	if content is None:
		print "no text to evaluate :("
		print "\n\n************** %s [ERROR] ******************\n" % task_tag
		task.fail()
		return
	
	new_mime_type = None
	import json
	try:
		json_txt = json.loads(content)
		new_mime_type = "application/json"
		
		print "THIS IS JSON"
	except Exception as e:
		print "NOT JSON: %s" % e
	
	task_path = None	
	if new_mime_type is not None:
		document.mime_type = new_mime_type
		document.save()
		
		if document.mime_type in MIME_TYPE_TASKS.keys():
			task_path = MIME_TYPE_TASKS[document.mime_type][0]
	else:
		try:
			from lib.Core.Utils.funcs import cleanLine
			from vars import ASSET_TAGS
			
			txt_json = []
			txt_pages = []
			line_count = 0
			
			# this is arbitrary
			MAX_LINES_PER_PAGE = 80
			
			for line in content.splitlines():
				txt_pages.append(cleanLine(line))
				line_count += 1
				
				if line_count == MAX_LINES_PER_PAGE:
					txt_json.append(" ".join(txt_pages))
					txt_pages = []
					line_count = 0

			txt_json.append(" ".join(txt_pages))

			document.total_pages = len(txt_json)
			document.save()
						
			asset_path = document.addAsset(txt_json, "doc_texts.json", as_literal=False,
				description="jsonified text of original document, segment by segment",
				tags=[ASSET_TAGS['TXT_JSON']])

			from lib.Worker.Models.uv_text import UnveillanceText
			uv_text = UnveillanceText(inflate={
				'media_id' : document._id,
				'searchable_text' : txt_json,
				'file_name' : asset_path
			})
			
			document.text_id = uv_text._id
			document.save()
		except Exception as e: 
			if DEBUG:
				print "ERROR HERE GENERATING DOC TEXTS:"
				print e
	
	document.addCompletedTask(task.task_path)
	task.finish()
	task.routeNext()
	print "\n\n************** %s [END] ******************\n" % task_tag
예제 #4
0
def decrypt(uv_task):
    task_tag = "DECRYPTING"
    print "\n\n************** %s [START] ******************\n" % task_tag
    print "decrypting pgp blob for %s" % uv_task.doc_id
    uv_task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument

    media = UnveillanceDocument(_id=uv_task.doc_id)
    if media is None:
        print "DOC IS NONE"
        print "\n\n************** %s [ERROR] ******************\n" % task_tag
        uv_task.fail()
        return

    if not media.getFile(uv_task.pgp_file):
        print "NO PGP FILE"
        print "\n\n************** %s [ERROR] ******************\n" % task_tag
        uv_task.fail()
        return

    from conf import getSecrets

    gpg_pwd = getSecrets("gpg_pwd")
    if gpg_pwd is None:
        err_msg = "NO PASSPHRASE TO DECRYPT"
        print err_msg
        print "\n\n************** %s [ERROR] ******************\n" % task_tag
        uv_task.fail(message=err_msg)
        return

    gpg_dir = getSecrets("gpg_dir")

    # save as task.pgp_file.decrypted or whatever
    import os
    from fabric.api import local, settings
    from fabric.context_managers import hide

    from conf import ANNEX_DIR, DEBUG

    if not hasattr(uv_task, "save_as"):
        save_as = "%s.decrypted" % uv_task.pgp_file
    else:
        save_as = uv_task.save_as

    print "\n\n************** %s [INFO] ******************\n" % task_tag
    print "SAVING DECRYPTED ASSET TO %s IF SUCCESSFUL" % save_as

    with settings(hide("everything"), warn_only=True):
        d_cmd = "gpg --yes --no-tty --homedir=%s --passphrase %s --output %s --decrypt %s" % (
            gpg_dir,
            gpg_pwd,
            os.path.join(ANNEX_DIR, save_as),
            os.path.join(ANNEX_DIR, uv_task.pgp_file),
        )

        decrypted = local(d_cmd)
        print decrypted.return_code

        del gpg_pwd
        if decrypted.return_code == 2:
            err_msg = "could not successfully decrypt %s" % uv_task.pgp_file
            print err_msg
            print "\n\n************** %s [ERROR] ******************\n" % task_tag
            uv_task.fail(status=412, message=err_msg)
            return

    media.addCompletedTask(uv_task.task_path)

    if uv_task.get_next() is None:
        # route according to mime type
        # get mime type of decrypted
        from vars import MIME_TYPE_TASKS
        from lib.Worker.Utils.funcs import getFileType

        mime_type = getFileType(os.path.join(ANNEX_DIR, save_as))

        # usable: json (a j3m), zip (a source or a log->batch)
        if mime_type in MIME_TYPE_TASKS.keys():
            print "mime type (%s) usable..." % mime_type

            try:
                uv_task.put_next(MIME_TYPE_TASKS[mime_type])
            except Exception as e:
                print e

    uv_task.routeNext(inflate={"file_name": save_as})
    uv_task.finish()
    print "\n\n************** %s [END] ******************\n" % task_tag