def processRequest(request):
    """Record a newly uploaded object in the document store.

    Reads ``bucketName``, ``objectName``, ``documentsTable`` and
    ``outputTable`` from ``request`` (a missing key raises ``KeyError``).
    When the lower-cased object name has a jpg/jpeg/png/pdf extension, a
    fresh UUID1 document id is generated and passed to
    ``DocumentStore.createDocument``; otherwise nothing is stored.

    Returns:
        dict with ``statusCode`` 200 and a JSON-encoded ``body`` holding the
        confirmation message, or an empty string when nothing was stored.
    """
    print("request: {}".format(request))

    bucketName = request["bucketName"]
    objectName = request["objectName"]
    documentsTable = request["documentsTable"]
    outputTable = request["outputTable"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Guard clause: unsupported or missing extensions are acknowledged
    # with an empty body and no document is created.
    if ext not in ("jpg", "jpeg", "png", "pdf"):
        return {"statusCode": 200, "body": json.dumps("")}

    documentId = str(uuid.uuid1())
    store = datastore.DocumentStore(documentsTable, outputTable)
    store.createDocument(documentId, bucketName, objectName)

    message = "Saved document {} for {}/{}".format(
        documentId, bucketName, objectName)
    print(message)

    return {"statusCode": 200, "body": json.dumps(message)}
示例#2
0
def processRequest(request):
    """Route an item to the sync or async queue based on its extension.

    Reads ``itemId``, ``bucketName`` and ``objectName`` from ``request``.
    jpg/jpeg/png objects use ``request['syncQueueUrl']``; mov/mp4 objects
    use ``request['asyncQueueUrl']``. When a queue URL was selected, a
    message with the item id, bucket and object name is posted through
    ``postMessage`` using an SQS client obtained from ``AwsHelper``.

    Returns:
        Whatever ``postMessage`` returned, or ``None`` when the extension
        is unsupported or missing and nothing was posted.
    """
    print("request: {}".format(request))

    itemId = request["itemId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Both names must be bound up front: previously an unsupported or
    # missing extension left them undefined and the function crashed with
    # UnboundLocalError instead of simply skipping the routing step.
    qUrl = None
    response = None

    if ext in ("jpg", "jpeg", "png"):
        qUrl = request['syncQueueUrl']
    elif ext in ("mov", "mp4"):
        qUrl = request['asyncQueueUrl']

    if qUrl:
        jsonMessage = {
            'itemId': itemId,
            'bucketName': bucketName,
            'objectName': objectName,
        }

        client = AwsHelper().getClient('sqs')
        response = postMessage(client, qUrl, jsonMessage)

    output = "Completed routing for itemId: {}, object: {}/{}".format(
        itemId, bucketName, objectName)

    print(output)
    return response
def processRequest(request):
    """Record a newly uploaded image or video in the item store.

    Reads ``bucketName``, ``objectName``, ``itemsTable`` and
    ``outputBucket`` from ``request`` (a missing key raises ``KeyError``).
    When the lower-cased object name has a jpg/jpeg/png/mov/mp4 extension,
    a fresh UUID1 item id is generated and passed to
    ``ItemStore.createItem``; otherwise nothing is stored.

    Returns:
        dict with ``statusCode`` 200 and a JSON-encoded ``body`` holding the
        confirmation message, or an empty string when nothing was stored.
    """
    print("request: {}".format(request))

    bucketName = request["bucketName"]
    objectName = request["objectName"]
    itemsTable = request["itemsTable"]
    # The value is not used below; the lookup is kept so a request without
    # this key still fails with KeyError exactly as before.
    request["outputBucket"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Guard clause: unsupported or missing extensions are acknowledged
    # with an empty body and no item is created.
    if ext not in ("jpg", "jpeg", "png", "mov", "mp4"):
        return {'statusCode': 200, 'body': json.dumps("")}

    itemId = str(uuid.uuid1())
    store = datastore.ItemStore(itemsTable)
    store.createItem(itemId, bucketName, objectName)

    message = "Saved item {} for {}/{}".format(itemId, bucketName, objectName)
    print(message)

    return {'statusCode': 200, 'body': json.dumps(message)}
示例#4
0
def processRequest(request):
    """Record a newly uploaded object in the document store.

    Reads ``bucketName``, ``objectName``, ``documentsTable`` and
    ``outputTable`` from ``request`` (a missing key raises ``KeyError``).
    A jpg/jpeg/png/pdf object gets a fresh UUID1 document id that is handed
    to ``DocumentStore.createDocument``; any other object is left alone.

    Returns:
        dict with ``statusCode`` 200 and a JSON-encoded ``body`` holding the
        confirmation message, or an empty string when nothing was stored.
    """
    print(f"request: {request}")
    bucketName = request["bucketName"]
    objectName = request["objectName"]
    documentsTable = request["documentsTable"]
    outputTable = request["outputTable"]

    print(f"Input Object: {bucketName}/{objectName}")

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print(f"Extension: {ext}")

    message = ""
    supported = ext in ("jpg", "jpeg", "png", "pdf")
    if supported:
        documentId = str(uuid.uuid1())
        datastore.DocumentStore(documentsTable, outputTable).createDocument(
            documentId, bucketName, objectName)

        message = f"Saved document {documentId} for {bucketName}/{objectName}"
        print(message)

    response = {'statusCode': 200}
    response['body'] = json.dumps(message)
    return response
示例#5
0
def processRequest(request):
    """Route a document to the sync or async queue and arm the error handler.

    Reads ``documentId``, ``bucketName``, ``objectName`` and
    ``errorHandlerQueueUrl`` from ``request``. jpg/jpeg/png objects use
    ``request['syncQueueUrl']`` with ``SYNC_JOB_TIMEOUT_SECONDS``; pdf
    objects use ``request['asyncQueueUrl']`` with
    ``ASYNC_JOB_TIMEOUT_SECONDS``. When a queue URL is available, the job
    message is posted to it and a second message carrying only the document
    id is posted to the error-handler queue together with that timeout.

    Any other extension, including a missing one, only posts the document
    id to the error-handler queue (without a timeout) and returns.

    Returns:
        None in every case.
    """
    print("request: {}".format(request))

    documentId = request["documentId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]
    jobErrorHandlerQueueUrl = request['errorHandlerQueueUrl']

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    client = AwsHelper().getClient('sqs')

    # If not an expected extension, hand the document to the error handler
    # and exit. A missing/empty extension is treated the same way:
    # previously it slipped past this guard and crashed below with
    # UnboundLocalError because no queue URL was ever assigned.
    if ext not in ("jpg", "jpeg", "png", "pdf"):
        postMessage(client, jobErrorHandlerQueueUrl,
                    {'documentId': documentId})
        return

    if ext == "pdf":
        qUrl = request['asyncQueueUrl']
        errorHandlerTimeoutSeconds = ASYNC_JOB_TIMEOUT_SECONDS
    else:
        qUrl = request['syncQueueUrl']
        errorHandlerTimeoutSeconds = SYNC_JOB_TIMEOUT_SECONDS

    # The request may carry an empty queue URL; nothing is posted then.
    if qUrl:
        features = ["Text", "Forms", "Tables"]
        jsonMessage = {
            'documentId': documentId,
            "features": features,
            'bucketName': bucketName,
            'objectName': objectName,
        }
        postMessage(client, qUrl, jsonMessage)

        jsonErrorHandlerMessage = {
            'documentId': documentId
        }
        postMessage(client, jobErrorHandlerQueueUrl,
                    jsonErrorHandlerMessage, errorHandlerTimeoutSeconds)

    output = "Completed routing for documentId: {}, object: {}/{}".format(
        documentId, bucketName, objectName)
    print(output)
def processRequest(request):
    """Register one batch task's object in the item store and report back.

    Reads ``bucketName``, ``objectName``, ``itemsTable``, ``outputBucket``,
    ``jobId``, ``invocationId``, ``invocationSchemaVersion`` and ``taskId``
    from ``request`` (a missing key raises ``KeyError``). A
    jpg/jpeg/png/mov/mp4 object gets a fresh UUID1 item id that is handed
    to ``ItemStore.createItem``.

    Returns:
        dict echoing ``invocationSchemaVersion`` and ``invocationId`` with a
        single-entry ``results`` list for ``taskId``. The entry's
        ``resultCode`` is ``Succeeded`` when an item was created and
        ``PermanentFailure`` when the extension is unsupported or missing.
    """
    print("request: {}".format(request))

    bucketName = request["bucketName"]
    objectName = request["objectName"]
    itemsTable = request["itemsTable"]
    # outputBucket and jobId are not used below; the lookups are kept so a
    # request lacking them still fails with KeyError as before.
    request["outputBucket"]
    request["jobId"]

    invocationId = request['invocationId']
    invocationSchemaVersion = request['invocationSchemaVersion']
    taskId = request['taskId']

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    if ext in ("jpg", "jpeg", "png", "mov", "mp4"):
        itemId = str(uuid.uuid1())
        ds = datastore.ItemStore(itemsTable)
        ds.createItem(itemId, bucketName, objectName)

        output = "Saved item {} for {}/{}".format(itemId, bucketName,
                                                  objectName)
        print(output)

        resultCode = 'Succeeded'
        resultString = "Item submitted for processing with Id: {}".format(
            itemId)
    else:
        # Previously this path crashed with UnboundLocalError because
        # itemId was never assigned; report an explicit task failure.
        resultCode = 'PermanentFailure'
        resultString = "Unsupported file extension for {}/{}".format(
            bucketName, objectName)
        print(resultString)

    results = [{
        'taskId': taskId,
        'resultCode': resultCode,
        'resultString': resultString,
    }]

    return {
        'invocationSchemaVersion': invocationSchemaVersion,
        'treatMissingKeysAs': 'PermanentFailure',
        'invocationId': invocationId,
        'results': results
    }
示例#7
0
def processRequest(request):
    """Register one batch task's object in the document store and report back.

    Reads ``bucketName``, ``objectName``, ``documentsTable``,
    ``outputTable``, ``jobId``, ``invocationId``,
    ``invocationSchemaVersion`` and ``taskId`` from ``request`` (a missing
    key raises ``KeyError``). A jpg/jpeg/png/pdf object gets a fresh UUID1
    document id that is handed to ``DocumentStore.createDocument``.

    Returns:
        dict echoing ``invocationSchemaVersion`` and ``invocationId`` with a
        single-entry ``results`` list for ``taskId``. The entry's
        ``resultCode`` is ``Succeeded`` when a document was created and
        ``PermanentFailure`` when the extension is unsupported or missing.
    """
    print("request: {}".format(request))

    bucketName = request["bucketName"]
    objectName = request["objectName"]
    documentsTable = request["documentsTable"]
    outputTable = request["outputTable"]

    # jobId is not used below; the lookup is kept so a request lacking it
    # still fails with KeyError as before.
    request["jobId"]
    invocationId = request["invocationId"]
    invocationSchemaVersion = request["invocationSchemaVersion"]
    taskId = request["taskId"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    if ext in ("jpg", "jpeg", "png", "pdf"):
        documentId = str(uuid.uuid1())
        ds = datastore.DocumentStore(documentsTable, outputTable)
        ds.createDocument(documentId, bucketName, objectName)

        output = "Saved document {} for {}/{}".format(documentId, bucketName,
                                                      objectName)
        print(output)

        resultCode = "Succeeded"
        resultString = "Document submitted for processing with Id: {}".format(
            documentId)
    else:
        # Previously this path crashed with UnboundLocalError because
        # documentId was never assigned; report an explicit task failure.
        resultCode = "PermanentFailure"
        resultString = "Unsupported file extension for {}/{}".format(
            bucketName, objectName)
        print(resultString)

    results = [{
        "taskId": taskId,
        "resultCode": resultCode,
        "resultString": resultString,
    }]

    return {
        "invocationSchemaVersion": invocationSchemaVersion,
        "treatMissingKeysAs": "PermanentFailure",
        "invocationId": invocationId,
        "results": results,
    }
示例#8
0
    def __init__(self, bucketName, documentPath, awsRegion, detectText,
                 detectForms, detectTables):
        """Build and validate the input parameters for a document run.

        Starts from a fresh ``Input()`` and overrides an attribute only when
        the matching argument is truthy, so falsy arguments leave whatever
        ``Input()`` initialised. The validated object is stored on
        ``self.inputParameters``.

        Raises:
            Exception: when neither bucket name nor document path is set,
                when the extension is not jpg/jpeg/png/pdf, when a PDF is
                not in a bucket, or when all three detect flags equal False.
        """
        params = Input()

        # Apply overrides in the same order as the constructor arguments.
        overrides = (
            ("bucketName", bucketName),
            ("documentPath", documentPath),
            ("awsRegion", awsRegion),
            ("detectText", detectText),
            ("detectForms", detectForms),
            ("detectTables", detectTables),
        )
        for attribute, value in overrides:
            if value:
                setattr(params, attribute, value)

        if not (params.bucketName or params.documentPath):
            raise Exception("Document is required.")

        # No bucket name means the document is treated as a local one.
        params.isLocalDocument = not params.bucketName

        ext = FileHelper.getFileExtenstion(params.documentPath).lower()
        print("Extension", ext)
        if ext == "pdf":
            params.documentType = "PDF"
        elif ext in ("jpg", "jpeg", "png"):
            params.documentType = "IMAGE"
        else:
            raise Exception("Document should be jpg/jpeg, png or pdf.")

        if params.isLocalDocument and params.documentType == "PDF":
            raise Exception("PDF must be in S3 bucket.")

        # Explicit comparison with False is deliberate: values such as None
        # must not count as "disabled" here, matching the original check.
        nothingSelected = (params.detectText == False
                           and params.detectForms == False
                           and params.detectTables == False)
        if nothingSelected:
            raise Exception(
                "Select at least one option to extract text, form or table")

        self.inputParameters = params
def processRequest(request):
    """Route a document to the queue that matches its file extension.

    Reads ``documentId``, ``bucketName`` and ``objectName`` from
    ``request``. jpg/jpeg/png objects use ``request['syncQueueUrl']``, pdf
    objects use ``request['asyncQueueUrl']`` and htm/html objects use the
    ``HTMLTOBOUNDINGBOX_QUEUE_URL`` environment variable. When a queue URL
    was selected, a message requesting the ``Text`` feature is posted
    through ``postMessage``; pdf objects are additionally posted to
    ``request['pdftoimgQueueUrl']``.

    Objects with any other (or no) extension are not posted anywhere.

    Returns:
        None in every case.
    """
    print("request: {}".format(request))

    documentId = request["documentId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Bind the queue URL up front: previously an unsupported or missing
    # extension left it undefined and the check below raised
    # UnboundLocalError.
    qUrl = None
    if ext in ("jpg", "jpeg", "png"):
        qUrl = request['syncQueueUrl']
    elif ext == "pdf":
        qUrl = request['asyncQueueUrl']
    elif ext in ("htm", "html"):
        qUrl = os.environ['HTMLTOBOUNDINGBOX_QUEUE_URL']

    if qUrl:
        features = ["Text"]  # "Forms" and "Tables" are not requested here

        jsonMessage = {
            'documentId': documentId,
            "features": features,
            'bucketName': bucketName,
            'objectName': objectName,
        }

        client = AwsHelper().getClient('sqs')
        postMessage(client, qUrl, jsonMessage)
        if ext == "pdf":
            # PDFs get the same message on the pdf-to-image queue as well.
            postMessage(client, request["pdftoimgQueueUrl"], jsonMessage)

    output = "Completed routing for documentId: {}, object: {}/{}".format(
        documentId, bucketName, objectName)

    print(output)
def processRequest(request):
    """Route a document to the sync or async queue based on its extension.

    Reads ``documentId``, ``bucketName`` and ``objectName`` from
    ``request``. jpg/jpeg/png objects use ``request['syncQueueUrl']`` and
    pdf objects use ``request['asyncQueueUrl']``. When a queue URL was
    selected, a message requesting the ``Text`` feature is posted through
    ``postMessage`` using an SQS client obtained from ``AwsHelper``.

    Objects with any other (or no) extension are not posted anywhere.

    Returns:
        None in every case.
    """
    print("request: {}".format(request))

    documentId = request["documentId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Bind the queue URL up front: previously an unsupported or missing
    # extension left it undefined and the check below raised
    # UnboundLocalError.
    qUrl = None
    if ext in ("jpg", "jpeg", "png"):
        qUrl = request['syncQueueUrl']
    elif ext == "pdf":
        qUrl = request['asyncQueueUrl']

    if qUrl:
        # To avoid form and tables outputs, only "Text" is requested; the
        # original note says this is meant to move to a configuration file.
        features = ["Text"]
        jsonMessage = {
            'documentId': documentId,
            "features": features,
            'bucketName': bucketName,
            'objectName': objectName
        }

        client = AwsHelper().getClient('sqs')
        postMessage(client, qUrl, jsonMessage)

    output = "Completed routing for documentId: {}, object: {}/{}".format(
        documentId, bucketName, objectName)

    print(output)