def processRequest(request):
    """Register a newly uploaded object as a document when its type is supported.

    Args:
        request: Mapping that must contain "bucketName", "objectName",
            "documentsTable" and "outputTable" (a missing key raises KeyError).

    Returns:
        dict: {"statusCode": 200, "body": <JSON string>}. The body is the
        JSON-encoded "Saved document ..." message when a document was created,
        or the JSON-encoded empty string when the extension is not one of
        jpg / jpeg / png / pdf.
    """
    print(f"request: {request}")

    bucket = request["bucketName"]
    key = request["objectName"]
    documents_table = request["documentsTable"]
    output_table = request["outputTable"]
    print(f"Input Object: {bucket}/{key}")

    # The extension is derived from the lower-cased key, so matching below is
    # case-insensitive with respect to the original object name.
    extension = FileHelper.getFileExtenstion(key.lower())
    print(f"Extension: {extension}")

    message = ""
    if extension in ("jpg", "jpeg", "png", "pdf"):
        document_id = str(uuid.uuid1())
        store = datastore.DocumentStore(documents_table, output_table)
        store.createDocument(document_id, bucket, key)
        message = f"Saved document {document_id} for {bucket}/{key}"
        print(message)

    return {"statusCode": 200, "body": json.dumps(message)}
def processRequest(request):
    """Route an item to the sync or async SQS queue based on its file extension.

    Images (jpg / jpeg / png) go to request['syncQueueUrl']; videos
    (mov / mp4) go to request['asyncQueueUrl'].

    Previously an object whose extension matched neither group left ``qUrl``
    unassigned, and a falsy queue URL left ``response`` unassigned, so the
    function raised UnboundLocalError. Both cases now skip routing and
    return None.

    Args:
        request: Mapping that must contain "itemId", "bucketName" and
            "objectName", plus the queue URL key for the matched extension
            group (a missing key raises KeyError).

    Returns:
        Whatever ``postMessage`` returns when a message was sent, otherwise
        None.
    """
    print("request: {}".format(request))

    itemId = request["itemId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Always bind qUrl so the check below cannot hit an unassigned local.
    if ext in ("jpg", "jpeg", "png"):
        qUrl = request['syncQueueUrl']
    elif ext in ("mov", "mp4"):
        qUrl = request['asyncQueueUrl']
    else:
        qUrl = None

    if not qUrl:
        print("Skipped routing for itemId: {}, object: {}/{} (no queue for extension '{}')".format(
            itemId, bucketName, objectName, ext))
        return None

    jsonMessage = {
        'itemId': itemId,
        'bucketName': bucketName,
        'objectName': objectName,
    }

    client = AwsHelper().getClient('sqs')
    response = postMessage(client, qUrl, jsonMessage)

    output = "Completed routing for itemId: {}, object: {}/{}".format(itemId, bucketName, objectName)
    print(output)

    return response
def processRequest(request):
    """Register a newly uploaded image or video as an item when its type is supported.

    Args:
        request: Mapping that must contain "bucketName", "objectName",
            "itemsTable" and "outputBucket" (a missing key raises KeyError).

    Returns:
        dict: {'statusCode': 200, 'body': <JSON string>}. The body is the
        JSON-encoded "Saved item ..." message when an item was created, or
        the JSON-encoded empty string when the extension is not one of
        jpg / jpeg / png / mov / mp4.
    """
    print(f"request: {request}")

    bucket = request["bucketName"]
    key = request["objectName"]
    items_table = request["itemsTable"]
    # Not used below; the lookup is kept so a request without this key still
    # raises KeyError exactly as before.
    output_bucket = request["outputBucket"]

    print(f"Input Object: {bucket}/{key}")

    extension = FileHelper.getFileExtenstion(key.lower())
    print(f"Extension: {extension}")

    message = ""
    if extension in ("jpg", "jpeg", "png", "mov", "mp4"):
        item_id = str(uuid.uuid1())
        store = datastore.ItemStore(items_table)
        store.createItem(item_id, bucket, key)
        message = f"Saved item {item_id} for {bucket}/{key}"
        print(message)

    return {'statusCode': 200, 'body': json.dumps(message)}
def processRequest(request):
    """Create a document record for a supported upload and report the outcome.

    Args:
        request: Mapping that must contain "bucketName", "objectName",
            "documentsTable" and "outputTable" (a missing key raises KeyError).

    Returns:
        dict: {'statusCode': 200, 'body': <JSON string>}; the body is the
        JSON-encoded confirmation message, or the JSON-encoded empty string
        when the extension is not jpg / jpeg / png / pdf.
    """
    print(f"request: {request}")

    source_bucket = request["bucketName"]
    source_key = request["objectName"]
    documents_table = request["documentsTable"]
    output_table = request["outputTable"]

    print(f"Input Object: {source_bucket}/{source_key}")

    file_ext = FileHelper.getFileExtenstion(source_key.lower())
    print(f"Extension: {file_ext}")

    # Guard clause: unsupported (or missing) extensions produce an empty body.
    if not file_ext or file_ext not in ("jpg", "jpeg", "png", "pdf"):
        return {'statusCode': 200, 'body': json.dumps("")}

    new_document_id = str(uuid.uuid1())
    document_store = datastore.DocumentStore(documents_table, output_table)
    document_store.createDocument(new_document_id, source_bucket, source_key)

    confirmation = f"Saved document {new_document_id} for {source_bucket}/{source_key}"
    print(confirmation)

    return {'statusCode': 200, 'body': json.dumps(confirmation)}
def processRequest(request):
    """Route a document to the sync or async queue and schedule its error-handler message.

    Images (jpg / jpeg / png) go to request['syncQueueUrl'] and PDFs go to
    request['asyncQueueUrl']. After the job message is posted, a second
    message carrying only the documentId is posted to the error-handler
    queue with the matching *_JOB_TIMEOUT_SECONDS value as the fourth
    ``postMessage`` argument.

    Any other extension, including a missing or empty one, is reported
    straight to the error-handler queue and the function returns.
    Previously a falsy extension bypassed that branch and then raised
    UnboundLocalError on ``qUrl``.

    Args:
        request: Mapping that must contain "documentId", "bucketName",
            "objectName" and "errorHandlerQueueUrl", plus the queue URL key
            for the matched extension group (a missing key raises KeyError).

    Returns:
        None.
    """
    print("request: {}".format(request))

    documentId = request["documentId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]
    jobErrorHandlerQueueUrl = request['errorHandlerQueueUrl']

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    client = AwsHelper().getClient('sqs')
    jsonErrorHandlerMessage = {'documentId': documentId}

    if ext in ("jpg", "jpeg", "png"):
        qUrl = request['syncQueueUrl']
        errorHandlerTimeoutSeconds = SYNC_JOB_TIMEOUT_SECONDS
    elif ext == "pdf":
        qUrl = request['asyncQueueUrl']
        errorHandlerTimeoutSeconds = ASYNC_JOB_TIMEOUT_SECONDS
    else:
        # Not an expected extension (or none at all): hand the document to the
        # error handler immediately and exit.
        postMessage(client, jobErrorHandlerQueueUrl, jsonErrorHandlerMessage)
        return

    if not qUrl:
        # NOTE(review): a supported extension with an empty queue URL posts
        # nothing, matching the original `if(qUrl)` guard. Confirm whether the
        # error handler should be notified in this case.
        print("Skipped routing for documentId: {}, object: {}/{} (empty queue URL)".format(
            documentId, bucketName, objectName))
        return

    features = ["Text", "Forms", "Tables"]
    jsonMessage = {
        'documentId': documentId,
        "features": features,
        'bucketName': bucketName,
        'objectName': objectName,
    }
    postMessage(client, qUrl, jsonMessage)

    # Second message for the error handler, sent with the job-type timeout.
    postMessage(client, jobErrorHandlerQueueUrl, jsonErrorHandlerMessage, errorHandlerTimeoutSeconds)

    output = "Completed routing for documentId: {}, object: {}/{}".format(
        documentId, bucketName, objectName)
    print(output)
def processRequest(request):
    """Create an item for one batch task's object and return the task result.

    The returned dict has the shape of an S3 Batch Operations Lambda
    response (invocationSchemaVersion / treatMissingKeysAs / invocationId /
    results) -- NOTE(review): confirm that is the actual caller.

    Previously an unsupported extension left ``itemId`` unassigned and the
    function raised UnboundLocalError while building ``results``. Such
    objects are now reported with resultCode 'PermanentFailure' instead.

    Args:
        request: Mapping that must contain "bucketName", "objectName",
            "itemsTable", "outputBucket", "jobId", "invocationId",
            "invocationSchemaVersion" and "taskId" (a missing key raises
            KeyError).

    Returns:
        dict: Response with a single-entry 'results' list. resultCode is
        'Succeeded' when an item was created for a jpg / jpeg / png / mov /
        mp4 object, and 'PermanentFailure' otherwise.
    """
    print("request: {}".format(request))

    bucketName = request["bucketName"]
    objectName = request["objectName"]
    itemsTable = request["itemsTable"]
    # outputBucket and jobId are not used below; the lookups are kept so a
    # request missing either key still raises KeyError as before.
    outputBucket = request["outputBucket"]
    jobId = request["jobId"]
    invocationId = request['invocationId']
    invocationSchemaVersion = request['invocationSchemaVersion']
    taskId = request['taskId']

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    if ext and ext in ["jpg", "jpeg", "png", "mov", "mp4"]:
        itemId = str(uuid.uuid1())
        ds = datastore.ItemStore(itemsTable)
        ds.createItem(itemId, bucketName, objectName)

        output = "Saved item {} for {}/{}".format(itemId, bucketName, objectName)
        resultCode = 'Succeeded'
        resultString = "Item submitted for processing with Id: {}".format(itemId)
    else:
        # No item was created, so report the task as failed rather than
        # crashing on the missing itemId.
        output = "Unsupported extension '{}' for {}/{}; no item created".format(
            ext, bucketName, objectName)
        resultCode = 'PermanentFailure'
        resultString = output

    print(output)

    results = [{
        'taskId': taskId,
        'resultCode': resultCode,
        'resultString': resultString,
    }]

    return {
        'invocationSchemaVersion': invocationSchemaVersion,
        'treatMissingKeysAs': 'PermanentFailure',
        'invocationId': invocationId,
        'results': results,
    }
def processRequest(request):
    """Create a document for one batch task's object and return the task result.

    The returned dict has the shape of an S3 Batch Operations Lambda
    response (invocationSchemaVersion / treatMissingKeysAs / invocationId /
    results) -- NOTE(review): confirm that is the actual caller.

    Previously an unsupported extension left ``documentId`` unassigned and
    the function raised UnboundLocalError while building ``results``. Such
    objects are now reported with resultCode "PermanentFailure" instead.

    Args:
        request: Mapping that must contain "bucketName", "objectName",
            "documentsTable", "outputTable", "jobId", "invocationId",
            "invocationSchemaVersion" and "taskId" (a missing key raises
            KeyError).

    Returns:
        dict: Response with a single-entry "results" list. resultCode is
        "Succeeded" when a document was created for a jpg / jpeg / png / pdf
        object, and "PermanentFailure" otherwise.
    """
    print("request: {}".format(request))

    bucketName = request["bucketName"]
    objectName = request["objectName"]
    documentsTable = request["documentsTable"]
    outputTable = request["outputTable"]
    # jobId is not used below; the lookup is kept so a request missing the
    # key still raises KeyError as before.
    jobId = request["jobId"]
    invocationId = request["invocationId"]
    invocationSchemaVersion = request["invocationSchemaVersion"]
    taskId = request["taskId"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    if ext and ext in ["jpg", "jpeg", "png", "pdf"]:
        documentId = str(uuid.uuid1())
        ds = datastore.DocumentStore(documentsTable, outputTable)
        ds.createDocument(documentId, bucketName, objectName)

        output = "Saved document {} for {}/{}".format(documentId, bucketName, objectName)
        resultCode = "Succeeded"
        resultString = "Document submitted for processing with Id: {}".format(documentId)
    else:
        # No document was created, so report the task as failed rather than
        # crashing on the missing documentId.
        output = "Unsupported extension '{}' for {}/{}; no document created".format(
            ext, bucketName, objectName)
        resultCode = "PermanentFailure"
        resultString = output

    print(output)

    results = [{
        "taskId": taskId,
        "resultCode": resultCode,
        "resultString": resultString,
    }]

    return {
        "invocationSchemaVersion": invocationSchemaVersion,
        "treatMissingKeysAs": "PermanentFailure",
        "invocationId": invocationId,
        "results": results,
    }
def __init__(self, bucketName, documentPath, awsRegion, detectText, detectForms, detectTables):
    """Build and validate the input parameters, storing them on self.inputParameters.

    A fresh ``Input()`` is created and each argument overrides the matching
    attribute only when the argument is truthy; otherwise the value that
    ``Input()`` starts with is kept.

    Args:
        bucketName: Bucket holding the document. When the resulting
            bucketName is falsy the document is treated as local.
        documentPath: Path of the document; its extension selects the
            document type ("pdf" -> "PDF", "jpg"/"jpeg"/"png" -> "IMAGE").
        awsRegion: Region value copied onto the parameters when truthy.
        detectText: Copied onto the parameters when truthy.
        detectForms: Copied onto the parameters when truthy.
        detectTables: Copied onto the parameters when truthy.

    Raises:
        Exception: When neither bucket nor document path is set, the
            extension is not pdf/jpg/jpeg/png, a PDF is local, or all three
            detect* values compare equal to False.
    """
    params = Input()

    # Apply overrides in the original order; falsy arguments are skipped.
    overrides = (
        ("bucketName", bucketName),
        ("documentPath", documentPath),
        ("awsRegion", awsRegion),
        ("detectText", detectText),
        ("detectForms", detectForms),
        ("detectTables", detectTables),
    )
    for attribute, value in overrides:
        if value:
            setattr(params, attribute, value)

    if not (params.bucketName or params.documentPath):
        raise Exception("Document is required.")

    # No bucket means the document is read from the local path.
    params.isLocalDocument = not params.bucketName

    ext = FileHelper.getFileExtenstion(params.documentPath).lower()
    print("Extension", ext)

    type_by_extension = {
        "pdf": "PDF",
        "jpg": "IMAGE",
        "jpeg": "IMAGE",
        "png": "IMAGE",
    }
    if ext not in type_by_extension:
        raise Exception("Document should be jpg/jpeg, png or pdf.")
    params.documentType = type_by_extension[ext]

    if params.documentType == "PDF" and params.isLocalDocument:
        raise Exception("PDF must be in S3 bucket.")

    # `== False` is kept on purpose: it is not the same as `not x` for values
    # such as None, so the original comparison semantics are preserved.
    nothing_selected = (
        params.detectText == False
        and params.detectForms == False
        and params.detectTables == False
    )
    if nothing_selected:
        raise Exception(
            "Select at least one option to extract text, form or table")

    self.inputParameters = params
def processRequest(request):
    """Route a document to the processing queue that matches its file extension.

    * jpg / jpeg / png -> request['syncQueueUrl']
    * pdf              -> request['asyncQueueUrl'], plus a copy of the same
                          message to request["pdftoimgQueueUrl"]
    * htm / html       -> os.environ['HTMLTOBOUNDINGBOX_QUEUE_URL']

    Previously any other extension (or none) left ``qUrl`` unassigned and
    the function raised UnboundLocalError. Such objects are now skipped
    with a log line.

    Args:
        request: Mapping that must contain "documentId", "bucketName" and
            "objectName", plus the queue URL key(s) for the matched
            extension (a missing key raises KeyError).

    Returns:
        None.
    """
    print("request: {}".format(request))

    documentId = request["documentId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Always bind qUrl so the check below cannot hit an unassigned local.
    if ext in ("jpg", "jpeg", "png"):
        qUrl = request['syncQueueUrl']
    elif ext == "pdf":
        qUrl = request['asyncQueueUrl']
    elif ext in ("htm", "html"):
        qUrl = os.environ['HTMLTOBOUNDINGBOX_QUEUE_URL']
    else:
        qUrl = None

    if not qUrl:
        print("Skipped routing for documentId: {}, object: {}/{} (no queue for extension '{}')".format(
            documentId, bucketName, objectName, ext))
        return

    features = ["Text"]  # , "Forms", "Tables"]
    jsonMessage = {
        'documentId': documentId,
        "features": features,
        'bucketName': bucketName,
        'objectName': objectName,
    }

    client = AwsHelper().getClient('sqs')
    postMessage(client, qUrl, jsonMessage)

    # PDFs are additionally sent to the pdf-to-image queue. This is done only
    # after the main message has been posted, because it reuses the same
    # client and message.
    if ext == "pdf":
        postMessage(client, request["pdftoimgQueueUrl"], jsonMessage)

    output = "Completed routing for documentId: {}, object: {}/{}".format(documentId, bucketName, objectName)
    print(output)
def processRequest(request):
    """Route a document to the sync or async queue based on its file extension.

    Images (jpg / jpeg / png) go to request['syncQueueUrl']; PDFs go to
    request['asyncQueueUrl']. Only the "Text" feature is requested.

    Previously any other extension (or none) left ``qUrl`` unassigned and
    the function raised UnboundLocalError. Such objects are now skipped
    with a log line.

    Args:
        request: Mapping that must contain "documentId", "bucketName" and
            "objectName", plus the queue URL key for the matched extension
            group (a missing key raises KeyError).

    Returns:
        None.
    """
    print("request: {}".format(request))

    documentId = request["documentId"]
    bucketName = request["bucketName"]
    objectName = request["objectName"]

    print("Input Object: {}/{}".format(bucketName, objectName))

    ext = FileHelper.getFileExtenstion(objectName.lower())
    print("Extension: {}".format(ext))

    # Always bind qUrl so the check below cannot hit an unassigned local.
    if ext in ("jpg", "jpeg", "png"):
        qUrl = request['syncQueueUrl']
    elif ext == "pdf":
        qUrl = request['asyncQueueUrl']
    else:
        qUrl = None

    if not qUrl:
        print("Skipped routing for documentId: {}, object: {}/{} (no queue for extension '{}')".format(
            documentId, bucketName, objectName, ext))
        return

    # To avoid form and tables outputs, this will be added to a configuration file
    # (full set would be ["Text", "Forms", "Tables"]).
    features = ["Text"]

    jsonMessage = {
        'documentId': documentId,
        "features": features,
        'bucketName': bucketName,
        'objectName': objectName,
    }

    client = AwsHelper().getClient('sqs')
    postMessage(client, qUrl, jsonMessage)

    output = "Completed routing for documentId: {}, object: {}/{}".format(
        documentId, bucketName, objectName)
    print(output)