def create_documents(size):
    """Create a document from the request body and return its new id.

    The request body is stored as page 1 of a new row in ``documents``.
    The form type is taken from the ``type`` query argument when present;
    otherwise the body is passed to ``recognise`` and its result is used.

    :param size: value stored in the ``size`` column of the new row.
    :return: a 415 response when the Content-Type is not TIFF, JPEG or PDF;
        otherwise a 201 JSON response carrying ``id`` and ``form_type``.
    """
    content_type = request.headers['Content-Type']
    if content_type not in ("image/tiff", 'image/jpeg', 'application/pdf'):
        logging.error('Content-Type is not a valid image format')
        return Response(status=415)

    if 'type' not in request.args:
        # no explicit type supplied: ocr the form to detect the application type
        form_type = recognise(io.BytesIO(request.data))
    else:
        logging.info("Form type specified")
        form_type = request.args['type']

    cursor = connect(cursor_factory=psycopg2.extras.DictCursor)
    try:
        # NOTE(review): max()+1 looks racy if two requests run concurrently --
        # confirm whether the table or the caller serialises inserts.
        cursor.execute('select max(document_id)+1 from documents')
        max_row = cursor.fetchone()
        # max() over an empty table yields NULL, so the first document gets id 1
        new_id = 1 if max_row[0] is None else max_row[0]

        insert_params = {
            "document_id": new_id,
            "form_type": form_type,
            "content_type": content_type,
            "page": "1",
            "size": size,
            "image": psycopg2.Binary(request.data)
        }
        cursor.execute("insert into documents (document_id, form_type, content_type, page, size, image) "
                       "values ( %(document_id)s, %(form_type)s, %(content_type)s, %(page)s, %(size)s, "
                       "%(image)s ) returning document_id",
                       insert_params)
        document_id = cursor.fetchone()[0]
        complete(cursor)
    except BaseException:
        # undo the transaction on any failure, then let the error propagate
        rollback(cursor)
        raise

    body = json.dumps({"id": document_id, "form_type": form_type})
    return Response(body, status=201, mimetype='application/json')
def change_image(doc_id, page_no, size):
    """Replace the stored image of an existing document page.

    For page 1 the request body is passed to ``recognise`` and the result
    becomes the row's form type; for any other page the form type is copied
    from the document's page 1 row.

    :param doc_id: document_id of the row to update.
    :param page_no: page of the row to update; compared against the int 1.
    :param size: value written to the ``size`` column.
    :return: 415 when the Content-Type is not TIFF, JPEG or PDF; 404 when
        page 1 of the document cannot be found or no row matched the update;
        200 otherwise.
    """
    content_type = request.headers['Content-Type']
    if content_type not in ("image/tiff", 'image/jpeg', 'application/pdf'):
        logging.error('Content-Type is not a valid image format')
        return Response(status=415)

    cursor = connect(cursor_factory=psycopg2.extras.DictCursor)
    try:
        page_one_found = True
        form_type = None
        if page_no == 1:
            # ocr form to detect application type
            image_as_bytes = io.BytesIO(request.data)
            form_type = recognise(image_as_bytes)
            # TODO: if form_type is different to the original type, need to consider updating any page 2,3 etc...
            # TEST reallocate on multi-page form
        else:
            cursor.execute('select form_type from documents where document_id=%(doc_id)s and page = 1',
                           {"doc_id": doc_id})
            row = cursor.fetchone()
            if row is None:
                page_one_found = False
            else:
                form_type = row['form_type']

        if page_one_found:
            cursor.execute("update documents set form_type=%(form_type)s, content_type=%(content_type)s, "
                           "size=%(size)s , image=%(image)s where document_id=%(doc_id)s and page=%(page)s",
                           {
                               "doc_id": doc_id,
                               "form_type": form_type,
                               "content_type": content_type,
                               "page": page_no,
                               "size": size,
                               "image": psycopg2.Binary(request.data)
                           })
            rowcount = cursor.rowcount
        else:
            # Unknown document: skip the update but still fall through to
            # complete() so the cursor is finalised on this path as well
            # (previously this path returned without complete/rollback).
            rowcount = 0
        complete(cursor)
    except BaseException:
        # undo the transaction on any failure, then let the error propagate
        rollback(cursor)
        raise

    if rowcount == 0:
        return Response(status=404)
    return Response(status=200)
def scan_image(self, file):
    """Run ``recognise`` on a file from the ``ocr_test`` directory.

    :param file: file name, appended to ``"ocr_test/"`` beneath ``dir_``.
    :return: whatever ``recognise`` returns for the opened binary file.
    """
    filename = os.path.join(dir_, "ocr_test/" + file)
    # the context manager closes the handle even if recognise raises
    with open(filename, 'rb') as file_bytes:
        return recognise(file_bytes)