Пример #1
0
def ocr_api():
    image = request.files['img']

    image.save(os.path.join(app.config['UPLOAD_FOLDER'], "down.jpg"))

    img = cv2.imread('down.jpg')
    text = run_ocr(img)
    # pos = text.find('EXP')
    # target = text[pos: pos+10]
    # pos = target.find("/")
    # date = target[pos-2: pos] + target[pos+1: pos+3]
    print(text)
    date = ""
    try:
        date = re.search(r'[0-9]+/[0-9]+', text).group()
        text2 = text[text.find(date) + 3:]
        try:
            date = re.search(r'[0-9]+/[0-9]+', text2).group()
        except:
            pass
    except:
        return json.dumps("sorry can't read date please retake image")
    return json.dumps(date)
Пример #2
0
def creator(files, printer):
    printer.sprint('Initializing OCR...', 0, 0)

    if threading.currentThread().getName() != 'MainThread':
        pythoncom.CoInitialize()

    #socket.emit()

    docs = [
        'Bank Statement', 'Bill Statement', 'Loan Repayment Schedule',
        'CAT Template'
    ]

    template_loc = "static/template"
    temp_loc = "static/temp"
    output_loc = "static/output"
    upload_loc = "static/uploads"
    wd = os.getcwd().replace("\\", "/") + "/"

    template = template_loc + '/template.xlsm'
    output = output_loc + '/output.xlsm'

    shutil.copy(template, output)

    def adder(file_path):
        xl = win32.gencache.EnsureDispatch('Excel.Application')
        xl.Visible = False  # change to False after development
        xl.DisplayAlerts = False
        xl.AskToUpdateLinks = False
        output_xl = xl.Workbooks.Open(wd + output_loc + '/output.xlsm')
        file_xl = xl.Workbooks.Open(file_path)
        file_xl.Sheets(1).Activate()

        for j in xl.ActiveWorkbook.Sheets:
            xl.DisplayAlerts = False
            j.Copy(After=output_xl.Sheets(output_xl.Sheets.Count))
        xl.DisplayAlerts = False
        file_xl.Close(SaveChanges=False)

        output_xl.Sheets(1).Activate()
        for k in xl.ActiveWorkbook.Sheets:
            if k.Name not in ["Introduction", "Summary", "User Metric"]:
                k.Activate()
                k.Visible = False

        output_xl.Save()

    # def adder(file_path):
    #     xl = win32.gencache.EnsureDispatch('Excel.Application')
    #     xl.Visible = False  # change to False after development
    #     xl.DisplayAlerts = False
    #     output_xl = xl.Workbooks.Open(wd + output_loc + '/output.xlsm')
    #     file_xl = xl.Workbooks.Open(file_path)
    #     file_xl.Sheets(1).Activate()
    #
    #     for j in xl.ActiveWorkbook.Sheets:
    #         j.Activate()
    #         j.Visible = 1
    #         j.Copy(After=output_xl.Sheets(output_xl.Sheets.Count))
    #     file_xl.Close()
    #
    #     output_xl.Sheets(1).Activate()
    #     for k in xl.ActiveWorkbook.Sheets:
    #         if k.Name != "Introduction":
    #             k.Activate()
    #             k.Visible = False
    #
    #     output_xl.Save()
    #     xl.Quit()

    # def adder(file_path):
    #     xl = win32.gencache.EnsureDispatch('Excel.Application')
    #     xl.Visible = False  # change to False after development
    #     xl.DisplayAlerts = False
    #     xl.AskToUpdateLinks = False
    #     output_xl = xl.Workbooks.Open(wd + output_loc + '/output.xlsm', UpdateLinks=False)
    #     file_xl = xl.Workbooks.Open(file_path, UpdateLinks=False)
    #     file_xl.Sheets(1).Activate()
    #
    #     for j in xl.ActiveWorkbook.Sheets:
    #         j.Activate()
    #         j.Visible = 1
    #         j.Copy(After=output_xl.Sheets(output_xl.Sheets.Count))
    #     file_xl.Close()
    #
    #     output_xl.Sheets(1).Activate()
    #     for k in xl.ActiveWorkbook.Sheets:
    #         if k.Name != "Introduction":
    #             k.Activate()
    #             k.Visible = False
    #
    #     output_xl.Save()
    #     xl.Quit()

    def identifier(file):
        if 'bank' in file.lower():
            return 'BaS', docs[0]
        elif 'bill' in file.lower():
            return 'BiS', docs[1]
        elif 'facility' in file.lower():
            return 'LR', docs[2]
        elif 'cat' in file.lower():
            return 'CAT', docs[3]
        else:
            return "Not a compatible document type."

    printer.sprint('Identifying documents...', 4, 0)
    #frames = {'BiS': None, 'LR': None, 'BaS': None, 'CAT': None}
    frames = {}
    for file in files:
        doc_type, doc_name = identifier(file)
        if doc_type == 'CAT':
            printer.sprint('Reading CAT Template...', 7, 0)
            adder(file)
            continue
        data, coor, conf = run_ocr(file, doc_type, printer).multi_page_reader()
        frames[doc_type] = {
            'filename': file,
            'docname': doc_name,
            'data': data,
            'coor': coor,
            'conf': conf
        }

    printer.sprint('Storing Data...', 11, 0)
    with pd.ExcelWriter(temp_loc + '/temp.xlsx') as writer:
        for doc_type in frames:
            if doc_type == 'CAT':
                continue
            doc_name = frames[doc_type]['docname']
            coor = '_coor' + doc_type
            conf = '_conf' + doc_type
            frames[doc_type]['data'].to_excel(writer, sheet_name=doc_name)
            frames[doc_type]['coor'].to_excel(writer, sheet_name=coor)
            frames[doc_type]['conf'].to_excel(writer, sheet_name=conf)
            frames[doc_type]['sheets'] = [doc_name, coor, conf]

    adder(wd + temp_loc + '/temp.xlsx')

    xl = win32.gencache.EnsureDispatch('Excel.Application')
    xl.Visible = False  # change to False after development
    xl.DisplayAlerts = False
    xl.AskToUpdateLinks = False

    output_xl = xl.Workbooks.Open(wd + output_loc + '/output.xlsm',
                                  UpdateLinks=False)
    #output_xl.Sheets("Sheet1").Delete()

    output_xl.Sheets(1).Activate()
    output_xl.VBProject.VBComponents('Sheet1').CodeModule.AddFromString(
        'Option Explicit')

    # try:
    #     xl.Application.Run("output.xlsm!Module3.InsertPivotTable")
    # except:
    #     pass

    _sheets = {}
    for i, j in enumerate(xl.ActiveWorkbook.Sheets):
        j.Activate()
        _sheets[j.Name] = [i + 1, j.CodeName]

    printer.sprint('Creating Excel file...', 10, 0)
    for i in frames:
        if i == "CAT":
            continue
        data_sheet, coor_sheet, conf_sheet = frames[i]['sheets']
        print(data_sheet, coor_sheet, conf_sheet)
        # output_xl.Sheets(data_sheet).Visible = False
        # output_xl.Sheets(coor_sheet).Visible = False
        # output_xl.Sheets(conf_sheet).Visible = False
        # output_xl.Sheets(data_sheet).Activate()
        data_ws = output_xl.Sheets(data_sheet)
        data_ws.Visible = True
        data_ws.Activate()
        data = data_ws.Range("B:Z")
        data.Select()
        data.FormatConditions.Add(
            2,
            Formula1='=if({0}!B1 <> "", {0}!B1<85, False)'.format(conf_sheet))
        #data.FormatConditions(data.FormatConditions.Count).SetFirstPriority()
        data.FormatConditions(
            data.FormatConditions.Count).Interior.ColorIndex = 22
        output_xl.Save()
        pagecol = frames[i]['coor'].columns.get_loc("Page Number")
        col = chr(pagecol + 98).capitalize()
        maxpg = frames[i]['coor']["Page Number"].max()

        pagels = {}
        pageurl = {}
        for j in range(int(maxpg)):
            name = wd + 'sample{}/{}.jpg'.format(i, j + 1)
            img = cv2.imread(name, cv2.IMREAD_UNCHANGED)
            pagels[j + 1] = img.shape
            pageurl[j + 1] = name
        w_factor = 600 / pagels[1][1]

        selector_code = '''Option Explicit
    
    
    Private Sub Worksheet_SelectionChange(ByVal Target As Range)
        On Error Resume Next
        'Dim words As Collection
        'Set words = CollectionCreator()
        Dim Range As Range
        Dim rCell As Range
    
         If Selection.Count > 0 And Selection.Count < 3 Then
            On Error Resume Next
            Call DeleteAllShapes
            For Each rCell In Target.Cells
                Dim rng As String
                Dim val As String
                Dim i As Integer
                Dim msgString As String
                Dim coordinates() As String
    
                rng = rCell.Address(RowAbsolute:=False, ColumnAbsolute:=False)
                val = Worksheets("{0}").Range(rng).Value
    
                    If Not val = "" Then
                        coordinates = Split(val, " ")
                        Dim vArr() As String
                        Dim row_num As String
                        Dim pn_address As String
                        Dim pn As String
                        Dim name As String
                        Dim top As Double
                        Dim shp as Object
    
                        vArr = Split(rCell.Address(True, False), "$")
                        row_num = vArr(1)
                        pn_address = "{1}" & row_num
                        pn = Worksheets("{0}").Range(pn_address).Value
    
                        name = "page" & pn
                        ActiveSheet.Shapes(name).Visible = True
                        ActiveSheet.Shapes(name).Width = 600

                        top = ActiveSheet.Shapes(name).top
    
                        For Each shp In ActiveSheet.Shapes
                           If shp.name <> name Then shp.Visible = False
                        Next shp
    
                    'If Exists(words, range) Then
                        Dim x As Double: x = coordinates(0)*{2}
                        Dim y As Double: y = coordinates(1)*{2} + top
                        Dim w As Double: w = coordinates(2)*{2}
                        Dim h As Double: h = coordinates(3)*{2}
                        'Dim name As String: name = coordinates(4)
                    Call BoxCreator(x, y, w, h)
                    'Else
                        'Call DeleteAllShapes
                    End If
            Next rCell
        End If
    
    End Sub'''.format(coor_sheet, col, w_factor)

        # print(_sheets)
        # for i in output_xl.VBProject.VBComponents:
        #     print(i)
        #     print(i.Name)
        #     print(i.Properties)
        output_xl.VBProject.VBComponents(str(
            _sheets[data_sheet][1])).CodeModule.AddFromString(selector_code)
        data_ws.Visible = True
        data_ws.Activate()
        xl.Range("B1").Select()  # add the () at the end here
        xl.ActiveWindow.FreezePanes = True
        data_ws.Columns("A").ColumnWidth = 106
        data_ws.Columns("A").Font.ColorIndex = 2
        #xl.Selection.ClearContents()

        # print(pageurl)
        for l in pageurl:
            # data_ws.Activate()
            entry_index = frames[i]['coor'].index[
                frames[i]['coor']["Page Number"] == l].tolist()[0] + 1
            top = xl.Range("A" + str(entry_index)).Top
            link = pageurl[l].replace("/", "\\")
            pic = data_ws.Shapes.AddPicture(link,
                                            LinkToFile=False,
                                            SaveWithDocument=True,
                                            Left=0,
                                            Top=0,
                                            Width=-1,
                                            Height=-1)

            pic.Name = 'page' + str(l)
            pic.Left = 0
            pic.Top = top
            pic.Width = 600
            if l == 1:
                pic.Visible = True
            else:
                pic.Visible = False

    output_xl.Sheets(1).Activate()
    xl.DisplayAlerts = False

    output_xl.Save()
    xl.Quit()
from model import ProductClassifier
from ocr import run_ocr
from config import *
import argparse
import time

PC = ProductClassifier()
PC.load(MODEL_PATH)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Process images.')
    parser.add_argument('-i',
                        '--image',
                        type=str,
                        help='images of products to identify')
    args = parser.parse_args()

    output = list()
    start = time.time()
    ocr_result = run_ocr(args.image)
    print('Ocr time: {0} sec.'.format(round(time.time() - start, 2)))
    product_name, class_probabilities = PC.predict(ocr_result)
    print('Full time: {0} sec.'.format(round(time.time() - start, 2)))
    print(product_name, class_probabilities.max())
Пример #4
0
from ocr import run_ocr
from excel import excel_coor
import pandas as pd

#data, coor, conf = run_ocr('Obligor_5_Facility_Letter_Dah_Sing.pdf', 'LR').multi_page_reader()
data, coor, conf = run_ocr('Obligor_3_DBS_Bills_Statements_Apr.pdf', 'BiS').multi_page_reader()
data.to_excel("static/temp/output_info.xlsx")
coor.to_excel("static/temp/cor_info.xlsx")
excel_coor()
Пример #5
0
capture = cv2.VideoCapture(
    f'http://{creds.username}:{creds.password}@{creds.ip}/video')

# .jpg image file name
image_file = r'outfiles\image.jpg'

while True:
    ret, frame = capture.read()

    img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # grayscaling the image
    _, th = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY +
                          cv2.THRESH_OTSU)  # applying otsu binarization

    cv2.imshow('frame', th)

    k = cv2.waitKey(1)

    if k == 32:  # RETURN Key is pressed
        r = requests.get(url_af)
        with open(image_file, 'wb') as f:
            f.write(r.content)

        ocr.filter_image(image_file, False)
        text = ocr.run_ocr(image_file)

        print(text)
        with open(r'outfiles\infile.txt', 'w') as f:
            f.write(text)

    if k == 27:  # ESC Key is pressed
        break
Пример #6
0
                help="path to input images to be OCR'd",
                default="images")
ap.add_argument("-p",
                "--preprocess",
                type=str,
                help="type of preprocessing to be done",
                default="thresh")
ap.add_argument("--framepath",
                help="path to OCR output of frames",
                default="output")
args = vars(ap.parse_args())

start = time.perf_counter()
frame_extractor.extract_frames(args["video"])
frame_extract_time = time.perf_counter() - start

start = time.perf_counter()
ocr.run_ocr(args["image_path"], args["preprocess"])
ocr_time = time.perf_counter() - start

start = time.perf_counter()
match_code.match(args["codefile"], args["framepath"])
code_match_time = time.perf_counter() - start

print("\n")
print("Frame extraction completed in : ", frame_extract_time)
print("\n")
print("OCR completed in : ", ocr_time)
print("\n")
print("Code matching completed in : ", code_match_time)