예제 #1
0
def DetectVATInvoice(box, symbol, filePath):  # Identify VAT invoice, variant 1
    """Crop and OCR every labelled field of a VAT invoice using an XML template.

    The template 'ModeLabel_00001.xml' is loaded through
    ``xmlToDict.XmlTodict``. Every entry except 'QRCode' and 'figureX' is
    treated as a field: its four values are converted to int, passed to
    ``MakeFileInV`` as two coordinate pairs, and the result is fed to
    ``OcrPic`` and printed.

    Args:
        box: Forwarded unchanged to ``MakeFileInV``.
        symbol: Forwarded unchanged to ``MakeFileInV``.
        filePath: Forwarded unchanged to ``MakeFileInV``
            (presumably the invoice image path -- confirm against caller).

    Returns:
        None. Results are only printed.
    """
    dic = xmlToDict.XmlTodict('ModeLabel_00001.xml')

    # Reference values of the two anchors in the template: the QR code
    # (first two entries) and the "X" figure (third and fourth entries).
    tplt = [
        dic['QRCode'][0], dic['QRCode'][1], dic['figureX'][2],
        dic['figureX'][3]
    ]
    # One message per missing anchor value; processing continues regardless.
    for c in tplt:
        if c is None:
            print('Templet VATInvoice error')

    for item in dic:
        if item in ('QRCode', 'figureX'):
            # Anchors are not fields to be recognised.
            continue
        rect = dic.get(item)
        tmp = MakeFileInV([[int(rect[0]), int(rect[1])],
                           [int(rect[2]), int(rect[3])]], box, symbol,
                          filePath, item, tplt)

        print(item + ":   " + OcrPic(tmp))

    js = InterfaceType.JsonInterface.invoice()
    # NOTE(review): bare attribute reference, not a call -- it has no effect
    # beyond raising AttributeError if the attribute is missing. The intended
    # arguments are not visible here, so it is left as is.
    js.addVATInvoiceInfo
예제 #2
0
def mubanDetectInvoiceDate(filepath, setKey='invoiceDate'):
    """Locate one field of a VAT special invoice using the simple template.

    The invoice type is fixed to the VAT special invoice (code 11 in the
    original code). The field rectangle named ``setKey`` is read from the XML
    template, adjusted by ``simplyAdjust`` using the symbol position found by
    ``FindCircle.findSymbol``, and then matched against the text lines of the
    image.

    Args:
        filepath: Path of the invoice image.
        setKey: Name of the template entry to locate.

    Returns:
        The result of ``lineToAttribute.getAtbt.compute`` for the adjusted
        template, or None when the image cannot be read or no symbol is
        found in it.
    """
    dic = xmlToDict.XmlTodict('/home/huangzheng/ocr/VATInvoiceSimpleMuban.xml')

    # Reference point derived from the template's figureX entry:
    # first value + third / 2, second value + fourth / 2
    # (presumably the centre of an [x, y, w, h] box -- confirm).
    figure_x = dic['figureX']
    tplt = [figure_x[0] + figure_x[2] / 2, figure_x[1] + figure_x[3] / 2]

    # Template holding only the requested field.
    field = dic.get(setKey)
    TemType = {
        setKey: [int(field[0]), int(field[1]), int(field[2]), int(field[3])]
    }

    fcv = cv2.imread(filepath, 1)
    if fcv is None:
        # Previously the message was printed and execution then crashed on
        # the unbound image shape; bail out like the "no symbol" case below.
        print("picture is None")
        return None
    w1 = fcv.shape

    figureP = FindCircle.findSymbol(filepath)
    if figureP is None:
        return None
    Templet = simplyAdjust(TemType, [figureP[0], figureP[1]], tplt, w1)  # VAT special invoice

    attributeLine = lineToAttribute.getAtbt.compute(textline(filepath), Templet)

    return attributeLine
예제 #3
0
def init(filepath, type):
    """Run template-based field extraction and OCR on a ticket/invoice image.

    Steps: classify or pre-process the image, pick the field template for the
    detected type, adjust the template to the detected text boxes, display the
    template rectangles with ``pl``, map text lines to fields and pass them
    to ``flow.cropToOcr``.

    Type codes used below (taken from the original comments):
    1 = blue train ticket, 2 = red train ticket, 3 = red supplementary
    ticket, 11 = VAT special invoice.

    Args:
        filepath: Path of the image to process.
        type: None to auto-detect through ``detectType.detectType``;
            'blue' or 'excess' to go through ``PipeInvoice.getPipe``; any
            other value is handled as a red ticket. (The name shadows the
            builtin but is kept for caller compatibility.)

    Returns:
        The value returned by ``flow.cropToOcr`` (also printed).

    Raises:
        ValueError: If the detected type code is not one of 1, 2, 3, 11.
    """
    if type is None:
        # Unclassified input: let the pre-processing step determine the type.
        midProcessResult = detectType.detectType(filepath)
    elif type in ('blue', 'excess'):
        midProcessResult = PipeInvoice.getPipe(filepath, type, False)
    else:
        # Treated as type == 'red': [image path, type code, text lines].
        midProcessResult = [filepath, 2, textline(filepath)]

    # Field rectangles per ticket kind (four ints each; presumably
    # [x, y, w, h] -- confirm against adjustToTextLine).
    blueTemplet = {
        'departCity': [48, 62, 222, 56],
        'arriveCity': [412, 61, 228, 55],
        'trainNumber': [264, 62, 170, 57],
        'invoiceDate': [24, 139, 369, 42],
        'seatNum': [408, 138, 160, 40],
        'idNum': [22, 276, 306, 38],
        'passenger': [328, 276, 150, 38],
        'totalAmount': [33, 177, 151, 39],
        'ticketsNum': [21, 10, 195, 66]
    }
    redTemplet = {
        'idNum': [66, 242, 357, 38],
        'departCity': [66, 66, 222, 45],
        'arriveCity': [388, 66, 225, 47],
        'trainNumber': [288, 53, 103, 41],
        'invoiceDate': [66, 114, 237, 43],
        'seatNum': [400, 115, 210, 46],
        'totalAmount': [66, 163, 188, 34],
        'ticketsNum': [21, 23, 218, 47]
    }
    excessTemplet = {
        'departCity': [26, 40, 151, 33],
        'arriveCity': [271, 40, 169, 33],
        'trainNumber': [178, 35, 92, 32],
        'invoiceDate': [12, 82, 203, 29],
        'seatNum': [315, 79, 136, 33],
        'totalAmount': [12, 118, 167, 28],
        'ticketsNum': [23, 12, 147, 37]
    }

    # The VAT invoice template is loaded for every type, as before, so a
    # missing or broken XML file still fails regardless of the ticket kind.
    dic = xmlToDict.XmlTodict('VATInvoiceMuban.xml')

    # Template anchors: first two QRCode values plus the figureX reference
    # point (first value + third / 2, second value + fourth / 2).
    tplt = [
        dic['QRCode'][0], dic['QRCode'][1],
        dic['figureX'][0] + dic['figureX'][2] / 2,
        dic['figureX'][1] + dic['figureX'][3] / 2
    ]
    for c in tplt:
        if c is None:
            print('Templet VATInvoice error')

    invoice_type = midProcessResult[1]
    if invoice_type == 1:
        TemType = blueTemplet
    elif invoice_type == 2:
        TemType = redTemplet
    elif invoice_type == 3:
        TemType = excessTemplet
    elif invoice_type == 11:  # VAT special invoice (reserved)
        TemType = {}
        for item in dic:
            if item in ('QRCode', 'figureX'):
                continue
            rect = dic.get(item)
            TemType[item] = [
                int(rect[0]), int(rect[1]), int(rect[2]), int(rect[3])
            ]
    else:
        # Previously this fell through and crashed later on an unbound
        # template variable; fail with an explicit message instead.
        raise ValueError(
            'unsupported invoice type code: {!r}'.format(invoice_type))

    # Default detection scale: halved for large images.
    rate = 1
    fcv = cv2.imread(filepath, 1)
    w1 = fcv.shape
    if w1[0] + w1[1] > 1500:
        rate = 0.5
        print("rate : 0.5")

    # Red and supplementary tickets override the size-based scale.
    if invoice_type == 2:
        rate = 2.0
        print("rate : 2.0")
    elif invoice_type == 3:
        rate = 1.0
        print("rate : 1.0")

    box = Detect.detect(cv2.imread(midProcessResult[0]), rate)
    if invoice_type == 11:
        # VAT invoices are aligned on the sorted boxes plus the symbol
        # position, and need the template anchors as reference.
        figureP = FindCircle.findSymbol(filepath)
        StBox = sortBox(box)
        Templet = adjustToTextLine(
            TemType, [StBox[0], StBox[1], figureP[0], figureP[1]],
            invoice_type, tplt)
    else:
        Templet = adjustToTextLine(TemType, box, invoice_type, None)

    # Show the grayscale image next to the template rectangles drawn on it.
    im = cv2.imread(filepath, 0)
    rec = [TemType[c] for c in TemType]
    vis_textline0 = fp.util.visualize.rects(im, rec)
    pl.figure(figsize=(15, 10))
    pl.subplot(2, 2, 1)
    pl.imshow(im, 'gray')

    pl.subplot(2, 2, 2)
    pl.imshow(vis_textline0)
    pl.show()

    attributeLine = lineToAttribute.getAtbt.compute(midProcessResult[2],
                                                    Templet)

    # OCR and word segmentation.
    jsonResult = flow.cropToOcr(midProcessResult[0], attributeLine,
                                invoice_type)
    print(jsonResult)
    return jsonResult
예제 #4
0
def mubanDetectInvoiceDate(filepath, setKey='invoiceDate'):
    """Locate one field of a VAT special invoice and save a preview image.

    The invoice type is fixed to the VAT special invoice (code 11 in the
    original code). The field rectangle named ``setKey`` is read from the XML
    template, adjusted by ``simplyAdjust`` using the symbol position found by
    ``FindCircle.findSymbol``, and matched against the text lines of the
    image. The matched rectangles are drawn and saved to the path obtained by
    replacing "upload" with "line" in ``filepath``.

    Args:
        filepath: Path of the invoice image.
        setKey: Name of the template entry to locate.

    Returns:
        The result of ``lineToAttribute.getAtbt.compute`` for the adjusted
        template, or None when the image cannot be read or no symbol is
        found in it.
    """
    dic = xmlToDict.XmlTodict('/home/huangzheng/ocr/VATInvoiceSimpleMuban.xml')

    # Reference point derived from the template's figureX entry:
    # first value + third / 2, second value + fourth / 2
    # (presumably the centre of an [x, y, w, h] box -- confirm).
    figure_x = dic['figureX']
    tplt = [
        figure_x[0] + figure_x[2] / 2,
        figure_x[1] + figure_x[3] / 2
    ]

    # Template holding only the requested field.
    field = dic.get(setKey)
    TemType = {
        setKey: [
            int(field[0]),
            int(field[1]),
            int(field[2]),
            int(field[3])
        ]
    }

    fcv = cv2.imread(filepath, 1)
    if fcv is None:
        # Previously the message was printed and execution then crashed on
        # the unbound image shape; bail out like the "no symbol" case below.
        print("picture is None")
        return None
    w1 = fcv.shape

    figureP = FindCircle.findSymbol(filepath)
    if figureP is None:
        return None
    Templet = simplyAdjust(TemType, [figureP[0], figureP[1]], tplt,
                           w1)  # VAT special invoice

    attributeLine = lineToAttribute.getAtbt.compute(
        textline(filepath), Templet)

    # Draw the extracted line rectangles on the grayscale image.
    plt_rects = [attributeLine[x] for x in attributeLine]
    vis_textline0 = fp.util.visualize.rects(
        cv2.imread(filepath, 0), plt_rects)
    pl.imshow(vis_textline0)

    # Save into the "line" directory; saving is best-effort (the original
    # message notes that bmp output is not supported).
    pltpath = filepath.replace("upload", "line")
    try:
        pl.savefig(pltpath)
    except Exception as e:
        print("绘制行提取图片不支持bmp格式:{}".format(e))

    return attributeLine
예제 #5
0
def surface(filename, type='blue'):
    """Extract the field text lines of a ticket/invoice stored under 'allstatic'.

    Steps: classify or pre-process the image, pick the field template for the
    detected type, adjust it to the detected text boxes, map text lines to
    fields, and save a preview of the matched rectangles (path obtained by
    replacing "out" with "line" in the processed image path).

    Type codes used below (taken from the original comments):
    1 = blue train ticket, 2 = red train ticket, 3 = red supplementary
    ticket, 11 = VAT special invoice.

    Args:
        filename: Image file name, relative to the 'allstatic' directory.
        type: None to auto-detect through ``detectType.detectType``;
            'blue' or 'excess' to go through ``PipeInvoice.getPipe``; any
            other value is handled as a red ticket. (The name shadows the
            builtin but is kept for caller compatibility.)

    Returns:
        Tuple ``(processed image path, type code, attributeLine)`` where
        ``attributeLine`` is the result of ``lineToAttribute.getAtbt.compute``.

    Raises:
        ValueError: If the detected type code is not one of 1, 2, 3, 11.
    """
    filepath = os.path.join('allstatic', filename)

    if type is None:
        # Unclassified input: let the pre-processing step determine the type.
        midProcessResult = detectType.detectType('allstatic', filename)
    elif type in ('blue', 'excess'):
        midProcessResult = PipeInvoice.getPipe('allstatic', filename, type, False)
    else:
        # Treated as type == 'red': copy the upload to the "out" location and
        # work on that copy (the original image, not a rectified one).
        out_filename = os.path.join('allstatic',
                                    filename.replace('upload', 'out'))
        shutil.copy(filepath, out_filename)
        midProcessResult = [out_filename, 2, textline(out_filename)]

    # Field rectangles per ticket kind (four ints each; presumably
    # [x, y, w, h] -- confirm against adjustToTextLine).
    blueTemplet = {
        'departCity': [48, 62, 222, 56],
        'arriveCity': [412, 61, 228, 55],
        'trainNumber': [264, 62, 170, 57],
        'invoiceDate': [24, 139, 369, 42],
        'seatNum': [408, 138, 160, 40],
        'idNum': [22, 276, 306, 38],
        'passenger': [328, 276, 150, 38],
        'totalAmount': [33, 177, 151, 39],
        'ticketsNum': [21, 10, 195, 66]
    }
    redTemplet = {
        'idNum': [66, 242, 357, 38],
        'departCity': [66, 66, 222, 45],
        'arriveCity': [388, 66, 225, 47],
        'trainNumber': [288, 53, 103, 41],
        'invoiceDate': [66, 114, 237, 43],
        'seatNum': [400, 115, 210, 46],
        'totalAmount': [66, 163, 188, 34],
        'ticketsNum': [21, 23, 218, 47]
    }
    excessTemplet = {
        'departCity': [26, 40, 151, 33],
        'arriveCity': [271, 40, 169, 33],
        'trainNumber': [178, 35, 92, 32],
        'invoiceDate': [12, 82, 203, 29],
        'seatNum': [315, 79, 136, 33],
        'totalAmount': [12, 118, 167, 28],
        'ticketsNum': [23, 12, 147, 37]
    }

    invoice_type = midProcessResult[1]
    tplt = None
    if invoice_type == 1:
        TemType = blueTemplet
    elif invoice_type == 2:
        TemType = redTemplet
    elif invoice_type == 3:
        TemType = excessTemplet
    elif invoice_type == 11:  # VAT special invoice (reserved)
        dic = xmlToDict.XmlTodict('VATInvoiceMuban.xml')

        # Template anchors: first two QRCode values plus the figureX
        # reference point (first value + third / 2, second + fourth / 2).
        tplt = [dic['QRCode'][0], dic['QRCode'][1],
                dic['figureX'][0] + dic['figureX'][2] / 2,
                dic['figureX'][1] + dic['figureX'][3] / 2]
        for c in tplt:
            if c is None:
                print('Templet VATInvoice error')

        TemType = {}
        for item in dic:
            if item in ('QRCode', 'figureX'):
                continue
            rect = dic.get(item)
            TemType[item] = [int(rect[0]), int(rect[1]), int(rect[2]),
                             int(rect[3])]
    else:
        # Previously this fell through and crashed later on an unbound
        # template variable; fail with an explicit message instead.
        raise ValueError(
            'unsupported invoice type code: {!r}'.format(invoice_type))

    # Default detection scale: halved for large images (measured on the
    # uploaded file, not on the processed copy).
    rate = 1
    fcv = cv2.imread(filepath, 1)
    w1 = fcv.shape
    if w1[0] + w1[1] > 1500:
        rate = 0.5

    # Red and supplementary tickets override the size-based scale.
    if invoice_type == 2:
        rate = 2.0
        print("rate : 2.0")
    elif invoice_type == 3:
        rate = 1.0
        print("rate : 1.0")

    box = Detect.detect(cv2.imread(midProcessResult[0]), rate)
    if invoice_type == 11:
        # VAT invoices are aligned on the sorted boxes plus the symbol
        # position, and need the template anchors as reference.
        figureP = FindCircle.findSymbol(filepath)
        StBox = sortBox(box)
        Templet = adjustToTextLine(
            TemType, [StBox[0], StBox[1], figureP[0], figureP[1]],
            invoice_type, tplt)
    else:
        Templet = adjustToTextLine(TemType, box, invoice_type, None)

    attributeLine = lineToAttribute.getAtbt.compute(midProcessResult[2], Templet)

    # Draw the extracted line rectangles on the grayscale image.
    plt_rects = [attributeLine[x] for x in attributeLine]
    vis_textline0 = fp.util.visualize.rects(cv2.imread(midProcessResult[0], 0), plt_rects)
    pl.imshow(vis_textline0)

    # Save into the "line" directory; saving is best-effort (the original
    # message notes that bmp output is not supported).
    pltpath = midProcessResult[0].replace("out", "line")
    try:
        pl.savefig(pltpath)
    except Exception as e:
        print("绘制行提取图片不支持bmp格式:{}".format(e))

    return midProcessResult[0], invoice_type, attributeLine