Пример #1
0
                labels_pred2 = net.forward_ocr(features2)

                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)

                ind = np.unravel_index(labels, ctc_f.shape)
                conf = np.mean(np.exp(ctc_f.max(2)[labels > 3]))
                #if conf < 0.4:
                #  print('Too low conf!')
                #  continue

                conf_raw = np.exp(ctc_f[ind])

                det_text, conf2, dec_s, word_splits = print_seq_ext(
                    labels[0, :], codec)
                det_text = det_text.strip()

                if args.debug:
                    im += 1
                    im *= 128
                    cv2.imshow('im', im.astype(np.uint8))
                    cv2.waitKey(0)

                if args.debug:
                    print(det_text)

                if conf < 0.01 and len(det_text) == 3:
                    print('Too low conf short: {0} {1}'.format(det_text, conf))
                    continue
Пример #2
0
def _dominant_script_index(text, scripts):
    """Return the index in *scripts* of the script naming most chars of *text*.

    The Unicode name of every character (``ud.name``) is searched for the
    keywords in ``scripts[1:]``; the first keyword found wins.  Characters
    matching no keyword, or having no Unicode name at all, are counted under
    index 0.  Ties resolve to the lowest index, so empty text yields 0.
    """
    counts = np.zeros(len(scripts), dtype=int)
    for char in text:
        try:
            symbol_name = ud.name(char)
        except (ValueError, TypeError):
            # Unnamed code point (e.g. a control character): treat it as
            # belonging to no script instead of aborting the evaluation.
            symbol_name = ''
        for idx in range(1, len(scripts)):
            if scripts[idx] in symbol_name:
                counts[idx] += 1
                break
        else:
            counts[0] += 1
    # argmax returns the first maximum, like the np.where(...)[0][0] idiom.
    return int(np.argmax(counts))


def test(net,
         codec,
         args,
         list_file='/home/busta/data/icdar_ch8_validation/ocr_valid.txt',
         norm_height=32,
         max_samples=1000000):
    """Run word recognition over an annotated image list and report accuracy.

    Every line of *list_file* holds an image name and its ground-truth text,
    separated by a comma (or by a space when the line contains no comma).
    Image names are resolved relative to the directory of *list_file*.  Each
    image is resized to *norm_height*, pushed through
    ``net.forward_features`` / ``net.forward_ocr`` and decoded with
    ``print_seq_ext``; the result is compared case-insensitively with the
    ground truth.

    Side effects:
      * ``/tmp/ch8_valid.txt``   - ``name|gt|prediction|edit_distance`` lines
      * ``/tmp/ocr_valid.txt``   - ``name, "prediction"`` lines
      * ``per_script_accuracy.csv``, ``conf_matrix.csv`` and
        ``conf_matrix_out.csv`` in the current working directory
      * ``ocr_bad.html`` / ``ocr_not_sobad.html`` next to *list_file*
      * ``pd.options.display.max_rows`` is set to 9999
      * ``net`` is switched to eval mode and back to train mode at the end

    Args:
        net: model exposing ``eval``, ``train``, ``forward_features`` and
            ``forward_ocr``.
        codec: symbol table handed to ``print_seq_ext``.
        args: object whose ``cuda`` attribute selects the device.
        list_file: path of the annotation list.
        norm_height: height every image is resized to.
        max_samples: lines with an index greater than this are not read.

    Returns:
        Tuple ``(accuracy, total_edit_distance)``; accuracy is 0.0 when no
        sample could be scored.
    """
    net = net.eval()

    dir_name = os.path.dirname(list_file)
    with open(list_file, "r") as ins:
        entries = [line.strip() for line in ins]

    scripts = [
        '', 'DIGIT', 'LATIN', 'ARABIC', 'BENGALI', 'HANGUL', 'CJK', 'HIRAGANA',
        'KATAKANA'
    ]

    # Rows: ground-truth script, columns: script of the prediction.
    conf_matrix = np.zeros((len(scripts), len(scripts)), dtype=int)

    gt_script = dict.fromkeys(scripts, 0)
    ed_script = dict.fromkeys(scripts, 0)
    correct_ed1_script = dict.fromkeys(scripts, 0)
    correct_script = dict.fromkeys(scripts, 0)
    count_script = dict.fromkeys(scripts, 0)

    correct = 0
    correct_ed1 = 0
    ted = 0
    gt_all = 0
    images_count = 0
    bad_words = []

    with open('/tmp/ch8_valid.txt', 'w') as fout, \
            open('/tmp/ocr_valid.txt', 'w') as fout_ocr:
        for image_no, entry in enumerate(entries):
            # NOTE: the comparison is `>`, so the line with index max_samples
            # itself is still processed (kept as in the original).
            if image_no > max_samples:
                break

            spl = entry.split(",")
            delim = ","
            if len(spl) == 1:
                spl = entry.split(" ")
                delim = " "
            image_name = spl[0].strip()
            gt_txt = ''
            if len(spl) > 1:
                gt_txt = spl[1].strip()
                if len(spl) > 2:
                    # Only the third field is re-attached; anything after a
                    # further delimiter is dropped (original behaviour).
                    gt_txt += delim + spl[2]

                if len(gt_txt) > 1 and gt_txt[0] == '"' and gt_txt[-1] == '"':
                    gt_txt = gt_txt[1:-1]

            if not gt_txt:
                print(entry)
                continue

            # endswith() instead of [-1] so an empty name cannot raise.
            if image_name.endswith(','):
                image_name = image_name[0:-1]

            img_nameo = image_name
            image_name = '{0}/{1}'.format(dir_name, image_name)
            img = cv2.imread(image_name)

            if img is None:
                print(image_name)
                continue

            scale = norm_height / float(img.shape[0])
            width = int(img.shape[1] * scale)
            # Width is snapped to a multiple of 4 and never below 8.
            width = max(8, int(round(width / 4)) * 4)

            scaled = cv2.resize(img, (int(width), norm_height))
            scaled = np.expand_dims(scaled, axis=0)

            # Map pixel values from [0, 255] to roughly [-1, 1).
            scaled = np.asarray(scaled, dtype=float)
            scaled /= 128
            scaled -= 1

            try:
                scaled_var = net_utils.np_to_variable(
                    scaled, is_cuda=args.cuda).permute(0, 3, 1, 2)
                x = net.forward_features(scaled_var)
                ctc_f = net.forward_ocr(x)
                ctc_f = ctc_f.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)
                det_text, _, _, _ = print_seq_ext(labels[0, :], codec)
            except Exception:
                # Best effort: a failing forward pass counts as an empty
                # prediction rather than stopping the whole evaluation.
                print('bad image')
                det_text = ''

            det_text = det_text.strip()
            gt_txt = gt_txt.strip()

            try:
                if 'ARABIC' in ud.name(gt_txt[0]):
                    det_text = det_text[::-1]
            except (IndexError, ValueError, TypeError):
                # Empty ground truth after stripping, or a first character
                # without a Unicode name: the sample is skipped.
                continue

            gt_idx = _dominant_script_index(gt_txt, scripts)
            det_idx = _dominant_script_index(det_text, scripts)
            script = scripts[gt_idx]
            conf_matrix[gt_idx, det_idx] += 1

            edit_dist = distance(det_text.lower(), gt_txt.lower())
            ted += edit_dist
            gt_all += len(gt_txt)

            gt_script[script] += len(gt_txt)
            ed_script[script] += edit_dist
            images_count += 1

            fout_ocr.write('{0}, "{1}"\n'.format(os.path.basename(image_name),
                                                 det_text.strip()))

            if det_text.lower() == gt_txt.lower():
                correct += 1
                correct_ed1 += 1
                correct_script[script] += 1
                correct_ed1_script[script] += 1
            else:
                if edit_dist == 1:
                    correct_ed1 += 1
                    correct_ed1_script[script] += 1
                image_prev = "<img src=\"{0}\" height=\"32\" />".format(
                    img_nameo)
                bad_words.append(
                    (gt_txt, det_text, edit_dist, image_prev, img_nameo))
                print('{0} - {1} / {2:.2f} - {3:.2f}'.format(
                    det_text, gt_txt, correct / float(images_count),
                    ted / 3.0))

            count_script[script] += 1
            fout.write('{0}|{1}|{2}|{3}\n'.format(
                os.path.basename(image_name), gt_txt, det_text, edit_dist))

    # max(..., 1) keeps the summary well defined when nothing was scored.
    accuracy = correct / float(max(images_count, 1))
    print('Test accuracy: {0:.3f}, {1:.2f}, {2:.3f}'.format(
        accuracy, ted / 3.0, ted / float(max(gt_all, 1))))

    row_format = '{0} & {1:.3f} & {5:.3f} &  {2:.3f} & {3} & {4} \\\\\n'
    with open("per_script_accuracy.csv", "w") as itf:
        itf.write(
            'Script & Accuracy & Edit Distance & ed1 & Ch instances & Im Instances \\\\\n'
        )
        for scr in scripts:
            correct_scr = correct_script[scr]
            correct_scr_ed1 = correct_ed1_script[scr]
            count_scr = count_script[scr]
            ted_scr = ed_script[scr]
            gt_all_scr = gt_script[scr]
            print(' Script:{3} Acc : {0:.3f}, {1:.2f}, {2:.3f}, {4}'.format(
                correct_scr / float(max(count_scr, 1)), ted_scr / 3.0,
                ted_scr / float(max(gt_all_scr, 1)), scr, gt_all_scr))

            itf.write(
                row_format.format(
                    scr.title(), correct_scr / float(max(count_scr, 1)),
                    ted_scr / float(max(gt_all_scr, 1)), gt_all_scr,
                    count_scr, correct_scr_ed1 / float(max(count_scr, 1))))

        itf.write(
            row_format.format(
                'Total', correct / float(max(images_count, 1)),
                ted / float(max(gt_all, 1)), gt_all, images_count,
                correct_ed1 / float(max(images_count, 1))))

    print(conf_matrix)
    np.savetxt("conf_matrix.csv",
               conf_matrix,
               delimiter=' & ',
               fmt='%d',
               newline=' \\\\\n')

    # Re-emit the raw matrix with a header row and a script name per line.
    with open("conf_matrix_out.csv", "w") as out, \
            open("conf_matrix.csv", "r") as ins:
        out.write(' & ')
        out.write(" & ".join(scr.title() for scr in scripts))
        out.write('\\\\\n')
        for scr, line in zip(scripts, ins):
            out.write(scr.title() + " & " + line)

    net.train()

    pd.options.display.max_rows = 9999

    if bad_words:
        # NOTE(review): the ascending (smallest edit distance first) listing
        # goes to ocr_bad.html and the descending one to ocr_not_sobad.html;
        # the names look swapped but are kept as in the original.
        for descending, report_name in ((False, 'ocr_bad.html'),
                                        (True, 'ocr_not_sobad.html')):
            ordered = sorted(bad_words, key=lambda x: x[2],
                             reverse=descending)
            ww = np.asarray(ordered, dtype=object)[0:1500, :]
            frame = pd.DataFrame({
                'gt': ww[:, 0],
                'pred': ww[:, 1],
                'ed': ww[:, 2],
                'image': ww[:, 3]
            })
            report_path = '{0}/{1}'.format(dir_name, report_name)
            with open(report_path, 'w') as report:
                report.write(frame.to_html(escape=False))

    return accuracy, ted
Пример #3
0
def evaluate_e2e_crnn(root,
                      net,
                      norm_height=48,
                      name_model='E2E',
                      normalize=False,
                      save=False,
                      cuda=True,
                      save_dir='eval'):
    """Evaluate an end-to-end text detection + recognition model on a folder.

    Every ``*.jpg``, ``*.png`` and ``*.JPG`` file in *root* is run through
    *net*.  Each detected box is resampled from the input image with an
    affine grid to height *norm_height*, recognised with ``net.forward_ocr``
    and decoded with ``print_seq_ext``.  The detections are scored against
    ``<root>/gt_<image stem>.txt`` by ``evaluate_image``.  One summary line
    is appended to ``<save_dir>/note_eval.txt``.

    Args:
        root: directory holding the images and their ``gt_*.txt`` files.
        net: model; called as ``net(im_data)`` and via ``net.forward_ocr``.
        norm_height: height of the crops given to the recogniser.
        name_model: label written into the summary line.
        normalize: when true, pixel values are divided by 128 and shifted
            by -1 before inference.
        save: when true, the image returned by ``evaluate_image`` is written
            to *save_dir* under the source file name.
        cuda: forwarded to ``net_utils.np_to_variable`` as ``is_cuda``.
        save_dir: output directory, created if missing (may be nested).

    Returns:
        Tuple ``(e2e_recall, detection_recall, e2e_recall_ed1, precision)``.
        The three recalls are all divided by the ``gt_e2e`` total reported
        by ``evaluate_image``; precision is divided by the number of kept
        detections.  Each denominator is clamped to at least 1.
    """
    net = net.eval()

    image_paths = []
    for pattern in ('*.jpg', '*.png', '*.JPG'):
        image_paths.extend(glob.glob(os.path.join(root, pattern)))

    tp_all = 0
    gt_all = 0
    tp_e2e_all = 0
    gt_e2e_all = 0
    tp_e2e_ed1_all = 0
    detecitons_all = 0
    eval_text_length = 2
    segm_thresh = 0.5
    min_height = 8

    os.makedirs(save_dir, exist_ok=True)
    note_path = os.path.join(save_dir, 'note_eval.txt')

    with torch.no_grad():
        for img_name in image_paths:
            base_nam = os.path.basename(img_name)

            # splitext covers every globbed extension, including ".JPG".
            stem = os.path.splitext(base_nam)[0]
            res_gt = '{0}/gt_{1}.txt'.format(root, stem)
            if not os.path.exists(res_gt):
                # Fallback naming scheme without underscores in the stem.
                res_gt = '{0}/gt_{1}.txt'.format(root, stem.replace("_", ""))

            if os.path.exists(res_gt):
                gt_rect, gt_txts = load_gt(res_gt)
            else:
                # Score the image against an empty ground truth instead of
                # handing a non-existent path to load_gt.
                print('missing! {0}'.format(res_gt))
                gt_rect, gt_txts = [], []

            img = cv2.imread(img_name)
            if img is None:
                # cv2.imread signals failure with None; skip the file.
                print('unreadable image, skipped: {0}'.format(img_name))
                continue

            im_resized, _ = resize_image(
                img, max_size=1848 * 1024,
                scale_up=False)
            batch = np.asarray([im_resized], dtype=float)

            if normalize:
                batch /= 128
                batch -= 1
            im_data = net_utils.np_to_variable(batch, is_cuda=cuda).permute(
                0, 3, 1, 2)

            [iou_pred, iou_pred1], rboxs, angle_pred, features = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy()
            iou = iou.squeeze(0)

            rbox = rboxs[0].data.cpu()[0].numpy()
            rbox = rbox.swapaxes(0, 1)
            rbox = rbox.swapaxes(1, 2)

            detections = get_boxes(iou, rbox,
                                   angle_pred[0].data.cpu()[0].numpy(),
                                   segm_thresh)

            # Ratio between the resized network input and the source image.
            im_scalex = im_resized.shape[1] / float(img.shape[1])
            im_scaley = im_resized.shape[0] / float(img.shape[0])

            input_W = im_data.size(3)
            input_H = im_data.size(2)
            target_h = norm_height

            detetcions_out = []
            for boxo in detections:
                # Box corners in network-input coordinates, one (x, y) row
                # per corner.
                boxr = boxo[0:8].reshape(-1, 2)

                center = (boxr[0, :] + boxr[1, :] + boxr[2, :] +
                          boxr[3, :]) / 4

                dw = boxr[2, :] - boxr[1, :]
                dw2 = boxr[0, :] - boxr[3, :]
                dh = boxr[1, :] - boxr[0, :]
                dh2 = boxr[3, :] - boxr[2, :]

                # Height/width are the mean length of the two opposite sides.
                h = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1]) + 1
                h2 = math.sqrt(dh2[0] * dh2[0] + dh2[1] * dh2[1]) + 1
                h = (h + h2) / 2
                w = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
                w2 = math.sqrt(dw2[0] * dw2[0] + dw2[1] * dw2[1])
                w = (w + w2) / 2

                # Drop boxes lower than min_height in source-image pixels.
                if ((h - 1) / im_scaley) < min_height:
                    continue

                scale = target_h / h
                target_gw = int(w * scale + target_h / 4)
                target_gw = max(8, int(round(target_gw / 8)) * 8)
                xc = center[0]
                yc = center[1]

                angle = math.atan2((boxr[2][1] - boxr[1][1]),
                                   boxr[2][0] - boxr[1][0])
                angle2 = math.atan2((boxr[3][1] - boxr[0][1]),
                                    boxr[3][0] - boxr[0][0])
                angle = (angle + angle2) / 2

                # Affine parameters of the rotated crop in the normalised
                # coordinates used by F.affine_grid.
                scalex = (w + h / 4) / input_W
                scaley = h / input_H

                th11 = scalex * math.cos(angle)
                th12 = -math.sin(angle) * scaley * input_H / input_W
                th13 = (2 * xc - input_W - 1) / (input_W - 1)

                th21 = math.sin(angle) * scalex * input_W / input_H
                th22 = scaley * math.cos(angle)
                th23 = (2 * yc - input_H - 1) / (input_H - 1)

                t = np.asarray([th11, th12, th13, th21, th22, th23],
                               dtype=float)
                t = torch.from_numpy(t).type(torch.FloatTensor)
                # Follow the input tensor's device so the `cuda` argument,
                # not a module-level global, decides where sampling runs.
                t = t.to(im_data.device)
                theta = t.view(-1, 2, 3)

                # NOTE(review): recent PyTorch versions changed the default
                # of `align_corners` for these two calls - confirm which
                # convention the model was trained with.
                grid = F.affine_grid(
                    theta, torch.Size((1, 3, int(target_h), int(target_gw))))
                x = F.grid_sample(im_data, grid)

                labels_pred = net.forward_ocr(x)
                labels_pred = labels_pred.permute(1, 2, 0)

                ctc_f = labels_pred.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)

                # Mean exp(score) over the steps whose best label is > 3.
                conf = np.mean(np.exp(ctc_f.max(2)[labels > 3]))
                if conf < 0.02:
                    continue

                det_text, conf2, dec_s, word_splits = print_seq_ext(
                    labels[0, :], codec)
                det_text = det_text.strip()

                if len(det_text) >= eval_text_length:
                    # Report the box in source-image coordinates.
                    boxw = np.copy(boxr)
                    boxw[:, 1] /= im_scaley
                    boxw[:, 0] /= im_scalex
                    boxw = boxw.reshape(8)

                    detetcions_out.append([boxw, det_text])

            tp, tp_e2e, gt_e2e, tp_e2e_ed1, detection_to_gt, pixx = evaluate_image(
                img,
                detetcions_out,
                gt_rect,
                gt_txts,
                eval_text_length=eval_text_length)
            tp_all += tp
            gt_all += len(gt_txts)
            tp_e2e_all += tp_e2e
            gt_e2e_all += gt_e2e
            tp_e2e_ed1_all += tp_e2e_ed1
            detecitons_all += len(detetcions_out)

            if save:
                cv2.imwrite('{0}/{1}'.format(save_dir, base_nam), pixx)

    gt_total = float(max(1, gt_e2e_all))
    recall_e2e = tp_e2e_all / gt_total
    recall_det = tp_all / gt_total
    recall_e2e_ed1 = tp_e2e_ed1_all / gt_total
    precision = tp_all / float(max(1, detecitons_all))

    # Append mode: successive evaluations accumulate in the same note file.
    with open(note_path, 'a') as note_file:
        note_file.write(
            'Model{4}---E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f} \n'
            .format(recall_e2e, recall_det, recall_e2e_ed1, precision,
                    name_model))

    return (recall_e2e, recall_det, recall_e2e_ed1, precision)