Пример #1
0
def single_recognition(img, model_dir):
    """Recognise the text in one image using a freshly built CTC model.

    The image is converted from BGR to grayscale, resized to 256x32,
    rescaled with ``x / 255 * 2 - 1`` and wrapped in a batch of one. A
    prediction model is built, its weights are loaded from ``model_dir``,
    and the prediction is decoded greedily with CTC.

    Args:
        img: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        model_dir: Weights location handed to ``load_weights``.

    Returns:
        The decoded string, built by mapping every decoded label through
        ``num2char_dict``.
    """
    width, height, channels = 256, 32, 1
    downsample_factor = 4

    # Single-channel models expect a grayscale input.
    if channels == 1:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(img, (width, height))
    # Append the channel axis, then rescale the pixel values.
    normalized = np.expand_dims(resized, axis=-1) / 255.0 * 2.0 - 1.0

    batch = np.zeros((1, height, width, channels))
    batch[0, :, :, :] = normalized

    predictor = model(is_training=False,
                      img_shape=(height, width, channels),
                      num_classes=11,
                      max_label_length=26)
    predictor.load_weights(model_dir)

    prob_matrix = predictor.predict(batch)

    # Decode stage: plain greedy CTC decoding over width // downsample_factor
    # time steps.
    decoded_tensors, _ = keras.backend.ctc_decode(
        prob_matrix, [width // downsample_factor], greedy=True)
    # Still numeric label codes at this point.
    label_codes = keras.backend.get_value(decoded_tensors[0])

    # Translate the codes of the single batch entry into characters.
    return ''.join(num2char_dict[code] for code in label_codes[0])
def single_recognition(img, w=256, h=32, debug=False):
    """Recognise the text in one image with the bank-card CRNN weights.

    The current Keras session is saved, the backend is cleared, a fresh
    prediction model is built and loaded from
    ``crnn/model/ocr-bankcard-keras-new.h5``, and the saved session is put
    back afterwards -- also when preprocessing or prediction raises.

    Args:
        img: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        w: Width the image is resized to; also used for the model input
            size and the CTC input length so all three stay consistent.
        h: Height the image is resized to and the model is built for.
        debug: Currently unused; kept for interface compatibility.

    Returns:
        The decoded string, built by mapping every decoded label through
        ``num2char_dict``.
    """
    downsample_factor = 4

    # Keep a handle on the caller's session so it can be reinstated after
    # the backend has been reset for this one-off prediction.
    sess = K.get_session()
    keras.backend.clear_session()
    try:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_img = cv2.resize(gray_img, (w, h))
        gray_img = np.expand_dims(gray_img, axis=-1)
        gray_img = gray_img / 255.0 * 2.0 - 1.0

        img_batch = np.zeros((1, h, w, 1))
        img_batch[0, :, :, :] = gray_img

        # Build the model for the same (width, height) the batch was
        # resized to; with the defaults this is the original (256, 32).
        model_for_predict = model(is_training=False,
                                  img_size=(w, h),
                                  num_classes=11,
                                  max_label_length=26)
        model_for_predict.load_weights('crnn/model/ocr-bankcard-keras-new.h5')

        y_pred_probMatrix = model_for_predict.predict(img_batch)

        # Decode stage: plain greedy CTC decoding. The input length follows
        # the actual resize width instead of a hard-coded 256.
        y_pred_labels_tensor_list, _ = keras.backend.ctc_decode(
            y_pred_probMatrix, [w // downsample_factor],
            greedy=True)
        # Still numeric label codes at this point.
        y_pred_labels = keras.backend.get_value(y_pred_labels_tensor_list[0])

        # Translate the codes of the single batch entry into characters.
        return ''.join(num2char_dict[num] for num in y_pred_labels[0])
    finally:
        # Always hand the original session back, even on failure.
        K.set_session(sess)
def main():
    """Localise every test image, recognise the crops and save the results.

    Each file in ``test_dir`` (in sorted order) is read, wrapped in
    ``Image`` and its ``pos_img`` is written to ``res_dir`` as
    ``card_<n>.jpg``. The images in ``res_dir`` are then run through
    ``PredictLabels_by_filename`` and the predictions are written to
    ``res_dir + 'result.txt'`` as ``<name without last 4 chars>:<label>``
    lines, sorted by file name.
    """
    img_path_list = sorted(os.listdir(test_dir))
    for num, img_name in enumerate(img_path_list):
        img = Image(cv2.imread(test_dir + img_name))
        cv2.imwrite(res_dir + 'card_' + str(num + 1) + '.jpg', img.pos_img)
        # Progress message: "<file name> localisation finished".
        print(img_name + '定位完成')

    pre_model = model(is_training=False,
                      img_size=(256, 32),
                      num_classes=11,
                      max_label_length=26)
    res = PredictLabels_by_filename(pre_model,
                                    res_dir, (256, 32),
                                    downsample_factor=4,
                                    weight_path='model/train_weight.h5')
    res_key = sorted(res)
    # The context manager closes the file even if a write fails.
    with open(res_dir + 'result.txt', 'w') as f:
        for img_name in res_key:
            # Strip the 4-character extension (e.g. ".jpg") from the key.
            f.write(img_name[:-4] + ':' + res[img_name] + '\n')
Пример #4
0
def main():
    '''
    Train a CTC recognition model, evaluate it on the validation list and
    save the predictions to a text file.

    model_choice: 0--vgg_bgru_ctc, 1--vgg_blstm_ctc, 2--resnet_blstm_ctc 3--resnet18_blstm

    Returns 0 after the result file has been written.
    '''
    model_dict = {
        0: "vgg_bgru_ctc",
        1: "vgg_blstm_ctc",
        2: "resnet_blstm_ctc",
        3: 'resnet18_blstm'
    }
    model_choice = 1

    # Paths and parameters.
    # strftime() without a time tuple formats the current local time.
    current_time = time.strftime('%Y_%m_%d_%H_%M_%S')
    weight_save_path = "../trained_weights/{}_{}_best_weight.h5".format(
        current_time, model_dict[model_choice])
    # weight_save_path = "../trained_weights/2018_12_16_14_25_51_vgg_bgru_ctc_best_weight.h5"

    # Alternative: digits-only training set.
    # img_data_dir = "../data/numbers_croped"
    # train_txt_path = "../data/data_txt/numbers/numbers_train.txt"
    # val_txt_path = "../data/data_txt/numbers/numbers_val.txt"
    # img_size = (128, 32) # W*H
    # Training parameters for that set:
    # num_classes = 11 # includes "blank"
    # max_label_length = 12
    # downsample_factor = 4
    # epochs = 100

    # Alternative: cropped "all data" set.
    # img_data_dir = "../data/all_data_croped"
    # train_txt_path = "../data/data_txt/all_except_long/train_data.txt"
    # val_txt_path = "../data/data_txt/all_except_long/test_data.txt"

    # Training paths for the 3M+ image set.
    img_data_dir = "../data/img_300w/img"
    train_txt_path = "../data/img_300w/txt/train.txt"
    val_txt_path = "../data/img_300w/txt/test.txt"

    # Alternative: partial 3M+ image set.
    # img_data_dir = "../data/part_300w/img"
    # train_txt_path = "../data/part_300w/txt/train.txt"
    # val_txt_path = "../data/part_300w/txt/test.txt"

    # Training parameters.
    img_size = (280, 32)  # W*H
    num_classes = 5991  # last index is treated as "blank"; the first one is dropped
    max_label_length = 10
    downsample_factor = 4
    epochs = 7

    # Pick the architecture module. The if/elif chain keeps the lookup lazy,
    # so only the selected module name has to be importable.
    if model_choice == 0:
        model_module = vgg_bgru_ctc
    elif model_choice == 1:
        model_module = vgg_blstm_ctc
    elif model_choice == 2:
        # NOTE(review): model_dict labels this choice "resnet_blstm_ctc" but
        # the module used is resnet_bgru_ctc -- confirm which is intended.
        model_module = resnet_bgru_ctc
    elif model_choice == 3:
        model_module = resnet18_blstm
    else:
        raise ValueError("unsupported model_choice: {}".format(model_choice))

    # Both variants share every argument except the is_training flag.
    model_kwargs = dict(img_size=img_size,
                        num_classes=num_classes,
                        max_label_length=max_label_length)
    model_for_train = model_module.model(is_training=True, **model_kwargs)
    model_for_predict = model_module.model(is_training=False, **model_kwargs)

    # Train the model.
    train_model(model_for_train,
                img_data_dir,
                train_txt_path,
                val_txt_path,
                weight_save_path,
                epochs=epochs,
                img_size=img_size,
                batch_size=512,
                max_label_length=max_label_length,
                down_sample_factor=downsample_factor)

    # Predict on the validation list with the weights saved by training.
    predict_labels = PredictLabels(model_for_predict,
                                   img_data_dir,
                                   val_txt_path,
                                   img_size,
                                   downsample_factor,
                                   batch_size=128,
                                   weight_path=weight_save_path)

    # check accuracy
    acc, misclassified = check_acc(predict_labels, val_txt_path)
    print("accuracy on the on the val_data is {}.".format(acc))

    # Save the prediction results; the context manager guarantees the file
    # is closed even if one of the writes fails.
    test_result_save_path = "../predicted_results/{}_acc_{:.2f}_{}_result.txt".format(
        current_time, acc, model_dict[model_choice])
    with open(test_result_save_path, 'w') as result_txt:
        result_txt.write("acc :{}.\n".format(acc))
        result_txt.write("misclassified: {}.\n".format(len(misclassified)))
        for key, value in misclassified.items():
            result_txt.write(str(key) + ": " + str(value) + '\n')
        result_txt.write("all results:\n")
        for key, value in predict_labels.items():
            result_txt.write(str(key) + ": " + str(value) + '\n')

    return 0