def single_recognition(img, model_dir):
    """Recognise the digit string in one BGR image and return it as text.

    The image is converted to grayscale, resized to 256x32, rescaled with
    ``x / 255 * 2 - 1`` and fed as a batch of one to a freshly built
    prediction model whose weights are loaded from ``model_dir``.

    Args:
        img: BGR image array accepted by ``cv2.cvtColor``.
        model_dir: Path handed to ``load_weights`` (a weight file, despite
            the parameter name).

    Returns:
        The decoded string, obtained by mapping each greedy-decoded CTC label
        through ``num2char_dict``.
    """
    img_w, img_h, img_c = 256, 32, 1
    downsample_factor = 4

    # Pre-processing: single channel, fixed size, explicit channel axis.
    if img_c == 1:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (img_w, img_h))
    img = np.expand_dims(img, axis=-1)
    normalized = img / 255.0 * 2.0 - 1.0

    batch = np.zeros((1, img_h, img_w, img_c))
    batch[0] = normalized

    recognizer = model(is_training=False,
                       img_shape=(img_h, img_w, img_c),
                       num_classes=11,
                       max_label_length=26)
    recognizer.load_weights(model_dir)
    prob_matrix = recognizer.predict(batch)

    # Decode stage: greedy (best-path) CTC decoding, the simplest strategy.
    decoded_tensors, _ = keras.backend.ctc_decode(
        prob_matrix, [img_w // downsample_factor], greedy=True)
    # Still numeric label codes at this point.
    label_ids = keras.backend.get_value(decoded_tensors[0])

    # Convert the label codes of the only sample in the batch to characters.
    return ''.join(num2char_dict[label] for label in label_ids[0])
def single_recognition(img, w=256, h=32, debug=False):
    """Recognise the digit string in one BGR image and return it as text.

    The caller's backend session is saved, the Keras backend is cleared, a
    fresh prediction model is built and loaded from
    'crnn/model/ocr-bankcard-keras-new.h5', and the saved session is put back
    before returning, including when an exception is raised on the way.

    Args:
        img: BGR image array accepted by ``cv2.cvtColor``.
        w: Width the image is resized to. It also sets the model input width
            and the CTC input length (``w // 4``).
        h: Height the image is resized to; also the model input height.
        debug: Unused; kept so existing callers keep working.

    Returns:
        The decoded string, obtained by mapping each greedy-decoded CTC label
        through ``num2char_dict``.
    """
    downsample_factor = 4

    sess = K.get_session()
    keras.backend.clear_session()
    try:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_img = cv2.resize(gray_img, (w, h))
        gray_img = np.expand_dims(gray_img, axis=-1)
        gray_img = gray_img / 255.0 * 2.0 - 1.0
        img_batch = np.zeros((1, h, w, 1))
        img_batch[0, :, :, :] = gray_img

        # Build the network for the size the image was actually resized to,
        # rather than a hard-coded 256x32, so w/h are honoured end to end.
        model_for_predict = model(is_training=False,
                                  img_size=(w, h),
                                  num_classes=11,
                                  max_label_length=26)
        model_for_predict.load_weights('crnn/model/ocr-bankcard-keras-new.h5')
        y_pred_probMatrix = model_for_predict.predict(img_batch)

        # Decode stage: greedy (best-path) CTC decoding, the simplest
        # strategy. The input length follows the real input width.
        y_pred_labels_tensor_list, _ = keras.backend.ctc_decode(
            y_pred_probMatrix, [w // downsample_factor], greedy=True)
        # Still numeric label codes at this point.
        y_pred_labels = keras.backend.get_value(y_pred_labels_tensor_list[0])

        # Convert the label codes of the only sample in the batch to text.
        return ''.join(num2char_dict[num] for num in y_pred_labels[0])
    finally:
        # Always hand the caller's session back, even if recognition failed.
        K.set_session(sess)
def main():
    """Locate the card region in every test image, then recognise them all.

    Every file in ``test_dir`` is read, wrapped in ``Image`` and its
    ``pos_img`` is written to ``res_dir`` as ``card_<n>.jpg`` (n starts at 1,
    following the sorted file order). The saved crops are then passed to
    ``PredictLabels_by_filename`` and the predictions are written, sorted by
    file name, to ``result.txt`` inside ``res_dir``.
    """
    img_path_list = sorted(os.listdir(test_dir))
    for num, img_name in enumerate(img_path_list, start=1):
        img = Image(cv2.imread(test_dir + img_name))
        cv2.imwrite(res_dir + 'card_' + str(num) + '.jpg', img.pos_img)
        print(img_name + '定位完成')

    pre_model = model(is_training=False,
                      img_size=(256, 32),
                      num_classes=11,
                      max_label_length=26)
    res = PredictLabels_by_filename(pre_model,
                                    res_dir,
                                    (256, 32),
                                    downsample_factor=4,
                                    weight_path='model/train_weight.h5')

    # The context manager closes the file even if a write raises.
    with open(res_dir + 'result.txt', 'w') as f:
        for img_name in sorted(res):
            # img_name[:-4] drops the last four characters (e.g. '.jpg').
            f.write(img_name[:-4] + ':' + res[img_name] + '\n')
def main():
    """Train the selected CTC model, evaluate it and save a result report.

    model_choice: 0--vgg_bgru_ctc, 1--vgg_blstm_ctc, 2--resnet_blstm_ctc,
    3--resnet18_blstm

    Steps: build a training and a prediction model for the chosen
    architecture, train with ``train_model``, predict on the validation list
    with ``PredictLabels`` using the saved weights, compute accuracy with
    ``check_acc`` and write accuracy, misclassified samples and all
    predictions to a text file under ``../predicted_results``.

    Returns:
        0 on completion.
    """
    model_dict = {
        0: "vgg_bgru_ctc",
        1: "vgg_blstm_ctc",
        2: "resnet_blstm_ctc",
        3: 'resnet18_blstm'
    }
    model_choice = 1
    model_for_train, model_for_predict = None, None

    # Paths and parameters.
    current_time = time.strftime('%Y_%m_%d_%H_%M_%S',
                                 time.localtime(time.time()))
    weight_save_path = "../trained_weights/{}_{}_best_weight.h5".format(
        current_time, model_dict[model_choice])
    # weight_save_path = "../trained_weights/2018_12_16_14_25_51_vgg_bgru_ctc_best_weight.h5"

    # Alternative: digit-only training data.
    # img_data_dir = "../data/numbers_croped"
    # train_txt_path = "../data/data_txt/numbers/numbers_train.txt"
    # val_txt_path = "../data/data_txt/numbers/numbers_val.txt"
    # img_size = (128, 32)  # W*H
    # Matching training parameters:
    # num_classes = 11  # includes "blank"
    # max_label_length = 12
    # downsample_factor = 4
    # epochs = 100

    # Alternative: all cropped data.
    # img_data_dir = "../data/all_data_croped"
    # train_txt_path = "../data/data_txt/all_except_long/train_data.txt"
    # val_txt_path = "../data/data_txt/all_except_long/test_data.txt"

    # 300w+ data set (currently active).
    img_data_dir = "../data/img_300w/img"
    train_txt_path = "../data/img_300w/txt/train.txt"
    val_txt_path = "../data/img_300w/txt/test.txt"

    # Alternative: partial 300w+ data set.
    # img_data_dir = "../data/part_300w/img"
    # train_txt_path = "../data/part_300w/txt/train.txt"
    # val_txt_path = "../data/part_300w/txt/test.txt"

    # Training parameters.
    img_size = (280, 32)  # W*H
    num_classes = 5991  # last index is treated as "blank"; the first is dropped
    max_label_length = 10
    downsample_factor = 4
    epochs = 7

    # An if/elif chain (not a dict of modules) so that only the selected
    # architecture's module name is ever evaluated.
    if model_choice == 0:  # vgg_bgru_ctc
        model_for_train = vgg_bgru_ctc.model(
            is_training=True,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)
        model_for_predict = vgg_bgru_ctc.model(
            is_training=False,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)
    elif model_choice == 1:  # vgg_blstm_ctc
        model_for_train = vgg_blstm_ctc.model(
            is_training=True,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)
        model_for_predict = vgg_blstm_ctc.model(
            is_training=False,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)
    elif model_choice == 2:  # resnet_blstm_ctc
        # NOTE(review): labelled "resnet_blstm_ctc" in model_dict, but the
        # model is built from the resnet_bgru_ctc module -- confirm intent.
        model_for_train = resnet_bgru_ctc.model(
            is_training=True,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)
        model_for_predict = resnet_bgru_ctc.model(
            is_training=False,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)
    elif model_choice == 3:  # resnet18_blstm
        model_for_train = resnet18_blstm.model(
            is_training=True,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)
        model_for_predict = resnet18_blstm.model(
            is_training=False,
            img_size=img_size,
            num_classes=num_classes,
            max_label_length=max_label_length)

    # Train the model.
    train_model(model_for_train,
                img_data_dir,
                train_txt_path,
                val_txt_path,
                weight_save_path,
                epochs=epochs,
                img_size=img_size,
                batch_size=512,
                max_label_length=max_label_length,
                down_sample_factor=downsample_factor)

    # Predict with the weights saved during training.
    predict_labels = PredictLabels(model_for_predict,
                                   img_data_dir,
                                   val_txt_path,
                                   img_size,
                                   downsample_factor,
                                   batch_size=128,
                                   weight_path=weight_save_path)

    # Check accuracy.
    acc, misclassified = check_acc(predict_labels, val_txt_path)
    print("accuracy on the on the val_data is {}.".format(acc))

    # Save the prediction results.
    test_result_save_path = "../predicted_results/{}_acc_{:.2f}_{}_result.txt".format(
        current_time, acc, model_dict[model_choice])
    # The context manager closes the report even if a write raises.
    with open(test_result_save_path, 'w') as result_txt:
        result_txt.write("acc :{}.\n".format(acc))
        result_txt.write("misclassified: {}.\n".format(len(misclassified)))
        for key, value in misclassified.items():
            result_txt.write(str(key) + ": " + str(value) + '\n')
        result_txt.write("all results:\n")
        for key, value in predict_labels.items():
            result_txt.write(str(key) + ": " + str(value) + '\n')
    return 0