def recognize_jmz(image_path, weights_path, char_dict_path, txt_file_path):
    """
    Recognition function
    :param image_path: directory containing the images
    :param weights_path: path to the saved model weights
    :param char_dict_path: path to the character dictionary file
    :param txt_file_path: directory containing the txt files that list the image names
    :return: None
    """
    files = os.listdir(txt_file_path)
    txt_files = [txt for txt in files
                 if txt.endswith(".txt") and txt.split(".")[0] + ".json" not in files]

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS],  # variable width
                               name='input')
    input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length')

    net = crnn_model.ShadowNet(phase='test',
                               hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                               layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                               num_classes=CFG.ARCH.NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret,
                                               sequence_length=input_sequence_length,  # variable sequence length
                                               merge_repeated=False,
                                               beam_width=1)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    # sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    # sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        for idx, txt_file in enumerate(txt_files):
            reg_result = {}
            txt_path = os.path.join(txt_file_path, txt_file)
            with open(txt_path, 'r') as fd:
                image_names = [line.strip() for line in fd.readlines()]

            for image_name in image_names:
                image_paths = os.path.join(image_path, image_name)
                image = cv2.imread(image_paths, cv2.IMREAD_COLOR)
                if image is None:
                    print(image_paths + ' does not exist')
                    continue
                image = _resize_image(image)
                image = np.array(image, np.float32) / 127.5 - 1.0
                seq_len = np.array([image.shape[1] / 4], dtype=np.int32)
                preds = sess.run(decodes, feed_dict={inputdata: [image],
                                                     input_sequence_length: seq_len})
                preds = _sparse_matrix_to_list(preds[0], char_dict_path)
                reg_result[image_name] = preds[0]
                print('Predict image {:s} result: {:s}'.format(image_name, preds[0]))

            with open(txt_path[:-4] + ".json", "w") as fw:  # consider using .split('.') instead
                json.dump(reg_result, fw)

    sess.close()

    return
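# _resize_image is called above but defined elsewhere in the repository. A minimal sketch of
# what it presumably does (scale the image to the fixed network input height while keeping the
# aspect ratio); the real implementation and the exact height source (CFG.ARCH.INPUT_SIZE[1])
# may differ:
def _resize_image_sketch(image, fixed_height=32):
    """Hypothetical helper: resize to a fixed height, preserving the aspect ratio.

    The resulting width determines the CTC sequence length used above
    (seq_len = width / 4, because the CNN downsamples the width by a factor of 4).
    """
    height, width = image.shape[:2]
    new_width = max(int(round(width * fixed_height / height)), 4)  # keep at least one time step
    return cv2.resize(image, (new_width, fixed_height))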
def recognize(image_path, weights_path, char_dict_path, txt_path):
    """
    Recognition function
    :param image_path: directory containing the images
    :param weights_path: path to the saved model weights
    :param char_dict_path: path to the character dictionary file
    :param txt_path: txt file that lists the image names
    :return: None
    """
    NUM_CLASSES = get_num_class(char_dict_path)

    with open(txt_path, 'r', encoding='UTF-8') as fd:
        # image_names = [line.split(' ')[0] for line in fd.readlines()]  # when labels are present
        image_names = [line.strip() for line in fd.readlines()]  # when no labels are present

    # with tf.device('/gpu:0'):
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS],  # variable width
                               name='input')
    input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length')

    net = crnn_model.ShadowNet(phase='test',
                               hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                               layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                               num_classes=NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False)

    # decodes = inference_ret
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret,
                                               sequence_length=input_sequence_length,  # variable sequence length
                                               merge_repeated=False,
                                               beam_width=10)
    # preds = _sparse_matrix_to_list(decodes[0], char_dict_path)
    # End of changes: the with tf.device block was commented out, 2019-11-20.

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)  # , log_device_placement=True)
    # allow_soft_placement=True: ops that cannot run on the GPU are moved to the CPU automatically;
    # log_device_placement=True: print the device each op is placed on
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        for image_name in image_names:
            # time_start = time.time()
            image_paths = os.path.join(image_path, image_name)
            # print(image_paths)
            image = cv2.imread(image_paths, cv2.IMREAD_COLOR)
            if image is None:
                print(image_paths + ' does not exist')
                continue
            image = np.array(image, np.float32) / 127.5 - 1.0
            seq_len = np.array([image.shape[1] / 4], dtype=np.int32)
            # time_end_1 = time.time()
            preds = sess.run(decodes, feed_dict={inputdata: [image],
                                                 input_sequence_length: seq_len})
            # time_end_2 = time.time()
            preds = _sparse_matrix_to_list(preds[0], char_dict_path)
            # time_end_3 = time.time()
            # print('Predict image {:s} result: {:s} cost time: {:f}'.format(
            #     image_name, preds[0], time_end - time_start))
            # print('Predict image {:s} total time: {:f} pre_process time: {:f}, run time: {:f}, convert time: {:f}'.format(
            #     preds[0], time_end_3 - time_start, time_end_1 - time_start,
            #     time_end_2 - time_end_1, time_end_3 - time_end_2))
            print('Predict image {:s} result: {:s}'.format(image_name, preds[0]))

    sess.close()

    return
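# _sparse_matrix_to_list is called above but defined elsewhere in the repository. A minimal
# sketch of the idea, assuming the character dictionary stores one character per line and the
# line index equals the CTC class label (the real dictionary format may differ):
def _sparse_matrix_to_list_sketch(sparse_tensor_value, char_dict_path):
    """Hypothetical helper: turn the SparseTensorValue produced by
    tf.nn.ctc_beam_search_decoder into one decoded string per batch element."""
    with open(char_dict_path, 'r', encoding='utf-8') as fd:
        chars = [line.rstrip('\n') for line in fd]

    batch_size = int(sparse_tensor_value.dense_shape[0])
    decoded = [[] for _ in range(batch_size)]
    # indices is an [n, 2] array of (batch, time) pairs; values holds the class labels
    for (batch_idx, _time_idx), label in zip(sparse_tensor_value.indices,
                                             sparse_tensor_value.values):
        decoded[int(batch_idx)].append(chars[int(label)])
    return [''.join(row) for row in decoded]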
def recognize_jmz(image_path, weights_path, char_dict_path, txt_file_path, test_count):
    """
    Recognition function
    :param image_path: directory containing the images
    :param weights_path: directory containing the saved model checkpoints
    :param char_dict_path: path to the character dictionary file
    :param txt_file_path: txt file that lists the image names and labels
    :param test_count: number of lines from the txt file to evaluate
    :return: None
    """
    global reg_result
    tf.reset_default_graph()

    NUM_CLASSES = get_num_class(char_dict_path)

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS],  # variable width
                               name='input')
    input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length')

    net = crnn_model.ShadowNet(phase='test',
                               hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                               layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                               num_classes=NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret,
                                               sequence_length=input_sequence_length,  # variable sequence length
                                               merge_repeated=False,
                                               beam_width=1)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    # sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    # sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    weights_path = tf.train.latest_checkpoint(weights_path)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        with open(txt_file_path, 'r') as fd:
            lines = [line.strip() for line in fd.readlines()]

        for i in range(test_count):
            line = lines[i]
            image_name = line.split(' ')[0]
            label = line.split(' ')[1]
            image_paths = os.path.join(image_path, image_name)
            image = cv2.imread(image_paths, cv2.IMREAD_COLOR)
            if image is None:
                print(image_paths + ' does not exist')
                continue
            image = _resize_image(image)
            image = np.array(image, np.float32) / 127.5 - 1.0
            seq_len = np.array([image.shape[1] / 4], dtype=np.int32)
            preds = sess.run(decodes, feed_dict={inputdata: [image],
                                                 input_sequence_length: seq_len})
            preds = _sparse_matrix_to_list(preds[0], char_dict_path)
            print('Label: [{:20s}]'.format(label))
            print('Pred : [{:20s}]\n'.format(preds[0]))

    sess.close()

    return
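# get_num_class is used by recognize(), recognize_jmz() and train_shadownet() but defined
# elsewhere in the repository. A minimal sketch under the assumption that the dictionary stores
# one character per line and that one extra class is reserved for the CTC blank label (the
# actual file format and blank handling may differ):
def get_num_class_sketch(char_dict_path):
    """Hypothetical helper: number of CTC output classes = characters + 1 (blank)."""
    with open(char_dict_path, 'r', encoding='utf-8') as fd:
        num_chars = sum(1 for line in fd if line.strip())
    return num_chars + 1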
def train_shadownet(dataset_dir, weights_path, char_dict_path, save_path):
    """
    Train the network. Reference: https://github.com/MaybeShewill-CV/CRNN_Tensorflow
    :param dataset_dir: directory containing the tfrecord files
    :param weights_path: path to the pretrained model to load
    :param char_dict_path: path to the character dictionary file
    :param save_path: directory where checkpoints are saved
    :return: history of training ctc loss values
    """
    # prepare dataset
    train_dataset = read_tfrecord.CrnnDataFeeder(
        dataset_dir=dataset_dir, char_dict_path=char_dict_path, flags='train')
    train_images, train_labels, train_images_paths = train_dataset.inputs(
        batch_size=CFG.TRAIN.BATCH_SIZE)

    #################### data augmentation ##############################
    # train_images = tf.multiply(tf.add(train_images, 1.0), 128.0)  # removed since read_tfrecord.py was changed
    tf.summary.image('original_image', train_images)  # write to the summary log for inspection
    images = apply_with_random_selector(
        train_images,
        lambda x, ordering: distort_color(x, ordering),
        num_cases=2)
    # images = tf.subtract(tf.divide(images, 127.5), 1.0)  # map to [-1, 1]; changed 128.0 to 127.5
    train_images = tf.clip_by_value(images, -1.0, 1.0)
    tf.summary.image('distord_turned_image', train_images)
    ######################################################################

    NUM_CLASSES = get_num_class(char_dict_path)

    # declare crnn net
    shadownet = crnn_model.ShadowNet(phase='train',
                                     hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                                     layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                                     num_classes=NUM_CLASSES)

    # set up training graph
    with tf.device('/gpu:0'):
        # compute loss and seq distance
        train_inference_ret, train_ctc_loss = shadownet.compute_loss(
            inputdata=train_images,
            labels=train_labels,
            name='shadow_net',
            reuse=False)

        # set learning rate
        global_step = tf.Variable(0, name='global_step', trainable=False)
        learning_rate = tf.train.exponential_decay(
            learning_rate=CFG.TRAIN.LEARNING_RATE,
            global_step=global_step,
            decay_steps=CFG.TRAIN.LR_DECAY_STEPS,
            decay_rate=CFG.TRAIN.LR_DECAY_RATE,
            staircase=True)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
            #                                        momentum=0.9).minimize(loss=train_ctc_loss, global_step=global_step)
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=learning_rate).minimize(loss=train_ctc_loss, global_step=global_step)
            # the original code used Momentum; changed to Adadelta to match the CRNN paper

    # Set tf summary
    os.makedirs(save_path, exist_ok=True)
    tf.summary.scalar(name='train_ctc_loss', tensor=train_ctc_loss)
    tf.summary.scalar(name='learning_rate', tensor=learning_rate)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(save_path, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(save_path)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = CFG.TRAIN.EPOCHS

    with sess.as_default():
        epoch = 0
        if weights_path is None or not os.path.exists(weights_path) or len(os.listdir(weights_path)) < 5:
            print('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            weights_path = tf.train.latest_checkpoint(weights_path)
            print('Restore model from last model checkpoint {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)
            epoch = sess.run(tf.train.get_global_step())
        cost_history = [np.inf]
        while epoch < train_epochs:
            epoch += 1
            _, train_ctc_loss_value, merge_summary_value, learning_rate_value = sess.run(
                [optimizer, train_ctc_loss, merge_summary_op, learning_rate])

            if (epoch + 1) % CFG.TRAIN.DISPLAY_STEP == 0:
                print('lr = {:9f}  epoch: {:d}  cost = {:9f}'.format(
                    learning_rate_value, epoch + 1, train_ctc_loss_value))

            # record history train ctc loss
            cost_history.append(train_ctc_loss_value)
            # add training summary
            summary_writer.add_summary(summary=merge_summary_value, global_step=epoch)

            if (epoch + 1) % CFG.TRAIN.SAVE_STEPS == 0:
                saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

    return np.array(cost_history[1:])  # Don't return the first np.inf
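# A hypothetical invocation of the training and recognition functions above; the paths below
# are placeholders, not from the original repository:
if __name__ == '__main__':
    cost_history = train_shadownet(dataset_dir='./data/tfrecords',
                                   weights_path='./model/crnn_chinese',  # pass None to train from scratch
                                   char_dict_path='./data/char_dict.txt',
                                   save_path='./model/crnn_chinese')
    print('Final training ctc loss: {:9f}'.format(cost_history[-1]))

    recognize_jmz(image_path='./data/test_images',
                  weights_path='./model/crnn_chinese',  # directory; resolved via tf.train.latest_checkpoint
                  char_dict_path='./data/char_dict.txt',
                  txt_file_path='./data/test_images/labels.txt',
                  test_count=100)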