def input_rec(image_list, weights_path, char_dict_path, ord_map_dict_path): new_heigth = 32 new_width = CFG.ARCH.INPUT_SIZE[0] inputdata = tf.placeholder( dtype=tf.float32, shape=[1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS], name='input') codec = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path) net = crnn_net.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) inference_ret = net.inference( inputdata=inputdata, name='shadow_net', reuse=tf.AUTO_REUSE #было#False ) decodes, _ = tf.nn.ctc_beam_search_decoder( inputs=inference_ret, sequence_length=int(new_width / 4) * np.ones(1), merge_repeated=False, beam_width=10) return inputdata, codec, net, inference_ret, decodes, _
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path, is_vis, is_english=True): """ :param image_path: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :param is_vis: :return: """ image = cv2.imread(image_path, cv2.IMREAD_COLOR) new_heigth = 32 scale_rate = new_heigth / image.shape[0] new_width = int(scale_rate * image.shape[1]) new_width = new_width if new_width > CFG.ARCH.INPUT_SIZE[ 0] else CFG.ARCH.INPUT_SIZE[0] image = cv2.resize(image, (new_width, new_heigth), interpolation=cv2.INTER_LINEAR) image_vis = image image = np.array(image, np.float32) / 127.5 - 1.0 inputdata = tf.placeholder( dtype=tf.float32, shape=[1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS], name='input') codec = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path) net = crnn_net.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False) decodes, _ = tf.nn.ctc_beam_search_decoder( inputs=inference_ret, sequence_length=int(new_width / 4) * np.ones(1), merge_repeated=False, beam_width=10) # config tf saver saver = tf.train.Saver() # config tf session sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH sess = tf.Session(config=sess_config) with sess.as_default(): saver.restore(sess=sess, save_path=weights_path) preds = sess.run(decodes, feed_dict={inputdata: [image]}) preds = codec.sparse_tensor_to_str(preds[0])[0] if is_english: preds = ' '.join(wordninja.split(preds)) logger.info('Predict image {:s} result: {:s}'.format( ops.split(image_path)[1], preds)) if is_vis: plt.figure('CRNN Model Demo') plt.imshow(image_vis[:, :, (2, 1, 0)]) plt.show() sess.close() return
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path, is_vis, is_english=False): """ :param image_path: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :param is_vis: :param is_english: :return: """ df = pd.DataFrame({"file_name": [], "file_code": []}) inputdata = tf.placeholder(dtype=tf.float32, shape=[ TEST_BATCH, CFG.ARCH.INPUT_SIZE[1], CFG.ARCH.INPUT_SIZE[0], CFG.ARCH.INPUT_CHANNELS ], name='input') net = crnn_net.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False) decodes, _ = tf.nn.ctc_greedy_decoder(inference_ret, CFG.ARCH.SEQ_LENGTH * np.ones(TEST_BATCH), merge_repeated=True) sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH sess = tf.Session(config=sess_config) saver = tf.train.Saver() with sess.as_default(): saver.restore(sess=sess, save_path=weights_path) for batch_num in range(int(40000 / TEST_BATCH)): batch_images_names = [] all_images = [] for pic in all_test_imgs[batch_num * TEST_BATCH:(batch_num + 1) * TEST_BATCH]: image = cv2.imread('../data/my_data/test/' + pic, cv2.IMREAD_COLOR) batch_images_names.append(pic) # print('picture name is ', pic) # image = cv2.resize(image, dsize=tuple(CFG.ARCH.INPUT_SIZE), interpolation=cv2.INTER_LINEAR) image = resize(image, (64, 100)) image_vis = image image = np.asarray(image, np.float32) # image = crop_img / 255. all_images.append(image) # if batch_num > 2: # break # codec = tf_io_pipline_fast_tools.CrnnFeatureReader( # char_dict_path=char_dict_path, # ord_map_dict_path=ord_map_dict_path # ) preds = sess.run(decodes, feed_dict={inputdata: all_images}) # print('before codes:', preds) result_list = [] for i in range(TEST_BATCH): result = preds[0].values[preds[0].indices[:, 0] == i] result_str = ''.join(str(i) for i in result) df.loc[TEST_BATCH * batch_num + i] = [batch_images_names[i], result_str] # if batch_num > 2: # break print('finish batch %d total batch %d' % (batch_num, 40000 / TEST_BATCH)) # result_list.append(result_str) df = df.sort_values('file_name') df.to_csv('./submit0617.csv', index=None) # preds = codec.sparse_tensor_to_str(preds[0])[0] # print('after codec:', preds) # if is_english: # preds = ' '.join(wordninja.split(preds)) # # logger.info('Predict image {:s} result: {:s}'.format( # ops.split(image_path)[1], preds) # ) # # if is_vis: # plt.figure('CRNN Model Demo') # plt.imshow(image_vis[:, :, (2, 1, 0)]) # plt.show() sess.close() return
def evaluate_shadownet(dataset_dir, weights_path, char_dict_path, ord_map_dict_path, is_visualize=False, is_process_all_data=False): """ :param dataset_dir: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :param is_visualize: :param is_process_all_data: :return: """ # prepare dataset test_dataset = shadownet_data_feed_pipline.CrnnDataFeeder( dataset_dir=dataset_dir, char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path, flags='test') test_images, test_labels, test_images_paths = test_dataset.inputs( batch_size=CFG.TEST.BATCH_SIZE) # set up test sample count if is_process_all_data: log.info('Start computing test dataset sample counts') t_start = time.time() test_sample_count = test_dataset.sample_counts() log.info( 'Computing test dataset sample counts finished, cost time: {:.5f}'. format(time.time() - t_start)) num_iterations = int(math.ceil(test_sample_count / CFG.TEST.BATCH_SIZE)) else: num_iterations = 1 # declare crnn net shadownet = crnn_net.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) # set up decoder decoder = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path) # compute inference result test_inference_ret = shadownet.inference(inputdata=test_images, name='shadow_net', reuse=False) test_decoded, test_log_prob = tf.nn.ctc_beam_search_decoder( test_inference_ret, CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TEST.BATCH_SIZE), beam_width=1, merge_repeated=False) # recover image from [-1.0, 1.0] ---> [0.0, 255.0] test_images = tf.multiply(tf.add(test_images, 1.0), 127.5, name='recoverd_test_images') # Set saver configuration saver = tf.train.Saver() # Set sess configuration sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH sess = tf.Session(config=sess_config) with sess.as_default(): saver.restore(sess=sess, save_path=weights_path) log.info('Start predicting...') per_char_accuracy = 0.0 full_sequence_accuracy = 0.0 total_labels_char_list = [] total_predictions_char_list = [] while True: try: for epoch in range(num_iterations): test_predictions_value, test_images_value, test_labels_value, \ test_images_paths_value = sess.run( [test_decoded, test_images, test_labels, test_images_paths]) test_images_paths_value = np.reshape( test_images_paths_value, newshape=test_images_paths_value.shape[0]) test_images_paths_value = [ tmp.decode('utf-8') for tmp in test_images_paths_value ] test_images_names_value = [ ops.split(tmp)[1] for tmp in test_images_paths_value ] test_labels_value = decoder.sparse_tensor_to_str( test_labels_value) test_predictions_value = decoder.sparse_tensor_to_str( test_predictions_value[0]) per_char_accuracy += evaluation_tools.compute_accuracy( test_labels_value, test_predictions_value, display=False, mode='per_char') full_sequence_accuracy += evaluation_tools.compute_accuracy( test_labels_value, test_predictions_value, display=False, mode='full_sequence') for index, test_image in enumerate(test_images_value): log.info( 'Predict {:s} image with gt label: {:s} **** predicted label: {:s}' .format(test_images_names_value[index], test_labels_value[index], test_predictions_value[index])) if is_visualize: plt.imshow( np.array(test_image, np.uint8)[:, :, (2, 1, 0)]) plt.show() test_labels_char_list_value = [ s for s in test_labels_value[index] ] test_predictions_char_list_value = [ s for s in test_predictions_value[index] ] if not test_labels_char_list_value or not test_predictions_char_list_value: continue if len(test_labels_char_list_value) != len( test_predictions_char_list_value): min_length = min( len(test_labels_char_list_value), len(test_predictions_char_list_value)) test_labels_char_list_value = test_labels_char_list_value[: min_length - 1] test_predictions_char_list_value = test_predictions_char_list_value[: min_length - 1] assert len(test_labels_char_list_value) == len(test_predictions_char_list_value), \ log.error('{}, {}'.format(test_labels_char_list_value, test_predictions_char_list_value)) total_labels_char_list.extend( test_labels_char_list_value) total_predictions_char_list.extend( test_predictions_char_list_value) if is_visualize: plt.imshow( np.array(test_image, np.uint8)[:, :, (2, 1, 0)]) except tf.errors.OutOfRangeError: log.error('End of tfrecords sequence') break except Exception as err: log.error(err) break avg_per_char_accuracy = per_char_accuracy / num_iterations avg_full_sequence_accuracy = full_sequence_accuracy / num_iterations log.info('Mean test per char accuracy is {:5f}'.format( avg_per_char_accuracy)) log.info('Mean test full sequence accuracy is {:5f}'.format( avg_full_sequence_accuracy)) # compute confusion matrix cnf_matrix = confusion_matrix(total_labels_char_list, total_predictions_char_list) np.set_printoptions(precision=2) evaluation_tools.plot_confusion_matrix(cm=cnf_matrix, normalize=True) plt.show()
def build_saved_model(ckpt_path, export_dir): """ Convert source ckpt weights file into tensorflow saved model :param ckpt_path: :param export_dir: :return: """ #if ops.exists(export_dir): # raise ValueError('Export dir must be a dir path that does not exist') #assert ops.exists(ops.split(ckpt_path)[0]) # build inference tensorflow graph image_size = tuple(CFG.ARCH.INPUT_SIZE) image_tensor = tf.placeholder(dtype=tf.float32, shape=[1, image_size[1], image_size[0], 3], name='input_tensor') # set crnn net net = crnn_net.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) # compute inference logits inference_ret = net.inference(inputdata=image_tensor, name='shadow_net', reuse=False) # beam search decode decodes, _ = tf.nn.ctc_beam_search_decoder( inputs=inference_ret, sequence_length=CFG.ARCH.SEQ_LENGTH * np.ones(1), merge_repeated=False) indices_output_tensor_info = tf.saved_model.utils.build_tensor_info( decodes[0].indices) values_output_tensor_info = tf.saved_model.utils.build_tensor_info( decodes[0].values) dense_shape_output_tensor_info = tf.saved_model.utils.build_tensor_info( decodes[0].dense_shape) saver = tf.train.Saver() # Set sess configuration sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH sess_config.gpu_options.allocator_type = 'BFC' sess = tf.Session(config=sess_config) with sess.as_default(): saver.restore(sess=sess, save_path=ckpt_path) # set model save builder saved_builder = sm.builder.SavedModelBuilder(export_dir) # add tensor need to be saved saved_input_tensor = sm.utils.build_tensor_info(image_tensor) saved_prediction_tensor = sm.utils.build_tensor_info(decodes[0]) # build SignatureDef protobuf signatur_def = sm.signature_def_utils.build_signature_def( inputs={'input_tensor': saved_input_tensor}, outputs={ 'decodes_indices': indices_output_tensor_info, 'decodes_values': values_output_tensor_info, 'decodes_dense_shape': dense_shape_output_tensor_info, }, method_name=sm.signature_constants.PREDICT_METHOD_NAME, ) # add graph into MetaGraphDef protobuf saved_builder.add_meta_graph_and_variables( sess, tags=[sm.tag_constants.SERVING], signature_def_map={ sm.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signatur_def }, ) # save model saved_builder.save() return
def train_shadownet_multi_gpu(dataset_dir, weights_path, char_dict_path, ord_map_dict_path): """ :param dataset_dir: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :return: """ # prepare dataset information train_dataset = shadownet_data_feed_pipline.CrnnDataFeeder( dataset_dir=dataset_dir, char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path, flags='train' ) val_dataset = shadownet_data_feed_pipline.CrnnDataFeeder( dataset_dir=dataset_dir, char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path, flags='val' ) train_images, train_labels, train_images_paths = train_dataset.inputs( batch_size=CFG.TRAIN.BATCH_SIZE ) val_images, val_labels, val_images_paths = val_dataset.inputs( batch_size=CFG.TRAIN.BATCH_SIZE ) # set crnn net shadownet = crnn_net.ShadowNet( phase='train', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES ) shadownet_val = crnn_net.ShadowNet( phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES ) # set average container tower_grads = [] train_tower_loss = [] val_tower_loss = [] batchnorm_updates = None train_summary_op_updates = None # set lr global_step = tf.Variable(0, name='global_step', trainable=False) learning_rate = tf.train.exponential_decay( learning_rate=CFG.TRAIN.LEARNING_RATE, global_step=global_step, decay_steps=CFG.TRAIN.LR_DECAY_STEPS, decay_rate=CFG.TRAIN.LR_DECAY_RATE, staircase=CFG.TRAIN.LR_STAIRCASE) # set up optimizer optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9) # set distributed train op with tf.variable_scope(tf.get_variable_scope()): is_network_initialized = False for i in range(CFG.TRAIN.GPU_NUM): with tf.device('/gpu:{:d}'.format(i)): with tf.name_scope('tower_{:d}'.format(i)) as _: train_loss, grads = compute_net_gradients( train_images, train_labels, shadownet, optimizer, is_net_first_initialized=is_network_initialized) is_network_initialized = True # Only use the mean and var in the first gpu tower to update the parameter if i == 0: batchnorm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS) train_summary_op_updates = tf.get_collection(tf.GraphKeys.SUMMARIES) tower_grads.append(grads) train_tower_loss.append(train_loss) with tf.name_scope('validation_{:d}'.format(i)) as _: val_loss, _ = compute_net_gradients( val_images, val_labels, shadownet_val, optimizer, is_net_first_initialized=is_network_initialized) val_tower_loss.append(val_loss) grads = average_gradients(tower_grads) avg_train_loss = tf.reduce_mean(train_tower_loss) avg_val_loss = tf.reduce_mean(val_tower_loss) # Track the moving averages of all trainable variables variable_averages = tf.train.ExponentialMovingAverage( CFG.TRAIN.MOVING_AVERAGE_DECAY, num_updates=global_step) variables_to_average = tf.trainable_variables() + tf.moving_average_variables() variables_averages_op = variable_averages.apply(variables_to_average) # Group all the op needed for training batchnorm_updates_op = tf.group(*batchnorm_updates) apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step) train_op = tf.group(apply_gradient_op, variables_averages_op, batchnorm_updates_op) # set tensorflow summary tboard_save_path = 'tboard/crnn_syn90k_multi_gpu' os.makedirs(tboard_save_path, exist_ok=True) summary_writer = tf.summary.FileWriter(tboard_save_path) avg_train_loss_scalar = tf.summary.scalar(name='average_train_loss', tensor=avg_train_loss) avg_val_loss_scalar = tf.summary.scalar(name='average_val_loss', tensor=avg_val_loss) learning_rate_scalar = tf.summary.scalar(name='learning_rate_scalar', tensor=learning_rate) train_merge_summary_op = tf.summary.merge( [avg_train_loss_scalar, learning_rate_scalar] + train_summary_op_updates ) val_merge_summary_op = tf.summary.merge([avg_val_loss_scalar]) # set tensorflow saver saver = tf.train.Saver() model_save_dir = 'model/crnn_syn90k_multi_gpu' os.makedirs(model_save_dir, exist_ok=True) train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time)) model_save_path = ops.join(model_save_dir, model_name) # set sess config sess_config = tf.ConfigProto(device_count={'GPU': CFG.TRAIN.GPU_NUM}, allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH sess_config.gpu_options.allocator_type = 'BFC' # Set the training parameters train_epochs = CFG.TRAIN.EPOCHS logger.info('Global configuration is as follows:') logger.info(CFG) sess = tf.Session(config=sess_config) summary_writer.add_graph(sess.graph) with sess.as_default(): epoch = 0 tf.train.write_graph(graph_or_graph_def=sess.graph, logdir='', name='{:s}/shadownet_model.pb'.format(model_save_dir)) if weights_path is None: logger.info('Training from scratch') init = tf.global_variables_initializer() sess.run(init) else: logger.info('Restore model from last model checkpoint {:s}'.format(weights_path)) saver.restore(sess=sess, save_path=weights_path) epoch = sess.run(tf.train.get_global_step()) train_cost_time_mean = [] val_cost_time_mean = [] while epoch < train_epochs: epoch += 1 # training part t_start = time.time() _, train_loss_value, train_summary, lr = \ sess.run(fetches=[train_op, avg_train_loss, train_merge_summary_op, learning_rate]) if math.isnan(train_loss_value): raise ValueError('Train loss is nan') cost_time = time.time() - t_start train_cost_time_mean.append(cost_time) summary_writer.add_summary(summary=train_summary, global_step=epoch) # validation part t_start_val = time.time() val_loss_value, val_summary = \ sess.run(fetches=[avg_val_loss, val_merge_summary_op]) summary_writer.add_summary(val_summary, global_step=epoch) cost_time_val = time.time() - t_start_val val_cost_time_mean.append(cost_time_val) if epoch % CFG.TRAIN.DISPLAY_STEP == 0: logger.info('Epoch_Train: {:d} total_loss= {:6f} ' 'lr= {:6f} mean_cost_time= {:5f}s '. format(epoch + 1, train_loss_value, lr, np.mean(train_cost_time_mean) )) train_cost_time_mean.clear() if epoch % CFG.TRAIN.VAL_DISPLAY_STEP == 0: logger.info('Epoch_Val: {:d} total_loss= {:6f} ' ' mean_cost_time= {:5f}s '. format(epoch + 1, val_loss_value, np.mean(val_cost_time_mean))) val_cost_time_mean.clear() if epoch % 5000 == 0: saver.save(sess=sess, save_path=model_save_path, global_step=epoch) sess.close() return
def train_shadownet(dataset_dir, weights_path, char_dict_path, ord_map_dict_path, need_decode=False): """ :param dataset_dir: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :param need_decode: :return: """ # prepare dataset train_dataset = shadownet_data_feed_pipline.CrnnDataFeeder( dataset_dir=dataset_dir, char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path, flags='train' ) val_dataset = shadownet_data_feed_pipline.CrnnDataFeeder( dataset_dir=dataset_dir, char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path, flags='val' ) train_images, train_labels, train_images_paths = train_dataset.inputs( batch_size=CFG.TRAIN.BATCH_SIZE ) val_images, val_labels, val_images_paths = val_dataset.inputs( batch_size=CFG.TRAIN.BATCH_SIZE ) # declare crnn net shadownet = crnn_net.ShadowNet( phase='train', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES ) shadownet_val = crnn_net.ShadowNet( phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES ) # set up decoder decoder = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path ) # set up training graph with tf.device('/gpu:1'): # compute loss and seq distance train_inference_ret, train_ctc_loss = shadownet.compute_loss( inputdata=train_images, labels=train_labels, name='shadow_net', reuse=False ) val_inference_ret, val_ctc_loss = shadownet_val.compute_loss( inputdata=val_images, labels=val_labels, name='shadow_net', reuse=True ) train_decoded, train_log_prob = tf.nn.ctc_beam_search_decoder( train_inference_ret, CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TRAIN.BATCH_SIZE), merge_repeated=False ) val_decoded, val_log_prob = tf.nn.ctc_beam_search_decoder( val_inference_ret, CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TRAIN.BATCH_SIZE), merge_repeated=False ) train_sequence_dist = tf.reduce_mean( tf.edit_distance(tf.cast(train_decoded[0], tf.int32), train_labels), name='train_edit_distance' ) val_sequence_dist = tf.reduce_mean( tf.edit_distance(tf.cast(val_decoded[0], tf.int32), val_labels), name='val_edit_distance' ) # set learning rate global_step = tf.Variable(0, name='global_step', trainable=False) learning_rate = tf.train.exponential_decay( learning_rate=CFG.TRAIN.LEARNING_RATE, global_step=global_step, decay_steps=CFG.TRAIN.LR_DECAY_STEPS, decay_rate=CFG.TRAIN.LR_DECAY_RATE, staircase=CFG.TRAIN.LR_STAIRCASE) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): optimizer = tf.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9).minimize( loss=train_ctc_loss, global_step=global_step) # Set tf summary tboard_save_dir = 'tboard/crnn_syn90k' os.makedirs(tboard_save_dir, exist_ok=True) tf.summary.scalar(name='train_ctc_loss', tensor=train_ctc_loss) tf.summary.scalar(name='val_ctc_loss', tensor=val_ctc_loss) tf.summary.scalar(name='learning_rate', tensor=learning_rate) if need_decode: tf.summary.scalar(name='train_seq_distance', tensor=train_sequence_dist) tf.summary.scalar(name='val_seq_distance', tensor=val_sequence_dist) merge_summary_op = tf.summary.merge_all() # Set saver configuration saver = tf.train.Saver() model_save_dir = 'model/crnn_syn90k' os.makedirs(model_save_dir, exist_ok=True) #train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) #model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time)) model_name = 'shadownet.ckpt' model_save_path = ops.join(model_save_dir, model_name) # Set sess configuration sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH sess = tf.Session(config=sess_config) summary_writer = tf.summary.FileWriter(tboard_save_dir) summary_writer.add_graph(sess.graph) # Set the training parameters train_epochs = CFG.TRAIN.EPOCHS with sess.as_default(): epoch = 0 if weights_path is None: logger.info('Training from scratch') init = tf.global_variables_initializer() sess.run(init) else: logger.info('Restore model from {:s}'.format(weights_path)) saver.restore(sess=sess, save_path=weights_path) epoch = sess.run(tf.train.get_global_step()) patience_counter = 1 cost_history = [np.inf] while epoch < train_epochs: epoch += 1 # setup early stopping if epoch > 1 and CFG.TRAIN.EARLY_STOPPING: # We always compare to the first point where cost didn't improve if cost_history[-1 - patience_counter] - cost_history[-1] > CFG.TRAIN.PATIENCE_DELTA: patience_counter = 1 else: patience_counter += 1 if patience_counter > CFG.TRAIN.PATIENCE_EPOCHS: logger.info("Cost didn't improve beyond {:f} for {:d} epochs, stopping early.". format(CFG.TRAIN.PATIENCE_DELTA, patience_counter)) break if need_decode and epoch % 500 == 0: # train part _, train_ctc_loss_value, train_seq_dist_value, \ train_predictions, train_labels_sparse, merge_summary_value = sess.run( [optimizer, train_ctc_loss, train_sequence_dist, train_decoded, train_labels, merge_summary_op]) train_labels_str = decoder.sparse_tensor_to_str(train_labels_sparse) train_predictions = decoder.sparse_tensor_to_str(train_predictions[0]) avg_train_accuracy = evaluation_tools.compute_accuracy(train_labels_str, train_predictions) if epoch % CFG.TRAIN.DISPLAY_STEP == 0: logger.info('Epoch_Train: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'.format( epoch + 1, train_ctc_loss_value, train_seq_dist_value, avg_train_accuracy)) # validation part val_ctc_loss_value, val_seq_dist_value, \ val_predictions, val_labels_sparse = sess.run( [val_ctc_loss, val_sequence_dist, val_decoded, val_labels]) val_labels_str = decoder.sparse_tensor_to_str(val_labels_sparse) val_predictions = decoder.sparse_tensor_to_str(val_predictions[0]) avg_val_accuracy = evaluation_tools.compute_accuracy(val_labels_str, val_predictions) if epoch % CFG.TRAIN.DISPLAY_STEP == 0: logger.info('Epoch_Val: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'.format( epoch + 1, val_ctc_loss_value, val_seq_dist_value, avg_val_accuracy)) else: _, train_ctc_loss_value, merge_summary_value = sess.run( [optimizer, train_ctc_loss, merge_summary_op]) if epoch % CFG.TRAIN.DISPLAY_STEP == 0: logger.info('Epoch_Train: {:d} cost= {:9f}'.format(epoch + 1, train_ctc_loss_value)) # record history train ctc loss cost_history.append(train_ctc_loss_value) # add training sumary summary_writer.add_summary(summary=merge_summary_value, global_step=epoch) if epoch % 2000 == 0: saver.save(sess=sess, save_path=model_save_path, global_step=epoch) return np.array(cost_history[1:]) # Don't return the first np.inf
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path, output_path): """ :param image_path: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :param output_path: :return: """ # read pdf image image = cv2.imread(image_path, cv2.IMREAD_COLOR) # split pdf image into row block pdf_image_row_blocks = split_pdf_image_into_row_image_block(image) # locate the text area in each row block pdf_image_text_areas = [] new_heigth = 32 max_text_area_length = -1 for index, row_block in enumerate(pdf_image_row_blocks): text_area = locate_text_area(row_block) text_area_height = text_area.shape[0] scale = new_heigth / text_area_height max_text_area_length = max(max_text_area_length, int(scale * text_area.shape[1])) pdf_image_text_areas.append(text_area) new_width = max_text_area_length new_width = new_width if new_width > CFG.ARCH.INPUT_SIZE[ 0] else CFG.ARCH.INPUT_SIZE[0] # definite the compute graph inputdata = tf.placeholder( dtype=tf.float32, shape=[1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS], name='input') codec = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path) net = crnn_net.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False) decodes, _ = tf.nn.ctc_beam_search_decoder( inputs=inference_ret, sequence_length=int(new_width / 4) * np.ones(1), merge_repeated=False, beam_width=1) # config tf saver saver = tf.train.Saver() # config tf session sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH sess = tf.Session(config=sess_config) with sess.as_default(): saver.restore(sess=sess, save_path=weights_path) pdf_recognize_results = [] for index, pdf_image_text_area in enumerate(pdf_image_text_areas): # resize text area into size (None, new_height) pdf_image_text_area_height = pdf_image_text_area.shape[0] scale = new_heigth / pdf_image_text_area_height new_width_tmp = int(scale * pdf_image_text_area.shape[1]) pdf_image_text_area = cv2.resize(pdf_image_text_area, (new_width_tmp, new_heigth), interpolation=cv2.INTER_LINEAR) # pad text area into size (new_width, new_height) if new_width_tmp < new_width if new_width_tmp < new_width: pad_area_width = new_width - new_width_tmp pad_area = np.zeros(shape=[new_heigth, pad_area_width, 3], dtype=np.uint8) + 255 pdf_image_text_area = np.concatenate( (pdf_image_text_area, pad_area), axis=1) pdf_image_text_area = np.array(pdf_image_text_area, np.float32) / 127.5 - 1.0 preds = sess.run(decodes, feed_dict={inputdata: [pdf_image_text_area]}) preds = codec.sparse_tensor_to_str(preds[0]) pdf_recognize_results.append(preds[0]) output_text = [] need_tab = True for index, pdf_text in enumerate(pdf_recognize_results): if need_tab: text_console_str = '---- {:s}'.format(pdf_text) text_file_str = ' {:s}'.format(pdf_text) print(text_console_str) output_text.append(text_file_str) need_tab = \ index < (len(pdf_recognize_results) - 1) and \ len(pdf_recognize_results[index + 1]) - len(pdf_text) > 10 else: text_console_str = '---- {:s}'.format(pdf_text) text_file_str = ' {:s}'.format(pdf_text) print(text_console_str) output_text.append(text_file_str) need_tab = \ index < (len(pdf_recognize_results) - 1) and \ len(pdf_recognize_results[index + 1]) - len(pdf_text) > 10 res = '\n'.join(output_text) with open(output_path, 'w') as file: file.writelines(res) return
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path, is_vis, is_english=True): """ :param image_path: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :param is_vis: :return: """ image = cv2.imread(image_path, cv2.IMREAD_COLOR) new_heigth = 32 scale_rate = new_heigth / image.shape[0] new_width = int(scale_rate * image.shape[1]) new_width = new_width if new_width > CFG.ARCH.INPUT_SIZE[0] else \ CFG.ARCH.INPUT_SIZE[0] # TODO: Fix it, force 100. new_width = 100 image = cv2.resize(image, (new_width, new_heigth), interpolation=cv2.INTER_LINEAR) image_vis = image image = np.array(image, np.float32) / 127.5 - 1.0 print(new_width, new_heigth) inputdata = tf.placeholder( dtype=tf.float32, shape=[1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS], name='input' ) codec = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path ) net = crnn_net.ShadowNet( phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES ) inference_ret = net.inference( inputdata=inputdata, name='shadow_net', reuse=False ) decodes, _ = tf.nn.ctc_beam_search_decoder( inputs=inference_ret, sequence_length=int(new_width / 4) * np.ones(1), merge_repeated=True, beam_width=10 ) decode = decodes[0] print(decode) # config tf saver saver = tf.train.Saver() # config tf session sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH sess = tf.Session(config=sess_config) with sess.as_default(): saver.restore(sess=sess, save_path=weights_path) preds = sess.run(decode, feed_dict={inputdata: [image]}) print(preds) preds = codec.sparse_tensor_to_str(preds)[0] if is_english: preds = ' '.join(wordninja.split(preds)) # return preds_evaluated input_graph_name = "input_graph.pb" output_graph_name = "output_graph.pb" export_dir = 'export' tf.train.write_graph(sess.graph, export_dir, input_graph_name) tf.logging.info("Write graph at %s." % os.path.join(export_dir, input_graph_name)) export_graph = tf.Graph() with export_graph.as_default(): freeze_graph.freeze_graph(input_graph=os.path.join(export_dir, input_graph_name), input_saver="", input_binary=False, input_checkpoint=weights_path, output_node_names='CTCBeamSearchDecoder', restore_op_name="", filename_tensor_name="", output_graph=os.path.join(export_dir, output_graph_name), clear_devices=True, initializer_nodes=None, variable_names_blacklist="") tf.logging.info("Export model at %s." % os.path.join(export_dir, output_graph_name)) logger.info('Predict image {:s} result: {:s}'.format( ops.split(image_path)[1], preds) ) if is_vis: plt.figure('CRNN Model Demo') plt.imshow(image_vis[:, :, (2, 1, 0)]) plt.show() sess.close() return
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path): """ :param image_path: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :return: """ image_list =[] new_width_list = [] inputdata_list = [] for i in range(len(test_img)):#(quantity_of_files): print(i, 'image processing') #load and normalize an image one_of_images = test_img[i] image =cv2.imread(one_of_images, cv2.IMREAD_COLOR) new_heigth = 32 scale_rate = new_heigth / image.shape[0] new_width = int(scale_rate * image.shape[1]) new_width = CFG.ARCH.INPUT_SIZE[0]#new_width if new_width > CFG.ARCH.INPUT_SIZE[0] else CFG.ARCH.INPUT_SIZE[0] new_width_list.append(new_width) image = cv2.resize(image, (new_width, new_heigth), interpolation=cv2.INTER_LINEAR) image_list.append(np.array(image, np.float32) / 127.5 - 1.0) inputdata = tf.placeholder( dtype=tf.float32, shape=[1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS], name='input' ) #inputdata_list.append(inputdata) codec = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path ) net = crnn_net.ShadowNet( phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES ) inference_ret = net.inference( inputdata=inputdata, name='shadow_net', reuse=tf.AUTO_REUSE #было#False ) decodes, _ = tf.nn.ctc_beam_search_decoder( inputs=inference_ret, sequence_length=int(new_width / 4) * np.ones(1), merge_repeated=False, beam_width=10 ) # config tf saver saver = tf.compat.v1.train.Saver() # config tf session sess_config = tf.compat.v1.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH sess = tf.compat.v1.Session(config=sess_config) with sess.as_default(): saver.restore(sess=sess, save_path=weights_path) for i in range(len(test_img)):#(quantity_of_files): #print('size of image', i+500, 'is',sys.getsizeof(image_list[i])) preds = sess.run(decodes, feed_dict={inputdata: [image_list[i]]}) #preds = sess.run(decodes, feed_dict={inputdata_list[i]: [image_list[i]]}) preds = codec.sparse_tensor_to_str(preds[0])[0] #print(i, 'image recognition result_txt is', preds) #print('size of preds', i+500, 'is',sys.getsizeof(preds)) result_txt.write(preds+ '\n') sess.close() del image_list del inputdata_list del new_width_list del net del saver del codec del inference_ret del decodes del inputdata return
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path, is_vis, is_english, txt_path): """ :param image_path: :param weights_path: :param char_dict_path: :param ord_map_dict_path: :param is_vis: :param is_english: :return: """ test_number = 20 print('Test file path {} '.format(txt_path)) NUM_CLASSES = get_num_class(char_dict_path) print('num_classes: ', NUM_CLASSES) with open(txt_path, 'r') as f1: linelist = f1.readlines() image_list = [] for i in range(test_number): image_path_temp = image_path + linelist[i].split(' ')[0] image_list.append((image_path_temp, linelist[i].split(' ')[1].replace( '\r', '').replace('\n', '').replace('\t', ''))) inputdata = tf.placeholder(dtype=tf.float32, shape=[ 1, CFG.ARCH.INPUT_SIZE[1], CFG.ARCH.INPUT_SIZE[0], CFG.ARCH.INPUT_CHANNELS ], name='input') codec = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path=char_dict_path, ord_map_dict_path=ord_map_dict_path) net = crnn_net.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=NUM_CLASSES) inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False) decodes, _ = tf.nn.ctc_beam_search_decoder( inputs=inference_ret, sequence_length=int(CFG.ARCH.INPUT_SIZE[0] / 4) * np.ones(1), merge_repeated=False, beam_width=10) # config tf saver saver = tf.train.Saver() # config tf session sess_config = tf.ConfigProto(allow_soft_placement=True) sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH sess = tf.Session(config=sess_config) weights_path = tf.train.latest_checkpoint(weights_path) print('weights_path: ', weights_path) with sess.as_default(): saver.restore(sess=sess, save_path=weights_path) for image_name, label in image_list: image = cv2.imread(image_name, cv2.IMREAD_COLOR) image = cv2.resize(image, dsize=tuple(CFG.ARCH.INPUT_SIZE), interpolation=cv2.INTER_LINEAR) image_vis = image image = np.array(image, np.float32) / 127.5 - 1.0 preds = sess.run(decodes, feed_dict={inputdata: [image]}) preds = codec.sparse_tensor_to_str(preds[0])[0] if is_english: preds = ' '.join(wordninja.split(preds)) print('Label[{:20s}] Pred:[{:20s}]'.format(label, preds)) if is_vis: plt.figure('CRNN Model Demo') plt.imshow(image_vis[:, :, (2, 1, 0)]) plt.show() sess.close() return