def recognize(image_path, weights_path, is_vis=True):
    """
    :param image_path: path of the image to recognize
    :param weights_path: path of the checkpoint holding the model weights
    :param is_vis: whether to display the image
    :return:
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (100, 32))
    image = np.expand_dims(image, axis=0).astype(np.float32)

    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')

    net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2,
                               seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)

    decoder = data_utils.TextFeatureIO()

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        preds = sess.run(decodes, feed_dict={inputdata: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])

        logger.info('Predict image {:s} label {:s}'.format(
            ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()

        sess.close()

    return
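# A minimal usage sketch for recognize() above; both paths are hypothetical
# placeholders rather than files known to exist in this repo:
#
# recognize(image_path='data/test_images/test_01.jpg',
#           weights_path='model/shadownet/shadownet.ckpt-199999',
#           is_vis=True)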
def write_features(dataset_dir, save_dir):
    """
    :param dataset_dir: directory holding the dataset and its annotation file
    :param save_dir: directory in which to save the tfrecords
    :return:
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initialize the dataset provider ......')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir, annotation_name='sample.txt',
                                              validation_set=True, validation_split=0.15,
                                              shuffle=None, normalization=None)
    print('Dataset provider initialization complete')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')

    train_images = provider.train.images
    train_images = [bytes(list(np.reshape(tmp, [100 * 32 * 3]))) for tmp in train_images]
    train_labels = provider.train.labels
    train_imagenames = provider.train.imagenames

    train_tfrecord_path = ops.join(save_dir, 'train_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=train_tfrecord_path, labels=train_labels,
                                     images=train_images, imagenames=train_imagenames)

    # write test tfrecords
    print('Start writing testing tf records')

    test_images = provider.test.images
    test_images = [bytes(list(np.reshape(tmp, [100 * 32 * 3]))) for tmp in test_images]
    test_labels = provider.test.labels
    test_imagenames = provider.test.imagenames

    test_tfrecord_path = ops.join(save_dir, 'test_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=test_tfrecord_path, labels=test_labels,
                                     images=test_images, imagenames=test_imagenames)

    # write val tfrecords
    print('Start writing validation tf records')

    val_images = provider.validation.images
    val_images = [bytes(list(np.reshape(tmp, [100 * 32 * 3]))) for tmp in val_images]
    val_labels = provider.validation.labels
    val_imagenames = provider.validation.imagenames

    val_tfrecord_path = ops.join(save_dir, 'validation_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=val_tfrecord_path, labels=val_labels,
                                     images=val_images, imagenames=val_imagenames)

    return
def getRecognize():
    # Restore the network from a ckpt file and return a recognize function
    # that can recognize image content.
    with tf.Graph().as_default() as net2_graph:
        inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')

        net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2,
                                   seq_length=25, num_classes=20)

        with tf.variable_scope('shadow'):
            net_out = net.build_shadownet(inputdata=inputdata)

        decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                                   sequence_length=25 * np.ones(1),
                                                   merge_repeated=False)

        decoder = data_utils.TextFeatureIO()
        saver2 = tf.train.Saver()

    sess2 = tf.Session(graph=net2_graph)
    saver2.restore(sess=sess2, save_path=FLAGS.crnnWeightsPath)

    def recognize(img, rst):
        '''
        Recognize text according to the localization results of the previous-stage
        network and annotate the image.
        Input:
            img: original image to run recognition on
            rst: dict containing the RBOX coordinates
        Returns:
            img: image annotated with the recognition results
        '''
        for i, t in enumerate(rst['text_lines']):
            # draw the RBOX first
            d = np.array([t['x0'], t['y0'], t['x1'], t['y1'], t['x2'],
                          t['y2'], t['x3'], t['y3']], dtype='int32')
            d = d.reshape(-1, 2)  # group the elements in pairs, forming the four corner points
            cv2.polylines(img, [d], isClosed=True, color=(255, 255, 0))

            # feed the content inside the RBOX to the recognizer
            x0 = int(min(t['x0'], t['x1'], t['x2'], t['x3']))
            x1 = int(max(t['x0'], t['x1'], t['x2'], t['x3']))
            y0 = int(min(t['y0'], t['y1'], t['y2'], t['y3']))
            y1 = int(max(t['y0'], t['y1'], t['y2'], t['y3']))
            offset = (x1 - x0) // 10  # the localization is imperfect, so the RBOX is often too narrow horizontally
            image = img[y0:y1, max(0, x0 - offset):x1 + offset]
            image = cv2.resize(image, (100, 32))
            image = np.expand_dims(image, axis=0).astype(np.float32)

            preds = sess2.run(decodes, feed_dict={inputdata: image})
            preds = decoder.writer.sparse_tensor_to_str(preds[0])

            # judge correctness from the recognition result and display it
            if not judge(str(preds[0])):
                img = cv2.putText(img, preds[0], (x1, y1), cv2.FONT_HERSHEY_COMPLEX,
                                  0.4, (255, 0, 0), True)
                img = cv2.line(img, (x0, y0), (x1, y1), (0, 0, 255), thickness=4)

        return img

    return recognize
def execute(self, data, batch_size):
    sess = self.output['sess']
    x = self.output['x']
    y_ = self.output['y_']

    decoder = data_utils.TextFeatureIO()

    ret = []
    for i in range(batch_size):
        image = Image.open(data[i])
        image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        image = cv2.resize(image, (config.cfg.TRAIN.width, 32))
        image = np.expand_dims(image, axis=0).astype(np.float32)

        preds = sess.run(y_, feed_dict={x: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])[0] + '\n'
        ret.append(preds)

    return ret
def getRecognize():
    # Restore the network from a ckpt file and return a recognize function
    # that can recognize image content.
    with tf.Graph().as_default() as net2_graph:
        inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')

        net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2,
                                   seq_length=25, num_classes=19)

        with tf.variable_scope('shadow'):
            net_out = net.build_shadownet(inputdata=inputdata)

        decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                                   sequence_length=25 * np.ones(1),
                                                   merge_repeated=False)

        decoder = data_utils.TextFeatureIO()
        saver2 = tf.train.Saver()

    sess2 = tf.Session(graph=net2_graph)
    saver2.restore(sess=sess2, save_path=FLAGS.crnnWeightsPath)

    def recognize(path):
        '''
        Run CRNN recognition on every image under the given path and print the
        results on the command line.
        Input:
            path: path of the dataset
        Returns:
            nothing
        '''
        imageList = getfilelist(path)
        for imagePath in imageList:
            image = cv2.imread(imagePath, cv2.IMREAD_COLOR)
            image = cv2.resize(image, (100, 32))
            image = np.expand_dims(image, axis=0).astype(np.float32)

            preds = sess2.run(decodes, feed_dict={inputdata: image})
            preds = decoder.writer.sparse_tensor_to_str(preds[0])

            print('Predict image {:s} label {:s}'.format(
                os.path.split(imagePath)[1], preds[0]))

    return recognize
def test_shadownet(dataset_dir, weights_path, is_vis=False, is_recursive=True):
    """
    :param dataset_dir: directory holding test_feature.tfrecords
    :param weights_path: path of the checkpoint holding the model weights
    :param is_vis: whether to display the images
    :param is_recursive: whether to iterate over the whole test set in order (True)
                         or evaluate a single shuffled batch (False)
    :return:
    """
    # Initialize the record decoder
    decoder = data_utils.TextFeatureIO().reader
    images_t, labels_t, imagenames_t = decoder.read_features(
        ops.join(dataset_dir, 'test_feature.tfrecords'), num_epochs=None)

    if not is_recursive:
        images_sh, labels_sh, imagenames_sh = tf.train.shuffle_batch(
            tensors=[images_t, labels_t, imagenames_t], batch_size=32,
            capacity=1000 + 32 * 2, min_after_dequeue=2, num_threads=4)
    else:
        images_sh, labels_sh, imagenames_sh = tf.train.batch(
            tensors=[images_t, labels_t, imagenames_t], batch_size=32,
            capacity=1000 + 32 * 2, num_threads=4)

    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    # build shadownet
    net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2,
                               seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out, 25 * np.ones(32), merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    test_sample_count = 0
    for record in tf.python_io.tf_record_iterator(ops.join(dataset_dir, 'test_feature.tfrecords')):
        test_sample_count += 1
    loops_nums = int(math.ceil(test_sample_count / 32))
    # loops_nums = 100

    with sess.as_default():
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print('Start predicting ......')
        if not is_recursive:
            predictions, images, labels, imagenames = sess.run(
                [decoded, images_sh, labels_sh, imagenames_sh])
            imagenames = np.reshape(imagenames, newshape=imagenames.shape[0])
            imagenames = [tmp.decode('utf-8') for tmp in imagenames]
            preds_res = decoder.sparse_tensor_to_str(predictions[0])
            gt_res = decoder.sparse_tensor_to_str(labels)

            accuracy = []

            for index, gt_label in enumerate(gt_res):
                pred = preds_res[index]
                total_count = len(gt_label)
                correct_count = 0
                try:
                    for i, tmp in enumerate(gt_label):
                        if tmp == pred[i]:
                            correct_count += 1
                except IndexError:
                    continue
                finally:
                    try:
                        accuracy.append(correct_count / total_count)
                    except ZeroDivisionError:
                        if len(pred) == 0:
                            accuracy.append(1)
                        else:
                            accuracy.append(0)

            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            print('Mean test accuracy is {:5f}'.format(accuracy))

            for index, image in enumerate(images):
                print('Predict {:s} image with gt label: {:s} **** predict label: {:s}'.format(
                    imagenames[index], gt_res[index], preds_res[index]))
                if is_vis:
                    plt.imshow(image[:, :, (2, 1, 0)])
                    plt.show()
        else:
            accuracy = []
            for epoch in range(loops_nums):
                predictions, images, labels, imagenames = sess.run(
                    [decoded, images_sh, labels_sh, imagenames_sh])
                imagenames = np.reshape(imagenames, newshape=imagenames.shape[0])
                imagenames = [tmp.decode('utf-8') for tmp in imagenames]
                preds_res = decoder.sparse_tensor_to_str(predictions[0])
                gt_res = decoder.sparse_tensor_to_str(labels)

                for index, gt_label in enumerate(gt_res):
                    pred = preds_res[index]
                    total_count = len(gt_label)
                    correct_count = 0
                    try:
                        for i, tmp in enumerate(gt_label):
                            if tmp == pred[i]:
                                correct_count += 1
                    except IndexError:
                        continue
                    finally:
                        try:
                            accuracy.append(correct_count / total_count)
                        except ZeroDivisionError:
                            if len(pred) == 0:
                                accuracy.append(1)
                            else:
                                accuracy.append(0)

                for index, image in enumerate(images):
                    print('Predict {:s} image with gt label: {:s} **** predict label: {:s}'.format(
                        imagenames[index], gt_res[index], preds_res[index]))
                    # if is_vis:
                    #     plt.imshow(image[:, :, (2, 1, 0)])
                    #     plt.show()

            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            print('Test accuracy is {:5f}'.format(accuracy))

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()
    return
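# The per-character accuracy computation above is duplicated inline in several of
# the training and test functions in this file, while the newer variants call a
# compute_accuracy() helper that is not shown here. A minimal sketch of such a
# helper, assuming it mirrors the inline logic (mean per-character match rate
# between ground truth and prediction) and that numpy is imported as np:
def compute_accuracy(ground_truth, predictions, display=False):
    """Mean per-character accuracy between ground-truth and predicted strings."""
    accuracy = []
    for index, gt_label in enumerate(ground_truth):
        pred = predictions[index]
        total_count = len(gt_label)
        # count positions where the prediction matches, up to the shorter string
        correct_count = sum(1 for i, tmp in enumerate(gt_label)
                            if i < len(pred) and tmp == pred[i])
        if total_count == 0:
            # empty ground truth: correct only if the prediction is empty too
            accuracy.append(1 if len(pred) == 0 else 0)
        else:
            accuracy.append(correct_count / total_count)
        if display:
            print('gt: {:s} **** pred: {:s}'.format(gt_label, pred))
    return np.mean(np.array(accuracy).astype(np.float32), axis=0)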
def write_features(dataset_dir, save_dir, batch_size):
    """
    :param dataset_dir: directory holding the dataset and its annotation file
    :param save_dir: directory in which to save the tfrecords
    :param batch_size: number of samples per tfrecords shard
    :return:
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initialize the dataset provider ......')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir, annotation_name='sample.txt',
                                              validation_set=True, validation_split=0.05,
                                              shuffle='every_epoch', normalization=None)
    print('Dataset provider initialization complete')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')

    train_images_nums = provider.train.num_examples
    epoch_nums = int(math.ceil(train_images_nums / batch_size))
    for loop in tqdm.tqdm(range(epoch_nums)):
        train_images, train_labels, train_imagenames = provider.train.next_batch(batch_size=batch_size)
        # width is assumed to be a module-level constant; cv2.resize takes dsize as (width, height)
        train_images = [cv2.resize(tmp, (width, 32)) for tmp in train_images]
        train_images = [bytes(list(np.reshape(tmp, [width * 32 * 3]))) for tmp in train_images]

        if loop * batch_size + batch_size > train_images_nums:
            train_tfrecord_path = ops.join(save_dir, 'train_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, train_images_nums))
        else:
            train_tfrecord_path = ops.join(save_dir, 'train_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, loop * batch_size + batch_size))
        feature_io.writer.write_features(tfrecords_path=train_tfrecord_path, labels=train_labels,
                                         images=train_images, imagenames=train_imagenames)

    # write test tfrecords
    print('Start writing testing tf records')

    test_images_nums = provider.test.num_examples
    epoch_nums = int(math.ceil(test_images_nums / batch_size))
    for loop in tqdm.tqdm(range(epoch_nums)):
        test_images, test_labels, test_imagenames = provider.test.next_batch(batch_size=batch_size)
        # dsize fixed to (width, 32) to match the training branch; the original (32, width) swapped the axes
        test_images = [cv2.resize(tmp, (width, 32)) for tmp in test_images]
        test_images = [bytes(list(np.reshape(tmp, [32 * width * 3]))) for tmp in test_images]

        if loop * batch_size + batch_size > test_images_nums:
            test_tfrecord_path = ops.join(save_dir, 'test_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, test_images_nums))
        else:
            test_tfrecord_path = ops.join(save_dir, 'test_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, loop * batch_size + batch_size))
        feature_io.writer.write_features(tfrecords_path=test_tfrecord_path, labels=test_labels,
                                         images=test_images, imagenames=test_imagenames)

    # write val tfrecords
    print('Start writing validation tf records')

    val_image_nums = provider.validation.num_examples
    epoch_nums = int(math.ceil(val_image_nums / batch_size))
    for loop in tqdm.tqdm(range(epoch_nums)):
        val_images, val_labels, val_imagenames = provider.validation.next_batch(batch_size=batch_size)
        # dsize fixed to (width, 32) to match the training branch
        val_images = [cv2.resize(tmp, (width, 32)) for tmp in val_images]
        val_images = [bytes(list(np.reshape(tmp, [32 * width * 3]))) for tmp in val_images]

        if loop * batch_size + batch_size > val_image_nums:
            val_tfrecord_path = ops.join(save_dir, 'validation_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, val_image_nums))
        else:
            val_tfrecord_path = ops.join(save_dir, 'validation_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, loop * batch_size + batch_size))
        feature_io.writer.write_features(tfrecords_path=val_tfrecord_path, labels=val_labels,
                                         images=val_images, imagenames=val_imagenames)
    return
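# The resize bug fixed above stems from OpenCV's argument order: cv2.resize()
# takes its dsize argument as (width, height), while numpy shapes are
# (height, width, channels). A quick self-contained check of that convention,
# assuming only numpy and OpenCV are installed:
#
# import cv2
# import numpy as np
#
# img = np.zeros((64, 200, 3), dtype=np.uint8)   # 64 rows (height) x 200 cols (width)
# resized = cv2.resize(img, (100, 32))           # dsize = (width=100, height=32)
# assert resized.shape == (32, 100, 3)           # shape is (height, width, channels)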
def train_shadownet(dataset_dir, weights_path=None):
    """
    :param dataset_dir: directory holding train_feature.tfrecords
    :param weights_path: path of a checkpoint to resume from (None trains from scratch)
    :return:
    """
    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO().reader
    images, labels, imagenames = decoder.read_features(
        ops.join(dataset_dir, 'train_feature.tfrecords'), num_epochs=None)
    inputdata, input_labels, input_imagenames = tf.train.shuffle_batch(
        tensors=[images, labels, imagenames], batch_size=32, capacity=1000 + 2 * 32,
        min_after_dequeue=100, num_threads=1)

    inputdata = tf.cast(x=inputdata, dtype=tf.float32)

    # initialize the net model
    shadownet = crnn_model.ShadowNet(phase='Train', hidden_nums=256, layers_nums=2,
                                     seq_length=25, num_classes=37)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=inputdata)

    cost = tf.reduce_mean(tf.nn.ctc_loss(labels=input_labels, inputs=net_out,
                                         sequence_length=25 * np.ones(32)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(net_out, 25 * np.ones(32),
                                                      merge_repeated=False)

    sequence_dist = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = config.cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               config.cfg.TRAIN.LR_DECAY_STEPS,
                                               config.cfg.TRAIN.LR_DECAY_RATE, staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(
            loss=cost, global_step=global_step)

    # Set tf summary
    tboard_save_path = 'tboard/shadownet'
    if not ops.exists(tboard_save_path):
        os.makedirs(tboard_save_path)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    model_save_dir = 'model/shadownet'
    if not ops.exists(model_save_dir):
        os.makedirs(model_save_dir)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(model_save_dir, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(tboard_save_path)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = config.cfg.TRAIN.EPOCHS

    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(train_epochs):
            _, c, seq_distance, preds, gt_labels, summary = sess.run(
                [optimizer, cost, sequence_dist, decoded, input_labels, merge_summary_op])

            # calculate the precision
            preds = decoder.sparse_tensor_to_str(preds[0])
            gt_labels = decoder.sparse_tensor_to_str(gt_labels)

            accuracy = []

            for index, gt_label in enumerate(gt_labels):
                pred = preds[index]
                total_count = len(gt_label)
                correct_count = 0
                try:
                    for i, tmp in enumerate(gt_label):
                        if tmp == pred[i]:
                            correct_count += 1
                except IndexError:
                    continue
                finally:
                    try:
                        accuracy.append(correct_count / total_count)
                    except ZeroDivisionError:
                        if len(pred) == 0:
                            accuracy.append(1)
                        else:
                            accuracy.append(0)

            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            # if epoch % config.cfg.TRAIN.DISPLAY_STEP == 0:
            logger.info('Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'.format(
                epoch + 1, c, seq_distance, accuracy))

            summary_writer.add_summary(summary=summary, global_step=epoch)
            saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()
    return
def train_shadownet(cfg: EasyDict, weights_path: str = None, decode: bool = False,
                    num_threads: int = 4):
    """
    :param cfg: configuration EasyDict (e.g. global_config.config.cfg)
    :param weights_path: Path to stored weights
    :param decode: Whether to perform CTC decoding to report progress during training
    :param num_threads: Number of threads to use in tf.train.shuffle_batch
    """
    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'ord_map.json')).reader

    images, labels, imagenames = decoder.read_features(
        ops.join(cfg.PATH.TFRECORDS_DIR, 'train_feature.tfrecords'), num_epochs=None,
        input_size=cfg.ARCH.INPUT_SIZE, input_channels=cfg.ARCH.INPUT_CHANNELS)

    inputdata, input_labels, input_imagenames = tf.train.shuffle_batch(
        tensors=[images, labels, imagenames], batch_size=cfg.TRAIN.BATCH_SIZE,
        capacity=1000 + 2 * cfg.TRAIN.BATCH_SIZE, min_after_dequeue=100,
        num_threads=num_threads)

    inputdata = tf.cast(x=inputdata, dtype=tf.float32)

    # initialise the net model
    shadownet = crnn_model.ShadowNet(phase='Train', hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                                     layers_nums=cfg.ARCH.HIDDEN_LAYERS,
                                     num_classes=len(decoder.char_dict) + 1)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=inputdata)

    cost = tf.reduce_mean(tf.nn.ctc_loss(
        labels=input_labels, inputs=net_out,
        sequence_length=cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(
        net_out, cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE), merge_repeated=False)

    sequence_dist = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               cfg.TRAIN.LR_DECAY_STEPS, cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=True)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(
            loss=cost, global_step=global_step)

    # Set tf summary
    os.makedirs(cfg.PATH.TBOARD_SAVE_DIR, exist_ok=True)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    # create the model directory (the original created TBOARD_SAVE_DIR again here)
    os.makedirs(cfg.PATH.MODEL_SAVE_DIR, exist_ok=True)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(cfg.PATH.MODEL_SAVE_DIR, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(cfg.PATH.TBOARD_SAVE_DIR)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = cfg.TRAIN.EPOCHS

    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(train_epochs):
            if decode:
                _, c, seq_distance, predictions, labels, summary = sess.run(
                    [optimizer, cost, sequence_dist, decoded, input_labels, merge_summary_op])

                labels = decoder.sparse_tensor_to_str(labels)
                predictions = decoder.sparse_tensor_to_str(predictions[0])
                accuracy = compute_accuracy(labels, predictions)

                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'.format(
                        epoch + 1, c, seq_distance, accuracy))
            else:
                _, c, summary = sess.run([optimizer, cost, merge_summary_op])
                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f}'.format(epoch + 1, c))

            summary_writer.add_summary(summary=summary, global_step=epoch)
            saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

        coord.request_stop()
        coord.join(threads=threads)
def recognize(image_path: str, weights_path: str, cfg: EasyDict, is_vis: bool = True,
              num_classes: int = 0):
    """
    :param image_path: path of the image to recognize
    :param weights_path: Path to stored weights
    :param cfg: configuration EasyDict (e.g. global_config.config.cfg)
    :param is_vis: whether to display the image
    :param num_classes: number of different characters in the dataset (0 infers it from the char dict)
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, tuple(cfg.ARCH.INPUT_SIZE))
    image = np.expand_dims(image, axis=0).astype(np.float32)

    w, h = cfg.ARCH.INPUT_SIZE
    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, h, w, cfg.ARCH.INPUT_CHANNELS],
                               name='input')

    codec = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'ord_map.json'))

    num_classes = len(codec.reader.char_dict) + 1 if num_classes == 0 else num_classes

    net = crnn_model.ShadowNet(phase='Test', hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                               layers_nums=cfg.ARCH.HIDDEN_LAYERS, num_classes=num_classes)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=cfg.ARCH.SEQ_LENGTH * np.ones(1),
                                               merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        preds = sess.run(decodes, feed_dict={inputdata: image})
        preds = codec.writer.sparse_tensor_to_str(preds[0])

        logger.info('Predict image {:s} label {:s}'.format(
            ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()

        sess.close()
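# A minimal usage sketch for the cfg-driven recognize() above. The docstrings in
# this file reference global_config.config.cfg for the configuration; the import
# path and the checkpoint path below are assumptions:
#
# from config import global_config
#
# recognize(image_path='data/test_images/test_01.jpg',
#           weights_path='model/shadownet/shadownet.ckpt-80000',
#           cfg=global_config.config.cfg)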
def train_shadownet(): """ :param dataset_dir: :param weights_path: :return: """ # input_tensor = tf.placeholder(dtype=tf.float32, shape=[config.cfg.TRAIN.BATCH_SIZE, 32, 100, 3], # name='input_tensor') # decode the tf records to get the training data decoder = data_utils.TextFeatureIO().reader images, labels, imagenames = decoder.read_features(FLAGS.dataset_dir, num_epochs=None, flag='Train') # images_val, labels_val, imagenames_val = decoder.read_features(dataset_dir, num_epochs=None, # flag='Validation') inputdata, input_labels, input_imagenames = tf.train.shuffle_batch( tensors=[images, labels, imagenames], batch_size=config.cfg.TRAIN.BATCH_SIZE, capacity=1000 + 2 * config.cfg.TRAIN.BATCH_SIZE, min_after_dequeue=100, num_threads=1) # inputdata_val, input_labels_val, input_imagenames_val = tf.train.shuffle_batch( # tensors=[images_val, labels_val, imagenames_val], batch_size=config.TRAIN.BATCH_SIZE, # capacity=1000 + 2 * config.TRAIN.BATCH_SIZE, # min_after_dequeue=100, num_threads=1) inputdata = tf.cast(x=inputdata, dtype=tf.float32) phase_tensor = tf.placeholder(dtype=tf.string, shape=None, name='phase') accuracy_tensor = tf.placeholder(dtype=tf.float32, shape=None, name='accuracy_tensor') # initialize the net model shadownet = crnn_model.ShadowNet(phase=phase_tensor, hidden_nums=256, layers_nums=2, seq_length=15, num_classes=config.cfg.TRAIN.CLASSES_NUMS, rnn_cell_type='lstm') with tf.variable_scope('shadow', reuse=False): net_out, tensor_dict = shadownet.build_shadownet(inputdata=inputdata) cost = tf.reduce_mean( tf.nn.ctc_loss(labels=input_labels, inputs=net_out, sequence_length=20 * np.ones(config.cfg.TRAIN.BATCH_SIZE))) decoded, log_prob = tf.nn.ctc_beam_search_decoder( net_out, 20 * np.ones(config.cfg.TRAIN.BATCH_SIZE), merge_repeated=False) sequence_dist = tf.reduce_mean( tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels)) global_step = tf.Variable(0, name='global_step', trainable=False) starter_learning_rate = config.cfg.TRAIN.LEARNING_RATE learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, config.cfg.TRAIN.LR_DECAY_STEPS, config.cfg.TRAIN.LR_DECAY_RATE, staircase=True) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): optimizer = tf.train.AdadeltaOptimizer( learning_rate=learning_rate).minimize(loss=cost, global_step=global_step) # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9).minimize( # loss=cost, global_step=global_step) # Set tf summary tboard_save_path = '/data/output/' if not ops.exists(tboard_save_path): os.makedirs(tboard_save_path) visualizor = tensorboard_vis_summary.CNNVisualizer() # training过程summary train_cost_scalar = tf.summary.scalar(name='train_cost', tensor=cost) train_accuracy_scalar = tf.summary.scalar(name='train_accuray', tensor=accuracy_tensor) train_seq_scalar = tf.summary.scalar(name='train_seq_dist', tensor=sequence_dist) train_conv1_image = visualizor.merge_conv_image( feature_map=tensor_dict['conv1'], scope='conv1_image') train_conv2_image = visualizor.merge_conv_image( feature_map=tensor_dict['conv2'], scope='conv2_image') train_conv3_image = visualizor.merge_conv_image( feature_map=tensor_dict['conv3'], scope='conv3_image') train_conv7_image = visualizor.merge_conv_image( feature_map=tensor_dict['conv7'], scope='conv7_image') lr_scalar = tf.summary.scalar(name='Learning_Rate', tensor=learning_rate) weights_tensor_dict = dict() for vv in tf.trainable_variables(): if 'conv' in vv.name: weights_tensor_dict[vv.name[:-2]] = vv 
train_weights_hist_dict = visualizor.merge_weights_hist( weights_tensor_dict=weights_tensor_dict, scope='weights_histogram', is_merge=False) train_summary_merge_list = [ train_cost_scalar, train_accuracy_scalar, train_seq_scalar, lr_scalar, train_conv1_image, train_conv2_image, train_conv3_image ] for _, weights_hist in train_weights_hist_dict.items(): train_summary_merge_list.append(weights_hist) train_summary_op_merge = tf.summary.merge(inputs=train_summary_merge_list) # validation过程summary # val_cost_scalar = tf.summary.scalar(name='val_cost', tensor=cost) # val_seq_scalar = tf.summary.scalar(name='val_seq_dist', tensor=sequence_dist) # val_accuracy_scalar = tf.summary.scalar(name='val_accuracy', tensor=accuracy_tensor) # test_summary_op_merge = tf.summary.merge(inputs=[val_cost_scalar, val_accuracy_scalar, # val_seq_scalar]) # Set saver configuration restore_variable_list = [tmp.name for tmp in tf.trainable_variables()] saver = tf.train.Saver() model_save_dir = '/data/output' if not ops.exists(model_save_dir): os.makedirs(model_save_dir) train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time)) model_save_path = ops.join(model_save_dir, model_name) # Set sess configuration sess_config = tf.ConfigProto() sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH sess_config.gpu_options.allocator_type = 'BFC' sess = tf.Session(config=sess_config) summary_writer = tf.summary.FileWriter(tboard_save_path) summary_writer.add_graph(sess.graph) # Set the training parameters train_epochs = config.cfg.TRAIN.EPOCHS print('Global configuration is as follows:') pprint.pprint(config.cfg) with sess.as_default(): if FLAGS.weights_path is None: logger.info('Training from scratch') init = tf.global_variables_initializer() sess.run(init) else: # logger.info('Restore model from last crnn check point{:s}'.format(weights_path)) # init = tf.global_variables_initializer() # sess.run(init) # restore_saver = tf.train.Saver(var_list=restore_variable_list) # restore_saver.restore(sess=sess, save_path=weights_path) logger.info('Restore model from last crnn check point{:s}'.format( FLAGS.weights_path)) saver.restore(sess=sess, save_path=FLAGS.weights_path) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) for epoch in range(train_epochs): _, c, seq_distance, preds, gt_labels = sess.run( [optimizer, cost, sequence_dist, decoded, input_labels], feed_dict={phase_tensor: 'train'}) # calculate the precision preds = decoder.sparse_tensor_to_str(preds[0]) gt_labels = decoder.sparse_tensor_to_str(gt_labels) accuracy = [] for index, gt_label in enumerate(gt_labels): pred = preds[index] totol_count = len(gt_label) correct_count = 0 try: for i, tmp in enumerate(gt_label): if tmp == pred[i]: correct_count += 1 except IndexError: continue finally: try: accuracy.append(correct_count / totol_count) except ZeroDivisionError: if len(pred) == 0: accuracy.append(1) else: accuracy.append(0) accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0) train_summary = sess.run(train_summary_op_merge, feed_dict={ accuracy_tensor: accuracy, phase_tensor: 'train' }) summary_writer.add_summary(summary=train_summary, global_step=epoch) if epoch % config.cfg.TRAIN.DISPLAY_STEP == 0: logger.info( 'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}' .format(epoch + 1, c, seq_distance, 
accuracy)) # if epoch % config.cfg.TRAIN.VAL_STEP == 0: # inputdata_value = sess.run(inputdata_val) # val_c, val_seq, val_preds, val_gt_labels = sess.run([ # cost, sequence_dist, decoded, input_labels_val], # feed_dict={phase_tensor: 'test', # input_tensor: inputdata_value}) # # preds_val = decoder.sparse_tensor_to_str(val_preds[0]) # gt_labels_val = decoder.sparse_tensor_to_str(val_gt_labels) # # accuracy_val = [] # # for index, gt_label in enumerate(gt_labels_val): # pred = preds_val[index] # totol_count = len(gt_label) # correct_count = 0 # try: # for i, tmp in enumerate(gt_label): # if tmp == pred[i]: # correct_count += 1 # except IndexError: # continue # finally: # try: # accuracy_val.append(correct_count / totol_count) # except ZeroDivisionError: # if len(pred) == 0: # accuracy_val.append(1) # else: # accuracy_val.append(0) # # accuracy_val = np.mean(np.array(accuracy_val).astype(np.float32), axis=0) # # test_summary = sess.run(test_summary_op_merge, # feed_dict={accuracy_tensor: accuracy_val, # phase_tensor: 'test', # input_tensor: inputdata_value}) # summary_writer.add_summary(summary=test_summary, global_step=epoch) # # logger.info('Epoch: {:d} val_cost= {:9f} val_seq_distance= {:9f} val_accuracy= {:9f}'.format( # epoch + 1, val_c, val_seq, accuracy_val)) if epoch % 500 == 0: saver.save(sess=sess, save_path=model_save_path, global_step=epoch) coord.request_stop() coord.join(threads=threads) sess.close() return
def test_shadownet(weights_path: str, cfg: EasyDict, visualize: bool,
                   process_all_data: bool = True, num_threads: int = 4,
                   num_classes: int = 0):
    """
    :param weights_path: Path to stored weights
    :param cfg: configuration EasyDict (e.g. global_config.config.cfg); cfg.PATH.TFRECORDS_DIR
                must hold test_feature.tfrecords, and cfg.PATH.CHAR_DICT_DIR the char_dict.json
                and ord_map.json generated with write_text_features.py
    :param visualize: whether to display the images
    :param process_all_data: whether to evaluate the whole test set or just one batch
    :param num_threads: Number of threads for tf.train.(shuffle_)batch
    :param num_classes: Number of different characters in the dataset
    """
    decoder = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'ord_map.json')).reader

    input_images, input_labels, input_image_names = decoder.read_features(
        cfg, cfg.TEST.BATCH_SIZE, num_threads, False)

    num_classes = len(decoder.char_dict) + 1 if num_classes == 0 else num_classes
    net = crnn_model.ShadowNet(phase='Test', hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                               layers_nums=cfg.ARCH.HIDDEN_LAYERS, num_classes=num_classes)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=input_images)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TEST.BATCH_SIZE),
                                               merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session(config=sess_config)

    test_sample_count = sum(1 for _ in tf.python_io.tf_record_iterator(
        ops.join(cfg.PATH.TFRECORDS_DIR, 'test_feature.tfrecords')))
    num_iterations = int(math.ceil(test_sample_count / cfg.TEST.BATCH_SIZE)) if process_all_data \
        else 1

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        print('Start predicting...')

        accuracy = 0
        for epoch in range(num_iterations):
            predictions, images, labels, image_names = sess.run(
                [decoded, input_images, input_labels, input_image_names])
            image_names = np.reshape(image_names, newshape=image_names.shape[0])
            image_names = [tmp.decode('utf-8') for tmp in image_names]

            labels = decoder.sparse_tensor_to_str(labels)
            predictions = decoder.sparse_tensor_to_str(predictions[0])

            accuracy += compute_accuracy(labels, predictions, display=False)

            for index, image in enumerate(images):
                print('Predict {:s} image with gt label: {:s} **** predicted label: {:s}'.format(
                    image_names[index], labels[index], predictions[index]))
                # avoid accidentally displaying for the whole dataset
                if visualize and not process_all_data:
                    plt.imshow(image[:, :, (2, 1, 0)])
                    plt.show()

        # We compute a mean of means, so we need the sample sizes to be constant
        # (BATCH_SIZE) for this to equal the actual mean
        accuracy /= num_iterations
        print('Mean test accuracy is {:5f}'.format(accuracy))
def main():
    codec = data_utils.TextFeatureIO(char_dict_path='app/Config/char_dict.json',
                                     ord_map_dict_path=r'app/Config/ord_map.json')
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
    if args.cpu_extension and 'CPU' in args.device:
        plugin.add_cpu_extension(args.cpu_extension)

    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    # if plugin.device == "CPU":
    #     supported_layers = plugin.get_supported_layers(net)
    #     not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
    #     if len(not_supported_layers) != 0:
    #         log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
    #                   format(plugin.device, ', '.join(not_supported_layers)))
    #         log.error("Please try to specify cpu extensions library path in sample's command line "
    #                   "parameters using -l or --cpu_extension command line argument")
    #         sys.exit(1)

    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(net.outputs) == 1, "Sample supports only single output topologies"

    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    net.batch_size = len(args.input)
    job_id = os.environ['PBS_JOBID'].split('.')[0]

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    images = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = cv2.imread(args.input[i])
        if image.shape[:-1] != (h, w):
            log.warning("Image {} is resized from {} to {}".format(args.input[i],
                                                                   image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image
    log.info("Batch size is {}".format(n))

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = plugin.load(network=net)
    del net

    # Start sync inference
    log.info("Starting inference ({} iterations)".format(args.number_iter))
    result_dir = os.path.join(args.output_dir, job_id)
    if not os.path.isdir(result_dir):
        print(result_dir)
        os.makedirs(result_dir, exist_ok=True)
    progress_file_path = os.path.join(result_dir, 'i_progress.txt')
    t0 = time()
    for i in range(args.number_iter):
        res = exec_net.infer(inputs={input_blob: images})
        print(i, args.number_iter)
        progressUpdate(progress_file_path, time() - t0, i + 1, args.number_iter)
    t1 = (time() - t0)
    # the original averaged an always-empty infer_time list, which yields nan
    log.info("Average running time of one iteration: {} ms".format(t1 * 1000 / args.number_iter))

    if args.perf_counts:
        perf_counts = exec_net.requests[0].get_perf_counts()
        log.info("Performance counters:")
        print("{:<70} {:<15} {:<15} {:<15} {:<10}".format('name', 'layer_type', 'exec_type',
                                                          'status', 'real_time, us'))
        for layer, stats in perf_counts.items():
            print("{:<70} {:<15} {:<15} {:<15} {:<10}".format(layer, stats['layer_type'],
                                                              stats['exec_type'], stats['status'],
                                                              stats['real_time']))

    # Processing output blob
    log.info("Processing output blob")
    res = res[out_blob]
    preds = res.argmax(2)
    preds = preds.transpose(1, 0)
    preds = np.ascontiguousarray(preds, dtype=np.int8).view(dtype=np.int8)
    values = codec.writer.ordtochar(preds[0].tolist())
    values = [v for i, v in enumerate(values) if i == 0 or v != values[i - 1]]
    values = [x for x in values if x != ' ']
    res = ''.join(values)
    print("The result is : " + res)

    avg_time = round((t1 * 1000 / args.number_iter), 3)
    with open(os.path.join(args.output_dir, job_id, 'result.txt'), 'w') as f:
        f.write(res + "\n Inference performed in " + str(avg_time) + "ms")

    stats = {}
    stats['time'] = str(round(t1, 1))
    stats['frames'] = str(args.number_iter * n)
    stats['fps'] = str(args.number_iter * n / t1)
    stats_file = result_dir + "/stats.json"
    with open(stats_file, 'w') as f:
        json.dump(stats, f)

    del exec_net
    del plugin
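# The output decoding in main() above is a greedy (best-path) CTC decode: take
# the argmax class per time step, collapse consecutive repeats, then drop the
# blank symbol (there, the blank maps to ' '). A self-contained NumPy sketch of
# the same rule on toy data, assuming a 3-class alphabet with index 0 as blank:
import numpy as np

def ctc_greedy_decode(logits, alphabet, blank=0):
    """logits: [time, num_classes]; returns the collapsed best-path string."""
    best_path = logits.argmax(axis=1)                        # per-step argmax
    collapsed = [p for i, p in enumerate(best_path)
                 if i == 0 or p != best_path[i - 1]]         # merge repeated symbols
    return ''.join(alphabet[p] for p in collapsed if p != blank)  # drop blanks

# e.g. the best path [1, 1, 0, 2, 2] over alphabet ['-', 'a', 'b'] decodes to 'ab'
logits = np.array([[0.1, 0.8, 0.1],
                   [0.2, 0.7, 0.1],
                   [0.9, 0.05, 0.05],
                   [0.1, 0.2, 0.7],
                   [0.1, 0.2, 0.7]])
assert ctc_greedy_decode(logits, ['-', 'a', 'b']) == 'ab'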
def write_features(dataset_dir, save_dir, anno_name):
    """
    :param dataset_dir: directory holding the dataset and its annotation file
    :param save_dir: directory in which to save the tfrecords
    :param anno_name: name of the annotation file; it also names the output tfrecords
    :return:
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initialize the dataset provider ......')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir, annotation_name=anno_name,
                                              validation_set=True, validation_split=0.001,
                                              shuffle=None, normalization=None)
    print('Dataset provider initialization complete')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')

    train_images_temp = provider.train.images
    train_image_widths = provider.train.image_widths
    train_images = []
    for index, image in enumerate(train_images_temp):
        # train_image_width is assumed to be a module-level constant
        train_images.append(bytes(list(np.reshape(image, [train_image_width * 32 * 3]))))
    print(len(train_images))

    train_labels = provider.train.labels
    train_imagenames = provider.train.imagenames
    train_tfrecord_path = ops.join(save_dir, anno_name[:-4] + '.tfrecords')  # 'train_feature.tfrecords'
    train_class_num = feature_io.writer.write_features(
        tfrecords_path=train_tfrecord_path, labels=train_labels, images=train_images,
        imagenames=train_imagenames, image_widths=train_image_widths)
    print('training class_num: ', train_class_num)

    # # write test tfrecords
    # print('Start writing testing tf records')
    # test_images_temp = provider.test.images
    # test_image_widths = provider.test.image_widths
    # test_images = []
    # for index, image in enumerate(test_images_temp):
    #     test_images.append(bytes(list(np.reshape(image, [train_image_width * 32 * 3]))))
    # print(len(test_images))
    # test_labels = provider.test.labels
    # test_imagenames = provider.test.imagenames
    # test_tfrecord_path = ops.join(save_dir, 'test_feature.tfrecords')
    # test_class_num = feature_io.writer.write_features(
    #     tfrecords_path=test_tfrecord_path, labels=test_labels, images=test_images,
    #     imagenames=test_imagenames, image_widths=test_image_widths)
    # print('test num_class: ', test_class_num)

    # write val tfrecords
    # val_images_temp = provider.validation.images
    # val_image_widths = provider.validation.image_widths
    # val_images = []
    # for index, image in enumerate(val_images_temp):
    #     val_images.append(bytes(list(np.reshape(image, [train_image_width * 32 * 3]))))
    # print(len(val_images))
    # val_labels = provider.validation.labels
    # val_imagenames = provider.validation.imagenames
    # val_tfrecord_path = ops.join(save_dir, 'validation_feature.tfrecords')
    # val_class_num = feature_io.writer.write_features(
    #     tfrecords_path=val_tfrecord_path, labels=val_labels, images=val_images,
    #     imagenames=val_imagenames, image_widths=val_image_widths)
    # print('val num_class: ', val_class_num)

    return
def test_shadownet(dataset_dir, weights_path, is_vis=True):
    """
    :param dataset_dir: directory holding test_feature.tfrecords
    :param weights_path: path of the checkpoint holding the model weights
    :param is_vis: whether to display the images
    :return:
    """
    # Initialize the record decoder
    decoder = data_utils.TextFeatureIO().reader
    images_t, labels_t, imagenames_t = decoder.read_features(
        ops.join(dataset_dir, 'test_feature.tfrecords'), num_epochs=None)
    images_sh, labels_sh, imagenames_sh = tf.train.shuffle_batch(
        tensors=[images_t, labels_t, imagenames_t], batch_size=32, capacity=1000 + 32 * 2,
        min_after_dequeue=2, num_threads=4)

    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    # build shadownet
    net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2,
                               seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out, 25 * np.ones(32), merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print('Start predicting ......')
        predictions, images, labels, imagenames = sess.run(
            [decoded, images_sh, labels_sh, imagenames_sh])
        imagenames = np.reshape(imagenames, newshape=imagenames.shape[0])
        imagenames = [tmp.decode('utf-8') for tmp in imagenames]
        preds_res = decoder.sparse_tensor_to_str(predictions[0])
        gt_res = decoder.sparse_tensor_to_str(labels)

        for index, image in enumerate(images):
            print('Predict {:s} image with gt label: {:s} **** predict label: {:s}'.format(
                imagenames[index], gt_res[index], preds_res[index]))
            if is_vis:
                plt.imshow(image[:, :, (2, 1, 0)])
                plt.show()

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()
    return
def train_shadownet(cfg: EasyDict, weights_path: str = None, decode: bool = False,
                    num_threads: int = 4) -> np.array:
    """
    :param cfg: configuration EasyDict (e.g. global_config.config.cfg)
    :param weights_path: Path to stored weights
    :param decode: Whether to perform CTC decoding to report progress during training
    :param num_threads: Number of threads to use in tf.train.shuffle_batch
    :return: History of values of the cost function
    """
    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'ord_map.json')).reader

    input_images, input_labels, input_image_names = decoder.read_features(
        cfg, cfg.TRAIN.BATCH_SIZE, num_threads)

    shadownet = crnn_model.ShadowNet(phase='Train', hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                                     layers_nums=cfg.ARCH.HIDDEN_LAYERS,
                                     num_classes=len(decoder.char_dict) + 1)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=input_images)

    cost = tf.reduce_mean(tf.nn.ctc_loss(
        labels=input_labels, inputs=net_out,
        sequence_length=cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(
        net_out, cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE), merge_repeated=False)

    sequence_dist = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               cfg.TRAIN.LR_DECAY_STEPS, cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=cfg.TRAIN.LR_STAIRCASE)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(
            loss=cost, global_step=global_step)

    # Set tf summary
    os.makedirs(cfg.PATH.TBOARD_SAVE_DIR, exist_ok=True)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    if decode:
        tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    # create the model directory (the original created TBOARD_SAVE_DIR again here)
    os.makedirs(cfg.PATH.MODEL_SAVE_DIR, exist_ok=True)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(cfg.PATH.MODEL_SAVE_DIR, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(cfg.PATH.TBOARD_SAVE_DIR)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = cfg.TRAIN.EPOCHS

    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        patience_counter = 1
        cost_history = [np.inf]
        for epoch in range(train_epochs):
            if epoch > 1 and cfg.TRAIN.EARLY_STOPPING:
                # We always compare to the first point where cost didn't improve
                if cost_history[-1 - patience_counter] - cost_history[-1] > cfg.TRAIN.PATIENCE_DELTA:
                    patience_counter = 1
                else:
                    patience_counter += 1
                if patience_counter > cfg.TRAIN.PATIENCE_EPOCHS:
                    logger.info("Cost didn't improve beyond {:f} for {:d} epochs, stopping early."
                                .format(cfg.TRAIN.PATIENCE_DELTA, patience_counter))
                    break

            if decode:
                _, c, seq_distance, predictions, labels, summary = sess.run(
                    [optimizer, cost, sequence_dist, decoded, input_labels, merge_summary_op])

                labels = decoder.sparse_tensor_to_str(labels)
                predictions = decoder.sparse_tensor_to_str(predictions[0])
                accuracy = compute_accuracy(labels, predictions)

                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'.format(
                        epoch + 1, c, seq_distance, accuracy))
            else:
                _, c, summary = sess.run([optimizer, cost, merge_summary_op])
                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f}'.format(epoch + 1, c))

            cost_history.append(c)
            summary_writer.add_summary(summary=summary, global_step=epoch)
            saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

    return np.array(cost_history[1:])  # Don't return the first np.inf
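# A minimal usage sketch for the early-stopping train_shadownet() above. The cfg
# fields used (TRAIN.EARLY_STOPPING, TRAIN.PATIENCE_EPOCHS, TRAIN.PATIENCE_DELTA,
# etc.) come from the function body; the configuration import path is an
# assumption:
#
# from config import global_config
#
# cost_history = train_shadownet(cfg=global_config.config.cfg,
#                                weights_path=None,   # train from scratch
#                                decode=True,         # report accuracy during training
#                                num_threads=4)
# print('final cost: {:f}'.format(cost_history[-1]))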