def recognize(image_path, weights_path, is_vis=True):
    """
    Run the CRNN model on a single image and log the decoded text.

    The image is resized to 100x32, fed through a ``ShadowNet`` built in
    'Test' phase, decoded with CTC beam search and the first decoded string
    is written to the module logger.

    :param image_path: path of the image file to recognise
    :param weights_path: checkpoint path passed to ``tf.train.Saver.restore``
    :param is_vis: when true, show the original image in a matplotlib window
    :return: None
    :raises IOError: if the image cannot be read from ``image_path``
    """
    # cv2.imread reports failure by returning None instead of raising, so
    # check here rather than failing later inside cv2.resize.
    original_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if original_image is None:
        raise IOError('Could not read image {}'.format(image_path))

    image = cv2.resize(original_image, (100, 32))
    image = np.expand_dims(image, axis=0).astype(np.float32)

    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')

    net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2,
                               seq_length=200, num_classes=148)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=200 * np.ones(1),
                                               merge_repeated=False)

    decoder = data_utils.TextFeatureIO()

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    # The context manager installs the session as default and guarantees it
    # is closed even if restoring or running the graph raises.
    with tf.Session(config=sess_config) as sess:
        saver.restore(sess=sess, save_path=weights_path)

        preds = sess.run(decodes, feed_dict={inputdata: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])

        logger.info('Predict image {:s} label {:s}'.format(ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            # Reverse the channel axis (OpenCV loads BGR) before display.
            plt.imshow(original_image[:, :, (2, 1, 0)])
            plt.show()

    return
def _encode_images(images, width=600, height=32):
    """
    Resize every image to ``width`` x ``height`` and serialise it to raw bytes.

    :param images: iterable of 3-channel image arrays accepted by ``cv2.resize``
    :param width: target width in pixels
    :param height: target height in pixels
    :return: list holding one ``bytes`` object per image
    """
    resized_images = [cv2.resize(image, (width, height)) for image in images]

    encoded = []
    for image in resized_images:
        # The reshape doubles as a check that the image has three channels.
        flat = np.reshape(image, [width * height * 3])
        if flat.dtype == np.uint8:
            # Same bytes as bytes(list(flat)) without a Python-level loop.
            encoded.append(flat.tobytes())
        else:
            # Element-wise conversion; raises for values that are not bytes.
            encoded.append(bytes(list(flat)))
    return encoded


def write_features(dataset_dir, save_dir):
    """
    Convert a text-recognition dataset into train/test/validation tfrecords.

    Images of every split are resized to 600x32 and written, together with
    their labels and image names, to ``train_feature.tfrecords``,
    ``test_feature.tfrecords`` and ``validation_feature.tfrecords`` inside
    ``save_dir``.

    :param dataset_dir: dataset directory handed to ``TextDataProvider``
    :param save_dir: output directory; created when it does not exist
    :return: None
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initialize the dataset provider ......')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir,
                                              annotation_name='sample.txt',
                                              validation_set=True,
                                              validation_split=0.12505,
                                              shuffle='every_epoch',
                                              normalization=None)
    print('Dataset provider intialize complete')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')
    train_images = provider.train.images
    print(len(train_images))
    feature_io.writer.write_features(
        tfrecords_path=ops.join(save_dir, 'train_feature.tfrecords'),
        labels=provider.train.labels,
        images=_encode_images(train_images),
        imagenames=provider.train.imagenames)

    # write test tfrecords
    print('Start writing testing tf records')
    feature_io.writer.write_features(
        tfrecords_path=ops.join(save_dir, 'test_feature.tfrecords'),
        labels=provider.test.labels,
        images=_encode_images(provider.test.images),
        imagenames=provider.test.imagenames)

    # write val tfrecords
    print('Start writing validation tf records')
    feature_io.writer.write_features(
        tfrecords_path=ops.join(save_dir, 'validation_feature.tfrecords'),
        labels=provider.validation.labels,
        images=_encode_images(provider.validation.images),
        imagenames=provider.validation.imagenames)

    return
def _char_accuracies(gt_labels, preds):
    """
    Compute the position-wise character accuracy of every prediction.

    Characters are compared index by index over the common prefix length and
    the number of matches is divided by the ground-truth length. An empty
    ground truth scores 1 when the prediction is empty too, otherwise 0.

    :param gt_labels: sequence of ground-truth strings
    :param preds: sequence of predicted strings, indexed like ``gt_labels``
    :return: list with one accuracy value per ground-truth label
    """
    accuracies = []
    for index, gt_label in enumerate(gt_labels):
        pred = preds[index]
        if len(gt_label) == 0:
            accuracies.append(1 if len(pred) == 0 else 0)
            continue
        matches = sum(1 for gt_char, pred_char in zip(gt_label, pred)
                      if gt_char == pred_char)
        accuracies.append(matches / len(gt_label))
    return accuracies


def validation_shadownet(filename, weights_path, is_vis=False, is_recursive=True):
    """
    Evaluate a trained CRNN checkpoint on a tfrecords file and print the results.

    :param filename: name of the tfrecords file, appended to
        ``<cwd>/model/CRNN/data/tfReal/``
    :param weights_path: checkpoint path passed to ``tf.train.Saver.restore``
    :param is_vis: show every image with matplotlib; only honoured when
        ``is_recursive`` is false
    :param is_recursive: when true, run ceil(record_count / 32) sequential
        batches and report the accuracy over all of them; when false, evaluate
        a single shuffled batch
    :return: None
    """
    batch_size = 32
    tfrecords_path = os.getcwd() + "/model/CRNN/data/tfReal/" + filename

    # Initialize the record decoder
    decoder = data_utils.TextFeatureIO().reader

    images_t, labels_t, imagenames_t = decoder.read_features(tfrecords_path, num_epochs=None)
    if not is_recursive:
        images_sh, labels_sh, imagenames_sh = tf.train.shuffle_batch(
            tensors=[images_t, labels_t, imagenames_t], batch_size=batch_size,
            capacity=1000 + 32 * 2, min_after_dequeue=2, num_threads=4)
    else:
        images_sh, labels_sh, imagenames_sh = tf.train.batch(
            tensors=[images_t, labels_t, imagenames_t], batch_size=batch_size,
            capacity=1000 + 32 * 2, num_threads=4)

    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    # build shadownet
    net = crnn_model.ShadowNet(phase='Validate', hidden_nums=256, layers_nums=2,
                               seq_length=200, num_classes=148)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out, 200 * np.ones(batch_size),
                                               merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    # Count the records before opening the session so that an unreadable
    # file does not leave a session behind.
    validate_sample_count = sum(
        1 for _ in tf.python_io.tf_record_iterator(tfrecords_path))
    loops_nums = int(math.ceil(validate_sample_count / batch_size))

    message = 'Predict {:s} image with gt label: {:s} **** predict label: {:s}'

    with tf.Session(config=sess_config) as sess:
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        def run_batch():
            """Run one batch and return (images, names, gt strings, predicted strings)."""
            predictions, images, labels, imagenames = sess.run(
                [decoded, images_sh, labels_sh, imagenames_sh])
            imagenames = np.reshape(imagenames, imagenames.shape[0])
            imagenames = [tmp.decode('utf-8') for tmp in imagenames]
            preds_res = decoder.sparse_tensor_to_str(predictions[0])
            gt_res = decoder.sparse_tensor_to_str(labels)
            return images, imagenames, gt_res, preds_res

        try:
            print('Start predicting ......')
            if not is_recursive:
                images, imagenames, gt_res, preds_res = run_batch()

                accuracy = _char_accuracies(gt_res, preds_res)
                accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
                print('Mean validate accuracy is {:5f}'.format(accuracy))

                for index, image in enumerate(images):
                    print(message.format(imagenames[index], gt_res[index], preds_res[index]))
                    if is_vis:
                        # Reverse the channel axis before display.
                        plt.imshow(image[:, :, (2, 1, 0)])
                        plt.show()
            else:
                accuracy = []
                for _ in range(loops_nums):
                    images, imagenames, gt_res, preds_res = run_batch()
                    accuracy.extend(_char_accuracies(gt_res, preds_res))

                    for index in range(len(images)):
                        print(message.format(imagenames[index], gt_res[index], preds_res[index]))

                accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
                print('Validate accuracy is {:5f}'.format(accuracy))
        finally:
            # Always stop the input threads, even when prediction fails.
            coord.request_stop()
            coord.join(threads=threads)

    return
def _mean_char_accuracy(gt_labels, preds):
    """
    Return the mean position-wise character accuracy of a batch.

    For every sample the characters are compared index by index over the
    common prefix length and the number of matches is divided by the
    ground-truth length. An empty ground truth scores 1 when the prediction
    is empty too, otherwise 0.

    :param gt_labels: sequence of ground-truth strings
    :param preds: sequence of predicted strings, indexed like ``gt_labels``
    :return: mean accuracy as a numpy float32 scalar
    """
    accuracies = []
    for index, gt_label in enumerate(gt_labels):
        pred = preds[index]
        if len(gt_label) == 0:
            accuracies.append(1 if len(pred) == 0 else 0)
            continue
        matches = sum(1 for gt_char, pred_char in zip(gt_label, pred)
                      if gt_char == pred_char)
        accuracies.append(matches / len(gt_label))
    return np.mean(np.array(accuracies).astype(np.float32), axis=0)


def train_shadownet(filename, train_epochs, weights_path=None, steps_per_checkpoint=None):
    """
    Train the CRNN model on a tfrecords file, logging and checkpointing as it goes.

    Summaries are written to ``model/CRNN/tboard/shadownet`` and checkpoints
    to ``model/CRNN/model/shadownet`` with the prefix ``shadownet_``.

    :param filename: name of the tfrecords file, appended to
        ``<cwd>/model/CRNN/data/tfReal/``
    :param train_epochs: number of training iterations to run (one batch of
        32 samples each); converted with ``int()``
    :param weights_path: checkpoint to resume from; None trains from scratch
    :param steps_per_checkpoint: save a checkpoint every this many
        iterations; None saves after every iteration
    :return: None
    """
    train_epochs = int(train_epochs)
    batch_size = 32

    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO().reader
    images, labels, imagenames = decoder.read_features(
        os.getcwd() + "/model/CRNN/data/tfReal/" + filename, num_epochs=None)
    inputdata, input_labels, _ = tf.train.shuffle_batch(
        tensors=[images, labels, imagenames], batch_size=batch_size,
        capacity=1000 + 2 * 32, min_after_dequeue=100, num_threads=1)

    inputdata = tf.cast(x=inputdata, dtype=tf.float32)

    shadownet = crnn_model.ShadowNet(phase='Train', hidden_nums=256, layers_nums=2,
                                     seq_length=200, num_classes=148)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=inputdata)

    cost = tf.reduce_mean(
        tf.nn.ctc_loss(labels=input_labels, inputs=net_out,
                       sequence_length=200 * np.ones(batch_size),
                       ignore_longer_outputs_than_inputs=True))

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out, 200 * np.ones(batch_size),
                                               merge_repeated=False)

    sequence_dist = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = config.cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                               config.cfg.TRAIN.LR_DECAY_STEPS,
                                               config.cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=True)

    # Run the ops registered under UPDATE_OPS together with every
    # optimisation step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=learning_rate).minimize(loss=cost, global_step=global_step)

    # Set tf summary
    tboard_save_path = 'model/CRNN/tboard/shadownet'
    if not ops.exists(tboard_save_path):
        os.makedirs(tboard_save_path)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    model_save_dir = 'model/CRNN/model/shadownet'
    if not ops.exists(model_save_dir):
        os.makedirs(model_save_dir)
    model_save_path = ops.join(model_save_dir, 'shadownet_')

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # The context manager installs the session as default and closes it on
    # every exit path.
    with tf.Session(config=sess_config) as sess:
        summary_writer = tf.summary.FileWriter(tboard_save_path)
        try:
            summary_writer.add_graph(sess.graph)

            if weights_path is None:
                logger.info('Training from scratch')
                sess.run(tf.global_variables_initializer())
            else:
                logger.info('Restore model from {:s}'.format(weights_path))
                saver.restore(sess=sess, save_path=weights_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                for epoch in range(train_epochs):
                    _, c, seq_distance, preds, gt_labels, summary = sess.run([
                        optimizer, cost, sequence_dist, decoded, input_labels,
                        merge_summary_op
                    ])

                    # calculate the precision
                    preds = decoder.sparse_tensor_to_str(preds[0])
                    gt_labels = decoder.sparse_tensor_to_str(gt_labels)
                    accuracy = _mean_char_accuracy(gt_labels, preds)

                    logger.info(
                        'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'
                        .format(epoch + 1, c, seq_distance, accuracy))

                    # add_summary expects a number, not the variable itself,
                    # so read the step value back from the session.
                    step = tf.train.global_step(sess, global_step)
                    summary_writer.add_summary(summary=summary, global_step=step)

                    current_step = epoch + 1
                    if (steps_per_checkpoint is None
                            or current_step % int(steps_per_checkpoint) == 0):
                        saver.save(sess=sess, save_path=model_save_path, global_step=step)
            finally:
                # Always stop the input threads, even when training fails.
                coord.request_stop()
                coord.join(threads=threads)
        finally:
            # Flush pending events to disk.
            summary_writer.close()

    return