def recognize(image_path: str, weights_path: str, files_limit=4):
    decoder = TextFeatureIO().reader
    images, filenames = load_images(image_path, files_limit)
    images = np.squeeze(images)
    padded_images = np.zeros([32, 32, 100, 3])
    padded_images[:images.shape[0], :, :, :] = images

    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32, shape=[32, 32, 100, 3], name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out, 25 * np.ones(32), merge_repeated=False)

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)
        print("Predict...")
        start_time = time()
        predictions = sess.run(decoded, feed_dict={inputdata: padded_images})
        end_time = time()
        print("Prediction time: {}".format(end_time - start_time))
        preds_res = decoder.sparse_tensor_to_str(predictions[0])

        for i, fname in enumerate(filenames):
            print("{}: {}".format(fname, preds_res[i]))
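# Note: `load_images` is an assumed helper that reads the images from disk, resizes
# them to the 100x32 input size expected by the placeholder above, and returns the
# batch together with the file names. The following is only a minimal sketch under
# that assumption, not the repository's actual implementation:
def load_images_sketch(image_path, files_limit):
    import cv2
    from os import listdir
    from os.path import isfile, join

    # collect at most `files_limit` image files from the folder
    files = [join(image_path, f) for f in listdir(image_path)
             if isfile(join(image_path, f))][:files_limit]
    # read and resize each image to width 100, height 32
    images = [cv2.resize(cv2.imread(f, cv2.IMREAD_COLOR), (100, 32)) for f in files]
    return np.array(images), files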
def save_model(weights_path: str, output_path: str):
    inputdata = tf.placeholder(dtype=tf.float32, shape=[BATCH_SIZE, 32, 100, 3], name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(BATCH_SIZE),
                                               merge_repeated=False)

    sparse_tensor_values = tf.to_int32(decodes[0]).values
    sparse_tensor_indices = tf.to_int32(decodes[0]).indices
    flattened_indices = tf.to_int32(tf.reshape(sparse_tensor_indices, [-1]))
    output = tf.concat([flattened_indices, sparse_tensor_values], 0, name='output')

    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)
        save_graph(sess, output_path)
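# Note: `save_graph` is assumed to freeze the restored session's graph to a protobuf
# file so it can be deployed without the checkpoint. A minimal sketch under that
# assumption (the project's actual helper may differ):
def save_graph_sketch(sess, output_path):
    # fold the restored variables into constants, keeping only the 'output' node
    frozen_graph_def = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_node_names=['output'])
    with tf.gfile.GFile(output_path, 'wb') as f:
        f.write(frozen_graph_def.SerializeToString())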
def run(self):
    self._recognition_time = []
    images_sh, labels_sh, imagenames_sh = self.load_data()
    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               25 * np.ones(self._batch_size),
                                               merge_repeated=self._merge_repeated)

    sess_config = self.config_tf_session()

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session(config=sess_config)
    with sess.as_default():
        # restore the model weights
        saver.restore(sess=sess, save_path=self._weights_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        self._log.info('Start predicting ...')
        accuracy, distance = self.test(decoded, imagenames_sh, images_sh, labels_sh, sess)
        coord.request_stop()
        coord.join(threads=threads)
    sess.close()

    avg_time = np.mean(self._recognition_time)
    return accuracy, distance, avg_time
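# Note: `config_tf_session` is assumed to build a tf.ConfigProto that limits GPU
# memory usage, similar to the explicit ConfigProto setup used elsewhere in the
# project. The concrete fraction/growth values below are placeholders, not the
# project's real configuration:
def config_tf_session_sketch(self):
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5  # assumed value
    sess_config.gpu_options.allow_growth = True                    # assumed value
    return sess_config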
def _build_net_model(self, input_data):
    self._log.info('Build net model...')
    crnn = CRNN(phase='Train', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow', reuse=False):
        net_out = crnn.build(inputdata=input_data)
    return net_out
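# Note: during training the 'Train'-phase net output above is typically fed into a
# CTC loss against sparse label tensors. The following is only a sketch of that step,
# not the project's actual training code (`input_labels` and `batch_size` are assumed
# to be provided by the caller):
def _compute_ctc_loss_sketch(self, net_out, input_labels, batch_size):
    # net_out: time-major logits from CRNN.build(); input_labels: tf.SparseTensor of
    # encoded ground-truth character indices
    return tf.reduce_mean(
        tf.nn.ctc_loss(labels=input_labels,
                       inputs=net_out,
                       sequence_length=25 * np.ones(batch_size)))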
def recognize(image_path: str, weights_path: str, config: GlobalConfig, is_vis=True):
    logger = LogFactory.get_logger()
    image = load_and_resize_image(image_path)

    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)
    decoder = TextFeatureIO()

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.get_gpu_config().memory_fraction
    sess_config.gpu_options.allow_growth = config.get_gpu_config().is_tf_growth_allowed()

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session(config=sess_config)
    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)
        preds = sess.run(decodes, feed_dict={inputdata: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])
        logger.info('Predict image {:s} label {:s}'.format(ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()
    sess.close()
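# Note: `load_and_resize_image` is assumed to read the image, resize it to 100x32 and
# add a batch dimension so it matches the [1, 32, 100, 3] placeholder above. A minimal
# sketch under that assumption:
def load_and_resize_image_sketch(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (100, 32))
    # add the leading batch dimension expected by the placeholder
    return np.expand_dims(image, axis=0).astype(np.float32)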
def recognize(image_path: str, weights_path: str, output_file: str, files_limit=32):
    decoder = TextFeatureIO().reader

    # Read all the files in the images folder
    files = [join(image_path, f) for f in listdir(image_path)
             if isfile(join(image_path, f))]

    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32, shape=[BATCH_SIZE, 32, 100, 3], name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)

    # top_paths=NUMBER_OF_PREDICTIONS is the number of candidate words to predict per image
    decoded, log_probabilities = tf.nn.ctc_beam_search_decoder(net_out,
                                                               25 * np.ones(BATCH_SIZE),
                                                               merge_repeated=False,
                                                               top_paths=NUMBER_OF_PREDICTIONS)

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():
        # restore the model weights
        print("Restoring trained model")
        saver.restore(sess=sess, save_path=weights_path)

        print("Predicting {} images in chunks of {}".format(len(files), BATCH_SIZE))
        starting_time = time()

        # Run inference in groups of BATCH_SIZE images over all files from the provided folder
        for group in chunker(files, BATCH_SIZE):
            start_time = time()
            images, filenames = load_images(group, files_limit)
            images = np.squeeze(images)
            padded_images = np.zeros([BATCH_SIZE, 32, 100, 3])
            padded_images[:images.shape[0], :, :, :] = images

            predictions, probs = sess.run([decoded, log_probabilities],
                                          feed_dict={inputdata: padded_images})

            for i, fname in enumerate(filenames):
                result = ''
                # convert the log probabilities of this image's beams into softmax probabilities
                e_x = np.exp(probs[i, :]) / np.sum(np.exp(probs[i, :]))

                # build the list of N predictions for each image
                for x in range(NUMBER_OF_PREDICTIONS):
                    preds_res2 = decoder.sparse_tensor_to_str(predictions[x])
                    result = result + ',{:s},{:f}'.format(preds_res2[i], e_x[x])

                # format the output line and append it to the csv file
                result = basename(fname) + result
                with open(output_file, 'a') as f:
                    f.write(result)
                    f.write('\n')

            end_time = time()
            print("Prediction time for {} images: {}".format(BATCH_SIZE, end_time - start_time))

        print("Total prediction time: {}".format(end_time - starting_time))
        print("Predictions saved in file {}".format(output_file))
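# Note: `chunker` is assumed to split the file list into fixed-size batches for the
# loop above. A common one-liner implementation of such a helper (shown here only as
# a hypothetical sketch):
def chunker_sketch(seq, size):
    # yield consecutive slices of length `size`; the last slice may be shorter
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))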