def save_model(weights_path: str, output_path: str):
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(BATCH_SIZE),
                                               merge_repeated=False)
    # Flatten the sparse decoder output into one dense tensor so the
    # exported graph exposes a single named 'output' endpoint.
    sparse_tensor_values = tf.to_int32(decodes[0]).values
    sparse_tensor_indices = tf.to_int32(decodes[0]).indices
    flattened_indices = tf.to_int32(tf.reshape(sparse_tensor_indices, [-1]))
    output = tf.concat([flattened_indices, sparse_tensor_values], 0, name='output')

    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)
        save_graph(sess, output_path)
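# A minimal usage sketch for save_model; BATCH_SIZE and the save_graph
# helper are defined elsewhere in the source, and both paths below are
# illustrative assumptions:
save_model(weights_path='./checkpoints/shadownet.ckpt',
           output_path='./export/crnn_frozen.pb')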
def recognize(image_path: str, weights_path: str, files_limit=4):
    decoder = TextFeatureIO().reader
    images, filenames = load_images(image_path, files_limit)
    images = np.squeeze(images)
    padded_images = np.zeros([32, 32, 100, 3])
    padded_images[:images.shape[0], :, :, :] = images
    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32, shape=[32, 32, 100, 3], name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               25 * np.ones(32),
                                               merge_repeated=False)

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)
        print("Predict...")
        start_time = time()
        predictions = sess.run(decoded, feed_dict={inputdata: padded_images})
        end_time = time()
        print("Prediction time: {}".format(end_time - start_time))
        preds_res = decoder.sparse_tensor_to_str(predictions[0])
        for i, fname in enumerate(filenames):
            print("{}: {}".format(fname, preds_res[i]))
def run(self):
    self._recognition_time = []
    images_sh, labels_sh, imagenames_sh = self.load_data()
    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    decoded, _ = tf.nn.ctc_beam_search_decoder(
        net_out,
        25 * np.ones(self._batch_size),
        merge_repeated=self._merge_repeated)

    sess_config = self.config_tf_session()

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session(config=sess_config)
    with sess.as_default():
        # restore the model weights
        saver.restore(sess=sess, save_path=self._weights_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        self._log.info('Start predicting ...')
        accuracy, distance = self.test(decoded, imagenames_sh, images_sh, labels_sh, sess)
        coord.request_stop()
        coord.join(threads=threads)
    sess.close()
    avg_time = np.mean(self._recognition_time)
    return accuracy, distance, avg_time
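# self.config_tf_session() is called above but not shown; a minimal sketch
# of a compatible helper on the same class (the GPU growth option is an
# illustrative assumption, not taken from the source):
def config_tf_session(self):
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True  # grab GPU memory on demand
    return sess_config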
def _build_net_model(self, input_data):
    self._log.info('Build net model...')
    crnn = CRNN(phase='Train', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow', reuse=False):
        net_out = crnn.build(inputdata=input_data)
    return net_out
def recognize(image_path: str, weights_path: str, config: GlobalConfig, is_vis=True):
    logger = LogFactory.get_logger()
    image = load_and_resize_image(image_path)

    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)
    decoder = TextFeatureIO()

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.get_gpu_config().memory_fraction
    sess_config.gpu_options.allow_growth = config.get_gpu_config().is_tf_growth_allowed()

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session(config=sess_config)
    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)
        preds = sess.run(decodes, feed_dict={inputdata: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])
        logger.info('Predict image {:s} label {:s}'.format(ops.split(image_path)[1], preds[0]))
        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()
        sess.close()
class OcrTextRec():
    def __init__(self, model_path='./checkpoints/CRNN.pth'):
        self.alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
        self.nclass = len(self.alphabet) + 1  # +1 for the CTC blank label
        self.model = CRNN(crnn_params.imgH, 1, self.nclass, 256)
        self.use_gpu = torch.cuda.is_available()
        if self.use_gpu:
            self.model.cuda()
        # map_location keeps CPU-only machines working with GPU-saved checkpoints
        self.model.load_state_dict(
            torch.load(model_path, map_location=None if self.use_gpu else 'cpu'))
        # Inference only: freeze all parameters.
        for p in self.model.parameters():
            p.requires_grad = False
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def inference(self, image):
        if len(image.shape) == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        h, w = image.shape
        # Keep the aspect ratio while resizing to the fixed model height.
        w_new = int(w / h * crnn_params.imgH)
        image = cv2.resize(image, (w_new, crnn_params.imgH), interpolation=cv2.INTER_CUBIC)
        image = (np.reshape(image, (crnn_params.imgH, w_new, 1))).transpose(2, 0, 1)
        image = image.astype(np.float32) / 255.
        image = torch.from_numpy(image).type(torch.FloatTensor)
        image.sub_(crnn_params.mean).div_(crnn_params.std)
        image = image.view(1, *image.size())
        if self.use_gpu:
            image = image.cuda()
        preds = self.model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = torch.IntTensor([preds.size(0)])
        sim_pred = self.converter.decode(preds.data, preds_size.data, raw=False)
        return sim_pred
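# A minimal usage sketch for OcrTextRec; the image file name is a
# hypothetical example, and the checkpoint path is the class default:
ocr = OcrTextRec(model_path='./checkpoints/CRNN.pth')
img = cv2.imread('sample_word.png')  # hypothetical test image (BGR)
print(ocr.inference(img))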
image_names = np.sort(glob.glob(os.path.join(data_path, dataset_name, '*', '*.bmp')))
print(len(image_names))
annotation_path = os.path.join(data_path, dataset_name, 'Annotations')
if not os.path.exists(annotation_path):
    os.makedirs(annotation_path)

# TextBoxes++ with DenseNet
tbpp = TBPP512_dense(softmax=False)
# model = TBPP512(softmax=False)
prior_util = PriorUtil(tbpp)
checkdir = os.path.dirname(weights_path)

input_width = 256
input_height = 32
crnn = CRNN((input_width, input_height, 1), len(alphabet), prediction_only=True, gru=False)

print("started loading model weights")
tic = time.time()
tbpp.load_weights(weights_path)
crnn.load_weights(weights_path_crnn)
print(time.time() - tic)
print("finished loading model weights")

anno_tool = Annotater(tbpp, crnn, prior_util, image_names, annotation_path)
anno_tool.annotate()
import tensorflow as tf
import numpy as np

from crnn_model import CRNN
from utils import params, char_dict, decode_to_text, data_generator, sparse_tuple_from

# options
np.set_printoptions(precision=3)
np.set_printoptions(threshold=np.inf)
np.set_printoptions(edgeitems=30, linewidth=100000)

# init
iter = 0
continue_training = True
model = CRNN(num_classes=params['NUM_CLASSES'], training=True)
if continue_training:
    model.load_weights('checkpoints/model_default')
# model.build(input_shape=(2, 32, 200, 1))
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=5)
loss_hist = []
# [print(i.name, i.shape) for i in model.trainable_variables]

# training
# dataset: https://www.robots.ox.ac.uk/~vgg/data/text/#sec-synth
# please check the data_generator in utils for the path to the dataset
# the training set contains 7224612 images / 32 = 225769 batches
for x_batch, y_batch in data_generator(batches=112884, batch_size=64, epochs=10):
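    # The loop body is not shown in the source; below is a minimal sketch of
    # one CTC training step consistent with the imports above. It assumes
    # sparse_tuple_from returns (indices, values, dense_shape) for building a
    # SparseTensor and that the model returns batch-major (batch, time,
    # classes) logits with the blank as the last class.
    with tf.GradientTape() as tape:
        logits = model(x_batch, training=True)
        labels = tf.SparseTensor(*sparse_tuple_from(y_batch))
        logit_length = tf.fill([tf.shape(logits)[0]], tf.shape(logits)[1])
        loss = tf.reduce_mean(tf.nn.ctc_loss(labels=labels,
                                             logits=logits,
                                             label_length=None,
                                             logit_length=logit_length,
                                             logits_time_major=False,
                                             blank_index=-1))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    loss_hist.append(float(loss))
    iter += 1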
    print('Test loss: %.6f, accuracy: %.6f' % (loss_avg.val(), accuracy))
    return accuracy


if __name__ == "__main__":
    opt = parser.parse_args()
    image_root = opt.image_root
    val_label = opt.val_label
    trained_net = opt.trained_net
    batch_size = opt.batch_size

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    nclass = len(crnn_params.alphabet) + 1  # +1 for the CTC blank label
    nc = 1
    model = CRNN(32, nc, nclass, crnn_params.nh)
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        model = model.cuda()
    criterion = torch.nn.CTCLoss(reduction='sum')

    if trained_net != '' and os.path.exists(trained_net):
        print('loading pretrained model from %s' % trained_net)
        model.load_state_dict(torch.load(trained_net))

    val_dataset = imgDataset(image_root, val_label, crnn_params.alphabet,
                             (crnn_params.imgW, crnn_params.imgH),
                             crnn_params.mean, crnn_params.std, is_aug=False)
def recognize(image_path: str, weights_path: str, output_file: str, files_limit=32):
    decoder = TextFeatureIO().reader
    # Read all the files in the images folder
    files = [join(image_path, f) for f in listdir(image_path)
             if isfile(join(image_path, f))]
    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)

    # top_paths=NUMBER_OF_PREDICTIONS is the number of candidate words to
    # return for each image
    decoded, log_probabilities = tf.nn.ctc_beam_search_decoder(
        net_out,
        25 * np.ones(BATCH_SIZE),
        merge_repeated=False,
        top_paths=NUMBER_OF_PREDICTIONS)

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():
        # restore the model weights
        # print('TFVERSION', tf.__version__)
        print("Restoring trained model")
        saver.restore(sess=sess, save_path=weights_path)
        print("Predicting {} images in chunks of {}".format(len(files), BATCH_SIZE))
        starting_time = time()

        # Run inference in groups of BATCH_SIZE images over all the files
        # from the provided folder.
        for group in chunker(files, BATCH_SIZE):
            start_time = time()
            images, filenames = load_images(group, files_limit)
            images = np.squeeze(images)
            padded_images = np.zeros([BATCH_SIZE, 32, 100, 3])
            padded_images[:images.shape[0], :, :, :] = images
            predictions, probs = sess.run([decoded, log_probabilities],
                                          feed_dict={inputdata: padded_images})
            for i, fname in enumerate(filenames):
                result = ''
                # convert the top-path log-probabilities to softmax probabilities
                e_x = np.exp(probs[i, :]) / np.sum(np.exp(probs[i, :]))
                # build the array of N predictions for each image
                for x in range(NUMBER_OF_PREDICTIONS):
                    preds_res2 = decoder.sparse_tensor_to_str(predictions[x])
                    result = result + ',{:s},{:f}'.format(preds_res2[i], e_x[x])
                # output string formatting and writing to csv file
                result = basename(fname) + result
                with open(output_file, 'a') as f:
                    f.write(result)
                    f.write('\n')
            end_time = time()
            print("Prediction time for {} images: {}".format(BATCH_SIZE, end_time - start_time))
        print("Total prediction time: {}".format(end_time - starting_time))
        print("Predictions saved in file {}".format(output_file))
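# chunker is used above but not shown; a minimal sketch of a compatible
# helper that yields consecutive slices of at most `size` items:
def chunker(seq, size):
    for pos in range(0, len(seq), size):
        yield seq[pos:pos + size]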
        train_data_path + '/country/', train_data_path + '/hiphop/',
        train_data_path + '/jazz/', train_data_path + '/light/'
    ]
    # Generate input data (mel-grams)
    melgrams = np.zeros((0, 96, 1366, 1))
    trainDataX, trainDataY, testDataX, testDataY = generateData.generateData(taggedFilePath, 6, 50)
    # Save input data
    np.save(output_path + '/trainDataX.npy', trainDataX)
    np.save(output_path + '/testDataX.npy', testDataX)
    np.save(output_path + '/trainDataY.npy', trainDataY)
    np.save(output_path + '/testDataY.npy', testDataY)
else:
    trainDataX = np.load(train_data_path + '/trainDataX.npy')
    testDataX = np.load(train_data_path + '/testDataX.npy')
    trainDataY = np.load(train_data_path + '/trainDataY.npy')
    testDataY = np.load(train_data_path + '/testDataY.npy')

obj_NN = CRNN(dropout_layer_rate=0.1,
              rnn_dropout_rate=0.1,
              nb_epoch=200,
              optimizer="adam",
              batch_size=32,
              save_model=True,
              save_model_path=output_path)
obj_NN.NN_getData(trainDataX, trainDataY, testDataX, testDataY)
model1, score1 = obj_NN.NN_model_train()
plotLossAcc(200, obj_NN.train_history)
# Validation
val_pkl = PICKLE_DIR + os.path.splitext(os.path.basename(PICKLE_NAME))[0] + '_val.pkl'
with open(val_pkl, 'rb') as f:
    gt_util_val = pickle.load(f)

ph_dict = ph_utils.get_ph_dict(data_path=PICKLE_DIR, file_name=PICKLE_NAME)
# print(len(ph_dict))

input_width = 256
input_height = 32
batch_size = 128
input_shape = (input_width, input_height, 1)

model, model_pred = CRNN(input_shape, len(ph_dict))
max_string_len = model_pred.output_shape[1]

gen_val = InputGenerator(gt_util_val, batch_size, ph_dict, input_shape[:2],
                         grayscale=True, max_string_len=max_string_len)

model.load_weights(CHECKPOINT_PATH)

g = gen_val.generate()
mean_ed = 0
          encoding='UTF8') as f:
    gt_data = json.load(f)
data_info = gt_data['info']

# crnn references
ph_dict = ph_utils.get_ph_dict(data_path=PICKLE_DIR, file_name=PICKLE_NAME)
input_width = 256
input_height = 32
batch_size = 128
input_shape = (input_width, input_height, 1)
# model, model_pred = CRNN(input_shape, len(ph_dict))
model = CRNN((input_width, input_height, 1), len(ph_dict), prediction_only=True)
model.load_weights('./checkpoints/202004011502_crnn_lstm_ph_all_v1/weights.110000.h5')

# tesseract references
lang = 'kor'
tess_cfg = " --psm 6 --oem 1 --tessdata-dir tessdata/org"

img_fnames = sorted(get_filenames('/home/sungsoo/Downloads/WORDS/',
                                  extensions='png',
                                  recursive_=True,
                                  exit_=True))
def recognize_text(batch_text_image):
    """ model configuration """
    if 'CTC' in crnn_opt.Prediction:
        converter = CTCLabelConverter(crnn_opt.character)
    else:
        converter = AttnLabelConverter(crnn_opt.character)
    crnn_opt.num_class = len(converter.character)
    # log = open(f'result/predict_and_gt.txt', 'a')
    if crnn_opt.rgb:
        crnn_opt.input_channel = 3
    if crnn_opt.sensitive:
        crnn_opt.character = string.printable[:-6]  # same as the ASTER setting (uses 94 chars)

    # model = CRNN(crnn_opt)
    # print('model input parameters', crnn_opt.imgH, crnn_opt.imgW, crnn_opt.num_fiducial, crnn_opt.input_channel, crnn_opt.output_channel,
    #       crnn_opt.hidden_size, crnn_opt.num_class, crnn_opt.batch_max_length, crnn_opt.Transformation, crnn_opt.FeatureExtraction,
    #       crnn_opt.SequenceModeling, crnn_opt.Prediction)
    # model = torch.nn.DataParallel(model).to(device)
    #
    # # load model
    # print('loading pretrained model from %s' % crnn_opt.saved_model)
    # model.load_state_dict(torch.load(crnn_opt.saved_model, map_location=device))
    # crnn_opt.exp_name = '_'.join(crnn_opt.saved_model.split('/')[1:])
    # print(model)

    """ keep evaluation model and result logs """
    # os.makedirs(f'./result/{crnn_opt.exp_name}', exist_ok=True)
    # os.system(f'cp {crnn_opt.saved_model} ./result/{crnn_opt.exp_name}/')

    """ setup loss """
    if 'CTC' in crnn_opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        # ignore [GO] token = ignore index 0
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

    """ evaluation """
    crnn_net = CRNN(crnn_opt)
    crnn_net = torch.nn.DataParallel(crnn_net, device_ids=[0]).to(device)
    crnn_net.load_state_dict(
        torch.load('./weights/TPS-ResNet-BiLSTM-Attn-Seed1111/best_accuracy.pth',
                   map_location=device))
    # log.write("===================================================================" + "\n")
    # log.write('Finished loading CRNN model!' + '\n')
    # print('Finished loading CRNN model!')
    crnn_net.eval()
    with torch.no_grad():
        # evaluation_loader = (batch_text_image, labels)
        evaluation_loader = batch_text_image
        if crnn_opt.benchmark_all_eval:
            # evaluation with 10 benchmark evaluation datasets
            benchmark_all_eval(crnn_net, criterion, evaluation_loader, converter, crnn_opt)
        # log = open(f'./result/{crnn_opt.exp_name}/log_evaluation.txt', 'a')
        # AlignCollate_evaluation = AlignCollate(imgH=crnn_opt.imgH, imgW=crnn_opt.imgW, keep_ratio_with_pad=crnn_opt.PAD)
        # eval_data, eval_data_log = hierarchical_dataset(root=crnn_opt.eval_data, crnn_opt=crnn_opt)
        # evaluation_loader = torch.utils.data.DataLoader(
        #     eval_data, batch_size=crnn_opt.batch_size,
        #     shuffle=False,
        #     num_workers=int(crnn_opt.workers),
        #     collate_fn=AlignCollate_evaluation, pin_memory=True)
        # _, accuracy_by_best_model, _, _, _, _, _, _ = validation(
        #     crnn_net, criterion, evaluation_loader, converter, crnn_opt)
        pred = validation(crnn_net, criterion, evaluation_loader, converter, crnn_opt)
        # log.write(eval_data_log)
        # print(f'{accuracy_by_best_model:0.3f}')
        # log.write(f'{accuracy_by_best_model:0.3f}\n')
        # log.close()
        return pred
ph_dict = ph_utils.get_ph_dict(data_path=PICKLE_DIR, file_name=PICKLE_NAME)
print(len(ph_dict))

# AI-HUB
# input_width = 256
# input_height = 32
# batch_size = 128

# AIG IDR
input_width = 256
input_height = 32
batch_size = 128
input_shape = (input_width, input_height, 1)

model, model_pred = CRNN(input_shape, len(ph_dict), gru=False)
max_string_len = model_pred.output_shape[1]

gen_train = InputGenerator(gt_util_train, batch_size, ph_dict, input_shape[:2],
                           grayscale=True, max_string_len=max_string_len)
gen_val = InputGenerator(gt_util_val, batch_size, ph_dict, input_shape[:2],
                         grayscale=True, max_string_len=max_string_len)
Model = DSODSL512
input_shape = (512, 512, 3)
weights_path = './checkpoints/201711132011_dsodsl512_synthtext/weights.001.h5'
segment_threshold = 0.55
link_threshold = 0.40
det_model = Model(input_shape)
prior_util = PriorUtil(det_model)
det_model.load_weights(weights_path)

# input_width = 256
input_width = 384
input_height = 32
weights_path = './checkpoints/201806190711_crnn_gru_synthtext/weights.300000.h5'
rec_model = CRNN((input_width, input_height, 1), len(alphabet), prediction_only=True, gru=True)
rec_model.load_weights(weights_path, by_name=True)

# To test on webcam 0, /dev/video0
video_path = 0
start_frame = 0
record = True
record_file_name = 'sl_end2end_record.avi'

try:
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open video file or webcam. If you're "
                      "trying to open a webcam, make sure your video_path is an integer!")
    vid_w = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    vid_h = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        m.bias.data.fill_(0)


def backward_hook(module, grad_input, grad_output):
    for g in grad_input:
        g[g != g] = 0  # replace all NaNs in the gradients with zero


if __name__ == "__main__":
    print('alphabet length : ', len(crnn_params.alphabet_list))
    opt = parser.parse_args()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    nclass = len(crnn_params.alphabet) + 1  # +1 for the CTC blank label
    nc = 1
    model = CRNN(crnn_params.imgH, nc, nclass, crnn_params.nh)
    criterion = torch.nn.CTCLoss(reduction='sum')
    optimizer = optim.Adam(model.parameters(),
                           lr=crnn_params.lr,
                           betas=(crnn_params.beta1, 0.999))
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        model = model.cuda()
        criterion = criterion.cuda()
    if opt.resume_net != '' and os.path.exists(opt.resume_net):
        print('loading pretrained model from %s' % opt.resume_net)
        model.load_state_dict(torch.load(opt.resume_net))
    else:
        model.apply(weights_init)
    model.register_backward_hook(backward_hook)
    train_dataset = imgDataset(opt.image_root, opt.train_label, crnn_params.alphabet,
                               (crnn_params.imgW, crnn_params.imgH),
                               crnn_params.mean, crnn_params.std)
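    # The training loop itself is not shown in the source; below is a minimal
    # sketch of one iteration consistent with this setup. The DataLoader, the
    # batch size literal, and the strLabelConverter-style `converter` are
    # assumptions, not part of the source.
    converter = strLabelConverter(crnn_params.alphabet)  # assumed helper (see OcrTextRec above)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

    for images, texts in train_loader:
        images = images.to(device)
        preds = model(images)  # (T, batch, nclass)
        targets, target_lengths = converter.encode(texts)  # strings -> label indices
        preds_sizes = torch.IntTensor([preds.size(0)] * images.size(0))
        # CTCLoss(reduction='sum') above, so normalize by batch size
        loss = criterion(preds.log_softmax(2), targets, preds_sizes, target_lengths) / images.size(0)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()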