def validate(svm, dir_name):
    """Run a trained SVM over the validation split and report its accuracy.

    Args:
        svm: a trained ``cv2.ml.SVM`` instance.
        dir_name: dataset root; samples are read from
            ``<dir_name>/validation/p`` (positives, label 1) and
            ``<dir_name>/validation/n`` (negatives, label -1).

    Returns:
        (pred, cur_acc): predicted labels as a list of ints and the
        accuracy score on the validation set.
    """
    img_list = []
    dir_name = dir_name + '/validation'
    # Positive validation samples -> label 1.
    load_img(dir_name + '/p', img_list)
    labels = [1] * len(img_list)
    # Negative validation samples -> label -1.
    tmp = len(img_list)
    load_img(dir_name + '/n', img_list)
    labels += [-1] * (len(img_list) - tmp)
    # HoG features, one row per image in load order.
    # (The original initialized HoG_list twice; the redundant init is removed.)
    HoG_list = []
    get_HoG(img_list, HoG_list)
    # predict() returns an (N, 1) array; ravel before int() so we do not rely
    # on NumPy's deprecated size-1-array-to-scalar conversion.
    _, pred = svm.predict(np.array(HoG_list))
    pred = [int(p) for p in np.ravel(pred)]
    cur_acc = metrics.accuracy_score(labels, pred)
    print("on validation set,the current accuracy is ", cur_acc)
    return pred, cur_acc
def create_img_data(input_file, output_file):
    """Augment the images under *input_file* and write the results to
    *output_file* as ``i_imgaug_<index>.jpg`` files."""
    # Earlier augmentation pipeline kept for reference:
    #seq=iaa.Sequential([iaa.Fliplr(0.5),iaa.GaussianBlur(sigma=(0,3.0))])
    # Apply a random subset of 1 to 4 of these augmenters to each image.
    seq = iaa.SomeOf((1, 4), [
        iaa.Fliplr(0.5),
        iaa.Flipud(1.0),
        iaa.GaussianBlur(1.0),
        iaa.AdditiveGaussianNoise()
    ])
    img_list = []
    # NOTE(review): the source directory is loaded twice, so every image is
    # augmented two times (doubling the output set). Presumably intentional
    # data doubling — confirm with the data-preparation pipeline.
    load_img(input_file, img_list)
    load_img(input_file, img_list)
    img_aug = seq.augment_images(img_list)
    # Persist each augmented image; index is position in the doubled list.
    for i in range(len(img_aug)):
        cv2.imwrite(output_file + '/i_imgaug_' + str(i) + '.jpg', img_aug[i])
def train(o_dir_name):
    """Train a linear SVM on HoG features and save the best model.

    Loads positives/negatives from ``<o_dir_name>/train/{p,n}``, grid-searches
    the C parameter (validating each candidate via :func:`validate`), retrains
    with the best C on the training set and saves it to ``first_train.xml``.

    Args:
        o_dir_name: dataset root containing ``train`` and ``validation`` dirs.
    """
    dir_name = o_dir_name + '/train'
    img_list = []
    # Positive samples -> label 1.
    load_img(dir_name + '/p', img_list)
    labels = [1] * len(img_list)
    # Negative samples -> label -1.
    tmp = len(img_list)
    load_img(dir_name + '/n', img_list)
    labels += [-1] * (len(img_list) - tmp)
    # HoG feature list, one entry per image.
    HoG_list = []
    get_HoG(img_list, HoG_list)
    print('received ', tmp, ' positive sample(s)')
    print('received', len(img_list) - tmp, ' negtive sample(s)')
    print('start training')
    # TODO: consider a second training pass on hard examples,
    # see https://www.xuebuyuan.com/2083806.html
    # Hoist the array conversions out of the search loop.
    samples = np.array(HoG_list)
    responses = np.array(labels)
    # BUG FIX: the original also grid-searched gamma, but with SVM_LINEAR
    # OpenCV ignores gamma entirely (it only applies to POLY/RBF/SIGMOID/CHI2
    # kernels), so 8 of every 9 training runs were identical wasted work.
    # Searching over C alone produces the same best model.
    best_c = 0.01
    best_acc = 0
    for C in [0.01, 0.1, 1, 5, 10, 50, 100]:
        svm = cv2.ml.SVM_create()
        svm.setC(C)
        svm.setType(cv2.ml.SVM_C_SVC)
        svm.setKernel(cv2.ml.SVM_LINEAR)
        svm.train(samples, cv2.ml.ROW_SAMPLE, responses)
        _, cur_acc = validate(svm, o_dir_name)
        if cur_acc > best_acc:
            best_c = C
            best_acc = cur_acc
    # Retrain with the best C on the full training set and persist.
    svm = cv2.ml.SVM_create()
    svm.setC(best_c)
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setKernel(cv2.ml.SVM_LINEAR)
    svm.train(samples, cv2.ml.ROW_SAMPLE, responses)
    svm.save('first_train.xml')
    print('svm data has been saved')
def test():
    """Evaluate the attention OCR model on ``flags.test_txt``.

    Restores the latest checkpoint from ``flags.load_dir``, decodes every test
    image, and prints plain accuracy — plus lexicon-constrained accuracy
    (nearest lexicon word by edit distance) when ``flags.lex_txt`` is set.
    """
    tf.reset_default_graph()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        encoder_outputs_t, inputs_t = build_cnn(False, flags.batch_size,
                                                flags.height, flags.width,
                                                flags.channels)
        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, keep_prob_t = build_network(encoder_outputs_t,
                                              True,
                                              flags.batch_size,
                                              flags.decoder_length,
                                              flags.tgt_vocab_size,
                                              flags.attn_num_hidden,
                                              flags.encoder_length,
                                              flags.max_gradient_norm)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    model_file = tf.train.latest_checkpoint(flags.load_dir)
    infer_saver.restore(infer_sess, model_file)

    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset(test)
    if flags.lex_txt != None:
        with open(flags.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]
    ti = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size
    gt = []
    predict = []
    # BUG FIX: previously this was created only inside the batch loop, so a
    # test set smaller than one batch (ti == 0) crashed the leftover pass
    # with a NameError. It is loop-invariant, so build it once up front.
    testing_decoder_inputs = np.zeros(
        (flags.decoder_length, flags.batch_size), dtype=float)

    def _decode(ids):
        # Map predicted ids to characters, skipping padding (-1) and <EOS>
        # symbols (original behaviour: skip, not stop at <EOS>).
        pd = []
        for c in ids:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        return ''.join(pd)

    def _run_batch():
        # Fetch one batch, run inference, return (texts, predicted ids).
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        texts = []
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])
        images = load_img(path, flags.height, flags.width)
        feed_dict_t = {
            inputs_t: images[:, :, :, np.newaxis],
            decoder_inputs_t: testing_decoder_inputs,
            keep_prob_t: 1
        }
        return texts, infer_sess.run(pred_ids, feed_dict=feed_dict_t)

    for t in range(ti):
        texts, q = _run_batch()
        for j in range(flags.batch_size):
            gt.append(texts[j])
            predict.append(_decode(np.array(q).T[j]))
    # BUG FIX: only run the leftover partial batch when one actually exists;
    # the original always consumed (and decoded) one extra batch even when
    # rest == 0.
    if rest > 0:
        texts, q = _run_batch()
        for k in range(rest):
            gt.append(texts[k])
            predict.append(_decode(np.array(q).T[k]))

    # Plain sequence accuracy.
    correct = float(0)
    cnt = 0
    acc_s = 0
    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1
    if cnt:
        acc_s = correct / cnt
    if flags.lex_txt != None:
        # Lexicon-constrained accuracy: snap each prediction to the nearest
        # lexicon entry; ties keep the earliest entry, matching the original
        # strict-< scan.
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            pl = min(lexicon, key=lambda w: editdistance.eval(predict[l], w))
            if pl == gt[l]:
                correct_l = correct_l + 1
        acc_l = correct_l / cnt
    print('accuracy: ', acc_s)
    if flags.lex_txt != None:
        print('accuracy with lexicon: ', acc_l)
def train():
    """Train the coordinate-augmented attention OCR model.

    Builds separate training and inference graphs, streams batches from
    ``flags.train_txt``, optionally supervises the attention maps
    (``flags.att_loss``), and every ``flags.eval_step`` steps checkpoints the
    model and sanity-checks it by re-decoding the most recent training batch.
    """
    # The CNN downsamples by a factor of 8, so the feature map is
    # f_size x f_size and the flattened encoder sequence has that length.
    f_size = int(flags.img_size / 8)
    encoder_length = f_size * f_size
    with open(flags.train_txt) as f:
        sample = [line.rstrip() for line in f]
    sample = np.array(sample)
    iteration = len(sample) // flags.batch_size
    data = Dataset(sample)
    tf.reset_default_graph()
    train_graph = tf.Graph()
    infer_graph = tf.Graph()
    start = time.time()
    with train_graph.as_default():
        c, inputs = build_cnn(is_training=True,
                              batch_size=flags.batch_size,
                              img_size=flags.img_size,
                              channels=flags.channels)
        deconv_outputs = build_deconv(True, c, flags.batch_size)
        # Build a constant 2-channel coordinate grid over [-0.5, 0.5] so each
        # feature-map position carries its own (x, y) location.
        x = np.linspace(-0.5, 0.5, f_size)
        x = np.tile(x, (f_size, 1))
        y = np.transpose(x)
        x = np.expand_dims(x, axis=2)
        y = np.expand_dims(y, axis=2)
        m = np.concatenate((x, y), axis=2)
        m = np.expand_dims(m, axis=0)
        m = np.repeat(m, flags.batch_size, axis=0)
        m = tf.convert_to_tensor(m, np.float32)
        # Concatenate coordinates onto the CNN features. 258 channels =
        # CNN output channels + 2 coordinate channels — presumably the CNN
        # emits 256; confirm against build_cnn.
        encoder_outputs = tf.concat([c, m], -1)
        encoder_outputs = tf.reshape(encoder_outputs,
                                     shape=(-1, f_size * f_size, 258))
        # Time-major layout for the attention decoder.
        encoder_outputs = tf.transpose(encoder_outputs, [1, 0, 2])
        train_op, loss, sample_ids, logits, decoder_inputs, \
            target_labels, learning_rate, attention_weights_history, att_label, lamda, att_mask, input_seg = build_network(
                encoder_outputs, False, flags.batch_size,
                flags.decoder_length, flags.tgt_vocab_size,
                flags.attn_num_hidden, encoder_length,
                flags.max_gradient_norm, f_size, flags.att_loss,
                flags.img_size, deconv_outputs)
        initializer = tf.global_variables_initializer()
        train_saver = tf.train.Saver()
    train_sess = tf.Session(graph=train_graph)
    train_sess.run(initializer)
    with infer_graph.as_default():
        # Mirror of the training graph with is_training=False; variables are
        # shared only through checkpoints (save in train graph, restore here).
        c_t, inputs_t = build_cnn(is_training=False,
                                  batch_size=flags.batch_size,
                                  img_size=flags.img_size,
                                  channels=flags.channels)
        deconv_outputs_t = build_deconv(False, c_t, flags.batch_size)
        x_t = np.linspace(-0.5, 0.5, f_size)
        x_t = np.tile(x_t, (f_size, 1))
        y_t = np.transpose(x_t)
        x_t = np.expand_dims(x_t, axis=2)
        y_t = np.expand_dims(y_t, axis=2)
        m_t = np.concatenate((x_t, y_t), axis=2)
        m_t = np.expand_dims(m_t, axis=0)
        m_t = np.repeat(m_t, flags.batch_size, axis=0)
        m_t = tf.convert_to_tensor(m_t, np.float32)
        encoder_outputs_t = tf.concat([c_t, m_t], -1)
        encoder_outputs_t = tf.reshape(encoder_outputs_t,
                                       shape=(-1, f_size * f_size, 258))
        encoder_outputs_t = tf.transpose(encoder_outputs_t, [1, 0, 2])
        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, _, _, _, _, _ = build_network(
                encoder_outputs_t, True, flags.batch_size,
                flags.decoder_length, flags.tgt_vocab_size,
                flags.attn_num_hidden, encoder_length,
                flags.max_gradient_norm, f_size, flags.att_loss,
                flags.img_size, deconv_outputs_t)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    # Training
    la = 10  # weight fed into the `lamda` placeholder for the attention loss
             # — presumably a loss-balancing coefficient; confirm build_network.
    acc_log = 0   # best evaluation accuracy seen so far
    count = 0     # evaluations since the last improvement (patience counter)
    lr = flags.learning_rate
    for h in range(flags.epoch):
        for i in range(iteration):
            batch_train = data.next_batch(flags.batch_size)
            path = []
            texts = []
            # Each line is "<image path> <label>" split on the first space.
            for line in batch_train:
                path.append(line.split(' ')[0])
                texts.append(line.split(' ')[1])
            if flags.att_loss:
                # Attention supervision also needs per-char maps/masks/segmentation.
                images, npy, mask, seg = load_img_label(
                    path, flags.img_size, flags.decoder_length)
            else:
                images = load_img(path, flags.img_size)
            training_target_labels = get_label(texts, flags.decoder_length)
            # Teacher forcing: decoder inputs are the targets shifted right by
            # one, with a leading GO (0) column; transposed to time-major.
            training_decoder_inputs = np.delete(training_target_labels, -1, axis=1)
            training_decoder_inputs = np.c_[
                np.zeros(training_decoder_inputs.shape[0]),
                training_decoder_inputs].T
            feed_dict = {
                inputs: images,
                decoder_inputs: training_decoder_inputs,
                target_labels: training_target_labels,
                learning_rate: lr
            }
            if flags.att_loss:
                feed_dict[att_label] = npy
                feed_dict[att_mask] = mask
                feed_dict[input_seg] = seg[:, :, :, np.newaxis]
                feed_dict[lamda] = la
            # NOTE(review): attention_weights_history is fetched on every step
            # but `att` is never used; also verify the fetch is valid when
            # flags.att_loss is False and its placeholders are not fed.
            _, loss_value, att = train_sess.run(
                [train_op, loss, attention_weights_history],
                feed_dict=feed_dict)
            step = float(i + 1)
            if step % flags.display_step == 0:
                now = time.time()
                print(step, now - start, loss_value)
                start = now
            if step % flags.eval_step == 0:
                # Checkpoint, then evaluate by re-decoding the *current
                # training batch* in the inference graph — a smoke test, not
                # a held-out validation.
                train_saver.save(train_sess, flags.save_dir)
                model_file = tf.train.latest_checkpoint(
                    flags.save_dir.rsplit('/', 1)[0])
                infer_saver.restore(infer_sess, model_file)
                gt = []
                predict = []
                images = load_img(path, flags.img_size)
                testing_decoder_inputs = np.zeros(
                    (flags.decoder_length, flags.batch_size), dtype=float)
                feed_dict_t = {
                    inputs_t: images,
                    decoder_inputs_t: testing_decoder_inputs
                }
                q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)
                for j in range(flags.batch_size):
                    gt.append(texts[j])
                    ans = np.array(q).T[j]
                    pd = []
                    # Skip padding (-1) and <EOS> symbols when decoding.
                    for c in ans:
                        if c != -1:
                            character = tools.idx_to_word[c]
                            if character != '<EOS>':
                                pd.append(character)
                    predict.append(''.join(pd))
                correct = float(0)
                cnt = 0
                acc_s = 0
                for l in range(len(gt)):
                    cnt = cnt + 1
                    if gt[l] == predict[l]:
                        correct = correct + 1
                count = count + 1
                acc_s = correct / cnt
                if acc_s > acc_log:
                    acc_log = acc_s
                    count = 0
                # Decay the learning rate after a full epoch's worth of
                # evaluations without improvement. NOTE(review): count is not
                # reset after the decay, so with continued non-improvement the
                # equality never fires again until an improvement resets it —
                # confirm that a single decay per plateau is intended.
                if count == (iteration // flags.eval_step):
                    lr = lr / 5
def test():
    """Evaluate the beam-search OCR model on ``FLAGS.test_txt``.

    Restores the latest checkpoint from ``FLAGS.load_dir``, decodes the top
    beam for every test image, and prints plain accuracy — plus
    lexicon-constrained accuracy (nearest lexicon word by Levenshtein
    distance) when ``FLAGS.lex_txt`` is set.
    """
    tf.reset_default_graph()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        _, _, pred_ids, pred_logits, inputs_t, decoder_inputs_t, decoder_lengths_t, \
            _ = build_network(is_training=False,
                              batch_size=FLAGS.batch_size,
                              height=FLAGS.height,
                              width=FLAGS.width,
                              channels=FLAGS.channels,
                              decoder_length=FLAGS.decoder_length,
                              tgt_vocab_size=FLAGS.tgt_vocab_size,
                              num_units=FLAGS.num_units,
                              beam_width=FLAGS.beam_width,
                              encoder_length=FLAGS.encoder_length,
                              max_gradient_norm=FLAGS.max_gradient_norm,
                              embedding_size=FLAGS.embedding_size)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    with open(FLAGS.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset_test(test)
    if FLAGS.lex_txt != None:
        with open(FLAGS.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]
    ti = int(test_len / FLAGS.batch_size)
    rest = test_len % FLAGS.batch_size
    gt = []
    predict = []
    model_file = tf.train.latest_checkpoint(FLAGS.load_dir)
    infer_saver.restore(infer_sess, model_file)

    # Loop-invariant decoder-length feed, built once.
    decoder_lengths_feed = np.ones(
        (FLAGS.batch_size), dtype=int) * FLAGS.decoder_length

    def _decode(ids):
        # Map predicted ids to characters, skipping padding (-1) and <EOS>
        # symbols (original behaviour: skip, not stop at <EOS>).
        pd = []
        for c in ids:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        return ''.join(pd)

    def _run_batch():
        # Fetch one batch, run beam-search inference, return (texts, ids).
        batch_test = data_test.next_batch(FLAGS.batch_size)
        path = []
        texts = []
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])
        images = load_img(path, FLAGS.height, FLAGS.width)
        feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                       decoder_lengths_t: decoder_lengths_feed}
        return texts, infer_sess.run(pred_ids, feed_dict=feed_dict_t)

    for t in range(ti):
        texts, q = _run_batch()
        for j in range(len(texts)):
            gt.append(texts[j])
            predict.append(_decode(q[j].T[0]))  # top beam
    # BUG FIX: only run the leftover partial batch when one actually exists;
    # the original always consumed one extra dataset batch even when
    # rest == 0.
    if rest > 0:
        texts, q = _run_batch()
        for k in range(rest):
            gt.append(texts[k])
            predict.append(_decode(q[k].T[0]))

    # Plain sequence accuracy.
    correct = float(0)
    cnt = 0
    acc_s = 0
    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1
    if cnt:
        acc_s = correct / cnt
    if FLAGS.lex_txt != None:
        # Lexicon-constrained accuracy: snap each prediction to the nearest
        # lexicon entry; ties keep the earliest entry, matching the original
        # strict-< scan.
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            pl = min(lexicon, key=lambda w: distance.levenshtein(predict[l], w))
            if pl == gt[l]:
                correct_l = correct_l + 1
        acc_l = correct_l / cnt
    print('accuracy: ', acc_s)
    if FLAGS.lex_txt != None:
        print('accuracy with lexicon: ', acc_l)
def train():
    """Train the attention OCR model (CNN encoder + attention decoder).

    Builds separate training and inference graphs, streams batches from
    ``flags.train_txt``, and every ``flags.eval_step`` steps checkpoints the
    model and sanity-checks it by re-decoding the most recent training batch.
    """
    with open(flags.train_txt) as f:
        sample = [line.rstrip() for line in f]
    sample = np.array(sample)
    iteration = len(sample) // flags.batch_size
    data = Dataset(sample)
    tf.reset_default_graph()
    train_graph = tf.Graph()
    infer_graph = tf.Graph()
    with train_graph.as_default():
        encoder_outputs, inputs = build_cnn(True, flags.batch_size,
                                            flags.height, flags.width,
                                            flags.channels)
        train_op, loss, sample_ids, logits, decoder_inputs, \
            target_labels, learning_rate, keep_prob = build_network(
                encoder_outputs, False, flags.batch_size,
                flags.decoder_length, flags.tgt_vocab_size,
                flags.attn_num_hidden, flags.encoder_length,
                flags.max_gradient_norm)
        initializer = tf.global_variables_initializer()
        train_saver = tf.train.Saver()
    train_sess = tf.Session(graph=train_graph)
    train_sess.run(initializer)
    with infer_graph.as_default():
        # Mirror graph for inference; weights are shared only through
        # checkpoints (saved from the train graph, restored here).
        encoder_outputs_t, inputs_t = build_cnn(False, flags.batch_size,
                                                flags.height, flags.width,
                                                flags.channels)
        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, keep_prob_t = build_network(
                encoder_outputs_t, True, flags.batch_size,
                flags.decoder_length, flags.tgt_vocab_size,
                flags.attn_num_hidden, flags.encoder_length,
                flags.max_gradient_norm)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    # Training
    start = time.time()
    acc_log = 0   # best evaluation accuracy seen so far
    count = 0     # evaluations since the last improvement (patience counter)
    lr = flags.learning_rate
    for h in range(flags.epoch):
        for i in range(iteration):
            batch_train = data.next_batch(flags.batch_size)
            path = []
            texts = []
            # Each line is "<image path> <label>" split on the first space.
            for line in batch_train:
                path.append(line.split(' ')[0])
                texts.append(line.split(' ')[1])
            images = load_img(path, flags.height, flags.width)
            training_target_labels = get_label(texts, flags.decoder_length)
            # Teacher forcing: decoder inputs are the targets shifted right by
            # one with a leading GO (0) column; transposed to time-major.
            training_decoder_inputs = np.delete(training_target_labels, -1, axis=1)
            training_decoder_inputs = np.c_[
                np.zeros(training_decoder_inputs.shape[0]),
                training_decoder_inputs].T
            feed_dict = {
                inputs: images[:, :, :, np.newaxis],
                decoder_inputs: training_decoder_inputs,
                target_labels: training_target_labels,
                learning_rate: lr,
                keep_prob: 0.5
            }
            _, loss_value = train_sess.run([train_op, loss],
                                           feed_dict=feed_dict)
            step = float(i + 1)
            if step % flags.display_step == 0:
                now = time.time()
                print(step, now - start, loss_value)
                start = now
            if step % flags.eval_step == 0:
                # Checkpoint, then evaluate by re-decoding the *current
                # training batch* in the inference graph — a smoke test, not
                # a held-out validation.
                train_saver.save(train_sess, flags.save_dir)
                model_file = tf.train.latest_checkpoint(
                    flags.save_dir.rsplit('/', 1)[0])
                infer_saver.restore(infer_sess, model_file)
                gt = []
                predict = []
                images = load_img(path, flags.height, flags.width)
                testing_decoder_inputs = np.zeros(
                    (flags.decoder_length, flags.batch_size), dtype=float)
                feed_dict_t = {
                    inputs_t: images[:, :, :, np.newaxis],
                    decoder_inputs_t: testing_decoder_inputs,
                    keep_prob_t: 1
                }
                q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)
                for j in range(flags.batch_size):
                    gt.append(texts[j])
                    ans = np.array(q).T[j]
                    pd = []
                    # Skip padding (-1) and <EOS> symbols when decoding.
                    for c in ans:
                        if c != -1:
                            character = tools.idx_to_word[c]
                            if character != '<EOS>':
                                pd.append(character)
                    predict.append(''.join(pd))
                correct = float(0)
                cnt = 0
                acc_s = 0
                for l in range(len(gt)):
                    cnt = cnt + 1
                    if gt[l] == predict[l]:
                        correct = correct + 1
                count = count + 1
                acc_s = correct / cnt
                if acc_s > acc_log:
                    acc_log = acc_s
                    count = 0
                # Decay the learning rate after an epoch's worth of
                # evaluations without improvement. NOTE(review): count is not
                # reset after the decay, so the equality cannot fire again
                # until an improvement resets it — confirm a single decay per
                # plateau is intended.
                if count == (iteration // flags.eval_step):
                    lr = lr / 5
def test():
    """Two-stage evaluation: a binary classifier gates the OCR decoder.

    Pass 1 extracts CNN features (``x2_t``) for every test image and runs the
    separately-trained classifier to get a per-image label; pass 2 re-reads
    the test set and feeds those labels into the recognizer's ``prob_t``
    placeholder while decoding. Prints plain accuracy and, when
    ``flags.lex_txt`` is given, lexicon-constrained accuracy.
    """
    from dataset import Dataset_test
    from tools import load_img
    import editdistance
    tf.reset_default_graph()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        encoder_inputs_t, x2_t, inputs_t = build_cnn(training=False,
                                                     batch_size=flags.batch_size,
                                                     height=flags.height,
                                                     width=flags.width,
                                                     channels=flags.channels)
        _, _, pred_ids, pred_logits, decoder_inputs_t, decoder_lengths_t, \
            _, keep_prob_t, prob_t = build_network(encoder_inputs_t,
                                                   is_training=False,
                                                   batch_size=flags.batch_size,
                                                   decoder_length=flags.decoder_length,
                                                   tgt_vocab_size=flags.tgt_vocab_size,
                                                   num_units=flags.num_units,
                                                   beam_width=flags.beam_width,
                                                   encoder_length=flags.encoder_length,
                                                   max_gradient_norm=None,
                                                   embedding_size=flags.embedding_size,
                                                   initial_learning_rate=None)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    model_file = tf.train.latest_checkpoint(flags.r_path)
    print(flags.r_path)
    infer_saver.restore(infer_sess, model_file)
    class_graph = tf.Graph()
    with class_graph.as_default():
        prob, inputs_f, inputs_l, loss, train_op = build_classifier(
            training=False, batch_size=flags.batch_size, c_learning_rate=None)
        class_saver = tf.train.Saver()
    class_sess = tf.Session(graph=class_graph)
    model_file = tf.train.latest_checkpoint(flags.c_path)
    class_saver.restore(class_sess, model_file)
    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset_test(test)
    if flags.lex_txt != None:
        with open(flags.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]
    steps = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size
    predict_c = []
    path_log = []
    labelc = []
    # ---- Pass 1: classifier labels for every test image. ----
    # BUG FIX: the batch size was hard-coded as 256 here and in the slices
    # below; any flags.batch_size != 256 mis-shaped the label feed and
    # mis-sliced labelc. All occurrences now use flags.batch_size
    # (identical behaviour when batch_size == 256).
    for t in range(steps):
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        label = np.tile([1, 0], (flags.batch_size, 1))  # dummy one-hot labels
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            path_log.append(line.split(' ', 1)[0])
        images = load_img(path, flags.height, flags.width)
        feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                       decoder_lengths_t: np.ones((flags.batch_size),
                                                  dtype=int) * flags.decoder_length,
                       keep_prob_t: 1, prob_t: np.zeros(flags.batch_size)}
        feature = infer_sess.run(x2_t, feed_dict=feed_dict_t)
        feed_dict = {inputs_f: feature, inputs_l: label}
        o = class_sess.run(prob, feed_dict=feed_dict)
        for j in range(len(label)):
            predict_c.append(o[j])
            labelc.append(np.argmax(o[j]))
    # Leftover partial batch: labelc keeps the full padded batch so the
    # slice labelc[-flags.batch_size:] below lines up with pass 2.
    batch_test = data_test.next_batch(flags.batch_size)
    path = []
    label = np.tile([1, 0], (flags.batch_size, 1))
    for line in batch_test:
        path.append(line.split(' ', 1)[0])
    images = load_img(path, flags.height, flags.width)
    feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                   decoder_lengths_t: np.ones((flags.batch_size),
                                              dtype=int) * flags.decoder_length,
                   keep_prob_t: 1, prob_t: np.zeros(flags.batch_size)}
    feature = infer_sess.run(x2_t, feed_dict=feed_dict_t)
    feed_dict = {inputs_f: feature, inputs_l: label}
    o = class_sess.run(prob, feed_dict=feed_dict)
    for j in range(len(label)):
        labelc.append(np.argmax(o[j]))
    for k in range(rest):
        predict_c.append(o[k])
    # Classifier accuracy (class 0 treated as the positive/correct class).
    correct = float(0)
    cnt = 0
    acc_c = 0
    for l in range(len(predict_c)):
        cnt = cnt + 1
        if 0 == np.argmax(predict_c[l]):
            correct = correct + 1
    acc_c = correct / cnt
    #print('acc_c:', acc_c)
    # ---- Pass 2: recognition, gated by the classifier labels. ----
    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset_test(test)
    with open(flags.lex_txt) as f:
        lex = [line.rstrip().lower() for line in f]
    steps = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size
    gt = []
    predict = []

    def _decode(ids):
        # Map predicted ids to characters, skipping padding (-1) and <EOS>.
        pd = []
        for c in ids:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        return ''.join(pd)

    for t in range(steps):
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        texts = []
        label = labelc[flags.batch_size * (t):flags.batch_size * (t + 1)]
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])
        images = load_img(path, flags.height, flags.width)
        feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                       decoder_lengths_t: np.ones((flags.batch_size),
                                                  dtype=int) * flags.decoder_length,
                       keep_prob_t: 1, prob_t: label}
        q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)
        for j in range(len(texts)):
            gt.append(texts[j])
            predict.append(_decode(q[j].T[0]))  # top beam
    # Leftover partial batch for recognition.
    batch_test = data_test.next_batch(flags.batch_size)
    path = []
    texts = []
    label = labelc[-flags.batch_size:]
    for line in batch_test:
        path.append(line.split(' ', 1)[0])
        texts.append(line.split(' ', 1)[1])
    images = load_img(path, flags.height, flags.width)
    feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                   decoder_lengths_t: np.ones((flags.batch_size),
                                              dtype=int) * flags.decoder_length,
                   keep_prob_t: 1, prob_t: label}
    q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)
    for k in range(rest):
        gt.append(texts[k])
        path_log.append(path[k])
        predict.append(_decode(q[k].T[0]))
    # Plain sequence accuracy.
    correct = float(0)
    cnt = 0
    acc_s = 0
    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1
    acc_s = correct / cnt
    if flags.lex_txt != None:
        # Lexicon-constrained accuracy; ties keep the earliest lexicon entry,
        # matching the original strict-< scan.
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            pl = min(lexicon, key=lambda w: editdistance.eval(predict[l], w))
            if pl == gt[l]:
                correct_l = correct_l + 1
        acc_l = correct_l / cnt
    print('accuracy: ', acc_s)
    if flags.lex_txt != None:
        print('accuracy with lexicon: ', acc_l)
def test():
    """Evaluate the coordinate-augmented attention OCR model on flags.test_txt.

    Rebuilds the inference graph (CNN features + constant coordinate grid),
    restores the latest checkpoint from ``flags.load_dir``, decodes every test
    image, and prints plain accuracy — plus lexicon-constrained accuracy when
    ``flags.lex_txt`` is set.
    """
    # The CNN downsamples by a factor of 8.
    f_size = int(flags.img_size / 8)
    encoder_length = f_size * f_size
    tf.reset_default_graph()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        c_t, inputs_t = build_cnn(is_training=False,
                                  batch_size=flags.batch_size,
                                  img_size=flags.img_size,
                                  channels=flags.channels)
        deconv_outputs_t = build_deconv(False, c_t, flags.batch_size)
        # Constant 2-channel coordinate grid over [-0.5, 0.5].
        x_t = np.linspace(-0.5, 0.5, f_size)
        x_t = np.tile(x_t, (f_size, 1))
        y_t = np.transpose(x_t)
        x_t = np.expand_dims(x_t, axis=2)
        y_t = np.expand_dims(y_t, axis=2)
        m_t = np.concatenate((x_t, y_t), axis=2)
        m_t = np.expand_dims(m_t, axis=0)
        m_t = np.repeat(m_t, flags.batch_size, axis=0)
        m_t = tf.convert_to_tensor(m_t, np.float32)
        encoder_outputs_t = tf.concat([c_t, m_t], -1)
        encoder_outputs_t = tf.reshape(encoder_outputs_t,
                                       shape=(-1, f_size * f_size, 258))
        # Time-major layout for the attention decoder.
        encoder_outputs_t = tf.transpose(encoder_outputs_t, [1, 0, 2])
        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, _, _, _, _, _ = build_network(encoder_outputs_t,
                                                True,
                                                flags.batch_size,
                                                flags.decoder_length,
                                                flags.tgt_vocab_size,
                                                flags.attn_num_hidden,
                                                encoder_length,
                                                flags.max_gradient_norm,
                                                f_size,
                                                flags.att_loss,
                                                flags.img_size,
                                                deconv_outputs_t)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    model_file = tf.train.latest_checkpoint(flags.load_dir)
    infer_saver.restore(infer_sess, model_file)
    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset(test)
    if flags.lex_txt != None:
        with open(flags.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]
    ti = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size
    gt = []
    predict = []
    # BUG FIX: hoisted out of the batch loop; previously a test set smaller
    # than one batch (ti == 0) left this undefined for the leftover pass.
    testing_decoder_inputs = np.zeros(
        (flags.decoder_length, flags.batch_size), dtype=float)

    def _decode(ids):
        # Map predicted ids to characters, skipping padding (-1) and <EOS>
        # symbols (original behaviour: skip, not stop at <EOS>).
        pd = []
        for c in ids:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        return ''.join(pd)

    for t in range(ti):
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        texts = []
        for line in batch_test:
            # NOTE(review): full batches split on every space while the
            # leftover batch below uses maxsplit=1, so labels containing
            # spaces are truncated here — confirm the intended label format.
            path.append(line.split(' ')[0])
            texts.append(line.split(' ')[1])
        images = load_img(path, flags.img_size)
        feed_dict_t = {
            inputs_t: images,
            decoder_inputs_t: testing_decoder_inputs
        }
        q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)
        for j in range(flags.batch_size):
            gt.append(texts[j])
            predict.append(_decode(np.array(q).T[j]))
    # BUG FIX: only process the leftover partial batch when it exists.
    if rest > 0:
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        texts = []
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])
        images = load_img(path, flags.img_size)
        feed_dict_t = {inputs_t: images,
                       decoder_inputs_t: testing_decoder_inputs}
        q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)
        for k in range(rest):
            gt.append(texts[k])
            predict.append(_decode(np.array(q).T[k]))
    # BUG FIX: the original ran this accuracy loop twice without resetting
    # the counters (doubling both numerator and denominator — same ratio,
    # pure wasted work); the duplicate is removed.
    correct = float(0)
    cnt = 0
    acc_s = 0
    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1
    if cnt:
        acc_s = correct / cnt
    if flags.lex_txt != None:
        # Lexicon-constrained accuracy; ties keep the earliest lexicon entry,
        # matching the original strict-< scan.
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            pl = min(lexicon, key=lambda w: editdistance.eval(predict[l], w))
            if pl == gt[l]:
                correct_l = correct_l + 1
        acc_l = correct_l / cnt
    print('accuracy: ', acc_s)
    if flags.lex_txt != None:
        print('accuracy with lexicon: ', acc_l)
def train():
    """Train the beam-search OCR model defined by build_network.

    Builds separate training and inference graphs, streams batches from
    ``FLAGS.train_txt``, and every ``FLAGS.eval_step`` steps checkpoints the
    model and sanity-checks it by re-decoding the most recent training batch.
    """
    with open(FLAGS.train_txt) as f:
        sample = [line.rstrip() for line in f]
    sample = np.array(sample)
    iteration = len(sample) // FLAGS.batch_size
    data = Dataset(sample)
    tf.reset_default_graph()
    train_graph = tf.Graph()
    infer_graph = tf.Graph()
    with train_graph.as_default():
        train_op, loss, sample_ids, logits, inputs, decoder_inputs, decoder_lengths, \
            target_labels, keep_prob = build_network(
                is_training=True,
                batch_size=FLAGS.batch_size,
                height=FLAGS.height,
                width=FLAGS.width,
                channels=FLAGS.channels,
                decoder_length=FLAGS.decoder_length,
                tgt_vocab_size=FLAGS.tgt_vocab_size,
                num_units=FLAGS.num_units,
                beam_width=FLAGS.beam_width,
                encoder_length=FLAGS.encoder_length,
                max_gradient_norm=FLAGS.max_gradient_norm,
                embedding_size=FLAGS.embedding_size,
                initial_learning_rate=FLAGS.learning_rate)
        initializer = tf.global_variables_initializer()
        train_saver = tf.train.Saver()
    train_sess = tf.Session(graph=train_graph)
    train_sess.run(initializer)
    with infer_graph.as_default():
        # Mirror graph for inference; weights are shared only through
        # checkpoints (saved from the train graph, restored here).
        _, _, pred_ids, pred_logits, inputs_t, decoder_inputs_t, decoder_lengths_t, \
            _, keep_prob_t = build_network(
                is_training=False,
                batch_size=FLAGS.batch_size,
                height=FLAGS.height,
                width=FLAGS.width,
                channels=FLAGS.channels,
                decoder_length=FLAGS.decoder_length,
                tgt_vocab_size=FLAGS.tgt_vocab_size,
                num_units=FLAGS.num_units,
                beam_width=FLAGS.beam_width,
                encoder_length=FLAGS.encoder_length,
                max_gradient_norm=FLAGS.max_gradient_norm,
                embedding_size=FLAGS.embedding_size,
                initial_learning_rate=None)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    start = time.time()
    acc_log = 0   # best evaluation accuracy seen so far
    count = 0     # NOTE(review): never incremented anywhere in this variant,
                  # so the lr-decay branch below presumably never fires unless
                  # iteration // FLAGS.eval_step == 0 — compare with the other
                  # train() variants, which do count = count + 1 per eval.
    lr = FLAGS.learning_rate
    for h in range(FLAGS.epoch):
        for i in range(iteration):
            batch_train = data.next_batch(FLAGS.batch_size)
            np.random.shuffle(batch_train)
            path = []
            texts = []
            # Each line is "<image path> <label>" split on the first space.
            for line in batch_train:
                path.append(line.split(' ')[0])
                texts.append(line.split(' ')[1])
            images = load_train_img(path, FLAGS.height, FLAGS.width)
            training_target_labels = get_label(texts, FLAGS.decoder_length)
            # Teacher forcing: decoder inputs are the targets shifted right by
            # one with a leading GO (0) column; transposed to time-major.
            training_decoder_inputs = np.delete(training_target_labels, -1, axis=1)
            training_decoder_inputs = np.c_[
                np.zeros(training_decoder_inputs.shape[0]),
                training_decoder_inputs].T
            feed_dict = {
                inputs: images[:, :, :, np.newaxis],
                decoder_inputs: training_decoder_inputs,
                decoder_lengths: np.ones(
                    (FLAGS.batch_size), dtype=int) * FLAGS.decoder_length,
                target_labels: training_target_labels,
                keep_prob: 0.8
            }
            _, loss_value = train_sess.run([train_op, loss],
                                           feed_dict=feed_dict)
            # NOTE(review): step = float(i), not i + 1, so the display and
            # eval branches also fire at step 0 (the very first iteration of
            # every epoch) — the other train() variants use i + 1; confirm
            # which is intended.
            step = float(i)
            if step % FLAGS.display_step == 0:
                now = time.time()
                print(step, now - start, loss_value)
                start = now
            if step % FLAGS.eval_step == 0:
                # Checkpoint, then evaluate by re-decoding the *current
                # training batch* in the inference graph — a smoke test, not
                # a held-out validation.
                train_saver.save(train_sess, FLAGS.save_dir)
                model_file = tf.train.latest_checkpoint(
                    FLAGS.save_dir.rsplit('/', 1)[0])
                infer_saver.restore(infer_sess, model_file)
                gt = []
                predict = []
                images = load_img(path, FLAGS.height, FLAGS.width)
                feed_dict_t = {
                    inputs_t: images[:, :, :, np.newaxis],
                    decoder_lengths_t: np.ones(
                        (FLAGS.batch_size), dtype=int) * FLAGS.decoder_length,
                    keep_prob_t: 1
                }
                q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)
                for j in range(len(texts)):
                    gt.append(texts[j])
                    # Top beam of the beam-search output.
                    ans = q[j].T[0]
                    pd = []
                    # Skip padding (-1) and <EOS> symbols when decoding.
                    for c in ans:
                        if c != -1:
                            character = tools.idx_to_word[c]
                            if character != '<EOS>':
                                pd.append(character)
                    predict.append(''.join(pd))
                correct = float(0)
                cnt = 0
                acc_s = 0
                for l in range(len(gt)):
                    cnt = cnt + 1
                    if gt[l] == predict[l]:
                        correct = correct + 1
                acc_s = correct / cnt
                if acc_s > acc_log:
                    acc_log = acc_s
                    count = 0
                # NOTE(review): lr is divided here but never fed into the
                # graph — the training graph was built with a fixed
                # initial_learning_rate and no learning-rate placeholder is
                # in feed_dict, so this decay has no effect; confirm against
                # build_network.
                if count == (iteration // FLAGS.eval_step):
                    lr = lr / 5
def read_input(self, img_path):
    """Load the image at ``img_path`` and remember both the pixel data and
    the image name on this instance."""
    loaded = load_img(img_path)
    self.img = loaded[0]
    self.img_name = loaded[1]