def run_test(sess, model, test_data, verbose=True):
    """Decode the whole test set and collect predictions plus attention alignments.

    Args:
        sess: an active tf.Session with a restored model.
        model: model object exposing the input placeholders and output tensors.
        test_data: tuple of parallel sequences (value, attr, pos_fw, pos_bw, desc).
        verbose: when True, print a running batch counter.

    Returns:
        (predicted_ids, alignment_history) as plain Python lists.
    """
    all_predictions = []
    all_alignments = []

    batches = make_batch_iter(list(zip(*test_data)), config.batch_size,
                              shuffle=False, verbose=verbose)
    for step, batch in enumerate(batches):
        value_seq, attr_seq, pos_fw_seq, pos_bw_seq, _ = list(zip(*batch))

        # true (unpadded) source lengths must be captured before padding
        src_len_seq = np.array([len(src) for src in value_seq])
        value_seq = np.array(pad_batch(value_seq, config.pad_id))
        attr_seq = np.array(pad_batch(attr_seq, config.pad_id))
        pos_fw_seq = np.array(pad_batch(pos_fw_seq, config.pad_id))
        pos_bw_seq = np.array(pad_batch(pos_bw_seq, config.pad_id))

        feed = {
            model.value_inp: value_seq,
            model.attr_inp: attr_seq,
            model.pos_fw_inp: pos_fw_seq,
            model.pos_bw_inp: pos_bw_seq,
            model.src_len: src_len_seq,
            model.training: False
        }
        batch_predictions, batch_alignments = sess.run(
            [model.predicted_ids, model.alignment_history], feed_dict=feed)

        all_predictions.extend(batch_predictions.tolist())
        # argmax over the attention axis gives, per decoded token, the most
        # attended source position
        all_alignments.extend(np.argmax(batch_alignments, axis=-1).tolist())
        if verbose:
            print('\rprocessing batch: {:>6d}'.format(step + 1), end='')
    print()

    return all_predictions, all_alignments
def main():
    """Run inference on args.input with the latest checkpoint and save to args.output."""
    print('loading data...')
    tokenizer = FullTokenizer(config.bert_vocab, do_lower_case=config.to_lower)
    pos_2_id, id_2_pos = read_dict(config.pos_dict)
    tag_2_id, id_2_tag = read_dict(config.tag_dict)
    # dictionary sizes are injected into the config before model construction
    config.num_pos = len(pos_2_id)
    config.num_tag = len(tag_2_id)

    data_reader = DataReader(config, tokenizer, pos_2_id, tag_2_id)
    input_file = args.input
    print('input file: {}'.format(input_file))
    input_data = data_reader.load_data_from_file(input_file)

    print('building model...')
    model = get_model(config, is_training=False)
    saver = tf.train.Saver(max_to_keep=1)

    with tf.Session(config=sess_config) as sess:
        checkpoint = tf.train.latest_checkpoint(config.result_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print('loading model from {}'.format(checkpoint))

            batch_iter = make_batch_iter(list(zip(*input_data)),
                                         config.batch_size, shuffle=False)
            outputs = inference(sess, model, batch_iter, verbose=True)

            print('========== Saving Result ==========')
            output_file = args.output
            save_result(outputs, output_file, tokenizer, id_2_tag)
        else:
            print('model not found.')
    print('done')
def test():
    """Evaluate the latest checkpoint on the test split and save predictions."""
    print('loading data...')
    tokenizer = FullTokenizer(config.bert_vocab, do_lower_case=config.to_lower)
    pos_2_id, id_2_pos = read_dict(config.pos_dict)
    tag_2_id, id_2_tag = read_dict(config.tag_dict)
    # dictionary sizes are injected into the config before model construction
    config.num_pos = len(pos_2_id)
    config.num_tag = len(tag_2_id)

    data_reader = DataReader(config, tokenizer, pos_2_id, tag_2_id)
    test_data = data_reader.read_test_data()

    print('building model...')
    model = get_model(config, is_training=False)
    saver = tf.train.Saver(max_to_keep=1)

    with tf.Session(config=sess_config) as sess:
        checkpoint = tf.train.latest_checkpoint(config.result_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print('loading model from {}'.format(checkpoint))

            print('========== Test ==========')
            test_batch_iter = make_batch_iter(list(zip(*test_data)),
                                              config.batch_size, shuffle=False)
            outputs, test_loss, test_accu = evaluate(sess, model,
                                                     test_batch_iter, verbose=True)
            print('The average test loss is {:>.4f}, average test accuracy is {:>.4f}'.format(test_loss, test_accu))

            print('========== Saving Result ==========')
            save_result(outputs, config.test_result, tokenizer, id_2_tag)
        else:
            print('model not found.')
    print('done')
def run_test(sess, model, test_data):
    """Decode every test batch (topic/triple/src inputs) and return predicted ids.

    Args:
        sess: active tf.Session with a restored model.
        model: model exposing batch placeholders and `predicted_ids`.
        test_data: tuple of parallel sequences; targets (last two fields) are ignored.

    Returns:
        list of per-example predicted id sequences.
    """
    results = []
    progress = tqdm(
        list(make_batch_iter(list(zip(*test_data)), config.batch_size,
                             shuffle=False)))
    for batch in progress:
        # last two fields are the (unused) target side of the batch
        topic, topic_len, triple, triple_len, src, src_len, _, _ = \
            make_batch_data(batch)

        feed = {
            model.batch_size: len(topic),
            model.topic: topic,
            model.topic_len: topic_len,
            model.triple: triple,
            model.triple_len: triple_len,
            model.src: src,
            model.src_len: src_len,
            model.training: False
        }
        batch_predictions = sess.run(model.predicted_ids, feed_dict=feed)
        results.extend(batch_predictions.tolist())

    return results
def run_evaluate(sess, model, valid_data):
    """Evaluate on valid_data; return (predicted_ids, mean_loss, mean_accuracy)."""
    num_batches = 0
    results = []
    loss_sum = 0.0
    accu_sum = 0.0

    progress = tqdm(list(make_batch_iter(valid_data, config.batch_size,
                                         shuffle=False)),
                    desc='Eval')
    for batch in progress:
        src, src_len, tgt, tgt_len = make_batch_data(batch, config.pad_id)

        fetches = [model.predicted_ids, model.loss, model.accu,
                   model.global_step, model.summary]
        feed = {
            model.batch_size: len(src),
            model.src: src,
            model.src_len: src_len,
            model.tgt: tgt,
            model.tgt_len: tgt_len,
            model.training: False
        }
        batch_predictions, loss, accu, global_step, summary = sess.run(
            fetches, feed_dict=feed)

        results.extend(batch_predictions.tolist())
        num_batches += 1
        loss_sum += loss
        accu_sum += accu
        progress.set_description(
            'Eval: loss is {:>.4f}, accuracy is {:>.4f}'.format(loss, accu))

    return results, loss_sum / num_batches, accu_sum / num_batches
def run_evaluate(sess, model, valid_data, valid_summary_writer=None, verbose=True):
    """Evaluate the seq2seq model on the validation set.

    Args:
        sess: active tf.Session with a restored model.
        model: model exposing src/tgt placeholders, loss/accu and decode outputs.
        valid_data: tuple of parallel (src, tgt) sequences.
        valid_summary_writer: optional tf.summary.FileWriter for periodic summaries.
        verbose: when True, print a running batch counter.

    Returns:
        (predicted_ids, alignment_history, mean_loss, mean_accuracy).
        alignment_history stays empty when config.beam_search is set.
    """
    steps = 0
    predicted_ids = []
    alignment_history = []
    total_loss = 0.0
    total_accu = 0.0
    batch_iter = make_batch_iter(list(zip(*valid_data)), config.batch_size,
                                 shuffle=False, verbose=verbose)
    for batch in batch_iter:
        src_seq, tgt_seq = list(zip(*batch))
        # true (unpadded) lengths must be captured before padding
        src_len_seq = np.array([len(src) for src in src_seq])
        tgt_len_seq = np.array([len(tgt) for tgt in tgt_seq])
        src_seq = np.array(pad_batch(src_seq, config.pad_id))
        tgt_seq = np.array(pad_batch(tgt_seq, config.pad_id))

        _predicted_ids, _alignment_history, loss, accu, global_step, summary = sess.run(
            [model.predicted_ids, model.alignment_history, model.loss,
             model.accu, model.global_step, model.summary],
            feed_dict={
                model.src_inp: src_seq,
                model.tgt_inp: tgt_seq[:, :-1],  # 1 for eos
                model.tgt_out: tgt_seq[:, 1:],  # 1 for sos
                model.src_len: src_len_seq,
                model.tgt_len: tgt_len_seq - 1,  # 1 for eos
                model.training: False
            })
        predicted_ids.extend(_predicted_ids.tolist())
        if not config.beam_search:
            # argmax over the attention axis: most-attended source position
            # per decoded token (beam search does not expose usable alignments)
            alignment_history.extend(
                np.argmax(_alignment_history, axis=-1).tolist())
        steps += 1
        total_loss += loss
        total_accu += accu
        if verbose:
            # fix: steps is already incremented above, so print `steps`,
            # not `steps + 1` (the counter used to start at 2)
            print('\rprocessing batch: {:>6d}'.format(steps), end='')
        if steps % args.log_steps == 0 and valid_summary_writer is not None:
            valid_summary_writer.add_summary(summary, global_step)
    print()
    return predicted_ids, alignment_history, total_loss / steps, total_accu / steps
def run_test(sess, model, test_data):
    """Decode every test batch (src-only inputs) and return predicted ids."""
    results = []
    progress = tqdm(list(make_batch_iter(test_data, config.batch_size,
                                         shuffle=False)),
                    desc='Test')
    for batch in progress:
        # target side of the batch is ignored at test time
        src, src_len, _, _ = make_batch_data(batch, config.pad_id)

        feed = {
            model.batch_size: len(src),
            model.src: src,
            model.src_len: src_len,
            model.training: False
        }
        batch_predictions = sess.run(model.predicted_ids, feed_dict=feed)
        results.extend(batch_predictions.tolist())

    return results
def run_train(sess, model, train_data, valid_data, saver, evaluator,
              train_summary_writer=None, valid_summary_writer=None, verbose=True):
    """Train the table-to-text model with periodic checkpointing and BLEU-based early stop.

    Args:
        sess: active tf.Session.
        model: model exposing train_op, loss/accu, and the input placeholders.
        train_data, valid_data: tuples of parallel sequences.
        saver: tf.train.Saver used for periodic checkpoints.
        evaluator: object whose .evaluate() returns a dict containing 'Bleu_4'.
        train_summary_writer, valid_summary_writer: optional summary writers.
        verbose: when True, print per-batch progress.
    """
    flag = 0          # consecutive evaluations without a BLEU improvement
    train_log = 0.0   # best Bleu_4 seen so far
    global_step = 0
    for epoch in range(config.num_epoch):
        print_title('Train Epoch: {}'.format(epoch + 1))
        steps = 0
        total_loss = 0.0
        total_accu = 0.0
        batch_iter = make_batch_iter(list(zip(*train_data)), config.batch_size,
                                     shuffle=True, verbose=verbose)
        for batch in batch_iter:
            start_time = time.time()
            value_seq, attr_seq, pos_fw_seq, pos_bw_seq, desc_seq = list(zip(*batch))

            # true (unpadded) lengths must be captured before padding
            src_len_seq = np.array([len(src) for src in value_seq])
            tgt_len_seq = np.array([len(tgt) for tgt in desc_seq])
            value_seq = np.array(pad_batch(value_seq, config.pad_id))
            attr_seq = np.array(pad_batch(attr_seq, config.pad_id))
            pos_fw_seq = np.array(pad_batch(pos_fw_seq, config.pad_id))
            pos_bw_seq = np.array(pad_batch(pos_bw_seq, config.pad_id))
            desc_seq = np.array(pad_batch(desc_seq, config.pad_id))

            feed = {
                model.value_inp: value_seq,
                model.attr_inp: attr_seq,
                model.pos_fw_inp: pos_fw_seq,
                model.pos_bw_inp: pos_bw_seq,
                model.desc_inp: desc_seq[:, :-1],  # 1 for eos
                model.desc_out: desc_seq[:, 1:],  # 1 for sos
                model.src_len: src_len_seq,
                model.tgt_len: tgt_len_seq - 1,  # 1 for eos
                model.training: True
            }
            _, loss, accu, global_step, summary = sess.run(
                [model.train_op, model.loss, model.accu,
                 model.global_step, model.summary],
                feed_dict=feed)

            steps += 1
            total_loss += loss
            total_accu += accu
            if verbose:
                print('\rafter {:>6d} batch(s), train loss is {:>.4f}, train accuracy is {:>.4f}, {:>.4f}s/batch'
                      .format(steps, loss, accu, time.time() - start_time), end='')
            if steps % args.log_steps == 0 and train_summary_writer is not None:
                train_summary_writer.add_summary(summary, global_step)

            if global_step % args.save_steps == 0:
                saver.save(sess, config.model_file, global_step=global_step)
                # evaluate saved models after pre-train epochs
                if epoch + 1 > args.pre_train_epochs:
                    predicted_ids, alignment_history, valid_loss, valid_accu = run_evaluate(
                        sess, model, valid_data, valid_summary_writer, verbose=False)
                    print_title('Valid Result', sep='*')
                    print('average valid loss: {:>.4f}, average valid accuracy: {:>.4f}'.format(valid_loss, valid_accu))

                    print_title('Saving Result')
                    save_result(predicted_ids, alignment_history, config.id_2_word,
                                config.valid_data_small, config.valid_result)
                    eval_results = evaluator.evaluate(config.valid_data_small,
                                                      config.valid_result,
                                                      config.to_lower)
                    # early stop
                    # NOTE(review): patience is hard-coded to 5 here while the
                    # sibling trainers use args.early_stop — confirm intended.
                    if eval_results['Bleu_4'] > train_log:
                        flag = 0
                        train_log = eval_results['Bleu_4']
                    elif flag < 5:
                        flag += 1
                    elif args.early_stop:
                        return
        print()
        print_title('Train Result')
        print('average train loss: {:>.4f}, average train accuracy: {:>.4f}'.format(
            total_loss / steps, total_accu / steps))
        saver.save(sess, config.model_file, global_step=global_step)
def train():
    """Train the BERT-based tagger, validating each epoch and keeping the best model."""
    # fix: use makedirs (matching result_dir) so a missing parent directory
    # does not make os.mkdir raise FileNotFoundError for the log dirs
    if not os.path.exists(config.result_dir):
        os.makedirs(config.result_dir)
    if not os.path.exists(config.train_log_dir):
        os.makedirs(config.train_log_dir)
    if not os.path.exists(config.valid_log_dir):
        os.makedirs(config.valid_log_dir)

    print('loading data...')
    tokenizer = FullTokenizer(config.bert_vocab, do_lower_case=config.to_lower)
    pos_2_id, id_2_pos = read_dict(config.pos_dict)
    tag_2_id, id_2_tag = read_dict(config.tag_dict)
    # dictionary sizes are injected into the config before model construction
    config.num_pos = len(pos_2_id)
    config.num_tag = len(tag_2_id)

    data_reader = DataReader(config, tokenizer, pos_2_id, tag_2_id)
    train_data = data_reader.read_train_data()
    valid_data = data_reader.read_valid_data()

    print('building model...')
    model = get_model(config, is_training=True)
    # initialize overlapping variables from the pre-trained BERT checkpoint
    tvars = tf.trainable_variables()
    assignment_map, initialized_variable_names = \
        get_assignment_map_from_checkpoint(tvars, config.bert_ckpt)
    tf.train.init_from_checkpoint(config.bert_ckpt, assignment_map)

    print('========== Trainable Variables ==========')
    for v in tvars:
        init_string = ''
        if v.name in initialized_variable_names:
            init_string = '<INIT_FROM_CKPT>'
        print(v.name, v.shape, init_string)
    print('========== Gradients ==========')
    for g in model.gradients:
        print(g)

    best_score = 0.0
    saver = tf.train.Saver(max_to_keep=1)
    with tf.Session(config=sess_config) as sess:
        if tf.train.latest_checkpoint(config.result_dir):
            saver.restore(sess, tf.train.latest_checkpoint(config.result_dir))
            print('loading model from {}'.format(
                tf.train.latest_checkpoint(config.result_dir)))
        else:
            tf.global_variables_initializer().run()
            print('initializing from scratch.')

        train_writer = tf.summary.FileWriter(config.train_log_dir, sess.graph)
        for i in range(config.num_epoch):
            print('========== Epoch {} Train =========='.format(i + 1))
            train_batch_iter = make_batch_iter(list(zip(*train_data)),
                                               config.batch_size, shuffle=True)
            train_loss, train_accu = run_epoch(sess, model, train_batch_iter,
                                               train_writer, verbose=True)
            print('The average train loss is {:>.4f}, average train accuracy is {:>.4f}'.format(train_loss, train_accu))

            print('========== Epoch {} Valid =========='.format(i + 1))
            valid_batch_iter = make_batch_iter(list(zip(*valid_data)),
                                               config.batch_size, shuffle=False)
            outputs, valid_loss, valid_accu = evaluate(sess, model,
                                                       valid_batch_iter, verbose=True)
            print('The average valid loss is {:>.4f}, average valid accuracy is {:>.4f}'.format(valid_loss, valid_accu))

            print('========== Saving Result ==========')
            save_result(outputs, config.valid_result, tokenizer, id_2_tag)
            # keep only the checkpoint with the best validation accuracy
            if valid_accu > best_score:
                best_score = valid_accu
                saver.save(sess, config.model_file)
def run_train(sess, model, train_data, valid_data, saver, evaluator,
              summary_writer=None):
    """Train the dialogue model; checkpoint on BLEU improvement, early-stop otherwise.

    Returns:
        valid_log_history: dict mapping metric name -> list of values, one entry
        per validation run (plus loss/accuracy/global_step).
    """
    flag = 0                 # consecutive evaluations without a BLEU improvement
    best_valid_result = 0.0  # best 'BLEU 4' seen so far
    valid_log_history = defaultdict(list)
    global_step = 0
    for epoch in range(config.num_epoch):
        logger.info(log_title('Train Epoch: {}'.format(epoch + 1)))
        steps = 0
        total_loss = 0.0
        total_accu = 0.0
        progress = tqdm(list(make_batch_iter(list(zip(*train_data)),
                                             config.batch_size, shuffle=True)))
        for batch in progress:
            topic, topic_len, triple, triple_len, src, src_len, tgt, tgt_len = \
                make_batch_data(batch)

            feed = {
                model.batch_size: len(topic),
                model.topic: topic,
                model.topic_len: topic_len,
                model.triple: triple,
                model.triple_len: triple_len,
                model.src: src,
                model.src_len: src_len,
                model.tgt: tgt,
                model.tgt_len: tgt_len,
                model.training: True
            }
            _, loss, accu, global_step, summary = sess.run(
                [model.train_op, model.loss, model.accu,
                 model.global_step, model.summary],
                feed_dict=feed)

            steps += 1
            total_loss += loss
            total_accu += accu
            progress.set_description(
                'loss: {:>.4f} accuracy: {:>.4f}'.format(loss, accu))
            if global_step % args.log_steps == 0 and summary_writer is not None:
                summary_writer.add_summary(summary, global_step)

            if global_step % args.save_steps == 0:
                # evaluate saved models after pre-train epochs
                if epoch < args.pre_train_epochs:
                    saver.save(sess, config.model_file, global_step=global_step)
                else:
                    predicted_ids, valid_loss, valid_accu = run_evaluate(
                        sess, model, valid_data)
                    logger.info(
                        'valid loss: {:>.4f}, valid accuracy: {:>.4f}'.format(
                            valid_loss, valid_accu))
                    save_outputs(predicted_ids, config.id_2_word,
                                 config.valid_data, config.valid_outputs)
                    valid_results = evaluator.evaluate(config.valid_data,
                                                       config.valid_outputs,
                                                       config.to_lower)
                    # early stop
                    if valid_results['BLEU 4'] >= best_valid_result:
                        flag = 0
                        best_valid_result = valid_results['BLEU 4']
                        logger.info('saving model-{}'.format(global_step))
                        saver.save(sess, config.model_file,
                                   global_step=global_step)
                        save_json(valid_results, config.valid_results)
                    elif flag < args.early_stop:
                        flag += 1
                    elif args.early_stop:
                        return valid_log_history
                    # record this validation run in the history
                    for key, value in valid_results.items():
                        valid_log_history[key].append(value)
                    valid_log_history['loss'].append(valid_loss)
                    valid_log_history['accuracy'].append(valid_accu)
                    valid_log_history['global_step'].append(int(global_step))
        logger.info('train loss: {:>.4f}, train accuracy: {:>.4f}'.format(
            total_loss / steps, total_accu / steps))
        saver.save(sess, config.model_file, global_step=global_step)
    return valid_log_history
def run_train(sess, model, train_data, valid_data, saver, evaluator,
              train_summary_writer=None, valid_summary_writer=None, verbose=True):
    """Train the seq2seq model; checkpoint on BLEU improvement, early-stop otherwise.

    Returns:
        valid_log_history: dict with 'loss', 'accuracy' and 'global_step' lists,
        one entry per validation run.
    """
    flag = 0              # consecutive evaluations without sufficient BLEU gain
    valid_log = 0.0       # Bleu_4 of the previous validation run
    best_valid_log = 0.0  # best Bleu_4 seen so far
    valid_log_history = {'loss': [], 'accuracy': [], 'global_step': []}
    global_step = 0
    for epoch in range(config.num_epoch):
        print_title('Train Epoch: {}'.format(epoch + 1))
        steps = 0
        total_loss = 0.0
        total_accu = 0.0
        batch_iter = make_batch_iter(list(zip(*train_data)), config.batch_size,
                                     shuffle=True, verbose=verbose)
        for batch in batch_iter:
            start_time = time.time()
            src_seq, tgt_seq = list(zip(*batch))

            # true (unpadded) lengths must be captured before padding
            src_len_seq = np.array([len(src) for src in src_seq])
            tgt_len_seq = np.array([len(tgt) for tgt in tgt_seq])
            src_seq = np.array(pad_batch(src_seq, config.pad_id))
            tgt_seq = np.array(pad_batch(tgt_seq, config.pad_id))

            feed = {
                model.src_inp: src_seq,
                model.tgt_inp: tgt_seq[:, :-1],  # 1 for eos
                model.tgt_out: tgt_seq[:, 1:],  # 1 for sos
                model.src_len: src_len_seq,
                model.tgt_len: tgt_len_seq - 1,  # 1 for eos
                model.training: True
            }
            _, loss, accu, global_step, summary = sess.run(
                [model.train_op, model.loss, model.accu,
                 model.global_step, model.summary],
                feed_dict=feed)

            steps += 1
            total_loss += loss
            total_accu += accu
            if verbose:
                print('\rafter {:>6d} batch(s), train loss is {:>.4f}, train accuracy is {:>.4f}, {:>.4f}s/batch'
                      .format(steps, loss, accu, time.time() - start_time), end='')
            if steps % args.log_steps == 0 and train_summary_writer is not None:
                train_summary_writer.add_summary(summary, global_step)

            if global_step % args.save_steps == 0:
                # evaluate saved models after pre-train epochs
                if epoch < args.pre_train_epochs:
                    saver.save(sess, config.model_file, global_step=global_step)
                else:
                    predicted_ids, alignment_history, valid_loss, valid_accu = run_evaluate(
                        sess, model, valid_data, valid_summary_writer, verbose=False)
                    print_title('Valid Result', sep='*')
                    print('average valid loss: {:>.4f}, average valid accuracy: {:>.4f}'
                          .format(valid_loss, valid_accu))

                    print_title('Saving Result')
                    # alignments are only available outside beam search
                    if not config.beam_search:
                        save_result_v1(predicted_ids, alignment_history,
                                       config.id_2_word, config.valid_data,
                                       config.valid_result)
                    else:
                        save_result_v2(predicted_ids, config.id_2_word,
                                       config.valid_result)
                    valid_results = evaluator.evaluate(config.valid_data,
                                                       config.valid_result,
                                                       config.to_lower)
                    if valid_results['Bleu_4'] >= best_valid_log:
                        best_valid_log = valid_results['Bleu_4']
                        saver.save(sess, config.model_file,
                                   global_step=global_step)
                    # early stop: require at least early_stop_delta improvement
                    # over the previous validation run
                    if valid_results['Bleu_4'] - args.early_stop_delta >= valid_log:
                        flag = 0
                    elif flag < args.early_stop:
                        flag += 1
                    elif args.early_stop:
                        return valid_log_history
                    valid_log = valid_results['Bleu_4']
                    valid_log_history['loss'].append(valid_loss)
                    valid_log_history['accuracy'].append(valid_accu)
                    valid_log_history['global_step'].append(int(global_step))
        print()
        print_title('Train Result')
        print('average train loss: {:>.4f}, average train accuracy: {:>.4f}'.
              format(total_loss / steps, total_accu / steps))
        saver.save(sess, config.model_file, global_step=global_step)
    return valid_log_history