def __init__(self):
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    data_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    in_seq_train, out_seq_train, label_train = data_set[0]
    in_seq_dev, out_seq_dev, label_dev = data_set[1]
    in_seq_test, out_seq_test, label_test = data_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = data_set[3]

    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocab(label_vocab_path)

    self.sess = tf.Session()
    self.model, self.model_test = create_model(self.sess, len(vocab),
                                               len(tag_vocab),
                                               len(label_vocab))
def main(_):
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    data_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    in_seq_train, out_seq_train, label_train = data_set[0]
    in_seq_dev, out_seq_dev, label_dev = data_set[1]
    in_seq_test, out_seq_test, label_test = data_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = data_set[3]
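# The four-way unpacking above assumes data_utils.prepare_multi_task_data
# returns [(train paths), (dev paths), (test paths), (vocab paths)]. A
# minimal stand-in with hypothetical file names, sketched here only to make
# that assumed contract explicit (the real data_utils may lay files out
# differently):
def _prepare_multi_task_data_sketch(data_dir, in_vocab_size, out_vocab_size):
    train = (data_dir + '/train.seq.in', data_dir + '/train.seq.out',
             data_dir + '/train.label')
    dev = (data_dir + '/valid.seq.in', data_dir + '/valid.seq.out',
           data_dir + '/valid.label')
    test = (data_dir + '/test.seq.in', data_dir + '/test.seq.out',
            data_dir + '/test.label')
    vocabs = (data_dir + '/in_vocab_%d.txt' % in_vocab_size,
              data_dir + '/out_vocab_%d.txt' % out_vocab_size,
              data_dir + '/label.txt')
    return [train, dev, test, vocabs]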
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing trec data in %s" % FLAGS.data_dir)
    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'
    label_valid_out_file = result_dir + '/label.valid.hyp.txt'
    label_test_out_file = result_dir + '/label.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)

    # The LM vocabulary is the input vocabulary with _BOS replaced by _EOS at
    # the same id: the LM targets are the inputs shifted by one, ending in EOS.
    LM_vocab = vocab.copy()
    assert LM_vocab[data_utils._BOS] == data_utils.BOS_ID
    del LM_vocab[data_utils._BOS]
    LM_vocab[data_utils._EOS] = data_utils.BOS_ID
    rev_LM_vocab = [x for x in rev_vocab]
    rev_LM_vocab[data_utils.BOS_ID] = data_utils._EOS

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23), )
    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab), len(LM_vocab))
        print("Creating model with source_vocab_size=%d, target_vocab_size=%d, "
              "and label_vocab_size=%d, and lm_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab), len(LM_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0

        if FLAGS.label_in_training == 'true_label':
            print("Use TRUE label during model training")
            train_with_true_label = True
        elif FLAGS.label_in_training == 'predicted_label':
            print("Use PREDICTED label during model training")
            train_with_true_label = False
        elif FLAGS.label_in_training == 'scheduled_sampling':
            print("Use Scheduled Sampling label during model training")

        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            (encoder_inputs, encoder_inputs_shiftByOne, tags, tag_weights,
             intent_weights, lm_weights, batch_sequence_length,
             labels) = model.get_batch(train_set, bucket_id)

            if FLAGS.label_in_training == 'scheduled_sampling':
                # Use the predicted label in training with probability
                # global_step / max_training_steps.
                random_number_02 = np.random.random_sample()
                final_training_step = FLAGS.max_training_steps
                if random_number_02 < float(model.global_step.eval()) / final_training_step:
                    train_with_true_label = False
                else:
                    train_with_true_label = True

            _, step_loss, tagging_logits, classification_logits = model.joint_step(
                sess, encoder_inputs, encoder_inputs_shiftByOne, lm_weights,
                tags, tag_weights, labels, intent_weights,
                batch_sequence_length, bucket_id, False,
                train_with_true_label=train_with_true_label)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                def run_eval(data_set, mode):  # mode = "Valid" or "Test"
                    # Run evals on the development/test set and print their accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    for bucket_id in xrange(len(_buckets)):  # len(_buckets) = 1 here
                        eval_loss = 0.0
                        count = 0
                        total_word_count = 0
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            (eval_encoder_inputs, eval_encoder_inputs_shiftByOne,
                             eval_tags, eval_tag_weights, eval_intent_weights,
                             eval_lm_weights, eval_sequence_length,
                             eval_labels) = model_test.get_one(data_set, bucket_id, i)
                            eval_intent_weights = eval_tag_weights
                            tagging_logits = []
                            classification_logits = []
                            _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                                sess, eval_encoder_inputs,
                                eval_encoder_inputs_shiftByOne, eval_lm_weights,
                                eval_tags, eval_tag_weights, eval_labels,
                                eval_intent_weights, eval_sequence_length,
                                bucket_id, True)
                            eval_loss += step_loss * (eval_sequence_length[0])
                            total_word_count += eval_sequence_length[0]
                            hyp_label = None
                            # intent results
                            ref_label_list.append(rev_label_vocab[eval_labels[0][0]])
                            hyp_label = np.argmax(classification_logits[0], 0)
                            hyp_label_list.append(rev_label_vocab[hyp_label])
                            if eval_labels[0] == hyp_label:
                                correct_count += 1
                            # tagging results
                            word_list.append([
                                rev_vocab[x[0]]
                                for x in eval_encoder_inputs[:eval_sequence_length[0]]
                            ])
                            ref_tag_list.append([
                                rev_tag_vocab[x[0]]
                                for x in eval_tags[:eval_sequence_length[0]]
                            ])
                            hyp_tag_list.append([
                                rev_tag_vocab[np.argmax(x)]
                                for x in tagging_logits[:eval_sequence_length[0]]
                            ])
                    eval_perplexity = math.exp(float(eval_loss) / total_word_count)
                    print(" %s perplexity: %.2f" % (mode, eval_perplexity))
                    accuracy = float(correct_count) * 100 / count
                    print(" %s accuracy: %.2f %d/%d"
                          % (mode, accuracy, correct_count, count))

                    tagging_eval_result = dict()
                    if mode == 'Valid':
                        output_file = current_tagging_valid_out_file
                    elif mode == 'Test':
                        output_file = current_tagging_test_out_file
                    tagging_eval_result = conlleval(hyp_tag_list, ref_tag_list,
                                                    word_list, output_file)
                    print(" %s f1-score: %.2f" % (mode, tagging_eval_result['f1']))
                    sys.stdout.flush()
                    return eval_perplexity, tagging_eval_result, hyp_label_list

                # run valid
                valid_perplexity, valid_tagging_result, valid_hyp_label_list = run_eval(
                    dev_set, 'Valid')
                # record best results
                if valid_tagging_result['f1'] > best_valid_score:
                    best_valid_score = valid_tagging_result['f1']
                    subprocess.call([
                        'mv', current_tagging_valid_out_file,
                        current_tagging_valid_out_file + '.best_f1_%.2f' % best_valid_score
                    ])
                    with open('%s.best_f1_%.2f' % (label_valid_out_file, best_valid_score),
                              'w') as f:
                        for i in range(len(valid_hyp_label_list)):
                            f.write(valid_hyp_label_list[i] + '\n')

                # run test after each validation for development purpose.
                test_perplexity, test_tagging_result, test_hyp_label_list = run_eval(
                    test_set, 'Test')
                # record best results
                if test_tagging_result['f1'] > best_test_score:
                    best_test_score = test_tagging_result['f1']
                    subprocess.call([
                        'mv', current_tagging_test_out_file,
                        current_tagging_test_out_file + '.best_f1_%.2f' % best_test_score
                    ])
                    with open('%s.best_f1_%.2f' % (label_test_out_file, best_test_score),
                              'w') as f:
                        for i in range(len(test_hyp_label_list)):
                            f.write(test_hyp_label_list[i] + '\n')
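# The scheduled-sampling branch above feeds the model's own intent prediction
# back in with a probability that grows linearly in the global step, so early
# steps train on the true label and late steps mostly on the predicted one.
# That schedule, isolated as a standalone sketch:
import numpy as np

def _use_predicted_label(global_step, final_training_step):
    """True with probability global_step / final_training_step."""
    return np.random.random_sample() < float(global_step) / final_training_step

# e.g. at 10% of training, roughly 10% of steps use the predicted label.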
def test():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))

        def feed_sentence(sentence, vocab):
            # Wrap one raw sentence as a single-example data set, run the
            # model on it, and map the outputs back to words.
            data_set = [[]]
            token_ids = data_utils.prepare_one_data(sentence, vocab)
            slot_ids = [0 for i in range(len(token_ids))]
            data_set[0].append([token_ids, slot_ids, [0]])
            encoder_inputs, tags, tag_weights, sequence_length, labels = model_test.get_one(
                data_set, 0, 0)
            if task['joint'] == 1:
                _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                    sess, encoder_inputs, tags, tag_weights, labels,
                    sequence_length, 0, True)
            elif task['tagging'] == 1:
                _, step_loss, tagging_logits = model_test.tagging_step(
                    sess, encoder_inputs, tags, tag_weights, sequence_length,
                    0, True)
            elif task['intent'] == 1:
                _, step_loss, classification_logits = model_test.classification_step(
                    sess, encoder_inputs, labels, sequence_length, 0, True)
            classification = [
                np.argmax(classification_logit)
                for classification_logit in classification_logits
            ]
            tagging_logit = [
                np.argmax(tagging_logit) for tagging_logit in tagging_logits
            ]
            classification_word = [rev_label_vocab[c] for c in classification]
            tagging_word = [
                rev_tag_vocab[t] for t in tagging_logit[:sequence_length[0]]
            ]
            return classification_word, tagging_word

        # Simple interactive loop: read a sentence, print (intent, slot tags).
        sys.stdout.write('>')
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            print(feed_sentence(sentence, vocab))
            sys.stdout.write('>')
            sys.stdout.flush()
            sentence = sys.stdin.readline()
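# feed_sentence above builds the single-example data-set shape that get_one
# expects: one bucket holding one [token_ids, slot_ids, [label_id]] triple,
# with dummy slot and label ids that the forward-only step ignores. With
# hypothetical token ids for a four-word sentence, the structure is:
example_data_set = [[]]
example_data_set[0].append([[4, 87, 12, 9],   # token ids (hypothetical)
                            [0, 0, 0, 0],     # dummy slot ids
                            [0]])             # dummy intent label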
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print("Creating model with source_vocab_size=%d, target_vocab_size=%d, "
              "and label_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = model.get_batch(
                train_set, bucket_id)
            if task['joint'] == 1:
                _, step_loss, tagging_logits, classification_logits = model.joint_step(
                    sess, encoder_inputs, tags, tag_weights, labels,
                    batch_sequence_length, bucket_id, False)
            elif task['tagging'] == 1:
                _, step_loss, tagging_logits = model.tagging_step(
                    sess, encoder_inputs, tags, tag_weights,
                    batch_sequence_length, bucket_id, False)
            elif task['intent'] == 1:
                _, step_loss, classification_logits = model.classification_step(
                    sess, encoder_inputs, labels, batch_sequence_length,
                    bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # Validation: one batch per bucket, perplexity only.
                for bucket_id in range(len(_buckets)):
                    eval_loss = 0.0
                    encoder_inputs, tags, tag_weights, batch_sequence_length, labels = model_test.get_batch(
                        dev_set, bucket_id)
                    tagging_logits = []
                    classification_logits = []
                    if task['joint'] == 1:
                        _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                            sess, encoder_inputs, tags, tag_weights, labels,
                            batch_sequence_length, bucket_id, True)
                    elif task['tagging'] == 1:
                        _, step_loss, tagging_logits = model_test.tagging_step(
                            sess, encoder_inputs, tags, tag_weights,
                            batch_sequence_length, bucket_id, True)
                    elif task['intent'] == 1:
                        _, step_loss, classification_logits = model_test.classification_step(
                            sess, encoder_inputs, labels,
                            batch_sequence_length, bucket_id, True)
                    eval_ppx = math.exp(step_loss) if step_loss < 300 else float('inf')
                    print("validation perplexity %.2f" % eval_ppx)
                    sys.stdout.flush()
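# Every checkpoint block in this file guards math.exp(loss) with `loss < 300`.
# A float64 only overflows past exp(~709), so 300 is a conservative cut-off
# that simply reports unbounded perplexity for a diverged model. Isolated:
import math

def _safe_perplexity(loss):
    return math.exp(loss) if loss < 300 else float('inf')

assert _safe_perplexity(2.0) == math.exp(2.0)
assert _safe_perplexity(1000.0) == float('inf')  # no OverflowError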
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__flags.iteritems():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    data_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    in_seq_train, out_seq_train, label_train = data_set[0]
    in_seq_dev, out_seq_dev, label_dev = data_set[1]
    in_seq_test, out_seq_test, label_test = data_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = data_set[3]

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocab(label_vocab_path)

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
        # device_count = {'gpu': 2}
    )
    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print("Creating model with "
              "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            batch_data = model.get_batch(train_set, bucket_id)
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = batch_data
            if task['joint'] == 1:
                step_outputs = model.joint_step(sess, encoder_inputs, tags,
                                                tag_weights, labels,
                                                batch_sequence_length,
                                                bucket_id, False)
                _, step_loss, tagging_logits, class_logits = step_outputs
            elif task['tagging'] == 1:
                step_outputs = model.tagging_step(sess, encoder_inputs, tags,
                                                  tag_weights,
                                                  batch_sequence_length,
                                                  bucket_id, False)
                _, step_loss, tagging_logits = step_outputs
            elif task['intent'] == 1:
                step_outputs = model.classification_step(
                    sess, encoder_inputs, labels, batch_sequence_length,
                    bucket_id, False)
                _, step_loss, class_logits = step_outputs
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                def run_valid_test(data_set, mode):  # mode: Eval, Test
                    # Run evals on development/test set and print the accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    tagging_eval_result = dict()
                    for bucket_id in xrange(len(_buckets)):
                        eval_loss = 0.0
                        count = 0
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            sample = model_test.get_one(data_set, bucket_id, i)
                            encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                            tagging_logits = []
                            class_logits = []
                            if task['joint'] == 1:
                                step_outputs = model_test.joint_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    labels, sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits, class_logits = step_outputs
                            elif task['tagging'] == 1:
                                step_outputs = model_test.tagging_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits = step_outputs
                            elif task['intent'] == 1:
                                step_outputs = model_test.classification_step(
                                    sess, encoder_inputs, labels,
                                    sequence_length, bucket_id, True)
                                _, step_loss, class_logits = step_outputs
                            eval_loss += step_loss / len(data_set[bucket_id])
                            hyp_label = None
                            if task['intent'] == 1:
                                ref_label_list.append(rev_label_vocab[labels[0][0]])
                                hyp_label = np.argmax(class_logits[0], 0)
                                hyp_label_list.append(rev_label_vocab[hyp_label])
                                if labels[0] == hyp_label:
                                    correct_count += 1
                            if task['tagging'] == 1:
                                word_list.append([
                                    rev_vocab[x[0]]
                                    for x in encoder_inputs[:sequence_length[0]]
                                ])
                                ref_tag_list.append([
                                    rev_tag_vocab[x[0]]
                                    for x in tags[:sequence_length[0]]
                                ])
                                hyp_tag_list.append([
                                    rev_tag_vocab[np.argmax(x)]
                                    for x in tagging_logits[:sequence_length[0]]
                                ])
                    accuracy = float(correct_count) * 100 / count
                    if task['intent'] == 1:
                        print(" %s accuracy: %.2f %d/%d"
                              % (mode, accuracy, correct_count, count))
                        sys.stdout.flush()
                    if task['tagging'] == 1:
                        if mode == 'Eval':
                            tagging_out_file = current_tagging_valid_out_file
                        elif mode == 'Test':
                            tagging_out_file = current_tagging_test_out_file
                        tagging_eval_result = conlleval(hyp_tag_list,
                                                        ref_tag_list,
                                                        word_list,
                                                        tagging_out_file)
                        print(" %s f1-score: %.2f" % (mode, tagging_eval_result['f1']))
                        sys.stdout.flush()
                    return accuracy, tagging_eval_result

                # valid
                valid_accuracy, valid_tagging_result = run_valid_test(dev_set, 'Eval')
                if task['tagging'] == 1 and valid_tagging_result['f1'] > best_valid_score:
                    best_valid_score = valid_tagging_result['f1']
                    # save the best output file
                    subprocess.call([
                        'mv', current_tagging_valid_out_file,
                        current_tagging_valid_out_file + '.best_f1_%.2f' % best_valid_score
                    ])

                # test, run test after each validation for development purpose.
                test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')
                if task['tagging'] == 1 and test_tagging_result['f1'] > best_test_score:
                    best_test_score = test_tagging_result['f1']
                    # save the best output file
                    subprocess.call([
                        'mv', current_tagging_test_out_file,
                        current_tagging_test_out_file + '.best_f1_%.2f' % best_test_score
                    ])
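# The bucket selection in the training loops above samples a uniform number
# and takes the first bucket whose cumulative size fraction exceeds it, so
# buckets are drawn in proportion to how many examples they hold. The same
# logic reduced to a standalone sketch:
import numpy as np

def _sample_bucket(bucket_sizes):
    total = float(sum(bucket_sizes))
    scale = [sum(bucket_sizes[:i + 1]) / total for i in range(len(bucket_sizes))]
    r = np.random.random_sample()
    return min(i for i in range(len(scale)) if scale[i] > r)

# With sizes [100, 300], bucket 1 is returned about 75% of the time.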
def train():
    # Show parameters.
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))

    # 4-3-1. Prepare indexed data and corresponding labels.
    print("Preparing data in %s" % FLAGS.data_dir)
    # 4-3-1-1. String data -> token indices; build the word and label dictionaries.
    data_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    # 4-3-1-2. Get the path of each result.
    in_seq_train, out_seq_train = data_set[0]
    in_seq_test, out_seq_test = data_set[1]
    vocab_path, tag_vocab_path = data_set[2]

    # Where do we save the result?
    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    # 4-3-2. Get the index dictionary and word list.
    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    tag_vocab_inv = dict()
    for string, i in tag_vocab.items():
        tag_vocab_inv[i] = string

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
        # device_count = {'gpu': 2}
    )
    with tf.Session(config=config) as sess:
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        # 4-3-3. Build the train/test models.
        model, model_test = create_model(sess, len(vocab), len(tag_vocab))
        print("Creating model with source_vocab_size=%d, target_vocab_size=%d"
              % (len(vocab), len(tag_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        # 4-3-4. Load data using "# 4-1."
        test_set = read_data(in_seq_test, out_seq_test)
        train_set = read_data(in_seq_train, out_seq_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # 4-3-5. Training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])
            start_time = time.time()

            # 4-3-5-1. Get a batch.
            batch_data = model.get_batch(train_set, bucket_id)
            encoder_inputs, tags, tag_weights, batch_sequence_length = batch_data
            step_outputs = model.tagging_step(sess, encoder_inputs, tags,
                                              tag_weights,
                                              batch_sequence_length,
                                              bucket_id, False)
            _, step_loss, tagging_logits = step_outputs
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                # Test: print the best and second-best tag sequence per sample.
                count = 0
                word_list = list()
                ref_tag_list = list()
                hyp_tag_list = list()
                for bucket_id in xrange(len(_buckets)):
                    for i in xrange(len(test_set[bucket_id])):
                        count += 1
                        sample = model_test.get_one(test_set, bucket_id, i)
                        encoder_inputs, tags, tag_weights, sequence_length = sample
                        step_outputs = model_test.tagging_step(
                            sess, encoder_inputs, tags, tag_weights,
                            sequence_length, bucket_id, True)
                        _, step_loss, tagging_logits = step_outputs

                        lst = []
                        string = ""
                        for num in encoder_inputs:
                            num = num[0]
                            word = rev_vocab[num]
                            if word == "_PAD" or word == "_UNK":
                                continue
                            lst.append(word)
                            string = string + word + " "
                        string = string + " : "
                        string2 = string

                        # Best tag per position (highest logit).
                        for word in tagging_logits:
                            word = word[0]
                            sort_num = np.argsort(word)
                            b = sort_num[-1]
                            word = rev_tag_vocab[b]
                            if word == "_PAD" or word == "_UNK":
                                continue
                            lst.append(word)
                            string = string + word + " "
                        print(string)

                        # Second-best tag per position.
                        for word in tagging_logits:
                            word = word[0]
                            sort_num = np.argsort(word)
                            b = sort_num[-2]
                            word = rev_tag_vocab[b]
                            if word == "_PAD" or word == "_UNK":
                                continue
                            lst.append(word)
                            string2 = string2 + word + " "
                        print(string2)
                        print("\n")
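# The per-position top-2 tag readout above relies on np.argsort returning
# indices in ascending score order: order[-1] is the highest-scoring tag id
# and order[-2] the runner-up, regardless of vocabulary size. A tiny
# standalone demonstration:
import numpy as np

logits = np.array([0.1, 2.5, 0.3, 1.7])   # tag scores for one position
order = np.argsort(logits)                 # indices in ascending score order
best, second_best = order[-1], order[-2]   # 1 and 3 for this example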
def test():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("\nPreparing data in %s" % FLAGS.data_dir)
    data_set = data_utils.prepare_multi_task_data(FLAGS.data_dir,
                                                  FLAGS.in_vocab_size,
                                                  FLAGS.out_vocab_size)
    in_seq_test, out_seq_test, label_test = data_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = data_set[3]

    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocab(label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        print("\nCreating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print("Created model with "
              "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("\nReading test data")
        test_set = read_data(in_seq_test, out_seq_test, label_test)

        def run_valid_test(data_set, mode):  # mode: Eval, Test
            # Run evals on development/test set and print the accuracy.
            word_list = list()
            ref_tag_list = list()
            hyp_tag_list = list()
            ref_label_list = list()
            hyp_label_list = list()
            correct_count = 0
            accuracy = 0.0
            tagging_eval_result = dict()
            for bucket_id in xrange(len(_buckets)):
                eval_loss = 0.0
                count = 0
                for i in xrange(len(data_set[bucket_id])):
                    count += 1
                    sample = model_test.get_one(data_set, bucket_id, i)
                    encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                    tagging_logits = []
                    class_logits = []
                    if task['joint'] == 1:
                        step_outputs = model_test.joint_step(
                            sess, encoder_inputs, tags, tag_weights, labels,
                            sequence_length, bucket_id, True)
                        _, step_loss, tagging_logits, class_logits = step_outputs
                        class_prob = _softmax(class_logits[0])
                    elif task['tagging'] == 1:
                        step_outputs = model_test.tagging_step(
                            sess, encoder_inputs, tags, tag_weights,
                            sequence_length, bucket_id, True)
                        _, step_loss, tagging_logits = step_outputs
                    elif task['intent'] == 1:
                        step_outputs = model_test.classification_step(
                            sess, encoder_inputs, labels, sequence_length,
                            bucket_id, True)
                        _, step_loss, class_logits = step_outputs
                    eval_loss += step_loss / len(data_set[bucket_id])
                    hyp_label = None
                    if task['intent'] == 1:
                        ref_label_list.append(rev_label_vocab[labels[0][0]])
                        hyp_label = np.argmax(class_logits[0], 0)
                        hyp_label_list.append(rev_label_vocab[hyp_label])
                        if labels[0] == hyp_label:
                            correct_count += 1
                    if task['tagging'] == 1:
                        word_list.append([
                            rev_vocab[x[0]]
                            for x in encoder_inputs[:sequence_length[0]]
                        ])
                        ref_tag = [x[0] for x in tags[:sequence_length[0]]]
                        ref_tag_list.append([
                            rev_tag_vocab[x[0]]
                            for x in tags[:sequence_length[0]]
                        ])
                        hyp_tag = [
                            np.argmax(x)
                            for x in tagging_logits[:sequence_length[0]]
                        ]
                        hyp_tag_list.append([
                            rev_tag_vocab[np.argmax(x)]
                            for x in tagging_logits[:sequence_length[0]]
                        ])
                        # Error analysis: print every misclassified intent or
                        # mis-tagged slot sequence, with the top-3 intents.
                        if labels[0] != hyp_label or ref_tag != hyp_tag:
                            error_type = []
                            if labels[0] != hyp_label:
                                error_type.append("Intent misclassification")
                            if ref_tag != hyp_tag:
                                error_type.append("Slot error")
                            print("\n" + ", ".join(error_type))
                            print("(intent) input: (%s) %s"
                                  % (rev_label_vocab[labels[0][0]],
                                     " ".join([rev_vocab[x[0]] for x in
                                               encoder_inputs[:sequence_length[0]]])))
                            print("true slots: %s" % " ".join(ref_tag_list[-1]))
                            print("pred slots: %s" % " ".join(hyp_tag_list[-1]))
                            pred_labels = np.argsort(class_prob)[-3:]
                            print("Top 3 predicted intents:")
                            for idx in reversed(pred_labels):
                                print("%s (%.4f)" % (rev_label_vocab[idx],
                                                     class_prob[idx]))

            accuracy = float(correct_count) * 100 / count
            if task['intent'] == 1:
                print(" %s accuracy: %.2f %d/%d"
                      % (mode, accuracy, correct_count, count))
                sys.stdout.flush()
            '''
            if task['tagging'] == 1:
                tagging_eval_result = conlleval(hyp_tag_list, ref_tag_list,
                                                word_list, None)
                print(" %s f1-score: %.2f" % (mode, tagging_eval_result['f1']))
                sys.stdout.flush()
            return accuracy, tagging_eval_result
            '''
            return accuracy, ref_label_list, hyp_label_list

        # test, run test after each validation for development purpose.
        # test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')
        test_accuracy, ref_label_list, hyp_label_list = run_valid_test(
            test_set, 'Test')

        # Compute confusion matrix
        cnf_matrix = confusion_matrix(ref_label_list, hyp_label_list,
                                      labels=rev_label_vocab)
        np.set_printoptions(precision=2)

        # Plot non-normalized confusion matrix
        plt.figure(figsize=(12, 10))
        plot_confusion_matrix(cnf_matrix, classes=rev_label_vocab,
                              title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        plt.figure(figsize=(12, 10))
        plot_confusion_matrix(cnf_matrix, classes=rev_label_vocab,
                              normalize=True,
                              title='Normalized confusion matrix')
        plt.show()
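# _softmax above is defined elsewhere in this variant; based on how it is
# called (1-D intent logits in, a probability vector out), a numerically
# stable version would look like the following sketch (an assumption, not
# necessarily the project's own implementation):
import numpy as np

def _softmax_sketch(x):
    e = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e / e.sum()

# _softmax_sketch(np.array([1.0, 2.0, 3.0])) -> probabilities summing to 1.0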
def train():
    tf.logging.info('Applying Parameters:')
    tf.logging.info("Preparing data in %s" % FLAGS.data_dir)
    nowTime = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    tf.logging.set_verbosity(tf.logging.INFO)
    handlers = [
        logging.FileHandler(os.path.join(FLAGS.log, nowTime + '.log')),
        logging.StreamHandler(sys.stdout)
    ]
    logging.getLogger('tensorflow').handlers = handlers

    data_set = data_utils.prepare_multi_task_data(FLAGS.data_dir)
    in_seq_train, out_seq_train, label_train = data_set[0]
    in_seq_dev, out_seq_dev, label_dev = data_set[1]
    in_seq_test, out_seq_test, label_test = data_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = data_set[3]

    result_dir = FLAGS.train_dir + '/test_results'
    if not tf.gfile.IsDirectory(result_dir):
        tf.gfile.MakeDirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    # Cache each vocabulary as JSON under data_bak/ so later runs skip the
    # rebuild. Reverse vocabularies are serialized with string keys, so their
    # keys are converted back to int on load.
    if not tf.gfile.Exists('data_bak/vocab.json') or not tf.gfile.Exists(
            'data_bak/rev_vocab.json'):
        vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
        with tf.gfile.GFile('data_bak/vocab.json', 'w') as vocab_file, \
                tf.gfile.GFile('data_bak/rev_vocab.json', 'w') as rev_vocab_file:
            vocab_file.write(json.dumps(vocab, ensure_ascii=False, indent=4))
            rev_vocab_file.write(json.dumps(rev_vocab, ensure_ascii=False, indent=4))
    else:
        with tf.gfile.GFile('data_bak/vocab.json', 'r') as vocab_file, \
                tf.gfile.GFile('data_bak/rev_vocab.json', 'r') as rev_vocab_file:
            vocab = json.load(vocab_file)
            rev_vocab = seq.json(rev_vocab_file).map(
                lambda x: (int(x[0]), x[1])).to_dict()

    if not tf.gfile.Exists('data_bak/tag_vocab.json') or not tf.gfile.Exists(
            'data_bak/rev_tag_vocab.json'):
        tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
        with tf.gfile.GFile('data_bak/tag_vocab.json', 'w') as tag_vocab_file, \
                tf.gfile.GFile('data_bak/rev_tag_vocab.json', 'w') as rev_tag_vocab_file:
            tag_vocab_file.write(json.dumps(tag_vocab, ensure_ascii=False, indent=4))
            rev_tag_vocab_file.write(json.dumps(rev_tag_vocab, ensure_ascii=False, indent=4))
    else:
        with tf.gfile.GFile('data_bak/tag_vocab.json', 'r') as tag_vocab_file, \
                tf.gfile.GFile('data_bak/rev_tag_vocab.json', 'r') as rev_tag_vocab_file:
            tag_vocab = json.load(tag_vocab_file)
            rev_tag_vocab = seq.json(rev_tag_vocab_file).map(
                lambda x: (int(x[0]), x[1])).to_dict()

    if not tf.gfile.Exists('data_bak/label_vocab.json') or not tf.gfile.Exists(
            'data_bak/rev_label_vocab.json'):
        label_vocab, rev_label_vocab = data_utils.initialize_vocab(label_vocab_path)
        with tf.gfile.GFile('data_bak/label_vocab.json', 'w') as label_vocab_file, \
                tf.gfile.GFile('data_bak/rev_label_vocab.json', 'w') as rev_label_vocab_file:
            label_vocab_file.write(json.dumps(label_vocab, ensure_ascii=False, indent=4))
            rev_label_vocab_file.write(json.dumps(rev_label_vocab, ensure_ascii=False, indent=4))
    else:
        with tf.gfile.GFile('data_bak/label_vocab.json', 'r') as label_vocab_file, \
                tf.gfile.GFile('data_bak/rev_label_vocab.json', 'r') as rev_label_vocab_file:
            label_vocab = json.load(label_vocab_file)
            rev_label_vocab = seq.json(rev_label_vocab_file).map(
                lambda x: (int(x[0]), x[1])).to_dict()

    # Read data into buckets and compute their sizes.
    tf.logging.info("Reading train/valid/test data (training set limit: %d)."
                    % FLAGS.max_train_data_size)
    dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
    test_set = read_data(in_seq_test, out_seq_test, label_test)
    train_set = read_data(in_seq_train, out_seq_train, label_train)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
        # device_count = {'gpu': 2}
    )
    with tf.Session(config=config) as sess:
        # Create model.
        tf.logging.info("Max sequence length: %d." % _buckets[0][0])
        tf.logging.info("Creating %d layers of %d units."
                        % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        tf.logging.info("Creating model with "
                        "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d."
                        % (len(vocab), len(tag_vocab), len(label_vocab)))

        # TensorBoard: track the loss and the best dev/test scores.
        tf.summary.scalar('loss', model.loss)
        tf.summary.scalar('dev_accuracy', model.best_dev_accuracy)
        tf.summary.scalar('dev_f1', model.best_dev_f1)
        tf.summary.scalar('test_accuracy', model.best_test_accuracy)
        tf.summary.scalar('test_f1', model.best_test_f1)
        model.merged = tf.summary.merge_all()
        model.writer = tf.summary.FileWriter(os.path.join(FLAGS.tensorboard, nowTime))
        model.writer.add_graph(graph=sess.graph)

        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        no_improve_step = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            batch_data = model.get_batch(train_set, bucket_id)
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = batch_data
            if task['joint'] == 1:
                step_outputs = model.joint_step(sess, encoder_inputs, tags,
                                                tag_weights, labels,
                                                batch_sequence_length,
                                                bucket_id, False)
                _, step_loss, tagging_logits, class_logits = step_outputs
            elif task['tagging'] == 1:
                step_outputs = model.tagging_step(sess, encoder_inputs, tags,
                                                  tag_weights,
                                                  batch_sequence_length,
                                                  bucket_id, False)
                _, step_loss, tagging_logits = step_outputs
            elif task['intent'] == 1:
                step_outputs = model.classification_step(
                    sess, encoder_inputs, labels, batch_sequence_length,
                    bucket_id, False)
                _, step_loss, class_logits = step_outputs
            summary = sess.run(model.merged, model.input_feed)
            model.writer.add_summary(summary, model.global_step.eval())
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                tf.logging.info(
                    "global step %d step-time %.2f. Training perplexity %.2f"
                    % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                step_time, loss = 0.0, 0.0

                def run_valid_test(data_set, mode):  # mode: Eval, Test
                    # Run evals on development/test set and print the accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    tagging_eval_result = dict()
                    for bucket_id in xrange(len(_buckets)):
                        eval_loss = 0.0
                        count = 0
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            sample = model_test.get_one(data_set, bucket_id, i)
                            encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                            tagging_logits = []
                            class_logits = []
                            if task['joint'] == 1:
                                step_outputs = model_test.joint_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    labels, sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits, class_logits = step_outputs
                            elif task['tagging'] == 1:
                                step_outputs = model_test.tagging_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits = step_outputs
                            elif task['intent'] == 1:
                                step_outputs = model_test.classification_step(
                                    sess, encoder_inputs, labels,
                                    sequence_length, bucket_id, True)
                                _, step_loss, class_logits = step_outputs
                            eval_loss += step_loss / len(data_set[bucket_id])
                            hyp_label = None
                            if task['intent'] == 1:
                                ref_label_list.append(rev_label_vocab[labels[0][0]])
                                hyp_label = np.argmax(class_logits[0], 0)
                                hyp_label_list.append(rev_label_vocab[hyp_label])
                                if labels[0] == hyp_label:
                                    correct_count += 1
                            if task['tagging'] == 1:
                                word_list.append([
                                    rev_vocab[x[0]]
                                    for x in encoder_inputs[:sequence_length[0]]
                                ])
                                ref_tag_list.append([
                                    rev_tag_vocab[x[0]]
                                    for x in tags[:sequence_length[0]]
                                ])
                                hyp_tag_list.append([
                                    rev_tag_vocab[np.argmax(x)]
                                    for x in tagging_logits[:sequence_length[0]]
                                ])
                    accuracy = float(correct_count) * 100 / count
                    if task['intent'] == 1:
                        tf.logging.info("\t%s accuracy: %.2f %d/%d"
                                        % (mode, accuracy, correct_count, count))
                        sys.stdout.flush()
                    if task['tagging'] == 1:
                        if mode == 'Eval':
                            tagging_out_file = current_tagging_valid_out_file
                        elif mode == 'Test':
                            tagging_out_file = current_tagging_test_out_file
                        tagging_eval_result = conlleval(hyp_tag_list,
                                                        ref_tag_list,
                                                        word_list,
                                                        tagging_out_file)
                        tf.logging.info("\t%s f1-score: %.2f"
                                        % (mode, tagging_eval_result['f1']))
                        sys.stdout.flush()
                    return accuracy, tagging_eval_result

                # valid
                valid_accuracy, valid_tagging_result = run_valid_test(dev_set, 'Eval')
                if task['tagging'] == 1 and task['intent'] == 0:
                    best_dev_f1 = model.best_dev_f1.eval()
                    if valid_tagging_result['f1'] > best_dev_f1:
                        tf.assign(model.best_dev_f1, valid_tagging_result['f1']).eval()
                        # save the best output file
                        subprocess.call([
                            'mv', current_tagging_valid_out_file,
                            current_tagging_valid_out_file + '.best_f1_%.2f' % best_dev_f1
                        ])
                        model.saver.save(sess, checkpoint_path,
                                         global_step=model.global_step)
                        no_improve_step = 0
                    else:
                        no_improve_step += 1
                if task['tagging'] == 1 and task['intent'] == 1:
                    best_dev_accuracy = model.best_dev_accuracy.eval()
                    best_dev_f1 = model.best_dev_f1.eval()
                    if (valid_accuracy > best_dev_accuracy
                            and valid_tagging_result['f1'] > best_dev_f1):
                        tf.assign(model.best_dev_accuracy, valid_accuracy).eval()
                        tf.assign(model.best_dev_f1, valid_tagging_result['f1']).eval()
                        subprocess.call([
                            'mv', current_tagging_valid_out_file,
                            current_tagging_valid_out_file + '.best_f1_%.2f' % best_dev_f1
                        ])
                        model.saver.save(sess, checkpoint_path,
                                         global_step=model.global_step)
                        no_improve_step = 0
                    else:
                        no_improve_step += 1

                # test, run test after each validation for development purpose.
                test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')
                if task['tagging'] == 1 and task['intent'] == 0:
                    best_test_f1 = model.best_test_f1.eval()
                    if test_tagging_result['f1'] > best_test_f1:
                        tf.assign(model.best_test_f1, test_tagging_result['f1']).eval()
                        # save the best output file
                        subprocess.call([
                            'mv', current_tagging_test_out_file,
                            current_tagging_test_out_file + '.best_f1_%.2f' % best_test_f1
                        ])
                if task['tagging'] == 1 and task['intent'] == 1:
                    best_test_accuracy = model.best_test_accuracy.eval()
                    best_test_f1 = model.best_test_f1.eval()
                    if (test_accuracy > best_test_accuracy
                            and test_tagging_result['f1'] > best_test_f1):
                        tf.assign(model.best_test_accuracy, test_accuracy).eval()
                        tf.assign(model.best_test_f1, test_tagging_result['f1']).eval()
                        subprocess.call([
                            'mv', current_tagging_test_out_file,
                            current_tagging_test_out_file + '.best_f1_%.2f' % best_test_f1
                        ])

                # Early stopping: quit after too many checkpoints without a
                # dev improvement.
                if no_improve_step > FLAGS.no_improve_per_step:
                    tf.logging.info("continuous no improve per step "
                                    + str(FLAGS.no_improve_per_step)
                                    + ", auto stop...")
                    tf.logging.info("max accuracy is: "
                                    + str(model.best_dev_accuracy.eval())
                                    + ", max f1 score is: "
                                    + str(model.best_dev_f1.eval()))
                    break
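# The no_improve_step bookkeeping above is plain early stopping: reset the
# counter whenever the dev score improves, increment otherwise, and abort
# once it exceeds a patience budget (FLAGS.no_improve_per_step). The same
# idea as a self-contained skeleton:
def _should_stop(dev_scores, patience):
    """dev_scores: one score per checkpoint. Stop when the best score is
    more than `patience` checkpoints old."""
    if not dev_scores:
        return False
    best_idx = max(range(len(dev_scores)), key=lambda i: dev_scores[i])
    return len(dev_scores) - 1 - best_idx > patience

assert _should_stop([1.0, 2.0, 2.0, 2.0, 2.0], patience=2)
assert not _should_stop([1.0, 2.0, 3.0], patience=2)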
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    sent_train, label_train, \
        sent_valid, label_valid, \
        sent_test, label_test, \
        sent_vocab_path, label_vocab_path = data_utils.prepare_multi_task_data(
            FLAGS.data_dir, FLAGS.sent_vocab_size)

    result_dir = FLAGS.data_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_valid_out_file = result_dir + '/valid_hyp'
    current_test_out_file = result_dir + '/test_hyp'

    sent_vocab, rev_sent_vocab = data_utils.initialize_vocabulary(sent_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)
    print(rev_label_vocab)
    sent_vocab_size = len(sent_vocab)
    label_vocab_size = len(label_vocab)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % FLAGS.max_sequence_length)
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        sess.run(tf.global_variables_initializer())
        model, model_test = create_model(sess, sent_vocab_size, label_vocab_size)
        print("Creating model with sent_vocab_size=%d, "
              "and label_vocab_size=%d." % (sent_vocab_size, label_vocab_size))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        valid_set = data_utils.read_data(sent_valid, label_valid)
        test_set = data_utils.read_data(sent_test, label_test)
        train_set = data_utils.read_data(sent_train, label_train)

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            # Get a batch and make a step.
            start_time = time.time()
            batch_inputs, batch_labels, batch_sequence_length = model.get_batch(train_set)
            # print(batch_inputs[0].shape)
            _, step_loss, logits = model.step(sess, batch_inputs, batch_labels,
                                              batch_sequence_length, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                def write_eval_result(result_list, result_path):
                    with tf.gfile.GFile(result_path, 'w') as f:
                        for i in range(len(result_list)):
                            f.write(result_list[i] + '\n')

                def run_valid_test(data_set, mode):  # mode: Valid, Test
                    # Run evals on development/test set and print the accuracy.
                    ref_label_list = list()
                    hyp_label_list = list()
                    label_correct_count = 0
                    eval_loss = 0.0
                    count = 0
                    for i in range(len(data_set)):
                        count += 1
                        inputs, labels, sequence_length = model_test.get_one(data_set, i)
                        _, _step_loss, logits = model_test.step(
                            sess, inputs, labels, sequence_length, True)
                        eval_loss += _step_loss / len(data_set)
                        ref_label = np.argmax(labels)
                        ref_label_list.append(rev_label_vocab[ref_label])
                        hyp_label = np.argmax(logits[0])
                        hyp_label_list.append(rev_label_vocab[hyp_label])
                        if ref_label == hyp_label:
                            label_correct_count += 1
                    label_accuracy = float(label_correct_count) * 100 / count
                    print(" %s label_accuracy: %.2f %d/%d"
                          % (mode, label_accuracy, label_correct_count, count))
                    sys.stdout.flush()
                    out_file = None
                    if mode == 'Valid':
                        out_file = current_valid_out_file
                    elif mode == 'Test':
                        out_file = current_test_out_file
                    # write prediction result to output file path
                    write_eval_result(hyp_label_list, out_file)
                    return label_accuracy, hyp_label_list

                # valid
                valid_label_accuracy, hyp_list = run_valid_test(valid_set, 'Valid')
                if valid_label_accuracy > best_valid_score:
                    best_valid_score = valid_label_accuracy
                    # save the best output file
                    subprocess.call([
                        'mv', current_valid_out_file,
                        current_valid_out_file + '_best_acc_%.2f' % best_valid_score
                    ])

                # test, run test after each validation for development purpose.
                test_label_accuracy, hyp_list = run_valid_test(test_set, 'Test')
                if test_label_accuracy > best_test_score:
                    best_test_score = test_label_accuracy
                    # save the best output file
                    subprocess.call([
                        'mv', current_test_out_file,
                        current_test_out_file + '_best_acc_%.2f' % best_test_score
                    ])
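# Several variants above shell out to `mv` to stamp the best hypothesis file
# with its score. A portable standard-library equivalent (same effect, no
# subprocess dependency), shown for reference:
import os

def _stamp_best(path, score):
    os.rename(path, '%s_best_acc_%.2f' % (path, score))

# _stamp_best('test_results/valid_hyp', 97.50)
# -> renames the file to 'test_results/valid_hyp_best_acc_97.50'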
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].iteritems():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print("Creating model with source_vocab_size=%d, target_vocab_size=%d, "
              "and label_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        def run_valid_test(data_set, mode):  # mode: Eval, Test
            # Run evals on development/test set and print the accuracy.
            word_list = list()
            ref_tag_list = list()
            hyp_tag_list = list()
            ref_label_list = list()
            hyp_label_list = list()
            correct_count = 0
            accuracy = 0.0
            tagging_eval_result = dict()
            for bucket_id in xrange(len(_buckets)):
                eval_loss = 0.0
                count = 0
                for i in xrange(len(data_set[bucket_id])):
                    count += 1
                    encoder_inputs, tags, tag_weights, sequence_length, labels = model_test.get_one(
                        data_set, bucket_id, i)
                    tagging_logits = []
                    classification_logits = []
                    if task['joint'] == 1:
                        _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                            sess, encoder_inputs, tags, tag_weights, labels,
                            sequence_length, bucket_id, True)
                    elif task['tagging'] == 1:
                        _, step_loss, tagging_logits = model_test.tagging_step(
                            sess, encoder_inputs, tags, tag_weights,
                            sequence_length, bucket_id, True)
                    elif task['intent'] == 1:
                        _, step_loss, classification_logits = model_test.classification_step(
                            sess, encoder_inputs, labels, sequence_length,
                            bucket_id, True)
                    eval_loss += step_loss / len(data_set[bucket_id])
                    hyp_label = None
                    if task['intent'] == 1:
                        ref_label_list.append(rev_label_vocab[labels[0][0]])
                        hyp_label = np.argmax(classification_logits[0], 0)
                        hyp_label_list.append(rev_label_vocab[hyp_label])
                        if labels[0] == hyp_label:
                            correct_count += 1
                    if task['tagging'] == 1:
                        word_list.append([
                            rev_vocab[x[0]]
                            for x in encoder_inputs[:sequence_length[0]]
                        ])
                        ref_tag_list.append([
                            rev_tag_vocab[x[0]]
                            for x in tags[:sequence_length[0]]
                        ])
                        hyp_tag_list.append([
                            rev_tag_vocab[np.argmax(x)]
                            for x in tagging_logits[:sequence_length[0]]
                        ])
            accuracy = float(correct_count) * 100 / count
            if task['intent'] == 1:
                print(" %s accuracy: %.2f %d/%d"
                      % (mode, accuracy, correct_count, count))
                sys.stdout.flush()
            if task['tagging'] == 1:
                if mode == 'Eval':
                    tagging_out_file = current_tagging_valid_out_file
                elif mode == 'Test':
                    print("Test!!!")
                    tagging_out_file = current_tagging_test_out_file
                tagging_eval_result = conlleval(hyp_tag_list, ref_tag_list,
                                                word_list, tagging_out_file)
                print(" %s f1-score: %.2f" % (mode, tagging_eval_result['f1']))
                sys.stdout.flush()
            return accuracy, tagging_eval_result, hyp_label_list, hyp_tag_list

        # run the test set once and dump the hypothesized tag sequences,
        # one sentence per line, to the path given on the command line.
        print("run valid")
        test_accuracy, test_tagging_result, label, tag = run_valid_test(test_set, 'Test')
        with open(sys.argv[1], "w+") as f:
            for i in range(len(tag)):
                f.write(' '.join(tag[i]) + '\n')
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].iteritems():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    (in_seq_train, out_seq_train, label_train,
     in_seq_dev, out_seq_dev, label_dev,
     in_seq_test, out_seq_test, label_test,
     vocab_path, tag_vocab_path, label_vocab_path) = data_utils.prepare_multi_task_data(
         FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)

    with tf.Session() as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab),
                                         len(label_vocab))
        print("Creating model with source_vocab_size=%d, target_vocab_size=%d, "
              "and label_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = model.get_batch(
                train_set, bucket_id)
            if task['joint'] == 1:
                _, step_loss, tagging_logits, classification_logits = model.joint_step(
                    sess, encoder_inputs, tags, tag_weights, labels,
                    batch_sequence_length, bucket_id, False)
            elif task['tagging'] == 1:
                _, step_loss, tagging_logits = model.tagging_step(
                    sess, encoder_inputs, tags, tag_weights,
                    batch_sequence_length, bucket_id, False)
            elif task['intent'] == 1:
                _, step_loss, classification_logits = model.classification_step(
                    sess, encoder_inputs, labels, batch_sequence_length,
                    bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss = 0.0, 0.0

                def run_valid_test(data_set, mode):  # mode: Eval, Test
                    # Run evals on development/test set and print the accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    tagging_eval_result = dict()
                    for bucket_id in xrange(len(_buckets)):
                        eval_loss = 0.0
                        count = 0
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            encoder_inputs, tags, tag_weights, sequence_length, labels = model_test.get_one(
                                data_set, bucket_id, i)
                            tagging_logits = []
                            classification_logits = []
                            if task['joint'] == 1:
                                _, step_loss, tagging_logits, classification_logits = model_test.joint_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    labels, sequence_length, bucket_id, True)
                            elif task['tagging'] == 1:
                                _, step_loss, tagging_logits = model_test.tagging_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    sequence_length, bucket_id, True)
                            elif task['intent'] == 1:
                                _, step_loss, classification_logits = model_test.classification_step(
                                    sess, encoder_inputs, labels,
                                    sequence_length, bucket_id, True)
                            eval_loss += step_loss / len(data_set[bucket_id])
                            hyp_label = None
                            if task['intent'] == 1:
                                ref_label_list.append(rev_label_vocab[labels[0][0]])
                                hyp_label = np.argmax(classification_logits[0], 0)
                                hyp_label_list.append(rev_label_vocab[hyp_label])
                                if labels[0] == hyp_label:
                                    correct_count += 1
                            if task['tagging'] == 1:
                                word_list.append([
                                    rev_vocab[x[0]]
                                    for x in encoder_inputs[:sequence_length[0]]
                                ])
                                ref_tag_list.append([
                                    rev_tag_vocab[x[0]]
                                    for x in tags[:sequence_length[0]]
                                ])
                                hyp_tag_list.append([
                                    rev_tag_vocab[np.argmax(x)]
                                    for x in tagging_logits[:sequence_length[0]]
                                ])
                    accuracy = float(correct_count) * 100 / count
                    if task['intent'] == 1:
                        print(" %s accuracy: %.2f %d/%d"
                              % (mode, accuracy, correct_count, count))
                        sys.stdout.flush()
                    if task['tagging'] == 1:
                        if mode == 'Eval':
                            tagging_out_file = current_tagging_valid_out_file
                        elif mode == 'Test':
                            tagging_out_file = current_tagging_test_out_file
                        tagging_eval_result = conlleval(hyp_tag_list,
                                                        ref_tag_list,
                                                        word_list,
                                                        tagging_out_file)
                        print(" %s f1-score: %.2f" % (mode, tagging_eval_result['f1']))
                        sys.stdout.flush()
                    return accuracy, tagging_eval_result

                # valid
                valid_accuracy, valid_tagging_result = run_valid_test(dev_set, 'Eval')
                if task['tagging'] == 1 and valid_tagging_result['f1'] > best_valid_score:
                    best_valid_score = valid_tagging_result['f1']
                    # save the best output file
                    subprocess.call([
                        'mv', current_tagging_valid_out_file,
                        current_tagging_valid_out_file + '.best_f1_%.2f' % best_valid_score
                    ])

                # test, run test after each validation for development purpose.
                test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')
                if task['tagging'] == 1 and test_tagging_result['f1'] > best_test_score:
                    best_test_score = test_tagging_result['f1']
                    # save the best output file
                    subprocess.call([
                        'mv', current_tagging_test_out_file,
                        current_tagging_test_out_file + '.best_f1_%.2f' % best_test_score
                    ])
def testing():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    in_seq_train, out_seq_train, label_train, \
        in_seq_dev, out_seq_dev, label_dev, \
        in_seq_test, out_seq_test, label_test, \
        vocab_path, tag_vocab_path, label_vocab_path = data_utils.prepare_multi_task_data(
            FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)
    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    # The session, vocabularies, and test model are shared with the gRPC
    # request handler, so they are kept as module-level globals.
    global sess
    global vocab
    global rev_vocab
    global tag_vocab
    global rev_tag_vocab
    global label_vocab
    global rev_label_vocab
    global model_test
    vocab, rev_vocab = data_utils.initialize_vocabulary(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocabulary(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocabulary(label_vocab_path)

    sess = tf.Session()
    # Create model.
    print("Max sequence length: %d." % _buckets[0][0])
    print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
    model, model_test = create_model(sess, len(vocab), len(tag_vocab), len(label_vocab))
    print("Creating model with source_vocab_size=%d, target_vocab_size=%d, "
          "and label_vocab_size=%d." % (len(vocab), len(tag_vocab), len(label_vocab)))

    # The model has been loaded; serve it over gRPC.
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=3))
    FoodBot_pb2.add_FoodBotRequestServicer_to_server(FoodbotRequest(), server)
    server.add_insecure_port('[::]:50055')
    server.start()
    print("gRPC server is running. Press Ctrl+C to stop it.")
    try:
        # Block the main thread; incoming requests are handled by the
        # server's thread pool.
        while True:
            time.sleep(24 * 60 * 60)
    except KeyboardInterrupt:
        server.stop(0)
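# --- Illustrative sketch (added for clarity; not part of the original script) ---
# A minimal client for the gRPC server that testing() starts on port 50055.
# The stub class (FoodBotRequestStub), the RPC method (GetResponse), and the
# request message (Request with a "sentence" field) are ASSUMPTIONS inferred
# from the servicer registration above; the real names are defined in the
# FoodBot .proto file and its generated FoodBot_pb2 module.
import grpc
import FoodBot_pb2

def query_foodbot(sentence):
    channel = grpc.insecure_channel('localhost:50055')
    stub = FoodBot_pb2.FoodBotRequestStub(channel)   # assumed generated stub name
    request = FoodBot_pb2.Request(sentence=sentence)  # assumed message type/field
    return stub.GetResponse(request)                  # assumed RPC name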
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    data_set = data_utils.prepare_multi_task_data(
        FLAGS.data_dir, FLAGS.in_vocab_size, FLAGS.out_vocab_size)
    in_seq_train, out_seq_train, label_train = data_set[0]
    in_seq_dev, out_seq_dev, label_dev = data_set[1]
    in_seq_test, out_seq_test, label_test = data_set[2]
    vocab_path, tag_vocab_path, label_vocab_path = data_set[3]

    result_dir = FLAGS.train_dir + '/test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_tagging_valid_out_file = result_dir + '/tagging.valid.hyp.txt'
    current_tagging_test_out_file = result_dir + '/tagging.test.hyp.txt'

    vocab, rev_vocab = data_utils.initialize_vocab(vocab_path)
    tag_vocab, rev_tag_vocab = data_utils.initialize_vocab(tag_vocab_path)
    label_vocab, rev_label_vocab = data_utils.initialize_vocab(label_vocab_path)

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.23),
        # device_count={'gpu': 2}
    )
    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % _buckets[0][0])
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model, model_test = create_model(sess, len(vocab), len(tag_vocab), len(label_vocab))
        print("Creating model with "
              "source_vocab_size=%d, target_vocab_size=%d, label_vocab_size=%d."
              % (len(vocab), len(tag_vocab), len(label_vocab)))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        dev_set = read_data(in_seq_dev, out_seq_dev, label_dev)
        test_set = read_data(in_seq_test, out_seq_test, label_test)
        train_set = read_data(in_seq_train, out_seq_train, label_train)
        train_bucket_sizes = [len(train_set[b]) for b in xrange(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in xrange(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            batch_data = model.get_batch(train_set, bucket_id)
            encoder_inputs, tags, tag_weights, batch_sequence_length, labels = batch_data
            if task['joint'] == 1:
                step_outputs = model.joint_step(sess, encoder_inputs, tags,
                                                tag_weights, labels,
                                                batch_sequence_length, bucket_id, False)
                _, step_loss, tagging_logits, class_logits = step_outputs
            elif task['tagging'] == 1:
                step_outputs = model.tagging_step(sess, encoder_inputs, tags,
                                                  tag_weights, batch_sequence_length,
                                                  bucket_id, False)
                _, step_loss, tagging_logits = step_outputs
            elif task['intent'] == 1:
                step_outputs = model.classification_step(sess, encoder_inputs, labels,
                                                         batch_sequence_length,
                                                         bucket_id, False)
                _, step_loss, class_logits = step_outputs
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save a checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2fs training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero the timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                def run_valid_test(data_set, mode):  # mode: Eval, Test
                    # Run evals on the development/test set and print accuracy.
                    word_list = list()
                    ref_tag_list = list()
                    hyp_tag_list = list()
                    ref_label_list = list()
                    hyp_label_list = list()
                    correct_count = 0
                    accuracy = 0.0
                    tagging_eval_result = dict()
                    eval_loss = 0.0
                    count = 0
                    for bucket_id in xrange(len(_buckets)):
                        for i in xrange(len(data_set[bucket_id])):
                            count += 1
                            sample = model_test.get_one(data_set, bucket_id, i)
                            encoder_inputs, tags, tag_weights, sequence_length, labels = sample
                            tagging_logits = []
                            class_logits = []
                            step_loss = None
                            if task['joint'] == 1:
                                step_outputs = model_test.joint_step(
                                    sess, encoder_inputs, tags, tag_weights, labels,
                                    sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits, class_logits = step_outputs
                            elif task['tagging'] == 1:
                                step_outputs = model_test.tagging_step(
                                    sess, encoder_inputs, tags, tag_weights,
                                    sequence_length, bucket_id, True)
                                _, step_loss, tagging_logits = step_outputs
                            elif task['intent'] == 1:
                                step_outputs = model_test.classification_step(
                                    sess, encoder_inputs, labels, sequence_length,
                                    bucket_id, True)
                                _, step_loss, class_logits = step_outputs
                            eval_loss += step_loss / len(data_set[bucket_id])
                            hyp_label = None
                            if task['intent'] == 1:
                                ref_label_list.append(rev_label_vocab[labels[0][0]])
                                hyp_label = np.argmax(class_logits[0], 0)
                                hyp_label_list.append(rev_label_vocab[hyp_label])
                                if labels[0] == hyp_label:
                                    correct_count += 1
                            if task['tagging'] == 1:
                                word_list.append([rev_vocab[x[0]] for x in
                                                  encoder_inputs[:sequence_length[0]]])
                                ref_tag_list.append([rev_tag_vocab[x[0]] for x in
                                                     tags[:sequence_length[0]]])
                                hyp_tag_list.append(
                                    [rev_tag_vocab[np.argmax(x)] for x in
                                     tagging_logits[:sequence_length[0]]])

                    accuracy = float(correct_count) * 100 / count
                    if task['intent'] == 1:
                        print(" %s accuracy: %.2f%% %d/%d"
                              % (mode, accuracy, correct_count, count))
                        sys.stdout.flush()
                    if task['tagging'] == 1:
                        tagging_out_file = None
                        if mode == 'Eval':
                            tagging_out_file = current_tagging_valid_out_file
                        elif mode == 'Test':
                            tagging_out_file = current_tagging_test_out_file
                        tagging_eval_result = conlleval(hyp_tag_list, ref_tag_list,
                                                        word_list, tagging_out_file)
                        print(" %s f1-score: %.2f%%" % (mode, tagging_eval_result['f1']))
                        sys.stdout.flush()
                    return accuracy, tagging_eval_result

                # Validation.
                valid_accuracy, valid_tagging_result = run_valid_test(dev_set, 'Eval')
                if task['tagging'] == 1 \
                        and valid_tagging_result['f1'] > best_valid_score:
                    best_valid_score = valid_tagging_result['f1']
                    # Keep the best output file.
                    subprocess.call(['mv', current_tagging_valid_out_file,
                                     current_tagging_valid_out_file
                                     + '.best_f1_%.2f' % best_valid_score])
                # Test: run after each validation, for development purposes.
                test_accuracy, test_tagging_result = run_valid_test(test_set, 'Test')
                if task['tagging'] == 1 \
                        and test_tagging_result['f1'] > best_test_score:
                    best_test_score = test_tagging_result['f1']
                    # Keep the best output file.
                    subprocess.call(['mv', current_tagging_test_out_file,
                                     current_tagging_test_out_file
                                     + '.best_f1_%.2f' % best_test_score])
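# --- Illustrative sketch (added for clarity; not part of the original script) ---
# How run_valid_test above scores intents: the predicted label id is the
# argmax over the classification logits, and accuracy is the percentage of
# samples whose prediction matches the reference id. A toy, self-contained
# version of that computation:
import numpy as np

def intent_accuracy(logits_batch, ref_label_ids):
    correct = 0
    for logits, ref in zip(logits_batch, ref_label_ids):
        hyp = np.argmax(logits, 0)  # predicted label id
        if hyp == ref:
            correct += 1
    return 100.0 * correct / len(ref_label_ids)

logits_batch = [np.array([0.1, 2.3, -1.0]), np.array([1.5, 0.2, 0.3])]
print(intent_accuracy(logits_batch, [1, 0]))  # 100.0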
def train():
    print('Applying Parameters:')
    for k, v in FLAGS.__dict__['__flags'].items():
        print('%s: %s' % (k, str(v)))
    print("Preparing data in %s" % FLAGS.data_dir)
    sent_train, slot_train, intent_train, \
        sent_valid, slot_valid, intent_valid, \
        sent_test, slot_test, intent_test, \
        sent_vocab_path, slot_vocab_path, intent_vocab_path = data_utils.prepare_multi_task_data(
            FLAGS.data_dir, FLAGS.sent_vocab_size)

    result_dir = './test_results'
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    current_valid_out_file = result_dir + '/valid_hyp'
    current_test_out_file = result_dir + '/test_hyp'
    current_train_out_file = result_dir + '/train_hyp'

    sent_vocab, rev_sent_vocab = data_utils.initialize_vocabulary(sent_vocab_path)
    s_attr_vocab, rev_s_attr_vocab = data_utils.initialize_vocabulary(slot_vocab_path[0])
    s_loc_vocab, rev_s_loc_vocab = data_utils.initialize_vocabulary(slot_vocab_path[1])
    s_name_vocab, rev_s_name_vocab = data_utils.initialize_vocabulary(slot_vocab_path[2])
    s_ope_vocab, rev_s_ope_vocab = data_utils.initialize_vocabulary(slot_vocab_path[3])
    intent_vocab, rev_intent_vocab = data_utils.initialize_vocabulary(intent_vocab_path)
    print(rev_intent_vocab)
    sent_vocab_size = len(sent_vocab)
    slot_vocab_size = [len(s_attr_vocab), len(s_loc_vocab),
                       len(s_name_vocab), len(s_ope_vocab)]
    intent_vocab_size = len(intent_vocab)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Create model.
        print("Max sequence length: %d." % FLAGS.max_sequence_length)
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        sess.run(tf.global_variables_initializer())
        model, model_test = create_model(sess, sent_vocab_size,
                                         slot_vocab_size, intent_vocab_size)
        print("Creating model with sent_vocab_size=%d, s_attr_vocab_size=%d, "
              "s_loc_vocab_size=%d, s_name_vocab_size=%d, "
              "s_ope_vocab_size=%d, and intent_vocab_size=%d."
              % (sent_vocab_size, slot_vocab_size[0], slot_vocab_size[1],
                 slot_vocab_size[2], slot_vocab_size[3], intent_vocab_size))

        # Read data into buckets and compute their sizes.
        print("Reading train/valid/test data (training set limit: %d)."
              % FLAGS.max_train_data_size)
        valid_set = data_utils.read_data(sent_valid, slot_valid, intent_valid)
        test_set = data_utils.read_data(sent_test, slot_test, intent_test)
        train_set = data_utils.read_data(sent_train, slot_train, intent_train)

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        best_valid_score = 0
        best_test_score = 0
        best_train_score = 0
        while model.global_step.eval() < FLAGS.max_training_steps:
            # Get a batch and make a step.
            start_time = time.time()
            batch_inputs, batch_s_attrs, batch_s_locs, batch_s_names, batch_s_opes, \
                batch_intents, batch_sequence_length = model.get_batch(train_set)
            _, step_loss, logits = model.step(sess, batch_inputs, batch_s_attrs,
                                              batch_s_locs, batch_s_names,
                                              batch_s_opes, batch_intents,
                                              batch_sequence_length, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d step-time %.2f. Training perplexity %.2f"
                      % (model.global_step.eval(), step_time, perplexity))
                sys.stdout.flush()
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "model.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
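# --- Illustrative sketch (added for clarity; not part of the original script) ---
# The checkpointing pattern used in all three train() variants, in isolation:
# tf.train.Saver writes "model.ckpt-<global_step>" files into the train
# directory, and tf.train.latest_checkpoint finds the newest one for restore.
# A minimal TF1-style example with a dummy variable; '/tmp/ckpt_demo' is a
# placeholder path, not a path used by the original scripts.
import os
import tensorflow as tf

train_dir = '/tmp/ckpt_demo'
if not os.path.isdir(train_dir):
    os.makedirs(train_dir)

w = tf.Variable(0.0, name='w')
global_step = tf.Variable(0, trainable=False, name='global_step')
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    path = saver.save(sess, os.path.join(train_dir, 'model.ckpt'),
                      global_step=global_step)
    print('saved to %s' % path)  # e.g. /tmp/ckpt_demo/model.ckpt-0

with tf.Session() as sess:
    # Restore the most recent checkpoint instead of re-initializing.
    saver.restore(sess, tf.train.latest_checkpoint(train_dir))
    print(sess.run(w))  # 0.0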