def validate(self, sess):
    """Run one pass over the validation feed and report the metrics.

    For every feed dict yielded by ``self.model.next_valid_feed()`` the
    validation accuracy, predictions, loss and attention tracker are
    fetched through ``self.perform_iteration``. Per-batch loss and
    accuracy are weighted by the number of samples in the batch and then
    averaged over all samples. BLEU and the mean character edit distance
    are computed through ``pm``; predictions, targets and sources are
    written to text files below ``SAVER_PATH['base']``; and, when
    ``self.summarywriter`` is set, the metrics are written as summaries.

    If the validation feed yields no samples, a message is printed and
    the function returns without computing metrics or writing files.

    Args:
        sess: session object handed to ``self.perform_iteration`` and used
            directly (``sess.run``) for the summary fetch.

    Returns:
        None.
    """
    print("Validating..")
    total_num_samples = 0
    losses, accuracies = [], []
    valid_y_strings, valid_t_strings, valid_x_strings = [], [], []
    attention_tracker = []
    # Bound up front so they are defined even if the feed yields nothing.
    res, v_feed_dict = None, None

    for v_feed_dict in self.model.next_valid_feed():
        fetches = {
            'accuracy': self.model.valid_accuracy,
            'ys': self.model.valid_ys,
            'loss': self.model.valid_loss,
            'attention_tracker': self.model.valid_attention_tracker
        }
        # The second return value is unused; binding it to `time` (as the
        # code did before) would hide any module of that name in here.
        res, _ = self.perform_iteration(sess, fetches, v_feed_dict)

        # keep track of num_samples in batch and total
        samples_in_batch = res['ys'].shape[0]
        total_num_samples += samples_in_batch

        # convert to strings
        valid_ys = res['ys']
        valid_ts = v_feed_dict[self.model.ts]
        valid_xs = v_feed_dict[self.model.Xs]
        str_ts, str_ys = utils.numpy_to_str(valid_ts, valid_ys,
                                            self.alphabet_tar)
        str_xs, _ = utils.numpy_to_str(valid_xs, valid_xs,
                                       self.alphabet_src)
        valid_y_strings += str_ys
        valid_t_strings += str_ts
        valid_x_strings += str_xs

        # collect loss and accuracy, weighted by batch size so that the
        # division below yields a per-sample mean
        losses.append(res['loss'] * samples_in_batch)
        accuracies.append(res['accuracy'] * samples_in_batch)
        attention_tracker.append(res['attention_tracker'].transpose(
            1, 0, 2))

    if total_num_samples == 0:
        # Without this guard the code below would divide by zero, index
        # into `res` (never assigned) and read attention_tracker[0].
        print("\tNo validation samples available, skipping validation.")
        print("Continue training..")
        return

    # convert all prediction strings to lists of words (for computing bleu)
    # NOTE(review): predictions are passed first but unpacked into
    # `t_words`; the names look crossed relative to the arguments.
    # Confirm the return order of utils.strs_to_words.
    t_words, y_words = utils.strs_to_words(valid_y_strings, valid_t_strings)

    # compute performance metrics
    valid_loss = np.sum(losses) / total_num_samples
    valid_acc = np.sum(accuracies) / total_num_samples
    corpus_bleu = pm.corpus_bleu(t_words, y_words)
    edit_dist = pm.mean_char_edit_distance(valid_y_strings, valid_t_strings)

    # print results (the visualisation uses the last batch only)
    self.visualize_ys(res['ys'], v_feed_dict)
    print('\t{:s}{:.5f}'.format('loss:'.ljust(25), valid_loss))
    print('\t{:s}{:.2f}%'.format('accuracy:'.ljust(25), (valid_acc * 100)))
    print('\t{:s}{:.5f}'.format('BLEU:'.ljust(25), corpus_bleu))
    print('\t{:s}{:.5f}'.format('Mean edit dist per char:'.ljust(25),
                                edit_dist))

    def dump_to_file(thefile, thelist):
        """Write the strings in `thelist` to `thefile`, one per line."""
        tot_str = '\n'.join(thelist)
        with open(thefile, "w") as handle:
            handle.write(tot_str)

    # TODO move this to setup
    run_name = self.name if self.name else 'no-name'
    path_bleu = 'bleu/%s-%s' % (run_name, self.timestamp)
    path_attention = 'attention/%s-%s' % (run_name, self.timestamp)
    path_to_bleu = os.path.join(SAVER_PATH['base'], path_bleu)
    path_to_attention = os.path.join(SAVER_PATH['base'], path_attention)
    if not os.path.exists(path_to_bleu):
        os.makedirs(path_to_bleu)
    if not os.path.exists(path_to_attention):
        os.makedirs(path_to_attention)

    reference = os.path.join(path_to_bleu, 'reference.txt')
    translated = os.path.join(path_to_bleu, 'translated.txt')
    source = os.path.join(path_to_bleu, 'source.txt')
    attention_path = os.path.join(path_to_attention, 'attention.npy')

    # Only the attention of the first validation batch is stored.
    np.save(attention_path, attention_tracker[0])

    # Reference and source files are written once; the translation file is
    # overwritten on every call.
    if not os.path.exists(reference):
        dump_to_file(reference, valid_t_strings)
    if not os.path.exists(source):
        dump_to_file(source, valid_x_strings)
    dump_to_file(translated, valid_y_strings)

    out = pm.moses_bleu(translated, reference)

    # Write TensorBoard summaries
    if self.summarywriter:
        feed_dict = {
            self.model.valid_loss: valid_loss,
            self.model.valid_accuracy: valid_acc,
            self.bleu: corpus_bleu,
            self.moses_bleu: out,
            self.edit_dist: edit_dist
        }
        fetches = [self.val_summaries, self.model.global_step]
        summaries, i = sess.run(fetches, feed_dict)
        self.summarywriter.add_summary(summaries, i)

    print("Continue training..")
def validate(self, sess):
    """Run one pass over the validation feed and report the metrics.

    For every feed dict yielded by ``self.model.next_valid_feed()`` the
    validation accuracy, predictions, loss and attention tracker are
    fetched through ``self.perform_iteration``. Per-batch loss and
    accuracy are weighted by the number of samples in the batch and then
    averaged over all samples. BLEU and the mean character edit distance
    are computed through ``pm``; predictions, targets and sources are
    written to text files below ``SAVER_PATH['base']``; and, when
    ``self.summarywriter`` is set, the metrics are written as summaries.

    If the validation feed yields no samples, a message is printed and
    the function returns without computing metrics or writing files.

    Args:
        sess: session object handed to ``self.perform_iteration`` and used
            directly (``sess.run``) for the summary fetch.

    Returns:
        None.
    """
    print("Validating..")
    total_num_samples = 0
    losses, accuracies = [], []
    valid_y_strings, valid_t_strings, valid_x_strings = [], [], []
    attention_tracker = []
    # Bound up front so they are defined even if the feed yields nothing.
    res, v_feed_dict = None, None

    for v_feed_dict in self.model.next_valid_feed():
        fetches = {'accuracy': self.model.valid_accuracy,
                   'ys': self.model.valid_ys,
                   'loss': self.model.valid_loss,
                   'attention_tracker': self.model.valid_attention_tracker}
        # The second return value is unused; binding it to `time` (as the
        # code did before) would hide any module of that name in here.
        res, _ = self.perform_iteration(sess, fetches, v_feed_dict)

        # keep track of num_samples in batch and total
        samples_in_batch = res['ys'].shape[0]
        total_num_samples += samples_in_batch

        # convert to strings
        valid_ys = res['ys']
        valid_ts = v_feed_dict[self.model.ts]
        valid_xs = v_feed_dict[self.model.Xs]
        str_ts, str_ys = utils.numpy_to_str(valid_ts, valid_ys,
                                            self.alphabet_tar)
        str_xs, _ = utils.numpy_to_str(valid_xs, valid_xs,
                                       self.alphabet_src)
        valid_y_strings += str_ys
        valid_t_strings += str_ts
        valid_x_strings += str_xs

        # collect loss and accuracy, weighted by batch size so that the
        # division below yields a per-sample mean
        losses.append(res['loss'] * samples_in_batch)
        accuracies.append(res['accuracy'] * samples_in_batch)
        attention_tracker.append(
            res['attention_tracker'].transpose(1, 0, 2))

    if total_num_samples == 0:
        # Without this guard the code below would divide by zero, index
        # into `res` (never assigned) and read attention_tracker[0].
        print("\tNo validation samples available, skipping validation.")
        print("Continue training..")
        return

    # convert all prediction strings to lists of words (for computing bleu)
    # NOTE(review): predictions are passed first but unpacked into
    # `t_words`; the names look crossed relative to the arguments.
    # Confirm the return order of utils.strs_to_words.
    t_words, y_words = utils.strs_to_words(valid_y_strings, valid_t_strings)

    # compute performance metrics
    valid_loss = np.sum(losses) / total_num_samples
    valid_acc = np.sum(accuracies) / total_num_samples
    corpus_bleu = pm.corpus_bleu(t_words, y_words)
    edit_dist = pm.mean_char_edit_distance(valid_y_strings, valid_t_strings)

    # print results (the visualisation uses the last batch only)
    self.visualize_ys(res['ys'], v_feed_dict)
    print('\t{:s}{:.5f}'.format('loss:'.ljust(25), valid_loss))
    print('\t{:s}{:.2f}%'.format('accuracy:'.ljust(25), (valid_acc * 100)))
    print('\t{:s}{:.5f}'.format('BLEU:'.ljust(25), corpus_bleu))
    print('\t{:s}{:.5f}'.format('Mean edit dist per char:'.ljust(25),
                                edit_dist))

    def dump_to_file(thefile, thelist):
        """Write the strings in `thelist` to `thefile`, one per line."""
        tot_str = '\n'.join(thelist)
        with open(thefile, "w") as handle:
            handle.write(tot_str)

    # TODO move this to setup
    run_name = self.name if self.name else 'no-name'
    path_bleu = 'bleu/%s-%s' % (run_name, self.timestamp)
    path_attention = 'attention/%s-%s' % (run_name, self.timestamp)
    path_to_bleu = os.path.join(SAVER_PATH['base'], path_bleu)
    path_to_attention = os.path.join(SAVER_PATH['base'], path_attention)
    if not os.path.exists(path_to_bleu):
        os.makedirs(path_to_bleu)
    if not os.path.exists(path_to_attention):
        os.makedirs(path_to_attention)

    reference = os.path.join(path_to_bleu, 'reference.txt')
    translated = os.path.join(path_to_bleu, 'translated.txt')
    source = os.path.join(path_to_bleu, 'source.txt')
    attention_path = os.path.join(path_to_attention, 'attention.npy')

    # Only the attention of the first validation batch is stored.
    np.save(attention_path, attention_tracker[0])

    # Reference and source files are written once; the translation file is
    # overwritten on every call.
    if not os.path.exists(reference):
        dump_to_file(reference, valid_t_strings)
    if not os.path.exists(source):
        dump_to_file(source, valid_x_strings)
    dump_to_file(translated, valid_y_strings)

    out = pm.moses_bleu(translated, reference)

    # Write TensorBoard summaries
    if self.summarywriter:
        feed_dict = {
            self.model.valid_loss: valid_loss,
            self.model.valid_accuracy: valid_acc,
            self.bleu: corpus_bleu,
            self.moses_bleu: out,
            self.edit_dist: edit_dist
        }
        fetches = [self.val_summaries, self.model.global_step]
        summaries, i = sess.run(fetches, feed_dict)
        self.summarywriter.add_summary(summaries, i)

    print("Continue training..")
def train():
    """Train an en->fr translation model using WMT data.

    Runs an endless training loop. Every ``FLAGS.steps_per_checkpoint``
    steps it prints training statistics, possibly decays the learning
    rate, saves a checkpoint, evaluates every development bucket and
    appends ``step,bleu`` to a log file named after the ISO timestamp
    taken at start-up.
    """
    from utils import performancemetrics as pm
    import datetime

    started_at = datetime.datetime.now().isoformat()
    log_file_path = "{0}.dat".format(started_at)
    print("Creating log file as {0}..".format(log_file_path))
    # Start the log with a CSV header; result rows are appended later.
    with open(log_file_path, "w") as log_file:
        print("step,bleu", file=log_file)

    # Prepare WMT data.
    print("Preparing WMT data in %s" % FLAGS.data_dir)
    prepared = data_utils.prepare_wmt_data(
        FLAGS.data_dir, FLAGS.en_vocab_size, FLAGS.fr_vocab_size)
    en_train, fr_train, en_dev, fr_dev, _, fr_vocab_path = prepared
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

    def ids_to_sentence(token_ids):
        """Map token ids through the reverse French vocabulary."""
        return " ".join([rev_fr_vocab[token_id] for token_id in token_ids])

    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers,
                                                   FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              FLAGS.max_train_data_size)
        # TODO (marker kept from the original; its subject is not stated)
        dev_set = read_data(en_dev, fr_dev)
        train_set = read_data(en_train, fr_train, FLAGS.max_train_data_size)
        train_bucket_sizes = [
            len(train_set[index]) for index in xrange(len(_buckets))
        ]
        train_total_size = float(sum(train_bucket_sizes))

        # Cumulative share of the training data up to and including each
        # bucket: an increasing list of numbers used to pick a bucket
        # with probability proportional to its size.
        train_buckets_scale = []
        seen_so_far = 0
        for bucket_size in train_bucket_sizes:
            seen_so_far += bucket_size
            train_buckets_scale.append(seen_so_far / train_total_size)

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Draw a number in [0, 1) and take the first bucket whose
            # cumulative share exceeds it.
            draw = np.random.random_sample()
            bucket_id = min(
                index for index, upper in enumerate(train_buckets_scale)
                if upper > draw)

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, _ = model.step(sess, encoder_inputs,
                                         decoder_inputs, target_weights,
                                         bucket_id, False)
            elapsed = time.time() - start_time
            step_time += elapsed / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Only once in a while do we save a checkpoint, print
            # statistics and run evals.
            if current_step % FLAGS.steps_per_checkpoint != 0:
                continue

            # Print statistics for the previous epoch.
            if loss < 300:
                perplexity = math.exp(loss)
            else:
                perplexity = float('inf')
            print(
                "global step %d learning rate %.4f step-time %.2f perplexity "
                "%.2f" % (model.global_step.eval(),
                          model.learning_rate.eval(), step_time, perplexity))

            # Decrease learning rate if no improvement was seen over the
            # last 3 checkpoints.
            if len(previous_losses) > 2:
                if loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
            previous_losses.append(loss)

            # Save checkpoint and zero timer and loss.
            checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
            model.saver.save(sess, checkpoint_path,
                             global_step=model.global_step)
            step_time, loss = 0.0, 0.0

            # Run evals on development set and print their perplexity.
            references, candidates = [], []
            for bucket_id in xrange(len(_buckets)):
                if len(dev_set[bucket_id]) == 0:
                    print(" eval: empty bucket %d" % (bucket_id))
                    continue
                encoder_inputs, decoder_inputs, target_weights = (
                    model.get_batch(dev_set, bucket_id))
                _, eval_loss, output_logits = model.step(
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, True)

                # NOTE(review): each element of output_logits is treated
                # as one sentence here; confirm the layout that
                # model.step returns.
                predictions = []
                for batch in output_logits:
                    output = [int(np.argmax(logit)) for logit in batch]
                    # If there is an EOS symbol in outputs, cut them at
                    # that point.
                    if data_utils.EOS_ID in output:
                        output = output[:output.index(data_utils.EOS_ID)]
                    predictions.append(output)

                # Decode into sentences. The references come straight from
                # decoder_inputs and are not cut at EOS.
                for result, expected in zip(predictions, decoder_inputs):
                    candidates.append(ids_to_sentence(result))
                    references.append(ids_to_sentence(expected))

                if eval_loss < 300:
                    eval_ppx = math.exp(eval_loss)
                else:
                    eval_ppx = float('inf')
                print(" eval: bucket %d perplexity %.2f" % (bucket_id,
                                                            eval_ppx))

            eval_corp_bleu = pm.corpus_bleu(candidates, references)
            print(" combined bleu score: %.5f" % (eval_corp_bleu))
            with open(log_file_path, "a") as log_file:
                print("{0},{1}".format(current_step, eval_corp_bleu),
                      file=log_file)
            sys.stdout.flush()