def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
    """Compute BLEU for two files (reference and hypothesis translation)."""
    ref_lines = tokenizer.native_to_unicode(
        tf.io.gfile.GFile(ref_filename).read()).strip().splitlines()
    hyp_lines = tokenizer.native_to_unicode(
        tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines()
    logging.info("%s: %d lines", ref_filename, len(ref_lines))
    logging.info("%s: %d lines", hyp_filename, len(hyp_lines))
    if len(ref_lines) != len(hyp_lines):
        logging.info(
            "Reference and translation files have different numbers of "
            "lines (%d vs %d). If training only a few steps (100-200), the "
            "translation may be empty." % (len(ref_lines), len(hyp_lines)))
        return 0
    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]
    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
    """Compute BLEU for two files (reference and hypothesis translation)."""
    ref_lines = tf.gfile.Open(ref_filename).read().strip().splitlines()
    hyp_lines = tf.gfile.Open(hyp_filename).read().strip().splitlines()
    if len(ref_lines) != len(hyp_lines):
        raise ValueError("Reference and translation files have different "
                         "numbers of lines.")
    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]
    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
    """Compute BLEU for two files."""
    with open(ref_filename) as f1:
        ref_lines = f1.read().strip().splitlines()
    with open(hyp_filename) as f2:
        hyp_lines = f2.read().strip().splitlines()
    if len(ref_lines) != len(hyp_lines):
        raise ValueError("Reference and translation files have different "
                         "numbers of lines.")
    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]
    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
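# All three wrappers above depend on a `bleu_tokenize` helper that is not
# shown in this excerpt. The sketch below is a minimal, hypothetical
# stand-in in the spirit of mteval-v14 tokenization (separate punctuation
# from adjacent non-digit characters, then split on whitespace); the real
# helper's regexes may differ.
import re


def bleu_tokenize(string):
    """Split punctuation off word boundaries, then tokenize on whitespace."""
    # Insert a space between a non-digit character and trailing punctuation.
    string = re.sub(r"([^\d\s])([\.,!\?;:])", r"\1 \2", string)
    # Insert a space between punctuation and a following non-digit character.
    string = re.sub(r"([\.,!\?;:])([^\d\s])", r"\1 \2", string)
    return string.split()

# Example call (file names are hypothetical):
#   score = bleu_wrapper("newstest.ref", "newstest.hyp", case_sensitive=False)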
def result(self):
    return metrics.compute_bleu(self.labels, self.translations) * 100
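# `result` reads `self.labels` and `self.translations`, so it presumably
# belongs to a metric object that accumulates (reference, prediction) pairs
# across batches. A minimal sketch of such an accumulator, assuming both
# fields are lists of token sequences; the real class may differ.
class BleuMetric(object):
    """Hypothetical accumulator for reference/translation token sequences."""

    def __init__(self):
        self.labels = []        # reference token sequences
        self.translations = []  # hypothesis token sequences

    def update(self, labels, translations):
        # Extend the running buffers with one batch of pairs.
        self.labels.extend(labels)
        self.translations.extend(translations)

    def result(self):
        return metrics.compute_bleu(self.labels, self.translations) * 100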
def train(params):
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        # Calculate the learning rate schedule.
        learning_rate = get_learning_rate(params.learning_rate,
                                          params.hidden_size,
                                          params.learning_rate_warmup_steps,
                                          global_step)
        optimizer = tf.contrib.opt.LazyAdamOptimizer(
            learning_rate,
            beta1=params.optimizer_adam_beta1,
            beta2=params.optimizer_adam_beta2,
            epsilon=params.optimizer_adam_epsilon)

        # Get source/target sentences for each model tower.
        my_dataset = dataset.Dataset(params)
        train_iterator = my_dataset.train_input_fn(params)
        valid_iterator = my_dataset.eval_input_fn(params)

        tower_grads = []
        g_tower_grads = []
        model = transformer_5.Transformer(params, is_train=True)
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i in xrange(flags_obj.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        tf.logging.info("Build graph on gpu:{}".format(i))
                        loss, g_loss, rewards_mb = gan_tower_loss(
                            scope, model, train_iterator)

                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(
                            tf.GraphKeys.SUMMARIES, scope)

                        grads = optimizer.compute_gradients(loss)
                        g_grads = optimizer.compute_gradients(g_loss)
                        tf.logging.info(
                            "total trainable variables number: {}".format(
                                len(grads)))
                        tower_grads.append(grads)
                        g_tower_grads.append(g_grads)

                        if i == 0 and valid_iterator:
                            val_loss_op, val_logits_op, val_tgt_op = evaluation(
                                model, valid_iterator)
                            summaries.append(
                                tf.summary.scalar("val_loss", val_loss_op))

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        if len(tower_grads) > 1:
            grads = average_gradients(tower_grads)
            g_grads = average_gradients(g_tower_grads)
        else:
            grads = tower_grads[0]
            g_grads = g_tower_grads[0]

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', learning_rate))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables. Pass
        # global_step to only one apply op so each train_op run advances the
        # step counter once, not twice.
        apply_gradient_op = optimizer.apply_gradients(
            grads, global_step=global_step)
        g_apply_gradient_op = optimizer.apply_gradients(g_grads)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, g_apply_gradient_op)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # allow_soft_placement must be set to True to build towers on GPU,
        # as some of the ops do not have GPU implementations.
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.allow_soft_placement = True
        with tf.Session(config=sess_config) as sess:
            sess.run(init)
            sess.run(tf.local_variables_initializer())
            sess.run(train_iterator.initializer)

            ckpt = tf.train.latest_checkpoint(flags_obj.model_dir)
            tf.logging.info("ckpt {}".format(ckpt))
            if ckpt and tf.train.checkpoint_exists(ckpt):
                tf.logging.info(
                    "Reloading model parameters from {}".format(ckpt))
                saver.restore(sess, ckpt)
            else:
                tf.logging.info("Create a new model in {}".format(
                    flags_obj.pretrain_dir))

            # Start the queue runners.
            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(flags_obj.model_dir,
                                                   sess.graph)

            best_bleu = 0.0
            for step in xrange(flags_obj.train_steps):
                (_, loss_value, g_loss_value, rewards_mb_value,
                 baseline_value, total_rewards_value) = sess.run([
                     train_op, loss, g_loss, rewards_mb, model.baseline,
                     model.total_rewards
                 ])
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                if step % 100 == 0:
                    tf.logging.info(
                        "step = {}, step_g_loss = {:.4f}, step_loss = {:.4f}, "
                        "reward_mb = {}, baseline = {}, total_rewards = {}"
                        .format(step, g_loss_value, loss_value,
                                rewards_mb_value[:5], baseline_value[:5],
                                total_rewards_value[:5]))
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                if step % flags_obj.steps_between_evals == 0:
                    sess.run(valid_iterator.initializer)
                    tf.logging.info(
                        "---------------- Validation step {} ----------------"
                        .format(step))
                    total_bleu = 0.0
                    total_size = 0
                    total_loss = 0.0
                    while True:
                        try:
                            val_loss, val_logit, val_tgt = sess.run(
                                [val_loss_op, val_logits_op, val_tgt_op])
                            val_pred = np.argmax(val_logit, axis=-1)
                            val_bleu = metrics.compute_bleu(val_tgt, val_pred)
                            batch_size = val_pred.shape[0]
                            total_bleu += val_bleu * batch_size
                            total_loss += val_loss * batch_size
                            total_size += batch_size
                            tf.logging.info(
                                "pairs shape {}, {}, step_bleu: {:.5f}, "
                                "step_loss: {:.4f}".format(
                                    val_pred.shape, val_tgt.shape, val_bleu,
                                    val_loss))
                        except tf.errors.OutOfRangeError:
                            # Log one sample pair before leaving the loop.
                            pred_string = array_to_string(val_pred[-1])
                            tgt_string = array_to_string(val_tgt[-1])
                            tf.logging.info(
                                "prediction:\n{}".format(pred_string))
                            tf.logging.info("target:\n{}".format(tgt_string))
                            tf.logging.info(
                                "Finished going through the valid dataset")
                            break
                    total_bleu /= total_size
                    total_loss /= total_size
                    tf.logging.info(
                        "{}, Step: {}, Valid loss: {:.6f}, Valid bleu: {:.6f}"
                        .format(datetime.now(), step, total_loss, total_bleu))
                    tf.logging.info(
                        "---------------- Finish evaluation ----------------")

                    # Save the model checkpoint when validation BLEU improves.
                    if step == 0:
                        total_bleu = 0.0
                    if total_bleu > best_bleu:
                        best_bleu = total_bleu
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info("Saving model at {}".format(
                            checkpoint_path + "-" + str(step)))
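# `average_gradients` is called by the training loops above but not defined
# in this excerpt. A minimal sketch in the style of the classic TensorFlow
# multi-GPU tutorial helper, assuming each element of `tower_grads` is a
# list of (gradient, variable) pairs and variables are shared across towers.
def average_gradients(tower_grads):
    """Average gradients for each shared variable across all towers."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars looks like:
        #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars
                 if g is not None]
        # Average over the tower dimension.
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared, so the first tower's handle suffices.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads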
def train(params):
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        g_model, d_model, train_return, valid_return, dataset_iter = build_graph(
            params)
        train_op, global_step, g_loss, xen_loss, rewards, learning_rate, \
            init_step, roll_mean_loss, real_mean_loss = train_return
        val_pred, val_tgt, val_src = valid_return
        train_iterator, valid_iterator = dataset_iter

        vars_to_update = tf.global_variables()
        tf.logging.info("total variables number is %i" % len(vars_to_update))
        # Build ops that copy generator ("Transformer") weights onto the
        # correspondingly named discriminator ("Discriminator") variables.
        update_op = train_helper.update_checkpoint(
            vars_to_update,
            replace_from="Transformer",
            replace_to="Discriminator")

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.allow_soft_placement = True
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            sess.run(train_iterator.initializer)

            # Reload pretrained parameters if a checkpoint is available.
            tf.logging.info("pretrain_dir: {}".format(flags_obj.pretrain_dir))
            ckpt = tf.train.latest_checkpoint(flags_obj.pretrain_dir)
            tf.logging.info("ckpt {}".format(ckpt))
            if ckpt and tf.train.checkpoint_exists(ckpt):
                tf.logging.info(
                    "Reloading model parameters from {}".format(ckpt))
                variables = tf.global_variables()
                var_keep_dic = train_helper.get_variables_in_checkpoint_file(
                    ckpt)
                # Restore only the variables present in the checkpoint.
                variables_to_restore = [
                    v for v in variables
                    if v.name.split(':')[0] in var_keep_dic
                ]
                restorer = tf.train.Saver(variables_to_restore)
                restorer.restore(sess, ckpt)
            else:
                tf.logging.info("Create a new model in {}".format(
                    flags_obj.model_dir))

            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(flags_obj.model_dir,
                                                   sess.graph)

            best_bleu = 0.0
            sess.run(update_op)
            for step in xrange(init_step, flags_obj.train_steps):
                # Take several generator steps per iteration.
                g_steps_per_iter = 5
                for g_step in range(g_steps_per_iter):
                    (_, x_loss_value, g_loss_value, rewards_value, roll_loss,
                     real_loss) = sess.run(
                         [train_op, xen_loss, g_loss, rewards,
                          roll_mean_loss, real_mean_loss],
                         feed_dict={
                             g_model.dropout_rate: 0.0,
                             d_model.dropout_rate: 0.1
                         })
                    assert not np.isnan(
                        g_loss_value), 'Model diverged with loss = NaN'
                    assert not np.isnan(
                        x_loss_value), 'Model diverged with loss = NaN'

                if step % 50 == 0:
                    tf.logging.info(
                        "step = {}, g_loss = {:.4f}, x_loss = {:.4f}, "
                        "roll_loss = {:.4f}, real_loss = {:.4f}, "
                        "reward = {}".format(step, g_loss_value, x_loss_value,
                                             roll_loss, real_loss,
                                             rewards_value[:5]))

                # Sync the discriminator with the updated generator weights.
                sess.run(update_op)

                if step % flags_obj.steps_between_evals == 0:
                    sess.run(valid_iterator.initializer)
                    tf.logging.info(
                        "------------- Validation step {} -----------".format(
                            step))
                    total_bleu = 0.0
                    total_size = 0
                    while True:
                        try:
                            val_tgt_np, val_src_np, val_pred_np = sess.run(
                                [val_tgt, val_src, val_pred],
                                feed_dict={
                                    g_model.dropout_rate: 0.0,
                                    d_model.dropout_rate: 0.0
                                })
                            val_bleu = metrics.compute_bleu(
                                val_tgt_np, val_pred_np)
                            batch_size = val_pred_np.shape[0]
                            total_bleu += val_bleu * batch_size
                            total_size += batch_size
                        except tf.errors.OutOfRangeError:
                            break
                    total_bleu /= total_size
                    tf.logging.info("{}, Step: {}, Valid bleu: {:.6f}".format(
                        datetime.now(), step, total_bleu))
                    tf.logging.info(
                        "--------------- Finish evaluation ---------------")

                    # Save the model checkpoint when validation BLEU improves.
                    if total_bleu > best_bleu:
                        best_bleu = total_bleu
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info("Saving model at {}".format(
                            checkpoint_path + "-" + str(step)))
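# `train_helper.update_checkpoint` is not shown in this excerpt. Judging
# from its call site (replace_from="Transformer",
# replace_to="Discriminator"), a plausible minimal sketch is a group of
# assign ops that copy each generator variable onto the discriminator
# variable with the matching name suffix; the real helper may differ.
def update_checkpoint(variables, replace_from, replace_to):
    """Return an op copying `replace_from/*` values onto `replace_to/*`."""
    by_name = {v.name.split(':')[0]: v for v in variables}
    assign_ops = []
    for name, src in by_name.items():
        if name.startswith(replace_from):
            dst_name = name.replace(replace_from, replace_to, 1)
            if dst_name in by_name:
                # Copy the generator value into the discriminator variable.
                assign_ops.append(tf.assign(by_name[dst_name], src))
    return tf.group(*assign_ops)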
def train(params):
    with tf.Graph().as_default():
        # Recover the global step from the latest checkpoint name, if any.
        if tf.train.latest_checkpoint(flags_obj.model_dir):
            global_step_value = int(
                tf.train.latest_checkpoint(flags_obj.model_dir).split("-")[-1])
            global_step = tf.Variable(initial_value=global_step_value,
                                      dtype=tf.int32,
                                      trainable=False)
            tf.logging.info(
                "Resuming from global step {}".format(global_step_value))
        else:
            global_step_value = 0
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                trainable=False)

        learning_rate = get_learning_rate(params.learning_rate,
                                          params.hidden_size,
                                          params.learning_rate_warmup_steps,
                                          global_step)
        optimizer = tf.contrib.opt.LazyAdamOptimizer(
            learning_rate,
            beta1=params.optimizer_adam_beta1,
            beta2=params.optimizer_adam_beta2,
            epsilon=params.optimizer_adam_epsilon)

        my_dataset = dataset.Dataset(params)
        train_iterator = my_dataset.train_input_fn(params)
        valid_iterator = my_dataset.eval_input_fn(params)

        tower_grads = []
        g_model = transformer_9.Transformer(params,
                                            is_train=True,
                                            mode=None,
                                            scope="Transformer")
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i in xrange(flags_obj.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        tf.logging.info("Build graph on gpu:{}".format(i))
                        logits = g_model.inference(train_iterator.source,
                                                   train_iterator.target)
                        xentropy, weights = metrics.padded_cross_entropy_loss(
                            logits, train_iterator.target,
                            params.label_smoothing, params.target_vocab_size)
                        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

                        summaries = tf.get_collection(
                            tf.GraphKeys.SUMMARIES, scope)
                        grads = optimizer.compute_gradients(loss)
                        tf.logging.info(
                            "total trainable variables number: {}".format(
                                len(grads)))
                        tower_grads.append(grads)

                        if i == 0 and valid_iterator:
                            valid_pred = g_model.inference(
                                inputs=valid_iterator.source,
                                targets=None)["outputs"]
                            valid_tgt = valid_iterator.target
                            valid_src = valid_iterator.source

        if len(tower_grads) > 1:
            grads = average_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        summaries.append(tf.summary.scalar('learning_rate', learning_rate))
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))
        apply_gradient_op = optimizer.apply_gradients(
            grads, global_step=global_step)
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))
        train_op = apply_gradient_op

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=20)
        init = tf.global_variables_initializer()
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.allow_soft_placement = True
        with tf.Session(config=sess_config) as sess:
            sess.run(init)
            sess.run(tf.local_variables_initializer())
            sess.run(train_iterator.initializer)

            ckpt = tf.train.latest_checkpoint(flags_obj.model_dir)
            tf.logging.info("ckpt {}".format(ckpt))
            if ckpt and tf.train.checkpoint_exists(ckpt):
                tf.logging.info(
                    "Reloading model parameters from {}".format(ckpt))
                saver.restore(sess, ckpt)
            else:
                tf.logging.info("Create a new model in {}".format(
                    flags_obj.model_dir))

            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(flags_obj.model_dir,
                                                   sess.graph)

            count = 0
            best_bleu = 0.0
            for step in xrange(global_step_value, flags_obj.train_steps):
                _, loss_value, lr_value = sess.run(
                    [train_op, loss, learning_rate],
                    feed_dict={g_model.dropout_rate: 0.1})
                if step % 200 == 0:
                    tf.logging.info(
                        "step: {}, loss = {:.4f}, lr = {:.5f}".format(
                            step, loss_value, lr_value))
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                # Evaluate less frequently early in training.
                if step < 10000:
                    steps_between_evals = 2000
                else:
                    steps_between_evals = 1000
                if step % steps_between_evals == 0:
                    sess.run(valid_iterator.initializer)
                    tf.logging.info(
                        "------------------ Evaluation bleu ------------------")
                    total_bleu = 0.0
                    total_size = 0
                    while True:
                        try:
                            val_pred, val_tgt, val_src = sess.run(
                                [valid_pred, valid_tgt, valid_src],
                                feed_dict={g_model.dropout_rate: 0.0})
                            val_bleu = metrics.compute_bleu(val_tgt, val_pred)
                            batch_size = val_pred.shape[0]
                            total_bleu += val_bleu * batch_size
                            total_size += batch_size
                        except tf.errors.OutOfRangeError:
                            break
                    total_bleu /= total_size
                    tf.logging.info("{}, Step: {}, Valid bleu: {:.6f}".format(
                        datetime.now(), step, total_bleu))
                    tf.logging.info(
                        "------------------ Finish evaluation ------------------")

                    # Save the model checkpoint periodically.
                    if step == 0:
                        total_bleu = 0.0
                    if total_bleu > best_bleu:
                        best_bleu = total_bleu
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info("Saving model at {}".format(
                            checkpoint_path + "-" + str(step)))
                    elif total_bleu + 0.003 > best_bleu:
                        # Within 0.003 BLEU of the best: still save, but do
                        # not reset the early-stopping counter.
                        checkpoint_path = os.path.join(flags_obj.model_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        tf.logging.info("Saving model at {}".format(
                            checkpoint_path + "-" + str(step)))
                    else:
                        count += 1
                        # Early stop after too many evals without improvement.
                        if count > 5:
                            break
            tf.logging.info("Best bleu is {}".format(best_bleu))
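# `get_learning_rate` is used by all three training loops above but is not
# defined in this excerpt. A minimal sketch of the standard Transformer
# (Noam) schedule it most likely implements: scale by hidden_size**-0.5,
# ramp up linearly during warmup, then decay with the inverse square root
# of the step. The exact formula in the real helper may differ.
def get_learning_rate(learning_rate, hidden_size, warmup_steps, global_step):
    """Noam-style learning rate schedule with linear warmup."""
    with tf.name_scope("learning_rate"):
        warmup_steps = tf.to_float(warmup_steps)
        step = tf.to_float(global_step)
        lr = learning_rate * (hidden_size ** -0.5)
        # Linear warmup up to warmup_steps.
        lr *= tf.minimum(1.0, step / warmup_steps)
        # rsqrt decay after warmup.
        lr *= tf.rsqrt(tf.maximum(step, warmup_steps))
        return lr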