def build_graph_dist_strategy(self, features, labels, mode, params):
  """Model function."""
  del labels, params
  misc_utils.print_out("Running dist_strategy mode_fn")

  hparams = self.hparams

  # Create a GNMT model for training.
  # assert (hparams.encoder_type == "gnmt" or
  #         hparams.attention_architecture in ["gnmt", "gnmt_v2"])
  with mixed_precision_scope():
    model = gnmt_model.GNMTModel(hparams, mode=mode, features=features)
    if mode == tf.contrib.learn.ModeKeys.INFER:
      sample_ids = model.sample_id
      reverse_target_vocab_table = lookup_ops.index_to_string_table_from_file(
          hparams.tgt_vocab_file, default_value=vocab_utils.UNK)
      sample_words = reverse_target_vocab_table.lookup(
          tf.to_int64(sample_ids))
      # Make sure outputs is of shape [batch_size, time] or
      # [beam_width, batch_size, time] when using beam search.
      if hparams.time_major:
        sample_words = tf.transpose(sample_words)
      elif sample_words.shape.ndims == 3:
        # Beam search output is in [batch_size, time, beam_width] shape.
        sample_words = tf.transpose(sample_words, [2, 0, 1])
      predictions = {"predictions": sample_words}
      # return loss, vars, grads, predictions, train_op, scaffold
      return None, None, None, predictions, None, None
    elif mode == tf.contrib.learn.ModeKeys.TRAIN:
      loss = model.train_loss
      train_op = model.update
      return loss, model.params, model.grads, None, train_op, None
    else:
      raise ValueError("Unknown mode in model_fn: %s" % mode)
def run_main(flags, default_hparams, eval_fn, target_session=""):
  """Run main."""
  # Job
  jobid = flags.jobid
  num_workers = flags.num_workers
  utils.print_out("# Job id %d" % jobid)

  # Random
  random_seed = flags.random_seed
  if random_seed is not None and random_seed > 0:
    utils.print_out("# Set random seed to %d" % random_seed)
    random.seed(random_seed + jobid)
    np.random.seed(random_seed + jobid)

  ## Train / Decode
  out_dir = flags.out_dir
  if not tf.gfile.Exists(out_dir):
    tf.gfile.MakeDirs(out_dir)

  # Load hparams.
  hparams = create_or_load_hparams(out_dir, default_hparams,
                                   flags.hparams_path,
                                   save_hparams=(jobid == 0))

  # Train
  eval_fn(hparams, target_session=target_session)
def load_model(model, ckpt, session, name):
  """Restore model parameters from a checkpoint and init lookup tables."""
  start_time = time.time()
  model.saver.restore(session, ckpt)
  session.run(tf.tables_initializer())
  utils.print_out(
      "  loaded %s model parameters from %s, time %.2fs" %
      (name, ckpt, time.time() - start_time))
  return model
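A minimal usage sketch for load_model (hypothetical names; it assumes a TF 1.x session, a built inference graph, and a model object exposing a `saver`, as the function above expects):

ckpt = tf.train.latest_checkpoint(hparams.out_dir)  # or an explicit path
with tf.Session(graph=infer_model.graph) as sess:
  loaded_model = load_model(infer_model.model, ckpt, sess, "infer")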
def _cell_list(num_units, num_layers, num_residual_layers, forget_bias,
               dropout, mode, single_cell_fn=None, residual_fn=None,
               global_step=None, fast_reverse=False, seq_len=None):
  """Create a list of RNN cells."""
  if not single_cell_fn:
    single_cell_fn = _single_cell

  # Multi-GPU
  cell_list = []
  for i in range(num_layers):
    utils.print_out("  cell %d" % i, new_line=False)
    single_cell = single_cell_fn(
        num_units=num_units,
        forget_bias=forget_bias,
        dropout=dropout,
        mode=mode,
        residual_connection=(i >= num_layers - num_residual_layers),
        residual_fn=residual_fn,
        global_step=global_step,
        fast_reverse=fast_reverse,
        seq_len=seq_len)
    utils.print_out("")
    cell_list.append(single_cell)

  return cell_list
def _get_learning_rate_decay(self, hparams):
  """Get learning rate decay."""
  if hparams.decay_scheme in ["luong5", "luong10", "luong234"]:
    decay_factor = 0.5
    if hparams.decay_scheme == "luong5":
      start_decay_step = int(hparams.num_train_steps / 2)
      decay_times = 5
    elif hparams.decay_scheme == "luong10":
      start_decay_step = int(hparams.num_train_steps / 2)
      decay_times = 10
    elif hparams.decay_scheme == "luong234":
      start_decay_step = int(hparams.num_train_steps * 2 / 3)
      decay_times = 4
    remain_steps = hparams.num_train_steps - start_decay_step
    decay_steps = int(remain_steps / decay_times)
  elif not hparams.decay_scheme:  # no decay
    start_decay_step = hparams.num_train_steps
    decay_steps = 0
    decay_factor = 1.0
  else:
    raise ValueError("Unknown decay scheme %s" % hparams.decay_scheme)
  utils.print_out("  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
                  "decay_factor %g" % (hparams.decay_scheme,
                                       start_decay_step,
                                       decay_steps,
                                       decay_factor))

  return tf.cond(
      self.global_step < start_decay_step,
      lambda: self.learning_rate,
      lambda: tf.train.exponential_decay(
          self.learning_rate,
          (self.global_step - start_decay_step),
          decay_steps, decay_factor, staircase=True),
      name="learning_rate_decay_cond")
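As a worked example of the schedule above (illustrative numbers, not from the source): with decay_scheme="luong10", decay starts halfway through training and the learning rate is then halved ten times over the remaining steps.

# Illustrative arithmetic for decay_scheme="luong10" (hypothetical step count).
num_train_steps = 340000
start_decay_step = int(num_train_steps / 2)        # 170000
remain_steps = num_train_steps - start_decay_step  # 170000
decay_steps = int(remain_steps / 10)               # halve lr every 17000 steps
assert (start_decay_step, decay_steps) == (170000, 17000)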
def check_vocab(vocab_file, out_dir, check_special_token=True, sos=None,
                eos=None, unk=None):
  """Check vocab_file; prepend special tokens and rewrite it if needed."""
  if tf.gfile.Exists(vocab_file):
    utils.print_out("# Vocab file %s exists" % vocab_file)
    vocab, vocab_size = load_vocab(vocab_file)
    if check_special_token:
      # Verify if the vocab starts with unk, sos, eos.
      # If not, prepend those tokens & generate a new vocab file.
      if not unk: unk = UNK
      if not sos: sos = SOS
      if not eos: eos = EOS
      assert len(vocab) >= 3
      if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos:
        utils.print_out("The first 3 vocab words [%s, %s, %s]"
                        " are not [%s, %s, %s]" %
                        (vocab[0], vocab[1], vocab[2], unk, sos, eos))
        vocab = [unk, sos, eos] + vocab
        vocab_size += 3
        new_vocab_file = os.path.join(out_dir, os.path.basename(vocab_file))
        with codecs.getwriter("utf-8")(
            tf.gfile.GFile(new_vocab_file, "wb")) as f:
          for word in vocab:
            f.write("%s\n" % word)
        vocab_file = new_vocab_file
  else:
    raise ValueError("vocab_file '%s' does not exist." % vocab_file)

  vocab_size = len(vocab)
  return vocab_size, vocab_file
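A usage sketch for check_vocab (hypothetical paths): if the vocab file exists but does not start with the three special tokens, a patched copy is written under out_dir and its path is returned.

src_vocab_size, src_vocab_file = check_vocab(
    "/tmp/wmt16/vocab.bpe.32000",  # hypothetical vocab path
    out_dir="/tmp/nmt_out",
    check_special_token=True)      # prepends <unk>, <s>, </s> if missing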
def _get_learning_rate_warmup(self, hparams):
  """Get learning rate warmup."""
  warmup_steps = hparams.warmup_steps
  warmup_scheme = hparams.warmup_scheme
  utils.print_out("  learning_rate=%g, warmup_steps=%d, warmup_scheme=%s" %
                  (hparams.learning_rate, warmup_steps, warmup_scheme))

  if not warmup_scheme:
    return self.learning_rate

  # Apply inverse decay if global steps less than warmup steps.
  # Inspired by https://arxiv.org/pdf/1706.03762.pdf (Section 5.3)
  # When step < warmup_steps,
  #   learning_rate *= warmup_factor ** (warmup_steps - step)
  if warmup_scheme == "t2t":
    # 0.01^(1/warmup_steps): we start with a lr 100 times smaller
    warmup_factor = tf.exp(tf.log(0.01) / warmup_steps)
    inv_decay = warmup_factor**(tf.to_float(warmup_steps - self.global_step))
  else:
    raise ValueError("Unknown warmup scheme %s" % warmup_scheme)

  return tf.cond(
      self.global_step < hparams.warmup_steps,
      lambda: inv_decay * self.learning_rate,
      lambda: self.learning_rate,
      name="learning_rate_warmup_cond")
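The "t2t" scheme above starts the learning rate 100 times smaller and grows it exponentially until step warmup_steps. A small self-contained check of that arithmetic (plain Python, illustrative numbers):

import math

warmup_steps = 200
warmup_factor = math.exp(math.log(0.01) / warmup_steps)
multiplier = lambda step: warmup_factor ** (warmup_steps - step)
assert abs(multiplier(0) - 0.01) < 1e-9           # lr starts 100x smaller
assert abs(multiplier(warmup_steps) - 1.0) < 1e-9  # full lr after warmup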
def decode_and_evaluate(name,
                        model,
                        sess,
                        trans_file,
                        ref_file,
                        metrics,
                        subword_option,
                        beam_width,
                        tgt_eos,
                        num_translations_per_input=1,
                        decode=True):
  """Decode a test set and compute a score according to the evaluation task."""
  # Decode
  if decode:
    utils.print_out("  decoding to output %s." % trans_file)

    start_time = time.time()
    num_sentences = 0
    with codecs.getwriter("utf-8")(
        tf.gfile.GFile(trans_file, mode="wb")) as trans_f:
      trans_f.write("")  # Write empty string to ensure file is created.

      num_translations_per_input = max(
          min(num_translations_per_input, beam_width), 1)
      while True:
        try:
          nmt_outputs, _ = model.decode(sess)
          if beam_width == 0:
            nmt_outputs = np.expand_dims(nmt_outputs, 0)

          batch_size = nmt_outputs.shape[1]
          num_sentences += batch_size

          for sent_id in range(batch_size):
            for beam_id in range(num_translations_per_input):
              translation = get_translation(
                  nmt_outputs[beam_id],
                  sent_id,
                  tgt_eos=tgt_eos,
                  subword_option=subword_option)
              trans_f.write((translation + b"\n").decode("utf-8"))
        except tf.errors.OutOfRangeError:
          utils.print_time(
              "  done, num sentences %d, num translations per input %d" %
              (num_sentences, num_translations_per_input), start_time)
          break

  # Evaluation
  evaluation_scores = {}
  if ref_file and tf.gfile.Exists(trans_file):
    for metric in metrics:
      score = evaluation_utils.evaluate(
          ref_file, trans_file, metric, subword_option=subword_option)
      evaluation_scores[metric] = score
      utils.print_out("  %s %s: %.1f" % (metric, name, score))

  return evaluation_scores
def extend_hparams(hparams):
  """Add new arguments to hparams."""
  # Sanity checks
  if hparams.subword_option and hparams.subword_option not in ["spm", "bpe"]:
    raise ValueError("subword option must be either spm, or bpe")
  if hparams.infer_mode == "beam_search" and hparams.beam_width <= 0:
    raise ValueError("beam_width must be greater than 0 when using the "
                     "beam_search decoder.")

  # Different number of encoder / decoder layers
  assert hparams.num_encoder_layers == hparams.num_decoder_layers

  # The first unidirectional layer (after the bi-directional layer) in
  # the GNMT encoder can't have a residual connection because its input is
  # the concatenation of the fw_cell's and bw_cell's outputs.
  num_encoder_residual_layers = hparams.num_encoder_layers - 2
  num_decoder_residual_layers = num_encoder_residual_layers
  _add_argument(hparams, "num_encoder_residual_layers",
                num_encoder_residual_layers)
  _add_argument(hparams, "num_decoder_residual_layers",
                num_decoder_residual_layers)

  ## Vocab
  # Get vocab file names first
  if hparams.vocab_prefix:
    src_vocab_file = hparams.vocab_prefix + "." + hparams.src
    tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt
  else:
    raise ValueError("hparams.vocab_prefix must be provided.")

  # Source vocab
  src_vocab_size, src_vocab_file = vocab_utils.check_vocab(
      src_vocab_file,
      hparams.out_dir,
      check_special_token=hparams.check_special_token,
      sos=hparams.sos,
      eos=hparams.eos,
      unk=vocab_utils.UNK)

  # Target vocab
  utils.print_out("  using source vocab for target")
  tgt_vocab_file = src_vocab_file
  tgt_vocab_size = src_vocab_size
  _add_argument(hparams, "src_vocab_size", src_vocab_size)
  _add_argument(hparams, "tgt_vocab_size", tgt_vocab_size)
  _add_argument(hparams, "src_vocab_file", src_vocab_file)
  _add_argument(hparams, "tgt_vocab_file", tgt_vocab_file)

  # Num embedding partitions
  _add_argument(hparams, "num_enc_emb_partitions",
                hparams.num_embeddings_partitions)
  _add_argument(hparams, "num_dec_emb_partitions",
                hparams.num_embeddings_partitions)

  # Pretrained Embeddings
  _add_argument(hparams, "src_embed_file", "")
  _add_argument(hparams, "tgt_embed_file", "")

  return hparams
def _build_graph(self, hparams):
  """Build a single decoder RNN cell."""
  dropout = (hparams.dropout
             if self.mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0)

  with tf.variable_scope("decoder_cell") as scope:
    # Cell Type
    if hparams.unit_type == "lstm":
      utils.print_out("  LSTM, forget_bias=%g" % hparams.forget_bias,
                      new_line=False)
      cell = tf.contrib.rnn.BasicLSTMCell(
          hparams.num_units, forget_bias=hparams.forget_bias)
    elif hparams.unit_type == "gru":
      utils.print_out("  GRU", new_line=False)
      cell = tf.contrib.rnn.GRUCell(hparams.num_units)
    else:
      raise ValueError("Requested decoder cell not supported!")

    # Wrap the decoder cell with dropout
    if dropout > 0.0:
      cell = tf.contrib.rnn.DropoutWrapper(
          cell=cell, input_keep_prob=(1.0 - dropout))

    # Add a residual connection to the decoder cell
    if hparams.residual:
      cell = tf.contrib.rnn.ResidualWrapper(cell)

    # Device Wrapper
    # if hparams.encoder_device:
    #   cell = tf.contrib.rnn.DeviceWrapper(cell, hparams.encoder_device)
    # self.decoder_scope = scope

    return cell
def _build_model(self, hparams):
  """Builds a sequence-to-sequence model.

  Args:
    hparams: Hyperparameter configurations.

  Returns:
    For inference, a tuple of the form
    (logits, decoder_cell_outputs, predicted_ids), where:
      logits: logits output of the decoder.
      decoder_cell_outputs: the output of the decoder.
      predicted_ids: predicted ids from beam search.
    For training, the final loss.

  Raises:
    ValueError: if encoder_type differs from mono and bi, or
      attention_option is not (luong | scaled_luong | bahdanau |
      normed_bahdanau).
  """
  # Encoder
  if hparams.language_model:  # no encoder for language modeling
    utils.print_out("  language modeling: no encoder")
    self.encoder_outputs = None
    encoder_state = None
  else:
    self.encoder_outputs, encoder_state = self._build_encoder(hparams)

  ## Decoder
  return self._build_decoder(self.encoder_outputs, encoder_state, hparams)
def _get_learning_rate_decay(self, hparams):
  """Get learning rate decay."""
  if hparams.learning_rate_decay_scheme in ["luong", "luong10"]:
    start_factor = 2
    start_decay_step = int(hparams.num_train_steps / start_factor)
    decay_factor = 0.5
    if hparams.learning_rate_decay_scheme == "luong":
      # decay 5 times
      decay_steps = int(hparams.num_train_steps / (5 * start_factor))
    elif hparams.learning_rate_decay_scheme == "luong10":
      # decay 10 times
      decay_steps = int(hparams.num_train_steps / (10 * start_factor))
  else:
    start_decay_step = hparams.start_decay_step
    decay_steps = hparams.decay_steps
    decay_factor = hparams.decay_factor

  utils.print_out("  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
                  "decay_factor %g" % (hparams.learning_rate_decay_scheme,
                                       hparams.start_decay_step,
                                       hparams.decay_steps,
                                       hparams.decay_factor))

  return tf.cond(
      self.global_step < start_decay_step,
      lambda: self.learning_rate,
      lambda: tf.train.exponential_decay(
          self.learning_rate,
          (self.global_step - start_decay_step),
          decay_steps, decay_factor, staircase=True),
      name="learning_rate_decay_cond")
def _cell_list(unit_type, num_units, num_layers, num_residual_layers,
               forget_bias, dropout, mode, num_gpus, base_gpu=0,
               verbose=True):
  """Create a list of RNN cells."""
  # Multi-GPU
  cell_list = []
  for i in range(num_layers):
    if verbose:
      utils.print_out("  cell %d" % i, new_line=False)
    # Disable dropout outside of training.
    dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0
    single_cell = _single_cell(
        unit_type=unit_type,
        num_units=num_units,
        forget_bias=forget_bias,
        dropout=dropout,
        # Apply the residual wrapper to the last layers.
        residual_connection=(i >= num_layers - num_residual_layers),
        # Parallelize computation over GPUs.
        device_str=get_device_str(i + base_gpu, num_gpus),
        verbose=verbose)  # Whether to print to stdout.
    if verbose:
      utils.print_out("")  # create new line
    cell_list.append(single_cell)

  return cell_list
def _compute_tower_grads(self, tower_loss, tower_params, learning_rate,
                         use_fp16=False, loss_scale=None,
                         colocate_gradients_with_ops=True):
  """Compute gradients for a single tower, with optional fp16 loss scaling."""
  if use_fp16:
    assert loss_scale
    scaled_loss = tf.multiply(
        tower_loss,
        tf.convert_to_tensor(loss_scale, dtype=tower_loss.dtype),
        name="scaling_loss")
  else:
    scaled_loss = tower_loss

  opt = self.get_optimizer(self.hparams, learning_rate)
  grads_and_vars = opt.compute_gradients(
      scaled_loss,
      tower_params,
      colocate_gradients_with_ops=self.hparams.colocate_gradients_with_ops)
  grads = [x for (x, _) in grads_and_vars]
  assert grads
  for g in grads:
    assert g.dtype == tf.float32, "grad.dtype isn't fp32: %s" % g.name

  # Warn about missing gradients.
  for var, grad in zip(tower_params, grads):
    if grad is None:
      misc_utils.print_out("%s gradient is None!" % var.name)

  # Downscale grads to undo the loss scaling.
  if use_fp16:
    grads = [grad * tf.reciprocal(loss_scale) for grad in grads]

  return tower_params, grads, opt
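A framework-agnostic sketch of the loss-scaling trick used above (illustrative values): the loss is scaled up before gradients are computed so that tiny gradients survive fp16 underflow, then the gradients are multiplied by the reciprocal to recover their true magnitude.

import numpy as np

loss_scale = 1024.0
tiny_grad = np.float32(2e-8)                 # below fp16's smallest subnormal
assert np.float16(tiny_grad) == 0.0          # would flush to zero in fp16
scaled = np.float16(tiny_grad * loss_scale)  # survives after scaling
assert scaled != 0.0
recovered = np.float32(scaled) * (1.0 / loss_scale)
assert abs(recovered - tiny_grad) / tiny_grad < 0.01  # magnitude recovered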
def ensure_compatible_hparams(hparams, default_hparams, hparams_path=""): """Make sure the loaded hparams is compatible with new changes.""" default_hparams = utils.maybe_parse_standard_hparams( default_hparams, hparams_path) # Set num encoder/decoder layers (for old checkpoints) if hasattr(hparams, "num_layers"): if not hasattr(hparams, "num_encoder_layers"): hparams.add_hparam("num_encoder_layers", hparams.num_layers) if not hasattr(hparams, "num_decoder_layers"): hparams.add_hparam("num_decoder_layers", hparams.num_layers) # For compatible reason, if there are new fields in default_hparams, # we add them to the current hparams default_config = default_hparams.values() config = hparams.values() for key in default_config: if key not in config: hparams.add_hparam(key, default_config[key]) # Update all hparams' keys if override_loaded_hparams=True if getattr(default_hparams, "override_loaded_hparams", None): overwritten_keys = default_config.keys() else: # For inference overwritten_keys = INFERENCE_KEYS for key in overwritten_keys: if getattr(hparams, key) != default_config[key]: utils.print_out("# Updating hparams.%s: %s -> %s" % (key, str(getattr(hparams, key)), str(default_config[key]))) setattr(hparams, key, default_config[key]) return hparams
def _create_pretrained_emb_from_txt(vocab_file,
                                    embed_file,
                                    num_trainable_tokens=3,
                                    dtype=tf.float32,
                                    scope=None):
  """Load a pretrained embedding from embed_file; return an embedding matrix.

  Args:
    vocab_file: Path to the vocabulary file.
    embed_file: Path to a GloVe-formatted embedding txt file.
    num_trainable_tokens: Make the first n tokens in the vocab file
      trainable variables. Default is 3, which is "<unk>", "<s>" and "</s>".
    dtype: Data type of the embedding matrix.
    scope: Variable scope to use.
  """
  vocab, _ = vocab_utils.load_vocab(vocab_file)

  utils.print_out("# Using pretrained embedding: %s." % embed_file)
  utils.print_out("  with trainable tokens: ")

  emb_dict, emb_size = vocab_utils.load_embed_txt(embed_file)

  # Add all words that are present in the vocab but missing from the
  # pretrained embedding; the previous behaviour (dropping them) seemed
  # illogical.
  for token in vocab:
    if token not in emb_dict:
      emb_dict[token] = [0.0] * emb_size

  emb_mat = np.array(
      [emb_dict[token] for token in vocab], dtype=dtype.as_numpy_dtype())
  emb_mat = tf.constant(emb_mat)
  emb_mat_const = tf.slice(emb_mat, [num_trainable_tokens, 0], [-1, -1])
  with tf.variable_scope(scope or "pretrain_embeddings", dtype=dtype) as scope:
    with tf.device(_get_embed_device(num_trainable_tokens)):
      emb_mat_var = tf.get_variable("emb_mat_var",
                                    [num_trainable_tokens, emb_size])
  return tf.concat([emb_mat_var, emb_mat_const], 0)
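A shape sketch of the trainable/constant split above (hypothetical sizes, plain numpy): only the first num_trainable_tokens rows become a variable, the remaining rows stay constant, and concatenation restores the full [vocab_size, emb_size] matrix.

import numpy as np

vocab_size, emb_size, num_trainable_tokens = 10000, 300, 3
emb_mat = np.random.rand(vocab_size, emb_size).astype(np.float32)
emb_mat_var = emb_mat[:num_trainable_tokens]    # trainable rows (<unk>, <s>, </s>)
emb_mat_const = emb_mat[num_trainable_tokens:]  # frozen pretrained rows
full = np.concatenate([emb_mat_var, emb_mat_const], axis=0)
assert full.shape == (vocab_size, emb_size)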
def extend_hparams(hparams):
  """Extend training hparams."""
  # Set num_residual_layers
  if hparams.residual and hparams.num_layers > 1:
    num_residual_layers = hparams.num_layers - 1
  else:
    num_residual_layers = 0
  hparams.add_hparam("num_residual_layers", num_residual_layers)

  print("hparams.vocab_file", hparams.vocab_file)
  hparams.add_hparam("vocab_size",
                     vocab_utils.get_vocab_size(hparams.vocab_file))
  hparams.add_hparam("t1", vocab_utils.start_of_turn1)
  hparams.add_hparam("t2", vocab_utils.start_of_turn2)
  hparams.add_hparam("eod", vocab_utils.end_of_dialogue)
  hparams.add_hparam("unk", vocab_utils.UNK)

  # Check out_dir
  if not tf.gfile.Exists(hparams.out_dir):
    utils.print_out("# Creating output directory %s ..." % hparams.out_dir)
    tf.gfile.MakeDirs(hparams.out_dir)

  # Evaluation
  for metric in hparams.metrics:
    hparams.add_hparam("best_" + metric, 0)  # larger is better
    best_metric_dir = os.path.join(hparams.out_dir, "best_" + metric)
    hparams.add_hparam("best_" + metric + "_dir", best_metric_dir)
    tf.gfile.MakeDirs(best_metric_dir)

  # Path
  if not hparams.inference_output_file:
    # If not set, default to inference_out.txt under out_dir.
    hparams.inference_output_file = os.path.join(hparams.out_dir,
                                                 "inference_out.txt")

  return hparams
def before_train(train_model, train_sess, global_step, hparams, log_f,
                 tensor_or_op_name_to_replica_names):
  """Misc tasks to do before training."""
  stats = train.init_stats()
  lr_name = train_model.model.learning_rate.name
  assert len(tensor_or_op_name_to_replica_names[lr_name]) == 1
  lr = train_sess.run(tensor_or_op_name_to_replica_names[lr_name][0])
  info = {
      "train_ppl": 0.0,
      "speed": 0.0,
      "avg_step_time": 0.0,
      "avg_grad_norm": 0.0,
      "learning_rate": lr
  }
  start_train_time = time.time()
  utils.print_out(
      "# Start step %d, lr %g, %s" %
      (global_step, info["learning_rate"], time.ctime()), log_f)

  # Initialize all of the iterators
  skip_count = hparams.batch_size * hparams.epoch_step
  utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
  skip_count_name = train_model.skip_count_placeholder.name
  feed_dict = {}
  num_skip_counts = len(tensor_or_op_name_to_replica_names[skip_count_name])
  for i in range(num_skip_counts):
    feed_dict[tensor_or_op_name_to_replica_names[skip_count_name][i]] = 0
  initializers = []
  init_name = train_model.iterator.initializer.name
  num_initializers = len(tensor_or_op_name_to_replica_names[init_name])
  for i in range(num_initializers):
    initializers.append(tensor_or_op_name_to_replica_names[init_name][i])
  train_sess.run(initializers, feed_dict=feed_dict)

  return stats, info, start_train_time
def print_variables_in_ckpt(ckpt_path):
  """Print a list of variables in a checkpoint together with their shapes."""
  utils.print_out("# Variables in ckpt %s" % ckpt_path)
  reader = tf.train.NewCheckpointReader(ckpt_path)
  variable_map = reader.get_variable_to_shape_map()
  for key in sorted(variable_map.keys()):
    utils.print_out("  %s: %s" % (key, variable_map[key]))
def tokenize(hparams, file, tokenized_file):
  """Tokenize `file` into `tokenized_file` using an external tokenizer."""
  utils.print_out("tokenizing {} -> {}".format(file, tokenized_file))
  with open(file, 'rb') as input_file:
    with open(tokenized_file, 'wb') as output_file:
      subprocess.run([hparams.tokenizer_file, '-l', hparams.src],
                     stdin=input_file,
                     stdout=output_file)
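A usage sketch (hypothetical paths; it assumes hparams.tokenizer_file points to an executable tokenizer script that reads stdin, writes stdout, and accepts a -l language flag, and that hparams.src is the source language code):

tokenize(hparams, "data/newstest2014.de", "data/newstest2014.tok.de")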
def _single_cell(num_units, forget_bias, dropout, mode,
                 residual_connection=False, residual_fn=None,
                 global_step=None, fast_reverse=False, seq_len=None):
  """Create an instance of a single RNN cell."""
  # dropout (= 1 - keep_prob) is set to 0 during eval and infer
  dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0

  # Cell Type
  utils.print_out("  LSTM, forget_bias=%g" % forget_bias, new_line=False)
  single_cell = tf.contrib.rnn.BasicLSTMCell(num_units,
                                             forget_bias=forget_bias)

  # Dropout (= 1 - keep_prob)
  enabled = ((mode == tf.contrib.learn.ModeKeys.TRAIN)
             or dropout > 0.0 or fast_reverse)
  single_cell = CellWrapper(
      cell=single_cell,
      input_keep_prob=(1.0 - dropout),
      global_step=global_step,
      seq_len=seq_len,
      enabled=enabled)

  # Residual
  if residual_connection:
    single_cell = tf.contrib.rnn.ResidualWrapper(single_cell,
                                                 residual_fn=residual_fn)
    utils.print_out("  %s" % type(single_cell).__name__, new_line=False)

  return single_cell
def before_train(train_model, train_sess, global_step, hparams, log_f,
                 num_replicas_per_worker):
  """Misc tasks to do before training."""
  stats = train.init_stats()
  lr = train_sess.run(train_model.model.learning_rate)[0]
  info = {
      "train_ppl": 0.0,
      "speed": 0.0,
      "avg_step_time": 0.0,
      "avg_grad_norm": 0.0,
      "learning_rate": lr
  }
  start_train_time = time.time()
  utils.print_out(
      "# Start step %d, lr %g, %s" %
      (global_step, info["learning_rate"], time.ctime()), log_f)

  # Initialize all of the iterators
  skip_count = hparams.batch_size * hparams.epoch_step
  utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
  skip_count_placeholder = train_model.skip_count_placeholder
  feed_dict = {
      skip_count_placeholder: [0 for _ in range(num_replicas_per_worker)]
  }
  init = train_model.iterator.initializer
  train_sess.run(init, feed_dict=feed_dict)

  return stats, info, start_train_time
def before_train(loaded_train_model, train_model, train_sess, global_step,
                 hparams, log_f):
  """Misc tasks to do before training."""
  stats = init_stats()
  info = {
      "train_ppl": 0.0,
      "speed": 0.0,
      "avg_step_time": 0.0,
      "avg_grad_norm": 0.0,
      "avg_sequence_count": 0.0,
      "learning_rate": loaded_train_model.learning_rate.eval(
          session=train_sess)
  }
  start_train_time = time.time()
  utils.print_out(
      "# Start step %d, lr %g, %s" %
      (global_step, info["learning_rate"], time.ctime()), log_f)

  # Initialize all of the iterators
  skip_count = hparams.batch_size * hparams.epoch_step
  utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
  train_sess.run(
      train_model.iterator.initializer,
      feed_dict={train_model.skip_count_placeholder: skip_count})

  return stats, info, start_train_time
def _cell_list(unit_type, num_units, num_layers, num_residual_layers,
               forget_bias, dropout, mode, dtype=None, single_cell_fn=None,
               residual_fn=None, use_block_lstm=False):
  """Create a list of RNN cells."""
  if not single_cell_fn:
    single_cell_fn = _single_cell

  # Multi-GPU
  cell_list = []
  for i in range(num_layers):
    utils.print_out("  cell %d" % i, new_line=False)
    single_cell = single_cell_fn(
        unit_type=unit_type,
        num_units=num_units,
        forget_bias=forget_bias,
        dropout=dropout,
        mode=mode,
        dtype=dtype,
        residual_connection=(i >= num_layers - num_residual_layers),
        residual_fn=residual_fn,
        use_block_lstm=use_block_lstm)
    utils.print_out("")
    cell_list.append(single_cell)

  return cell_list
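A sketch of how such a cell list is typically consumed (assuming TF 1.x with tf.contrib; a single cell is used directly, while multiple cells are stacked into one multi-layer cell):

cell_list = _cell_list(unit_type="lstm", num_units=512, num_layers=4,
                       num_residual_layers=2, forget_bias=1.0, dropout=0.2,
                       mode=tf.contrib.learn.ModeKeys.TRAIN)
if len(cell_list) == 1:
  cell = cell_list[0]
else:
  cell = tf.contrib.rnn.MultiRNNCell(cell_list)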
def print_step_info(prefix, global_step, info, result_summary, log_f):
  """Print all info at the current global step."""
  utils.print_out(
      "%sstep %d lr %g step-time %.2fs wps %.2fK ppl %.2f gN %.2f %s, %s" %
      (prefix, global_step, info["learning_rate"], info["avg_step_time"],
       info["speed"], info["train_ppl"], info["avg_grad_norm"],
       result_summary, time.ctime()), log_f)
def _build_encoder(model, encoder_emb_inp, hparams):
  """Build a seq2seq encoder."""
  num_layers = hparams.num_layers
  num_residual_layers = hparams.num_residual_layers
  iterator = model.iterator

  with tf.variable_scope("encoder") as scope:
    dtype = scope.dtype
    # encoder_outputs: [batch_size, max_time, num_units] (time_major=False)
    utils.print_out("  num_layers = %d, num_residual_layers=%d" %
                    (num_layers, num_residual_layers))
    cell = _build_encoder_cell(model,
                               hparams,
                               num_layers,
                               num_residual_layers,
                               base_gpu=model.global_gpu_num,
                               all_layer_outputs=True)
    model.global_gpu_num += num_layers
    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        cell,
        encoder_emb_inp,
        dtype=dtype,
        sequence_length=iterator.dialogue_len,
        time_major=False,
        swap_memory=True)

  return encoder_outputs, encoder_state
def _cell_list(unit_type, num_units, num_layers, num_residual_layers,
               forget_bias, dropout, mode, num_gpus, base_gpu=0,
               single_cell_fn=None, residual_fn=None):
  """Create a list of RNN cells."""
  if not single_cell_fn:
    single_cell_fn = _single_cell

  # Multi-GPU
  cell_list = []
  for i in range(num_layers):
    utils.print_out("  cell %d" % i, new_line=False)
    single_cell = single_cell_fn(
        unit_type=unit_type,
        num_units=num_units,
        forget_bias=forget_bias,
        dropout=dropout,
        mode=mode,
        residual_connection=(i >= num_layers - num_residual_layers),
        device_str=get_device_str(i + base_gpu, num_gpus),
        residual_fn=residual_fn)
    utils.print_out("")
    cell_list.append(single_cell)

  return cell_list
def run_main(flags, default_hparams, train_fn, inference_fn):
  """Run main."""
  # Random
  random_seed = flags.random_seed
  if random_seed is not None and random_seed > 0:
    utils.print_out("# Set random seed to %d" % random_seed)
    random.seed(random_seed)
    np.random.seed(random_seed)

  ## Train / Decode
  out_dir = flags.out_dir
  if not tf.gfile.Exists(out_dir):
    tf.gfile.MakeDirs(out_dir)

  hparams = create_or_load_hparams(out_dir, default_hparams)

  if flags.inference_input_file:
    # Inference
    ckpt = flags.ckpt
    if not ckpt:
      ckpt = tf.train.latest_checkpoint(out_dir)
    inference_fn(ckpt, flags.inference_input_file,
                 flags.inference_output_file, hparams)
  else:
    # Train with the created or loaded hparams.
    train_fn(hparams)
def ensure_compatible_hparams(hparams, default_hparams, hparams_path):
  """Make sure the loaded hparams are compatible with new changes."""
  default_hparams = utils.maybe_parse_standard_hparams(default_hparams,
                                                       hparams_path)

  # For compatibility reasons, if there are new fields in default_hparams,
  # we add them to the current hparams.
  default_config = default_hparams.values()
  config = hparams.values()
  for key in default_config:
    if key not in config:
      hparams.add_hparam(key, default_config[key])

  # Make sure that the loaded model has the latest values for the below keys
  updated_keys = [
      "out_dir", "num_gpus", "test_prefix", "beam_width",
      "length_penalty_weight", "num_train_steps"
  ]
  for key in updated_keys:
    if key in default_config and getattr(hparams, key) != default_config[key]:
      utils.print_out(
          "# Updating hparams.%s: %s -> %s" %
          (key, str(getattr(hparams, key)), str(default_config[key])))
      setattr(hparams, key, default_config[key])

  return hparams
def single_worker_inference(infer_model, ckpt, inference_input_file,
                            inference_output_file, hparams):
  """Inference with a single worker."""
  output_infer = inference_output_file

  # Read data
  infer_data = load_data(inference_input_file, hparams)

  with tf.Session(
      config=utils.get_config_proto(), graph=infer_model.graph) as sess:
    loaded_infer_model = model_helper.load_model(infer_model.model, ckpt,
                                                 sess, "infer")
    sess.run(infer_model.iterator.initializer,
             feed_dict={
                 infer_model.src_placeholder: infer_data,
                 infer_model.batch_size_placeholder: hparams.infer_batch_size
             })
    # Decode
    utils.print_out("# Start decoding")
    _decode_and_evaluate(
        "infer",
        loaded_infer_model,
        sess,
        output_infer,
        ref_file=None,
        subword_option=None,
        beam_width=hparams.beam_width,
        tgt_eos=hparams.eos,
        num_translations_per_input=hparams.num_translations_per_input)