def test_no_decay(self):
  decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type=None, decay_steps=5, decay_rate=2.0)
  self.assertEqual(decay_fn, None)

  decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type="", decay_steps=5, decay_rate=2.0)
  self.assertEqual(decay_fn, None)
def _build_train_op(self, loss):
  """Creates the training operation"""
  learning_rate_decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type=self.params["optimizer.lr_decay_type"] or None,
      decay_steps=self.params["optimizer.lr_decay_steps"],
      decay_rate=self.params["optimizer.lr_decay_rate"],
      start_decay_at=self.params["optimizer.lr_start_decay_at"],
      stop_decay_at=self.params["optimizer.lr_stop_decay_at"],
      min_learning_rate=self.params["optimizer.lr_min_learning_rate"],
      staircase=self.params["optimizer.lr_staircase"])

  optimizer = self._create_optimizer()
  global_step = tf.train.get_global_step()

  # Optional moving-average tracking of the ResNet encoder variables,
  # currently disabled:
  # moving_average_decay = \
  #     self.params["encoder.params"]["resnet"]["moving_average_decay"]
  # moving_average_variables = []
  # for var in slim.get_model_variables():
  #   if 'resnet_v1_50' in var.name:
  #     moving_average_variables.append(var)
  #
  # variable_averages = tf.train.ExponentialMovingAverage(
  #     moving_average_decay, global_step)
  # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  # update_ops.append(variable_averages.apply(moving_average_variables))

  train_op = tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=global_step,
      learning_rate=self.params["optimizer.learning_rate"],
      learning_rate_decay_fn=learning_rate_decay_fn,
      clip_gradients=self._clip_gradients,
      optimizer=optimizer,
      summaries=["learning_rate", "loss", "gradients", "gradient_norm"])

  return train_op
def test_decay_with_min(self):
  decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type="exponential_decay",
      decay_steps=10,
      decay_rate=0.9,
      start_decay_at=100,
      stop_decay_at=1000.0,
      min_learning_rate=0.01,
      staircase=False)

  initial_lr = 1.0
  with self.test_session() as sess:
    # Should not decay past min_learning_rate
    np.testing.assert_almost_equal(sess.run(decay_fn(initial_lr, 900)), 0.01)
def _build_train_op(self, loss):
  """Creates the training operation"""
  learning_rate_decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type=self.params["optimizer.lr_decay_type"] or None,
      decay_steps=self.params["optimizer.lr_decay_steps"],
      decay_rate=self.params["optimizer.lr_decay_rate"],
      start_decay_at=self.params["optimizer.lr_start_decay_at"],
      stop_decay_at=self.params["optimizer.lr_stop_decay_at"],
      min_learning_rate=self.params["optimizer.lr_min_learning_rate"],
      staircase=self.params["optimizer.lr_staircase"])

  return tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=self.params["optimizer.learning_rate"],
      learning_rate_decay_fn=learning_rate_decay_fn,
      clip_gradients=self.params["optimizer.clip_gradients"],
      optimizer=self.params["optimizer.name"],
      summaries=["learning_rate", "loss", "gradients", "gradient_norm"])
def test_decay_without_min(self):
  decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type="exponential_decay",
      decay_steps=10,
      decay_rate=0.9,
      start_decay_at=100,
      stop_decay_at=1000,
      staircase=False)

  initial_lr = 1.0
  with self.test_session() as sess:
    # Should not decay before start_decay_at
    np.testing.assert_equal(sess.run(decay_fn(initial_lr, 50)), initial_lr)

    # Proper decay
    np.testing.assert_almost_equal(
        sess.run(decay_fn(initial_lr, 115)),
        initial_lr * 0.9**(15.0 / 10.0))

    # Should not decay past stop_decay_at
    np.testing.assert_almost_equal(
        sess.run(decay_fn(initial_lr, 5000)),
        initial_lr * 0.9**((1000.0 - 100.0) / 10.0))
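# The tests above pin down the expected behavior of the decay function:
# constant before `start_decay_at`, exponential decay in between, frozen
# after `stop_decay_at`, and optionally floored at `min_learning_rate`.
# Below is a minimal sketch of a function with that behavior, assuming
# TF 1.x APIs and integer step bounds; it is illustrative only, not the
# project's actual `create_learning_rate_decay_fn` implementation.
import tensorflow as tf

def _sketch_decay_fn(decay_steps, decay_rate, start_decay_at, stop_decay_at,
                     min_learning_rate=None, staircase=False):
  def decay_fn(learning_rate, global_step):
    # Clamp the step so decay neither starts early nor runs past the end.
    step = tf.clip_by_value(
        tf.to_int32(global_step), start_decay_at, stop_decay_at)
    decayed = tf.train.exponential_decay(
        learning_rate=learning_rate,
        global_step=step - start_decay_at,
        decay_steps=decay_steps,
        decay_rate=decay_rate,
        staircase=staircase)
    if min_learning_rate is not None:
      # Never decay below the configured floor.
      decayed = tf.maximum(decayed, min_learning_rate)
    return decayed
  return decay_fn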
def _build_train_op(self, loss):
  """Creates the training operation"""
  learning_rate_decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type=self.params["optimizer.lr_decay_type"] or None,
      decay_steps=self.params["optimizer.lr_decay_steps"],
      decay_rate=self.params["optimizer.lr_decay_rate"],
      start_decay_at=self.params["optimizer.lr_start_decay_at"],
      stop_decay_at=self.params["optimizer.lr_stop_decay_at"],
      min_learning_rate=self.params["optimizer.lr_min_learning_rate"],
      staircase=self.params["optimizer.lr_staircase"])

  optimizer = self._create_optimizer()

  train_op = tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=self.params["optimizer.learning_rate"],
      learning_rate_decay_fn=learning_rate_decay_fn,
      clip_gradients=self._clip_gradients,
      optimizer=optimizer,
      summaries=["learning_rate", "loss", "gradients", "gradient_norm"])

  return train_op
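# `_create_optimizer` and `_clip_gradients` are referenced above but not
# shown here. The snippet below is a hypothetical sketch of what such an
# optimizer factory might look like, assuming the optimizer is selected by
# name from the TF 1.x tf.train classes; the project's actual
# implementation may differ.
def _create_optimizer_sketch(name, learning_rate):
  optimizer_classes = {
      "Adam": tf.train.AdamOptimizer,
      "Adagrad": tf.train.AdagradOptimizer,
      "SGD": tf.train.GradientDescentOptimizer,
  }
  # Fall back to plain SGD for unknown names.
  optimizer_cls = optimizer_classes.get(name, tf.train.GradientDescentOptimizer)
  return optimizer_cls(learning_rate=learning_rate)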
def _build(self, features, labels, params, mode):
  # Pre-process features and labels
  features, labels = self.create_featurizer(mode)(features, labels)

  # Add to graph collection for later use
  graph_utils.add_dict_to_collection(features, "features")
  if labels:
    graph_utils.add_dict_to_collection(labels, "labels")

  source_ids = features["source_ids"]
  if self.params["source.reverse"] is True:
    source_ids = tf.reverse_sequence(
        input=features["source_ids"],
        seq_lengths=features["source_len"],
        seq_dim=1,
        batch_dim=0,
        name=None)

  # Create embeddings
  source_embedding = tf.get_variable(
      "source_embedding",
      [self.source_vocab_info.total_size, self.params["embedding.dim"]])
  target_embedding = tf.get_variable(
      "target_embedding",
      [self.target_vocab_info.total_size, self.params["embedding.dim"]])

  # Embed source
  source_embedded = tf.nn.embedding_lookup(source_embedding, source_ids)

  # Graph used for inference
  if mode == tf.contrib.learn.ModeKeys.INFER:
    target_start_id = self.target_vocab_info.special_vocab.SEQUENCE_START

    # Embed the "SEQUENCE_START" token
    initial_input = tf.nn.embedding_lookup(
        target_embedding,
        tf.ones_like(features["source_len"]) * target_start_id)

    def make_input_fn(predictions):
      """Use the embedded prediction as the input to the next time step"""
      return tf.nn.embedding_lookup(target_embedding, predictions)

    def elements_finished_fn(_time_, predictions):
      """Returns true when a prediction is finished"""
      return tf.equal(
          predictions,
          tf.cast(self.target_vocab_info.special_vocab.SEQUENCE_END,
                  dtype=predictions.dtype))

    decoder_input_fn_infer = decoders.DynamicDecoderInputs(
        initial_inputs=initial_input,
        make_input_fn=make_input_fn,
        max_decode_length=self.params["inference.max_decode_length"],
        elements_finished_fn=elements_finished_fn)

    # Decode
    decoder_output = self.encode_decode(
        source=source_embedded,
        source_len=features["source_len"],
        decoder_input_fn=decoder_input_fn_infer,
        mode=mode)
    predictions = self._create_predictions(
        decoder_output=decoder_output, features=features, labels=labels)
    return predictions, None, None

  # Embed target
  target_embedded = tf.nn.embedding_lookup(target_embedding,
                                           labels["target_ids"])

  # During training/eval, we have labels and use them for teacher forcing
  # We don't feed the last SEQUENCE_END token
  decoder_input_fn_train = decoders.FixedDecoderInputs(
      inputs=target_embedded[:, :-1],
      sequence_length=labels["target_len"] - 1)

  decoder_output = self.encode_decode(
      source=source_embedded,
      source_len=features["source_len"],
      decoder_input_fn=decoder_input_fn_train,
      mode=mode)

  # Calculate loss per example-timestep of shape [B, T]
  losses = seq2seq_losses.cross_entropy_sequence_loss(
      logits=decoder_output.logits[:, :, :],
      targets=tf.transpose(labels["target_ids"][:, 1:], [1, 0]),
      sequence_length=labels["target_len"] - 1)

  # Calculate the average log perplexity
  loss = tf.reduce_sum(losses) / tf.to_float(
      tf.reduce_sum(labels["target_len"] - 1))

  learning_rate_decay_fn = training_utils.create_learning_rate_decay_fn(
      decay_type=self.params["optimizer.lr_decay_type"] or None,
      decay_steps=self.params["optimizer.lr_decay_steps"],
      decay_rate=self.params["optimizer.lr_decay_rate"],
      start_decay_at=self.params["optimizer.lr_start_decay_at"],
      stop_decay_at=self.params["optimizer.lr_stop_decay_at"],
      min_learning_rate=self.params["optimizer.lr_min_learning_rate"],
      staircase=self.params["optimizer.lr_staircase"])

  train_op = tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=self.params["optimizer.learning_rate"],
      learning_rate_decay_fn=learning_rate_decay_fn,
      clip_gradients=self.params["optimizer.clip_gradients"],
      optimizer=self.params["optimizer.name"],
      summaries=tf.contrib.layers.optimizers.OPTIMIZER_SUMMARIES)

  if mode == tf.contrib.learn.ModeKeys.EVAL:
    train_op = None

  predictions = self._create_predictions(
      decoder_output=decoder_output,
      features=features,
      labels=labels,
      losses=losses)

  # We add "useful" tensors to the graph collection so that we
  # can easily find them in our hooks/monitors.
  graph_utils.add_dict_to_collection(predictions, "predictions")

  return predictions, loss, train_op
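# `_build` returns a (predictions, loss, train_op) triple, which matches the
# tf.contrib.learn model_fn contract. Below is a hedged sketch of how such a
# model could be driven end to end; `build_model` is a hypothetical stand-in
# for whatever object wraps `_build`, not the project's actual API.
def model_fn(features, labels, params, mode):
  predictions, loss, train_op = build_model(features, labels, params, mode)
  return tf.contrib.learn.ModelFnOps(
      mode=mode, predictions=predictions, loss=loss, train_op=train_op)

# estimator = tf.contrib.learn.Estimator(model_fn=model_fn, model_dir="/tmp/model")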