Example #1
def _create_variables(accum_count):
  # Builds a delayed-update graph for the given accum_count and returns the
  # sorted names of all variables it creates.
  global_step = tf.Variable(0, trainable=False, dtype=tf.int64)
  optimizer = tf.train.AdamOptimizer(1.0)
  gradient = tf.placeholder(tf.float32, shape=[2])
  variable = tf.Variable([1.0, 2.0])
  optim.delayed_update(optimizer, [(gradient, variable)],
                       global_step,
                       accum_count=accum_count)
  return sorted(var.name for var in tf.global_variables())
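A sketch of how this helper might be exercised, assuming (as the other examples suggest) that optim.delayed_update creates extra gradient-accumulation variables only when accum_count is greater than 1; like the examples above it assumes tensorflow (as tf) and the OpenNMT-tf optim module are imported, and the wrapper and test class below are illustrative rather than taken from the original test file:

def _variables_with_accum_count(accum_count):
  # Build each configuration in a fresh graph so tf.global_variables() only
  # reports the variables created by this call.
  with tf.Graph().as_default():
    return _create_variables(accum_count)

class DelayedUpdateVariablesTest(tf.test.TestCase):

  def testAccumulationCreatesExtraVariables(self):
    # Accumulating over several iterations should create additional variables
    # (the gradient accumulators) on top of the model and optimizer variables.
    self.assertGreater(len(_variables_with_accum_count(4)),
                       len(_variables_with_accum_count(1)))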
Example #2
  def testDelayedUpdate(self):
    global_step = tf.Variable(0, trainable=False, dtype=tf.int64)
    optimizer = tf.train.GradientDescentOptimizer(1.0)
    gradient = tf.placeholder(tf.float32, shape=[2])
    variable = tf.Variable([1.0, 2.0])
    train_op, extra_variables = optim.delayed_update(
        optimizer,
        [(gradient, variable)],
        global_step,
        accum_count=3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.variables_initializer(extra_variables))

      def _check_step(grad, expected_variable, expected_step):
        _, variable_value, step_value = sess.run(
            [train_op, variable, global_step], feed_dict={gradient: grad})
        self.assertAllEqual(variable_value, expected_variable)
        self.assertAllEqual(step_value, expected_step)

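      # With a learning rate of 1.0, each "apply" step computes
      # variable -= accum_grad, resets the accumulator, and increments
      # global_step; the other steps only add to the accumulator.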
      _check_step([3.0, 3.0], [1.0, 2.0], 0)     # accum_grad = [3.0, 3.0]
      _check_step([4.0, 1.0], [1.0, 2.0], 0)     # accum_grad = [7.0, 4.0]
      _check_step([-1.0, 0.0], [-5.0, -2.0], 1)  # accum_grad = [6.0, 4.0], apply
      _check_step([-3.0, 1.0], [-5.0, -2.0], 1)  # accum_grad = [-3.0, 1.0]
      _check_step([0.0, -3.0], [-5.0, -2.0], 1)  # accum_grad = [-3.0, -2.0]
      _check_step([2.0, -1.0], [-4.0, 1.0], 2)   # accum_grad = [-1.0, -3.0], apply
Example #3
  def testDelayedUpdateSparseGradients(self):
    # Test that delayed update does not crash on sparse gradients.
    global_step = tf.Variable(0, trainable=False, dtype=tf.int64)
    optimizer = tf.train.GradientDescentOptimizer(1.0)
    embeddings = tf.Variable([[1.0, 2.0], [3.0, 4.0]])
    x = tf.nn.embedding_lookup(embeddings, [0])
    loss = tf.losses.mean_squared_error([[1.1, 2.1]], x)
    # The gradient w.r.t. embeddings is a tf.IndexedSlices (only row 0 was
    # looked up), which is the sparse gradient case exercised here.
    gradients = optimizer.compute_gradients(loss)
    _ = optim.delayed_update(optimizer,
                             gradients,
                             global_step,
                             accum_count=3)
Example #4
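This example is a standalone training loop in the style of OpenNMT-tf 1.x library usage. It relies on module-level objects (source_inputter, target_inputter, encoder, decoder) that the original script is assumed to build elsewhere; the imports it needs would look roughly like the following (the opennmt.utils module paths are an assumption based on OpenNMT-tf 1.x):

import os

import tensorflow as tf

# Assumed OpenNMT-tf 1.x utility modules providing decay.noam_decay_v2,
# losses.cross_entropy_sequence_loss, and optim.delayed_update.
from opennmt.utils import decay, losses, optim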
def train(model_dir,
          example_inputter,
          source_file,
          target_file,
          maximum_length=100,
          shuffle_buffer_size=1000000,
          gradients_accum=8,
          train_steps=100000,
          save_every=1000,
          report_every=50):
    """Runs the training loop.
    Args:
      model_dir: Directory where checkpoints are saved.
      example_inputter: The inputter instance that produces the training examples.
      source_file: The source training file.
      target_file: The target training file.
      maximum_length: Filter sequences longer than this.
      shuffle_buffer_size: How many examples to load for shuffling.
      gradients_accum: Accumulate gradients of this many iterations.
      train_steps: Train for this many iterations.
      save_every: Save a checkpoint every this many iterations.
      report_every: Report training progress every this many iterations.
    """
    mode = tf.estimator.ModeKeys.TRAIN

    # Create the dataset.
    dataset = example_inputter.make_training_dataset(
        source_file,
        target_file,
        batch_size=3072,
        batch_type="tokens",
        shuffle_buffer_size=shuffle_buffer_size,
        bucket_width=1,  # Bucketize sequences of the same length together for efficiency.
        maximum_features_length=maximum_length,
        maximum_labels_length=maximum_length)
    iterator = dataset.make_initializable_iterator()
    source, target = iterator.get_next()

    # Encode the source.
    with tf.variable_scope("encoder"):
        source_embedding = source_inputter.make_inputs(source, training=True)
        memory, _, _ = encoder.encode(source_embedding,
                                      source["length"],
                                      mode=mode)

    # Decode the target.
    with tf.variable_scope("decoder"):
        target_embedding = target_inputter.make_inputs(target, training=True)
        logits, _, _ = decoder.decode(
            target_embedding,
            target["length"],
            vocab_size=target_inputter.vocabulary_size,
            mode=mode,
            memory=memory,
            memory_sequence_length=source["length"])

    # Compute the loss.
    loss, normalizer, _ = losses.cross_entropy_sequence_loss(
        logits,
        target["ids_out"],
        target["length"],
        label_smoothing=0.1,
        average_in_time=True,
        mode=mode)
    loss /= normalizer

    # Define the learning rate schedule.
    step = tf.train.create_global_step()
    learning_rate = decay.noam_decay_v2(2.0,
                                        step,
                                        model_dim=512,
                                        warmup_steps=4000)

    # Define the optimization op.
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss)
    train_op, optim_variables = optim.delayed_update(
        optimizer, gradients, step, accum_count=gradients_accum)
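    # With gradients_accum > 1, train_op only applies the accumulated gradients
    # (and increments step) once every gradients_accum iterations; the other
    # iterations just add to the accumulators, emulating a larger batch size.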

    # Run the training loop.
    saver = tf.train.Saver()
    checkpoint_path = None
    if os.path.exists(model_dir):
        checkpoint_path = tf.train.latest_checkpoint(model_dir)
    with tf.Session() as sess:
        if checkpoint_path is not None:
            print("Restoring parameters from %s" % checkpoint_path)
            saver.restore(sess, checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
        sess.run(tf.variables_initializer(optim_variables))
        sess.run(tf.tables_initializer())
        sess.run(iterator.initializer)
        last_step = -1
        while True:
            step_, lr_, loss_, _ = sess.run(
                [step, learning_rate, loss, train_op])
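            # step only advances when the accumulated gradients are applied,
            # so skip reporting and saving until it changes.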
            if step_ != last_step:
                if step_ % report_every == 0:
                    print("Step = %d ; Learning rate = %f ; Loss = %f" %
                          (step_, lr_, loss_))
                if step_ % save_every == 0:
                    print("Saving checkpoint for step %d" % step_)
                    saver.save(sess, "%s/model" % model_dir, global_step=step_)
                if step_ == train_steps:
                    break
            last_step = step_
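A hypothetical invocation follows; the checkpoint directory and data file names are placeholders, and example_inputter stands for whatever inputter object the surrounding script builds:

train("checkpoints",
      example_inputter,
      "train.src",
      "train.tgt",
      gradients_accum=8)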