Example No. 1
def train(model, optimizer, train_data, train_target, mask):
    def model_loss(inputs, targets):
        return loss_fn(model, inputs, targets, mask)

    grad_fn = tfe.implicit_gradients(model_loss)
    grads_and_vars = grad_fn(train_data, train_target)
    optimizer.apply_gradients(grads_and_vars)
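
In every example on this page, tfe.implicit_gradients(f) wraps the loss function f so that calling the wrapped function with f's own arguments returns a list of (gradient, variable) pairs, one for each trainable variable touched while computing the loss, which is exactly the structure optimizer.apply_gradients expects. A minimal self-contained sketch of that contract (the toy variable and loss below are made up for illustration):

import tensorflow as tf
from tensorflow.contrib.eager.python import tfe

tf.enable_eager_execution()

w = tfe.Variable(3.0, name='w')


def toy_loss(x):
    # The loss reads the variable w, so implicit_gradients will track it.
    return tf.square(w * x - 1.0)


grad_fn = tfe.implicit_gradients(toy_loss)
grads_and_vars = grad_fn(tf.constant(2.0))  # [(d(loss)/dw, w)]
tf.train.GradientDescentOptimizer(0.1).apply_gradients(grads_and_vars)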
Example No. 2
def train(model, optimizer, train_data, sequence_length, clip_ratio):
    def model_loss(inputs, targets):
        return loss_fn(model, inputs, targets, training=True)

    grads = tfe.implicit_gradients(model_loss)

    train_loss = []
    # The range() call splits the original sequence into chunks of sequence_length and trains on each chunk separately
    for batch, i in enumerate(
            range(0, train_data.shape[1] - sequence_length, sequence_length)):
        # Fetch the input/target slices for this chunk
        train_seq = tf.convert_to_tensor(train_data[:, i:i + sequence_length])
        train_target = tf.convert_to_tensor(train_data[:, i + 1:i + 1 +
                                                       sequence_length])
        assert train_seq.shape[1] == train_target.shape[1] == sequence_length

        optimizer.apply_gradients(
            clip_gradients(grads(train_seq, train_target), clip_ratio))

        if batch % 10 == 0:
            loss = model_loss(train_seq, train_target).numpy()
            print(loss, end=",", flush=True)
            train_loss.append(loss)

    return np.mean(train_loss)
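
The slicing above is the usual next-step language-model setup: each input window of length sequence_length is paired with the same window shifted one position to the right, so the model is trained to predict the following token at every position. A tiny illustration of the index arithmetic with made-up data:

import numpy as np

data = np.arange(10).reshape(1, 10)  # shape (batch, time)
sequence_length = 3
for i in range(0, data.shape[1] - sequence_length, sequence_length):
    inputs = data[:, i:i + sequence_length]           # e.g. [[0 1 2]]
    targets = data[:, i + 1:i + 1 + sequence_length]  # e.g. [[1 2 3]]
    print(inputs, targets)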
Example No. 3
    def _benchmark_train(self, label, model):
        with tf.device(device()):
            optimizer = tf.train.GradientDescentOptimizer(1.)

            def model_loss(inputs, targets):
                return rnn_ptb.loss_fn(model, inputs, targets, training=True)

            grads = tfe.implicit_gradients(model_loss)

            sequence_batch = tf.ones(
                [PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE],
                dtype=tf.int64)

            def step():
                optimizer.apply_gradients(
                    rnn_ptb.clip_gradients(
                        grads(sequence_batch, sequence_batch), 0.25))

            for _ in range(10):  # Warmup
                step()
            force_gpu_sync()
            gc.collect()

            start = time.time()
            iters = 100
            for _ in range(iters):
                step()
            force_gpu_sync()
            self._report(label, start, iters, device(),
                         int(sequence_batch.shape[1]))
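
The clip_gradients / rnn_ptb.clip_gradients helper is not shown in these snippets. A plausible sketch, assuming it applies global-norm clipping to the (gradient, variable) pairs before they reach apply_gradients, which is the usual recipe for RNN training:

import tensorflow as tf


def clip_gradients(grads_and_vars, clip_ratio):
    # Separate the pairs, clip the gradients by their global norm,
    # then re-pair them for optimizer.apply_gradients.
    gradients, variables = zip(*grads_and_vars)
    clipped, _ = tf.clip_by_global_norm(gradients, clip_ratio)
    return list(zip(clipped, variables))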
Example No. 4
  def _benchmark_train(self, label, model):
    with tf.device(device()):
      optimizer = tf.train.GradientDescentOptimizer(1.)

      def model_loss(inputs, targets):
        return rnn_ptb.loss_fn(model, inputs, targets, training=True)

      grads = tfe.implicit_gradients(model_loss)

      sequence_batch = tf.ones(
          [PTBBenchmark.SEQ_LEN, PTBBenchmark.BATCH_SIZE], dtype=tf.int64)

      def step():
        optimizer.apply_gradients(
            rnn_ptb.clip_gradients(grads(sequence_batch, sequence_batch), 0.25))

      for _ in range(10):  # Warmup
        step()
      force_gpu_sync()
      gc.collect()

      start = time.time()
      iters = 100
      for _ in range(iters):
        step()
      force_gpu_sync()
      self._report(label, start, iters, device(), int(sequence_batch.shape[1]))
Example No. 5
def main(_):
    policy = Policy()
    optimizer = tf.train.GradientDescentOptimizer(5e-1)
    policy_gradient = tfe.implicit_gradients(loss_fn)

    env = gym.make('CartPole-v0')
    observs = [env.reset()]
    for i in range(5):
        observs.append(env.step(env.action_space.sample())[0])
    observs = tf.constant(tf.stack(observs))
    optimizer.apply_gradients(policy_gradient(policy, observs))
    print(loss_fn(policy, observs))
Example No. 6
def train(model, optimizer, train_data, sequence_length, clip_ratio):
  """training an epoch."""

  def model_loss(inputs, targets, total):
    lossfn, accuracy = loss_fn(model, inputs, targets, training=True)
    total[1] += accuracy
    total[0] += 1.0
    return lossfn

  grads = tfe.implicit_gradients(model_loss)
  total = [0.0]*2
  total_time = 0
  #writer = tf.contrib.summary.create_file_writer(FLAGS.logdir)
  global_step = tf.train.get_or_create_global_step()  # return global step var
  #writer.set_as_default()

  for j in range(0, sequence_length, 2):
    for batch, i in enumerate(range(0, train_data.shape[0] - 1, sequence_length)):

      train_seq, train_target = _get_batch(train_data, i+j, sequence_length)
      a, b = train_seq.shape
      if a < sequence_length / 2:
        continue

      input_list = tf.unstack(train_seq, num=int(train_seq.shape[0]), axis=0)
      start = time.time()
      optimizer.apply_gradients(
          clip_gradients(grads(train_seq, train_target,total), clip_ratio))
      total_time += (time.time() - start)
      global_step.assign_add(1)
      with tf.contrib.summary.record_summaries_every_n_global_steps(100):
        if total[0] > 0.0: 
          tf.contrib.summary.scalar('train_acc', total[1]/total[0])
        tf.contrib.summary.scalar('train_loss', model_loss(train_seq, train_target,total).numpy())
      if batch % 100 == 0:  # and i >= train_data.shape[0] - 5:
        time_in_ms = (total_time * 1000) / (batch + 1)
        sys.stderr.write("batch %d,i:%d: training loss %.6f, avg step time %d ms\n" %
                         (batch,i, model_loss(train_seq, train_target,total).numpy(),
                          time_in_ms))
        if total[0] > 0.0: 
          sys.stderr.write("batch %d: training accuracy: %.8f\n" %
                         (batch, total[1]/total[0]))
        total[0] = 0.0
        total[1] = 0.0
Example No. 7
def train(model, optimizer, train_data, sequence_length, clip_ratio):
  """training an epoch."""

  def model_loss(inputs, targets):
    return loss_fn(model, inputs, targets, training=True)

  grads = tfe.implicit_gradients(model_loss)

  total_time = 0
  for batch, i in enumerate(range(0, train_data.shape[0] - 1, sequence_length)):
    train_seq, train_target = _get_batch(train_data, i, sequence_length)
    start = time.time()
    optimizer.apply_gradients(
        clip_gradients(grads(train_seq, train_target), clip_ratio))
    total_time += (time.time() - start)
    if batch % 10 == 0:
      time_in_ms = (total_time * 1000) / (batch + 1)
      sys.stderr.write("batch %d: training loss %.2f, avg step time %d ms\n" %
                       (batch, model_loss(train_seq, train_target).numpy(),
                        time_in_ms))
Example No. 8
def train_one_epoch(model, optimizer, train_data, log_interval=None):
    """Trains model on train_data using optimizer."""
    def model_loss(labels, chars, sequence_length):
        predictions = model(chars, sequence_length, training=True)
        return loss(labels, predictions)

    # `grad` is a function that returns the gradients of model_loss with respect
    # to all the variables that contribute to the computation of its output.
    grad = tfe.implicit_gradients(model_loss)

    batch = 0
    for (labels, chars, sequence_length) in tfe.Iterator(train_data):
        optimizer.apply_gradients(grad(labels, chars, sequence_length))
        if log_interval and batch % log_interval == 0:
            batch_loss = model_loss(labels, chars, sequence_length)
            print("train/batch #{}\tloss: {:.6f}".format(
                batch, float(batch_loss)))
            if model.summary_writer is not None:
                model.summary_writer.scalar("train/loss", batch_loss)
                model.summary_writer.step()
        batch += 1
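
As the comment above notes, grad returns the gradients of model_loss with respect to every variable that contributes to its output. In eager execution the same (gradient, variable) pairs can be built explicitly with tf.GradientTape; a rough equivalent of one training step, assuming the model exposes its trainable variables as model.variables:

def train_step(labels, chars, sequence_length):
    with tf.GradientTape() as tape:
        batch_loss = model_loss(labels, chars, sequence_length)
    gradients = tape.gradient(batch_loss, model.variables)
    optimizer.apply_gradients(zip(gradients, model.variables))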
Example No. 9
    # Optimal parameter 3
    x = [9.42478, 2.475]
    params = [
        tf.get_variable('%d' % i, initializer=xi) for i, xi in enumerate(x)
    ]

    loss = evaluation_branin(*params).numpy()
    assert np.allclose(loss, 0.397887)


check_branin_impl()

print("Initial loss : ", evaluation_branin(x, y).numpy())

# Get the gradients and variables to be optimized
grad_fn = tfe.implicit_gradients(evaluation_branin)
grad_vars = grad_fn(x, y)

# Prepare the optimizer. Since this is a very simple problem, we don't need
# many optimization steps
optimizer = tf.train.AdamOptimizer(0.01)

for i in range(200):
    # update the variables and print the loss value
    optimizer.apply_gradients(grad_vars)
    print("[%d] Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
          (i + 1, evaluation_branin(x, y).numpy(), x.numpy(), y.numpy()))

print()
print("Final Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
      (evaluation_branin(x, y).numpy(), x.numpy(), y.numpy()))
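
evaluation_branin itself is not shown here. The Branin-Hoo test function has three global minima with value ≈ 0.397887, and the point (9.42478, 2.475) used in the assertion above is one of them; a sketch of the standard formula the function presumably implements:

import math

import tensorflow as tf


def evaluation_branin(x, y):
    # Standard Branin-Hoo constants.
    a = 1.0
    b = 5.1 / (4.0 * math.pi ** 2)
    c = 5.0 / math.pi
    r = 6.0
    s = 10.0
    t = 1.0 / (8.0 * math.pi)
    return a * (y - b * x ** 2 + c * x - r) ** 2 + s * (1.0 - t) * tf.cos(x) + s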
Example No. 10
    x = [0.20169, 0.15001, 0.476874, 0.275332, 0.311652, 0.6573]
    params = [
        tf.get_variable('x%d' % i, initializer=xi) for i, xi in enumerate(x)
    ]

    loss = evaluation_hartmann6(params).numpy()

    assert np.allclose(loss, -3.32237)


check_hartmann6_impl()

print("Initial loss : ", evaluation_hartmann6(variables).numpy())

# Get the gradients and variables to be optimized
grad_fn = tfe.implicit_gradients(evaluation_hartmann6)
grad_vars = grad_fn(variables)

# Prepare the optimizer. Since this is a very simple problem, we don't need
# many optimization steps
optimizer = tf.train.AdamOptimizer(0.001)

for i in range(500):
    # update the variables and print the loss value
    optimizer.apply_gradients(grad_vars)
    print("[%d] Loss = %0.6f" %
          (i + 1, evaluation_hartmann6(variables).numpy()))

print()
print("Final Loss = %0.6f" % (evaluation_hartmann6(variables).numpy()))
"""
Example No. 11
# Create the variables to be optimized
x = tf.get_variable('x', initializer=tf.random_uniform([], -5.0, 5.0))
y = tf.get_variable('y', initializer=tf.random_uniform([], -0.0, 15.0))


# define the squared error loss function as before
def loss(x, y):
    y_predicted = 2 * x - y
    y_true = 4.0
    return tf.square(y_predicted - y_true)


print("Initial loss : ", loss(x, y).numpy())

# Get the gradients and variables to be optimized
grad_fn = tfe.implicit_gradients(loss)
grad_vars = grad_fn(x, y)

# prepare the optimizer. Since this is a very simple problem, we don't need
# many optimization steps
optimizer = tf.train.GradientDescentOptimizer(0.01)

for i in range(10):
    # update the variables and print the loss value
    optimizer.apply_gradients(grad_vars)
    print("[%d] Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
          (i + 1, loss(x, y).numpy(), x.numpy(), y.numpy()))

print()
print("Final Loss = %0.6f - (x = %0.5f, y = %0.5f)" %
      (loss(x, y).numpy(), x.numpy(), y.numpy()))
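
Because the loss is (2x - y - 4)^2, every point on the line 2x - y = 4 has zero loss, so gradient descent converges to whichever point on that line is nearest the random initialization rather than to a unique (x, y). The pairs returned by grad_fn can also be checked against the analytic gradients dL/dx = 4(2x - y - 4) and dL/dy = -2(2x - y - 4); a small sanity-check sketch:

residual = 2.0 * x.numpy() - y.numpy() - 4.0
expected = {'x': 4.0 * residual, 'y': -2.0 * residual}
for gradient, variable in grad_fn(x, y):
    name = variable.name.split(':')[0]
    print(name, float(gradient.numpy()), expected[name])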
Example No. 12
import tensorflow as tf
from tensorflow.contrib.eager.python import tfe
tf.enable_eager_execution()

x = tfe.Variable(initial_value=tf.random_uniform([1], -1., 1.), name='x')


def loss(input):
    return tf.sigmoid(input)


grad_vars = tfe.implicit_gradients(loss)
opt = tf.train.GradientDescentOptimizer(learning_rate=1)

for i in range(1000):
    for j in range(50):
        opt.apply_gradients(grad_vars(x))

    if i % 50 == 0:
        loss_val = loss(x)
        print(i, "Optimal Value : ", loss_val.numpy(), "Val (X) : ", x.numpy())