Example #1
    def testOpClipDifferentClipValues(self):
        x = tf.placeholder(tf.float32, shape=[2, 1])
        y_1 = snt.clip_gradient(x, 1, 2)
        y_2 = snt.clip_gradient(x, 2, 3)
        z_1 = tf.reduce_sum(y_1 * y_1)
        z_2 = tf.reduce_sum(y_2 * y_2)
        dzdy_1 = tf.gradients(z_1, y_1)[0]
        dzdy_2 = tf.gradients(z_2, y_2)[0]
        dzdx_1 = tf.gradients(z_1, x)[0]
        dzdx_2 = tf.gradients(z_2, x)[0]

        x_np = np.array([[0.5], [2]])
        with self.test_session() as sess:
            y_np_1, dzdy_np_1, dzdx_np_1, y_np_2, dzdy_np_2, dzdx_np_2 = sess.run(
                [y_1, dzdy_1, dzdx_1, y_2, dzdy_2, dzdx_2],
                feed_dict={x: x_np})

            self.assertAllEqual(y_np_1, x_np)
            self.assertAllEqual(y_np_2, x_np)
            # We do not expect the gradients with respect to the output to be clipped.
            self.assertAllEqual(dzdy_np_1, np.array([[1], [4]]))
            self.assertAllEqual(dzdy_np_2, np.array([[1], [4]]))
            # We expect the gradients w.r.t. the input to be clipped to [1, 2] and [2, 3] respectively.
            self.assertAllEqual(dzdx_np_1, np.array([[1], [2]]))
            self.assertAllEqual(dzdx_np_2, np.array([[2], [3]]))
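
All of the examples on this page are excerpts from TF1-era code built on DeepMind Sonnet, so they omit their surrounding module and imports. A minimal sketch of the imports they all assume (the aliases np, snt and tf are used throughout):

    import numpy as np
    import sonnet as snt
    import tensorflow as tf
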
Example #2
    def testOpClipDifferentDtypes(self):
        x_1 = tf.placeholder(tf.float16, shape=())
        snt.clip_gradient(x_1, 0, 1)

        # clip_gradient throws here if the Defun func_name does not use the dtype.
        x_2 = tf.placeholder(tf.float32, shape=())
        snt.clip_gradient(x_2, 0, 1)
Example #3
  def testShape(self):
    x = tf.placeholder(tf.float32, [None, 10, 13])
    y = snt.clip_gradient(x, 0, 1)
    z = tf.reduce_sum(y * y)
    dzdx = tf.gradients(z, x)[0]

    # clip_gradient preserves the static shape, including the unknown
    # batch dimension, for both the output and the gradient.
    self.assertAllEqual(y.get_shape().as_list(), [None, 10, 13])
    self.assertAllEqual(dzdx.get_shape().as_list(), [None, 10, 13])
Example #4
    def _build(self, inputs):
        (shared_inputs, extra_policy_inputs) = inputs
        policy_in = tf.concat([shared_inputs, extra_policy_inputs], axis=1)

        policy = snt.nets.MLP(output_sizes=self._policy_layers,
                              activation=self._activation,
                              name='policy_mlp')(policy_in)

        # Sample an action from the policy logits.
        action = tf.multinomial(policy, num_samples=1, output_dtype=tf.int32)
        action = tf.squeeze(action, 1)  # [B, 1] -> [B]

        # Optionally clip the gradient flowing back into the policy logits
        # to [-clip, clip]; the forward value of `policy` is unchanged.
        if self._policy_clip_abs_value > 0:
            policy = snt.clip_gradient(
                net=policy,
                clip_value_min=-self._policy_clip_abs_value,
                clip_value_max=self._policy_clip_abs_value)

        # The baseline conditions on the policy logits, but stop_gradient
        # prevents the baseline loss from backpropagating into the policy.
        baseline_in = tf.concat(
            [shared_inputs, tf.stop_gradient(policy)], axis=1)
        baseline = snt.nets.MLP(self._baseline_layers,
                                activation=self._activation,
                                name='baseline_mlp')(baseline_in)
        baseline = tf.squeeze(baseline, axis=-1)  # [B, 1] -> [B]

        # Clip the gradient flowing back into the baseline in the same way.
        if self._policy_clip_abs_value > 0:
            baseline = snt.clip_gradient(
                net=baseline,
                clip_value_min=-self._policy_clip_abs_value,
                clip_value_max=self._policy_clip_abs_value)

        outputs = PolicyOutputs(policy=policy,
                                action=action,
                                baseline=baseline)

        return outputs
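
The PolicyOutputs container returned above is not defined in this excerpt; presumably it is a simple namedtuple along the lines of the sketch below (an assumption based on the fields used, not taken from the source):

    import collections

    # Hypothetical definition; the original is outside this excerpt.
    PolicyOutputs = collections.namedtuple(
        'PolicyOutputs', ['policy', 'action', 'baseline'])
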
Example #5
  def testOpClip(self):
    x = tf.placeholder(tf.float32, shape=[2, 1])
    y = snt.clip_gradient(x, 2, 3)
    z = tf.reduce_sum(y * y)
    dzdy = tf.gradients(z, y)[0]
    dzdx = tf.gradients(z, x)[0]

    x_np = np.array([[0.5], [2]])
    with self.test_session() as sess:
      y_np, dzdy_np, dzdx_np = sess.run([y, dzdy, dzdx], feed_dict={x: x_np})

      self.assertAllEqual(y_np, x_np)
      # We do not expect the gradients with respect to the output to be clipped.
      self.assertAllEqual(dzdy_np, np.array([[1], [4]]))
      # We expect the gradients with respect to the input to be clipped to [2, 3].
      self.assertAllEqual(dzdx_np, np.array([[2], [3]]))
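
For intuition about what these tests verify: snt.clip_gradient is an identity op in the forward pass whose backward pass clips the gradient w.r.t. its input. Sonnet's actual implementation is built on tf.Defun (hence the func_name comment in Example #2), but an op with the same contract can be sketched with tf.custom_gradient, available in TF >= 1.7. make_clipped_identity below is a hypothetical helper, not part of Sonnet:

    def make_clipped_identity(clip_value_min, clip_value_max):
        @tf.custom_gradient
        def clipped_identity(x):
            def grad(dy):
                # Clip the incoming gradient, not the forward value.
                return tf.clip_by_value(dy, clip_value_min, clip_value_max)
            # The forward pass is the identity, matching snt.clip_gradient.
            return tf.identity(x), grad
        return clipped_identity

    # Given a tensor x, behaves like snt.clip_gradient(x, 2, 3) in Example #5:
    y = make_clipped_identity(2.0, 3.0)(x)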