Пример #1
0
 def testPixelControlLossShapes(self):
     """Verify that incompatible action-value shapes raise a ValueError.

     The action-values placeholder is sliced with ``[:, :, :-1]`` (one entry
     dropped from its third axis) before being passed to
     ``pixel_control_loss``; the call must fail with a message matching
     "Pixel Control values are not compatible".
     """
     # assertRaisesRegex is the supported spelling; the assertRaisesRegexp
     # alias is deprecated and was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(
             ValueError, "Pixel Control values are not compatible"):
         pixel_control_ops.pixel_control_loss(
             self.observations_ph, self.actions_ph,
             self.action_values_ph[:, :, :-1], self.cell_size,
             self.discount, self.scale)
Пример #2
0
 def testPixelControlLossShapes(self):
   """Check that a shape mismatch in the action values is reported.

   ``self.action_values_ph`` is trimmed by one entry along its third axis and
   handed to ``pixel_control_loss``, which is expected to raise a
   ``ValueError`` matching "Pixel Control values are not compatible".
   """
   expected_message = "Pixel Control values are not compatible"
   # Use assertRaisesRegex: the assertRaisesRegexp alias is deprecated and no
   # longer exists in unittest as of Python 3.12.
   with self.assertRaisesRegex(ValueError, expected_message):
     pixel_control_ops.pixel_control_loss(
         self.observations_ph, self.actions_ph,
         self.action_values_ph[:, :, :-1], self.cell_size, self.discount,
         self.scale)
Пример #3
0
 def testTensorDiscountShape(self):
   """Verify that a rank-3 discount tensor is rejected.

   ``self.discount`` is reshaped to ``[1, 1, 1]`` and tiled with multiples
   ``[seq_length, batch_size, 1]``. Passing the result as the discount must
   raise a ``ValueError`` whose message matches the pattern asserted below.
   """
   # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which
   # was removed from unittest in Python 3.12.
   with self.assertRaisesRegex(
       ValueError, "discount_factor must be a scalar or a tensor of rank 2"):
     tensor_discount = tf.tile(
         tf.reshape(self.discount, [1, 1, 1]),
         [self.seq_length, self.batch_size, 1])
     pixel_control_ops.pixel_control_loss(
         self.observations_ph, self.actions_ph,
         self.action_values_ph, self.cell_size, tensor_discount,
         self.scale)
Пример #4
0
 def testTensorDiscountShape(self):
     """Check that a discount tensor with three axes triggers a ValueError.

     The discount is built by reshaping ``self.discount`` to ``[1, 1, 1]``
     and tiling it with multiples ``[seq_length, batch_size, 1]``. The loss
     call must then raise a ``ValueError`` matching the pattern below.
     """
     expected_message = (
         "discount_factor must be a scalar or a tensor of rank 2")
     # Use assertRaisesRegex: the assertRaisesRegexp alias is deprecated and
     # was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(ValueError, expected_message):
         tensor_discount = tf.tile(tf.reshape(self.discount, [1, 1, 1]),
                                   [self.seq_length, self.batch_size, 1])
         pixel_control_ops.pixel_control_loss(self.observations_ph,
                                              self.actions_ph,
                                              self.action_values_ph,
                                              self.cell_size,
                                              tensor_discount, self.scale)
Пример #5
0
  def testPixelControlLossScalarDiscount(self):
    """Check the loss value when the discount is supplied as a scalar.

    Builds the loss from the placeholders, feeds the stored observations,
    action values and actions, and compares the result with ``self.error``
    to within 1e-3.
    """
    loss_args = (
        self.observations_ph, self.actions_ph, self.action_values_ph,
        self.cell_size, self.discount, self.scale)
    loss_op, _ = pixel_control_ops.pixel_control_loss(*loss_args)
    init_op = tf.global_variables_initializer()
    tolerance = 1e-3

    with self.test_session() as session:
      session.run(init_op)
      # Map each placeholder to the concrete data it should receive.
      placeholder_values = {}
      placeholder_values[self.observations_ph] = self.observations
      placeholder_values[self.action_values_ph] = self.action_values
      placeholder_values[self.actions_ph] = self.actions
      computed_loss = session.run(loss_op, feed_dict=placeholder_values)
      self.assertNear(computed_loss, self.error, tolerance)
Пример #6
0
    def testPixelControlLossScalarDiscount(self):
        """Evaluate the scalar-discount loss and compare it with ``self.error``.

        The loss op is built from the placeholders, the variables are
        initialised, and the loss is evaluated with the stored observations,
        action values and actions. The result must be within 1e-3 of
        ``self.error``.
        """
        result = pixel_control_ops.pixel_control_loss(
            self.observations_ph,
            self.actions_ph,
            self.action_values_ph,
            self.cell_size,
            self.discount,
            self.scale)
        loss_tensor, _ = result
        initializer = tf.global_variables_initializer()

        with self.test_session() as sess:
            sess.run(initializer)
            feeds = dict([
                (self.observations_ph, self.observations),
                (self.action_values_ph, self.action_values),
                (self.actions_ph, self.actions),
            ])
            actual = sess.run(loss_tensor, feed_dict=feeds)
            self.assertNear(actual, self.error, 1e-3)
Пример #7
0
  def testPixelControlLossTensorDiscount(self):
    """Check the loss value when the discount is supplied as a tensor.

    The discount tensor is a row of zeros of shape ``(1, batch_size)``
    followed by ``seq_length - 1`` rows filled with ``self.discount``,
    concatenated along axis 0. The evaluated loss must be within 1e-3 of
    ``self.error_term``.
    """
    # First row: zero discount for every batch element.
    first_row = tf.zeros((1, self.batch_size))
    # Remaining rows: the scalar discount repeated across the batch.
    discount_cell = tf.reshape(self.discount, [1, 1])
    remaining_rows = tf.tile(
        discount_cell, [self.seq_length - 1, self.batch_size])
    discount_tensor = tf.concat([first_row, remaining_rows], axis=0)

    loss_args = (
        self.observations_ph, self.actions_ph, self.action_values_ph,
        self.cell_size, discount_tensor, self.scale)
    loss_op, _ = pixel_control_ops.pixel_control_loss(*loss_args)
    init_op = tf.global_variables_initializer()
    tolerance = 1e-3

    with self.test_session() as session:
      session.run(init_op)
      placeholder_values = {}
      placeholder_values[self.observations_ph] = self.observations
      placeholder_values[self.action_values_ph] = self.action_values
      placeholder_values[self.actions_ph] = self.actions
      computed_loss = session.run(loss_op, feed_dict=placeholder_values)
      self.assertNear(computed_loss, self.error_term, tolerance)
Пример #8
0
    def testPixelControlLossTensorDiscount(self):
        """Evaluate the tensor-discount loss against ``self.error_term``.

        A discount tensor is assembled from one all-zero row of shape
        ``(1, batch_size)`` stacked (axis 0) on top of ``seq_length - 1`` rows
        holding ``self.discount``. The loss computed with it must be within
        1e-3 of ``self.error_term``.
        """
        leading_zeros = tf.zeros((1, self.batch_size))
        tile_multiples = [self.seq_length - 1, self.batch_size]
        repeated_discount = tf.tile(
            tf.reshape(self.discount, [1, 1]), tile_multiples)
        discount_rows = [leading_zeros, repeated_discount]
        tensor_discount = tf.concat(discount_rows, axis=0)

        result = pixel_control_ops.pixel_control_loss(
            self.observations_ph,
            self.actions_ph,
            self.action_values_ph,
            self.cell_size,
            tensor_discount,
            self.scale)
        loss_tensor, _ = result
        initializer = tf.global_variables_initializer()

        with self.test_session() as sess:
            sess.run(initializer)
            feeds = dict([
                (self.observations_ph, self.observations),
                (self.action_values_ph, self.action_values),
                (self.actions_ph, self.actions),
            ])
            actual = sess.run(loss_tensor, feed_dict=feeds)
            self.assertNear(actual, self.error_term, 1e-3)