def testPixelControlLossShapes(self):
    """Expect a ValueError for action values with a trimmed last axis.

    The action-value placeholder is sliced with `[:, :, :-1]` before it is
    handed to `pixel_control_loss`; the call must raise a `ValueError` whose
    message matches the pattern below.
    """
    pattern = "Pixel Control values are not compatible"
    with self.assertRaisesRegexp(ValueError, pattern):
        # Drop the final entry along the third axis of the action values.
        trimmed_action_values = self.action_values_ph[:, :, :-1]
        pixel_control_ops.pixel_control_loss(
            self.observations_ph,
            self.actions_ph,
            trimmed_action_values,
            self.cell_size,
            self.discount,
            self.scale,
        )
def testTensorDiscountShape(self):
    """Expect a ValueError when the discount is passed as a rank-3 tensor.

    The discount is reshaped to `[1, 1, 1]` and tiled to
    `[seq_length, batch_size, 1]`; `pixel_control_loss` must reject it with a
    message matching the pattern below.
    """
    pattern = "discount_factor must be a scalar or a tensor of rank 2"
    with self.assertRaisesRegexp(ValueError, pattern):
        unit_discount = tf.reshape(self.discount, [1, 1, 1])
        multiples = [self.seq_length, self.batch_size, 1]
        rank3_discount = tf.tile(unit_discount, multiples)
        pixel_control_ops.pixel_control_loss(
            self.observations_ph,
            self.actions_ph,
            self.action_values_ph,
            self.cell_size,
            rank3_discount,
            self.scale,
        )
def testTensorDiscountShape(self):
    """Verify that a three-dimensional discount tensor is rejected.

    A `[1, 1, 1]` reshape of the discount is tiled `seq_length` times along
    the first axis and `batch_size` times along the second, and the resulting
    tensor is given to `pixel_control_loss`, which must raise a `ValueError`.
    """
    expected_error = "discount_factor must be a scalar or a tensor of rank 2"
    with self.assertRaisesRegexp(ValueError, expected_error):
        discount_1x1x1 = tf.reshape(self.discount, [1, 1, 1])
        bad_discount = tf.tile(
            discount_1x1x1, [self.seq_length, self.batch_size, 1])
        loss_args = (
            self.observations_ph,
            self.actions_ph,
            self.action_values_ph,
            self.cell_size,
            bad_discount,
            self.scale,
        )
        pixel_control_ops.pixel_control_loss(*loss_args)
def testPixelControlLossScalarDiscount(self):
    """Check the loss obtained with `self.discount` against `self.error`.

    Builds the loss op from the placeholders, feeds the example
    observations, actions and action values, and requires the evaluated loss
    to be within 1e-3 of `self.error`.
    """
    loss_op, _ = pixel_control_ops.pixel_control_loss(
        self.observations_ph,
        self.actions_ph,
        self.action_values_ph,
        self.cell_size,
        self.discount,
        self.scale,
    )
    init_op = tf.global_variables_initializer()
    # Map every placeholder to the concrete example data it stands for.
    placeholder_values = {
        self.observations_ph: self.observations,
        self.action_values_ph: self.action_values,
        self.actions_ph: self.actions,
    }
    with self.test_session() as session:
        session.run(init_op)
        computed_loss = session.run(loss_op, feed_dict=placeholder_values)
        self.assertNear(computed_loss, self.error, 1e-3)
def testPixelControlLossScalarDiscount(self):
    """Evaluate the loss with `self.discount` and compare to `self.error`.

    The loss op is built from the placeholders and run with the example data
    fed in; the result must match `self.error` to a tolerance of 1e-3.
    """
    loss_inputs = (
        self.observations_ph,
        self.actions_ph,
        self.action_values_ph,
        self.cell_size,
        self.discount,
        self.scale,
    )
    loss_tensor, _ = pixel_control_ops.pixel_control_loss(*loss_inputs)
    initializer = tf.global_variables_initializer()
    with self.test_session() as sess:
        sess.run(initializer)
        feeds = {
            self.observations_ph: self.observations,
            self.action_values_ph: self.action_values,
            self.actions_ph: self.actions,
        }
        loss_value = sess.run(loss_tensor, feed_dict=feeds)
        self.assertNear(loss_value, self.error, 1e-3)
def testPixelControlLossTensorDiscount(self):
    """Check the loss obtained with a rank-2 discount tensor.

    The discount tensor has `seq_length` rows and `batch_size` columns: the
    first row is zero and every other row holds `self.discount`. The
    evaluated loss must be within 1e-3 of `self.error_term`.
    """
    # One row of zeros, followed by seq_length - 1 rows of the discount.
    leading_zeros = tf.zeros((1, self.batch_size))
    discount_1x1 = tf.reshape(self.discount, [1, 1])
    repeated_discount = tf.tile(
        discount_1x1, [self.seq_length - 1, self.batch_size])
    discount_matrix = tf.concat([leading_zeros, repeated_discount], axis=0)

    loss_op, _ = pixel_control_ops.pixel_control_loss(
        self.observations_ph,
        self.actions_ph,
        self.action_values_ph,
        self.cell_size,
        discount_matrix,
        self.scale,
    )
    init_op = tf.global_variables_initializer()
    # Map every placeholder to the concrete example data it stands for.
    placeholder_values = {
        self.observations_ph: self.observations,
        self.action_values_ph: self.action_values,
        self.actions_ph: self.actions,
    }
    with self.test_session() as session:
        session.run(init_op)
        computed_loss = session.run(loss_op, feed_dict=placeholder_values)
        self.assertNear(computed_loss, self.error_term, 1e-3)
def testPixelControlLossTensorDiscount(self):
    """Evaluate the loss with a per-step discount tensor.

    A row of zeros of width `batch_size` is stacked on top of
    `seq_length - 1` rows filled with `self.discount`, and the result is
    passed as the discount. The loss must match `self.error_term` to a
    tolerance of 1e-3.
    """
    zero_row = tf.zeros((1, self.batch_size))
    tile_multiples = [self.seq_length - 1, self.batch_size]
    discount_rows = tf.tile(tf.reshape(self.discount, [1, 1]), tile_multiples)
    stacked_discount = tf.concat([zero_row, discount_rows], axis=0)

    loss_inputs = (
        self.observations_ph,
        self.actions_ph,
        self.action_values_ph,
        self.cell_size,
        stacked_discount,
        self.scale,
    )
    loss_tensor, _ = pixel_control_ops.pixel_control_loss(*loss_inputs)
    initializer = tf.global_variables_initializer()
    with self.test_session() as sess:
        sess.run(initializer)
        feeds = {
            self.observations_ph: self.observations,
            self.action_values_ph: self.action_values,
            self.actions_ph: self.actions,
        }
        loss_value = sess.run(loss_tensor, feed_dict=feeds)
        self.assertNear(loss_value, self.error_term, 1e-3)