def test_get_qval_sym(self, obs_dim, action_dim):
    """Compare ``get_qval_sym`` against ``get_qval`` on the same input.

    The Q-function is constructed while ``MLPMergeModel`` is patched with
    ``SimpleMLPMergeModel``. Both evaluation paths must return equal
    arrays, and the first row must equal the constant 0.5.

    Args:
        obs_dim: Observation dimensions forwarded to ``DummyBoxEnv``.
        action_dim: Action dimensions forwarded to ``DummyBoxEnv``.

    """
    box_env = GarageEnv(
        DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    patch_target = ('garage.tf.q_functions.'
                    'continuous_mlp_q_function.MLPMergeModel')
    with mock.patch(patch_target, new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=box_env.spec)

    box_env.reset()
    raw_obs, _, _, _ = box_env.step(1)
    flat_obs = raw_obs.flatten()
    flat_act = np.full(action_dim, 0.5).flatten()

    # Reference value obtained through the numeric entry point.
    direct_qvals = qf.get_qval([flat_obs], [flat_act])

    # Placeholders sized to the flattened observation / action vectors.
    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, flat_obs.shape[0]))
    act_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, flat_act.shape[0]))
    sym_qvals = qf.get_qval_sym(obs_ph, act_ph, 'another')

    feed = {obs_ph: [flat_obs], act_ph: [flat_act]}
    sym_output = self.sess.run(sym_qvals, feed_dict=feed)

    wanted = np.full((1, ), 0.5)
    assert np.array_equal(direct_qvals, sym_output)
    assert np.array_equal(sym_output[0], wanted)
def test_is_pickleable(self, obs_dim, action_dim):
    """Check that a pickled Q-function reproduces the original output.

    The Q-function is built with ``MLPMergeModel`` patched by
    ``SimpleMLPMergeModel``. Its ``return_var`` variable is overwritten
    with ones before pickling, so the round trip has to carry a
    non-initial value for the outputs to match.

    Args:
        obs_dim: Observation dimensions forwarded to ``DummyBoxEnv``.
        action_dim: Action dimensions forwarded to ``DummyBoxEnv``.

    """
    box_env = GarageEnv(
        DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    patch_target = ('garage.tf.q_functions.'
                    'continuous_mlp_q_function.MLPMergeModel')
    with mock.patch(patch_target, new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=box_env.spec)

    box_env.reset()
    raw_obs, _, _, _ = box_env.step(1)
    flat_obs = raw_obs.flatten()
    flat_act = np.full(action_dim, 0.5).flatten()

    # Look up the existing variable by re-entering its scope with reuse.
    model_scope = 'ContinuousMLPQFunction/SimpleMLPMergeModel'
    with tf.compat.v1.variable_scope(model_scope, reuse=True):
        return_var = tf.compat.v1.get_variable('return_var')

    # Overwrite the variable with all ones.
    all_ones = tf.ones_like(return_var).eval()
    return_var.load(all_ones)

    before = qf.get_qval([flat_obs], [flat_act])

    serialized = pickle.dumps(qf)
    # Restore inside a separate graph/session.
    with tf.compat.v1.Session(graph=tf.Graph()):
        restored_qf = pickle.loads(serialized)
        after = restored_qf.get_qval([flat_obs], [flat_act])

    assert np.array_equal(before, after)
def test_build(self, obs_dim, action_dim):
    """Check that ``build`` yields the same Q-values as ``get_qval``.

    The network uses a single one-unit hidden layer, no hidden
    nonlinearity, and all-ones weight initializers. It is fed all-ones
    inputs, and the expected output is
    ``np.prod(obs_dim) + np.prod(action_dim)``.

    Args:
        obs_dim: Observation dimensions forwarded to ``DummyBoxEnv``.
        action_dim: Action dimensions forwarded to ``DummyBoxEnv``.

    """
    gym_env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    qf_kwargs = dict(env_spec=gym_env.spec,
                     action_merge_layer=0,
                     hidden_sizes=(1, ),
                     hidden_nonlinearity=None,
                     hidden_w_init=tf.ones_initializer(),
                     output_w_init=tf.ones_initializer())
    qf = ContinuousMLPQFunction(**qf_kwargs)

    ones_obs = np.full(obs_dim, 1).flatten()
    ones_act = np.full(action_dim, 1).flatten()

    direct_qvals = qf.get_qval([ones_obs], [ones_act])

    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, ones_obs.shape[0]))
    act_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, ones_act.shape[0]))
    built_qvals = qf.build(obs_ph, act_ph, 'another')

    feed = {obs_ph: [ones_obs], act_ph: [ones_act]}
    built_output = self.sess.run(built_qvals, feed_dict=feed)

    total_inputs = np.prod(obs_dim) + np.prod(action_dim)
    wanted = np.full((1, ), total_inputs)
    assert np.array_equal(direct_qvals, built_output)
    assert np.array_equal(built_output[0], wanted)
def test_output_shape(self, obs_dim, action_dim):
    """Check that one (observation, action) pair gives a (1, 1) output.

    Args:
        obs_dim: Observation dimensions forwarded to ``DummyBoxEnv``.
        action_dim: Action dimensions forwarded to ``DummyBoxEnv``.

    """
    gym_env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    qf = ContinuousMLPQFunction(env_spec=gym_env.spec)

    gym_env.reset()
    step_result = gym_env.step(1)
    flat_obs = step_result.observation.flatten()
    flat_act = np.full(action_dim, 0.5).flatten()

    q_values = qf.get_qval([flat_obs], [flat_act])

    assert q_values.shape == (1, 1)
def test_q_vals(self, obs_dim, action_dim):
    """Check Q-values for a single input and for a batch of three.

    The Q-function is built with ``MLPMergeModel`` patched by
    ``SimpleMLPMergeModel``; every returned row must equal 0.5.

    Args:
        obs_dim: Observation dimensions forwarded to ``DummyBoxEnv``.
        action_dim: Action dimensions forwarded to ``DummyBoxEnv``.

    """
    box_env = GarageEnv(
        DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    patch_target = ('garage.tf.q_functions.'
                    'continuous_mlp_q_function.MLPMergeModel')
    with mock.patch(patch_target, new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=box_env.spec)

    box_env.reset()
    raw_obs, _, _, _ = box_env.step(1)
    flat_obs = raw_obs.flatten()
    flat_act = np.full(action_dim, 0.5).flatten()

    wanted = np.full((1, ), 0.5)

    # Batch of one.
    single = qf.get_qval([flat_obs], [flat_act])
    assert np.array_equal(single[0], wanted)

    # Batch of three identical pairs.
    batched = qf.get_qval([flat_obs, flat_obs, flat_obs],
                          [flat_act, flat_act, flat_act])
    for row in batched:
        assert np.array_equal(row, wanted)
def test_q_vals_goal_conditioned(self):
    """Check Q-values when observations come from a dict environment.

    The ``observation``, ``desired_goal`` and ``achieved_goal`` entries of
    the step observation are concatenated (in that order) along the last
    axis before being passed to the Q-function. The Q-function is built
    with ``MLPMergeModel`` patched by ``SimpleMLPMergeModel``; every
    returned row must equal 0.5.
    """
    dict_env = GarageEnv(DummyDictEnv())
    patch_target = ('garage.tf.q_functions.'
                    'continuous_mlp_q_function.MLPMergeModel')
    with mock.patch(patch_target, new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=dict_env.spec)

    dict_env.reset()
    obs_dict, _, _, _ = dict_env.step(1)
    obs_parts = (obs_dict['observation'], obs_dict['desired_goal'],
                 obs_dict['achieved_goal'])
    flat_obs = np.concatenate(obs_parts, axis=-1)
    flat_act = np.full((1, ), 0.5).flatten()

    wanted = np.full((1, ), 0.5)

    # Batch of one.
    single = qf.get_qval([flat_obs], [flat_act])
    assert np.array_equal(single[0], wanted)

    # Batch of three identical pairs.
    batched = qf.get_qval([flat_obs, flat_obs, flat_obs],
                          [flat_act, flat_act, flat_act])
    for row in batched:
        assert np.array_equal(row, wanted)
def test_q_vals(self, hidden_sizes):
    """Check Q-values of an all-ones linear network on all-ones input.

    The network has no hidden nonlinearity and all-ones weight
    initializers, and the expected value for every row is
    ``(obs_dim + act_dim) * np.prod(hidden_sizes)``.

    Args:
        hidden_sizes: Hidden layer sizes passed to the Q-function.

    """
    gym_env = GymEnv(DummyBoxEnv())
    spec = gym_env.spec
    obs_dim = spec.observation_space.flat_dim
    act_dim = spec.action_space.flat_dim

    qf_kwargs = dict(env_spec=gym_env.spec,
                     action_merge_layer=0,
                     hidden_sizes=hidden_sizes,
                     hidden_nonlinearity=None,
                     hidden_w_init=tf.ones_initializer(),
                     output_w_init=tf.ones_initializer())
    qf = ContinuousMLPQFunction(**qf_kwargs)

    ones_obs = np.full(obs_dim, 1).flatten()
    ones_act = np.full(act_dim, 1).flatten()

    scale = (obs_dim + act_dim) * np.prod(hidden_sizes)
    wanted = np.full((1, ), scale)

    # Batch of one.
    single = qf.get_qval([ones_obs], [ones_act])
    assert np.array_equal(single[0], wanted)

    # Batch of three identical pairs.
    batched = qf.get_qval([ones_obs, ones_obs, ones_obs],
                          [ones_act, ones_act, ones_act])
    for row in batched:
        assert np.array_equal(row, wanted)
def test_output_shape(self, obs_dim, action_dim):
    """Check that one (observation, action) pair gives a (1, 1) output.

    The Q-function is built with ``MLPMergeModel`` patched by
    ``SimpleMLPMergeModel``.

    Args:
        obs_dim: Observation dimensions forwarded to ``DummyBoxEnv``.
        action_dim: Action dimensions forwarded to ``DummyBoxEnv``.

    """
    box_env = GarageEnv(
        DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    patch_target = ('garage.tf.q_functions.'
                    'continuous_mlp_q_function.MLPMergeModel')
    with mock.patch(patch_target, new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=box_env.spec)

    box_env.reset()
    raw_obs, _, _, _ = box_env.step(1)
    flat_obs = raw_obs.flatten()
    flat_act = np.full(action_dim, 0.5).flatten()

    q_values = qf.get_qval([flat_obs], [flat_act])

    assert q_values.shape == (1, 1)
def test_is_pickleable(self, obs_dim, action_dim):
    """Check that a pickled Q-function reproduces the original output.

    The ``mlp_concat/hidden_0/bias`` variable is overwritten with ones
    before pickling, so the round trip has to carry a non-initial value
    for the outputs to match.

    Args:
        obs_dim: Observation dimensions forwarded to ``DummyBoxEnv``.
        action_dim: Action dimensions forwarded to ``DummyBoxEnv``.

    """
    gym_env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    qf = ContinuousMLPQFunction(env_spec=gym_env.spec)

    gym_env.reset()
    step_result = gym_env.step(1)
    flat_obs = step_result.observation.flatten()
    flat_act = np.full(action_dim, 0.5).flatten()

    # Look up the existing variable by re-entering its scope with reuse.
    with tf.compat.v1.variable_scope('ContinuousMLPQFunction', reuse=True):
        hidden_bias = tf.compat.v1.get_variable('mlp_concat/hidden_0/bias')

    # Overwrite the bias with all ones.
    all_ones = tf.ones_like(hidden_bias).eval()
    hidden_bias.load(all_ones)

    before = qf.get_qval([flat_obs], [flat_act])

    serialized = pickle.dumps(qf)
    # Restore inside a separate graph/session.
    with tf.compat.v1.Session(graph=tf.Graph()):
        restored_qf = pickle.loads(serialized)
        after = restored_qf.get_qval([flat_obs], [flat_act])

    assert np.array_equal(before, after)
class TestContinuousMLPQFunctionTransit(TfGraphTestCase):
    """Check that the two Q-function implementations give equal outputs.

    ``ContinuousMLPQFunction`` (qf1, qf2) and
    ``ContinuousMLPQFunctionWithModel`` (qf3, qf4) are built with matching
    hidden sizes, the trainable variables of the former are copied into
    the latter, and their Q-values are then compared.
    """

    @mock.patch('tensorflow.random.normal')
    def setUp(self, mock_rand):
        """Build the four Q-functions and synchronise their weights.

        Args:
            mock_rand: Mock replacing ``tensorflow.random.normal``; its
                return value is fixed to 0.5.

        """
        mock_rand.return_value = 0.5
        super().setUp()

        self.obs_dim = (5, )
        self.act_dim = (2, )
        self.box_env = TfEnv(
            DummyBoxEnv(obs_dim=self.obs_dim, action_dim=self.act_dim))

        # NOTE(review): the env object itself is passed as ``env_spec``
        # here rather than ``self.box_env.spec`` -- confirm this is
        # intended.
        self.qf1 = ContinuousMLPQFunction(env_spec=self.box_env,
                                          hidden_sizes=(32, 32),
                                          name='QF1')
        self.qf2 = ContinuousMLPQFunction(env_spec=self.box_env,
                                          hidden_sizes=(64, 64),
                                          name='QF2')
        self.qf3 = ContinuousMLPQFunctionWithModel(env_spec=self.box_env,
                                                   hidden_sizes=(32, 32),
                                                   name='QF3')
        self.qf4 = ContinuousMLPQFunctionWithModel(env_spec=self.box_env,
                                                   hidden_sizes=(64, 64),
                                                   name='QF4')

        # NOTE(review): un-namespaced TF1 API; other tests for this
        # Q-function use ``tf.compat.v1`` -- confirm against the
        # TensorFlow version this class targets.
        self.sess.run(tf.global_variables_initializer())

        # Copy weights pairwise: qf1 -> qf3, then qf2 -> qf4.
        for target_qf, source_qf in ((self.qf3, self.qf1),
                                     (self.qf4, self.qf2)):
            var_pairs = zip(target_qf.get_trainable_vars(),
                            source_qf.get_trainable_vars())
            for target_var, source_var in var_pairs:
                self.sess.run(target_var.assign(source_var))

        self.obs = self.box_env.reset()
        self.act = np.full((2, ), 0.5)

    def test_get_qval(self):
        """Compare ``get_qval`` outputs for batches of one and two."""
        batches = (
            ([self.obs], [self.act]),
            ([self.obs, self.obs], [self.act, self.act]),
        )
        for obs_batch, act_batch in batches:
            q_val1 = self.qf1.get_qval(obs_batch, act_batch)
            q_val2 = self.qf2.get_qval(obs_batch, act_batch)
            q_val3 = self.qf3.get_qval(obs_batch, act_batch)
            q_val4 = self.qf4.get_qval(obs_batch, act_batch)

            assert np.array_equal(q_val1, q_val3)
            assert np.array_equal(q_val2, q_val4)

    def test_get_qval_sym(self):
        """Compare ``get_qval_sym`` outputs evaluated on one input pair."""
        # NOTE(review): un-namespaced ``tf.placeholder`` -- see the note
        # in ``setUp`` about the targeted TensorFlow version.
        obs_ph = tf.placeholder(tf.float32, shape=(None, ) + self.obs_dim)
        act_ph = tf.placeholder(tf.float32, shape=(None, ) + self.act_dim)

        sym1 = self.qf1.get_qval_sym(obs_ph, act_ph, name='qval_sym')
        sym2 = self.qf2.get_qval_sym(obs_ph, act_ph, name='qval_sym')
        sym3 = self.qf3.get_qval_sym(obs_ph, act_ph, name='qval_sym')
        sym4 = self.qf4.get_qval_sym(obs_ph, act_ph, name='qval_sym')

        # Same feed for every evaluation.
        feed = {obs_ph: [self.obs], act_ph: [self.act]}
        q_val1 = self.sess.run(sym1, feed_dict=feed)
        q_val2 = self.sess.run(sym2, feed_dict=feed)
        q_val3 = self.sess.run(sym3, feed_dict=feed)
        q_val4 = self.sess.run(sym4, feed_dict=feed)

        assert np.array_equal(q_val1, q_val3)
        assert np.array_equal(q_val2, q_val4)