def test_unflattened_input(self):
    """Fit and predict using a single unflattened (2, 2) observation.

    One environment step supplies the observation and reward that form
    the only training path; the prediction for that same observation is
    then required to be close to zero.
    """
    dummy_env = GymEnv(DummyBoxEnv(obs_dim=(2, 2)))
    baseline = ContinuousMLPBaseline(env_spec=dummy_env.spec)

    # Take one step so there is a real observation/reward pair to train on.
    dummy_env.reset()
    step = dummy_env.step(1)
    observation = step.observation
    reward = step.reward

    baseline.fit([{'observations': [observation], 'returns': [reward]}])

    predicted = baseline.predict({'observations': [observation]})
    assert np.allclose(0., predicted)
def test_is_pickleable(self):
    """Check that a pickled baseline predicts the same values once restored.

    The 'mlp/hidden_0/bias' variable is set to all ones before the
    reference prediction is taken. The baseline is then pickled, restored
    inside a session on a fresh graph, and its prediction compared for
    exact equality with the reference.
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = ContinuousMLPBaseline(env_spec=env_spec)

    # Look up the existing variable (reuse=True) rather than creating one.
    scope = tf.compat.v1.variable_scope('ContinuousMLPBaseline', reuse=True)
    with scope:
        hidden_bias = tf.compat.v1.get_variable('mlp/hidden_0/bias')
    ones = tf.ones_like(hidden_bias).eval()
    hidden_bias.load(ones)

    _train, _observations, eval_paths, _expected = get_train_test_data()
    before = baseline.predict(eval_paths)
    serialized = pickle.dumps(baseline)

    # Restore into a separate graph/session from the one used above.
    with tf.compat.v1.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        after = restored.predict(eval_paths)
        assert np.array_equal(before, after)
def test_fit(self, obs_dim):
    """Fit on two one-step paths and expect their returns to be predicted.

    ContinuousMLPRegressor is patched with SimpleMLPRegressor while the
    baseline is constructed. The paths hold observations filled with 1
    and 2 and returns of 1 and 2 respectively; predicting on both
    observations must give exactly [1, 2].

    Args:
        obs_dim: Shape passed to DummyBoxEnv and to np.full when building
            the observations.
    """
    regressor_target = ('garage.tf.baselines.'
                        'continuous_mlp_baseline.'
                        'ContinuousMLPRegressor')
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))
    with mock.patch(regressor_target, new=SimpleMLPRegressor):
        baseline = ContinuousMLPBaseline(env_spec=env.spec)

    values = (1, 2)
    training_paths = [{
        'observations': [np.full(obs_dim, value)],
        'returns': [value]
    } for value in values]
    baseline.fit(training_paths)

    query = {'observations': [np.full(obs_dim, value) for value in values]}
    predicted = baseline.predict(query)
    assert np.array_equal(predicted, [1, 2])
def test_fit_unnormalized(self):
    """Fit with normalize_inputs=False and check predictions and statistics.

    After 20 calls to fit, predictions must be within 0.1 (absolute) of
    the expected values, and the baseline's _x_mean and _x_std must be
    close to all zeros and all ones respectively.
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = ContinuousMLPBaseline(env_spec=env_spec,
                                     normalize_inputs=False)

    training_paths, _observations, eval_paths, targets = (
        get_train_test_data())

    for _ in range(20):
        baseline.fit(training_paths)

    predicted = baseline.predict(eval_paths)
    assert np.allclose(predicted, targets, rtol=0, atol=0.1)

    # Read both input statistics from the session before asserting on them.
    mean = self.sess.run(baseline._x_mean)
    std = self.sess.run(baseline._x_std)

    assert np.allclose(mean, np.zeros_like(mean))
    assert np.allclose(std, np.ones_like(std))
def test_fit_normalized(self):
    """Fit with default settings and check predictions and statistics.

    After 20 calls to fit, predictions must be within 0.1 (absolute) of
    the expected values, and the baseline's _x_mean and _x_std must be
    close to the mean and standard deviation of the observations taken
    along axis 0.
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = ContinuousMLPBaseline(env_spec=env_spec)

    training_paths, observations, eval_paths, targets = (
        get_train_test_data())

    for _ in range(20):
        baseline.fit(training_paths)

    predicted = baseline.predict(eval_paths)
    assert np.allclose(predicted, targets, rtol=0, atol=0.1)

    # Read both input statistics from the session before asserting on them.
    mean = self.sess.run(baseline._x_mean)
    std = self.sess.run(baseline._x_std)

    assert np.allclose(mean, np.mean(observations, axis=0, keepdims=True))
    assert np.allclose(std, np.std(observations, axis=0, keepdims=True))
def test_is_pickleable(self):
    """Check that a pickled baseline predicts the same values once restored.

    ContinuousMLPRegressor is patched with SimpleMLPRegressor while the
    baseline is constructed. The 'SimpleMLPModel/return_var' variable is
    loaded with 1.0 before the reference prediction is taken; the
    baseline is then pickled, restored inside a session on a fresh graph,
    and its prediction compared for exact equality with the reference.
    """
    regressor_target = ('garage.tf.baselines.'
                        'continuous_mlp_baseline.'
                        'ContinuousMLPRegressor')
    env = GarageEnv(DummyBoxEnv(obs_dim=(1, )))
    with mock.patch(regressor_target, new=SimpleMLPRegressor):
        baseline = ContinuousMLPBaseline(env_spec=env.spec)

    query = {'observations': [np.full(1, 1), np.full(1, 1)]}

    # Look up the existing variable (reuse=True) rather than creating one.
    scope = tf.compat.v1.variable_scope('ContinuousMLPBaseline', reuse=True)
    with scope:
        model_return = tf.compat.v1.get_variable('SimpleMLPModel/return_var')
    model_return.load(1.0)

    before = baseline.predict(query)
    serialized = pickle.dumps(baseline)

    # Restore into a separate graph/session from the one used above.
    with tf.compat.v1.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        after = restored.predict(query)
        assert np.array_equal(before, after)