def test_obs_not_image(self):
    """Check that pixel normalization is skipped for a non-image env.

    The environment is wrapped with ``is_image=False``.
    ``normalize_pixel_batch`` is replaced by a mock whose side effect is
    the real function, so any call would be recorded; the test requires
    that neither ``fit`` nor ``predict`` triggers one.
    """
    env = GarageEnv(DummyDiscretePixelEnv(), is_image=False)
    regressor_patch = mock.patch(('garage.tf.baselines.'
                                  'gaussian_cnn_baseline.'
                                  'GaussianCNNRegressor'),
                                 new=SimpleGaussianCNNRegressor)
    normalizer_patch = mock.patch(
        'garage.tf.baselines.'
        'gaussian_cnn_baseline.'
        'normalize_pixel_batch',
        side_effect=normalize_pixel_batch)

    # Patches are entered left to right, same as the nested form.
    with regressor_patch, normalizer_patch as npb:
        gcb = GaussianCNNBaseline(env_spec=env.spec)
        obs_dim = env.spec.observation_space.shape

        # One single-step path per constant value; the return equals
        # the fill value of the observation.
        paths = [{
            'observations': [np.full(obs_dim, value)],
            'returns': [value]
        } for value in (1, 2)]
        gcb.fit(paths)

        obs = {
            'observations': [np.full(obs_dim, value) for value in (1, 2)]
        }
        gcb.predict(obs)

        assert not npb.called
def test_flattened_image_input(self):
    """Fit and predict on a flattened observation from an image env.

    A single environment step supplies the observation and reward; the
    observation is flattened before being handed to ``fit`` and
    ``predict``. The prediction is required to be close to zero.
    """
    env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
    baseline_kwargs = dict(filters=((3, (3, 3)), (6, (3, 3))),
                           strides=(1, 1),
                           padding='SAME',
                           hidden_sizes=(32, ))
    gcb = GaussianCNNBaseline(env_spec=env.spec, **baseline_kwargs)

    env.reset()
    step = env.step(1)
    observation = step.observation
    reward = step.reward

    gcb.fit([{
        'observations': [observation.flatten()],
        'returns': [reward]
    }])
    prediction = gcb.predict({'observations': [observation.flatten()]})

    # Argument order matters: np.allclose scales rtol by the second
    # argument, so 0. stays first.
    assert np.allclose(0., prediction)
def test_fit(self, obs_dim):
    """Fit on two constant-valued paths and check the predictions.

    ``GaussianCNNRegressor`` is swapped for ``SimpleGaussianCNNRegressor``
    while the baseline is built and used. After fitting on observations
    filled with 1 and 2 (returns 1 and 2), predicting on the same
    observations must yield exactly ``[1, 2]``.

    Args:
        obs_dim: Observation shape passed to ``DummyBoxEnv`` and used to
            build the observations (presumably supplied by a pytest
            parametrization outside this view -- confirm).
    """
    box_env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))
    fill_values = (1, 2)
    regressor_target = ('garage.tf.baselines.'
                        'gaussian_cnn_baseline.'
                        'GaussianCNNRegressor')

    with mock.patch(regressor_target, new=SimpleGaussianCNNRegressor):
        gcb = GaussianCNNBaseline(env_spec=box_env.spec)

        paths = [{
            'observations': [np.full(obs_dim, value)],
            'returns': [value]
        } for value in fill_values]
        gcb.fit(paths)

        obs = {
            'observations': [np.full(obs_dim, value)
                             for value in fill_values]
        }
        prediction = gcb.predict(obs)

        assert np.array_equal(prediction, [1, 2])
def test_fit_unnormalized(self):
    """Fit with input and output normalization switched off.

    After 20 fitting passes the mean absolute prediction error on the
    test data must not exceed 0.1. The network's ``x_mean``/``y_mean``
    are required to be zeros and its ``x_std``/``y_std`` to be ones.
    """
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=True,
                              normalize_inputs=False,
                              normalize_outputs=False)

    (train_paths, _, _), (test_paths, expected) = get_train_test_data()

    for _ in range(20):
        gcr.fit(train_paths)

    prediction = gcr.predict(test_paths)

    # Accumulate |expected - predicted| in order, starting from 0.0.
    total_error = sum((np.abs(target - prediction[idx])
                       for idx, target in enumerate(expected)), 0.0)
    assert total_error / len(expected) <= 0.1

    network = gcr._networks['default']

    x_mean = self.sess.run(network.x_mean)
    x_std = self.sess.run(network.x_std)
    assert np.array_equal(x_mean, np.zeros_like(x_mean))
    assert np.array_equal(x_std, np.ones_like(x_std))

    y_mean = self.sess.run(network.y_mean)
    y_std = self.sess.run(network.y_std)
    assert np.allclose(y_mean, np.zeros_like(y_mean))
    assert np.allclose(y_std, np.ones_like(y_std))
def test_fit_without_trusted_region(self):
    """Fit with ``use_trust_region=False`` and check prediction accuracy.

    After 20 fitting passes the mean absolute prediction error on the
    test data must not exceed 0.1.
    """
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=False,
                              use_trust_region=False)

    (train_paths, _, _), (test_paths, expected) = get_train_test_data()

    for _ in range(20):
        gcr.fit(train_paths)

    prediction = gcr.predict(test_paths)

    # Accumulate |expected - predicted| in order, starting from 0.0.
    total_error = sum((np.abs(target - prediction[idx])
                       for idx, target in enumerate(expected)), 0.0)
    assert total_error / len(expected) <= 0.1
def test_obs_is_image(self):
    """Check that pixel normalization is applied for an image env.

    The environment is wrapped with ``is_image=True``.
    ``normalize_pixel_batch`` is replaced by a mock whose side effect is
    the real function, so calls are recorded with their arguments:

    * ``fit`` must call it exactly once, with the concatenated
      observations of all paths.
    * ``predict`` must then call it with the observations it was given.
    """
    env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        with mock.patch(
                'garage.tf.baselines.'
                'gaussian_cnn_baseline.'
                'normalize_pixel_batch',
                side_effect=normalize_pixel_batch) as npb:

            gcb = GaussianCNNBaseline(env_spec=env.spec)

            obs_dim = env.spec.observation_space.shape
            paths = [{
                'observations': [np.full(obs_dim, 1)],
                'returns': [1]
            }, {
                'observations': [np.full(obs_dim, 2)],
                'returns': [2]
            }]

            gcb.fit(paths)
            observations = np.concatenate(
                [p['observations'] for p in paths])
            assert npb.call_count == 1, (
                "Expected '%s' to have been called once. Called %s times." %
                (npb._mock_name or 'mock', npb.call_count))
            # First positional argument of the first recorded call.
            assert np.array_equal(npb.call_args_list[0][0][0], observations)

            obs = {
                'observations': [np.full(obs_dim, 1),
                                 np.full(obs_dim, 2)]
            }
            observations = obs['observations']
            gcb.predict(obs)
            # Compare by value. A bare ``==`` on array-valued data only
            # yields a usable bool when both sides are the very same
            # list of the very same array objects; any equal-but-distinct
            # argument would raise "truth value of an array is ambiguous"
            # instead of passing or failing cleanly.
            assert np.array_equal(npb.call_args_list[1][0][0], observations)
def test_fit_normalized(self):
    """Fit with a trust region and check the normalization statistics.

    After 20 fitting passes the mean absolute prediction error on the
    test data must not exceed 0.1. The network's ``x_mean``/``x_std``
    must be close to the mean/std of the training observations, and
    ``y_mean``/``y_std`` close to the mean/std of the training returns
    (both taken over axis 0 with ``keepdims=True``).
    """
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=False,
                              use_trust_region=True)

    train_data, (test_paths, expected) = get_train_test_data()
    train_paths, observations, returns = train_data

    for _ in range(20):
        gcr.fit(train_paths)

    prediction = gcr.predict(test_paths)

    # Accumulate |expected - predicted| in order, starting from 0.0.
    total_error = sum((np.abs(target - prediction[idx])
                       for idx, target in enumerate(expected)), 0.0)
    assert total_error / len(expected) <= 0.1

    network = gcr._networks['default']

    x_mean = self.sess.run(network.x_mean)
    x_std = self.sess.run(network.x_std)
    assert np.allclose(x_mean, np.mean(observations, axis=0, keepdims=True))
    assert np.allclose(x_std, np.std(observations, axis=0, keepdims=True))

    y_mean = self.sess.run(network.y_mean)
    y_std = self.sess.run(network.y_std)
    assert np.allclose(y_mean, np.mean(returns, axis=0, keepdims=True))
    assert np.allclose(y_std, np.std(returns, axis=0, keepdims=True))