def test_is_pickleable(self, hidden_channels, kernel_sizes, strides,
                       hidden_sizes):
    """Check that the policy can be pickled and still yields valid actions.

    An action is drawn from the original policy and from a copy restored
    through a cloudpickle round trip. Both actions must belong to the
    environment's action space and have the same shape.
    """
    env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
    policy_kwargs = dict(env_spec=env.spec,
                         image_format='NHWC',
                         kernel_sizes=kernel_sizes,
                         hidden_channels=hidden_channels,
                         strides=strides,
                         hidden_sizes=hidden_sizes)
    policy = CategoricalCNNPolicy(**policy_kwargs)

    env.reset()
    obs = env.step(1).observation

    action_before, _ = policy.get_action(obs)

    # Serialize and immediately restore the policy.
    restored_policy = cloudpickle.loads(cloudpickle.dumps(policy))
    action_after, _ = restored_policy.get_action(obs)

    for sampled in (action_before, action_after):
        assert env.action_space.contains(sampled)
    assert action_before.shape == action_after.shape
def test_clone(self, filters, strides):
    """Check that a cloned Q-function matches the one it was cloned from.

    The CNN and MLP-merge model classes are replaced by their simple test
    doubles while the Q-function and its clone are created. The clone must
    report the same filters and strides, and its parameter values must be
    equal to the original's.
    """
    env = GarageEnv(DummyDiscretePixelEnv())

    with mock.patch('garage.tf.models.cnn_mlp_merge_model.CNNModel',
                    new=SimpleCNNModel), \
            mock.patch('garage.tf.models.cnn_mlp_merge_model.MLPMergeModel',
                       new=SimpleMLPMergeModel):
        qf = ContinuousCNNQFunction(env_spec=env.spec,
                                    filters=filters,
                                    strides=strides)
        qf_clone = qf.clone('another_qf')

    # pylint: disable=protected-access
    assert qf_clone._filters == qf._filters
    assert qf_clone._strides == qf._strides
    # pylint: enable=protected-access

    param_pairs = zip(qf_clone.parameters.values(), qf.parameters.values())
    for cloned_value, original_value in param_pairs:
        assert np.array_equal(cloned_value, original_value)
def test_get_action(self, filters, strides, padding, hidden_sizes):
    """Check that single and batched actions lie in the action space.

    The policy is built on a placeholder whose two leading dimensions are
    unspecified, followed by the observation shape. One observation is then
    passed to ``get_action`` and three copies of it to ``get_actions``.
    """
    env = MetaRLEnv(DummyDiscretePixelEnv())
    policy = CategoricalCNNPolicy(env_spec=env.spec,
                                  filters=filters,
                                  strides=strides,
                                  padding=padding,
                                  hidden_sizes=hidden_sizes)

    placeholder_shape = (None, None) + env.observation_space.shape
    obs_var = tf.compat.v1.placeholder(tf.float32,
                                       shape=placeholder_shape,
                                       name='obs')
    policy.build(obs_var)

    env.reset()
    obs, _, _, _ = env.step(1)

    single_action, _ = policy.get_action(obs)
    assert env.action_space.contains(single_action)

    batch_actions, _ = policy.get_actions([obs, obs, obs])
    assert all(env.action_space.contains(act) for act in batch_actions)
def test_is_pickleable(self):
    """Check that a pickled policy reproduces the same action probabilities.

    Two bias variables are overwritten with ones so the comparison does not
    rely on freshly initialized values. The policy is then pickled, restored
    and rebuilt in a fresh graph/session, and the probabilities for the same
    observation are compared.
    """
    env = TfEnv(DummyDiscretePixelEnv())
    policy = CategoricalCNNPolicy(env_spec=env.spec,
                                  filter_dims=(32, ),
                                  num_filters=(3, ),
                                  strides=(1, ),
                                  padding='SAME',
                                  hidden_sizes=(4, ))

    placeholder_shape = (None, None) + env.observation_space.shape
    obs_var = tf.compat.v1.placeholder(tf.float32,
                                       shape=placeholder_shape,
                                       name='obs')
    policy.build(obs_var)

    env.reset()
    obs, _, _, _ = env.step(1)

    # Look up the existing variables by name inside the policy's scope.
    with tf.compat.v1.variable_scope(
            'CategoricalCNNPolicy/CategoricalCNNModel', reuse=True):
        cnn_bias = tf.compat.v1.get_variable('CNNModel/cnn/h0/bias')
        bias = tf.compat.v1.get_variable('MLPModel/mlp/hidden_0/bias')

    # Set both biases to all ones.
    for variable in (cnn_bias, bias):
        variable.load(tf.ones_like(variable).eval())

    probs_before = self.sess.run(policy.distribution.probs,
                                 feed_dict={policy.model.input: [[obs]]})
    serialized = pickle.dumps(policy)

    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(serialized)
        restored_obs_var = tf.compat.v1.placeholder(
            tf.float32,
            shape=(None, None) + env.observation_space.shape,
            name='obs')
        policy_pickled.build(restored_obs_var)
        probs_after = sess.run(
            policy_pickled.distribution.probs,
            feed_dict={policy_pickled.model.input: [[obs]]})
        assert np.array_equal(probs_before, probs_after)
def test_obs_is_image(self):
    """Check that the baseline normalizes image observations.

    ``GaussianCNNRegressor`` is replaced by ``SimpleGaussianCNNRegressor``
    and ``normalize_pixel_batch`` is wrapped in a mock (with the real
    function as its ``side_effect``) so that its calls can be inspected.
    The test asserts that ``fit`` triggers exactly one call with the
    concatenated observations, and that the next recorded call, made after
    ``predict``, received the observations given to ``predict``.
    """
    env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        with mock.patch(
                'garage.tf.baselines.'
                'gaussian_cnn_baseline.'
                'normalize_pixel_batch',
                side_effect=normalize_pixel_batch) as npb:

            gcb = GaussianCNNBaseline(env_spec=env.spec)

            obs_dim = env.spec.observation_space.shape
            paths = [{
                'observations': [np.full(obs_dim, 1)],
                'returns': [1]
            }, {
                'observations': [np.full(obs_dim, 2)],
                'returns': [2]
            }]

            gcb.fit(paths)
            observations = np.concatenate(
                [p['observations'] for p in paths])
            assert npb.call_count == 1, (
                "Expected '%s' to have been called once. Called %s times."
                % (npb._mock_name or 'mock', npb.call_count))
            assert (npb.call_args_list[0][0][0] == observations).all()

            obs = {
                'observations': [np.full(obs_dim, 1),
                                 np.full(obs_dim, 2)]
            }

            observations = obs['observations']
            gcb.predict(obs)
            # Compare element-wise: a bare ``==`` would yield an array (and
            # an ambiguous truth value) if the recorded argument is an
            # ndarray rather than the very same list object.
            assert np.array_equal(npb.call_args_list[1][0][0], observations)
def test_is_pickleable(self, filter_dims, num_filters, strides):
    """Check that a pickled Q-function returns the same Q-value.

    The Q-function is created with the simple CNN and MLP-merge test
    doubles patched in. ``return_var`` is overwritten with ones, a Q-value
    is computed, and the Q-function is pickled and restored in a fresh
    graph/session where the Q-value is computed again for comparison.
    """
    env = TfEnv(DummyDiscretePixelEnv())
    env.reset()

    with mock.patch('garage.tf.models.cnn_mlp_merge_model.CNNModel',
                    new=SimpleCNNModel), \
            mock.patch('garage.tf.models.cnn_mlp_merge_model.MLPMergeModel',
                       new=SimpleMLPMergeModel):
        qf = ContinuousCNNQFunction(env_spec=env.spec,
                                    filter_dims=filter_dims,
                                    num_filters=num_filters,
                                    strides=strides)

    action_dim = env.action_space.shape

    obs, _, _, _ = env.step(1)
    act = np.full(action_dim, 0.5)
    # Unpacking checks that the Q-function exposes exactly two inputs.
    _, _ = qf.inputs

    scope_name = (
        'ContinuousCNNQFunction/CNNMLPMergeModel/SimpleMLPMergeModel')
    with tf.compat.v1.variable_scope(scope_name, reuse=True):
        return_var = tf.compat.v1.get_variable('return_var')
    # Overwrite the variable with all ones.
    return_var.load(tf.ones_like(return_var).eval())

    qval_before = qf.get_qval([obs], [act])

    serialized = pickle.dumps(qf)
    with tf.compat.v1.Session(graph=tf.Graph()):
        qf_pickled = pickle.loads(serialized)
        _, _ = qf_pickled.inputs
        qval_after = qf_pickled.get_qval([obs], [act])

    assert np.array_equal(qval_before, qval_after)
def test_episodic_life_reset(self):
    """Check when ``EpisodicLife.reset`` returns an all-zero observation.

    The test expects an all-zero observation from the first reset, a
    non-zero observation from a reset issued while ``ale.lives`` is still
    positive, and an all-zero observation again from a reset issued once
    ``ale.lives`` has reached 0.
    """
    env = EpisodicLife(DummyDiscretePixelEnv())

    def is_blank(frame):
        # True when the frame equals an all-zero observation.
        return np.array_equal(frame, np.zeros(env.observation_space.shape))

    # First reset: the env has reset.
    first_obs = env.reset()
    assert is_blank(first_obs)
    assert env.unwrapped.ale.lives() == 5

    _, _, done, info = env.step(0)
    assert done
    assert info['ale.lives'] == 4

    # Lives remain, so the env has not reset.
    second_obs = env.reset()
    assert not is_blank(second_obs)

    for _ in range(3):
        _, _reward, done, info = env.step(0)

    assert done
    assert info['ale.lives'] == 0

    # No lives left: the env has reset.
    third_obs = env.reset()
    assert is_blank(third_obs)
def test_get_qval_sym(self, filter_dims, num_filters, strides):
    """Check that ``get_qval_sym`` agrees with ``get_qval``.

    The Q-function is created with the simple CNN and MLP-merge test
    doubles patched in. A symbolic Q-value built on new placeholders is
    evaluated on the same observation/action pair as ``get_qval``; both
    results must be equal, and the first symbolic output must equal 0.5.
    """
    env = TfEnv(DummyDiscretePixelEnv())
    env.reset()

    with mock.patch('garage.tf.models.cnn_mlp_merge_model.CNNModel',
                    new=SimpleCNNModel), \
            mock.patch('garage.tf.models.cnn_mlp_merge_model.MLPMergeModel',
                       new=SimpleMLPMergeModel):
        qf = ContinuousCNNQFunction(env_spec=env.spec,
                                    filter_dims=filter_dims,
                                    num_filters=num_filters,
                                    strides=strides)

    action_dim = env.action_space.shape

    obs, _, _, _ = env.step(1)
    act = np.full(action_dim, 0.5)

    direct_qval = qf.get_qval([obs], [act])

    # Placeholders with an unspecified batch dimension.
    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, ) + obs.shape)
    act_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, ) + act.shape)
    q_vals = qf.get_qval_sym(obs_ph, act_ph, 'another')

    feed = {obs_ph: [obs], act_ph: [act]}
    symbolic_qval = self.sess.run(q_vals, feed_dict=feed)

    expected_output = np.full((1, ), 0.5)

    assert np.array_equal(direct_qval, symbolic_qval)
    assert np.array_equal(symbolic_qval[0], expected_output)
def test_normalize_pixel_batch(self):
    """Check that ``normalize_pixel_batch`` divides each entry by 255."""
    env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
    obs = env.reset()

    normalized = normalize_pixel_batch(obs)

    # Reference result: every element of the observation scaled by 1/255.
    reference = [pixels / 255.0 for pixels in obs]
    assert np.allclose(normalized, reference)
def setup_method(self):
    """Create a plain env and a Grayscale-wrapped env (random=False)."""
    plain_env = DummyDiscretePixelEnv(random=False)
    grayscale_env = Grayscale(DummyDiscretePixelEnv(random=False))
    self.env = plain_env
    self.env_g = grayscale_env
def setup_method(self):
    """Run the parent setup, then create the env and reset it.

    The value returned by ``reset()`` is kept as ``self.obs``.
    """
    super().setup_method()
    wrapped_env = GarageEnv(DummyDiscretePixelEnv())
    self.env = wrapped_env
    self.obs = wrapped_env.reset()
def setup_method(self):
    """Run the parent setup, create the image env, and initialize TF.

    After the environment is created, the global variables initializer is
    run in ``self.sess`` and the environment is reset.
    """
    super().setup_method()
    self.env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
    init_op = tf.compat.v1.global_variables_initializer()
    self.sess.run(init_op)
    self.env.reset()
def test_normalize_pixel_patch(self):
    """Check that ``normalize_pixel_batch`` divides each entry by 255."""
    env = TfEnv(DummyDiscretePixelEnv())
    obs = env.reset()

    normalized = normalize_pixel_batch(env, obs)

    # Reference result: every element of the observation scaled by 1/255.
    reference = [pixels / 255.0 for pixels in obs]
    assert np.allclose(normalized, reference)
def setUp(self):
    """Create a TfEnv around a plain env and one around a Grayscale env.

    Both underlying ``DummyDiscretePixelEnv`` instances use random=False.
    """
    plain_env = TfEnv(DummyDiscretePixelEnv(random=False))
    grayscale_env = TfEnv(Grayscale(DummyDiscretePixelEnv(random=False)))
    self.env = plain_env
    self.env_g = grayscale_env
def setUp(self):
    """Run the parent setUp, then create the env and reset it.

    The value returned by ``reset()`` is kept as ``self.obs``.
    """
    super().setUp()
    wrapped_env = TfEnv(DummyDiscretePixelEnv())
    self.env = wrapped_env
    self.obs = wrapped_env.reset()
def setup_method(self):
    """Create a plain env and a MaxAndSkip-wrapped env (skip=4).

    Both ``DummyDiscretePixelEnv`` instances are created with random=False.
    """
    plain_env = DummyDiscretePixelEnv(random=False)
    skipping_env = MaxAndSkip(DummyDiscretePixelEnv(random=False), skip=4)
    self.env = plain_env
    self.env_wrap = skipping_env