def test_is_pickleable(self, hidden_channels, kernel_sizes, strides,
                           hidden_sizes):
        """Round-trip the policy through cloudpickle and sanity-check actions."""
        env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      image_format='NHWC',
                                      kernel_sizes=kernel_sizes,
                                      hidden_channels=hidden_channels,
                                      strides=strides,
                                      hidden_sizes=hidden_sizes)
        env.reset()
        observation = env.step(1).observation

        # Action sampled from the original policy.
        first_action, _ = policy.get_action(observation)

        # Serialize, restore, and query the restored copy.
        restored_policy = cloudpickle.loads(cloudpickle.dumps(policy))
        second_action, _ = restored_policy.get_action(observation)

        # Both policies must emit valid, identically-shaped actions.
        assert env.action_space.contains(first_action)
        assert env.action_space.contains(second_action)
        assert first_action.shape == second_action.shape
# Example #2 (scraped-snippet separator; non-code)
# 0
    def test_clone(self, filters, strides):
        """Cloning a Q-function must copy hyperparameters and weights."""
        env = GarageEnv(DummyDiscretePixelEnv())

        # Swap the heavy models for lightweight stand-ins before building.
        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.CNNModel'),
                        new=SimpleCNNModel), \
             mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousCNNQFunction(env_spec=env.spec,
                                        filters=filters,
                                        strides=strides)
            qf_clone = qf.clone('another_qf')

        # The clone must carry the same construction hyperparameters.
        # pylint: disable=protected-access
        assert qf._filters == qf_clone._filters
        assert qf._strides == qf_clone._strides
        # pylint: enable=protected-access
        # ...and the same parameter values, pairwise.
        for original_param, copied_param in zip(qf.parameters.values(),
                                                qf_clone.parameters.values()):
            assert np.array_equal(copied_param, original_param)
    def test_get_action(self, filters, strides, padding, hidden_sizes):
        """get_action/get_actions must produce actions inside the action space."""
        env = MetaRLEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=filters,
                                      strides=strides,
                                      padding=padding,
                                      hidden_sizes=hidden_sizes)
        # Placeholder shaped (batch, time) + observation shape.
        input_shape = (None, None) + env.observation_space.shape
        obs_ph = tf.compat.v1.placeholder(tf.float32,
                                          shape=input_shape,
                                          name='obs')
        policy.build(obs_ph)

        env.reset()
        obs, _, _, _ = env.step(1)

        # Single observation.
        single_action, _ = policy.get_action(obs)
        assert env.action_space.contains(single_action)

        # Batched observations.
        batch_actions, _ = policy.get_actions([obs, obs, obs])
        for sampled in batch_actions:
            assert env.action_space.contains(sampled)
# Example #4 (scraped-snippet separator; non-code)
# 0
    def test_is_pickleable(self):
        """Pickling and restoring the policy must preserve its outputs.

        Sets both model biases to ones, records the action distribution,
        round-trips the policy through ``pickle``, rebuilds it in a fresh
        graph/session, and checks the probabilities are unchanged.
        """
        env = TfEnv(DummyDiscretePixelEnv())
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filter_dims=(32, ),
                                      num_filters=(3, ),
                                      strides=(1, ),
                                      padding='SAME',
                                      hidden_sizes=(4, ))
        # Placeholder shaped (batch, time) + observation shape.
        obs_var = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, None) +
                                           env.observation_space.shape,
                                           name='obs')
        policy.build(obs_var)

        env.reset()
        obs, _, _, _ = env.step(1)

        # Reuse the existing variable scope to grab the bias variables
        # created by policy.build() above.
        with tf.compat.v1.variable_scope(
                'CategoricalCNNPolicy/CategoricalCNNModel', reuse=True):
            cnn_bias = tf.compat.v1.get_variable('CNNModel/cnn/h0/bias')
            bias = tf.compat.v1.get_variable('MLPModel/mlp/hidden_0/bias')

        # Overwrite the biases with ones so the recorded output differs
        # from a freshly-initialized network, making an accidental
        # re-initialization after unpickling detectable.
        cnn_bias.load(tf.ones_like(cnn_bias).eval())
        bias.load(tf.ones_like(bias).eval())

        output1 = self.sess.run(policy.distribution.probs,
                                feed_dict={policy.model.input: [[obs]]})
        p = pickle.dumps(policy)

        # Unpickle into a brand-new graph and session; the restored policy
        # must be rebuilt on a new placeholder before evaluation.
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            obs_var = tf.compat.v1.placeholder(tf.float32,
                                               shape=(None, None) +
                                               env.observation_space.shape,
                                               name='obs')
            policy_pickled.build(obs_var)
            output2 = sess.run(policy_pickled.distribution.probs,
                               feed_dict={policy_pickled.model.input: [[obs]]})
            assert np.array_equal(output1, output2)
    def test_obs_is_image(self):
        """Pixel observations must be normalized exactly once during fit()."""
        env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
        # Patch the regressor with a stand-in and spy on the normalizer.
        with mock.patch(('garage.tf.baselines.'
                         'gaussian_cnn_baseline.'
                         'GaussianCNNRegressor'),
                        new=SimpleGaussianCNNRegressor), \
             mock.patch('garage.tf.baselines.'
                        'gaussian_cnn_baseline.'
                        'normalize_pixel_batch',
                        side_effect=normalize_pixel_batch) as npb:
            gcb = GaussianCNNBaseline(env_spec=env.spec)

            obs_dim = env.spec.observation_space.shape
            paths = [{
                'observations': [np.full(obs_dim, 1)],
                'returns': [1]
            }, {
                'observations': [np.full(obs_dim, 2)],
                'returns': [2]
            }]

            gcb.fit(paths)
            observations = np.concatenate(
                [p['observations'] for p in paths])
            # fit() normalizes the concatenated observations exactly once.
            assert npb.call_count == 1, (
                "Expected '%s' to have been called once. Called %s times."
                % (npb._mock_name or 'mock', npb.call_count))
            assert (npb.call_args_list[0][0][0] == observations).all()

            obs = {
                'observations': [np.full(obs_dim, 1),
                                 np.full(obs_dim, 2)]
            }
            observations = obs['observations']
            # predict() must also route its observations through the
            # normalizer (second recorded call).
            gcb.predict(obs)
            assert npb.call_args_list[1][0][0] == observations
    def test_is_pickleable(self, filter_dims, num_filters, strides):
        """Q-function outputs must survive a pickle round trip."""

        env = TfEnv(DummyDiscretePixelEnv())
        obs = env.reset()

        # Build the Q-function with lightweight stand-in models.
        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.CNNModel'),
                        new=SimpleCNNModel):
            with mock.patch(('garage.tf.models.'
                             'cnn_mlp_merge_model.MLPMergeModel'),
                            new=SimpleMLPMergeModel):
                qf = ContinuousCNNQFunction(env_spec=env.spec,
                                            filter_dims=filter_dims,
                                            num_filters=num_filters,
                                            strides=strides)

        action_dim = env.action_space.shape

        obs, _, _, _ = env.step(1)
        act = np.full(action_dim, 0.5)
        # NOTE(review): accessing .inputs before use — presumably forces
        # lazy graph construction; confirm against ContinuousCNNQFunction.
        _, _ = qf.inputs

        # Reuse the scope created above to fetch the stand-in's variable.
        with tf.compat.v1.variable_scope(
                'ContinuousCNNQFunction/CNNMLPMergeModel/SimpleMLPMergeModel',
                reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one, so the recorded output differs from a
        # freshly-initialized network and re-initialization is detectable
        return_var.load(tf.ones_like(return_var).eval())

        output1 = qf.get_qval([obs], [act])

        h_data = pickle.dumps(qf)
        # Restore in a fresh graph/session and compare outputs.
        with tf.compat.v1.Session(graph=tf.Graph()):
            qf_pickled = pickle.loads(h_data)
            _, _ = qf_pickled.inputs
            output2 = qf_pickled.get_qval([obs], [act])

        assert np.array_equal(output1, output2)
# Example #7 (scraped-snippet separator; non-code)
# 0
    def test_episodic_life_reset(self):
        """EpisodicLife should only hard-reset once all lives are gone."""
        env = EpisodicLife(DummyDiscretePixelEnv())
        obs = env.reset()

        # A true reset yields the all-zero initial frame and a full life count.
        assert np.array_equal(obs, np.zeros(env.observation_space.shape))
        assert env.unwrapped.ale.lives() == 5

        obs, _, done, info = env.step(0)
        assert done
        assert info['ale.lives'] == 4

        # With lives remaining, reset() continues the episode rather than
        # restarting, so the frame is not the initial all-zero one.
        obs = env.reset()
        assert not np.array_equal(obs, np.zeros(env.observation_space.shape))

        # Burn through the remaining lives.
        for _ in range(3):
            obs, _, done, info = env.step(0)
        assert done
        assert info['ale.lives'] == 0

        # All lives exhausted: reset() performs a real restart.
        obs = env.reset()
        assert np.array_equal(obs, np.zeros(env.observation_space.shape))
    def test_get_qval_sym(self, filter_dims, num_filters, strides):
        """get_qval_sym must agree with get_qval on the same inputs."""
        env = TfEnv(DummyDiscretePixelEnv())
        obs = env.reset()

        # Build the Q-function with lightweight stand-in models.
        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.CNNModel'),
                        new=SimpleCNNModel), \
             mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousCNNQFunction(env_spec=env.spec,
                                        filter_dims=filter_dims,
                                        num_filters=num_filters,
                                        strides=strides)
        action_dim = env.action_space.shape

        obs, _, _, _ = env.step(1)
        act = np.full(action_dim, 0.5)

        direct_qval = qf.get_qval([obs], [act])

        # Rebuild the Q-network symbolically on fresh placeholders.
        obs_ph = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, ) + obs.shape)
        act_ph = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, ) + act.shape)
        q_vals = qf.get_qval_sym(obs_ph, act_ph, 'another')

        symbolic_qval = self.sess.run(q_vals,
                                      feed_dict={
                                          obs_ph: [obs],
                                          act_ph: [act]
                                      })

        # 0.5 is the value this test expects from the stand-in merge model.
        expected_output = np.full((1, ), 0.5)

        assert np.array_equal(direct_qval, symbolic_qval)
        assert np.array_equal(symbolic_qval[0], expected_output)
# Example #9 (scraped-snippet separator; non-code)
# 0
 def test_normalize_pixel_batch(self):
     """normalize_pixel_batch should scale pixel values by 1/255."""
     env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
     frame = env.reset()
     scaled = normalize_pixel_batch(frame)
     # Per-element division by 255.0 is the reference behavior.
     assert np.allclose(scaled, [pixel / 255.0 for pixel in frame])
# Example #10 (scraped-snippet separator; non-code)
# 0
 def setup_method(self):
     """Create a deterministic pixel env and its grayscale-wrapped twin."""
     # random=False keeps frames reproducible across both environments.
     self.env_g = Grayscale(DummyDiscretePixelEnv(random=False))
     self.env = DummyDiscretePixelEnv(random=False)
# Example #11 (scraped-snippet separator; non-code)
# 0
 def setup_method(self):
     """Run the parent fixture, then add a pixel env and its initial obs."""
     # Parent fixture must run before any env/session work.
     super().setup_method()
     self.env = GarageEnv(DummyDiscretePixelEnv())
     self.obs = self.env.reset()
 def setup_method(self):
     """Build a pixel GymEnv, initialize TF globals, and reset the env."""
     super().setup_method()
     self.env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
     # Initialize all global TF variables before any graph evaluation.
     self.sess.run(tf.compat.v1.global_variables_initializer())
     self.env.reset()
# Example #13 (scraped-snippet separator; non-code)
# 0
 def test_normalize_pixel_patch(self):
     """Pixel observations are scaled by 1/255 into the unit interval."""
     env = TfEnv(DummyDiscretePixelEnv())
     frame = env.reset()
     scaled = normalize_pixel_batch(env, frame)
     # Per-element division by 255.0 is the reference behavior.
     assert np.allclose(scaled, [pixel / 255.0 for pixel in frame])
# Example #14 (scraped-snippet separator; non-code)
# 0
 def setUp(self):
     """Make a deterministic pixel env plus a grayscale-wrapped version."""
     # random=False keeps frames reproducible across both environments.
     self.env_g = TfEnv(Grayscale(DummyDiscretePixelEnv(random=False)))
     self.env = TfEnv(DummyDiscretePixelEnv(random=False))
# Example #15 (scraped-snippet separator; non-code)
# 0
 def setUp(self):
     """Run the parent fixture, then add a pixel env and its first obs."""
     # Parent fixture must run before any env/session work.
     super().setUp()
     self.env = TfEnv(DummyDiscretePixelEnv())
     self.obs = self.env.reset()
# Example #16 (scraped-snippet separator; non-code)
# 0
 def setup_method(self):
     """Create a raw pixel env and a frame-skipping wrapped counterpart."""
     # random=False keeps frames reproducible across both environments.
     self.env_wrap = MaxAndSkip(DummyDiscretePixelEnv(random=False), skip=4)
     self.env = DummyDiscretePixelEnv(random=False)