def test_is_pickleable(self):
        """Check that a pickled policy reproduces the original probabilities.

        The policy is built on an observation placeholder, the first weight of
        its LSTM cell is overwritten with ones, and the distribution's
        probabilities are evaluated. The policy is then pickled, restored and
        rebuilt inside a brand-new graph/session, and evaluated on the same
        input; both results must be identical.
        """
        env = GarageEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
        flat_obs_dim = env.observation_space.flat_dim
        obs_var = tf.compat.v1.placeholder(tf.float32,
                                           shape=[None, None, flat_obs_dim],
                                           name='obs')
        policy = CategoricalLSTMPolicy(env_spec=env.spec,
                                       state_include_action=False)

        policy.build(obs_var)
        policy.reset()
        obs = env.reset()

        # Overwrite the first LSTM weight with ones before pickling
        # (presumably so the restored values cannot be mistaken for a fresh
        # initialization -- confirm against the policy implementation).
        lstm_weight = policy.model._lstm_cell.weights[0]
        lstm_weight.load(tf.ones_like(lstm_weight).eval())

        # The same observation is fed twice along the leading axis.
        obs_feed = [[obs.flatten()], [obs.flatten()]]
        expected = self.sess.run([policy.distribution.probs],
                                 feed_dict={policy.model.input: obs_feed})

        serialized = pickle.dumps(policy)

        # Restore into an isolated graph so nothing is shared with the
        # original policy's graph.
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(serialized)
            obs_var = tf.compat.v1.placeholder(
                tf.float32, shape=[None, None, flat_obs_dim], name='obs')
            policy_pickled.build(obs_var)
            restored = sess.run(
                [policy_pickled.distribution.probs],
                feed_dict={policy_pickled.model.input: obs_feed})
            assert np.array_equal(expected, restored)
示例#2
0
    def test_categorical_lstm_policy(self):
        """Check that a built CategoricalLSTMPolicy samples a valid action.

        ``get_action`` returns a pair whose first element is the action, so
        asserting on the truthiness of its return value would always pass
        (a non-empty tuple is truthy). The action is therefore unpacked and
        validated against the environment's action space.
        """
        categorical_lstm_policy = CategoricalLSTMPolicy(
            env_spec=self.env, hidden_dim=1, state_include_action=False)
        self.sess.run(tf.compat.v1.global_variables_initializer())
        categorical_lstm_policy.build(self.obs_var)
        categorical_lstm_policy.reset()

        # Use the upper bound of the observation space as a sample input.
        obs = self.env.observation_space.high
        action, _ = categorical_lstm_policy.get_action(obs)
        assert self.env.action_space.contains(action)
示例#3
0
    def test_is_pickleable(self):
        """Check that pickling round-trips the policy's LSTM parameters.

        A symbolic distribution is built from the policy, the first weight of
        its LSTM cell is set to ones, and the probabilities are evaluated.
        The policy is then pickled, unpickled inside a fresh graph/session,
        rebuilt, and evaluated on the same input; the outputs must be equal.
        """
        env = GarageEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
        policy = CategoricalLSTMPolicy(env_spec=env.spec,
                                       state_include_action=False)

        policy.reset()
        obs = env.reset()

        input_shape = (None, None, policy.input_dim)
        state_input = tf.compat.v1.placeholder(tf.float32, shape=input_shape)
        dist_sym = policy.build(state_input, name='dist_sym').dist

        # Overwrite the first LSTM weight with ones before serializing.
        lstm_weight = policy._lstm_cell.weights[0]
        lstm_weight.load(tf.ones_like(lstm_weight).eval())

        # The same observation is fed twice along the leading axis.
        obs_feed = [[obs.flatten()], [obs.flatten()]]
        expected = self.sess.run([dist_sym.probs],
                                 feed_dict={state_input: obs_feed})

        serialized = pickle.dumps(policy)

        # Rebuild in an isolated graph so no state is shared with the
        # original policy.
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(serialized)
            restored_shape = (None, None, policy_pickled.input_dim)
            state_input = tf.compat.v1.placeholder(tf.float32,
                                                   shape=restored_shape)
            dist_sym = policy_pickled.build(state_input, name='dist_sym').dist
            restored = sess.run([dist_sym.probs],
                                feed_dict={state_input: obs_feed})
            assert np.array_equal(expected, restored)
示例#4
0
    def test_get_action(self, obs_dim, action_dim, hidden_dim):
        """Check that sampled actions belong to the env's action space.

        Covers both the single-observation ``get_action`` call and the
        batched ``get_actions`` call.

        Args:
            obs_dim: Observation dimensions passed to ``DummyDiscreteEnv``.
            action_dim: Action dimension passed to ``DummyDiscreteEnv``.
            hidden_dim: Hidden dimension passed to the policy.
        """
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        flat_obs_dim = env.observation_space.flat_dim
        obs_var = tf.compat.v1.placeholder(tf.float32,
                                           shape=[None, None, flat_obs_dim],
                                           name='obs')
        policy = CategoricalLSTMPolicy(env_spec=env.spec,
                                       hidden_dim=hidden_dim,
                                       state_include_action=False)

        policy.build(obs_var)
        policy.reset()
        obs = env.reset()

        # Single observation.
        single_action, _ = policy.get_action(obs.flatten())
        assert env.action_space.contains(single_action)

        # Batch containing one observation.
        batch_actions, _ = policy.get_actions([obs.flatten()])
        for sampled in batch_actions:
            assert env.action_space.contains(sampled)
示例#5
0
    def test_build_state_not_include_action(self, obs_dim, action_dim,
                                            hidden_dim):
        """Check that two builds on the same input give equal probabilities.

        The policy (with ``state_include_action=False``) is built twice on
        the same placeholder under different names; evaluating both symbolic
        distributions on the same observations must yield identical results.

        Args:
            obs_dim: Observation dimensions passed to ``DummyDiscreteEnv``.
            action_dim: Action dimension passed to ``DummyDiscreteEnv``.
            hidden_dim: Hidden dimension passed to the policy.
        """
        env = GymEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        policy = CategoricalLSTMPolicy(env_spec=env.spec,
                                       hidden_dim=hidden_dim,
                                       state_include_action=False)
        policy.reset(do_resets=None)
        # Only the first element returned by ``env.reset()`` is used as the
        # observation.
        obs = env.reset()[0]

        input_shape = (None, None, policy.input_dim)
        state_input = tf.compat.v1.placeholder(tf.float32, shape=input_shape)
        first_dist = policy.build(state_input, name='dist_sym').dist
        second_dist = policy.build(state_input, name='dist_sym2').dist

        # The same observation is fed twice along the leading axis.
        obs_feed = [[obs.flatten()], [obs.flatten()]]
        first_probs = self.sess.run([first_dist.probs],
                                    feed_dict={state_input: obs_feed})
        second_probs = self.sess.run([second_dist.probs],
                                     feed_dict={state_input: obs_feed})
        assert np.array_equal(first_probs, second_probs)
示例#6
0
    def test_build_state_include_action(self, obs_dim, action_dim, hidden_dim):
        """Check a fresh build matches the policy's own distribution.

        With ``state_include_action=True`` the input fed to the network is
        the flattened observation concatenated with ``action_dim`` zeros.
        The probabilities from ``policy.distribution`` and from a newly built
        symbolic distribution must be identical for that input.

        Args:
            obs_dim: Observation dimensions passed to ``DummyDiscreteEnv``.
            action_dim: Action dimension passed to ``DummyDiscreteEnv``.
            hidden_dim: Hidden dimension passed to the policy.
        """
        dummy_env = DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim)
        env = GarageEnv(dummy_env)
        policy = CategoricalLSTMPolicy(env_spec=env.spec,
                                       hidden_dim=hidden_dim,
                                       state_include_action=True)
        policy.reset(do_resets=None)
        obs = env.reset()

        input_shape = (None, None, policy.input_dim)
        state_input = tf.compat.v1.placeholder(tf.float32, shape=input_shape)
        dist_sym = policy.build(state_input, name='dist_sym').dist

        # Append a zero vector of length ``action_dim`` to the observation.
        zero_action = np.zeros(action_dim)
        concat_obs = np.concatenate([obs.flatten(), zero_action])
        net_feed = [[concat_obs], [concat_obs]]

        builtin_probs = self.sess.run(
            [policy.distribution.probs],
            feed_dict={policy.model.input: net_feed})
        rebuilt_probs = self.sess.run([dist_sym.probs],
                                      feed_dict={state_input: net_feed})
        assert np.array_equal(builtin_probs, rebuilt_probs)