def test_nested_ob(self):
    """Check that the replay-buffer data manager handles nested observations."""
    env = make_env('CartPole-v1')
    qf = QFunction(
        FeedForwardBase(env.observation_space, env.action_space))
    env = NestedVecObWrapper(env)
    replay = ReplayBuffer(2000, 1)
    manager = ReplayBufferDataManager(
        replay,
        env,
        act_fn=BufferActor(qf),
        device='cpu',
        learning_starts=50,
        update_period=2)

    # 50 warm-up steps plus 10 more updates of 2 steps each -> 70 transitions.
    for _ in range(11):
        manager.step_until_update()
    assert replay.num_in_buffer == 70

    batch = manager.sample(32)
    manager.act(batch['obs'])
    assert batch['action'].shape == batch['reward'].shape
    assert batch['action'].shape == batch['done'].shape
    obs, next_obs = batch['obs'], batch['next_obs']
    if isinstance(obs, list):
        # Nested observations come back as a list of arrays.
        assert obs[0].shape == next_obs[0].shape
        assert len(obs[0].shape) == 2
    else:
        assert obs.shape == next_obs.shape
        assert len(obs.shape) == 2
    assert len(batch['action'].shape) == 1
# Exemplo n.º 2
        def test_nested_observations(self):
            """Test nested observations."""
            logger.configure('./.test')
            env = make_env('CartPole-v1', 1)
            # Double-nest the observation space, then normalize observations.
            env = VecObsNormWrapper(
                NestedVecObWrapper(NestedVecObWrapper(env)), log_prob=1.)
            print(env.observation_space)
            env.reset()
            assert env.t == 0
            steps_taken = 0
            while steps_taken < 100:
                _, _, done, _ = env.step(
                    np.array([env.action_space.sample() for _ in range(1)]))
                if done:
                    env.reset()
                steps_taken += 1
            assert env.t == 100
            state = env.state_dict()
            assert state['t'] == env.t
            # Round-trip the (modified) state through load_state_dict.
            state['t'] = 0
            env.load_state_dict(state)
            assert env.t == 0

            # Eval mode must not advance the step counter; train mode must.
            env.eval()
            env.reset()
            for _ in range(3):
                env.step(np.array([env.action_space.sample()]))
            assert env.t == 0
            env.train()
            for _ in range(3):
                env.step(np.array([env.action_space.sample()]))
            assert env.t == 3
            print(env.mean)
            print(env.std)
            shutil.rmtree('./.test')
# Exemplo n.º 3
        def test_vec(self):
            """Test vec wrapper."""
            logger.configure('./.test')
            n_envs = 10
            env = make_env('CartPole-v1', n_envs)
            env = VecObsNormWrapper(env, log_prob=1.)
            print(env.observation_space)

            def batch_actions():
                # One random action per sub-environment.
                return np.array(
                    [env.action_space.sample() for _ in range(n_envs)])

            env.reset()
            assert env.t == 0
            for _ in range(5):
                env.step(batch_actions())
            state = env.state_dict()
            assert state['t'] == env.t
            assert np.allclose(state['mean'], env.mean)
            assert np.allclose(state['std'], env.std)
            # Reloading a zeroed step counter must reset env.t.
            state['t'] = 0
            env.load_state_dict(state)
            assert env.t == 0

            # Eval mode should freeze the step counter.
            env.eval()
            env.reset()
            for _ in range(10):
                env.step(batch_actions())
            assert env.t == 0
            env.train()
            print(env.mean)
            print(env.std)
            shutil.rmtree('./.test')
# Exemplo n.º 4
        def test_rnd_env(self):
            """Test vec frame stack wrapper."""
            nenv = 2
            base = make_env('LunarLander-v2', nenv=nenv)
            rnd = RND(RNDNet, torch.optim.Adam, 0.99,
                      base.observation_space.shape, 'cpu')
            emb = InverseDynamicsEmbedding(base, EmbeddingNet, PredictionNet,
                                           Loss, torch.optim.Adam, 'cpu')
            env = NGUVecEnv(base, NGU(rnd, emb, 50, 'cpu'))

            def batch_actions():
                # One random action per sub-environment.
                return np.array(
                    [env.action_space.sample() for _ in range(nenv)])

            env.reset()
            _, r, _, _ = env.step(batch_actions())
            # NGU wrapper returns a (nenv, 2) reward: extrinsic + intrinsic.
            assert r.shape == (nenv, 2)
            for _ in range(1000):
                _, r, done, _ = env.step(batch_actions())
                assert r.shape == (nenv, 2)

                if np.any(done):
                    env.reset(force=False)
# Exemplo n.º 5
        def test_vec(self):
            """Test vec wrapper."""
            nenv = 10
            env = VecRewardNormWrapper(make_env('CartPole-v1', nenv),
                                       gamma=0.99)
            env.reset()

            def batch_actions():
                # One random action per sub-environment.
                return np.array(
                    [env.action_space.sample() for _ in range(nenv)])

            for _ in range(5):
                out = env.step(batch_actions())
                print(out[1])
            c = env.rn.count
            print(c)
            # The reward normalizer sees one reward per env per step.
            assert c == 5 * nenv
            state = env.state_dict()
            env.load_state_dict(state)
            assert c == env.rn.count

            # Eval-mode steps must not update the normalizer's count.
            env.eval()
            env.reset()
            for _ in range(10):
                env.step(batch_actions())
            env.train()
            assert c == env.rn.count
# Exemplo n.º 6
 def env_continuous(nenv):
     """Create continuous env."""
     env_id = 'LunarLanderContinuous-v2'
     return make_env(env_id, nenv=nenv)
# Exemplo n.º 7
 def env_discrete(nenv):
     """Create discrete env."""
     env_id = 'CartPole-v1'
     return make_env(env_id, nenv=nenv)
# Exemplo n.º 8
 def env_fn(nenv):
     """Environment function."""
     env_id = 'LunarLanderContinuous-v2'
     return make_env(env_id, nenv=nenv)
 def env_fn(nenv):
     """Environment function.

     NOTE(review): this re-defines `env_fn` and silently shadows the earlier
     definition above — presumably these came from separate examples; verify
     which one the caller intends to use.
     """
     # Pass nenv as a keyword for consistency with the sibling fixtures.
     return make_env('LunarLanderContinuous-v2', nenv=nenv)
# Exemplo n.º 10
 def _env(nenv):
     """Create a training environment."""
     env_id = "LunarLander-v2"
     return make_env(env_id, nenv)