示例#1
0
 def create_agent(self, env):
     """Build a DDPG agent for ``env``.

     The policy and Q-function are produced by
     ``create_deterministic_policy_for_env`` and
     ``create_state_action_q_function_for_env`` and wrapped in a
     ``DDPGModel``. Each network gets its own default-configured Adam
     optimizer.

     Args:
         env: Environment passed to the two network factory helpers.

     Returns:
         An ``agents.DDPG`` instance using ``gamma=0.99``, a replay buffer
         constructed with ``10 ** 5`` and an ``AdditiveGaussian`` explorer
         with ``scale=1``.
     """
     policy = create_deterministic_policy_for_env(env)
     q_func = create_state_action_q_function_for_env(env)
     ddpg_model = agents.ddpg.DDPGModel(policy=policy, q_func=q_func)

     # One optimizer per network: actor (policy) and critic (Q-function).
     actor_optimizer = optimizers.Adam()
     actor_optimizer.setup(ddpg_model.policy)
     critic_optimizer = optimizers.Adam()
     critic_optimizer.setup(ddpg_model.q_function)

     return agents.DDPG(
         ddpg_model,
         actor_optimizer,
         critic_optimizer,
         replay_buffer.ReplayBuffer(10 ** 5),
         gamma=0.99,
         explorer=explorers.AdditiveGaussian(scale=1),
     )
示例#2
0
    def _test_load_ddpg(self, gpu):
        """Check that the pretrained DDPG "Hopper-v2" model loads into an agent.

        Builds a Q-function and a policy network, wraps them in a
        ``DDPGModel``, runs one dummy forward pass through each network,
        constructs a ``DDPG`` agent and finally loads the downloaded
        pretrained model into it.

        Args:
            gpu: Forwarded unchanged as the ``gpu`` argument of
                ``agents.DDPG``.
        """
        from chainerrl.agents.ddpg import DDPGModel

        def concat_obs_and_action(obs, action):
            # The Q-function consumes observation and action as one vector.
            return F.concat((obs, action), axis=-1)

        action_size = 3
        action_low = [-1., -1., -1.]
        action_high = [1., 1., 1.]
        obs_low = [-np.inf] * 11

        winit = chainer.initializers.LeCunUniform(3**-0.5)
        critic = chainer.Sequential(
            concat_obs_and_action,
            L.Linear(None, 400, initialW=winit),
            F.relu,
            L.Linear(None, 300, initialW=winit),
            F.relu,
            L.Linear(None, 1, initialW=winit),
        )
        actor = chainer.Sequential(
            L.Linear(None, 400, initialW=winit),
            F.relu,
            L.Linear(None, 300, initialW=winit),
            F.relu,
            L.Linear(None, action_size, initialW=winit),
            F.tanh,
            chainerrl.distribution.ContinuousDeterministicDistribution,
        )
        ddpg_model = DDPGModel(q_func=critic, policy=actor)

        # Push an all-zero batch of size one through both networks.
        # NOTE(review): presumably this materialises the parameters of the
        # ``L.Linear(None, ...)`` layers before optimizers are attached and
        # weights are loaded -- confirm against Chainer's lazy init rules.
        xp = ddpg_model.xp
        fake_obs = chainer.Variable(
            xp.zeros_like(obs_low, dtype=np.float32)[None],
            name='observation')
        fake_action = chainer.Variable(
            xp.zeros_like(action_low, dtype=np.float32)[None],
            name='action')
        actor(fake_obs)
        critic(fake_obs, fake_action)

        actor_optimizer = optimizers.Adam()
        critic_optimizer = optimizers.Adam()
        actor_optimizer.setup(ddpg_model['policy'])
        critic_optimizer.setup(ddpg_model['q_function'])

        noise = explorers.AdditiveGaussian(
            scale=0.1, low=action_low, high=action_high)

        agent = agents.DDPG(
            ddpg_model,
            actor_optimizer,
            critic_optimizer,
            replay_buffer.ReplayBuffer(100),
            gamma=0.99,
            explorer=noise,
            replay_start_size=1000,
            target_update_method='soft',
            target_update_interval=1,
            update_interval=1,
            soft_update_tau=5e-3,
            n_times_update=1,
            gpu=gpu,
            minibatch_size=100,
            burnin_action_func=None,
        )

        downloaded_model, was_cached = download_model(
            "DDPG", "Hopper-v2", model_type=self.pretrained_type)
        agent.load(downloaded_model)

        # Only enforce the second value returned by ``download_model`` when
        # the environment explicitly asks for it.
        if os.environ.get('CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'):
            assert was_cached