def create_agent(self, env):
    """Build a DDPG agent for ``env``.

    The policy and Q-function are produced by the env-specific factory
    helpers, each network gets its own Adam optimizer, and exploration
    uses additive Gaussian noise with ``scale=1``.

    Args:
        env: Environment handed to the policy / Q-function factories.

    Returns:
        The ``agents.DDPG`` instance wired up with the pieces above.
    """
    # Policy is created before the Q-function, matching the evaluation
    # order of the keyword arguments in a single constructor call.
    actor = create_deterministic_policy_for_env(env)
    critic = create_state_action_q_function_for_env(env)
    ddpg_model = agents.ddpg.DDPGModel(policy=actor, q_func=critic)

    # NOTE(review): 10 ** 5 is presumably the buffer capacity -- confirm
    # against ReplayBuffer's signature.
    buffer = replay_buffer.ReplayBuffer(10 ** 5)

    actor_optimizer = optimizers.Adam()
    actor_optimizer.setup(ddpg_model.policy)

    critic_optimizer = optimizers.Adam()
    critic_optimizer.setup(ddpg_model.q_function)

    noise = explorers.AdditiveGaussian(scale=1)

    return agents.DDPG(
        ddpg_model,
        actor_optimizer,
        critic_optimizer,
        buffer,
        gamma=0.99,
        explorer=noise,
    )
def _test_load_ddpg(self, gpu):
    """Check that a downloaded DDPG "Hopper-v2" model loads into an agent.

    Builds a policy / Q-function pair, runs one dummy forward pass
    through each, wraps them in an ``agents.DDPG`` instance and then
    loads the model obtained from ``download_model``.

    Args:
        gpu: Forwarded unchanged as the ``gpu`` argument of ``agents.DDPG``.

    Raises:
        AssertionError: If ``CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED``
            is set to a non-empty value in the environment and
            ``download_model`` reports that the model did not already
            exist.
    """
    from chainerrl.agents.ddpg import DDPGModel

    def concat_obs_and_action(obs, action):
        # Join observation and action along the last axis so the
        # Q-function can consume them as a single input.
        return F.concat((obs, action), axis=-1)

    action_size = 3
    winit = chainer.initializers.LeCunUniform(3**-0.5)

    def hidden_stack():
        # 400 -> relu -> 300 -> relu trunk used by both networks; a
        # fresh pair of Linear links is created on every call.
        return [
            L.Linear(None, 400, initialW=winit),
            F.relu,
            L.Linear(None, 300, initialW=winit),
            F.relu,
        ]

    # Q-function layers are constructed before the policy layers.
    critic_layers = [concat_obs_and_action]
    critic_layers += hidden_stack()
    critic_layers.append(L.Linear(None, 1, initialW=winit))
    q_func = chainer.Sequential(*critic_layers)

    actor_layers = hidden_stack()
    actor_layers.append(L.Linear(None, action_size, initialW=winit))
    actor_layers.append(F.tanh)
    actor_layers.append(
        chainerrl.distribution.ContinuousDeterministicDistribution)
    policy = chainer.Sequential(*actor_layers)

    ddpg_model = DDPGModel(q_func=q_func, policy=policy)

    # One dummy forward pass per network (policy first), presumably to
    # materialize the parameters of the Linear(None, ...) links before
    # the optimizers are attached -- TODO confirm.
    xp = ddpg_model.xp
    obs_low = [-np.inf] * 11
    dummy_obs = chainer.Variable(
        xp.zeros_like(obs_low, dtype=np.float32)[None],
        name='observation')
    dummy_action = chainer.Variable(
        xp.zeros_like([-1., -1., -1.], dtype=np.float32)[None],
        name='action')
    policy(dummy_obs)
    q_func(dummy_obs, dummy_action)

    actor_optimizer = optimizers.Adam()
    critic_optimizer = optimizers.Adam()
    actor_optimizer.setup(ddpg_model['policy'])
    critic_optimizer.setup(ddpg_model['q_function'])

    noise = explorers.AdditiveGaussian(
        scale=0.1, low=[-1., -1., -1.], high=[1., 1., 1.])

    ddpg_settings = {
        'gamma': 0.99,
        'explorer': noise,
        'replay_start_size': 1000,
        'target_update_method': 'soft',
        'target_update_interval': 1,
        'update_interval': 1,
        'soft_update_tau': 5e-3,
        'n_times_update': 1,
        'gpu': gpu,
        'minibatch_size': 100,
        'burnin_action_func': None,
    }
    agent = agents.DDPG(
        ddpg_model,
        actor_optimizer,
        critic_optimizer,
        replay_buffer.ReplayBuffer(100),
        **ddpg_settings)

    downloaded_model, exists = download_model(
        "DDPG", "Hopper-v2", model_type=self.pretrained_type)
    agent.load(downloaded_model)

    # Only enforce the cache expectation when explicitly requested via
    # the environment.
    if os.environ.get('CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'):
        assert exists