Example #1
    def make_agent(process_idx):
        # ndim_obs, n_actions, random_action_func and the self.* settings
        # are defined in the enclosing test case, not in this snippet.
        n_hidden_channels = 50
        # Pick a recurrent (LSTM) or a plain feedforward Q-function.
        if self.use_lstm:
            q_func = FCLSTMStateQFunction(
                ndim_obs,
                n_actions,
                n_hidden_channels=n_hidden_channels,
                n_hidden_layers=2)
        else:
            q_func = FCStateQFunctionWithDiscreteAction(
                ndim_obs,
                n_actions,
                n_hidden_channels=n_hidden_channels,
                n_hidden_layers=2)
        # RMSprop with statistics shared across processes, as used for
        # asynchronous training.
        opt = rmsprop_async.RMSpropAsync(lr=1e-3, eps=1e-2, alpha=0.99)
        opt.setup(q_func)
        # Exploration: a constant per-process epsilon (process_idx / 10)
        # or a Boltzmann (softmax) policy.
        if self.explorer == 'epsilon_greedy':
            explorer = chainerrl.explorers.ConstantEpsilonGreedy(
                process_idx / 10, random_action_func)
        else:
            explorer = chainerrl.explorers.Boltzmann()

        # Asynchronous n-step Q-learning agent; the target network is
        # synchronized every i_target steps.
        return nsq.NSQ(q_func,
                       opt,
                       t_max=self.t_max,
                       gamma=0.9,
                       i_target=100,
                       explorer=explorer)
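
For context, a factory like this is meant to be called once per worker process by ChainerRL's asynchronous training loop. A minimal sketch of that wiring, assuming chainerrl.experiments.train_agent_async accepts make_env and make_agent callbacks; the outdir, processes and steps values and the make_env factory below are illustrative, not part of the snippet above:

    from chainerrl import experiments

    experiments.train_agent_async(
        outdir='results',       # hypothetical output directory
        processes=4,            # number of asynchronous worker processes
        make_env=make_env,      # assumed env factory, e.g. make_env(process_idx, test)
        make_agent=make_agent,  # the factory defined above; one agent per process
        steps=10 ** 5)          # illustrative training budget
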
Example #2
    def make_agent(process_idx):
        # Random final epsilon assignment described in the original paper:
        # 0.1, 0.01 or 0.5, chosen with probabilities 0.4, 0.3 and 0.3.
        rand = random.random()
        if rand < 0.4:
            epsilon_target = 0.1
        elif rand < 0.7:
            epsilon_target = 0.01
        else:
            epsilon_target = 0.5
        # Decay epsilon linearly from 1 to epsilon_target over
        # args.final_exploration_frames steps.
        explorer = explorers.LinearDecayEpsilonGreedy(
            1, epsilon_target, args.final_exploration_frames,
            action_space.sample)
        # Suppress the explorer's DEBUG-level log messages
        explorer.logger.setLevel(logging.INFO)
        # q_func, opt, args, action_space and dqn_phi are defined in the
        # enclosing script, not in this snippet.
        return nsq.NSQ(q_func, opt, t_max=5, gamma=0.99,
                       i_target=40000,
                       explorer=explorer, phi=dqn_phi)
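
The LinearDecayEpsilonGreedy explorer used above anneals epsilon linearly from the start value (1 here) down to epsilon_target over args.final_exploration_frames steps and keeps it at epsilon_target afterwards. A rough stand-alone sketch of that schedule (an illustrative helper, not ChainerRL code):

    def linear_epsilon(step, start=1.0, end=0.1, decay_steps=10 ** 6):
        # Linear interpolation from start to end, then constant at end.
        if step >= decay_steps:
            return end
        return start + (end - start) * (step / decay_steps)

    # e.g. linear_epsilon(0) == 1.0, linear_epsilon(5 * 10 ** 5) == 0.55,
    # linear_epsilon(10 ** 6) == 0.1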