示例#1
0
 def train_and_assert(self, agent_type, is_v1: bool, num_iterations=100):
     """Trains agent_type on 'CartPole-v0' for each matching backend and asserts eval results.

     Args:
         agent_type: the agent class to instantiate (e.g. CemAgent).
         is_v1: if True train on the v1-only backends, otherwise on the v2 backends.
         num_iterations: number of training iterations passed to train().
     """
     logger = logging.warning
     # v2 backends are those reported when v1 is skipped; the v1 set is
     # everything reported for this agent type that is not in the v2 set.
     v2_backends = list(get_backends(agent_type, skip_v1=True))
     v1_backends = [
         b for b in get_backends(agent_type) if b not in v2_backends
     ]
     backends = v1_backends if is_v1 else v2_backends
     for backend in backends:
         logger(
             f'backend={backend} agent={agent_type}, num_iterations={num_iterations}'
         )
         cem_agent: CemAgent = agent_type('CartPole-v0',
                                          fc_layers=(100, ),
                                          backend=backend)
         tc: core.TrainContext = cem_agent.train(
             [log.Duration(),
              log.Iteration(eval_only=True),
              log.Agent()],
             num_iterations=num_iterations,
             num_iterations_between_eval=10,
             max_steps_per_episode=200,
             default_plots=False)
         # eval_steps maps episodes-done-in-training -> (min, avg, max) step counts.
         (min_steps, avg_steps,
          max_steps) = tc.eval_steps[tc.episodes_done_in_training]
         assert max_steps >= 100
         assert avg_steps >= 50
示例#2
0
 def test_train(self):
     """Smoke-trains a SacAgent on 'CartPole-v0' over every available backend."""
     for current_backend in get_backends(SacAgent):
         agent: SacAgent = SacAgent('CartPole-v0', backend=current_backend)
         monitors = [log.Duration(), log.Iteration(), log.Agent()]
         agent.train(monitors,
                     num_iterations=10,
                     max_steps_per_episode=200,
                     default_plots=False)
示例#3
0
 def test_train(self):
     """Trains a SacAgent on the continuous mountain-cart env and checks the reward floor."""
     for current_backend in get_backends(SacAgent):
         agent: SacAgent = SacAgent(_mountaincart_continuous_name,
                                    backend=current_backend)
         monitors = [log.Duration(), log.Iteration(eval_only=True), duration.Fast()]
         train_context: core.TrainContext = agent.train(monitors,
                                                        default_plots=False)
         best_reward = max_avg_rewards(train_context)
         assert best_reward >= -1
示例#4
0
 def test_train(self):
     """Trains a ReinforceAgent on 'CartPole-v0' and asserts a minimum average step count."""
     # NOTE(review): backends are queried for RandomAgent while a ReinforceAgent is
     # trained — looks like a copy-paste from another test; confirm this is intended.
     for current_backend in get_backends(RandomAgent):
         agent: ReinforceAgent = ReinforceAgent('CartPole-v0', backend=current_backend)
         train_context: core.TrainContext = agent.train(
             [log.Duration(), log.Iteration()],
             num_iterations=10,
             max_steps_per_episode=200,
             default_plots=False)
         # eval_steps yields (min, avg, max) step counts per evaluation.
         _, avg_steps, _ = train_context.eval_steps[train_context.episodes_done_in_training]
         assert avg_steps >= 10
示例#5
0
 def test_train(self):
     """Trains a ReinforceAgent on the line-world env and checks the best average reward."""
     # NOTE(review): backends are queried for RandomAgent while a ReinforceAgent is
     # trained — looks like a copy-paste from another test; confirm this is intended.
     for current_backend in get_backends(RandomAgent):
         agent: ReinforceAgent = ReinforceAgent(_line_world_name, backend=current_backend)
         train_context: core.TrainContext = agent.train(
             [log.Duration(), log.Iteration()],
             num_iterations=10,
             max_steps_per_episode=200,
             default_plots=False)
         best_reward = max_avg_rewards(train_context)
         assert best_reward >= 5
示例#6
0
 def train_and_eval(self, agent_type, backend, num_iterations):
     """Trains agent_type on the cartpole env and returns the best average eval reward."""
     agent: DqnAgent = agent_type(_cartpole_name, fc_layers=(100,), backend=backend)
     monitors = [log.Duration(), log.Iteration(eval_only=True), log.Agent()]
     train_context: core.TrainContext = agent.train(
         monitors,
         num_iterations=num_iterations,
         num_steps_buffer_preload=1000,
         num_iterations_between_eval=500,
         max_steps_per_episode=200,
         default_plots=False)
     return max_avg_rewards(train_context)
示例#7
0
 def train_and_eval(self, agent_type, backend, num_iterations):
     """Trains agent_type on 'CartPole-v0' and returns the best average eval step count.

     Args:
         agent_type: the agent class to instantiate (e.g. DqnAgent).
         backend: backend name passed to the agent constructor.
         num_iterations: number of training iterations passed to train().

     Returns:
         The maximum of the per-evaluation average step counts.
     """
     dqn_agent: DqnAgent = agent_type('CartPole-v0', fc_layers=(100,), backend=backend)
     tc: core.TrainContext = dqn_agent.train(
         [log.Duration(), log.Iteration(eval_only=True), log.Agent()],
         num_iterations=num_iterations,
         num_steps_buffer_preload=1000,
         num_iterations_between_eval=500,
         max_steps_per_episode=200,
         default_plots=False)
     # eval_steps values are (min, avg, max) tuples; a generator expression
     # avoids building a throwaway list inside max() (flake8-comprehensions C419).
     max_avg_steps = max(avg_steps
                         for (_min_steps, avg_steps, _max_steps) in tc.eval_steps.values())
     return max_avg_steps