Example #1
 def test_train(self):
     for backend in get_backends(SacAgent):
         sac_agent = SacAgent('CartPole-v0', backend=backend)
         sac_agent.train([log.Duration(), log.Iteration(), log.Agent()],
                         num_iterations=10,
                         max_steps_per_episode=200,
                         default_plots=False)
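For context outside a test, the same flow works directly against the public API; a minimal sketch, assuming the package layout used throughout these examples (easyagents.agents plus the callbacks.log / callbacks.duration modules), with the play() call following the pattern in Example #6:

    from easyagents import agents
    from easyagents.callbacks import duration, log

    # train briefly, then evaluate the trained agent with play()
    agent = agents.PpoAgent('CartPole-v0')
    agent.train([log.Agent(), duration.Fast()])
    agent.play([log.Agent()])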
Example #2
 def train_and_assert(self, agent_type, is_v1: bool, num_iterations=100):
     logger = logging.warning
     v2_backends = list(get_backends(agent_type, skip_v1=True))
     v1_backends = [b for b in get_backends(agent_type) if b not in v2_backends]
     backends = v1_backends if is_v1 else v2_backends
     for backend in backends:
         logger(
             f'backend={backend} agent={agent_type}, num_iterations={num_iterations}'
         )
         agent = agent_type('CartPole-v0', fc_layers=(100,), backend=backend)
         tc: core.TrainContext = agent.train(
             [log.Duration(),
              log.Iteration(eval_only=True),
              log.Agent()],
             num_iterations=num_iterations,
             num_iterations_between_eval=10,
             max_steps_per_episode=200,
             default_plots=False)
         (min_steps, avg_steps,
          max_steps) = tc.eval_steps[tc.episodes_done_in_training]
         assert max_steps >= 100
         assert avg_steps >= 50
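The tuple unpacking above assumes the shape of tc.eval_steps: judging from this snippet and Example #14, it maps the number of training episodes completed at each evaluation point to a (min_steps, avg_steps, max_steps) tuple. A sketch of walking the whole evaluation history:

    for episodes_done, (min_steps, avg_steps, max_steps) in tc.eval_steps.items():
        print(f'after {episodes_done} training episodes: '
              f'eval steps min={min_steps} avg={avg_steps} max={max_steps}')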
Example #3
 def test_train_single_episode(self):
     for backend in get_backends(PpoAgent):
         ppo = agents.PpoAgent(gym_env_name=_env_name, backend=backend)
         count = log._CallbackCounts()
         ppo.train([log.Agent(), count, duration._SingleEpisode()])
         assert count.gym_init_begin_count == count.gym_init_end_count == 1
         assert count.gym_step_begin_count == count.gym_step_end_count
         assert count.gym_step_begin_count < 10 + count.gym_reset_begin_count
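The invariant behind these asserts: _CallbackCounts evidently tallies matching begin/end events from the gym wrapper, so a clean single-episode run must leave every *_begin_count equal to its *_end_count, with the total step count staying fewer than 10 steps beyond the number of resets.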
Example #4
 def test_reinforce_train(self):
     model_config = core.ModelConfig("CartPole-v0")
     tc = core.EpisodesTrainContext()
     reinforce_agent = tforce.TforceReinforceAgent(model_config=model_config)
     reinforce_agent.train(
         train_context=tc,
         callbacks=[log.Iteration(),
                    log.Agent(),
                    duration.Fast()])
Example #5
 def test_ppo_train(self):
     model_config = core.ModelConfig("CartPole-v0")
     tc = core.PpoTrainContext()
     ppo_agent = tforce.TforcePpoAgent(model_config=model_config)
     ppo_agent.train(
         train_context=tc,
         callbacks=[log.Iteration(),
                    log.Agent(),
                    duration.Fast()])
Example #6
 def test_play_single_episode(self):
     for backend in get_backends(PpoAgent):
         ppo = agents.PpoAgent(gym_env_name=_env_name, backend=backend)
         count = log._CallbackCounts()
         cb = [log.Agent(), count, duration._SingleEpisode()]
         ppo.train([duration._SingleEpisode()])
         ppo.play(cb)
         assert count.gym_init_begin_count == count.gym_init_end_count == 1
         assert count.gym_step_begin_count == count.gym_step_end_count <= 10
Example #7
 def test_train(self):
     model_config = core.ModelConfig(_mountaincar_continuous_name)
     tc = core.StepsTrainContext()
     sac_agent = tfagents.TfSacAgent(model_config=model_config)
     sac_agent.train(
         train_context=tc,
         callbacks=[duration.Fast(),
                    log.Iteration(),
                    log.Agent()])
Example #8
    def test_reinforce_train(self):
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name)
        tc = core.EpisodesTrainContext()
        tc.num_iterations = 50
        reinforce_agent = tforce.TforceReinforceAgent(model_config=model_config)
        reinforce_agent.train(train_context=tc, callbacks=[log.Iteration(), log.Agent()])
        (min_r, avg_r, max_r) = tc.eval_rewards[tc.episodes_done_in_training]
        assert avg_r > 100
Example #9
 def test_dueling_dqn_train(self):
     model_config = core.ModelConfig("CartPole-v0", fc_layers=(100, ))
     tc = core.StepsTrainContext()
     tc.num_iterations = 20000                # number of training iterations
     tc.num_steps_buffer_preload = 1000       # steps collected to fill the replay buffer before training
     tc.num_iterations_between_eval = 1000    # run an evaluation every 1000 iterations
     tc.max_steps_per_episode = 200           # CartPole-v0 caps episodes at 200 steps
     dqn_agent = tforce.TforceDuelingDqnAgent(model_config=model_config)
     dqn_agent.train(train_context=tc,
                     callbacks=[log.Iteration(eval_only=True),
                                log.Agent()])
Example #10
 def test_dqn(self):
     easyagents.agents.seed = 0  # fix the global seed for reproducible results
     model_config = core.ModelConfig("CartPole-v0")
     tc = core.StepsTrainContext()
     dqn_agent = kerasrl.KerasRlDqnAgent(model_config=model_config)
     dqn_agent.train(train_context=tc,
                     callbacks=[
                         duration.Fast(),
                         log.Agent(),
                         log.Step(),
                         log.Iteration()
                     ])
Example #11
 def test_cem(self):
     easyagents.agents.seed = 0  # fix the global seed for reproducible results
     model_config = core.ModelConfig("CartPole-v0", fc_layers=(100, ))
     tc = core.CemTrainContext()
     tc.num_iterations = 100               # CEM generations
     tc.num_episodes_per_iteration = 50    # episodes sampled per generation
     tc.max_steps_per_episode = 200
     tc.elite_set_fraction = 0.1           # keep the top 10% of episodes for the update
     tc.num_steps_buffer_preload = 2000
     cem_agent = kerasrl.KerasRlCemAgent(model_config=model_config)
     cem_agent.train(train_context=tc,
                     callbacks=[log.Agent(), log.Iteration()])
Example #12
    def test_dqn_train(self):
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name, fc_layers=(100, 100))
        tc = core.StepsTrainContext()
        tc.num_iterations = 10000
        tc.num_steps_buffer_preload = 500
        tc.num_iterations_between_eval = 500
        tc.max_steps_per_episode = 200
        dqn_agent = tforce.TforceDqnAgent(model_config=model_config)
        dqn_agent.train(train_context=tc, callbacks=[log.Iteration(eval_only=True), log.Agent()])
        (min_r, avg_r, max_r) = tc.eval_rewards[tc.episodes_done_in_training]
        assert avg_r > 50
Example #13
    def test_save(self):
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name)
        tc = core.PpoTrainContext()
        tc.num_iterations = 3
        ppo_agent = tforce.TforcePpoAgent(model_config=model_config)
        ppo_agent.train(train_context=tc, callbacks=[log.Iteration(), log.Agent()])
        tempdir = bcore._get_temp_path()
        bcore._mkdir(tempdir)
        ppo_agent.save(tempdir, [])

        loaded_agent = tforce.TforcePpoAgent(model_config=model_config)
        loaded_agent.load(tempdir, [])
        bcore._rmpath(tempdir)
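bcore._get_temp_path, _mkdir, and _rmpath are internal helpers from easyagents.backends.core; outside this test suite, Python's tempfile module achieves the same. A sketch, assuming save/load only need an existing directory path:

    import tempfile

    with tempfile.TemporaryDirectory() as tempdir:
        ppo_agent.save(tempdir, [])
        loaded_agent = tforce.TforcePpoAgent(model_config=model_config)
        loaded_agent.load(tempdir, [])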
Example #14
 def train_and_eval(self, agent_type, backend, num_iterations):
     dqn_agent: DqnAgent = agent_type('CartPole-v0',
                                      fc_layers=(100, ),
                                      backend=backend)
     tc: core.TrainContext = dqn_agent.train(
         [log.Duration(),
          log.Iteration(eval_only=True),
          log.Agent()],
         num_iterations=num_iterations,
         num_steps_buffer_preload=1000,
         num_iterations_between_eval=500,
         max_steps_per_episode=200,
         default_plots=False)
     max_avg_steps = max(avg_steps
                         for (_, avg_steps, _) in tc.eval_steps.values())
     return max_avg_steps
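A hypothetical call site for this helper, inside the same test class (the loop and threshold mirror Example #2's pattern; neither is taken from the original tests):

    for backend in get_backends(DqnAgent):
        max_avg_steps = self.train_and_eval(DqnAgent, backend, num_iterations=5000)
        assert max_avg_steps >= 50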
Example #15
 def test_log_agent(self):
     agent = agents.PpoAgent(_step_count_name)
     agent.train([log.Agent(), duration.Fast()])
Example #16
 def train_and_eval(self, agent_type, backend, num_iterations):
     dqn_agent: DqnAgent = agent_type(_cartpole_name, fc_layers=(100,), backend=backend)
     tc: core.TrainContext = dqn_agent.train([log.Duration(), log.Iteration(eval_only=True), log.Agent()],
                                             num_iterations=num_iterations,
                                             num_steps_buffer_preload=1000,
                                             num_iterations_between_eval=500,
                                             max_steps_per_episode=200,
                                             default_plots=False)
     return max_avg_rewards(tc)
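max_avg_rewards is not defined in these snippets. Assuming it mirrors Example #14's computation, but over tc.eval_rewards (whose (min, avg, max) tuple shape is visible in Examples #8 and #12), a plausible implementation:

    def max_avg_rewards(tc: core.TrainContext) -> float:
        # best average evaluation reward over all evaluation points
        return max(avg_r for (_, avg_r, _) in tc.eval_rewards.values())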