Example #1
 def test_train(self):
     model_config = core.ModelConfig(_lineworld_name)
     tc = core.PpoTrainContext()
     ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
     ppo_agent.train(train_context=tc,
                     callbacks=[duration.Fast(),
                                log.Iteration()])
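Example #1 and the snippets that follow reference names such as core, tfagents, tforce, bcore, duration, log and _lineworld_name without showing their definitions. A minimal setup sketch of the shared imports and module-level names these tests appear to assume (the concrete ids assigned to _lineworld_name and _mountaincar_continuous_name are assumptions for illustration, not taken from the source):

# Hedged setup sketch: imports and module-level names assumed by the test snippets below.
# The environment ids for the custom test environments are placeholders, not taken from the source.
from easyagents import core
from easyagents.backends import core as bcore
from easyagents.backends import tfagents, tforce
from easyagents.callbacks import duration, log

_cartpole_name = "CartPole-v0"
_mountaincar_continuous_name = "MountainCarContinuous-v0"  # assumed gym id for the SAC example
_lineworld_name = "LineWorld-v0"  # assumed id of a custom test environment registered with gym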
Example #2
 def test_train(self):
     model_config = core.ModelConfig("CartPole-v0")
     tc = core.StepsTrainContext()
     dqn_agent = tfagents.TfDqnAgent(model_config=model_config)
     dqn_agent.train(train_context=tc,
                     callbacks=[duration.Fast(),
                                log.Iteration()])
Example #3
 def test_save_load(self):
     model_config = core.ModelConfig(_lineworld_name)
     random_agent = tfagents.TfRandomAgent(model_config=model_config)
     tempdir = bcore._get_temp_path()
     bcore._mkdir(tempdir)
     random_agent.save(directory=tempdir, callbacks=[])
     random_agent.load(directory=tempdir, callbacks=[])
     bcore._rmpath(tempdir)
Example #4
 def test_train(self):
     model_config = core.ModelConfig(_lineworld_name)
     tc = core.TrainContext()
     random_agent = tfagents.TfRandomAgent(model_config=model_config)
     random_agent.train(train_context=tc,
                        callbacks=[duration.Fast(),
                                   log.Iteration()])
     assert tc.episodes_done_in_iteration == 1
Example #5
 def test_play(self):
     model_config = core.ModelConfig("CartPole-v0")
     randomAgent = tfagents.TfRandomAgent(model_config=model_config)
     pc = core.PlayContext()
     pc.max_steps_per_episode = 10
     pc.num_episodes = 1
     randomAgent.play(play_context=pc, callbacks=[])
     assert pc.num_episodes == 1
Example #6
 def test_train(self):
     model_config = core.ModelConfig("CartPole-v0")
     tc = core.EpisodesTrainContext()
     reinforceAgent = tfagents.TfReinforceAgent(model_config=model_config)
     reinforceAgent.train(train_context=tc, callbacks=[duration.Fast(), log.Iteration()])
     assert tc.episodes_done_in_iteration == tc.num_episodes_per_iteration > 0
     assert tc.iterations_done_in_training == tc.num_iterations > 0
     rmin, ravg, rmax = tc.eval_rewards[tc.episodes_done_in_training]
     assert rmax >= 10
Example #7
 def test_ppo_train(self):
     model_config = core.ModelConfig("CartPole-v0")
     tc = core.PpoTrainContext()
     ppoAgent = tforce.TforcePpoAgent(model_config=model_config)
     ppoAgent.train(
         train_context=tc,
         callbacks=[log.Iteration(),
                    log.Agent(),
                    duration.Fast()])
Example #8
 def test_train(self):
     model_config = core.ModelConfig(_lineworld_name)
     tc = core.EpisodesTrainContext()
     reinforce_agent = tfagents.TfReinforceAgent(model_config=model_config)
     reinforce_agent.train(train_context=tc,
                           callbacks=[duration.Fast(),
                                      log.Iteration()])
     assert tc.episodes_done_in_iteration == tc.num_episodes_per_iteration > 0
     assert tc.iterations_done_in_training == tc.num_iterations > 0
Example #9
 def test_reinforce_train(self):
     model_config = core.ModelConfig("CartPole-v0")
     tc = core.EpisodesTrainContext()
     reinforceAgent = tforce.TforceReinforceAgent(model_config=model_config)
     reinforceAgent.train(
         train_context=tc,
         callbacks=[log.Iteration(),
                    log.Agent(),
                    duration.Fast()])
Example #10
 def test_train(self):
     model_config = core.ModelConfig(_mountaincar_continuous_name)
     tc = core.StepsTrainContext()
     sac_agent = tfagents.TfSacAgent(model_config=model_config)
     sac_agent.train(
         train_context=tc,
         callbacks=[duration.Fast(),
                    log.Iteration(),
                    log.Agent()])
Example #11
    def test_reinforce_train(self):
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name)
        tc = core.EpisodesTrainContext()
        tc.num_iterations = 50
        reinforce_agent = tforce.TforceReinforceAgent(model_config=model_config)
        reinforce_agent.train(train_context=tc, callbacks=[log.Iteration(), log.Agent()])
        (min_r, avg_r, max_r) = tc.eval_rewards[tc.episodes_done_in_training]
        assert avg_r > 100
Example #12
 def test_dueling_dqn_train(self):
     model_config = core.ModelConfig("CartPole-v0", fc_layers=(100, ))
     tc: core.StepsTrainContext = core.StepsTrainContext()
     tc.num_iterations = 20000
     tc.num_steps_buffer_preload = 1000
     tc.num_iterations_between_eval = 1000
     tc.max_steps_per_episode = 200
     dqnAgent = tforce.TforceDuelingDqnAgent(model_config=model_config)
     dqnAgent.train(train_context=tc,
                    callbacks=[log.Iteration(eval_only=True),
                               log.Agent()])
Example #13
    def test_dqn_train(self):
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name, fc_layers=(100, 100))
        tc: core.StepsTrainContext = core.StepsTrainContext()
        tc.num_iterations = 10000
        tc.num_steps_buffer_preload = 500
        tc.num_iterations_between_eval = 500
        tc.max_steps_per_episode = 200
        dqn_agent = tforce.TforceDqnAgent(model_config=model_config)
        dqn_agent.train(train_context=tc, callbacks=[log.Iteration(eval_only=True), log.Agent()])
        (min_r, avg_r, max_r) = tc.eval_rewards[tc.episodes_done_in_training]
        assert avg_r > 50
Example #14
    def test_save_load(self):
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name)
        tc = core.PpoTrainContext()
        tc.num_iterations = 3
        ppo_agent = tforce.TforcePpoAgent(model_config=model_config)
        ppo_agent.train(train_context=tc, callbacks=[log.Iteration(), log.Agent()])
        tempdir = bcore._get_temp_path()
        bcore._mkdir(tempdir)
        ppo_agent.save(tempdir, [])

        loaded_agent = tforce.TforcePpoAgent(model_config=model_config)
        loaded_agent.load(tempdir, [])
        bcore._rmpath(tempdir)
Example #15
 def __init__(self,
              gym_env_name: str,
              fc_layers: Union[Tuple[int, ...], int, None] = None,
              backend: str = None):
     """
         Args:
             gym_env_name: name of an OpenAI gym environment to be used for training and evaluation
             fc_layers: defines the neural network to be used, a sequence of fully connected
                 layers of the given size. E.g. (75, 40) yields a neural network consisting
                 of 2 hidden layers, the first containing 75 and the second containing 40 neurons.
             backend: the backend to be used (e.g. 'tfagents'). If None, a default
                 implementation is used. Call get_backends() to get a list of the available backends.
     """
     model_config = core.ModelConfig(gym_env_name=gym_env_name, fc_layers=fc_layers)
     self._initialize(model_config=model_config, backend_name=backend)
     return
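The constructor above packs gym_env_name and fc_layers into a core.ModelConfig and resolves the backend via _initialize. A hedged usage sketch, assuming a concrete agent class such as easyagents.agents.PpoAgent exposes this constructor:

# Usage sketch for the constructor documented above; PpoAgent is an assumed concrete subclass.
from easyagents.agents import PpoAgent

# Two hidden fully connected layers with 75 and 40 neurons, default backend.
agent = PpoAgent(gym_env_name="CartPole-v0", fc_layers=(75, 40))

# Explicitly selecting a backend by name; get_backends() lists the registered backends.
agent = PpoAgent(gym_env_name="CartPole-v0", fc_layers=(75, 40), backend="tfagents")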
Example #16
 def test_save_load(self):
     model_config = core.ModelConfig(_lineworld_name)
     tc = core.PpoTrainContext()
     ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
     ppo_agent.train(
         train_context=tc,
         callbacks=[duration._SingleIteration(),
                    log.Iteration()])
     tempdir = bcore._get_temp_path()
     bcore._mkdir(tempdir)
     ppo_agent.save(tempdir, [])
     ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
     ppo_agent.load(tempdir, [])
     pc = core.PlayContext()
     pc.max_steps_per_episode = 10
     pc.num_episodes = 1
     ppo_agent.play(play_context=pc, callbacks=[])
     bcore._rmpath(tempdir)
Example #17
    def _initialize(self,
                    gym_env_name: str = None,
                    fc_layers: Tuple[int, ...] = None,
                    model_config: core.ModelConfig = None,
                    backend_name: str = None):

        if model_config is None:
            model_config = core.ModelConfig(gym_env_name=gym_env_name, fc_layers=fc_layers)
        if backend_name is None:
            backend_name = easyagents.backends.default.BackendAgentFactory.backend_name
        backend: bcore.BackendAgentFactory = _get_backend(backend_name)

        assert model_config is not None, "model_config not set."
        assert backend, f'Backend "{backend_name}" not found. The registered backends are {get_backends()}.'

        self._model_config: core.ModelConfig = model_config
        backend_agent = backend.create_agent(easyagent_type=type(self), model_config=model_config)
        assert backend_agent, f'Backend "{backend_name}" does not implement "{type(self).__name__}". ' + \
                              f'Choose one of the following backend {get_backends(type(self))}.'
        self._backend_agent: Optional[bcore._BackendAgent] = backend_agent
        return
Example #18
 def test_create_agent(self):
     f = BackendAgentFactoryTest.DebugAgentFactory()
     mc = core.ModelConfig(gym_env_name="CartPole-v0")
     a = f.create_agent(easyagent_type=easyagents.agents.DqnAgent,
                        model_config=mc)
     assert a is not None
Example #19
 def test_setbackendagent_twice(self):
     model_config = core.ModelConfig(self.env_name)
     agent = debug.DebugAgent(model_config)
     monitor._MonitorEnv._register_backend_agent(agent)
     monitor._MonitorEnv._register_backend_agent(agent)
     monitor._MonitorEnv._register_backend_agent(None)
Example #20
 def test_create_agent_not_implemented(self):
     f = BackendAgentFactoryTest.DebugAgentFactory()
     mc = core.ModelConfig(gym_env_name="CartPole-v0")
     a = f.create_agent(easyagent_type=easyagents.agents.ReinforceAgent,
                        model_config=mc)
     assert a is None
Example #21
 def __init__(self):
     super().__init__(
         core.ModelConfig(gym_env_name=BackendAgentTest.env_name),
         action=1)