def test_train_agent_dqn(self):
        """Train on the 'lunarlander-dqn' env and check the score count.

        The length of the value returned by ``Experiment.train()`` must equal
        the trainer's ``max_steps`` divided by its ``max_episode_steps``.
        """
        cfg = TestAgentConfig.get()
        exp = Experiment(cfg)
        exp.set_env('lunarlander-dqn')

        episode_scores = exp.train()

        # The limits are read from the current experiment config after
        # training has finished, then compared using true division.
        step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
        steps_per_episode = (
            cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps)
        assert len(episode_scores) == step_budget / steps_per_episode
    def test_train_agent_dqn_with_disabled_prioritized_replay(self):
        """Train on the 'lunarlander-dqn-noprio' env and check the score count.

        The length of the value returned by ``Experiment.train()`` must equal
        the trainer's ``max_steps`` divided by its ``max_episode_steps``.
        """
        cfg = TestAgentConfig.get()
        exp = Experiment(cfg)
        exp.set_env('lunarlander-dqn-noprio')

        episode_scores = exp.train()

        # The limits are read from the current experiment config after
        # training has finished, then compared using true division.
        step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
        steps_per_episode = (
            cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps)
        assert len(episode_scores) == step_budget / steps_per_episode
    def test_train_environment_gymai_lunarlander(self):
        """Train on the 'lunarlander' env and check the score count.

        The length of the value returned by ``Experiment.train()`` must equal
        the trainer's ``max_steps`` divided by its ``max_episode_steps``.
        """
        cfg = TestEnvConfig.get()
        exp = Experiment(cfg)
        exp.set_env('lunarlander')

        episode_scores = exp.train()

        # The limits are read from the current experiment config after
        # training has finished, then compared using true division.
        step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
        steps_per_episode = (
            cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps)
        assert len(episode_scores) == step_budget / steps_per_episode
    def test_train_environment_unity_banana(self):
        """Train on the 'banana' env and check the score count.

        The length of the value returned by ``Experiment.train()`` must equal
        the trainer's ``max_steps`` divided by its ``max_episode_steps``.
        """
        cfg = TestEnvConfig.get()
        exp = Experiment(cfg)
        exp.set_env('banana')

        episode_scores = exp.train()

        # The limits are read from the current experiment config after
        # training has finished, then compared using true division.
        step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
        steps_per_episode = (
            cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps)
        assert len(episode_scores) == step_budget / steps_per_episode
    def test_train_environment_atari_breakout_rgb(self):
        """Train on the 'breakout-rgb' env and check the score count.

        The length of the value returned by ``Experiment.train()`` must equal
        the trainer's ``max_steps`` divided by its ``max_episode_steps``.
        """
        cfg = TestEnvConfig.get()
        exp = Experiment(cfg)
        exp.set_env('breakout-rgb')

        episode_scores = exp.train()

        # The limits are read from the current experiment config after
        # training has finished, then compared using true division.
        step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
        steps_per_episode = (
            cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps)
        assert len(episode_scores) == step_budget / steps_per_episode
# Example #6
# 0
    def test_train_configExist_canTrain1Episode(self):
        """Run a short training on every listed env that is not of type 'unity'.

        For each environment name returned by ``Experiment.list_envs()`` the
        environment is selected; if the config reports an env type other than
        'unity', training runs with small fixed limits and the length of the
        returned scores must equal ``max_steps / max_episode_steps``.
        """
        config = Config(test=True)
        experiment = Experiment(config)

        # Fixed, deliberately small training limits shared by all envs.
        max_steps = 128
        max_episode_steps = 2
        expected_len = max_steps / max_episode_steps

        for env_name in experiment.list_envs():
            experiment.set_env(env_name)

            # Environments of type 'unity' are selected but not trained here.
            if config.get_env_type() == 'unity':
                continue

            scores = experiment.train(
                max_steps=max_steps,
                eval_frequency=16,
                eval_steps=4,
                max_episode_steps=max_episode_steps,
            )

            assert len(scores) == expected_len