def test_train_agent_dqn(self):
    """Train with the 'lunarlander-dqn' environment and check the score count.

    The number of scores returned by ``Experiment.train()`` must equal
    ``max_steps / max_episode_steps`` as read from the trainer config of
    the currently selected experiment (presumably one score per episode).
    """
    cfg = TestAgentConfig.get()
    exp = Experiment(cfg)
    exp.set_env('lunarlander-dqn')

    train_scores = exp.train()

    # Read the limits only after training, from the currently active
    # experiment configuration.
    step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
    episode_len = cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps
    expected_count = step_budget / episode_len
    assert len(train_scores) == expected_count
def test_train_agent_dqn_with_disabled_prioritized_replay(self):
    """Train with the 'lunarlander-dqn-noprio' environment and check the score count.

    The number of scores returned by ``Experiment.train()`` must equal
    ``max_steps / max_episode_steps`` as read from the trainer config of
    the currently selected experiment (presumably one score per episode).
    """
    cfg = TestAgentConfig.get()
    exp = Experiment(cfg)
    exp.set_env('lunarlander-dqn-noprio')

    train_scores = exp.train()

    # Read the limits only after training, from the currently active
    # experiment configuration.
    step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
    episode_len = cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps
    expected_count = step_budget / episode_len
    assert len(train_scores) == expected_count
def test_train_environment_gymai_lunarlander(self):
    """Train with the 'lunarlander' environment and check the score count.

    The number of scores returned by ``Experiment.train()`` must equal
    ``max_steps / max_episode_steps`` as read from the trainer config of
    the currently selected experiment (presumably one score per episode).
    """
    cfg = TestEnvConfig.get()
    exp = Experiment(cfg)
    exp.set_env('lunarlander')

    train_scores = exp.train()

    # Read the limits only after training, from the currently active
    # experiment configuration.
    step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
    episode_len = cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps
    expected_count = step_budget / episode_len
    assert len(train_scores) == expected_count
def test_train_environment_unity_banana(self):
    """Train with the 'banana' environment and check the score count.

    The number of scores returned by ``Experiment.train()`` must equal
    ``max_steps / max_episode_steps`` as read from the trainer config of
    the currently selected experiment (presumably one score per episode).
    """
    cfg = TestEnvConfig.get()
    exp = Experiment(cfg)
    exp.set_env('banana')

    train_scores = exp.train()

    # Read the limits only after training, from the currently active
    # experiment configuration.
    step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
    episode_len = cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps
    expected_count = step_budget / episode_len
    assert len(train_scores) == expected_count
def test_train_environment_atari_breakout_rgb(self):
    """Train with the 'breakout-rgb' environment and check the score count.

    The number of scores returned by ``Experiment.train()`` must equal
    ``max_steps / max_episode_steps`` as read from the trainer config of
    the currently selected experiment (presumably one score per episode).
    """
    cfg = TestEnvConfig.get()
    exp = Experiment(cfg)
    exp.set_env('breakout-rgb')

    train_scores = exp.train()

    # Read the limits only after training, from the currently active
    # experiment configuration.
    step_budget = cfg.get_current_exp_cfg().trainer_cfg.max_steps
    episode_len = cfg.get_current_exp_cfg().trainer_cfg.max_episode_steps
    expected_count = step_budget / episode_len
    assert len(train_scores) == expected_count
def test_train_configExist_canTrain1Episode(self):
    """Run a short training on every listed non-unity environment.

    Each environment name returned by ``Experiment.list_envs()`` is
    selected in turn. Unless the config then reports the env type
    'unity', training runs with explicit step limits and the number of
    returned scores must equal ``max_steps / max_episode_steps``.
    """
    step_budget = 128
    episode_len = 2

    cfg = Config(test=True)
    exp = Experiment(cfg)

    for env_name in exp.list_envs():
        exp.set_env(env_name)

        # NOTE(review): assumes both training and the assertion are
        # skipped for 'unity' environments — confirm intended scope.
        if cfg.get_env_type() == 'unity':
            continue

        train_scores = exp.train(
            max_steps=step_budget,
            eval_frequency=16,
            eval_steps=4,
            max_episode_steps=episode_len,
        )
        assert len(train_scores) == step_budget / episode_len