def test_interpretation_plot_q_dqn_returns():
    """Smoke-test the heatmapped-episode plot for a DQN agent.

    Builds a data bunch from 'maze-random-5x5-v0', trains a DQN through
    ``AgentLearner.fit(5)``, then calls ``plot_heatmapped_episode(2)`` on
    an ``AgentInterpretationAlpha`` wrapped around the learner. Nothing is
    asserted; the test passes when no call raises.
    """
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human')
    learner = AgentLearner(maze_data, DQN(maze_data))
    learner.fit(5)
    # The interpretation object is only needed for this single plot call.
    AgentInterpretationAlpha(learner).plot_heatmapped_episode(2)
def test_fit_function_dqn():
    """Smoke-test ``AgentLearner.fit`` with a DQN using prioritized replay.

    The environment is 'maze-random-5x5-v0' with ``max_steps=1000`` and the
    DQN is given a ``PriorityExperienceReplay(1000)`` memory. Nothing is
    asserted; the test passes when ``fit(5)`` completes without raising.
    """
    env_data = MDPDataBunch.from_env(
        'maze-random-5x5-v0',
        render='human',
        max_steps=1000,
    )
    # Created after the data bunch, matching the original construction order.
    replay_memory = PriorityExperienceReplay(1000)
    agent = DQN(env_data, memory=replay_memory)
    AgentLearner(env_data, agent).fit(5)
def test_priority_experience_replay():
    """Smoke-test ``PriorityExperienceReplay`` as the memory of a FixedTargetDQN.

    Runs ``fit(5)`` on 'maze-random-5x5-v0' (``max_steps=1000``). Nothing is
    asserted; the test passes when training completes without raising.
    """
    bunch = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=1000)
    learner = AgentLearner(
        bunch,
        FixedTargetDQN(bunch, memory=PriorityExperienceReplay(1000)),
    )
    learner.fit(5)
def test_double_dueling_dqn_model_maze():
    """Smoke-test a DoubleDuelingDQN on the 'maze-random-5x5-v0' environment.

    Uses ``max_steps=1000`` and calls ``fit(5)``. Nothing is asserted; the
    test passes when training completes without raising.
    """
    maze = MDPDataBunch.from_env(
        'maze-random-5x5-v0', render='human', max_steps=1000
    )
    agent = DoubleDuelingDQN(maze)
    trainer = AgentLearner(maze, agent)
    trainer.fit(5)
def test_basic_dqn_model_maze():
    """Smoke-test a default-configured DQN on 'maze-random-5x5-v0'.

    Uses ``max_steps=200`` and calls ``fit(5)``. Nothing is asserted; the
    test passes when training completes without raising.
    """
    maze_bunch = MDPDataBunch.from_env('maze-random-5x5-v0', render='human', max_steps=200)
    AgentLearner(maze_bunch, DQN(maze_bunch)).fit(5)
def test_fixed_target_dqn_model_maze():
    """Smoke-test a FixedTargetDQN on the 'maze-random-5x5-v0' environment.

    Prints a newline first, then trains with ``max_steps=1000`` via
    ``fit(5)``. Nothing is asserted; the test passes when training
    completes without raising.
    """
    # Emitted before any setup, as in the original.
    print('\n')
    env_bunch = MDPDataBunch.from_env(
        'maze-random-5x5-v0',
        render='human',
        max_steps=1000,
    )
    target_dqn = FixedTargetDQN(env_bunch)
    AgentLearner(env_bunch, target_dqn).fit(5)
def test_interpretation_plot_q_ddpg_returns():
    """Smoke-test the heatmapped-episode plot for a DDPG agent.

    Trains ``DDPG(data, batch=8)`` on 'Pendulum-v0' via ``fit(5)``, then
    calls ``plot_heatmapped_episode(2)`` on an ``AgentInterpretationAlpha``
    wrapped around the learner. Nothing is asserted; the test passes when
    no call raises.
    """
    # NOTE: 'MountainCarContinuous-v0' was previously tried here as an
    # alternative environment (same render='human' setting).
    pendulum = MDPDataBunch.from_env('Pendulum-v0', render='human')
    agent = DDPG(pendulum, batch=8)
    learner = AgentLearner(pendulum, agent)
    learner.fit(5)
    AgentInterpretationAlpha(learner).plot_heatmapped_episode(2)
def test_epsilon():
    """Train a FixedTargetDQN on the maze with explicit schedule settings.

    The data bunch is built with ``max_steps=100`` and ``add_valid=False``;
    the model receives ``batch_size=64``, ``max_episodes=100`` and
    ``copy_over_frequency=4``; training is ``fit(20)``. No epsilon-specific
    check is visible here: nothing is asserted, so the test passes when
    training completes without raising.
    """
    maze_data = MDPDataBunch.from_env(
        'maze-random-5x5-v0',
        render='human',
        max_steps=100,
        add_valid=False,
    )
    agent = FixedTargetDQN(
        maze_data,
        batch_size=64,
        max_episodes=100,
        copy_over_frequency=4,
    )
    AgentLearner(maze_data, agent).fit(20)
def test_ddpg():
    """Train a DDPG agent on 'Pendulum-v0' with ``fit(450)``.

    The model is built as ``DDPG(data, batch=8)``. Nothing is asserted; the
    test passes when training completes without raising.
    """
    # NOTE: 'MountainCarContinuous-v0' was previously tried here as an
    # alternative environment (same render='human' setting).
    pendulum_bunch = MDPDataBunch.from_env('Pendulum-v0', render='human')
    trainer = AgentLearner(pendulum_bunch, DDPG(pendulum_bunch, batch=8))
    trainer.fit(450)
def test_fit_function_ddpg():
    """Smoke-test ``AgentLearner.fit`` with a DDPG using prioritized replay.

    The environment is 'Pendulum-v0' with ``max_steps=1000`` and the DDPG
    is given a ``PriorityExperienceReplay(1000)`` memory. Nothing is
    asserted; the test passes when ``fit(5)`` completes without raising.
    """
    env_data = MDPDataBunch.from_env('Pendulum-v0', render='human', max_steps=1000)
    # Created after the data bunch, matching the original construction order.
    replay_memory = PriorityExperienceReplay(1000)
    ddpg_agent = DDPG(env_data, memory=replay_memory)
    learner = AgentLearner(env_data, ddpg_agent)
    learner.fit(5)