Пример #1
0
def test_interpretation_plot_q_dqn_returns():
    """Smoke test: fit a DQN on the random 5x5 maze, then plot an episode.

    Builds the data bunch with on-screen rendering, runs ``fit(5)`` and
    finally asks ``AgentInterpretationAlpha`` for the heat-mapped plot of
    episode ``2``. There are no assertions; the test passes when nothing
    raises.
    """
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', render='human')
    learner = AgentLearner(maze_data, DQN(maze_data))
    learner.fit(5)
    # Interpretation is built only after fitting, from the fitted learner.
    AgentInterpretationAlpha(learner).plot_heatmapped_episode(2)
def test_fit_function_dqn():
    """Smoke test: ``fit`` runs for a DQN backed by prioritized replay.

    Uses the random 5x5 maze with ``max_steps=1000`` and a
    ``PriorityExperienceReplay(1000)`` memory. Passes if ``fit(5)``
    completes without raising.
    """
    env_options = dict(render='human', max_steps=1000)
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', **env_options)
    replay = PriorityExperienceReplay(1000)
    agent = DQN(maze_data, memory=replay)
    AgentLearner(maze_data, agent).fit(5)
def test_priority_experience_replay():
    """Smoke test: FixedTargetDQN trains with a prioritized replay memory.

    Uses the random 5x5 maze with ``max_steps=1000`` and a
    ``PriorityExperienceReplay(1000)`` memory. Passes if ``fit(5)``
    completes without raising.
    """
    env_options = dict(render='human', max_steps=1000)
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', **env_options)
    replay = PriorityExperienceReplay(1000)
    agent = FixedTargetDQN(maze_data, memory=replay)
    AgentLearner(maze_data, agent).fit(5)
def test_double_dueling_dqn_model_maze():
    """Smoke test: a DoubleDuelingDQN can be fitted on the random 5x5 maze.

    The environment is rendered on screen with ``max_steps=1000``. Passes
    if ``fit(5)`` completes without raising.
    """
    env_options = dict(render='human', max_steps=1000)
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', **env_options)
    learner = AgentLearner(maze_data, DoubleDuelingDQN(maze_data))
    learner.fit(5)
def test_basic_dqn_model_maze():
    """Smoke test: a plain DQN can be fitted on the random 5x5 maze.

    The environment is rendered on screen with ``max_steps=200``. Passes
    if ``fit(5)`` completes without raising.
    """
    env_options = dict(render='human', max_steps=200)
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', **env_options)
    learner = AgentLearner(maze_data, DQN(maze_data))
    learner.fit(5)
def test_fixed_target_dqn_model_maze():
    """Smoke test: a FixedTargetDQN can be fitted on the random 5x5 maze.

    Prints a newline first, then builds the environment with
    ``max_steps=1000`` and on-screen rendering. Passes if ``fit(5)``
    completes without raising.
    """
    # Emitted before anything else, exactly as the original test did.
    print('\n')
    env_options = dict(render='human', max_steps=1000)
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', **env_options)
    learner = AgentLearner(maze_data, FixedTargetDQN(maze_data))
    learner.fit(5)
Пример #7
0
def test_interpretation_plot_q_ddpg_returns():
    """Smoke test: fit a DDPG agent on Pendulum, then plot an episode.

    Runs ``fit(5)`` on a ``DDPG(data, batch=8)`` model and then asks
    ``AgentInterpretationAlpha`` for the heat-mapped plot of episode ``2``.
    There are no assertions; the test passes when nothing raises.
    """
    # NOTE: 'MountainCarContinuous-v0' was previously tried here as an
    # alternative environment id.
    pendulum_data = MDPDataBunch.from_env('Pendulum-v0', render='human')
    agent = DDPG(pendulum_data, batch=8)
    learner = AgentLearner(pendulum_data, agent)
    learner.fit(5)
    # Interpretation is built only after fitting, from the fitted learner.
    AgentInterpretationAlpha(learner).plot_heatmapped_episode(2)
Пример #8
0
def test_epsilon():
    """Smoke test: fit a FixedTargetDQN on the maze without a validation set.

    The environment is created with ``max_steps=100`` and
    ``add_valid=False``; the model is configured with ``batch_size=64``,
    ``max_episodes=100`` and ``copy_over_frequency=4``. Passes if
    ``fit(20)`` completes without raising.

    NOTE(review): the name suggests this targets the epsilon exploration
    schedule, but nothing in this function checks it -- confirm intent.
    """
    env_options = dict(render='human', max_steps=100, add_valid=False)
    maze_data = MDPDataBunch.from_env('maze-random-5x5-v0', **env_options)
    model_options = dict(batch_size=64, max_episodes=100,
                         copy_over_frequency=4)
    agent = FixedTargetDQN(maze_data, **model_options)
    AgentLearner(maze_data, agent).fit(20)
def test_ddpg():
    """Longer DDPG run on Pendulum: ``fit(450)`` with ``batch=8``.

    There are no assertions; the test passes when training completes
    without raising.
    """
    # NOTE: 'MountainCarContinuous-v0' was previously tried here as an
    # alternative environment id.
    pendulum_data = MDPDataBunch.from_env('Pendulum-v0', render='human')
    agent = DDPG(pendulum_data, batch=8)
    AgentLearner(pendulum_data, agent).fit(450)
def test_fit_function_ddpg():
    """Smoke test: ``fit`` runs for a DDPG agent backed by prioritized replay.

    Uses Pendulum with ``max_steps=1000`` and a
    ``PriorityExperienceReplay(1000)`` memory. Passes if ``fit(5)``
    completes without raising.
    """
    env_options = dict(render='human', max_steps=1000)
    pendulum_data = MDPDataBunch.from_env('Pendulum-v0', **env_options)
    replay = PriorityExperienceReplay(1000)
    agent = DDPG(pendulum_data, memory=replay)
    AgentLearner(pendulum_data, agent).fit(5)