示例#1
0
文件: breakout.py 项目: Irlyue/dqn
def breakout_env_test():
    """Play one episode of ``Breakout-v0`` with uniformly random actions.

    Every frame is rendered. The number of steps taken until the environment
    reports ``done`` is printed, and the environment is closed afterwards,
    whether the episode finished normally or a step raised.
    """
    env = wrap_env(gym.make('Breakout-v0'))
    try:
        # The initial observation is not needed because actions are random.
        env.reset()
        counter = 0
        while True:
            counter += 1
            env.render()
            # Only the done flag is used; the other step outputs are ignored.
            _, _, done, _ = env.step(np.random.randint(env.action_space.n))
            if done:
                print('counter = ', counter)
                break
    finally:
        # Let the environment clean up (e.g. any render window it opened).
        env.close()
示例#2
0
文件: breakout.py 项目: Irlyue/dqn
def show_result():
    """Replay the saved Breakout policy forever, one episode after another.

    Loads the act function from ``breakout_model.ckpt``, then repeatedly
    resets the environment, renders each step while following the policy,
    and prints the summed reward once the episode reports ``done``.
    The outer loop never terminates on its own.
    """
    game = wrap_env(gym.make("BreakoutNoFrameskip-v4"))
    policy = simple.ActWrapper.load("breakout_model.ckpt", num_cpus=1)

    while True:
        observation = game.reset()
        finished = False
        total_reward = 0
        while not finished:
            game.render()
            # The policy is fed the observation with a leading axis added and
            # the first element of its output is used as the action
            # (presumably a batch of size one -- confirm against ActWrapper).
            action = policy(observation[None])[0]
            observation, reward, finished, _ = game.step(action)
            total_reward += reward
        print("Episode reward", total_reward)
示例#3
0
文件: pong.py 项目: Irlyue/dqn
def main():
    """Train a policy on ``PongNoFrameskip-v4`` and save it.

    Builds the wrapped environment, runs ``simple.learn`` with the Pong
    Q-function and the hyperparameters below, then writes the resulting
    act object to ``./pong_model.ckpt``.
    """
    env = wrap_env(gym.make("PongNoFrameskip-v4"))

    # Keyword arguments forwarded unchanged to simple.learn.
    hyperparameters = {
        "n_steps": 2000000,
        "exploration_fraction": 0.20,
        "final_epsilon": 0.01,
        "alpha": 1e-3,
        "buffer_size": 10000,
        "train_main_every": 4,
        "update_target_every": 1000,
        "gamma": 0.99,
        "print_every": 1,
        "pre_run_steps": 10000,
        "callback": callback,
    }
    act = simple.learn(env, q_func_pong, **hyperparameters)
    act.save("./pong_model.ckpt")
示例#4
0
文件: breakout.py 项目: Irlyue/dqn
def main():
    """Train a policy on ``BreakoutNoFrameskip-v4`` and save it.

    Builds the wrapped environment, runs ``simple.learn`` with the Breakout
    Q-function and the hyperparameters below, then writes the resulting
    act object to ``./breakout_model.ckpt``.
    """
    env = wrap_env(gym.make("BreakoutNoFrameskip-v4"))

    # Keyword arguments forwarded unchanged to simple.learn.
    hyperparameters = {
        "n_steps": 500000,
        "exploration_fraction": 0.2,
        "final_epsilon": 0.01,
        "alpha": 5e-4,
        "buffer_size": 10000,
        "train_main_every": 4,
        "update_target_every": 1000,
        "gamma": 0.99,
        "print_every": 4,
        "pre_run_steps": 10000,
        "callback": callback,
    }
    act = simple.learn(env, q_func_breakout, **hyperparameters)
    act.save("./breakout_model.ckpt")