Example #1
from baselines import deepq
from baselines import bench
from baselines import logger
from baselines.common.atari_wrappers import make_atari


def main():
    logger.configure()
    env = make_atari('PongNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)

    model = deepq.learn(
        env,
        "conv_only",
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
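        # dueling head: separate state-value and advantage streams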
        dueling=True,
        lr=1e-4,
        total_timesteps=int(1e7),
        buffer_size=10000,
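        # anneal epsilon from 1.0 to 0.01 over the first 10% of training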
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
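        # one gradient step every 4 env steps, after a 10000-step warm-up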
        train_freq=4,
        learning_starts=10000,
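        # copy the online weights to the target network every 1000 steps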
        target_network_update_freq=1000,
        gamma=0.99,
    )

    model.save('pong_model.pkl')
    env.close()
Example #2
import gym

from baselines import deepq


def main():
    env = gym.make("CartPole-v0")
    act = deepq.learn(env,
                      network='mlp',
                      lr=1e-3,
                      total_timesteps=100000,
                      buffer_size=50000,
                      exploration_fraction=0.1,
                      exploration_final_eps=0.02,
                      print_freq=10,
                      callback=callback)
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
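The callback referenced above is defined outside this snippet. In the baselines train_cartpole.py example it stops training early once CartPole is considered solved; a minimal sketch along those lines (199 is the solve threshold for CartPole-v0):

def callback(lcl, _glb):
    # lcl is the local namespace inside deepq.learn; stop once the
    # mean reward over the last 100 episodes reaches 199
    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
    return is_solved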
Example #3
import gym

from baselines import deepq
from baselines.common import models


def main():
    env = gym.make("MountainCar-v0")
    # Enabling layer_norm here is important for parameter space noise!
    act = deepq.learn(env,
                      network=models.mlp(num_hidden=64, num_layers=1, layer_norm=True),
                      lr=1e-3,
                      total_timesteps=100000,
                      buffer_size=50000,
                      exploration_fraction=0.1,
                      exploration_final_eps=0.1,
                      print_freq=10,
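                      # perturb network parameters for exploration instead of relying only on epsilon-greedy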
                      param_noise=True)
    print("Saving model to mountaincar_model.pkl")
    act.save("mountaincar_model.pkl")
Example #4
import gym

from baselines import deepq
from baselines.common import models


def main():
    env = gym.make("MountainCar-v0")
    act = deepq.learn(env,
                      network=models.mlp(num_layers=1, num_hidden=64),
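                      # total_timesteps=0 skips training: the network is built and the saved weights are loaded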
                      total_timesteps=0,
                      load_path='mountaincar_model.pkl')

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
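            # act expects a batch of observations: obs[None] adds the batch axis, [0] takes the single action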
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #5
import gym

from baselines import deepq


def main():
    env = gym.make("CartPole-v0")
    act = deepq.learn(env,
                      network='mlp',
                      total_timesteps=0,
                      load_path="cartpole_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #6
import gym

from baselines import deepq


def main():
    env = gym.make("PongNoFrameskip-v4")
    env = deepq.wrap_atari_dqn(env)
    model = deepq.learn(env,
                        "conv_only",
                        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                        hiddens=[256],
                        dueling=True,
                        total_timesteps=0,
                        load_path='pong_model.pkl')  # load the weights saved by Example #1

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(model(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)