def main():
    """Train a discrete REINFORCE agent on CartPole-v1 and plot episode rewards."""
    env = gym.make('CartPole-v1')

    save_ith_epoch = 1000
    dir_name = "./model_saves/reinforce/try10k_expl/"

    def create_video_callable(save_ith_episode):
        """Return a predicate that is True on every `save_ith_episode`-th episode."""
        def video_callable(episode):
            # Episodes are 0-indexed, hence the +1.
            return (episode + 1) % save_ith_episode == 0

        return video_callable

    kwargs = {
        "directory": os.path.join(dir_name, "monitor"),
        "resume": True,
        # NOTE(review): force=True wipes the monitor directory, which defeats
        # resume=True — confirm which behavior is intended.
        "force": True,
        "video_callable": create_video_callable(save_ith_epoch)
    }

    with gym.wrappers.Monitor(env, **kwargs) as env_monitor:

        model = CartPoleModel()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        agent = DiscreteReinforce(env=env_monitor,
                                  model=model,
                                  optimizer=optimizer,
                                  gamma=0.98,
                                  exploration='proportional')

        rewards = train_sessions(
            num_epochs=10000,
            agent=agent,
            dir_name=dir_name,
            save_ith_epoch=save_ith_epoch,
            monitor=False,
        )

    env.close()

    print(rewards)
    plt.figure()
    plt.plot(rewards)
    # Overlay a 100-episode moving average to smooth the reward curve.
    plt.plot(np.convolve(rewards, np.ones((100, )) / 100, mode='same'))
    plt.show()
def main():
    """Train a continuous REINFORCE agent on MountainCarContinuous-v0 and plot rewards."""
    env = gym.make('MountainCarContinuous-v0')

    save_ith_epoch = 100
    dir_name = "./model_saves/reinforce/try5/"

    kwargs = {
        "directory": os.path.join(dir_name, "monitor"),
        "resume": False,
        "force": True,
        # Record a video only every `save_ith_epoch`-th episode.
        "video_callable": create_video_callable(save_ith_epoch)
    }

    with gym.wrappers.Monitor(env, **kwargs) as env_monitor:

        model = MouintainCarContinousModel()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

        agent = ContinousReinforce(
            env=env_monitor,
            model=model,
            optimizer=optimizer,
            gamma=0.98,
            distribution="Normal",
            decoder=None,
        )

        rewards = train_sessions(
            num_epochs=10000,
            agent=agent,
            dir_name=dir_name,
            save_ith_epoch=save_ith_epoch,
            monitor=False,
        )

    env.close()

    print(rewards)
    plt.figure()
    plt.plot(rewards)
    # Overlay a 50-episode moving average to smooth the reward curve.
    plt.plot(np.convolve(rewards, np.ones((50, )) / 50, mode='same'))
    plt.show()
Example #3
def main():
    """Resume training a continuous actor-critic agent on Pendulum-v0 and plot rewards."""
    env = gym.make('Pendulum-v0')

    save_ith_epoch = 500
    dir_name = "./model_saves/actorcritic/try5k/"

    # Resume from the previously saved best checkpoint.
    # NOTE(review): torch.load unpickles arbitrary code — only load trusted files.
    model = torch.load(os.path.join(dir_name, "best_model"))
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    kwargs = {
        "directory": os.path.join(dir_name, "monitor"),
        "resume": True,
        # NOTE(review): force=True wipes the monitor directory, which defeats
        # resume=True — confirm which behavior is intended.
        "force": True,
        "video_callable": create_video_callable(save_ith_epoch),
    }
    with gym.wrappers.Monitor(env, **kwargs) as env_monitor:

        agent = ContinuousActorCritic(
            env=env_monitor,
            model=model,
            optimizer=optimizer,
            gamma=0.98,
            distribution="Normal",
            decoder=None,
        )

        rewards = train_sessions(
            num_epochs=5000,
            agent=agent,
            dir_name=dir_name,
            save_ith_epoch=save_ith_epoch,
            monitor=False,
            init_expl=100,
            # Presumably offsets epoch numbering to continue the earlier run — TODO confirm.
            start_epoch=10000,
        )

    env.close()

    print(rewards)
    plt.figure()
    plt.plot(rewards)
    # Overlay a 50-episode moving average to smooth the reward curve.
    plt.plot(np.convolve(rewards, np.ones((50,)) / 50, mode='same'))
    plt.show()
def main():
    """Train a continuous REINFORCE agent on CarRacing-v0 from pixels.

    Observations are encoded with a pre-trained VAE; the VAE is loaded from
    disk (its training was done offline in a separate run).
    """
    num_epochs = 20
    save_ith_epoch = 2

    representation_dim = 16  # latent dimension of the pre-trained VAE
    module_dir_vae = "./model_saves/simpleCNNVAE_startGas_nroll100_lroll100"

    # NOTE(review): torch.load unpickles arbitrary code — only load trusted files.
    module_vae = torch.load(os.path.join(module_dir_vae, "best_module"))

    env = gym.make('CarRacing-v0')

    encoder = module_vae.vae_encoder
    decoder = module_vae.decoder

    # Policy head on top of the VAE encoder; out_dim=3 matches CarRacing's
    # (steer, gas, brake) action space — TODO confirm against env.action_space.
    model = set_up_repr_dualhead_from_pixels(encoder, encoder_out_dim=representation_dim, out_dim=3)
    # Optimize the policy/encoder parameters together with the decoder parameters.
    optimizer = torch.optim.Adam(
        [{"params": model.parameters()}, {"params": decoder.parameters()}],
        lr=1e-3,
    )

    dir_name = "./model_saves/reinforce/try0/"

    kwargs = {
        "directory": os.path.join(dir_name, "monitor"),
        "resume": True,
        # NOTE(review): force=True wipes the monitor directory, which defeats
        # resume=True — confirm which behavior is intended.
        "force": True,
        "video_callable": create_video_callable(save_ith_epoch),
    }

    with gym.wrappers.Monitor(env, **kwargs) as env_monitor:

        agent = ContinousReinforce(
            env=env_monitor,
            model=model,
            optimizer=optimizer,
            gamma=0.98,
            distribution="Normal",
            decoder=decoder,
            from_pixel=True,
            convert_to_action_space=convert_to_action_space,
            convert_from_action_space=convert_from_action_space,
        )

        rewards = train_sessions(
            num_epochs=num_epochs,
            agent=agent,
            dir_name=dir_name,
            save_ith_epoch=1,
            monitor=False,
        )

    env.close()

    print(rewards)
    plt.figure()
    plt.plot(rewards)
    # Overlay a 50-episode moving average to smooth the reward curve.
    plt.plot(np.convolve(rewards, np.ones((50,)) / 50, mode='same'))
    plt.show()