Example #1
    episodes = args.episode      # number of training episodes
    time_steps = args.steps      # maximum time steps per episode
    epsilon = args.epsilon       # exploration rate for the epsilon-greedy policy
    render = args.render         # whether to show the animation after training

    # --- run algorithm ---
    # td_lam.load_models()
    stats = td_lam.train(env, episodes, time_steps, epsilon)
    # td_lam.save_models()

    # --- visualize the results ---
    result_folder = Path(__file__).resolve().parent / 'results'
    viz = Visualizer(result_path=result_folder)
    viz.plot_episode_length(stats,
                            plot_name=f'td_episode_length_{args.exp_count}')
    viz.plot_reward(stats, plot_name=f'td_rewards_{args.exp_count}')

    # --- animation ---
    if render:
        with contextlib.closing(ContinuousCartPoleEnv()) as env:
            for _ in range(2):
                s = env.reset()
                for _ in range(300):
                    env.render()
                    a = td_lam.get_action(s, epsilon=0.02)
                    s, _, d, _ = env.step(a)
                    if d:
                        break
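
The hyperparameters above are read from a parsed command-line object. Below is a minimal sketch of the argument parser these attributes imply; the use of argparse, the flag spellings, and the default values are assumptions, since only the attribute names (episode, steps, epsilon, render, exp_count) appear in the snippet.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--episode', type=int, default=500)     # number of training episodes
parser.add_argument('--steps', type=int, default=500)       # max time steps per episode
parser.add_argument('--epsilon', type=float, default=0.1)   # exploration rate
parser.add_argument('--render', action='store_true')        # show the animation after training
parser.add_argument('--exp_count', type=int, default=0)     # experiment index used in plot file names
args = parser.parse_args()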
Example #2
    # Assumed call head (the snippet opens mid-call): the REINFORCE agent's
    # constructor; any arguments that preceded action_dim are not shown.
    reinforce = REINFORCE(action_dim,
                          gamma=0.99,
                          hidden_dim=hidden_dim,
                          policy_lr=policy_lr,
                          baseline_lr=baseline_lr,
                          d2c=d2c_converter)

    # --- run algorithm ---
    if load_flag:
        reinforce.load_models(model_name=exp_count)
    stats = reinforce.train(env=env, episodes=episodes, time_steps=timesteps)
    reinforce.save_models(model_name=exp_count)

    # --- visualize the results ---
    result_folder = Path(__file__).resolve().parent / 'results'
    viz = Visualizer(result_path=result_folder)
    viz.plot_episode_length(stats, plot_name=f'r_d_episodes_{exp_count}')
    viz.plot_reward(stats, plot_name=f'r_d_rewards_{exp_count}')

    # --- animation ---
    if render_flag:
        with contextlib.closing(ContinuousCartPoleEnv()) as env:
            for _ in range(2):
                s = env.reset()
                for _ in range(500):
                    env.render()
                    a, _ = reinforce.get_action(s)
                    s, _, d, _ = env.step(a)
                    if d:
                        break
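
Unlike the TD(λ) snippet, get_action here returns a pair, of which only the action is used for rendering; the second element is presumably the log-probability needed for the policy-gradient update. A minimal sketch of such a method for a Gaussian policy, assuming a PyTorch implementation (the actual REINFORCE agent is not shown in the snippet), is:

import torch
from torch.distributions import Normal

def get_action(policy_net, state):
    """Sample a continuous action and return it with its log-probability."""
    state_t = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
    mean, log_std = policy_net(state_t)           # hypothetical network returning mean and log-std
    dist = Normal(mean, log_std.exp())
    action = dist.sample()
    log_prob = dist.log_prob(action).sum(dim=-1)  # joint log-prob over action dimensions
    return action.squeeze(0).numpy(), log_prob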
Example #3
    # Assumed call head (the snippet opens mid-call): the PPO agent's
    # constructor; any arguments that preceded critic_lr are not shown.
    ppo = PPO(critic_lr=critic_lr,
              K_epochs=K_epochs,
              eps_clip=eps_clip,
              entropy_coeff=entropy_coeff,
              verbose=verbose_flag)
    if load_flag:
        ppo.load_models(model_name=exp_count)
    stats = ppo.train(env=env,
                      episodes=episodes,
                      timesteps=timesteps,
                      update_timestep=update_timestep)
    ppo.save_models(model_name=exp_count)

    # --- visualize the results ---
    result_folder = Path(__file__).resolve().parent / 'results'
    viz = Visualizer(result_path=result_folder)
    viz.plot_episode_length(stats, plot_name=f'ppo_c_episodes_{exp_count}')
    viz.plot_reward(stats, plot_name=f'ppo_c_rewards_{exp_count}')

    # --- animation ---
    if render_flag:
        with contextlib.closing(ContinuousCartPoleEnv()) as env:
            for _ in range(3):
                s = env.reset()
                for _ in range(500):
                    env.render()
                    a = ppo.get_action(s)
                    s, _, d, _ = env.step(a)
                    if d:
                        break
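
All three scripts end with the same rendering pattern: open a fresh ContinuousCartPoleEnv, roll out the trained policy for a few episodes, and close the window via contextlib.closing. A small helper like the sketch below (not part of the original scripts) would keep that demo loop in one place; ContinuousCartPoleEnv is assumed to be importable from the project, as in the snippets above.

import contextlib

def watch(act_fn, episodes=3, max_steps=500):
    """Render a few rollouts of a trained agent in a fresh environment."""
    with contextlib.closing(ContinuousCartPoleEnv()) as env:
        for _ in range(episodes):
            s = env.reset()
            for _ in range(max_steps):
                env.render()
                s, _, done, _ = env.step(act_fn(s))
                if done:
                    break

# e.g. watch(ppo.get_action) or watch(lambda s: td_lam.get_action(s, epsilon=0.02))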