Example #1
0
def record_video(run_name: str, model_file: str, video_length: int):
    """Load the trained model for *run_name* and record a rollout video.

    The video is written to the run directory's ``video`` subfolder,
    capturing frames from the very first step for ``video_length`` steps.

    :param run_name: name of the training run whose model is loaded
    :param model_file: model checkpoint file within the run directory
    :param video_length: number of frames to record
    """
    run_dir = get_run_dir(run_name)
    env, model = load_model(run_name, model_file)
    # Wrap the env so recording starts at step 0.
    recorder = VecVideoRecorder(
        env,
        os.path.join(run_dir, 'video'),
        record_video_trigger=lambda step: step == 0,
        video_length=video_length,
        name_prefix='video',
    )
    obs = recorder.reset()
    # One extra step so the final frame is captured before closing.
    for _ in range(video_length + 1):
        action, _states = model.predict(obs, deterministic=False)
        obs, _reward, _done, _info = recorder.step(action)
        recorder.render()
    recorder.close()
Example #2
0
def record_video(trained_model, env, video_folder, video_length, name):
    """Record *video_length* steps of *trained_model* acting in *env*.

    :param trained_model: agent exposing ``set_env`` and ``predict(obs)``
    :param env: vectorized environment to roll out in
    :param video_folder: directory where the video file is written
    :param video_length: number of frames to record
    :param name: filename prefix for the recorded video
    """
    trained_model.set_env(env)
    # Record the video starting at the first step
    env = VecVideoRecorder(env,
                           video_folder,
                           record_video_trigger=lambda x: x == 0,
                           video_length=video_length,
                           name_prefix=name)

    # BUG FIX: take the initial observation from the *wrapped* env's reset.
    # Previously ``obs`` came from a reset performed before wrapping, and the
    # recorder's own reset observation was discarded, so the first prediction
    # used a stale observation.
    obs = env.reset()
    for _ in range(video_length + 1):
        # ``predict`` returns (action, state); only the action is stepped.
        action = trained_model.predict(obs)
        obs, _, _, _ = env.step(action[0])
    # Save the video
    env.close()
def record_video(eval_env, model, video_length=500, prefix='', video_folder=video_folder):
  """
  Record a rollout of ``model`` in ``eval_env`` as a video file.

  :param eval_env: (VecEnv) evaluation environment to roll out in
  :param model: (RL model) trained agent providing ``predict``
  :param video_length: (int) number of frames to record
  :param prefix: (str) filename prefix for the recorded video
  :param video_folder: (str) output directory; NOTE(review): the default is
      captured from a module-level ``video_folder`` at definition time —
      confirm that global is defined before this function is defined
  """
  # Start the video at step=0 and record 500 steps
  eval_env = VecVideoRecorder(eval_env, video_folder=video_folder,
                              record_video_trigger=lambda step: step == 0, video_length=video_length,
                              name_prefix=prefix)

  obs = eval_env.reset()
  for _ in range(video_length):
    # Greedy (deterministic) actions: no exploration noise in the recording.
    action, _ = model.predict(obs, deterministic=True)
    obs, _, _, _ = eval_env.step(action)

  # Close the video recorder
  eval_env.close()
Example #4
0
def run(env):
    """Run the saved "conduziadrive" PPO agent in *env* indefinitely.

    Wraps *env* in a video recorder (first 1000 steps) and observation/reward
    normalization, then loops over episodes of up to 1000 steps each, printing
    per-episode length and reward.

    :param env: a vectorized environment compatible with VecVideoRecorder
    """
    drive = PPO.load("conduziadrive")

    env = VecVideoRecorder(
        env,
        log_dir + '/videos/',
        record_video_trigger=lambda x: x == 0,
        video_length=1000,
        name_prefix="conduzia-drive-agent-{}".format(gym_env_id))

    env = VecNormalize(env,
                       gamma=0.9997,
                       norm_obs=True,
                       norm_reward=True,
                       clip_obs=10.,
                       epsilon=0.1)

    rewards = []

    try:
        while True:
            obs = env.reset()
            # BUG FIX: the episode reward is reset every episode; previously
            # it accumulated across episodes, so the printed "Reward" and the
            # ``rewards`` entries were cumulative totals.
            total_reward = 0

            for t in range(1000):
                action, _states = drive.predict(obs, deterministic=True)
                obs, reward, done, info = env.step(action)
                env.render()
                total_reward += reward
                if t % 100 == 0:
                    print(t)
                if done:
                    break
            print("Finished after {} timesteps".format(t + 1))
            print("Reward: {}".format(total_reward))
            rewards.append(total_reward)
    finally:
        # BUG FIX: close exactly once on exit. Previously the env was closed
        # at the end of *every* loop iteration and then reset again on the
        # next one, which fails on a closed environment.
        env.close()
Example #5
0
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path, env=env)

    obs = env.reset()

    if video_folder is None:
        video_folder = os.path.join(log_path, "videos")

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=name_prefix,
    )

    env.reset()
    try:
        for _ in range(video_length + 1):
            action, _ = model.predict(obs, deterministic=deterministic)
            obs, _, _, _ = env.step(action)
            if not args.no_render:
                env.render()
    except KeyboardInterrupt:
        pass

    env.close()
def record_video(env_id: str = "CartPole-v1",
                 algo: str = "ppo",
                 folder: str = "rl-trained-agents",
                 video_folder: str = "logs/videos/",
                 video_length: int = 1000,
                 n_envs: int = 1,
                 deterministic: bool = False,
                 seed: int = 0,
                 no_render: bool = False,
                 exp_id: int = 0):
    """Record a video of a trained agent stored in an rl-zoo-style log folder.

    :param env_id: gym environment id the agent was trained on
    :param algo: algorithm key into ``ALGOS`` (e.g. "ppo")
    :param folder: root folder holding trained agents, laid out as
        ``folder/algo/{env_id}_{exp_id}``
    :param video_folder: directory where the video file is written
    :param video_length: number of frames to record
    :param n_envs: number of parallel environments to create
    :param deterministic: whether actions are taken greedily
    :param seed: environment seed
    :param no_render: disable on-screen rendering during the rollout
    :param exp_id: experiment id; 0 means "use the latest run"
    """
    if exp_id == 0:
        exp_id = get_latest_run_id(os.path.join(folder, algo), env_id)
        print(f"Loading latest experiment, id={exp_id}")
    # Sanity checks
    if exp_id > 0:
        log_path = os.path.join(folder, algo, f"{env_id}_{exp_id}")
    else:
        log_path = os.path.join(folder, algo)

    model_path = os.path.join(log_path, f"{env_id}.zip")

    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    is_atari = "NoFrameskip" in env_id

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path)

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=f"{algo}-{env_id}",
    )

    # BUG FIX: take the initial observation from the *wrapped* env's reset.
    # Previously the env was reset before wrapping and the recorder's reset
    # observation was discarded, so the first prediction used a stale
    # observation that did not match the recorder's actual state.
    obs = env.reset()
    for _ in range(video_length + 1):
        action, _ = model.predict(obs, deterministic=deterministic)
        obs, _, _, _ = env.step(action)

    # Workaround for https://github.com/openai/gym/issues/893
    if n_envs == 1 and "Bullet" not in env_id and not is_atari:
        env = env.venv
        # DummyVecEnv
        while isinstance(env, VecEnvWrapper):
            env = env.venv
        if isinstance(env, DummyVecEnv):
            env.envs[0].env.close()
        else:
            env.close()
    else:
        # SubprocVecEnv
        env.close()