Example #1
def replay_save_video(env_id, policy, path_vids):

    if env_id == 'HalfCheetah-v2':
        from configs import cheetah_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
        nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cheetah_config()
    elif env_id == 'MountainCarContinuous-v0':
        from configs import cmc_config
        nb_bootstrap, nb_explorations, nb_tests, nb_timesteps, offline_eval, controller, representer, \
        nb_rep, engineer_goal, goal_space, initial_space, knn, noise, nb_weights = cmc_config()

    env = gym.make(env_id)
    vid_env = VideoRecorder(env=env, path=path_vids)
    obs = env.reset()
    rew = np.zeros([nb_timesteps + 1])
    done = False
    for t in range(nb_timesteps):
        if done:
            break
        act = controller.step(policy, obs).reshape(1, -1)
        out = env.step(np.copy(act))
        env.render()
        vid_env.capture_frame()  # capture the rendered frame so the saved video is not empty
        obs = out[0]
        rew[t + 1] = out[1]
        done = out[2]
    print('Run performance: ', np.nansum(rew))
    vid_env.close()
Example #2
def play(env, model, video_path, num_episodes, timesteps, metadata):
    video_recorder = None
    theta, theta_dot, actions = [], [], []  # logged trajectories
    for i_episodes in range(num_episodes):
        video_recorder = VideoRecorder(
            env=env, path=video_path, metadata=metadata, enabled=video_path is not None)
        obs = env.reset()
        for t in range(timesteps):
            obs = [np.array([[list(obs)]])]
            video_recorder.capture_frame()
            action = model.predict(obs)[0]
            obs, rew, done, info = env.step(action)
            env.render()
            theta.append(obs[0])
            theta_dot.append(obs[1])
            actions.append(action[0])
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                # save video of the episode (incrementing num_episodes here was a
                # no-op, since range() is fixed when the loop starts)
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
                break
    env.close()
    return theta
Example #3
def testCartpole(model, test=True, record=True):
    env = gym.make(ENV_NAME)
    if record:
        rec = VideoRecorder(env, "./video/iteration_%s.mp4" % str(model))
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space, load_path="./artifacts/iteration_%s/" % str(model),
                           test=test)
    run = 0
    while True:
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        run += 1
        while True:
            step += 1
            env.render()
            if record:
                rec.capture_frame()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            state_next = np.reshape(state_next, [1, observation_space])
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", score: " + str(step))
                if record:
                    rec.close()
                    record = False
                break
Example #4
def perform_debug_sequence(sequence_name,
                           env,
                           walker_type,
                           sequence_of_actions,
                           action_repeat=10,
                           has_gravity=False):
    video_folder = os.path.join(debug_folder, walker_type)
    if not os.path.exists(video_folder):
        os.makedirs(video_folder)

    if not has_gravity:
        env.world.gravity = (0, 0)
    else:
        env.world.gravity = (0, -10)

    video_recorder = VideoRecorder(
        env,
        os.path.join(video_folder, sequence_name + ".mp4"),
    )  # Stump Tracks
    env.reset()

    for action in sequence_of_actions:
        for i in range(action_repeat):
            _, _, d, _ = env.step(action)
            video_recorder.capture_frame()
            time.sleep(0.01)
            video_recorder.capture_frame()
    video_recorder.close()
Example #5
def play_episode(episode=0):
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)
    video_recorder = VideoRecorder(env, video_path)

    length = 0
    obs = env.reset()
    done = False
    while not done and length < max_episode_length:
        obs, reward, done, _ = env.step()

        video_recorder.capture_frame()

        # Obtain waypoints
        waypoints = []
        for entity in env.get_team_blue.tolist() + env.get_team_red.tolist():
            waypoints.extend(entity.get_loc())
        length += 1

    # Close the recorder
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)

    # Check if episode has right length played
    if length <= min_length or length >= max_length:
        return

    # Post Processing
    if env.blue_win and len(vid_success) < num_success:
        vid_success.append(vid)
    elif env.red_win and len(vid_failure) < num_failure:
        vid_failure.append(vid)
Example #6
def start(env, agent: AgentBase):
    global video_recorder
    scores = []
    total_steps = 0
    video_recorder = None
    video_enabled = True
    video_ext = determine_extension(env)
    if not video_ext:
        video_enabled = False

    total_start_time = time.time()
    for episode in range(1, episodes):
        episode_start_time = time.time()

        if (episode % video_frequency) == 0:
            video_recorder = VideoRecorder(env, video_dir + "/{}{}".format(episode, video_ext), enabled=video_enabled)

        score, steps = run_episode(env, agent, video_recorder)

        scores.append(score)
        total_steps += steps

        if episode_log_frequency > 0 and (episode + 1) % episode_log_frequency == 0:
            log_episode_summary(episodes, episode, score, steps, time.time() - episode_start_time,
                                time.time() - total_start_time)

        if episode % plot_frequency == 0:
            plot([i for i in range(episode)], scores)

        if video_recorder:
            video_recorder.close()
            video_recorder = None
Example #7
def test_no_frames():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.close()
    assert rec.empty
    assert rec.functional
    assert not os.path.exists(rec.path)
Example #8
def record_rollout(env, horizon, policy, record_file_path):
    """
    This is the recording function for the runner class which samples one episode with a specified length
    using the provided policy and records it in a video.


    Parameters
    ---------
    horizon: Int
        The task horizon/ episode length.
    policy: ModelBasedBasePolicy or ModelFreeBasePolicy
        The policy to be used in collecting the episodes from the different agents.
    record_file_path: String
        specified the file path to save the video that will be recorded in.
    """
    recorder = VideoRecorder(env, record_file_path + '.mp4')
    observations = env.reset()
    for t in range(horizon):
        recorder.capture_frame()
        if not isinstance(policy, ModelFreeBasePolicy):
            action_to_execute, expected_obs, expected_reward = policy.act(
                observations, t)
        else:
            action_to_execute = policy.act(observations, t)
        observations, reward, done, info = env.step(action_to_execute)
    recorder.capture_frame()
    recorder.close()
    return
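A minimal usage sketch for record_rollout follows; RandomPolicy is a hypothetical stand-in (it only assumes ModelFreeBasePolicy exposes an act(observations, t) interface), and the environment and path are arbitrary:

# Hypothetical usage of record_rollout; RandomPolicy is a stand-in.
import gym

class RandomPolicy(ModelFreeBasePolicy):
    def __init__(self, env):
        self.env = env

    def act(self, observations, t):
        # ignore observations and sample a random action
        return self.env.action_space.sample()

env = gym.make('Pendulum-v1')
record_rollout(env, horizon=200, policy=RandomPolicy(env),
               record_file_path='/tmp/rollout')  # writes /tmp/rollout.mp4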
Example #9
class VideoLogger(Callback):
    def __init__(self, env: MultiagentVecEnv, save_folder: str):
        super(VideoLogger, self).__init__()
        self.env = env
        self.save_folder = save_folder

        os.makedirs(save_folder, exist_ok=True)
        self.recorder = VideoRecorder(env, path=f'{save_folder}/0.mp4')

    def before_step(self,
                    logs: Optional[dict] = None,
                    actions: Optional[Dict[str, torch.Tensor]] = None,
                    action_distributions: Optional[Dict[str,
                                                        Distribution]] = None):
        self.recorder.capture_frame()

    def after_train(self,
                    logs: Optional[dict] = None,
                    obs: Optional[Dict[str, torch.Tensor]] = None,
                    rewards: Optional[Dict[str, torch.Tensor]] = None,
                    dones: Optional[Dict[str, torch.Tensor]] = None,
                    infos: Optional[Dict[str, torch.Tensor]] = None):
        # If there is just one env save each episode to a different file
        # Otherwise save the whole video at the end
        if self.env.num_envs == 1:
            if logs['env_done']:
                # Save video and make a new recorder
                self.recorder.close()
                self.recorder = VideoRecorder(
                    self.env,
                    path=f'{self.save_folder}/{logs["episodes"]}.mp4')

    def on_train_end(self):
        self.recorder.close()
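A sketch of how a training loop might drive these hooks; the env is assumed to be a MultiagentVecEnv, and sample_actions and the '__all__' done key are illustrative assumptions, not taken from the class above:

# Hypothetical driver loop for VideoLogger.
def run_with_video(env, sample_actions, steps=1000):
    logger = VideoLogger(env, save_folder='videos/run_0')
    episodes = 0
    obs = env.reset()
    for _ in range(steps):
        logger.before_step()  # capture a frame before the env advances
        obs, rewards, dones, infos = env.step(sample_actions(obs))
        env_done = bool(dones.get('__all__', False))  # assumed done key
        if env_done:
            episodes += 1
        logger.after_train(logs={'env_done': env_done, 'episodes': episodes})
    logger.on_train_end()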
Example #10
class RecordedEnv(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)
        self.start_of_episode = None

    def reset(self):
        self.start_of_episode = True
        return self.env.reset()

    def step(self, action, filename='', record_episode=False):
        if record_episode and self.start_of_episode:
            self.recorder = VideoRecorder(self.env, path=filename)
        self.start_of_episode = False

        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()

        next_state, reward, done, info = self.env.step(action)
        if hasattr(self, 'recorder') and done:
            self.recorder.close()  # close and save video at end of episode
            del self.recorder

        return next_state, reward, done, info

    def close(self):
        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()
            self.recorder.close()
            del self.recorder
        self.start_of_episode = True
        return self.env.close()
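A short usage sketch for the wrapper; the CartPole env and output path are arbitrary choices:

# Usage sketch for RecordedEnv.
import gym

env = RecordedEnv(gym.make('CartPole-v1'))
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()
    # passing record_episode=True on the first step starts the recorder
    obs, reward, done, info = env.step(action,
                                       filename='/tmp/episode.mp4',
                                       record_episode=True)
env.close()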
Example #11
    def mutate(self, weights, record):
        """ Mutate the inputted weights and evaluate its performance against the
        weights of the previous generation. """
        recorder = VideoRecorder(self.env,
                                 path=self.video_path) if record else None
        self.elite.set_weights(weights)
        self.oponent.set_weights(weights)
        perturbations = self.oponent.mutate(self.config['mutation_power'])

        _, oponent_reward1, ts1 = self.play_game(self.elite,
                                                 self.oponent,
                                                 recorder=recorder)
        oponent_reward2, _, ts2 = self.play_game(self.oponent,
                                                 self.elite,
                                                 recorder=recorder)

        if record:
            recorder.close()

        return {
            'total_reward': np.mean([oponent_reward1, oponent_reward2]),
            'timesteps_total': ts1 + ts2,
            'video': None if not record else wandb.Video(self.video_path),
            'noise': perturbations
        }
Example #12
    def play(self):

        from gym.wrappers.monitoring.video_recorder import VideoRecorder
        rec = VideoRecorder(self.env,
                            base_path=os.path.join(
                                self.log_dir,
                                self.log_dir.rsplit('/', 1)[1]))

        observation = self.env.reset()
        r = 0
        done = False
        while not done:
            action, q_vals = self.act(np.asarray(observation)[np.newaxis])

            next_state, reward, done, _ = self.env.step(action)

            r += reward

            observation = next_state

            self.env.render()
            rec.capture_frame()
            time.sleep(0.05)

        print('Game ended with score: ', r)
        self.env.close()
        rec.close()
Example #13
def record_video(env,
                 policy,
                 file_name,
                 number_of_resets=1,
                 max_time_steps=None):
    """
    Records a video of a policy for a specified environment
    :param env: (causal_world.CausalWorld) the environment to use for
                                           recording.
    :param policy: the policy to be evaluated
    :param file_name: (str) full path where the video is being stored.
    :param number_of_resets: (int) the number of resets/episodes to be viewed
    :param max_time_steps: (int) the maximum number of time steps per episode
    :return:
    """
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    for reset_idx in range(number_of_resets):
        policy.reset()
        obs = env.reset()
        recorder.capture_frame()
        if max_time_steps is not None:
            for i in range(max_time_steps):
                desired_action = policy.act(obs)
                obs, reward, done, info = env.step(action=desired_action)
                recorder.capture_frame()
        else:
            while True:
                desired_action = policy.act(obs)
                obs, reward, done, info = env.step(action=desired_action)
                recorder.capture_frame()
                if done:
                    break
    recorder.close()
    return
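A hedged usage sketch; make_env and my_policy are stand-ins for a causal_world environment factory and a policy with reset() and act(obs), neither of which is defined in this snippet:

# Hypothetical usage of record_video.
env = make_env()  # assumed: returns a causal_world.CausalWorld instance
record_video(env, policy=my_policy, file_name='/tmp/reaching_demo',
             number_of_resets=2, max_time_steps=100)
# -> writes /tmp/reaching_demo.mp4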
Example #14
def record_video_of_episode(episode,
                            file_name,
                            env_wrappers=np.array([]),
                            env_wrappers_args=np.array([])):
    """
     Records a video of a logged episode for a specified environment

     :param episode: (Episode) the logged episode
     :param file_name: (str) full path where the video is being stored.
     :param env_wrappers: (list) a list of gym wrappers
     :param env_wrappers_args: (list) a list of kwargs for the gym wrappers
     :return:
     """
    actual_skip_frame = episode.world_params["skip_frame"]
    env = get_world(episode.task_name,
                    episode.task_params,
                    episode.world_params,
                    enable_visualization=False,
                    env_wrappers=env_wrappers,
                    env_wrappers_args=env_wrappers_args)
    env.set_starting_state(episode.initial_full_state, check_bounds=False)
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    recorder.capture_frame()
    for time, observation, reward, action in zip(episode.timestamps,
                                                 episode.observations,
                                                 episode.rewards,
                                                 episode.robot_actions):
        for _ in range(actual_skip_frame):
            env.step(action)
            recorder.capture_frame()
    recorder.close()
    env.close()
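Usage would look roughly like the sketch below; load_logged_episode is a hypothetical helper standing in for however the project deserializes a logged Episode:

# Hypothetical usage of record_video_of_episode.
episode = load_logged_episode('logs/episode_0')  # assumed helper
record_video_of_episode(episode, file_name='/tmp/replay')
# -> replays the logged actions and writes /tmp/replay.mp4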
Example #15
def play_with_buffer(env, pi, num_episodes=3, video_path=None):
    if video_path is None:
        video_path = 'videos/REINFORCE_best.mp4'
    video_recorder = VideoRecorder(env, video_path, enabled=True)
    rep = ReplayMemory(pi.config)
    obs_mask = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
    for e_i in range(num_episodes):
        s = env.reset()
        z = np.matmul(obs_mask, s)
        rep.add(z, 0, 0)
        # env.unwrapped.render()
        done = False
        while not done:
            a = int(pi(rep.getState()))
            s_prime, r_t, done, _ = env.step(a)
            z_prime = np.matmul(obs_mask, s_prime)
            rep.add(z_prime, r_t, a)
            video_recorder.capture_frame()
            # env.render()
            # s = s_prime
            # z = z_prime
    video_recorder.close()
    # video_recorder.enabled = False
    env.close()
Example #16
File: play.py  Project: Jason93415/C-HGG
    def demoRecordPickAndPlaceObstacle(
            self, raw_path="videos/KukaPickAndPlaceObstacle"):
        env = self.env
        test_rollouts = 5
        goals = [[0.80948876, -0.24847823, 0.85],
                 [0.90204398, -0.24176245, 0.85],
                 [0.72934716, -0.19637749, 0.85],
                 [0.6970663, -0.25643907, 0.85],
                 [0.7029464, -0.18765762, 0.85]]
        recorder = VideoRecorder(env.env.env, base_path=raw_path)
        acc_sum, obs = 0.0, []
        for i in range(test_rollouts):
            env.reset()
            env.set_goal(np.array(goals[i]))
            obs.append(goal_based_process(env.get_obs()))
            print("Rollout {}/{} ...".format(i + 1, test_rollouts))
            for timestep in range(200):
                actions = self.my_step_batch(obs)
                obs, infos = [], []
                ob, _, _, info = env.step(actions[0])
                obs.append(goal_based_process(ob))
                infos.append(info)
                recorder.capture_frame()
        recorder.close()
Example #17
def play_episode(frame_count, episode=0):
    """
    play episode and render it into .gif
    """
    
    # Set video recorder
    video_dir = os.path.join(data_dir, 'raw_videos')
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)

    video_recorder = VideoRecorder(env, video_path)

    # Reset environment
    observation = env.reset()

    # Rollout episode
    episode_length = 0
    done = False
    while not done:
        # set exploration rate for this frame
        video_recorder.capture_frame()
        episode_length += 1

        observation, reward, done, _ = env.step()

        # stop the episode if it goes too long
        if episode_length >= max_episode_length:
            reward = -100.
            done = True

    # Close the recorder
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)

    success_flag = env.blue_win
    survival_rate = sum([agent.isAlive for agent in env.get_team_blue]) / len(env.get_team_blue)
    kill_rate = sum([not agent.isAlive for agent in env.get_team_red]) / len(env.get_team_red)

    if success_flag == 1 and len(vid_success) < num_success:
        vid_success.append(vid)
        success_episode_num.append(episode)
        
    elif success_flag == 0 and len(vid_failure) < num_failure:
        vid_failure.append(vid)
        failure_episode_num.append(episode)
    
    # rendering vid to .gif
    video_dir = os.path.join(data_dir, 'gif_videos')
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
    video_fn = 'episode_' + str(episode) + '.gif'
    video_path = os.path.join(video_dir, video_fn)
    vid.write_gif(video_path, fps=500)
    
    return episode_length, reward, frame_count + episode_length, survival_rate, kill_rate, success_flag
Example #18
def test_record_breaking_render_method():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.capture_frame()
    rec.close()
    assert rec.empty
    assert rec.broken
    assert not os.path.exists(rec.path)
Example #19
def test_text_envs():
    env = gym.make("FrozenLake-v1")
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
Example #20
    def test(self,
             gamesNum=100,
             adversary=None,
             advDetector=None,
             render=False,
             verbose=True,
             videoPath=None):
        """ Test trained DQN agent. """
        recordVideo = videoPath is not None
        if recordVideo:
            recorder = VideoRecorder(self.env, videoPath)

        gameRewards = []
        gameLengths = []
        attacksNumbers = []
        for i in range(gamesNum):
            done = False
            s = utils.preprocess(self.env.reset())
            frames = np.expand_dims(np.repeat(s, 4, 2), 0)
            gameReward = 0.0
            gameLength = 0
            attNum = 0
            while not done:
                actionScores, actionProbs = self.sess.run(
                    [self.logits, self.probs], feed_dict={self.inputs: frames})
                isAdvState, advFrames = self._attack(adversary, frames,
                                                     actionProbs)
                if advDetector is not None:
                    advDetector.isAdv(advFrames, isAdvState)
                attNum += isAdvState

                for j in range(self.frameSkip):
                    sj, r, done, _ = self.env.step(np.argmax(actionScores))
                    gameReward += r
                    gameLength += 1
                    if render:
                        self.env.render()
                    if recordVideo:
                        recorder.capture_frame()

                frames = utils.pushframe(frames, utils.preprocess(sj))

            gameRewards.append(gameReward)
            gameLengths.append(gameLength)
            attacksNumbers.append(attNum)
            if verbose:
                print("Finished test game " + str(i + 1) + " / " +
                      str(gamesNum) + " reward = " + str(gameReward))
                print('{"metric": "loss", "value":' + str(gameReward) + '}')

        print("Agent achieved average reward of " + str(np.mean(gameRewards)) +
              " in " + str(gamesNum) + " games.")
        print('{"metric": "loss", "value":' + str(np.mean(gameRewards)) + '}')
        if recordVideo:
            recorder.close()

        return gameRewards, gameLengths, attacksNumbers, advDetector
Example #21
def run_policy(env,
               get_action,
               env_params_list,
               max_ep_len=None,
               episode_id=0,
               record=False,
               recording_path=None,
               no_render=False,
               use_baselines=False):
    if record:
        if os.name == "nt":
            full_path = os.path.join(pathlib.Path().absolute(), recording_path)
            full_path_len = len(full_path)
            nb_char_to_remove = full_path_len - 245
            if nb_char_to_remove > 0:
                recording_path = recording_path[:-nb_char_to_remove]
        video_recorder = VideoRecorder(env,
                                       recording_path + "_ep" +
                                       str(episode_id) + ".mp4",
                                       enabled=True)

    if use_baselines:
        env.get_raw_env().set_environment(**env_params_list[episode_id])
    else:
        env.set_environment(**env_params_list[episode_id])

    if use_baselines:
        _, o = env.reset()
    else:
        o = env.reset()

    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    while True:
        if record and video_recorder.enabled:
            video_recorder.capture_frame()
        if not record and not no_render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, i = env.step(a)
        if use_baselines:
            ep_ret += i[0]["original_reward"][0]
        else:
            ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            print('Episode %d \t EpRet %.3f \t EpLen %d' %
                  (episode_id, ep_ret, ep_len))
            if record and video_recorder.enabled:
                video_recorder.close()
                video_recorder.enabled = False
            break
    return ep_ret
Example #22
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    with open(rec.path) as f:
        assert os.fstat(f.fileno()).st_size > 100
Example #23
def play_episode(frame_count, episode=0):
    # Set video recorder
    video_dir = os.path.join(data_dir, 'raw_videos')
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)

    video_recorder = VideoRecorder(env, video_path)

    # Reset environment
    observation = env.reset()

    # Rollout episode
    episode_length = 0
    done = False
    while not done:
        # set exploration rate for this frame
        video_recorder.capture_frame()
        episode_length += 1

        # state consists of the centered observations of each agent
        action = policy_blue.gen_action(env.get_team_blue,
                                        env._env)  # Full observability

        observation, reward, done, _ = env.step(action)

        # stop the episode if it goes too long
        if episode_length >= max_episode_length:
            reward = -100.
            done = True

    # Post Statistics
    success_flag = env.blue_win
    survival_rate = sum([agent.isAlive for agent in env.get_team_blue]) / len(
        env.get_team_blue)
    kill_rate = sum([not agent.isAlive
                     for agent in env.get_team_red]) / len(env.get_team_red)

    # Close the recorder
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)

    if success_flag == 1 and len(vid_success) < num_success:
        vid_success.append(vid)
    elif success_flag == 0 and len(vid_failure) < num_failure:
        vid_failure.append(vid)

    return episode_length, reward, frame_count + episode_length, survival_rate, kill_rate, success_flag
Example #24
def rollout(env, policy, max_steps=1000, action_noise=0.0, render_dir=None):
    """Run one rollout and return data.

  Args:
    env: The environment with reset and step function.
    policy: The state->action policy to roll out.
    max_steps: The maximum number of steps in the episode.
    action_noise: The probability of adding noise to the action
      before sending to the environment. Action noise does not
      get logged in rollout data.
  Returns:
    A dict of torch tensors, with time dimension for 'obs', 'act'
      'rew' and 'done'.
  """
    rollout_data = {
        'obs': [],
        'act': [],
        'rew': [],
        'done': [],
        'total_rew': 0.0,
        'num_steps': 0,
    }

    # Initialize collection.
    obs = env.reset()
    done = False

    if render_dir is not None:
        video_recorder = VideoRecorder(env, base_path=render_dir)

    while not done and rollout_data['num_steps'] < max_steps:
        if render_dir is not None:
            video_recorder.capture_frame()
        rollout_data['num_steps'] += 1
        rollout_data['obs'].append(obs)
        act = policy(obs)
        rollout_data['act'].append(act)
        if action_noise > 0.0:
            if np.random.uniform() < action_noise:
                act += env.action_space.sample()
        obs, rew, done, _ = env.step(act)
        rollout_data['rew'].append(rew)
        rollout_data['done'].append(done)
        rollout_data['total_rew'] += rew  # keep the running total in sync

    if render_dir is not None:
        video_recorder.close()

    rollout_data = utils.tree_apply(torch.tensor, rollout_data)
    for k in ['obs', 'act', 'rew', 'done']:
        rollout_data[k] = torch.stack(rollout_data[k])
    return rollout_data
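A minimal usage sketch, assuming a gym environment and a random stand-in policy; utils.tree_apply is taken as given from the snippet above:

# Hypothetical usage of rollout().
import gym

env = gym.make('CartPole-v1')
data = rollout(env,
               policy=lambda obs: env.action_space.sample(),
               max_steps=200,
               render_dir='/tmp/cartpole_rollout')  # saves an .mp4 there
print(data['total_rew'], data['num_steps'])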
Example #25
class RecordMonitor(Thread):
    def __init__(self, env, monitor_path):
        Thread.__init__(self)
        # The starting time

        self.rec = VideoRecorder(env, path=monitor_path)

    def reset_timer(self):
        self.start_time = time.time()

    def get_rec(self):
        self.rec.capture_frame()  # calling this from the low-level loop makes the control timing inaccurate!

    def stop(self):
        self.rec.close()
Example #26
    def evaluate(self, weights):
        """ Evaluate weights by playing against a random policy. """
        recorder = VideoRecorder(self.env, path=self.video_path_eval)
        self.elite.set_weights(weights)
        reward, _, ts = self.play_game(self.elite,
                                       RandomPolicy(
                                           self.config['number_actions']),
                                       recorder=recorder,
                                       eval=True)
        recorder.close()
        return {
            'total_reward': reward,
            'timesteps_total': ts,
            'video': wandb.Video(self.video_path_eval),
        }
Example #27
    def life():
        yield 'moment', 'born'
        try:
            from gym.wrappers.monitoring.video_recorder import VideoRecorder
            with habitat() as env:
                if record: recorder = VideoRecorder(env, record)
                yield 'env', env
                life = yield_(live(env)(individual))
                for moment in life:
                    if record: recorder.capture_frame()
                    yield 'moment', moment
                if record: recorder.close()
                return life.value
        except AssertionError:
            self = yield from contribution()
            return self
Example #28
    def _predict(self, model, video_path):
        """Run predictions on trained RL model.
        """

        vr = VideoRecorder(env=self._env,
                           path="{}/rl_out_{}.mp4".format(video_path, MPI.COMM_WORLD.Get_rank()),
                           enabled=True)
        obs = self._env.reset()
        for i in range(1000):
            action, _states = model.predict(obs)
            obs, rewards, dones, info = self._env.step(action)
            if dones:
                obs = self._env.reset()
            self._env.render(mode='rgb_array')
            vr.capture_frame()
        vr.close()
        self._env.close()
Example #29
    def test(self, agent):
        config = self._config

        for i in range(3):
            video_path = 'ppo_{0}_{1}_{2:d}.mp4'.format(config.name, config.model, i)
            video_recorder = VideoRecorder(self._env, video_path, enabled=video_path is not None, fps=15)
            state0 = torch.tensor(self._env.reset(), dtype=torch.float32).unsqueeze(0).to(config.device)
            done = False

            while not done:
                self._env.render()
                video_recorder.capture_frame()
                action0 = agent.get_action(state0, True)
                next_state, reward, done, info = self._env.step(action0.item())
                state0 = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0).to(config.device)
            video_recorder.close()
Example #30
    def _run_episode(self, step, render=False):
        target = self.target.detach().cpu().numpy()
        t, ep_reward = 0, 0

        env = self.env

        if render:
            video_path = self.log_dir / 'video'
            video_path.mkdir(exist_ok=True, parents=True)
            video_recorder = VideoRecorder(
                env=self.env,
                base_path=str(video_path / f'{step}'),
                metadata={'step': step},
                enabled=True,
            )
        else:
            video_recorder = None

        state, done = env.reset(), False
        # extra variables for proper execution (initial value not important)
        goal = None

        while not done and t < self.ep_len:
            if video_recorder:
                video_recorder.capture_frame()
            state_tens = torch.from_numpy(state).float().to(self.device)
            # TODO: run episodes exactly like we collect experience
            if t == 0 or (t + 1) % self.params.c == 0:
                goal = self.agent_hi.actor(state_tens).squeeze(0)

            action = self.agent_lo.actor(torch.cat([state_tens, goal], dim=-1)).squeeze(0)

            next_state, _, _, _ = env.step(action.detach().cpu().numpy())
            reward = dense_reward(next_state, target, self.goal_dim)
            done = success_judge(next_state, target, self.goal_dim)
            next_state_tens = torch.from_numpy(next_state).float().to(self.device)

            t += 1

            ep_reward += reward
            goal = goal + (state_tens - next_state_tens)[:self.goal_dim]
            state = next_state

        if video_recorder:
            video_recorder.close()

        return done, ep_reward