Example #1
def replay_states(path=os.path.join(os.getcwd(), 'windyslope.xml')):
    f, i, w = sample_parameters(mode)
    model = load_model_from_path(path)
    randomize_dynamics(model, friction=f, insidebox=i, wind=w)
    env = WindySlope(model, mode.REAL, should_render=should_render)
    env.reset()
    with open('traj100-sample10.npz', 'rb') as f:
        states = np.load(f)

    if env.should_record:
        rec = VideoRecorder(env, path='/tmp/video/windyslope-predict-sample.mp4')

    for e in range(len(states)):
        episode = states[e]
        for i in range(len(episode)):
            qpos = episode[i][:3]
            env.data.qpos[:3] = qpos
            mat = episode[i][3:12]
            mat = np.asarray(mat).astype(np.float64)
            quat = np.empty(4)
            functions.mju_mat2Quat(quat, mat)
            print('quat: ', quat)
            env.data.qpos[3:] = quat
            #env.data.qvel[:3] = states[i][12:15]
            #env.data.qvel[3:] = states[i][15:18]
            
            #env.sim.forward()
            obs = env.get_observations(env.model, env.data)
            print('states:', episode[i])
            print('obs:', obs)
            #assert np.allclose(obs, states[i])
            env.render()
            if env.should_record:
                rec.capture_frame()
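As an aside on the rotation handling above, here is a self-contained sketch of `mujoco_py`'s `mju_mat2Quat` (assuming `mujoco_py` is installed); the identity rotation should map to the unit quaternion:

import numpy as np
from mujoco_py import functions

mat = np.eye(3).flatten().astype(np.float64)  # flattened row-major 3x3 rotation matrix
quat = np.empty(4)
functions.mju_mat2Quat(quat, mat)  # writes the quaternion [w, x, y, z] into quat
print(quat)  # expected: [1. 0. 0. 0.]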
Example #2
class RecordedEnv(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)
        self.start_of_episode = None

    def reset(self):
        self.start_of_episode = True
        return self.env.reset()

    def step(self, action, filename='', record_episode=False):
        if record_episode and self.start_of_episode:
            self.recorder = VideoRecorder(self.env, path=filename)
        self.start_of_episode = False

        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()

        next_state, reward, done, info = self.env.step(action)
        if hasattr(self, 'recorder') and done:
            self.recorder.close()  # close and save video at end of episode
            del self.recorder

        return next_state, reward, done, info

    def close(self):
        if hasattr(self, 'recorder'):
            self.recorder.capture_frame()
            self.recorder.close()
            del self.recorder
        self.start_of_episode = True
        return self.env.close()
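A minimal usage sketch for the wrapper above, assuming a standard gym environment; the filename and episode count are illustrative:

import gym

env = RecordedEnv(gym.make('CartPole-v1'))
for episode in range(3):
    obs, done = env.reset(), False
    while not done:
        # Only the first episode is recorded; the recorder is created
        # lazily on the first step of that episode and closed on done.
        obs, reward, done, info = env.step(env.action_space.sample(),
                                           filename='/tmp/episode.mp4',
                                           record_episode=(episode == 0))
env.close()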
Example #3
def play(env, model, video_path, num_episodes, timesteps, metadata):
    theta, theta_dot, actions = [], [], []
    video_recorder = None
    for i_episode in range(num_episodes):
        video_recorder = VideoRecorder(
            env=env, path=video_path, metadata=metadata, enabled=video_path is not None)
        obs = env.reset()
        for t in range(timesteps):
            obs = [np.array([[list(obs)]])]
            video_recorder.capture_frame()
            action = model.predict(obs)[0]
            obs, rew, done, info = env.step(action)
            env.render()
            theta.append(obs[0])
            theta_dot.append(obs[1])
            actions.append(action[0])
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
                break
    env.close()
    return theta
Example #4
def record_rollout(env, horizon, policy, record_file_path):
    """
    This is the recording function for the runner class which samples one episode with a specified length
    using the provided policy and records it in a video.


    Parameters
    ---------
    horizon: Int
        The task horizon/ episode length.
    policy: ModelBasedBasePolicy or ModelFreeBasePolicy
        The policy to be used in collecting the episodes from the different agents.
    record_file_path: String
        specified the file path to save the video that will be recorded in.
    """
    recorder = VideoRecorder(env, record_file_path + '.mp4')
    observations = env.reset()
    for t in range(horizon):
        recorder.capture_frame()
        if not isinstance(policy, ModelFreeBasePolicy):
            action_to_execute, expected_obs, expected_reward = policy.act(
                observations, t)
        else:
            action_to_execute = policy.act(observations, t)
        observations, reward, done, info = env.step(action_to_execute)
    recorder.capture_frame()
    recorder.close()
    return
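A hedged usage sketch for `record_rollout`, assuming a gym environment; `RandomPolicy` is an illustrative stand-in for a `ModelFreeBasePolicy` subclass, not part of the original codebase:

import gym

class RandomPolicy(ModelFreeBasePolicy):
    """Illustrative policy that samples random actions."""
    def __init__(self, env):
        self.env = env

    def act(self, observations, t):
        return self.env.action_space.sample()

env = gym.make('Pendulum-v1')
record_rollout(env, horizon=200, policy=RandomPolicy(env),
               record_file_path='/tmp/pendulum_rollout')  # writes /tmp/pendulum_rollout.mp4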
Example #5
def play_episode(episode=0):
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)
    video_recorder = VideoRecorder(env, video_path)

    length = 0
    observation = env.reset()
    done = False
    while not done and length < max_episode_length:
        action = env.action_space.sample()  # placeholder action; substitute the desired policy
        observation, reward, done, _ = env.step(action)

        video_recorder.capture_frame()

        # Obtain waypoints
        waypoints = []
        for entity in env.get_team_blue.tolist() + env.get_team_red.tolist():
            waypoints.extend(entity.get_loc())
        length += 1

    # Close the recorder and load the clip
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)

    # Check if episode has right length played
    if length <= min_length or length >= max_length:
        return

    # Post Processing
    if env.blue_win and len(vid_success) < num_success:
        vid_success.append(vid)
    elif env.red_win and len(vid_failure) < num_failure:
        vid_failure.append(vid)
Example #6
class VideoLogger(Callback):
    def __init__(self, env: MultiagentVecEnv, save_folder: str):
        super(VideoLogger, self).__init__()
        self.env = env
        self.save_folder = save_folder

        os.makedirs(save_folder, exist_ok=True)
        self.recorder = VideoRecorder(env, path=f'{save_folder}/0.mp4')

    def before_step(self,
                    logs: Optional[dict] = None,
                    actions: Optional[Dict[str, torch.Tensor]] = None,
                    action_distributions: Optional[Dict[str,
                                                        Distribution]] = None):
        self.recorder.capture_frame()

    def after_train(self,
                    logs: Optional[dict] = None,
                    obs: Optional[Dict[str, torch.Tensor]] = None,
                    rewards: Optional[Dict[str, torch.Tensor]] = None,
                    dones: Optional[Dict[str, torch.Tensor]] = None,
                    infos: Optional[Dict[str, torch.Tensor]] = None):
        # If there is just one env save each episode to a different file
        # Otherwise save the whole video at the end
        if self.env.num_envs == 1:
            if logs['env_done']:
                # Save video and make a new recorder
                self.recorder.close()
                self.recorder = VideoRecorder(
                    self.env,
                    path=f'{self.save_folder}/{logs["episodes"]}.mp4')

    def on_train_end(self):
        self.recorder.close()
Example #7
def perform_debug_sequence(sequence_name,
                           env,
                           walker_type,
                           sequence_of_actions,
                           action_repeat=10,
                           has_gravity=False):
    video_folder = os.path.join(debug_folder, walker_type)
    if not os.path.exists(video_folder):
        os.makedirs(video_folder)

    if not has_gravity:
        env.world.gravity = (0, 0)
    else:
        env.world.gravity = (0, -10)

    video_recorder = VideoRecorder(
        env,
        os.path.join(video_folder, sequence_name + ".mp4"),
    )  # Stump Tracks
    env.reset()

    for action in sequence_of_actions:
        for i in range(action_repeat):
            _, _, d, _ = env.step(action)
            video_recorder.capture_frame()
            time.sleep(0.01)
            video_recorder.capture_frame()
    video_recorder.close()
Example #8
def play_with_buffer(env, pi, num_episodes=3,
                     video_path='videos/REINFORCE_best.mp4'):
    video_recorder = VideoRecorder(env,
                                   video_path,
                                   enabled=video_path is not None)
    rep = ReplayMemory(pi.config)
    obs_mask = np.array([[1, 0, 0, 0], [0, 0, 1, 0]])
    for e_i in range(num_episodes):
        s = env.reset()
        z = np.matmul(obs_mask, s)
        rep.add(z, 0, 0)
        # env.unwrapped.render()
        done = False
        while not done:
            a = int(pi(rep.getState()))
            s_prime, r_t, done, _ = env.step(a)
            z_prime = np.matmul(obs_mask, s_prime)
            rep.add(z_prime, r_t, a)
            video_recorder.capture_frame()
            # env.render()
            # s = s_prime
            # z = z_prime
    video_recorder.close()
    # video_recorder.enabled = False
    env.close()
Example #9
File: play.py  Project: Jason93415/C-HGG
    def demoRecordPickAndPlaceObstacle(
            self, raw_path="videos/KukaPickAndPlaceObstacle"):
        env = self.env
        test_rollouts = 5
        goals = [[0.80948876, -0.24847823, 0.85],
                 [0.90204398, -0.24176245, 0.85],
                 [0.72934716, -0.19637749, 0.85],
                 [0.6970663, -0.25643907, 0.85],
                 [0.7029464, -0.18765762, 0.85]]
        recorder = VideoRecorder(env.env.env, base_path=raw_path)
        acc_sum, obs = 0.0, []
        for i in range(test_rollouts):
            env.reset()
            env.set_goal(np.array(goals[i]))
            obs.append(goal_based_process(env.get_obs()))
            print("Rollout {}/{} ...".format(i + 1, test_rollouts))
            for timestep in range(200):
                actions = self.my_step_batch(obs)
                obs, infos = [], []
                ob, _, _, info = env.step(actions[0])
                obs.append(goal_based_process(ob))
                infos.append(info)
                recorder.capture_frame()
        recorder.close()
Example #10
    def play(self):

        from gym.wrappers.monitoring.video_recorder import VideoRecorder
        rec = VideoRecorder(self.env,
                            base_path=os.path.join(
                                self.log_dir,
                                self.log_dir.rsplit('/', 1)[1]))

        observation = self.env.reset()
        r = 0
        done = False
        while not done:
            action, q_vals = self.act(np.asarray(observation)[np.newaxis])

            next_state, reward, done, _ = self.env.step(action)

            r += reward

            observation = next_state

            self.env.render()
            rec.capture_frame()
            time.sleep(0.05)

        print('Game ended with score: ', r)
        self.env.close()
        rec.close()
Example #11
def testCartpole(model, test=True, record=True):
    env = gym.make(ENV_NAME)
    if record:
        rec = VideoRecorder(env, "./video/iteration_%s.mp4" % str(model))
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space, load_path="./artifacts/iteration_%s/" % str(model),
                           test=test)
    run = 0
    while True:
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        run += 1
        while True:
            step += 1
            env.render()
            if record:
                rec.capture_frame()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            state_next = np.reshape(state_next, [1, observation_space])
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", score: " + str(step))
                if record:
                    rec.close()
                    record = False
                break
Example #12
def record_video_of_episode(episode,
                            file_name,
                            env_wrappers=np.array([]),
                            env_wrappers_args=np.array([])):
    """
     Records a video of a logged episode for a specified environment

     :param episode: (Episode) the logged episode
     :param file_name: (str) full path where the video is being stored.
     :param env_wrappers: (list) a list of gym wrappers
     :param env_wrappers_args: (list) a list of kwargs for the gym wrappers
     :return:
     """
    actual_skip_frame = episode.world_params["skip_frame"]
    env = get_world(episode.task_name,
                    episode.task_params,
                    episode.world_params,
                    enable_visualization=False,
                    env_wrappers=env_wrappers,
                    env_wrappers_args=env_wrappers_args)
    env.set_starting_state(episode.initial_full_state, check_bounds=False)
    recorder = VideoRecorder(env, "{}.mp4".format(file_name))
    recorder.capture_frame()
    for time, observation, reward, action in zip(episode.timestamps,
                                                 episode.observations,
                                                 episode.rewards,
                                                 episode.robot_actions):
        for _ in range(actual_skip_frame):
            env.step(action)
            recorder.capture_frame()
    recorder.close()
    env.close()
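A usage sketch for the function above; `load_logged_episode` is a hypothetical stand-in for however the surrounding project loads a logged `Episode` object:

# hypothetical: load a previously logged episode, then re-render it to video
episode = load_logged_episode('logs/episode_0')  # stand-in loader, not a real API
record_video_of_episode(episode, file_name='/tmp/episode_0')  # writes /tmp/episode_0.mp4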
Example #13
def play_episode(frame_count, episode=0):
    """
    Play an episode and render it into a .gif.
    """
    
    # Set video recorder
    video_dir = os.path.join(data_dir, 'raw_videos')
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)

    video_recorder = VideoRecorder(env, video_path)

    # Reset environment
    observation = env.reset()

    # Rollout episode
    episode_length = 0
    done = False
    while not done:
        # capture the current frame
        video_recorder.capture_frame()
        episode_length += 1

        action = env.action_space.sample()  # placeholder action; substitute the desired policy
        observation, reward, done, _ = env.step(action)

        # stop the episode if it goes too long
        if episode_length >= max_episode_length:
            reward = -100.
            done = True

    # Close the recorder and load the clip
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)

    success_flag = env.blue_win
    survival_rate = sum([agent.isAlive for agent in env.get_team_blue]) / len(env.get_team_blue)
    kill_rate = sum([not agent.isAlive for agent in env.get_team_red]) / len(env.get_team_red)

    if success_flag == 1 and len(vid_success) < num_success:
        vid_success.append(vid)
        success_episode_num.append(episode)
        
    elif success_flag == 0 and len(vid_failure) < num_failure:
        vid_failure.append(vid)
        failure_episode_num.append(episode)
    
    # rendering vid to .gif
    video_dir = os.path.join(data_dir, 'gif_videos')
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
    video_fn = 'episode_' + str(episode) + '.gif'
    video_path = os.path.join(video_dir, video_fn)
    vid.write_gif(video_path, fps=500)
    
    return episode_length, reward, frame_count + episode_length, survival_rate, kill_rate, success_flag
Example #14
def test_record_breaking_render_method():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.capture_frame()
    rec.close()
    assert rec.empty
    assert rec.broken
    assert not os.path.exists(rec.path)
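For context, a minimal stand-in for `BrokenRecordableEnv` in the spirit of gym's test fixtures, assuming an env that advertises `rgb_array` rendering but never returns a frame:

class BrokenRecordableEnv:
    metadata = {'render.modes': [None, 'rgb_array']}

    def render(self, mode=None):
        # Claims to support rgb_array but returns None, so the recorder
        # ends up both empty and broken and removes its output file.
        pass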
Example #15
    def test(self,
             gamesNum=100,
             adversary=None,
             advDetector=None,
             render=False,
             verbose=True,
             videoPath=None):
        """ Test trained DQN agent. """
        recordVideo = videoPath is not None
        if recordVideo:
            recorder = VideoRecorder(self.env, videoPath)

        gameRewards = []
        gameLengths = []
        attacksNumbers = []
        for i in range(gamesNum):
            done = False
            s = utils.preprocess(self.env.reset())
            frames = np.expand_dims(np.repeat(s, 4, 2), 0)
            gameReward = 0.0
            gameLength = 0
            attNum = 0
            while not done:
                actionScores, actionProbs = self.sess.run(
                    [self.logits, self.probs], feed_dict={self.inputs: frames})
                isAdvState, advFrames = self._attack(adversary, frames,
                                                     actionProbs)
                if advDetector is not None:
                    advDetector.isAdv(advFrames, isAdvState)
                attNum += isAdvState

                for j in range(self.frameSkip):
                    sj, r, done, _ = self.env.step(np.argmax(actionScores))
                    gameReward += r
                    gameLength += 1
                    if render:
                        self.env.render()
                    if recordVideo:
                        recorder.capture_frame()

                frames = utils.pushframe(frames, utils.preprocess(sj))

            gameRewards.append(gameReward)
            gameLengths.append(gameLength)
            attacksNumbers.append(attNum)
            if verbose:
                print("Finished test game " + str(i + 1) + " / " +
                      str(gamesNum) + " reward = " + str(gameReward))
                print('{"metric": "loss", "value":' + str(gameReward) + '}')

        print("Agent achieved average reward of " + str(np.mean(gameRewards)) +
              " in " + str(gamesNum) + " games.")
        print('{"metric": "loss", "value":' + str(np.mean(gameRewards)) + '}')
        if recordVideo:
            recorder.close()

        return gameRewards, gameLengths, attacksNumbers, advDetector
Example #16
def test_text_envs():
    env = gym.make("FrozenLake-v1")
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
Example #17
def run_policy(env,
               get_action,
               env_params_list,
               max_ep_len=None,
               episode_id=0,
               record=False,
               recording_path=None,
               no_render=False,
               use_baselines=False):
    if record:
        if os.name == "nt":
            full_path = os.path.join(pathlib.Path().absolute(), recording_path)
            full_path_len = len(full_path)
            nb_char_to_remove = full_path_len - 245
            if nb_char_to_remove > 0:
                recording_path = recording_path[:-nb_char_to_remove]
        video_recorder = VideoRecorder(env,
                                       recording_path + "_ep" +
                                       str(episode_id) + ".mp4",
                                       enabled=True)

    if use_baselines:
        env.get_raw_env().set_environment(**env_params_list[episode_id])
    else:
        env.set_environment(**env_params_list[episode_id])

    if use_baselines:
        _, o = env.reset()
    else:
        o = env.reset()

    r, d, ep_ret, ep_len, n = 0, False, 0, 0, 0
    while True:
        if record and video_recorder.enabled:
            video_recorder.capture_frame()
        if not record and not no_render:
            env.render()
            time.sleep(1e-3)

        a = get_action(o)
        o, r, d, i = env.step(a)
        if use_baselines:
            ep_ret += i[0]["original_reward"][0]
        else:
            ep_ret += r
        ep_len += 1

        if d or (ep_len == max_ep_len):
            print('Episode %d \t EpRet %.3f \t EpLen %d' %
                  (episode_id, ep_ret, ep_len))
            if record and video_recorder.enabled:
                video_recorder.close()
                video_recorder.enabled = False
            break
    return ep_ret
Example #18
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    with open(rec.path) as f:
        assert os.fstat(f.fileno()).st_size > 100
Example #19
def rollout(env, policy, max_steps=1000, action_noise=0.0, render_dir=None):
    """Run one rollout and return data.

  Args:
    env: The environment with reset and step function.
    policy: The state->action policy to roll out.
    max_steps: The maximum number of steps in the episode.
    action_noise: The probability of adding noise to the action
      before sending to the environment. Action noise does not
      get logged in rollout data.
  Returns:
    A dict of torch tensors, with time dimension for 'obs', 'act'
      'rew' and 'done'.
  """
    rollout_data = {
        'obs': [],
        'act': [],
        'rew': [],
        'done': [],
        'total_rew': 0.0,
        'num_steps': 0,
    }

    # Initialize collection.
    obs = env.reset()
    done = False

    if render_dir is not None:
        video_recorder = VideoRecorder(env, base_path=render_dir)

    while not done and rollout_data['num_steps'] < max_steps:
        if render_dir is not None:
            video_recorder.capture_frame()
        rollout_data['num_steps'] += 1
        rollout_data['obs'].append(obs)
        act = policy(obs)
        rollout_data['act'].append(act)
        if action_noise > 0.0:
            if np.random.uniform() < action_noise:
                act += env.action_space.sample()
        obs, rew, done, _ = env.step(act)
        rollout_data['rew'].append(rew)
        rollout_data['total_rew'] += rew
        rollout_data['done'].append(done)

    if render_dir is not None:
        video_recorder.close()

    rollout_data = utils.tree_apply(torch.tensor, rollout_data)
    for k in ['obs', 'act', 'rew', 'done']:
        rollout_data[k] = torch.stack(rollout_data[k])
    return rollout_data
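A usage sketch for `rollout`, assuming a gym-style environment; any callable from observations to actions works as the policy:

import gym

env = gym.make('Pendulum-v1')
random_policy = lambda obs: env.action_space.sample()
data = rollout(env, random_policy, max_steps=200, render_dir='/tmp/pendulum_rollout')
print(data['num_steps'], data['total_rew'])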
Example #20
def play_episode(frame_count, episode=0):
    # Set video recorder
    video_dir = os.path.join(data_dir, 'raw_videos')
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
    if not os.path.exists(video_dir):
        os.mkdir(video_dir)
    video_fn = 'episode_' + str(episode) + '.mp4'
    video_path = os.path.join(video_dir, video_fn)

    video_recorder = VideoRecorder(env, video_path)

    # Reset environment
    observation = env.reset()

    # Rollout episode
    episode_length = 0
    done = False
    while not done:
        # capture the current frame
        video_recorder.capture_frame()
        episode_length += 1

        # state consists of the centered observations of each agent
        action = policy_blue.gen_action(env.get_team_blue,
                                        env._env)  # Full observability

        observation, reward, done, _ = env.step(action)

        # stop the episode if it goes too long
        if episode_length >= max_episode_length:
            reward = -100.
            done = True

    # Post Statistics
    success_flag = env.blue_win
    survival_rate = sum([agent.isAlive for agent in env.get_team_blue]) / len(
        env.get_team_blue)
    kill_rate = sum([not agent.isAlive
                     for agent in env.get_team_red]) / len(env.get_team_red)

    # Close the recorder and load the clip
    video_recorder.close()
    vid = mp.VideoFileClip(video_path)

    if success_flag == 1 and len(vid_success) < num_success:
        vid_success.append(vid)
    elif success_flag == 0 and len(vid_failure) < num_failure:
        vid_failure.append(vid)

    return episode_length, reward, frame_count + episode_length, survival_rate, kill_rate, success_flag
Example #21
    def record():
        env = gym.make("CartPole-v1")
        rec = VideoRecorder(env)
        env.reset()
        rec.capture_frame()

        rec_path = rec.path
        proc = rec.encoder.proc

        assert proc.poll() is None  # subprocess is running

        # The function ends without an explicit `rec.close()` call;
        # the recorder is closed implicitly when `rec` is garbage-collected
        return rec_path, proc
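A plausible continuation of this test, assuming `gc` is imported and that `VideoRecorder` closes its encoder when garbage-collected (as recent gym versions do):

    rec_path, proc = record()
    gc.collect()  # collecting `rec` should close the recorder and its encoder
    assert proc.poll() is not None  # the ffmpeg subprocess has exited
    assert os.path.exists(rec_path)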
Example #22
class RecordMonitor(Thread):
    def __init__(self, env, monitor_path):
        Thread.__init__(self)
        self.start_time = None  # the starting time, set by reset_timer()

        self.rec = VideoRecorder(env, path=monitor_path)

    def reset_timer(self):
        self.start_time = time.time()

    def get_rec(self):
        self.rec.capture_frame()  # calling this at a lower level makes the control timing inaccurate!

    def stop(self):
        self.rec.close()
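A usage sketch for `RecordMonitor`, assuming a standard gym environment; `get_rec()` must be called from the control loop once per frame to capture:

import gym

env = gym.make('CartPole-v1')
monitor = RecordMonitor(env, '/tmp/monitor.mp4')
monitor.reset_timer()
env.reset()
for _ in range(100):
    env.step(env.action_space.sample())
    monitor.get_rec()  # capture one frame per control step
monitor.stop()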
Example #23
def show_smart_agent():
    env = gym.make('Acrobot-v1')
    recorder = VideoRecorder(env, path='./video.mp4', enabled=True)
    state = env.reset()

    for t in range(1000):
        recorder.capture_frame()
        action, _ = policy.act(state)
        env.render()
        state, reward, done, _ = env.step(action)
        if done:
            break
        time.sleep(0.05)

    recorder.close()
    env.close()
Example #24
def robo_view():
    # env = AcrobotEnv()
    env = gym.make('Acrobot-v1')
    video = VideoRecorder(env, path='./acrobot.mp4', enabled=True)
    state = env.reset()

    for i in range(10000):
        video.capture_frame()
        action, _ = model.get_action(state)
        env.render()
        state, rewards, finish, _ = env.step(action)
        if finish:
            break
        time.sleep(0.1)
    video.close()
    env.close()
Example #25
def life():
    yield 'moment', 'born'
    try:
        from gym.wrappers.monitoring.video_recorder import VideoRecorder
        with habitat() as env:
            if record: recorder = VideoRecorder(env, record)
            yield 'env', env
            life = yield_(live(env)(individual))
            for moment in life:
                if record: recorder.capture_frame()
                yield 'moment', moment
            if record: recorder.close()
            return life.value
    except AssertionError:
        self = yield from contribution()
        return self
Example #26
    def _predict(self, model, video_path):
        """Run predictions on trained RL model.
        """

        vr = VideoRecorder(env=self._env,
                           path="{}/rl_out_{}.mp4".format(video_path, MPI.COMM_WORLD.Get_rank()),
                           enabled=True)
        obs = self._env.reset()
        for i in range(1000):
            action, _states = model.predict(obs)
            obs, rewards, dones, info = self._env.step(action)
            if dones:
                obs = self._env.reset()
            self._env.render(mode='rgb_array')
            vr.capture_frame()
        vr.close()
        self._env.close()
Example #27
    def test(self, agent):
        config = self._config

        for i in range(3):
            video_path = 'ppo_{0}_{1}_{2:d}.mp4'.format(config.name, config.model, i)
            video_recorder = VideoRecorder(self._env, video_path, enabled=video_path is not None, fps=15)
            state0 = torch.tensor(self._env.reset(), dtype=torch.float32).unsqueeze(0).to(config.device)
            done = False

            while not done:
                self._env.render()
                video_recorder.capture_frame()
                action0 = agent.get_action(state0, True)
                next_state, reward, done, info = self._env.step(action0.item())
                state0 = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0).to(config.device)
            video_recorder.close()
Example #28
def test(opt):

    opt.saved_path = os.getcwd() + '/PPO/' + opt.saved_path
    opt.output_path = os.getcwd() + '/PPO/' + opt.output_path
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
    if opt.action_type == "right":
        actions = RIGHT_ONLY
    elif opt.action_type == "simple":
        actions = SIMPLE_MOVEMENT
    else:
        actions = COMPLEX_MOVEMENT
    env = create_train_env_test(actions)
    rec = VideoRecorder(env,
                        path="{}/mario_video_{}.mp4".format(
                            opt.output_path, opt.step),
                        enabled=True)
    model = PPO(env.observation_space.shape[0], len(actions))
    if torch.cuda.is_available():
        model.load_state_dict(
            torch.load("{}/ppo_super_mario_bros_{}".format(
                opt.saved_path, opt.step)))
        model.cuda()
    else:
        model.load_state_dict(
            torch.load("{}/ppo_super_mario_bros_{}".format(
                opt.saved_path, opt.step),
                       map_location=lambda storage, loc: storage))
    model.eval()
    state = torch.from_numpy(env.reset())
    while True:
        if torch.cuda.is_available():
            state = state.cuda()
        logits, value = model(state)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        state, reward, done, info = env.step(action)
        state = torch.from_numpy(state)
        # print(info)
        # env.render()
        rec.capture_frame()
        if done:
            print("Died.")
            rec.close()
            break
Example #29
    def _run_episode(self, step, render=False):
        target = self.target.detach().cpu().numpy()
        t, ep_reward = 0, 0

        env = self.env

        if render:
            video_path = self.log_dir / 'video'
            video_path.mkdir(exist_ok=True, parents=True)
            video_recorder = VideoRecorder(
                env=self.env,
                base_path=str(video_path / f'{step}'),
                metadata={'step': step},
                enabled=True,
            )
        else:
            video_recorder = None

        state, done = env.reset(), False
        # extra variables for proper execution (initial value not important)
        goal = None

        while not done and t < self.ep_len:
            if video_recorder:
                video_recorder.capture_frame()
            state_tens = torch.from_numpy(state).float().to(self.device)
            # TODO: run episodes exactly like we collect experience
            if t == 0 or (t + 1) % self.params.c == 0:
                goal = self.agent_hi.actor(state_tens).squeeze(0)

            action = self.agent_lo.actor(torch.cat([state_tens, goal], dim=-1)).squeeze(0)

            next_state, _, _, _ = env.step(action.detach().cpu().numpy())
            reward = dense_reward(next_state, target, self.goal_dim)
            done = success_judge(next_state, target, self.goal_dim)
            next_state_tens = torch.from_numpy(next_state).float().to(self.device)

            t += 1

            ep_reward += reward
            goal = goal + (state_tens - next_state_tens)[:self.goal_dim]
            state = next_state

        if video_recorder:
            video_recorder.close()

        return done, ep_reward
Example #30
File: play.py  Project: Jason93415/C-HGG
    def demoRecordPushJoints(self, raw_path="videos/KukaPushJoints"):
        file = open("KukaPushJointsTrajectory2.txt", 'r')
        env = self.env
        test_rollouts = 5
        goals = [[0.68, -0.18, 0.85], [0.60, -0.3, 0.85], [0.72, -0.28, 0.85],
                 [0.58, -0.3, 0.85], [0.62, -0.25, 0.85]]
        recorder = VideoRecorder(env.env.env, base_path=raw_path)
        jointsTrajectory = re.sub(r"([^[])\s+([^]])", r"\1 \2",
                                  file.readline())
        file.close()
        jointsTrajectory = np.array(literal_eval(jointsTrajectory))
        env.reset()
        env.set_goal(np.array(goals[1]))
        for j in range(len(jointsTrajectory)):
            self.env.stepJoints(jointsTrajectory[j])
            recorder.capture_frame()

        print("... done.")