Exemplo n.º 1
0
  def rollout(self,
              env,
              agent,
              directory,
              record_video=False,
              timeout=8,
              num_episode=10,
              record_trajectory=False):
    """Roll out the agent in the environment and optionally save the results.

    For every episode the environment is reset and a goal is sampled. The agent
    then acts for at most `self.cfg.max_episode_length` steps. A new goal is
    sampled whenever the current goal is achieved (the step reward is greater
    than `env.shape_val`) or has been pursued for `timeout` steps without
    success. The episode ends early if `env.all_goals_satisfied` is set after
    sampling a new goal.

    Args:
      env: the RL environment
      agent: the RL agent
      directory: output path handed to `save_video`. The trajectory file name
        is built from `directory[:-4]`, i.e. the last four characters are
        dropped (presumably a file extension such as '.mp4' -- confirm against
        the callers).
      record_video: if True, save the rendered frames as a video
      timeout: number of steps spent on a single goal before it is counted as
        timed out and replaced by a new goal
      num_episode: number of rollout episodes
      record_trajectory: if True, act with exploration probability
        `self.epsilon` and save observations, actions and the number of
        achieved goals per step as JSON

    Returns:
      The number of achieved goals divided by `num_episode` (0.0 when
      `num_episode` is 0). Several goals can be achieved within one episode,
      so the value may be greater than 1.
    """
    print('#######################################')
    print('Rolling out...')
    print('#######################################')
    all_frames = []
    ep_observation, ep_action, ep_agn = [], [], []
    # All-zero frame with the padded frame shape, used to separate episodes.
    black_frame = pad_image(env.render(mode='rgb_array')) * 0.0
    goal_sampled = 0
    timeout_count, success = 0, 0
    for _episode in range(num_episode):
      s = self.reset(env, agent, self.cfg.diverse_scene_content)
      all_frames += [black_frame] * 10
      g_text, p = env.sample_goal()
      if env.all_goals_satisfied:
        # No goal is left in this scene: reset once more, passing True in place
        # of `self.cfg.diverse_scene_content`, and sample again. The new goal
        # text must replace `g_text`, otherwise the stale pre-reset goal would
        # be encoded and shown in the video while `p` refers to the new goal.
        s = self.reset(env, agent, True)
        g_text, p = env.sample_goal()
      goal_sampled += 1
      g = np.squeeze(self.encode_fn(g_text))
      current_goal_repetition = 0
      for _step in range(self.cfg.max_episode_length):
        # Only explore when ground-truth trajectories are being collected.
        prob = self.epsilon if record_trajectory else 0.0
        action = agent.step(s, g, env, explore_prob=prob)
        s_tp1, r, _, _ = env.step(
            action,
            record_achieved_goal=True,
            goal=p,
            atomic_goal=self.cfg.record_atomic_instruction)
        ag = env.get_achieved_goals()
        s = s_tp1
        all_frames.append(
            add_text(pad_image(env.render(mode='rgb_array')), g_text))
        current_goal_repetition += 1

        if record_trajectory:
          ep_observation.append(env.get_direct_obs().tolist())
          ep_action.append(action)
          ep_agn.append(len(ag))

        sample_new_goal = False
        if r > env.shape_val:
          # Goal achieved: hold the frame for 5 ticks with a green caption.
          # The environment does not change in between, so render only once.
          img = pad_image(env.render(mode='rgb_array'))
          for _ in range(5):
            all_frames.append(add_text(img, g_text, color='green'))
          success += 1
          sample_new_goal = True
        elif current_goal_repetition >= timeout:
          # `elif`: a goal achieved exactly on the last allowed step is a
          # success and must not be counted as a timeout as well.
          all_frames.append(
              add_text(pad_image(env.render(mode='rgb_array')), 'time out :('))
          timeout_count += 1
          sample_new_goal = True

        if sample_new_goal:
          next_goal_text, p = env.sample_goal()
          if env.all_goals_satisfied:
            break
          g_text = next_goal_text
          g = np.squeeze(self.encode_fn(g_text))
          current_goal_repetition = 0
          goal_sampled += 1

    print('Rollout finished')
    print('{} instrutctions tried given'.format(goal_sampled))
    print('{} instructions timed out'.format(timeout_count))
    if record_video:
      save_video(np.uint8(all_frames), directory, fps=5)
      print('Video saved...')
    if record_trajectory:
      print('Recording trajectory...')
      datum = {
          'obs': ep_observation,
          'action': ep_action,
          'achieved goal': ep_agn,
      }
      save_json(datum, directory[:-4] + '_trajectory.json')
    # Guard against num_episode == 0, which used to raise ZeroDivisionError.
    return success / float(num_episode) if num_episode else 0.0
Exemplo n.º 2
0
  def rollout(self,
              env,
              agent,
              directory,
              record_video=False,
              timeout=8,
              num_episode=10,
              record_trajectory=False):
    """Roll out the agent in the environment and optionally save the results.

    For every episode the environment is reset and a goal is sampled. The agent
    then acts for at most `self.cfg.max_episode_length` steps. A new goal is
    sampled whenever the current goal is achieved (the step reward is greater
    than `env.shape_val`) or has been pursued for `timeout` steps without
    success. The episode ends early if `env.all_goals_satisfied` is set after
    sampling a new goal.

    When `self._use_synonym_for_rollout` is set, the rollout is run either
    with a partially randomized word embedding (embedding type 'random'; the
    original embedding is restored afterwards, even if the rollout raises) or
    with goals rewritten by `paraphrase_sentence` (any other embedding type).

    Args:
      env: the RL environment
      agent: the RL agent
      directory: output path handed to `save_video`. The trajectory file name
        is built from `directory[:-4]`, i.e. the last four characters are
        dropped (presumably a file extension such as '.mp4' -- confirm against
        the callers).
      record_video: if True, save the rendered frames as a video
      timeout: number of steps spent on a single goal before it is counted as
        timed out and replaced by a new goal
      num_episode: number of rollout episodes
      record_trajectory: if True, act with exploration probability
        `self.epsilon` and save observations and actions as JSON

    Returns:
      `1 - timed_out_goals / sampled_goals`, or 0.0 if no goal was sampled.
    """
    print('\n#######################################')
    print('Rolling out...')
    print('#######################################')

    use_synonym = self._use_synonym_for_rollout
    randomize_embedding = use_synonym and self.cfg.embedding_type == 'random'
    paraphrase_goal = use_synonym and self.cfg.embedding_type != 'random'

    def encode_goal(goal_text):
      """Encodes and pads a goal, paraphrasing it when synonyms are enabled."""
      encoded = self.encode_fn(goal_text)
      encoded = np.squeeze(
          pad_to_max_length([encoded], self.cfg.max_sequence_length)[0])
      if paraphrase_goal:
        # use unseen lexicons for test
        encoded = paraphrase_sentence(
            self.decode_fn(encoded), synonym_tables=_SYNONYM_TABLES)
      return encoded

    # randomly change subset of embedding
    if randomize_embedding:
      original_embedding = agent.randomize_partial_word_embedding(10)

    all_frames = []
    # `ep_agn` is never filled here (achieved goals are not recorded), but the
    # key is still written so the JSON layout stays unchanged.
    ep_observation, ep_action, ep_agn = [], [], []
    goal_sampled = 0
    timeout_count = 0
    try:
      # All-zero frame with the padded frame shape, used to separate episodes.
      black_frame = pad_image(env.render(mode='rgb_array')) * 0.0
      for _episode in range(num_episode):
        s = env.reset(self.cfg.diverse_scene_content)
        all_frames += [black_frame] * 10
        g_text, p = env.sample_goal()
        if env.all_goals_satisfied:
          # No goal is left in this scene: reset once more, passing True in
          # place of `self.cfg.diverse_scene_content`, and sample again. The
          # new goal text must replace `g_text`, otherwise the stale pre-reset
          # goal would be encoded and shown while `p` refers to the new goal.
          s = env.reset(True)
          g_text, p = env.sample_goal()
        goal_sampled += 1
        g = encode_goal(g_text)
        current_goal_repetition = 0
        for _step in range(self.cfg.max_episode_length):
          # Only explore when ground-truth trajectories are being collected.
          prob = self.epsilon if record_trajectory else 0.0
          action = agent.step(s, g, env, explore_prob=prob)
          s_tp1, r, _, _ = env.step(
              action,
              record_achieved_goal=False,
              goal=p,
              atomic_goal=self.cfg.record_atomic_instruction)
          s = s_tp1
          all_frames.append(
              add_text(pad_image(env.render(mode='rgb_array')), g_text))
          current_goal_repetition += 1

          if record_trajectory:
            ep_observation.append(env.get_direct_obs().tolist())
            ep_action.append(action)

          sample_new_goal = False
          if r > env.shape_val:
            # Goal achieved: hold the frame for 5 ticks with a green caption.
            img = pad_image(env.render(mode='rgb_array'))
            for _ in range(5):
              all_frames.append(add_text(img, g_text, color='green'))
            sample_new_goal = True
          elif current_goal_repetition >= timeout:
            # `elif`: a goal achieved exactly on the last allowed step is a
            # success and must not lower the reported success rate.
            all_frames.append(
                add_text(
                    pad_image(env.render(mode='rgb_array')), 'time out :('))
            timeout_count += 1
            sample_new_goal = True

          if sample_new_goal:
            next_goal_text, p = env.sample_goal()
            if env.all_goals_satisfied:
              break
            g_text = next_goal_text
            g = encode_goal(g_text)
            current_goal_repetition = 0
            goal_sampled += 1
    finally:
      # restore the original embedding, even if the rollout raised
      if randomize_embedding:
        agent.set_embedding(original_embedding)

    # Guard against goal_sampled == 0 (e.g. num_episode == 0), which used to
    # raise ZeroDivisionError.
    if goal_sampled:
      success_rate = 1 - float(timeout_count) / goal_sampled
    else:
      success_rate = 0.0

    print('Rollout finished')
    print('{} instrutctions tried given'.format(goal_sampled))
    print('{} instructions timed out'.format(timeout_count))
    print('{} success rate\n'.format(success_rate))
    if record_video:
      save_video(np.uint8(all_frames), directory, fps=5)
      print('Video saved...')
    if record_trajectory:
      print('Recording trajectory...')
      datum = {
          'obs': ep_observation,
          'action': ep_action,
          'achieved goal': ep_agn,
      }
      save_json(datum, directory[:-4] + '_trajectory.json')
    return success_rate