Пример #1
0
    def _player_step_tuple(self, envs_step_tuples):
        """Build the combined debug observation and the step tuple to return.

        Args:
          envs_step_tuples: mapping with "real_env" and "sim_env" entries,
            each a 4-tuple (ob, reward, done, info).

        Returns:
          Step tuple (ob, reward, done, info) where
            ob: the augmented simulated observation, augmented real
              observation and augmented difference image, concatenated
              along axis 1 in that order.
            reward: reward taken from the real environment tuple.
            done: done flag taken from the real environment tuple.
            info: info taken from the real environment tuple.
        """
        # Only the real environment decides reward/done/info; the simulated
        # environment contributes its observation and reward for display.
        real_ob, real_reward, done, info = envs_step_tuples["real_env"]
        sim_ob, sim_reward, _, _ = envs_step_tuples["sim_env"]
        diff_ob = absolute_hinge_difference(sim_ob, real_ob)

        real_panel = self._augment_observation(
            real_ob, real_reward, self.cumulative_real_reward)
        sim_panel = self._augment_observation(
            sim_ob, sim_reward, self.cumulative_sim_reward)
        # The difference panel is annotated with sim-minus-real rewards.
        diff_panel = self._augment_observation(
            diff_ob,
            sim_reward - real_reward,
            self.cumulative_sim_reward - self.cumulative_real_reward)

        combined_ob = np.concatenate(
            [sim_panel, real_panel, diff_panel], axis=1)
        return combined_ob, real_reward, done, info
Пример #2
0
 def append_debug_frame_batch(sim_obs, real_obs, sim_cum_rews,
                              real_cum_rews, sim_rews, real_rews):
     """Compose a batch of debug frames and append it to the frame list.

     For every batch element a header strip is rendered containing, side
     by side, the simulated rewards caption, the real rewards caption and
     a blank strip. The header batch is then placed in front of the
     [sim_obs, real_obs, difference] concatenation along axis 1, and the
     result is appended to `debug_frame_batches` from the enclosing scope.

     Args:
       sim_obs: batch of simulated observations; `shape[-2]` is used as
         the header image width (assumes a (batch, height, width,
         channels) layout -- TODO confirm against caller).
       real_obs: batch of real observations, same layout as `sim_obs`.
       sim_cum_rews: per-element cumulative simulated rewards.
       real_cum_rews: per-element cumulative real rewards.
       sim_rews: per-element simulated step rewards.
       real_rews: per-element real step rewards.
     """
     # (cumulative, per-step) reward sources: simulated first, then real.
     reward_pairs = ((sim_cum_rews, sim_rews), (real_cum_rews, real_rews))

     header_rows = []
     for idx in range(len(sim_obs)):
         strips = []
         for cum_rews, step_rews in reward_pairs:
             # PIL sizes are (width, height): an 11-pixel-tall strip.
             canvas = PIL_Image().new("RGB", (sim_obs.shape[-2], 11))
             painter = PIL_ImageDraw().Draw(canvas)
             caption = "c:{:3}, r:{:3}".format(int(cum_rews[idx]),
                                               int(step_rews[idx]))
             painter.text((0, 0), caption, fill=(255, 0, 0))
             strips.append(np.asarray(canvas))
         # Blank strip that sits above the difference image.
         strips.append(np.zeros_like(strips[0]))
         header_rows.append(np.concatenate(strips, axis=1))

     diff_obs = absolute_hinge_difference(sim_obs, real_obs)
     header_batch = np.stack(header_rows)
     image_batch = np.concatenate([sim_obs, real_obs, diff_obs], axis=2)
     frame_batch = np.concatenate([header_batch, image_batch], axis=1)
     debug_frame_batches.append(  # pylint: disable=cell-var-from-loop
         frame_batch)