def _player_step_tuple(self, envs_step_tuples):
    """Build the player-facing step tuple from the real and simulated steps.

    The observation shown to the player is a side-by-side image made of
    three panels, in this order: simulated observation, real observation,
    and their difference (as computed by `absolute_hinge_difference`).
    Each panel is first passed through `self._augment_observation` together
    with a per-step reward and a cumulative reward.

    Args:
        envs_step_tuples: mapping with keys "real_env" and "sim_env", each
            holding a 4-element step tuple (ob, reward, done, info).

    Returns:
        Step tuple (ob, reward, done, info):
            ob: the three augmented panels concatenated along axis 1.
            reward: reward of the real environment step.
            done: done flag of the real environment step.
            info: info of the real environment step.
    """
    # Only the real environment decides reward/done/info; the simulated
    # step contributes its observation and reward for display purposes.
    real_ob, real_reward, real_done, real_info = envs_step_tuples["real_env"]
    sim_ob, sim_reward, _, _ = envs_step_tuples["sim_env"]

    diff_ob = absolute_hinge_difference(sim_ob, real_ob)

    real_panel = self._augment_observation(
        real_ob, real_reward, self.cumulative_real_reward
    )
    sim_panel = self._augment_observation(
        sim_ob, sim_reward, self.cumulative_sim_reward
    )
    # The difference panel reports simulated-minus-real rewards.
    diff_panel = self._augment_observation(
        diff_ob,
        sim_reward - real_reward,
        self.cumulative_sim_reward - self.cumulative_real_reward,
    )

    combined_ob = np.concatenate([sim_panel, real_panel, diff_panel], axis=1)
    return combined_ob, real_reward, real_done, real_info
def append_debug_frame_batch(sim_obs, real_obs, sim_cum_rews, real_cum_rews,
                             sim_rews, real_rews):
    """Append one batch of debug frames to `debug_frame_batches`.

    For every entry in the batch a text header is rendered with the
    cumulative ("c") and per-step ("r") reward, once for the simulated and
    once for the real values, followed by an all-zero panel of the same
    shape. The header is placed on top of the simulated observation, the
    real observation and their `absolute_hinge_difference`.

    Args:
        sim_obs: batch of simulated observations; must expose `.shape` and
            `len()`. Presumably (batch, height, width, channels) - TODO
            confirm against the caller.
        real_obs: batch of real observations, combined with `sim_obs`.
        sim_cum_rews: per-entry cumulative simulated rewards.
        real_cum_rews: per-entry cumulative real rewards.
        sim_rews: per-entry simulated step rewards.
        real_rews: per-entry real step rewards.

    Returns:
        None. The assembled batch is appended to `debug_frame_batches`,
        which is not defined in this block (presumably a variable of the
        enclosing scope - verify).
    """
    # (cumulative, per-step) reward sequences, simulated first, then real.
    reward_sources = ((sim_cum_rews, sim_rews), (real_cum_rews, real_rews))

    header_rows = []
    for frame_index in range(len(sim_obs)):
        panels = []
        for cum_rews, step_rews in reward_sources:
            # Header strip: 11 pixels tall, as wide as sim_obs.shape[-2].
            canvas = PIL_Image().new("RGB", (sim_obs.shape[-2], 11))
            pen = PIL_ImageDraw().Draw(canvas)
            label = "c:{:3}, r:{:3}".format(
                int(cum_rews[frame_index]), int(step_rews[frame_index])
            )
            pen.text((0, 0), label, fill=(255, 0, 0))
            panels.append(np.asarray(canvas))
        # Blank header above the difference panel.
        panels.append(np.zeros_like(panels[0]))
        header_rows.append(np.concatenate(panels, axis=1))

    errs = absolute_hinge_difference(sim_obs, real_obs)
    header_batch = np.stack(header_rows)
    frame_batch = np.concatenate([sim_obs, real_obs, errs], axis=2)
    debug_frame_batches.append(  # pylint: disable=cell-var-from-loop
        np.concatenate([header_batch, frame_batch], axis=1)
    )