Example #1
    def _plot_level_curves(self):
        # Create mesh grid.
        xs = np.linspace(-1, 1, 50)
        ys = np.linspace(-1, 1, 50)
        xgrid, ygrid = np.meshgrid(xs, ys)
        N = len(xs) * len(ys)

        # Tile the default action across the batch and overwrite the NaN
        # placeholders (the two varied dimensions) with the mesh grid points.
        actions = np.tile(self._default_action, (N, 1))
        actions[:, self._var_inds[0]] = xgrid.ravel()
        actions[:, self._var_inds[1]] = ygrid.ravel()

        for ax, obs in zip(self._ax_lst, self._obs_lst):
            repeated_obs = np.repeat(
                obs[None],
                actions.shape[0],
                axis=0,
            )
            qs = eval_np(self._qf, repeated_obs, actions)
            qs = qs.reshape(xgrid.shape)

            cs = ax.contour(xgrid, ygrid, qs, 20)
            self._line_objects += cs.collections
            self._line_objects += ax.clabel(cs,
                                            inline=1,
                                            fontsize=10,
                                            fmt='%.2f')
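The contour call works because ravel and reshape are exact inverses in C order: Q-values computed on the flattened grid land back in mesh layout. A minimal standalone check of that round trip (plain numpy; the names are local to this sketch, not part of the class above):

import numpy as np

xs = np.linspace(-1, 1, 3)
xgrid, ygrid = np.meshgrid(xs, xs)
flat = xgrid.ravel() + ygrid.ravel()   # stand-in for a flat batch of Q-values
grid = flat.reshape(xgrid.shape)       # recover the (3, 3) mesh layout
assert np.array_equal(grid, xgrid + ygrid)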
Example #2
def vis(args):
    imgs = np.load(args.ds)
    vae = joblib.load(args.file)
    losses = []
    for i, image_obs in enumerate(imgs):
        img = normalize_image(image_obs)
        recon, *_ = eval_np(vae, img)
        error = ((recon - img)**2).sum()
        losses.append((i, error))

    # Sort by reconstruction error, worst reconstructions first.
    losses.sort(key=lambda x: -x[1])

    for rank, (i, error) in enumerate(losses[:NUM_SHOWN]):
        image_obs = imgs[i]
        recon, *_ = eval_np(vae, normalize_image(image_obs))

        # (3, 48, 48) -> (48, 48, 3): reversing the axes moves channels last
        # (and swaps H/W, which is harmless for square images).
        img = image_obs.reshape(3, 48, 48).transpose()
        rimg = recon.reshape(3, 48, 48).transpose()

        cv2.imshow(
            "image, rank {}, loss {}".format(rank, error),
            img
        )
        cv2.imshow(
            "recon, rank {}, loss {}".format(rank, error),
            rimg
        )
        print("rank {}\terror {}".format(rank, error))
    for j, (i, error) in enumerate(losses[-NUM_SHOWN:]):
        # The list is sorted worst-first, so the tail holds the best
        # reconstructions; recover each entry's overall rank.
        rank = len(losses) - NUM_SHOWN + j
        image_obs = imgs[i]
        recon, *_ = eval_np(vae, normalize_image(image_obs))

        img = image_obs.reshape(3, 48, 48).transpose()
        rimg = recon.reshape(3, 48, 48).transpose()

        cv2.imshow(
            "image, rank {}, loss {}".format(rank, error),
            img
        )
        cv2.imshow(
            "recon, rank {}, loss {}".format(rank, error),
            rimg
        )
        print("rank {}\terror {}".format(rank, error))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
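The function reads args.ds and args.file, but the CLI wrapper is not shown. A minimal sketch of a driver, where the argument names are inferred from the attribute accesses above and NUM_SHOWN is assumed to be a module-level constant:

if __name__ == "__main__":
    import argparse

    # Hypothetical entry point: argument names mirror ``args.ds``/``args.file``.
    parser = argparse.ArgumentParser()
    parser.add_argument('ds', help='path to a .npy array of image observations')
    parser.add_argument('file', help='path to a joblib-pickled VAE')
    args = parser.parse_args()
    vis(args)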
Example #3
    def get_actions(self, obs_np, image, deterministic=False):
        # ``!= None`` is elementwise on arrays; use an identity check instead.
        obs_np = obs_np[None] if obs_np is not None else None
        return eval_np(self,
                       image[None],
                       obs_np,
                       actions=None,
                       reparameterize=True,
                       deterministic=deterministic,
                       return_log_prob=False)[0]
Example #4
    def get_action(self, current_ob):
        if (self.replan_every_time_step
                or self.t_in_plan == self.planning_horizon
                or self.last_solution is None):
            if self.dynamic_lm and self.best_obs_seq is not None:
                error = np.linalg.norm(current_ob -
                                       self.best_obs_seq[self.t_in_plan + 1])
                self.update_lagrange_multiplier(error)
            goal = self.env.multitask_goal[self.multitask_goal_slice]
            full_solution = self.replan(current_ob, goal)

            x_torch = ptu.np_to_var(full_solution, requires_grad=True)
            current_ob_torch = ptu.np_to_var(current_ob)

            _, actions, next_obs = self.batchify(x_torch, current_ob_torch)
            self.best_action_seq = np.array(
                [ptu.get_numpy(a) for a in actions])
            self.best_obs_seq = np.array([current_ob] +
                                         [ptu.get_numpy(o) for o in next_obs])

            self.last_solution = full_solution
            self.t_in_plan = 0

        # Evaluate the TDM policy along the planned state sequence (numpy
        # in/out); kept as a reference alternative to the optimized actions.
        tdm_actions = eval_np(self.tdm_policy, self.best_obs_seq[:-1],
                              self.best_obs_seq[1:],
                              np.zeros((self.planning_horizon, 1)))
        agent_info = dict(
            best_action_seq=self.best_action_seq[self.t_in_plan:],
            best_obs_seq=self.best_obs_seq[self.t_in_plan:],
        )
        action = self.best_action_seq[self.t_in_plan]
        self.t_in_plan += 1

        return action, agent_info
Example #5
    def get_actions(self, obs):
        return eval_np(self, obs)
Example #6
    def get_actions(self, obs_np, deterministic=False):
        return eval_np(self,
                       obs_np,
                       deterministic=deterministic,
                       execute_actions=True)[0]
Example #7
    def get_actions(self, obs_np, deterministic=False):
        return eval_np(self, obs_np, deterministic=deterministic)[0]
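Every snippet above funnels through eval_np, whose definition is not included here. A minimal sketch of what such a helper plausibly does, assuming a PyTorch module; the to_torch/to_np helpers are illustrative, not the library's API:

import numpy as np
import torch

def eval_np(module, *args, **kwargs):
    """Call a torch module with numpy inputs and return numpy outputs."""
    def to_torch(x):
        return torch.from_numpy(x).float() if isinstance(x, np.ndarray) else x

    def to_np(x):
        return x.detach().cpu().numpy() if isinstance(x, torch.Tensor) else x

    torch_args = tuple(to_torch(a) for a in args)
    torch_kwargs = {k: to_torch(v) for k, v in kwargs.items()}
    with torch.no_grad():
        outputs = module(*torch_args, **torch_kwargs)
    # The call sites above unpack both single outputs and tuples.
    if isinstance(outputs, tuple):
        return tuple(to_np(o) for o in outputs)
    return to_np(outputs)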