def _plot_level_curves(self):
    # Create mesh grid.
    xs = np.linspace(-1, 1, 50)
    ys = np.linspace(-1, 1, 50)
    xgrid, ygrid = np.meshgrid(xs, ys)
    N = len(xs) * len(ys)

    # Copy default values along the first axis and replace the varied
    # action dimensions with the mesh grid points.
    actions = np.tile(self._default_action, (N, 1))
    actions[:, self._var_inds[0]] = xgrid.ravel()
    actions[:, self._var_inds[1]] = ygrid.ravel()

    for ax, obs in zip(self._ax_lst, self._obs_lst):
        repeated_obs = np.repeat(obs[None], actions.shape[0], axis=0)
        qs = eval_np(self._qf, repeated_obs, actions)
        qs = qs.reshape(xgrid.shape)

        cs = ax.contour(xgrid, ygrid, qs, 20)
        self._line_objects += cs.collections
        self._line_objects += ax.clabel(cs, inline=1, fontsize=10, fmt='%.2f')

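# Every snippet below relies on an `eval_np` helper. The following is a
# minimal sketch of what it is assumed to do, based on rlkit-style
# conventions (convert numpy inputs to torch tensors, run the module,
# convert outputs back to numpy); the exact implementation in the real
# codebase may differ.
import numpy as np
import torch


def _to_torch(x):
    # Convert numpy arrays to float32 tensors; pass everything else
    # (None, bools, scalars) through unchanged.
    if isinstance(x, np.ndarray):
        return torch.from_numpy(x.astype(np.float32))
    return x


def _to_numpy(x):
    return x.detach().cpu().numpy() if torch.is_tensor(x) else x


def eval_np(module, *args, **kwargs):
    """Call a torch module on numpy inputs and return numpy outputs."""
    torch_args = tuple(_to_torch(a) for a in args)
    torch_kwargs = {k: _to_torch(v) for k, v in kwargs.items()}
    with torch.no_grad():
        outputs = module(*torch_args, **torch_kwargs)
    if isinstance(outputs, tuple):
        return tuple(_to_numpy(o) for o in outputs)
    return _to_numpy(outputs)
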
def vis(args):
    imgs = np.load(args.ds)
    vae = joblib.load(args.file)

    # Score every image by its VAE reconstruction error.
    losses = []
    for i, image_obs in enumerate(imgs):
        img = normalize_image(image_obs)
        recon, *_ = eval_np(vae, img)
        error = ((recon - img) ** 2).sum()
        losses.append((i, error))
    # Sort by descending error, so rank 0 is the worst reconstruction.
    losses.sort(key=lambda x: -x[1])

    def show(rank, i, error):
        image_obs = imgs[i]
        recon, *_ = eval_np(vae, normalize_image(image_obs))
        img = image_obs.reshape(3, 48, 48).transpose()
        rimg = recon.reshape(3, 48, 48).transpose()
        cv2.imshow("image, rank {}, loss {}".format(rank, error), img)
        cv2.imshow("recon, rank {}, loss {}".format(rank, error), rimg)
        print("rank {}\terror {}".format(rank, error))

    # Worst NUM_SHOWN reconstructions.
    for rank, (i, error) in enumerate(losses[:NUM_SHOWN]):
        show(rank, i, error)
    # Best NUM_SHOWN reconstructions. The list is sorted by descending
    # error, so the j-th entry of the tail has overall rank
    # len(losses) - NUM_SHOWN + j (the original formula reversed these).
    for j, (i, error) in enumerate(losses[-NUM_SHOWN:]):
        rank = len(losses) - NUM_SHOWN + j
        show(rank, i, error)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

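# Hypothetical CLI wiring for vis(); the real script's argument names may
# differ, but the function above relies only on `args.ds` and `args.file`.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--ds', type=str, help='path to .npy image dataset')
    parser.add_argument('--file', type=str, help='path to pickled VAE')
    vis(parser.parse_args())
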
def get_actions(self, obs_np, image, deterministic=False):
    # `obs_np != None` on a numpy array is an elementwise comparison whose
    # truth value is ambiguous; use an identity check instead.
    obs_np = obs_np[None] if obs_np is not None else None
    return eval_np(
        self,
        image[None],
        obs_np,
        actions=None,
        reparameterize=True,
        deterministic=deterministic,
        return_log_prob=False,
    )[0]

def get_action(self, current_ob):
    replan = (
        self.replan_every_time_step
        or self.t_in_plan == self.planning_horizon
        or self.last_solution is None
    )
    if replan:
        # Optionally adapt the Lagrange multiplier to the one-step
        # prediction error of the previous plan.
        if self.dynamic_lm and self.best_obs_seq is not None:
            error = np.linalg.norm(
                current_ob - self.best_obs_seq[self.t_in_plan + 1]
            )
            self.update_lagrange_multiplier(error)

        goal = self.env.multitask_goal[self.multitask_goal_slice]
        full_solution = self.replan(current_ob, goal)

        x_torch = ptu.np_to_var(full_solution, requires_grad=True)
        current_ob_torch = ptu.np_to_var(current_ob)
        _, actions, next_obs = self.batchify(x_torch, current_ob_torch)
        self.best_action_seq = np.array(
            [ptu.get_numpy(a) for a in actions])
        self.best_obs_seq = np.array(
            [current_ob] + [ptu.get_numpy(o) for o in next_obs])

        self.last_solution = full_solution
        self.t_in_plan = 0

    # For debugging/comparison: the actions the TDM policy would take
    # along the planned state sequence.
    tdm_actions = eval_np(
        self.tdm_policy,
        self.best_obs_seq[:-1],
        self.best_obs_seq[1:],
        np.zeros((self.planning_horizon, 1)),
    )
    agent_info = dict(
        best_action_seq=self.best_action_seq[self.t_in_plan:],
        best_obs_seq=self.best_obs_seq[self.t_in_plan:],
    )
    action = self.best_action_seq[self.t_in_plan]
    self.t_in_plan += 1

    return action, agent_info

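# A minimal sketch of what `update_lagrange_multiplier` might do, assuming
# the standard dual-ascent rule: grow the multiplier when the observed
# model error exceeds a tolerance, shrink it otherwise, and keep it
# non-negative. The attribute names `lm_lr` and `error_tolerance` are
# hypothetical, not taken from the source.
def update_lagrange_multiplier(self, error):
    self.lagrange_multiplier += self.lm_lr * (error - self.error_tolerance)
    self.lagrange_multiplier = max(self.lagrange_multiplier, 0.0)
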
def get_actions(self, obs):
    return eval_np(self, obs)

def get_actions(self, obs_np, deterministic=False):
    return eval_np(
        self, obs_np, deterministic=deterministic, execute_actions=True
    )[0]

def get_actions(self, obs_np, deterministic=False):
    return eval_np(self, obs_np, deterministic=deterministic)[0]

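# Example usage of the get_actions() pattern above, with a toy stand-in
# policy (hypothetical; real policies in the codebase return a tuple whose
# first element is the action batch, which is why [0] is taken above).
import torch.nn as nn


class ToyPolicy(nn.Module):
    def __init__(self, obs_dim, action_dim):
        super().__init__()
        self.fc = nn.Linear(obs_dim, action_dim)

    def forward(self, obs, deterministic=False):
        # Return a tuple so that [0] selects the actions, as above.
        return (torch.tanh(self.fc(obs)),)

    def get_actions(self, obs_np, deterministic=False):
        return eval_np(self, obs_np, deterministic=deterministic)[0]


policy = ToyPolicy(obs_dim=17, action_dim=6)
obs_batch = np.random.randn(32, 17).astype(np.float32)
actions = policy.get_actions(obs_batch, deterministic=True)
assert actions.shape == (32, 6)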