def conditional_generator_function(self, c_, c_next_, obs):
    """
    Generate the next observation for a discretized (current, next) state pair.

    Both states are mapped back with ``undiscretize``, one standard-normal noise
    row of width ``self.rand_z_dim`` is drawn per row of the undiscretized current
    state, and the generator ``self.G`` is evaluated on noise and both states.

    :param c_: discretized current state(s)
    :param c_next_: discretized next state(s)
    :param obs: accepted but never read by this function
    :return: the second output of ``self.G`` as a numpy array on the CPU
    """
    bins, value_range = self.discretization_bins, self.P.unif_range
    cur_state = undiscretize(c_, bins, value_range)
    nxt_state = undiscretize(c_next_, bins, value_range)
    noise = from_numpy_to_var(
        np.random.randn(cur_state.shape[0], self.rand_z_dim))
    generated = self.G(noise,
                       from_numpy_to_var(cur_state),
                       from_numpy_to_var(nxt_state))
    _, next_observation = generated
    return next_observation.data.cpu().numpy()
def continuous_transition_function(self, c_):
    """
    Apply the transition model ``self.T`` to a discretized state.

    The state is undiscretized, passed through ``self.T``, clipped to
    ``[unif_range[0] + 1e-6, unif_range[1] - 1e-6]`` and discretized again.

    :param c_: discretized state(s)
    :return: discretized output of the transition model
    """
    bins = self.discretization_bins
    value_range = self.P.unif_range
    continuous = undiscretize(c_, bins, value_range)
    predicted = self.T(from_numpy_to_var(continuous)).data.cpu().numpy()
    # Keep the prediction away from the exact range boundaries before binning.
    margin = 1e-6
    predicted = np.clip(predicted,
                        value_range[0] + margin,
                        value_range[1] - margin)
    return discretize(predicted, bins, value_range)
def get_torch_images_from_numpy(npy_list, conditional, normalize=True, one_image=False):
    """
    Convert numpy images into a torch input with axes reordered to (0, 3, 1, 2).

    :param npy_list: a list of (image, attrs) pairs; when ``one_image`` is True the
        argument itself is transposed instead of stacking column 0
    :param conditional: if True (and ``one_image`` is False) the result is split into
        channels ``[:3]`` and channels ``[3:]``
    :param normalize: if True the values are divided by 255 in place
        (output between 0 and 1 presumably, for 0-255 inputs)
    :param one_image: if True only the converted tensor is returned
    :return: the tensor alone when ``one_image``; otherwise
        ``(first_three_channels, remaining_channels)`` when ``conditional``,
        else ``(tensor, None)``
    """
    axes = (0, 3, 1, 2)
    source = npy_list if one_image else np.stack(npy_list[:, 0])
    images = from_numpy_to_var(np.transpose(source, axes))
    if normalize:
        images /= 255
    if one_image:
        return images
    if not conditional:
        return images, None
    return images[:, :3].contiguous(), images[:, 3:].contiguous()
def astar_plan(self, c_start, c_goal, verbose=True, **kwargs):
    """
    Generate a plan in observation space given start and goal states via A* search.

    Only the first row of ``c_start`` / ``c_goal`` is handed to ``plan_traj_astar``;
    every consecutive pair of states on the returned trajectory is then rendered
    with ``self.G`` for a batch of ``bs`` fresh noise vectors.

    :param c_start: bs x c_dim
    :param c_goal: bs x c_dim
    :param verbose: if True, print the first row of each current state
    :param kwargs: must contain ``start_obs`` and ``goal_obs``
    :return: rollout: list of generated images, the current image of the first
        transition followed by the next image of every transition
        (horizon x bs x channel_dim x img_W x img_H presumably)
    """
    with torch.no_grad():
        batch = c_start.size()[0]
        bins, value_range = self.discretization_bins, self.P.unif_range
        nodes = plan_traj_astar(
            kwargs['start_obs'],
            kwargs['goal_obs'],
            start_state=c_start[0].data.cpu().numpy(),
            goal_state=c_goal[0].data.cpu().numpy(),
            transition_function=self.continuous_transition_function,
            preprocess_function=self.preprocess_function,
            discriminator_function=self.discriminator_function_np,
            generator_function=self.conditional_generator_function)

        frames = []
        for step, (node, node_after) in enumerate(zip(nodes[:-1], nodes[1:])):
            state_now = undiscretize(node.state, bins, value_range)
            state_after = undiscretize(node_after.state, bins, value_range)
            c = from_numpy_to_var(state_now).repeat(batch, 1)
            c_after = from_numpy_to_var(state_after).repeat(batch, 1)
            noise = Variable(torch.randn(c.size()[0], self.rand_z_dim)).cuda()
            img_now, img_after = self.G(noise, c, c_after)
            if step == 0:
                # The very first frame is only available from the first transition.
                frames.append(img_now)
            frames.append(img_after)
            if verbose:
                print("\t c_%d: %s" % (step, print_array(c[0].data)))
    return frames
def get_c_loss(model, c_model, c_type, o, o_next, context, N, o_neg=None):
    """
    Compute the loss for ``c_model`` according to ``c_type``.

    * ``c_type`` starting with ``"cpc"``: mean ``log_sum_exp`` over a zero column
      and the differences (negative - positive) of ``c_model.log_density``; for
      exactly ``"cpc-sptm"`` the negated positive and the negative log densities
      are appended as extra columns first.
    * ``c_type`` starting with ``"sptm"``: ``c_model.loss`` on the predictions for
      positive pairs (label 1) and negative pairs (label 0).

    Negative observations come from ``model.inference`` when ``o_neg`` is None and
    from the first output of ``model(o_neg, ...)`` otherwise.

    :param model: generative model used to produce negative observations
    :param c_model: model providing ``log_density`` / ``__call__`` / ``loss``
    :param c_type: objective selector, see above
    :param o: current observations; ``o.size(0)`` is the batch size
    :param o_next: positive next observations
    :param context: context tensor (repeated ``N`` times for the cpc negatives)
    :param N: number of negative repetitions in the cpc objective
    :param o_neg: optional observations used to produce negatives
    :return: the scalar loss (asserted not to be NaN)
    :raises NotImplementedError: for any other ``c_type``
    """
    batch_size = o.size(0)

    def negative_prediction(cond):
        # Sample a negative from the prior, or reconstruct the supplied negatives.
        if o_neg is None:
            return model.inference(cond, n_samples=1, layer_cond=False)
        return model(o_neg, cond)[0]

    if c_type.startswith("cpc"):
        pos_logp = c_model.log_density(o, o_next, context)

        tiled_context = context.repeat(N, 1, 1, 1)
        neg_obs = negative_prediction(tiled_context)
        neg_logp = c_model.log_density(o.repeat(N, 1, 1, 1), neg_obs, tiled_context)
        neg_logp = neg_logp.view(N, batch_size).t()

        pos_column = pos_logp[:, None]
        columns = [
            from_numpy_to_var(np.zeros((batch_size, 1))),
            neg_logp - pos_column,
        ]
        density_ratio = torch.cat(columns, dim=1)
        if c_type == "cpc-sptm":
            density_ratio = torch.cat(
                [density_ratio, -pos_column, neg_logp], dim=1)
        c_loss = torch.mean(log_sum_exp(density_ratio))
    elif c_type.startswith("sptm"):
        pos_pred = c_model(o, o_next, context)
        neg_pred = c_model(o, negative_prediction(context), context)

        ys = torch.cat([pos_pred, neg_pred])
        labels = torch.cat([torch.ones(batch_size), torch.zeros(batch_size)])
        if torch.cuda.is_available():
            labels = labels.cuda()
        c_loss = c_model.loss(ys, labels)
    else:
        raise NotImplementedError

    assert not torch.isnan(c_loss)
    return c_loss
def localization(o_cur_pred, o_samples_npy, context, c_model):
    """
    Return the index of the sample with the highest raw score against ``o_cur_pred``.

    ``o_cur_pred`` and ``context`` are repeated once per sample and scored against
    all samples in a single ``get_score`` call (no gradients are tracked).

    :param o_cur_pred: observation to localize
    :param o_samples_npy: in numpy
    :param context: context tensor
    :param c_model: model forwarded to ``get_score``
    :return: ``np.argmax`` of the ``"raw"`` scores
    """
    # TODO: do batching like build graph
    n_samples = len(o_samples_npy)
    with torch.no_grad():
        query = o_cur_pred.repeat(n_samples, 1, 1, 1)
        candidates = from_numpy_to_var(o_samples_npy)
        tiled_context = context.repeat(n_samples, 1, 1, 1)
        raw = get_score(c_model, query, candidates, tiled_context, type="all")["raw"]
    return np.argmax(raw)
def find_next_way_point(o_cur_pred, path, o_goal_pred, context, shortcut_thresh, c_model):
    """
    Return the second entry of ``path`` followed by the goal prediction.

    When ``path`` holds a single element, the result is therefore the first row
    of ``o_goal_pred``.

    :param o_cur_pred: not used
    :param path: numpy array of observations along the remaining path
    :param o_goal_pred: goal tensor concatenated after the path
    :param context: not used
    :param shortcut_thresh: not used
    :param c_model: not used
    :return: element 1 of the concatenated sequence
    """
    way_points = from_numpy_to_var(path)
    candidates = torch.cat([way_points, o_goal_pred])
    return candidates[1]
def run_planning_and_inverse_model(env, envname, hp, n_test_locs, test_data, config, test_mode, model, c_model, actor,
                                   savepath):
    """
    Run graph planning plus the inverse model (``actor``) on ``n_test_locs`` tasks.

    For each task the environment is reset to the start state, a memory graph is
    (re)built from sampled observations when needed, the goal is localized in the
    graph, and the actor is rolled out for at most ``hp["T"]`` steps towards the
    way points of the shortest path. Plan visualisations and per-step frames are
    written below ``savepath``.

    :param env: environment; ``reset``, ``render``, ``step_only``,
        ``get_current_img`` and ``get_current_obs`` are called on it
    :param envname: forwarded to ``generate_samples``
    :param hp: dict read with the keys ``replanning_every``, ``use_edge_weight``,
        ``use_vae_samples``, ``threshold_edge``, ``threshold_shortcut``,
        ``n_planning_samples``, ``score_type``, ``T``, ``action_noise`` and
        ``block_size``
    :param n_test_locs: number of tasks taken from ``test_data``
    :param test_data: indexable; each item unpacks into
        ``(curr_obs, goal_obs, start_state, goal_state, context)``
    :param config: forwarded to ``env.render``, ``generate_samples`` and
        ``env.get_current_img``
    :param test_mode: when equal to ``"eval"`` the graph is rebuilt for every task
    :param model: generative model (``inference`` and ``__call__`` are used)
    :param c_model: model forwarded to the scoring / graph helpers
    :param actor: callable ``actor(o_curr_pred, next_o_goal, context)`` giving the action
    :param savepath: output directory for images
    :return: ``(total_dist, total_success, n_test_locs)``
    """
    # Finding threshold
    # TODO: auto collect this & make sure all c_type's work
    replanning = hp["replanning_every"]
    edge_weight = hp["use_edge_weight"]
    using_vae_samples = hp["use_vae_samples"]
    threshold_edge = hp["threshold_edge"]
    threshold_wp = hp["threshold_shortcut"]
    # threshold = 10.5
    n_planning_samples = hp["n_planning_samples"]
    score_type = hp["score_type"]

    # Precompute the graph
    o_samples_npy = graph = node_goal = None
    total_dist = total_success = 0
    for i in range(n_test_locs):
        curr_obs, goal_obs, start_state, goal_state, context = test_data[i]
        env.reset(start_state)
        env.render(config=config)

        # Generate data, Build Graph, Localize goal
        # The graph is built for the first task and reused afterwards, unless
        # test_mode == "eval", in which case it is rebuilt for every task.
        if i == 0 or test_mode == "eval":
            # TODO: do batching to get a larger graph
            if using_vae_samples:
                o_samples_npy = model.inference(context, n_samples=n_planning_samples,
                                                layer_cond=False).cpu().detach().numpy()
            else:
                o_samples_npy = generate_samples(envname, env, config, n_planning_samples).cpu().detach().numpy()
            # threshold = find_threshold(o_samples_npy, context, c_model, min_thres=0, max_thres=50, n_iters=10)
            graph = build_memory_graph(o_samples_npy, context, c_model, score_type, threshold_edge, edge_weight)
            save_image(from_numpy_to_var(o_samples_npy), os.path.join(savepath, 'plan_samples.png'), nrow=10)
        # Precompute goal node and recon o
        # NOTE(review): assumed to run for every task because goal_obs changes per
        # task — confirm this is not meant to sit inside the branch above.
        node_goal = localization(goal_obs, o_samples_npy, context, c_model)
        o_goal_pred, _, _, _ = model(goal_obs, context, determ=True)

        # Actor run
        for t in range(hp["T"]):
            o_curr_pred, _, _, _ = model(curr_obs, context, determ=True)
            # Localize and do planning
            # Always true at t == 0, so the path variables exist before first use.
            if t % replanning == 0:
                node_cur = localization(o_curr_pred, o_samples_npy, context, c_model)
                shortest_path, edge_weights, edge_raw = find_shortest_path(graph, node_cur, node_goal)
                # padding(edge_weights, edge_raw)
            # Next goal img
            # j counts the steps since the last replanning; the start of the
            # remaining path advances by one node every 10 such steps.
            j = t % replanning
            next_way_point = find_next_way_point(
                o_curr_pred,
                o_samples_npy[shortest_path][min(j // 10, len(shortest_path) - 1):],
                o_goal_pred, context, threshold_wp, c_model)
            next_o_goal = next_way_point[None, :]
            # Visualize plans
            # Every 50 steps: current obs, path nodes and goal, annotated with scores.
            if t % 50 == 0:
                img_seq = torch.cat([curr_obs, from_numpy_to_var(o_samples_npy[shortest_path]), goal_obs])
                all_score = get_score(
                    c_model,
                    o_curr_pred.repeat(len(img_seq), 1, 1, 1),
                    img_seq,
                    context.repeat(len(img_seq), 1, 1, 1),
                    type="all"
                )
                img_seq_np = img_seq.detach().cpu().numpy()
                # The comprehension variables i, j below are local to the
                # comprehensions (Python 3) and do not affect the loop variables.
                write_number_on_images(img_seq_np,
                                       ["%d, %.3f" % (i, j) for i, j in zip(all_score["raw"], all_score[score_type])],
                                       position="top-left")
                write_number_on_images(img_seq_np,
                                       ["", ""] + ["%d, %.3f" % (i, j) for i, j in zip(edge_raw, edge_weights)] + [""],
                                       position="bottom-left")
                save_image(torch.Tensor(img_seq_np),
                           os.path.join(savepath, 'plan_task_%d_step_%d.png' % (i, t)),
                           nrow=len(shortest_path) + 2)
            # Get action
            action = actor(o_curr_pred, next_o_goal, context).cpu().numpy()
            # Two-dimensional Gaussian noise scaled by hp["action_noise"] is added.
            _, _ = env.step_only(action + np.random.randn(2) * hp["action_noise"])
            next_img = env.get_current_img(config)
            curr_obs = get_torch_images_from_numpy(next_img[None, :], False, one_image=True)
            # print("Final State: %s; Goal State: %s" % (env.get_current_obs(), goal_state))
            # Save the frame of this step under <savepath>/<task>/<step>.png.
            import matplotlib.pyplot as plt
            if not os.path.exists(os.path.join(savepath, '%d' % i)):
                os.makedirs(os.path.join(savepath, '%d' % i))
            plt.imshow(next_img)
            plt.savefig(os.path.join(savepath, '%d/%03d.png' % (i, t)))
            plt.close()
            curr_dist = np.linalg.norm(env.get_current_obs() - goal_state)
            # Success: every coordinate is within hp["block_size"] of the goal.
            is_success = (np.abs(env.get_current_obs() - goal_state) < hp["block_size"]).all()
            if is_success:
                break
        # NOTE(review): curr_dist / is_success are only bound inside the step loop,
        # so hp["T"] == 0 would fail here — confirm T >= 1 is guaranteed.
        print("Final Distance for Task %d: %f; Success %s" % (i, curr_dist, is_success))
        total_dist += curr_dist
        total_success += is_success
    print("Summary: total dist %f; success %d out of %d" % (total_dist, total_success, n_test_locs))
    return total_dist, total_success, n_test_locs
def get_torch_actions(npy_list):
    """
    Collect the flattened ``'action'`` entry of every row into one torch input.

    :param npy_list: 2-D numpy array whose column 1 holds mappings with an
        ``'action'`` array
    :return: ``from_numpy_to_var`` of the stacked, flattened actions
    """
    flattened = [attrs['action'].reshape(-1) for attrs in npy_list[:, 1]]
    return from_numpy_to_var(np.array(flattened))
def forward(self, s, a=None):
    """
    Draw ``mu + sqrt(var) * eps`` with ``eps`` standard normal.

    :param s: input tensor; its first dimension is the batch size
    :param a: not used
    :return: one sample per row, noise of shape ``(batch, self.s_dim)``
    """
    batch = s.size()[0]
    mu, var = self.get_mu_and_var(s)
    std = var.sqrt()
    eps = from_numpy_to_var(np.random.randn(batch, self.s_dim))
    return mu + std * eps
def plan_hack(self, data_start_loader, data_goal_loader, epoch, metric, keep_best=10):
    """
    Generate visual plans from starts to goals.
    First, find the closest codes for starts and goals.
    Then, generate the plans in the latent space.
    Finally, map the latent plans to visual plans and use the classifier to pick the top K.
    The start image is fixed. The goal image is loaded from data_goal_loader.

    Closest codes are cached as ``.pt`` files under ``<out_dir>/plans`` and reloaded
    when present. Besides one plan image per goal, the confidences are pickled to
    ``<out_dir>/all_confidences.pkl`` and a box plot is saved to
    ``<out_dir>/boxplot.png``.

    :param data_start_loader: iterable of batches; only the first batch is used
    :param data_goal_loader: iterable of batches, one plan per batch
    :param epoch: used in the cache and image file names
    :param metric: forwarded to ``closest_code`` and used in file names
    :param keep_best: number of plans kept by ``get_best_k``
    :return: None
    """
    all_confidences = []
    c_start = None
    est_start_obs = None
    # Only the first start batch is consumed (the start image is fixed).
    for img in data_start_loader:
        if self.fcn:
            start_obs = self.apply_fcn_mse(img[0])
        else:
            start_obs = Variable(img[0]).cuda()
        pt_start = os.path.join(self.out_dir, 'plans', 'c_min_start_%s.pt' % metric)
        # Reuse the cached closest code when available, otherwise compute and cache it.
        if os.path.exists(pt_start):
            z_start, c_start, _, est_start_obs = torch.load(pt_start)
        else:
            z_start, c_start, _, est_start_obs = self.closest_code(
                start_obs, 400, False, metric, 1)
            torch.save([z_start, c_start, _, est_start_obs], pt_start)
        break
    # Hacky for now
    # Wrapping is best effort: a RuntimeError from Variable(...) is ignored and the
    # values are used as they are.
    try:
        c_start = Variable(c_start)
        est_start_obs = Variable(est_start_obs)
    except RuntimeError:
        pass

    for i, img in enumerate(data_goal_loader, 0):
        if self.fcn:
            goal_obs = self.apply_fcn_mse(img[0])
        else:
            goal_obs = Variable(img[0]).cuda()
        pt_goal = os.path.join(
            self.out_dir, 'plans',
            'c_min_goal_%s_%d_epoch_%d.pt' % (metric, i, epoch))
        if os.path.exists(pt_goal):
            z_goal, _, c_goal, est_goal_obs = torch.load(pt_goal)
        else:
            z_goal, _, c_goal, est_goal_obs = self.closest_code(
                goal_obs, 400, True, metric, 1)
            torch.save([z_goal, _, c_goal, est_goal_obs], pt_goal)
        # Hacky for now
        try:
            c_goal = Variable(c_goal)
            est_goal_obs = Variable(est_goal_obs)
        except RuntimeError:
            pass
        # Plan using c_start and c_goal.
        rollout = self.planner(c_start.repeat(self.traj_eval_copies, 1),
                               c_goal.repeat(self.traj_eval_copies, 1),
                               start_obs=start_obs,
                               goal_obs=goal_obs)
        # Insert closest start and goal.
        rollout.insert(
            0, est_start_obs.repeat(self.traj_eval_copies, 1, 1, 1))
        rollout.append(est_goal_obs.repeat(self.traj_eval_copies, 1, 1, 1))
        # Insert real start and goal.
        rollout.insert(0, start_obs.repeat(self.traj_eval_copies, 1, 1, 1))
        rollout.append(goal_obs.repeat(self.traj_eval_copies, 1, 1, 1))
        rollout_best_k, confidences = self.get_best_k(rollout, keep_best)
        rollout_data = torch.stack(rollout_best_k, dim=0)
        # Overlay images start at -1 everywhere; the confidences are drawn onto them
        # and merged into the rollout with an element-wise max below.
        masks = -np.ones(
            (rollout_data.size()[0], keep_best, self.channel_dim, 64, 64),
            dtype=np.float32)
        write_number_on_images(masks, confidences)
        # save_image(torch.max(rollout_data, from_numpy_to_var(masks)).view(-1, self.channel_dim, 64, 64).data,
        #            os.path.join(self.out_dir, 'plans', '%s_min_%s_%d_epoch_%d.png'
        #                         % (self.planner.__name__, metric, i, epoch)),
        #            nrow=keep_best,
        #            normalize=True)
        # Swap the first two axes so that each plan occupies one row of the grid.
        pd = torch.max(rollout_data, from_numpy_to_var(masks)).permute(
            1, 0, 2, 3, 4).contiguous().view(-1, self.channel_dim, 64, 64)
        # confidences.T has size keep_best x rollout length
        all_confidences.append(confidences.T[-1][:-1])
        save_image(pd.data,
                   os.path.join(
                       self.out_dir, 'plans', '%s_min_%s_%d_epoch_%d.png' %
                       (self.planner.__name__, metric, i, epoch)),
                   nrow=int(pd.size()[0] / keep_best),
                   normalize=True)

    all_confidences = np.stack(all_confidences)
    # Number of goals whose first / last recorded confidence exceeds 0.9.
    print((all_confidences[:, 0] > 0.9).sum(),
          (all_confidences[:, -1] > 0.9).sum())
    import pickle as pkl
    with open(os.path.join(self.out_dir, 'all_confidences.pkl'), 'wb') as f:
        pkl.dump(all_confidences, f)
    import matplotlib.pyplot as plt
    # Mean confidence over all goals vs. only the goals whose last confidence > 0.9.
    plt.boxplot([
        all_confidences.mean(1),
        all_confidences[all_confidences[:, -1] > 0.9].mean(1)
    ])
    plt.savefig(os.path.join(self.out_dir, 'boxplot.png'))
def discriminator_function_np(self, obs, obs_next):
    """
    Numpy front end of ``self.discriminator_function``.

    :param obs: numpy observation(s)
    :param obs_next: numpy next observation(s)
    :return: whatever ``self.discriminator_function`` returns for the converted inputs
    """
    obs_var = from_numpy_to_var(obs)
    obs_next_var = from_numpy_to_var(obs_next)
    return self.discriminator_function(obs_var, obs_next_var)
def log_prob(self, s):
    """
    Return ``-log(unif_range[1] - unif_range[0])`` once per row of ``s``.

    NOTE(review): the value depends only on the batch size, not on the number of
    dimensions of ``s`` — confirm that this is intended.

    :param s: tensor whose first dimension is the batch size
    :return: 1-D result of ``from_numpy_to_var`` with one entry per row
    """
    batch = s.size()[0]
    width = self.unif_range[1] - self.unif_range[0]
    values = -np.ones(batch) * np.log(width)
    return from_numpy_to_var(values)
def sample(self, batch_size):
    """
    Draw uniform samples with bounds taken from ``self.unif_range``.

    :param batch_size: number of rows to draw
    :return: ``from_numpy_to_var`` of an array of shape ``(batch_size, self.s_dim)``
    """
    shape = (batch_size, self.s_dim)
    draws = np.random.uniform(*self.unif_range, size=shape)
    return from_numpy_to_var(draws)
def build_memory_graph(o_samples_npy, context, c_model, score_type, edge_thresh=0, edge_weight=False):
    """
    Build a fully connected directed graph over the sampled observations.

    Every sample becomes a node. For every ordered pair ``i != j`` an edge is added
    with ``raw = raw_scores[i, j]`` and
    ``weight = sum_k exp(raw_scores[i, k] - raw_scores[i, j])``.
    The module-level globals ``MIN_SCORE`` / ``MAX_SCORE`` are set to the extrema of
    the ``score_type`` scores, and both score ranges are printed.

    :param o_samples_npy: in numpy; its length must be a multiple of
        ``min(len(o_samples_npy), 500)``
    :param context: context tensor, tiled ``bs`` times for scoring
    :param c_model: model forwarded to ``get_score``
    :param score_type: key of the ``get_score`` result used for MIN_SCORE / MAX_SCORE
    :param edge_thresh: currently unused
    :param edge_weight: currently unused
    :return: the ``nx.DiGraph``
    """
    global MIN_SCORE, MAX_SCORE

    graph = nx.DiGraph()
    datalen = len(o_samples_npy)
    # One node per sampled observation.
    for i in range(datalen):
        graph.add_node(i)

    # Score every ordered pair, processing the candidates in batches of `bs`.
    bs = min(datalen, 500)
    assert datalen % bs == 0
    batch_len = int(datalen / bs)
    all_pair_scores = []
    raw_scores = []
    with torch.no_grad():
        # Loop-invariant: tile the context once instead of once per
        # (sample, batch) pair.
        context_batch = context.repeat(bs, 1, 1, 1)
        for oi in o_samples_npy:
            cscores = []
            rscores = []
            o = from_numpy_to_var(tile(oi, bs))
            for batch in range(batch_len):
                o_next_batch = from_numpy_to_var(o_samples_npy[batch * bs:(batch + 1) * bs])
                scores = get_score(c_model, o, o_next_batch, context_batch, type="all")
                # Weights
                cscores.append(scores[score_type])
                # Raw
                rscores.append(scores["raw"])
            all_pair_scores.append(np.concatenate(cscores))
            raw_scores.append(np.concatenate(rscores))
    all_pair_scores = np.array(all_pair_scores)
    raw_scores = np.array(raw_scores)
    assert all_pair_scores.shape[0] == all_pair_scores.shape[1] == datalen
    assert raw_scores.shape[0] == raw_scores.shape[1] == datalen

    # Recorded for reporting only; the scores themselves are not rescaled.
    MIN_SCORE = all_pair_scores.min()
    MAX_SCORE = all_pair_scores.max()

    for i in range(datalen):
        row = raw_scores[i]
        for j in range(datalen):
            if i != j:
                # The weight grows as the raw score of (i, j) drops relative to
                # the other scores in row i.
                graph.add_edge(i, j,
                               weight=np.exp(row - row[j]).sum(),
                               raw=row[j])
    print("W edge: min & max", MIN_SCORE, MAX_SCORE)
    print("Raw score: min & max", raw_scores.min(), raw_scores.max())
    return graph
def plan(self, data_start_loader, data_goal_loader, epoch, metric, keep_best=10):
    """
    Generate visual plans from starts to goals.
    First, find the closest codes for starts and goals.
    Then, generate the plans in the latent space.
    Finally, map the latent plans to visual plans and use the classifier to pick the top K.
    The start image is loaded from data_start_loader. The goal image is loaded from data_goal_loader.

    The closest codes of pair ``i`` are cached in
    ``<out_dir>/plans/c_min_<metric>_<i>_epoch_<epoch>.pt`` and reloaded when that
    file exists. One image grid per pair is written to ``<out_dir>/plans``.

    :param data_start_loader: iterable of start batches (element 0 is the image)
    :param data_goal_loader: iterable of goal batches, zipped with the starts
    :param epoch: used in the cache and image file names
    :param metric: forwarded to ``closest_code`` and used in file names
    :param keep_best: number of plans kept by ``get_best_k``
    :return: None
    """
    copies = self.traj_eval_copies
    planning_dataloader = zip(data_start_loader, data_goal_loader)
    for i, (start_batch, goal_batch) in enumerate(planning_dataloader):
        if self.fcn:
            start_obs = self.apply_fcn_mse(start_batch[0])
            goal_obs = self.apply_fcn_mse(goal_batch[0])
        else:
            # Without FCN preprocessing use the raw images (same handling as
            # plan_hack); previously start_obs / goal_obs stayed unbound here.
            start_obs = Variable(start_batch[0]).cuda()
            goal_obs = Variable(goal_batch[0]).cuda()

        # Compute c_start and c_goal
        pt_path = os.path.join(
            self.out_dir, 'plans',
            'c_min_%s_%d_epoch_%d.pt' % (metric, i, epoch))
        if os.path.exists(pt_path):
            c_start, c_goal, est_start_obs, est_goal_obs = torch.load(pt_path)
        else:
            _, c_start, _, est_start_obs = self.closest_code(
                start_obs, 400, False, metric, 1)
            _, _, c_goal, est_goal_obs = self.closest_code(
                goal_obs, 400, True, metric, 1)
            torch.save([c_start, c_goal, est_start_obs, est_goal_obs], pt_path)

        # Plan using c_start and c_goal.
        rollout = self.planner(c_start.repeat(copies, 1),
                               c_goal.repeat(copies, 1),
                               start_obs=start_obs,
                               goal_obs=goal_obs)

        # Insert closest start and goal.
        rollout.insert(0, est_start_obs.repeat(copies, 1, 1, 1))
        rollout.append(est_goal_obs.repeat(copies, 1, 1, 1))

        # Insert real start and goal.
        rollout.insert(0, start_obs.repeat(copies, 1, 1, 1))
        rollout.append(goal_obs.repeat(copies, 1, 1, 1))

        rollout_best_k, confidences = self.get_best_k(rollout, keep_best)
        rollout_data = torch.stack(rollout_best_k, dim=0)

        # Overlay images start at -1 everywhere; the confidences are drawn onto
        # them and merged into the rollout with an element-wise max. The spatial
        # size is hard-coded to 64 x 64.
        masks = -np.ones(
            (rollout_data.size()[0], keep_best, self.channel_dim, 64, 64),
            dtype=np.float32)
        write_number_on_images(masks, confidences)

        # Swap the first two axes so that each plan occupies one row of the grid.
        plan_grid = torch.max(rollout_data, from_numpy_to_var(masks)).permute(
            1, 0, 2, 3, 4).contiguous().view(-1, self.channel_dim, 64, 64)
        save_image(plan_grid.data,
                   os.path.join(
                       self.out_dir, 'plans', '%s_min_%s_%d_epoch_%d.png' %
                       (self.planner.__name__, metric, i, epoch)),
                   nrow=int(plan_grid.size()[0] / keep_best),
                   normalize=True)
def forward_soft(self, x):
    """
    Draw ``mu + sqrt(var) * eps`` from the outputs of ``self.forward``.

    :param x: input forwarded to ``self.forward``, which must return ``(mu, var)``
    :return: sample with standard-normal ``eps`` of the same shape as ``var``
    """
    mu, var = self.forward(x)
    std = var.sqrt()
    noise_shape = list(var.size())
    eps = from_numpy_to_var(np.random.randn(*noise_shape))
    return mu + std * eps