def __init__(self, instance_id=0):
    self.presenter = Presenter()
    self.instance_id = instance_id
    self.env = None
    self.word2token = None
    self.all_instructions = None
def __init__(self, run_name="", save_images=True):
    super(EvaluateBase, self).__init__()
    self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
    self.passing_distance = LANDMARK_REGION_RADIUS
    self.results = ResultsLandmarkSide()
    self.presenter = Presenter()
    self.run_name = run_name
    self.save_images = save_images
def forward(self, image_g, pose, sentence_embed, parent=None, show=""):
    # scale to 0-1 range
    #image_g = image_g - torch.min(image_g)
    #image_g = image_g / (torch.max(image_g) + 1e-9)

    # rotate to robot frame
    # TODO: Temporarily changed to local pose
    self.set_map(image_g, pose)
    image_r, _ = self.get_map(pose)
    """
    # normalize mean-0 std-1
    image_r = image_r - torch.mean(image_r)
    image_r = image_r / (torch.std(image_r) + 1e-9)

    ones = torch.ones_like(image_g)
    self.set_map(ones, None)
    cov_r, _ = self.get_map(pose)
    cov_r = cov_r - torch.min(cov_r)
    cov_r /= (torch.max(cov_r) + 1e-9)
    cov_rl = cov_r > 1e-8
    blackcolor = torch.min(image_g)
    #image_r[cov_rl] = blackcolor
    """
    features_r = self.feature_net(image_r)
    if parent is not None:
        parent.keep_inputs("fpv_features", features_r)

    if self.aux_ground:
        self.lang_filter.precompute_conv_weights(sentence_embed)
        features_g = self.lang_filter(features_r)
        if parent is not None:
            parent.keep_inputs("fpv_features_g", features_g)
        features_all = torch.cat([features_g, features_r], dim=1)
    else:
        features_all = features_r

    coverage = torch.ones_like(features_all)

    if show != "":
        Presenter().show_image(image_r.data[0, 0:3], show + "_img", torch=True, scale=1, waitkey=20)
        Presenter().show_image(features_r.data[0, 0:3], show, torch=True, scale=12, waitkey=20)
        #Presenter().show_image(cov_r.data[0, 0:3], show + "_covg", torch=True, scale=1, waitkey=20)

    return features_all, coverage
def __init__(self, resolution=512):
    self.presenter = Presenter()
    self.clear()
    self.current_rollout = {}
    self.current_rollout_name = None
    self.env_image = None
    self.current_timestep = None
    self.world_size_m = P.get_current_parameters()["Setup"]["world_size_m"]
    self.resolution = resolution
def show(self, perturbed_maps, unperturbed_maps, name):
    Presenter().show_image(unperturbed_maps.data[0], name + "_unperturbed", torch=True, waitkey=1, scale=4)
    Presenter().show_image(perturbed_maps.data[0], name + "_perturbed", torch=True, waitkey=1, scale=4)
def __init__(self, run_name="", save_images=True, entire_trajectory=True, custom_instr=None):
    super(EvaluateBase, self).__init__()
    self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
    self.all_i = {**self.train_i, **self.test_i, **self.dev_i}
    self.passing_distance = DEFAULT_PASSING_DISTANCE
    self.results = ResultsLandmarkSide()
    self.presenter = Presenter()
    self.run_name = run_name
    self.save_images = save_images
    self.entire_trajectory = entire_trajectory
    self.custom_instr = custom_instr
def get_viz(self):
    presenter = Presenter()
    out = {"viz_img": []}
    for i, img in enumerate(self.viz_images):
        instruction = self.instructions[i]
        if len(instruction.view([-1])) < 2:
            instruction = [0]
        else:
            instruction = list(instruction.data.cpu().numpy().squeeze())
        instruction_str = debug_untokenize_instruction(instruction)
        viz_img = presenter.overlay_text(img, instruction_str)
        out["viz_img"].append(viz_img)
    return out
def test_rollout_sampler():
    policy, _ = load_model("pvn_full_bidomain")
    policy_state = policy.get_policy_state()
    from visualization import Presenter

    #roller = SimplePolicyRoller(policy_factory)
    roller = SimpleParallelPolicyRoller("pvn_full_bidomain", num_workers=4)
    rollout_sampler = RolloutSampler(roller)

    # TODO: Load some policy
    print("Sampling once")
    rollouts = rollout_sampler.sample_n_rollouts(12, policy_state)
    print("Sampling twice")
    rollouts += rollout_sampler.sample_n_rollouts(12, policy_state)
    print("Sampling thrice")
    rollouts += rollout_sampler.sample_n_rollouts(12, policy_state)

    for rollout in rollouts:
        print("Visualizing rollout")
        for sample in rollout:
            state = sample["state"]
            image = state.get_rgb_image()
            Presenter().show_image(image, "fpv", waitkey=True, scale=4)
    print("Done!")
    # __exit__ takes the standard (exc_type, exc_val, exc_tb) triple when called directly
    roller.__exit__(None, None, None)
    print("ding")
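# A minimal sketch of the same lifecycle with a with-statement, assuming
# SimpleParallelPolicyRoller also implements __enter__ (returning itself) alongside
# the __exit__ called above; this would guarantee worker cleanup even on exceptions:
def test_rollout_sampler_ctx():
    policy, _ = load_model("pvn_full_bidomain")
    policy_state = policy.get_policy_state()
    with SimpleParallelPolicyRoller("pvn_full_bidomain", num_workers=4) as roller:
        rollout_sampler = RolloutSampler(roller)
        rollouts = rollout_sampler.sample_n_rollouts(12, policy_state)
        print(f"Collected {len(rollouts)} rollouts")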
def forward(self, coverage_masks, initpos_masks):
    batch_size = coverage_masks.shape[0]
    coverage_masks_initpos = (coverage_masks + initpos_masks).clamp(0, 1)
    if False:  # debug visualization
        for i in range(batch_size):
            Presenter().show_image(coverage_masks[i, 0], "cov_mask_before", scale=4, waitkey=1)
            Presenter().show_image(coverage_masks_initpos[i, 0], "cov_mask_after", scale=4, waitkey=True)
    return coverage_masks_initpos
def forward(self, maps_w, sentence_embeddings, map_poses_w, cam_poses_w, show=""):
    self.prof.tick(".")
    batch_size = len(maps_w)

    # Initialize layers of the same size as the maps, but with only one channel
    new_layer_size = list(maps_w.size())
    new_layer_size[1] = 1
    all_maps_out_w = empty_float_tensor(new_layer_size, self.is_cuda, self.cuda_device)

    start_poses = self.get_start_poses(cam_poses_w, sentence_embeddings)
    poses_img = [poses_m_to_px(as_pose, self.map_size, self.world_size_px, self.world_size_m)
                 for as_pose in start_poses]
    #poses_img = poses_as_to_img(start_poses, self.world_size, batch_dim=True)

    # Draw each start position as a single bright pixel, clamped to the map bounds
    for i in range(batch_size):
        x = min(max(int(poses_img[i].position.data[0]), 0), new_layer_size[2] - 1)
        y = min(max(int(poses_img[i].position.data[1]), 0), new_layer_size[2] - 1)
        all_maps_out_w[i, 0, x, y] = 10.0

    if show != "":
        Presenter().show_image(all_maps_out_w[0], show, torch=True, waitkey=1)

    self.prof.tick("draw")

    # Step 3: Convert all maps to local frame
    maps_out = torch.cat([Variable(all_maps_out_w), maps_w], dim=1)
    #all_maps_w = torch.cat(all_maps_out_w, dim=0)

    self.prof.loop()
    self.prof.print_stats(10)
    return maps_out, map_poses_w
def forward(self, images, sentence_embeddings, map_poses, proc_mask=None, show=""):
    # If we are supposed to use fewer channels than the input map has, just grab the first N channels
    if images.size(1) > self.in_channels:
        images_in = images[:, 0:self.in_channels, :, :]
    else:
        images_in = images

    # Apply the language-conditioned convolutional filter
    self.lang_filter.precompute_conv_weights(sentence_embeddings)
    images_out = self.lang_filter(images_in)

    if show != "":
        Presenter().show_image(images_out.data[0, 0:3], show, torch=True, scale=4, waitkey=1)

    # If requested, concatenate with the prior input, such that the first feature maps are from the output.
    # That allows chaining these modules and slicing.
    if self.cat_out:
        images_out = torch.cat([images_out, images_in], dim=1)

    self.set_maps(images_out, map_poses)
    return images_out, map_poses
def show_depth(image):
    # image is HxWx4: RGB in channels 0-2, depth in channel 3
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb, "depth_alignment", torch=False, waitkey=1, scale=4)
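# Hypothetical usage sketch: show_depth expects a single HxWx4 numpy frame with RGB
# in the first three channels and depth in the fourth; the frame below is fabricated
# purely for illustration:
import numpy as np
rgbd = np.random.rand(64, 64, 4).astype(np.float32)
show_depth(rgbd)  # grayscale lands in the R/G channels, depth in the B channel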
def forward(self, masks, mask_labels, show="", iteration=0):
    if show != "":
        Presenter().show_image(masks.data[0], "pred_mask", torch=True, waitkey=1, scale=4)
        Presenter().show_image(mask_labels.data[0], "mask_labels", torch=True, waitkey=1, scale=4)

    self.logger.log_image("pred_mask", Presenter().prep_image(masks.data[0], 4), iteration)
    self.logger.log_image("mask_labels", Presenter().prep_image(mask_labels.data[0], 4), iteration)

    if masks.size(1) == 1:
        return False

    # TODO: Handle batches if necessary
    goal_mask = masks[0, 1, :, :]
    goal_mask_flat = goal_mask.view([1, -1])
    max_val, argmax = goal_mask_flat.max(1)
    # Recover 2D coordinates from the flat argmax (integer division, not true division)
    argmax_loc_x = argmax // goal_mask.size(1)
    argmax_loc_y = torch.remainder(argmax, goal_mask.size(1))
    argmax_loc = torch.cat([argmax_loc_x.unsqueeze(1), argmax_loc_y.unsqueeze(1)], 1)

    goal_mask_l = mask_labels[0, 1, :, :]
    goal_mask_flat_l = goal_mask_l.view([1, -1])
    max_val, argmax_l = goal_mask_flat_l.max(1)
    argmax_loc_x_l = argmax_l // goal_mask_l.size(1)
    argmax_loc_y_l = torch.remainder(argmax_l, goal_mask_l.size(1))
    argmax_loc_l = torch.cat([argmax_loc_x_l.unsqueeze(1), argmax_loc_y_l.unsqueeze(1)], 1)

    dist = (argmax_loc - argmax_loc_l).float().norm(dim=1)
    success = dist < self.ok_distance
    return success
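# The flat-argmax trick above in isolation: for an HxW map, flat index i maps to
# (row, col) = (i // W, i % W). A minimal self-contained sketch (names hypothetical):
import torch
mask = torch.rand(16, 16)
flat_idx = mask.view(-1).argmax()
row, col = flat_idx // mask.size(1), flat_idx % mask.size(1)
assert mask[row, col] == mask.max()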
def forward(self, select_dist, all_cam_poses, plan_mask=None, show=""):
    self.prof.tick(".")

    # During rollout, plan_mask will alternate between [True] and [False]
    if plan_mask is None:
        all_dist = select_dist
        return all_dist, all_cam_poses

    full_batch_size = len(all_cam_poses)
    all_dists_out_r = []
    self.prof.tick("maps_to_global")

    # For each timestep, take the latest map that was available, transformed into this timestep.
    # Do only a maximum of one transformation for any map to avoid cascading of errors!
    ptr = 0
    for i in range(full_batch_size):
        this_pose = all_cam_poses[i:i + 1]
        if plan_mask[i]:
            # (the original stored a (dist, pose) tuple in this_obs and appended it directly,
            # which would break the concatenation below; assuming the distribution was intended)
            this_obs = select_dist[ptr:ptr + 1]
            ptr += 1
            self.last_observation = (this_obs, this_pose)
        else:
            assert self.last_observation is not None, "The first observation in a sequence needs to be used!"
            last_map, last_pose = self.last_observation

            # TODO: See if we can speed this up. Perhaps batch for all timesteps in between observations
            self.child_transformer.set_map(last_map.inner_distribution, last_pose)
            # get_map returns a (map, pose) pair in the sibling modules; assuming the same here
            x, _ = self.child_transformer.get_map(this_pose)
            this_obs = Partial2DDistribution(x, last_map.outer_prob_mass)

        all_dists_out_r.append(this_obs)
        if show != "":
            Presenter().show_image(this_obs.inner_distribution.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

    self.prof.tick("integrate")

    inner_list = [x.inner_distribution for x in all_dists_out_r]
    outer_list = [x.outer_prob_mass for x in all_dists_out_r]
    all_dists_out_r = Partial2DDistribution(torch.cat(inner_list, dim=0), torch.cat(outer_list, dim=0))

    self.prof.tick("maps_to_local")
    self.prof.loop()
    self.prof.print_stats(10)
    return all_dists_out_r, all_cam_poses
def forward_one(self, maps_r, other_features, firstseg=None):
    # TODO: Log this somewhere
    if self.map_channels < maps_r.size(1):
        maps_r = maps_r[:, 0:self.map_channels]

    if self.manual:
        # torch.max needs a dim argument to return (values, indices); flatten per example
        max_val, argmax = torch.max(maps_r[:, 1].view(maps_r.size(0), -1), 1)
        print(argmax)

    maps_s = maps_r[:, :, self.t_crop:self.b_crop, self.l_crop:self.r_crop].contiguous()
    # maps_s = self.downsample(maps_r)

    if self.path_only:
        # Copy over the trajectory channel, discarding the goal
        maps_in = torch.zeros_like(maps_s)
        maps_in[:, 0] = maps_s[:, 0]
    else:
        maps_in = maps_s

    DBG = run_md.IS_ROLLOUT  # or True / and False
    if DBG:
        for i in range(len(maps_s)):
            Presenter().show_image(maps_in.data[i], "d", torch=True, waitkey=1, scale=8)

    map_features = maps_in.view([maps_s.size(0), -1])
    # other_features_zero = torch.zeros_like(other_features)
    # mlp_in_features = torch.cat([map_features, other_features_zero], dim=1)
    mlp_in_features = map_features

    if self.use_recurrence:
        if firstseg:
            self.forget_recurrence()
        hist_features = self.last_h
    else:
        # zero vector keeps the MLP input size fixed when recurrence is disabled
        hist_features = torch.zeros([maps_s.size(0), RECURRENCE_SIZE]).to(next(self.parameters()).device)
    mlp_in_features = torch.cat([mlp_in_features, hist_features], dim=1)

    mlp_in_features = self.dropout(mlp_in_features)
    actions_pred = self.mlp(mlp_in_features)

    if self.use_recurrence:
        self.last_h, self.last_c = self.recurrence(actions_pred, (self.last_h, self.last_c))

    # this must be in 0-1 range for BCE loss
    actions_pred = actions_pred.clone()
    actions_pred[:, 3] = torch.sigmoid(actions_pred[:, 3])
    return actions_pred
def forward(self, masks, mask_labels):
    masks = torch.cat(masks, dim=0)
    mask_labels = torch.cat(mask_labels, dim=0)
    if masks.size(1) < mask_labels.size(1):
        mask_labels = mask_labels[:, 0:masks.size(1)].contiguous()

    global pa2d_count
    if DBG and pa2d_count % 50 == 0:
        for i in range(masks.size(0)):
            Presenter().show_image(masks.data[i], "aux_path_pred", torch=True, waitkey=1, scale=4)
            Presenter().show_image(mask_labels.data[i], "aux_path_label", torch=True, waitkey=100, scale=4)
    pa2d_count += 1

    loss = self.loss(masks, mask_labels)
    # TODO: Put accuracy reporting here...
    return loss, 1
def forward(self, coverage_masks_w, cam_poses):
    pos_px = pos_m_to_px(cam_poses.position[0:1],
                         img_size_px=self.world_size_px,
                         world_size_px=self.world_size_px,
                         world_size_m=self.world_size_m)
    batch_size = coverage_masks_w.shape[0]

    # TODO: Don't do this at test-time for everything except the first action!
    assert cam_poses.position.shape[0] > 0, "Not implemented test-time behavior"

    # Rasterize a disc of the given radius around the agent's start position
    pos_mask = torch.zeros_like(coverage_masks_w[0, 0])
    radius = 6  # 6 pixels is a bit less than a meter
    x = pos_px[0][0].item()
    y = pos_px[0][1].item()
    xi = int(x)
    yi = int(y)
    min_x = max(xi - radius, 0)
    min_y = max(yi - radius, 0)
    max_x = min(xi + radius, coverage_masks_w.shape[2])
    max_y = min(yi + radius, coverage_masks_w.shape[2])
    indices = [[i, j] for i in range(min_x, max_x) for j in range(min_y, max_y)
               if (x - i - 0.5) ** 2 + (y - j - 0.5) ** 2 < radius ** 2]
    for i, j in indices:
        pos_mask[i, j] = 1.0

    coverage_masks_w_init_pos = (coverage_masks_w + pos_mask[np.newaxis, np.newaxis, :, :]).clamp(0, 1)

    if True:  # debug visualization
        for i in range(batch_size):
            Presenter().show_image(coverage_masks_w[i, 0], "cov_mask_before", scale=4, waitkey=1)
            Presenter().show_image(coverage_masks_w_init_pos[i, 0], "cov_mask_after", scale=4, waitkey=True)

    return coverage_masks_w_init_pos
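# The Python-loop rasterization above can be vectorized; a minimal sketch of the same
# disc test built on a coordinate grid (assumes a square map and PyTorch >= 1.10 for
# the indexing= keyword; names hypothetical):
import torch
def disc_mask(size_px, cx, cy, radius):
    ii, jj = torch.meshgrid(torch.arange(size_px), torch.arange(size_px), indexing="ij")
    dist_sq = (cx - ii.float() - 0.5) ** 2 + (cy - jj.float() - 0.5) ** 2
    return (dist_sq < radius ** 2).float()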
def forward_deprecated(self, images, cam_poses, add_mask=None, show=""):
    self.prof.tick(".")
    batch_size = len(cam_poses)

    assert add_mask is None or add_mask[0] is not None, "The first observation in a sequence needs to be used!"

    # Step 1: All local maps to global:
    # TODO: Allow inputting global maps when new projector is ready
    self.child_transformer.set_maps(images, cam_poses)
    observations_g, _ = self.child_transformer.get_maps(None)

    all_maps_out_g = []
    self.prof.tick("maps_to_global")

    # TODO: Draw past trajectory on an extra channel of the semantic map
    # Step 2: Integrate serially in the global frame
    for i in range(batch_size):
        # If we don't have a map yet, initialize the map to this observation
        if self.map_memory.latest_maps is None:
            self.map_memory.set_map(observations_g[i:i + 1], None)

        # Allow masking of observations
        if add_mask is None or add_mask[i]:
            # Use the map from this frame
            map_g = observations_g[i:i + 1]
            self.map_memory.set_map(map_g, None)
        else:
            # Use the latest available map oriented in global frame
            map_g, _ = self.map_memory.get_map(None)

        if show != "":
            Presenter().show_image(map_g.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

        all_maps_out_g.append(map_g)

    self.prof.tick("integrate")

    # Step 3: Convert all maps to local frame
    all_maps_g = torch.cat(all_maps_out_g, dim=0)

    # Write gifs for debugging
    self.dbg_write_extra(all_maps_g, None)

    self.child_transformer.set_maps(all_maps_g, None)
    maps_r, _ = self.child_transformer.get_maps(cam_poses)
    self.set_maps(maps_r, cam_poses)

    self.prof.tick("maps_to_local")
    self.prof.loop()
    self.prof.print_stats(10)
    return maps_r, cam_poses
def forward(self, visit_dist_r, map_uncoverage, firstseg=None, eval=False):
    action = self.teleoper.get_command()

    inner_goal_dist = visit_dist_r.inner_distribution
    prob_goal_inside = inner_goal_dist[0, 1].sum().detach().item()

    # Draw a vertical bar whose filled fraction is 1 - P(inside), i.e. the
    # probability mass that the goal lies outside the observed map
    rectangle = np.zeros([100, 20, 3])
    fill_until = int(100 * prob_goal_inside)
    rectangle[fill_until:, :, 0] = 1.0
    Presenter().show_image(rectangle, "P(outside)", scale=4, waitkey=1)

    # Normalize channels for viewing
    inner_goal_dist[0, 0] /= (inner_goal_dist[0, 0].max() + 1e-10)
    inner_goal_dist[0, 1] /= (inner_goal_dist[0, 1].max() + 1e-10)
    Presenter().show_image(inner_goal_dist[0].detach(), "visit_dist", scale=8, waitkey=1)
    Presenter().show_image(map_uncoverage[0].detach(), "unobserved", scale=8, waitkey=1)

    action_t = torch.Tensor(action)
    return action_t
def forward(self, current_maps, coverages, cam_poses, add_mask=None, show=""):
    batch_size = len(cam_poses)
    assert add_mask is None or add_mask[0] is not None, "The first observation in a sequence needs to be used!"

    # If we don't have masked observations, just return each timestep's observations
    if add_mask is None:
        self.set_maps(current_maps, cam_poses)
        return current_maps, cam_poses

    maps_r = []
    # If we have masked observations, then for timesteps where the observation is masked (False),
    # get the previous observation rotated to the current frame
    for i in range(batch_size):
        # If we don't have a map yet, rotate this observation and initialize a map
        if self.latest_map is None:
            self.set_map(current_maps[i:i + 1], cam_poses[i:i + 1])
            map_g, _ = self.get_map(None)
            self.set_map(map_g, None)

        # Allow masking of observations
        if add_mask is None or add_mask[i]:
            # Transform the observation into the global (map) frame
            self.child_transformer.set_map(current_maps[i:i + 1], cam_poses[i:i + 1])
            obs_g, _ = self.child_transformer.get_map(None)
            # Remember this new map
            self.set_map(obs_g, None)

        # Return this map in the camera frame of reference
        map_r, _ = self.get_map(cam_poses[i:i + 1])

        if show != "":
            Presenter().show_image(map_r.data[0, 0:3], show, torch=True, scale=8, waitkey=1)

        maps_r.append(map_r)

    maps_r = torch.cat(maps_r, dim=0)
    self.set_maps(maps_r, cam_poses)
    return maps_r, cam_poses
def browse_pvn_dataset():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    data_params = P.get_current_parameters()["Training"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    #dom = "real"
    dom = "sim"
    dataset = model_sim.get_dataset(
        data=None,
        envs=train_envs,
        domain=dom,
        dataset_names=data_params[f"{dom}_dataset_names"],
        dataset_prefix="supervised",
        eval=False,
        halfway_only=False)

    p = Presenter()
    for example in dataset:
        if example is None:
            continue
        md = example["md"][0]
        print(f"Showing example: {md['env_id']}:{md['set_idx']}:{md['seg_idx']}")
        print(f"  instruction: {md['instruction']}")
        exec_len = len(example["images"])
        for i in range(exec_len):
            print(f"  timestep: {i}")
            img_i = example["images"][i]
            lm_fpv_i = example["lm_pos_fpv"][i]
            if lm_fpv_i is not None:
                img_i = p.plot_pts_on_torch_image(img_i, lm_fpv_i.long())
            p.show_image(img_i, "fpv_img_i", scale=4, waitkey=True)
def configure_landmarks(self, env_id):
    self.env_config = load_and_convert_env_config(env_id)
    self.state_positioning = True
    self.state_instructions_printed = False
    self.subscriber = rospy.Subscriber(self.img_topic, Image, self._image_callback)
    self.enter_monitor = EnterMonitor()
    self.monitor_runner = MonitorRunner(self.enter_monitor)

    env_sim_img = load_env_img(env_id, width=400, height=400, real_drone=False, origin_bottom_left=False)

    new = True
    while True:
        if self.new_image:
            Presenter().show_image(self.image_to_show, "Landmark Positioning", scale=2, waitkey=10)
            Presenter().show_image(env_sim_img, "Sim Image", scale=2, waitkey=10)
            if new:
                cv2.moveWindow("Landmark Positioning", 20, 20)
                cv2.moveWindow("Sim Image", 1000, 20)
                new = False
        if self.enter_monitor.tapped or SKIP_CONFIGURATION:
            break
        sleep(1)

    cv2.destroyWindow("Landmark Positioning")
    cv2.destroyWindow("Sim Image")
    self.subscriber.unregister()
    return self.image_to_show
def forward(self, maps_r, map_structure_r):
    maps_r_cropped = maps_r.inner_distribution[:, :, self.crop_l:self.crop_r, self.crop_l:self.crop_r]
    batch_size = maps_r.inner_distribution.shape[0]

    # Create a context vector that encodes goal observability.
    # Don't backprop into the embedding vectors - don't risk losing the only input we have
    gin = self.goal_in_vec.detach()[np.newaxis, :].repeat([batch_size, 1])
    gout = self.goal_out_vec.detach()[np.newaxis, :].repeat([batch_size, 1])
    vin = self.visit_in_vec.detach()[np.newaxis, :].repeat([batch_size, 1])
    vout = self.visit_out_vec.detach()[np.newaxis, :].repeat([batch_size, 1])

    p_visit_out = maps_r.outer_prob_mass[:, 0:1].detach()
    p_goal_out = maps_r.outer_prob_mass[:, 1:2].detach()

    g_context_vec = gout * p_goal_out + gin * (1 - p_goal_out)
    v_context_vec = vout * p_visit_out + vin * (1 - p_visit_out)
    obs_context_vec = torch.cat([g_context_vec, v_context_vec], dim=1)

    # 64x64 -> 16x16
    uncov_r_pooled = self.avgpool(map_structure_r)

    if False:  # debug visualization; note: conv_in is stale and undefined in this version
        conv_in_np = conv_in[0].data.cpu().numpy().transpose(1, 2, 0)
        # expand to 0-1 range
        conv_in_np[:, :, 0] /= (np.max(conv_in_np[:, :, 0]) + 1e-10)
        conv_in_np[:, :, 1] /= (np.max(conv_in_np[:, :, 1]) + 1e-10)
        conv_in_np[:, :, 2] /= (np.max(conv_in_np[:, :, 2]) + 1e-10)
        Presenter().show_image(conv_in_np, "rl_conv_in", scale=2)
        #Presenter().show_image(uncov_r_pooled[0], "uncov_pooled", scale=4)

    # From 16x16 down to 8x8
    x = self.act(self.conv1(maps_r_cropped))
    x = self.norm1(x)

    # From 16x16 down to 8x8
    c = self.act(self.structconv1(uncov_r_pooled))
    c = self.covnorm1(c)

    comb_map = torch.cat([x, c], dim=1)
    batch_size = x.shape[0]
    lin_in = comb_map.view(batch_size, -1)
    lin_in = torch.cat([lin_in, obs_context_vec], dim=1)
    x = self.act(self.linear1(lin_in))
    x = torch.cat([lin_in, x], dim=1)
    x = self.act(self.linear2(x))
    return x
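# The context vectors above are a soft switch between two learned embeddings, gated by
# the probability mass that fell outside the map: context = e_out * p + e_in * (1 - p).
# A minimal standalone sketch of that convex combination (all names hypothetical):
import torch
e_in = torch.randn(16)   # embedding meaning "target is inside the observed map"
e_out = torch.randn(16)  # embedding meaning "target is outside the observed map"
p_out = torch.tensor([[0.9], [0.1]])          # per-example outside probability, shape (B, 1)
context = e_out * p_out + e_in * (1 - p_out)  # (B, 16), one blended vector per example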
def forward(self, select_images, all_cam_poses, plan_mask=None, show=""):
    self.prof.tick(".")

    # During rollout, plan_mask will alternate between [True] and [False]
    if plan_mask is None:
        all_images = select_images
        return all_images, all_cam_poses

    full_batch_size = len(all_cam_poses)
    all_maps_out_r = []
    self.prof.tick("maps_to_global")

    # For each timestep, take the latest map that was available, transformed into this timestep.
    # Do only a maximum of one transformation for any map to avoid cascading of errors!
    ptr = 0
    for i in range(full_batch_size):
        this_pose = all_cam_poses[i:i + 1]
        if plan_mask[i]:
            this_obs = (select_images[ptr:ptr + 1], this_pose)
            ptr += 1
            self.last_observation = this_obs
        else:
            assert self.last_observation is not None, "The first observation in a sequence needs to be used!"
            last_map, last_pose = self.last_observation

            # TODO: See if we can speed this up. Perhaps batch for all timesteps in between observations
            self.child_transformer.set_map(last_map, last_pose)
            this_obs = self.child_transformer.get_map(this_pose)

        # this_obs is a (map, pose) pair in both branches
        all_maps_out_r.append(this_obs[0])
        if show != "":
            Presenter().show_image(this_obs[0].data[0, 0:3], show, torch=True, scale=8, waitkey=50)

    self.prof.tick("integrate")

    # Step 3: Convert all maps to local frame
    all_maps_r = torch.cat(all_maps_out_r, dim=0)

    # Write gifs for debugging
    #self.dbg_write_extra(all_maps_r, None)

    self.set_maps(all_maps_r, all_cam_poses)
    self.prof.tick("maps_to_local")
    self.prof.loop()
    self.prof.print_stats(10)
    return all_maps_r, all_cam_poses
def unbatch(self, batch):
    # Inputs
    states = self.cuda_var(batch["states"][0])
    seq_len = len(states)
    firstseg_mask = batch["firstseg_mask"][0]  # True for every timestep that is a new instruction segment
    plan_mask = batch["plan_mask"][0]          # True for every timestep that we do visitation prediction
    actions = self.cuda_var(batch["actions"][0])
    actions_select = self.batch_select.one(actions, plan_mask, actions.device)

    # Ground truth visitation distributions (in start and global frames)
    v_dist_w_ground_truth_select = self.cuda_var(batch["traj_ground_truth"][0])
    cam_poses = self.cam_poses_from_states(states)
    cam_poses_select = self.batch_select.one(cam_poses, plan_mask, actions.device)
    v_dist_r_ground_truth_select, poses_r = self.map_transform_w_to_r(v_dist_w_ground_truth_select, None, cam_poses_select)
    self.tensor_store.keep_inputs("v_dist_w_ground_truth_select", v_dist_w_ground_truth_select)
    self.tensor_store.keep_inputs("v_dist_r_ground_truth_select", v_dist_r_ground_truth_select)

    Presenter().show_image(v_dist_w_ground_truth_select.detach().cpu()[0, 0], "v_dist_w_ground_truth_select", waitkey=1, scale=4)
    Presenter().show_image(v_dist_r_ground_truth_select.detach().cpu()[0, 0], "v_dist_r_ground_truth_select", waitkey=1, scale=4)

    return states, actions_select, v_dist_r_ground_truth_select, cam_poses_select, plan_mask, firstseg_mask
def forward(self, images, poses, sentence_embeds, parent=None, show=""):
    self.prof.tick("out")

    features_fpv_vis_only, features_fpv_gnd_only = self.forward_fpv_features(images, sentence_embeds, parent)

    # If we have grounding features, the overall features are a concatenation of grounded and non-grounded features
    if features_fpv_gnd_only is not None:
        features_fpv_all = torch.cat([features_fpv_gnd_only, features_fpv_vis_only], dim=1)
    else:
        features_fpv_all = features_fpv_vis_only

    # Project first-person view features on to the map in egocentric frame
    grid_maps = self.map_projection(poses)
    self.prof.tick("proj_map")
    features_r = self.grid_sampler(features_fpv_all, grid_maps)

    # Obtain an ego-centric map mask of where we have new information
    ones_size = list(features_fpv_all.size())
    ones_size[1] = 1
    tmp_ones = empty_float_tensor(ones_size, self.is_cuda, self.cuda_device).fill_(1.0)
    new_coverages = self.grid_sampler(tmp_ones, grid_maps)

    # Make sure that new_coverage is a 0/1 mask (grid_sampler applies bilinear interpolation)
    # (epsilon guards against an all-zero mask, matching the normalization used elsewhere)
    new_coverages = new_coverages - torch.min(new_coverages)
    new_coverages = new_coverages / (torch.max(new_coverages) + 1e-9)

    self.prof.tick("gsample")

    if show != "":
        Presenter().show_image(images.data[0, 0:3], show + "_img", torch=True, scale=1, waitkey=1)
        Presenter().show_image(features_r.data[0, 0:3], show, torch=True, scale=6, waitkey=1)
        Presenter().show_image(new_coverages.data[0], show + "_covg", torch=True, scale=6, waitkey=1)

    self.prof.loop()
    self.prof.print_stats(10)

    return features_r, new_coverages
def plot_pts(self, image, pts):
    """
    :param image: CxHxW image
    :param pts: Nx2 points - (H,W) coords in the image
    :return:
    """
    image = image.cpu().data.numpy()
    image = image.transpose((1, 2, 0))
    pts = pts.cpu().data.numpy()

    image[:, :, 0] = 0.0
    for pt in pts:
        image[pt[0], pt[1], 0] = 1.0

    Presenter().show_image(image[:, :, 0:3], f"aux_class_2d:{self.name}", torch=False, waitkey=1, scale=8)
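# Hypothetical usage sketch (assuming `aux` is an instance of the module that defines
# plot_pts above); the tensors are fabricated purely for illustration:
import torch
image = torch.rand(3, 32, 32)                    # made-up CxHxW image
pts = torch.tensor([[4, 7], [16, 16], [30, 2]])  # made-up Nx2 (row, col) coordinates
aux.plot_pts(image, pts)  # zeroes channel 0, then lights up one pixel per point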
def _generate_mask(self):
    m = torch.zeros([self.map_size_px, self.map_size_px])
    # Center of the (square) map; the original unpacked a single scalar into two names
    c_x = c_y = self.map_size_px // 2
    for x in range(c_x - self.radius, c_x + self.radius):
        for y in range(c_y - self.radius, c_y + self.radius):
            dx = x - c_x
            dy = y - c_y
            angle = math.atan2(dy, dx)
            dst = math.sqrt(dy ** 2 + dx ** 2)
            if -self.hfov / 2 < angle < self.hfov / 2 and dst < self.radius:
                # The original wrote to m[c_x, c_y], which only ever touches the center
                # pixel; assuming the cell under test was intended
                m[x, y] = 1.0
    if False:
        Presenter().show_image(m, "init_pos_mask", scale=4, waitkey=True)
    # The original had no return statement; returning the mask so the result is usable
    return m
def __init__(self, run_name="", save_images=True, entire_trajectory=True, custom_instr=None, aug_len=None):
    super(EvaluateBase, self).__init__()
    self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
    self.all_i = {**self.train_i, **self.test_i, **self.dev_i}
    self.passing_distance = P.get_current_parameters()["Units"]["passing_distance"]
    self.results = ResultsLandmarkSide()
    self.presenter = Presenter()
    self.run_name = run_name
    self.save_images = save_images
    self.entire_trajectory = entire_trajectory
    self.custom_instr = custom_instr
    self.aug_len = aug_len
    self.visible_map = {}
    self.hfov = P.get_current_parameters()["ModelPVN"]["Stage1"]["cam_h_fov"]
def dyn_gt_test():
    presenter = Presenter()
    # get_all_instructions returns (train, test, dev, corpus) in the other snippets above;
    # the original unpacked it as (train, dev, test), which mislabels the two splits
    train_instr, test_instr, dev_instr, corpus = get_all_instructions()
    all_instr = {**train_instr, **dev_instr, **test_instr}

    for i in range(10):
        path = load_path(i)
        segments = all_instr[i][0]["instructions"]
        for seg in segments:
            start_idx = seg["start_idx"]
            end_idx = seg["end_idx"]
            rand_int = random.randint(10, 100)
            start_pose = Pose(path[start_idx] - rand_int, 0)  # note: currently unused
            if end_idx - start_idx > 0:
                rand_int = random.randint(10, 100)
                new_path = get_dynamic_ground_truth(path[start_idx:end_idx], (path[start_idx] - rand_int))
                new_path1 = get_dynamic_ground_truth_smooth(path[start_idx:end_idx], (path[start_idx] - rand_int))
                presenter.plot_path(i, [path[start_idx:end_idx], new_path, new_path1])