def runtest_fpv_to_global_map():
    img_to_map = FPVToGlobalMap(source_map_size=32, world_size_px=32, world_size=30,
                                img_w=256, img_h=144, res_channels=3, map_channels=3,
                                img_dbg=True)
    import pickle
    import cv2

    with open(test_data_path(), "rb") as fp:
        test_data = pickle.load(fp)

    for i in range(len(test_data["images"])):
        image = test_data["images"][i]
        pose = test_data["cam_poses"][i]

        cv2.imshow("fpv_image", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
        cv2.waitKey(1)

        image = standardize_image(image)
        image_t = Variable(torch.from_numpy(image))
        pose_t = pose.to_torch().to_var()
        pose_t = Pose(pose_t.position.unsqueeze(0), pose_t.orientation.unsqueeze(0))
        image_t = image_t.unsqueeze(0)

        projected, poses = img_to_map(image_t, pose_t, None, show="yes")
        print("Ding")
        print("globalish poses: ", poses)
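# A minimal entry-point sketch for the projection test above, assuming this module
# is run directly and that test data exists at test_data_path() (both taken from
# the code above; nothing here is new behavior).
if __name__ == "__main__":
    runtest_fpv_to_global_map()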
def get_top_down_image_env(self, env_id, egocentric=False):
    """
    To be called externally to retrieve a top-down environment image oriented with the start of the requested segment
    :param env_id: environment id
    :return: top-down environment image as a 1 x C x H x W float tensor
    """
    path = load_path(env_id)
    env_image_in = load_env_img(env_id, self.map_w, self.map_h)

    # If the requested output resolution is bigger than the loaded map, zero-pad
    if self.map_w != self.img_w or self.map_h != self.img_h:
        env_image = np.zeros([self.img_h, self.img_w, env_image_in.shape[2]])
        env_image[0:self.map_h, 0:self.map_w, :] = env_image_in
    else:
        env_image = env_image_in

    #path_img = cf_to_img(path, [env_image.shape[0], env_image.shape[1]])
    #self.plot_path_on_img(env_image, path_img)

    env_image = standardize_image(env_image)
    env_img_t = torch.from_numpy(env_image).unsqueeze(0).float()
    #presenter = Presenter()
    #presenter.show_image(env_img_t[0], "data_img", torch=True, scale=1)
    return env_img_t
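# Hedged usage sketch: fetching the top-down image tensor for one environment and
# checking its layout. The receiver name `dataset` and env id 0 are hypothetical;
# the 1 x C x H x W layout assumes standardize_image returns a C x H x W array.
env_img_t = dataset.get_top_down_image_env(0)
assert env_img_t.dim() == 4  # batch, channels, height, width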
def __getitem__(self, index):
    prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
    prof.tick("out")
    if type(index) == int:
        image = self.images[index]
        lm_pos_fpv = self.lm_pos_fpv[index]
        lm_indices = self.lm_idx[index]
        lm_pos_map = self.lm_pos_map[index]
        prof.tick("retrieve data")

        # Data augmentation (disabled when evaluating)
        out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(
            image, lm_indices, lm_pos_fpv, self.img_h, self.img_w, self.eval, prof)

        # If augmentation cropped out every landmark, retry without augmentation.
        # Check for None before calling len() so the None case doesn't raise.
        if out_lm_indices is None or len(out_lm_indices) == 0:
            out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(
                image, lm_indices, lm_pos_fpv, self.img_h, self.img_w, True, prof)

        out_img = standardize_image(np.array(out_img))
        out_img = torch.from_numpy(out_img)
        out_lm_indices = torch.tensor(out_lm_indices)
        out_lm_pos_fpv = torch.tensor(out_lm_pos_fpv)

        sample = {"poses": self.poses[index],
                  "instructions": [],  # self.instructions[index],
                  "images": out_img,
                  "env_ids": self.env_ids_decompressed[index],
                  "lm_pos_fpv": out_lm_pos_fpv,
                  "lm_indices": out_lm_indices,
                  "lm_pos_map": lm_pos_map}
        prof.tick("dic")
        prof.print_stats()
    """
    elif type(index) == list:
        out_images_list, out_lm_indices_list, out_lm_pos_fpv_list = [], [], []
        for i in index:
            image = self.images[i]
            lm_pos_fpv = self.lm_pos_fpv[i]
            lm_indices = self.lm_idx[i]
            out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(
                image, lm_indices, lm_pos_fpv, IMG_HEIGHT, IMG_WIDTH, self.eval, prof)
            if out_lm_indices is None or len(out_lm_indices) == 0:
                out_img, out_lm_indices, out_lm_pos_fpv = data_augmentation(
                    image, lm_indices, lm_pos_fpv, IMG_HEIGHT, IMG_WIDTH, True, prof)
            out_images_list.append(out_img)
            out_lm_indices_list.append(out_lm_indices)
            out_lm_pos_fpv_list.append(out_lm_pos_fpv)
        sample = {"poses": [self.poses[i] for i in index],
                  "instructions": [],  # self.instructions[index],
                  "lm_mentioned": [],
                  "images": out_images_list,
                  "env_ids": [self.env_ids_decompressed[i] for i in index],
                  "lm_pos_fpv": out_lm_pos_fpv_list,
                  "lm_idx": out_lm_indices_list}
    """
    return sample
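# Hedged usage sketch: wrapping the dataset above in a torch DataLoader. Because
# __getitem__ returns a dict containing variable-length landmark tensors, the
# default collate may fail on some batches; the identity collate_fn below is a
# hypothetical workaround, not the project's actual loader setup.
import torch
from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=4, shuffle=True,
                    collate_fn=lambda samples: samples)  # keep a list of sample dicts
for batch in loader:
    images = torch.stack([s["images"] for s in batch])  # images share a fixed size
    break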
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    # Save materials for paper and presentation
    if False:
        self.save_viz(images_np_pure)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > 0.5 else 0
    output_action[3] = output_stop

    return output_action
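# Hedged rollout sketch for the get_action() contract: keep acting on one
# instruction until the model signals stop. `env` and `model` are hypothetical
# names, and env.step() is assumed to return the next DroneState; the 4D action
# layout (x, y, theta, pstop) comes from the docstring above.
state = env.reset()
while True:
    action = model.get_action(state, instruction)
    state = env.step(action)
    if action[3] >= 1:  # pstop is already thresholded to 0/1 by get_action()
        break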
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None

    # Guard against out-of-vocabulary tokens before the embedding lookup
    for tok in instruction:
        if tok >= self.params["vocab_size"] or tok < 0:
            raise Exception("Word embeddings out of bounds")

    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        img_in_t = img_in_t.cuda(self.cuda_device)

    self.seq_step += 1

    action = self(img_in_t, instruction, instr_len)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    # Force a stop near the end of the allowed trajectory length
    output_stop = 1 if (stop_prob > 0.5 or self.seq_step >= self.trajectory_len - 5) else 0
    output_action[3] = output_stop
    #print("action: ", output_action)

    return output_action
def gen_top_down_image(env_top_down_image, affine, img_w, img_h, map_w, map_h):
    #top_down_image = load_env_img(env_id)
    # TODO: Check for overflow
    seg_img = env_top_down_image.copy()
    seg_img_rot = apply_affine(seg_img, affine, img_w, img_h)

    if DEBUG:
        cv2.imshow("rot_top", seg_img_rot)
        cv2.waitKey(10)

    #self.latest_rot_img_dbg = seg_img_rot

    seg_img_rot = standardize_image(seg_img_rot)
    seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()
    return seg_img_t
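# Hedged usage sketch: rotating a loaded environment image into a segment frame.
# The `affine` matrix would come from something like the get_affine_matrix()
# used in the dataset code below; its exact convention is assumed, not checked.
env_img = load_env_img(2, 256, 256)
seg_img_t = gen_top_down_image(env_img, affine, 256, 256, 128, 128)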
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))

    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len)

    output_action = action.squeeze().data.cpu().numpy()
    print("action: ", output_action)

    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
    output_action[3] = output_stop

    return output_action
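# Note (a sketch, not part of the original code): the .volatile flag used in
# these get_action() variants is pre-0.4 PyTorch. In PyTorch >= 0.4 it is a
# no-op, and inference should instead be wrapped in torch.no_grad(), e.g.:
#
#     with torch.no_grad():
#         action = self(img_in_t, state, instruction, instr_len)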
def __getitem__(self, idx):
    if self.seg_level:
        env_id = self.seg_list[idx][0]
        set_idx = self.seg_list[idx][1]
        seg_idx = self.seg_list[idx][2]
    else:
        env_id = self.env_list[idx]

    print("top_down_dataset_sm __getitem__ load_env_config")
    env_conf_json = load_env_config(env_id)
    landmark_names, landmark_indices, landmark_positions = get_landmark_locations_airsim(env_conf_json)

    top_down_image = load_env_img(env_id)
    path = load_path(env_id)

    img_x = top_down_image.shape[0]
    img_y = top_down_image.shape[1]

    path_in_img_coords = self.cf_to_img(img_x, path)
    landmark_pos_in_img = self.as_to_img(img_x, np.asarray(landmark_positions)[:, 0:2])
    self.pos_rand_image = self.pos_rand_range * img_x

    #self.plot_path_on_img(top_down_image, path_in_img_coords)
    #self.plot_path_on_img(top_down_image, landmark_pos_in_img)
    #cv2.imshow("top_down", top_down_image)
    #cv2.waitKey()

    input_images = []
    input_instructions = []
    label_images = []
    aux_labels = []

    # Load the instruction with the start and end indices for each of the N segments
    if self.seg_level:
        instruction_segments = [self.all_instr[env_id][set_idx]["instructions"][seg_idx]]
    else:
        instruction_segments = self.all_instr[env_id][0]["instructions"]

    for seg_idx, seg in enumerate(instruction_segments):
        start_idx = seg["start_idx"]
        end_idx = seg["end_idx"]
        instruction = seg["instruction"]

        # TODO: Check for overflow
        seg_path = path_in_img_coords[start_idx:end_idx]
        seg_img = top_down_image.copy()

        #test_plot = self.plot_path_on_img(seg_img, seg_path)
        # TODO: Validate the 0.5 choice, should it be 2?
        affine, cropsize = self.get_affine_matrix(seg_path, 0, [int(img_x / 2), int(img_y / 2)], 0.5)
        if affine is None:
            continue
        seg_img_rot = self.apply_affine(seg_img, affine, cropsize)

        seg_labels = np.zeros_like(seg_img[:, :, 0:1]).astype(float)
        seg_labels = self.plot_path_on_img(seg_labels, seg_path)
        seg_labels = gaussian_filter(seg_labels, 4)
        seg_labels_rot = self.apply_affine(seg_labels, affine, cropsize)
        #seg_labels_rot = gaussian_filter(seg_labels_rot, 4)
        seg_labels_rot = self.normalize_0_1(seg_labels_rot)

        # Change to True to visualize the paths / labels
        if False:
            cv2.imshow("rot_img", seg_img_rot)
            cv2.imshow("seg_labels", seg_labels_rot)
            rot_viz = seg_img_rot.astype(np.float64) / 512
            rot_viz[:, :, 0] += seg_labels_rot.squeeze()
            cv2.imshow("rot_viz", rot_viz)
            cv2.waitKey(0)

        tok_instruction = tokenize_instruction(instruction, self.word2token)
        instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)

        # Get landmark classification labels
        landmark_pos_in_seg_img = self.apply_affine_on_pts(landmark_pos_in_img, affine)

        # Down-size images and labels if requested by the model
        if self.img_scale != 1.0:
            seg_img_rot = transform.resize(
                seg_img_rot,
                [seg_img_rot.shape[0] * self.img_scale, seg_img_rot.shape[1] * self.img_scale],
                mode="constant")
            seg_labels_rot = transform.resize(
                seg_labels_rot,
                [seg_labels_rot.shape[0] * self.img_scale, seg_labels_rot.shape[1] * self.img_scale],
                mode="constant")
            landmark_pos_in_seg_img = landmark_pos_in_seg_img * self.img_scale

        seg_img_rot = standardize_image(seg_img_rot)
        seg_labels_rot = standardize_image(seg_labels_rot)
        seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()
        seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float()

        landmark_pos_t = torch.from_numpy(landmark_pos_in_seg_img).unsqueeze(0)
        landmark_indices_t = torch.LongTensor(landmark_indices).unsqueeze(0)

        # Keep only landmarks whose positions fall inside the rotated segment image
        mask1 = torch.gt(landmark_pos_t, 0)
        mask2 = torch.lt(landmark_pos_t, seg_img_t.size(2))
        mask = mask1 * mask2
        mask = mask[:, :, 0] * mask[:, :, 1]

        landmark_pos_t = torch.masked_select(
            landmark_pos_t, mask.unsqueeze(2).expand_as(landmark_pos_t)).view([-1, 2])
        landmark_indices_t = torch.masked_select(landmark_indices_t, mask).view([-1])

        mentioned_names, mentioned_indices = get_mentioned_landmarks(self.thesaurus, instruction)
        mentioned_labels_t = empty_float_tensor(list(landmark_indices_t.size())).long()
        for i, landmark_idx_present in enumerate(landmark_indices_t):
            if landmark_idx_present in mentioned_indices:
                mentioned_labels_t[i] = 1

        aux_label = {
            "landmark_pos": landmark_pos_t,
            "landmark_indices": landmark_indices_t,
            "landmark_mentioned": mentioned_labels_t,
            "visible_mask": mask,
        }

        if self.include_instr_negatives:
            # If we are to be using similar instructions according to the json file, then
            # initialize choices with similar instructions. Otherwise let choices be empty, and they will
            # be filled in the following lines.
            if self.instr_negatives_similar_only:
                choices = self.similar_instruction_map[str(env_id)][str(seg_idx)]
            else:
                choices = []
            # If there are no similar instructions to this instruction, pick a completely random instruction
            while len(choices) == 0:
                env_options = list(self.similar_instruction_map.keys())
                random_env = random.choice(env_options)
                seg_options = list(self.similar_instruction_map[random_env].keys())
                if len(seg_options) == 0:
                    continue
                random_seg = random.choice(seg_options)
                choices = self.similar_instruction_map[random_env][random_seg]

            pick = random.choice(choices)
            picked_env = pick["env_id"]
            picked_seg = pick["seg_idx"]
            picked_set = pick["set_idx"]
            picked_instruction = self.all_instr[picked_env][picked_set]["instructions"][picked_seg]["instruction"]
            tok_fake_instruction = tokenize_instruction(picked_instruction, self.word2token)
            aux_label["negative_instruction"] = torch.LongTensor(tok_fake_instruction).unsqueeze(0)

        input_images.append(seg_img_t)
        input_instructions.append(instruction_t)
        label_images.append(seg_labels_t)
        aux_labels.append(aux_label)

    return [input_images, input_instructions, label_images, aux_labels]
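# Self-contained sketch of the visibility-mask trick used above: keep only the
# 2D landmark positions that fall inside the image bounds, together with their
# class indices. The values and the bound of 256 are illustrative.
import torch

pos = torch.tensor([[[10., 20.], [-5., 40.], [100., 300.]]])  # 1 x N x 2
idx = torch.tensor([[3, 7, 11]])                              # 1 x N
inside = (pos > 0) & (pos < 256)
mask = inside[:, :, 0] & inside[:, :, 1]                      # 1 x N, both coords in bounds
kept_pos = torch.masked_select(pos, mask.unsqueeze(2).expand_as(pos)).view(-1, 2)
kept_idx = torch.masked_select(idx, mask)
# kept_pos == [[10., 20.]] and kept_idx == [3]; the other landmarks fall outside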
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction
    instruction_str = debug_untokenize_instruction(instruction)

    # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
    if first_step:
        if self.rviz is not None:
            self.rviz.publish_instruction_text("instruction", debug_untokenize_instruction(instruction))

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    passive_mode_debug_projections = True
    if passive_mode_debug_projections:
        self.show_landmark_locations(loop=False, states=state)
        self.reset()

    # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions)
    if self.params.get("run_auxiliaries_at_test_time"):
        _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store, reduce_average=True)
        overlaid = self.get_overlaid_classification_results(whole_batch=False)

    # Save materials for analysis and presentation
    if self.params["write_figures"]:
        self.save_viz(images_np_pure, instruction_str)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > self.params["stop_p"] else 0
    output_action[3] = output_stop

    return output_action
def map_affine_test():
    img = load_env_img(2, 128, 128)
    img = standardize_image(img)
    img = torch.from_numpy(img).float().unsqueeze(0)

    pos = np.asarray([15, 15, 0])
    quat = euler.euler2quat(0, 0, 0)
    pose0 = Pose(pos[np.newaxis, :], quat[np.newaxis, :])

    theta1 = 0.5
    pos = np.asarray([15, 15, 0])
    quat = euler.euler2quat(0, 0, theta1)
    pose1 = Pose(pos[np.newaxis, :], quat[np.newaxis, :])

    D = 10.0
    pos = np.asarray([15 + D * math.cos(theta1), 15 + D * math.sin(theta1), 0])
    quat = euler.euler2quat(0, 0, theta1)
    pose2 = Pose(pos[np.newaxis, :], quat[np.newaxis, :])

    affine = MapAffine(128, 128, 128)
    res1 = affine(img, pose0, pose1)
    res2 = affine(res1, pose1, pose2)
    res3 = affine(img, pose0, pose2)

    prof = SimpleProfiler(torch_sync=True, print=True)
    affinebig = MapAffine(128, 256, 128)
    prof.tick("init")
    res3big = affinebig(img, pose0, pose2)
    prof.tick("affinebig")

    img = load_env_img(2, 32, 32)
    img = standardize_image(img)
    img = torch.from_numpy(img).float().unsqueeze(0).cuda()
    affines = MapAffine(32, 64, 32).cuda()
    torch.cuda.synchronize()
    prof.tick("init")
    res3s = affines(img, pose0, pose2)
    prof.tick("affines")

    prof.print_stats()

    print("Start pose: ", pose0)
    print("    Pose 1: ", pose1)
    print("    Pose 2: ", pose2)
    print("Res2, Res3 and Res3Big should align!")

    Presenter().show_image(img[0], "img", torch=True, waitkey=False, scale=2)
    Presenter().show_image(res1.data[0], "res_1", torch=True, waitkey=False, scale=2)
    Presenter().show_image(res2.data[0], "res_2", torch=True, waitkey=False, scale=2)
    Presenter().show_image(res3.data[0], "res_3", torch=True, waitkey=False, scale=2)
    Presenter().show_image(res3big.data[0], "res3big", torch=True, waitkey=True, scale=2)
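# Sketch (an addition, not in the original test): the "should align" claim can
# also be checked numerically inside map_affine_test(). The tolerance is a
# guess, since two chained bilinear resamplings are lossier than one:
#
#     assert (res2 - res3).abs().mean().item() < 0.05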
def affine_2d_test():
    img = load_env_img(2, 128, 128)
    img = standardize_image(img)
    img = torch.from_numpy(img).float().unsqueeze(0)

    px = 64
    py = 64
    theta = 0.5

    c = math.cos(theta)
    s = math.sin(theta)

    t_p = torch.FloatTensor([[1, 0, px], [0, 1, py], [0, 0, 1]]).unsqueeze(0)
    t_r = torch.FloatTensor([[c, -s, 0], [s, c, 0], [0, 0, 1]]).unsqueeze(0)

    mat_np = np.dot(t_p.squeeze().numpy(), t_r.squeeze().numpy())
    mat_np_t = torch.from_numpy(mat_np).unsqueeze(0)

    # Right-multiplying composes transforms so that the right-hand factor is
    # applied first: t_p.bmm(t_r) rotates first, then translates.
    mat = torch.bmm(t_p, t_r)
    #mat1 = t_p
    #mat2 = t_r

    affine_2d = Affine2D()
    res1 = affine_2d(Variable(img), Variable(t_r))
    res2 = affine_2d(res1, Variable(t_p))
    res3 = affine_2d(img, Variable(mat))
    res4 = affine_2d(img, Variable(mat_np_t))
    res3_big = affine_2d(img, Variable(mat), out_size=[512, 512])
    res3_small = affine_2d(img, Variable(mat), out_size=[128, 128])

    Presenter().show_image(res1.data[0], "res_1", torch=True, waitkey=False, scale=4)
    Presenter().show_image(res2.data[0], "res_2", torch=True, waitkey=False, scale=4)
    Presenter().show_image(res3.data[0], "res_3", torch=True, waitkey=False, scale=4)
    Presenter().show_image(res3_big.data[0], "res3_big", torch=True, waitkey=False, scale=4)
    Presenter().show_image(res3_small.data[0], "res3_small", torch=True, waitkey=False, scale=4)
    Presenter().show_image(res4.data[0], "res_4", torch=True, waitkey=True, scale=4)

    print("res2 should be the same as res_3 and res_4")
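# Self-contained sketch of the composition-order comment above: with column
# vectors, (T_p @ T_r) x applies the rotation T_r first, then the translation
# T_p, matching why res2 (rotate, then translate) should equal res3.
import math
import numpy as np

theta = 0.5
c, s = math.cos(theta), math.sin(theta)
T_r = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
T_p = np.array([[1, 0, 64], [0, 1, 64], [0, 0, 1]])
x = np.array([1.0, 0.0, 1.0])  # homogeneous 2D point
assert np.allclose((T_p @ T_r) @ x, T_p @ (T_r @ x))  # rotate first, then translate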
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))
    # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction
    instruction_str = debug_untokenize_instruction(instruction)

    # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
    if first_step:
        if self.rviz is not None:
            self.rviz.publish_instruction_text("instruction", debug_untokenize_instruction(instruction))
    #if first_step:
    #    say(debug_untokenize_instruction(instruction))

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    # Save materials for analysis and presentation
    if self.params["write_figures"]:
        self.save_viz(images_np_pure, instruction_str)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    print(f"P(STOP): {stop_prob}")
    output_stop = 1 if stop_prob > self.params["stop_p"] else 0
    output_action[3] = output_stop

    return output_action
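# Hedged sketch: the minimum params entries this get_action() variant reads,
# gathered from the code above. The values below are placeholders, not
# recommendations from the original source.
params = {
    "stop_p": 0.5,           # stop-probability threshold for emitting a stop action
    "write_figures": False,  # whether to save visualization materials via save_viz()
}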