def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))  # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    # Save materials for paper and presentation
    if False:
        self.save_viz(images_np_pure)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > 0.5 else 0
    output_action[3] = output_stop

    return output_action
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))  # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    for tok in instruction:
        if tok >= self.params["vocab_size"] or tok < 0:
            raise ValueError(f"Instruction token {tok} is out of the word embedding range (vocab_size={self.params['vocab_size']})")
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        img_in_t = img_in_t.cuda(self.cuda_device)

    self.seq_step += 1

    action = self(img_in_t, instruction, instr_len)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    # Force a stop when close to the maximum allowed trajectory length
    output_stop = 1 if (stop_prob > 0.5 or self.seq_step >= self.trajectory_len - 5) else 0
    output_action[3] = output_stop
    #print("action: ", output_action)

    return output_action
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))

    self.prev_instruction = instruction

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len)

    output_action = action.squeeze().data.cpu().numpy()
    print("action: ", output_action)

    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
    output_action[3] = output_stop

    return output_action
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    prof = SimpleProfiler(print=True)
    prof.tick(".")
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state
    state = Variable(none_padded_seq_to_tensor([state_np]))  # Add the batch dimension

    #print("Act: " + debug_untokenize_instruction(instruction))

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    if first_step:
        self.get_act_start_pose = self.cam_poses_from_states(state[0:1])

    self.seq_step += 1

    # This is for training the policy to mimic the ground-truth state distribution with oracle actions
    # b_traj_gt_w_select = b_traj_ground_truth[b_plan_mask_t[:, np.newaxis, np.newaxis, np.newaxis].expand_as(b_traj_ground_truth)].view([-1] + gtsize)
    traj_gt_w = Variable(self.gt_labels)
    b_poses = self.cam_poses_from_states(state)
    # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
    transformer = MapTransformerBase(
        source_map_size=self.params["global_map_size"],
        world_size_px=self.params["world_size_px"],
        dest_map_size=self.params["local_map_size"],
        world_size_m=self.params["world_size_m"])
    self.maybe_cuda(transformer)
    transformer.set_maps(traj_gt_w, None)
    traj_gt_r, _ = transformer.get_maps(b_poses)
    self.clear_inputs("traj_gt_r_select")
    self.clear_inputs("traj_gt_w_select")
    self.keep_inputs("traj_gt_r_select", traj_gt_r)
    self.keep_inputs("traj_gt_w_select", traj_gt_w)

    action = self(traj_gt_r, firstseg=[self.seq_step == 1])

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
    output_action[3] = output_stop

    return output_action
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))  # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    instruction_str = debug_untokenize_instruction(instruction)

    # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
    if first_step:
        if self.rviz is not None:
            self.rviz.publish_instruction_text("instruction", instruction_str)

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    passive_mode_debug_projections = True
    if passive_mode_debug_projections:
        self.show_landmark_locations(loop=False, states=state)
        self.reset()

    # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions)
    if self.params.get("run_auxiliaries_at_test_time"):
        _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store, reduce_average=True)
        overlaid = self.get_overlaid_classification_results(whole_batch=False)

    # Save materials for analysis and presentation
    if self.params["write_figures"]:
        self.save_viz(images_np_pure, instruction_str)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    output_stop = 1 if stop_prob > self.params["stop_p"] else 0
    output_action[3] = output_stop

    return output_action
def __getitem__(self, idx):
    self.prof.tick("out")
    # If data is already loaded, use it
    if self.data is not None:
        seg_data = self.data[idx]
        raise NotImplementedError("Not implemented and tested")
        if type(seg_data) is int:
            raise NotImplementedError("Mixing dynamically loaded envs with training data is no longer supported.")
    else:
        dataset_name, env_id, seg_idx = self.sample_ids[idx]
        env_data = self.load_env_data(dataset_name, env_id)

        if self.segment_level:
            seg_data = []
            segs_in_data = set()
            for sample in env_data:
                # This is a hack around the dataset format change - some stuff used to be inside the metadata dict,
                # but is now moved into the root level
                if "metadata" not in sample:
                    sample["metadata"] = sample
                # TODO: Set this at rollout time - we know which domain we're rolling out, but this can potentially be mixed up
                sample["metadata"]["domain"] = self.domain
                segs_in_data.add(sample["metadata"]["seg_idx"])

            # Keep the segments for which we have instructions
            segs_in_data_and_instructions = set()
            for _seg_idx in segs_in_data:
                if get_instruction_segment(env_id, 0, _seg_idx, all_instr=self.all_instr_full) is not None:
                    segs_in_data_and_instructions.add(_seg_idx)

            if seg_idx not in segs_in_data_and_instructions:
                if DEBUG: print(f"Segment {env_id}::{seg_idx} not in (data)and(instructions)")
                # If there's a single segment in this entire dataset, just return that segment even if it's not a match.
                if len(segs_in_data) == 1:
                    seg_data = env_data
                    if DEBUG: print(f"   Only one seg in data ({segs_in_data}): returning that")
                # Otherwise return a random segment instead
                elif len(segs_in_data_and_instructions) > 0:
                    seg_idx = random.choice(list(segs_in_data_and_instructions))
                    if DEBUG: print(f"   Returning a random segment from (data)and(instructions): {seg_idx}")
                elif dataset_name == "real" and len(segs_in_data) > 0:
                    seg_idx = random.choice(list(segs_in_data))
                    if DEBUG: print(f"   REAL dataset. Returning a random seg from data: {seg_idx}")
                else:
                    seg_idx = -1
                    if DEBUG: print(f"   No segment found. Skipping example")

            if len(seg_data) == 0:
                if DEBUG: print(f"   Grabbing segment: {seg_idx}")
                for sample in env_data:
                    if sample["metadata"]["seg_idx"] == seg_idx:
                        seg_data.append(sample)
                if DEBUG: print(f"   Returning segment data of length: {len(seg_data)}")
        else:
            seg_data = env_data

    # I get a lot of Nones here in RL training because the dataset index is created based on different data than available!
    # TODO: in RL training, treat entire environment as a single segment and don't distinguish.
    # How? Check above.
    if len(seg_data) < self.min_seg_len:
        print(f"   None reason: len:{len(seg_data)} in {dataset_name}, env:{env_id}, seg:{seg_idx}")
        return None

    if len(seg_data) > self.traj_len:
        seg_data = seg_data[:self.traj_len]

    seg_idx = seg_data[0]["metadata"]["seg_idx"]
    set_idx = seg_data[0]["metadata"]["set_idx"]
    env_id = seg_data[0]["metadata"]["env_id"]
    instr = get_instruction_segment(env_id, set_idx, seg_idx, all_instr=self.all_instr)
    if instr is None and dataset_name != "real":
        #print(f"{dataset_name} Seg {env_id}:{set_idx}:{seg_idx} not present in instruction data")
        return None

    instr = get_instruction_segment(env_id, set_idx, seg_idx, all_instr=self.all_instr_full)
    if instr is None:
        print(f"{dataset_name} Seg {env_id}:{set_idx}:{seg_idx} not present in FULL instruction data. WTF?")
        return None

    # Convert to tensors, replacing Nones with zeros
    images_in = [seg_data[i]["state"].image if i < len(seg_data) else None for i in range(len(seg_data))]
    states = [seg_data[i]["state"].state if i < len(seg_data) else None for i in range(len(seg_data))]

    images_np = standardize_images(images_in)
    images = none_padded_seq_to_tensor(images_np)
    #depth_images_np = standardize_depth_images(images_in)
    #depth_images = none_padded_seq_to_tensor(depth_images_np)
    states = none_padded_seq_to_tensor(states)

    actions = [s["ref_action"] for s in seg_data]
    actions = none_padded_seq_to_tensor(actions)
    stops = [1.0 if s["done"] else 0.0 for s in seg_data]

    # e.g. [1 1 1 1 1 1 0 0 0 0 .. 0] for segment with 6 samples
    mask = [1.0 if s["ref_action"] is not None else 0.0 for s in seg_data]

    stops = torch.FloatTensor(stops)
    mask = torch.FloatTensor(mask)

    # This is a list, converted to tensor in collate_fn
    #if INSTRUCTIONS_FROM_FILE:
    #    tok_instructions = [tokenize_instruction(load_instruction(md["env_id"], md["set_idx"], md["seg_idx"]), self.word2token) if s["md"] is not None else None for s in seg_data]
    #else:
    tok_instructions = [tokenize_instruction(s["instruction"], self.word2token) if s["instruction"] is not None else None for s in seg_data]

    md = [seg_data[i]["metadata"] for i in range(len(seg_data))]
    flag = md[0]["flag"] if "flag" in md[0] else None

    data = {
        "instr": tok_instructions,
        "images": images,
        #"depth_images": depth_images,
        "states": states,
        "actions": actions,
        "stops": stops,
        "masks": mask,
        "flags": flag,
        "md": md
    }

    self.prof.tick("getitem_core")
    for aux_provider_name in self.aux_provider_names:
        aux_datas = resolve_data_provider(aux_provider_name)(seg_data, data)
        for d in aux_datas:
            data[d[0]] = d[1]
        self.prof.tick("getitem_" + aux_provider_name)

    return data
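# Usage sketch (not from the original source): because __getitem__ above returns None
# for segments with missing instructions or too-short data, a DataLoader over this
# dataset needs a collate_fn that filters out the Nones before batching. The helper
# name below is hypothetical; per the comment in __getitem__, the real collate_fn
# would additionally convert the "instr" token lists to tensors.
from torch.utils.data import DataLoader

def collate_skipping_nones(batch):
    # Drop examples for which __getitem__ returned None
    batch = [example for example in batch if example is not None]
    if len(batch) == 0:
        return None
    return batch

#loader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=collate_skipping_nones)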
def get_action(self, state, instruction):
    """
    Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
    :param state: DroneState object with the raw image from the simulator
    :param instruction: Tokenized instruction given the corpus
    #TODO: Absorb corpus within model
    :return:
    """
    # TODO: Simplify this
    self.eval()
    images_np_pure = state.image
    state_np = state.state

    #print("Act: " + debug_untokenize_instruction(instruction))

    images_np = standardize_image(images_np_pure)
    image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
    state = Variable(none_padded_seq_to_tensor([state_np]))  # Add the batch dimension

    first_step = True
    if instruction == self.prev_instruction:
        first_step = False
    self.prev_instruction = instruction

    instruction_str = debug_untokenize_instruction(instruction)

    # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
    if first_step:
        if self.rviz is not None:
            self.rviz.publish_instruction_text("instruction", instruction_str)

    #if first_step:
    #    say(debug_untokenize_instruction(instruction))

    img_in_t = image_fpv
    img_in_t.volatile = True

    instr_len = [len(instruction)] if instruction is not None else None
    instruction = torch.LongTensor(instruction).unsqueeze(0)
    instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

    state.volatile = True

    if self.is_cuda:
        if img_in_t is not None:
            img_in_t = img_in_t.cuda(self.cuda_device)
        state = state.cuda(self.cuda_device)

    step_enc = None
    plan_now = None

    self.seq_step += 1

    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

    # Save materials for analysis and presentation
    if self.params["write_figures"]:
        self.save_viz(images_np_pure, instruction_str)

    output_action = action.squeeze().data.cpu().numpy()
    stop_prob = output_action[3]
    print(f"P(STOP): {stop_prob}")
    output_stop = 1 if stop_prob > self.params["stop_p"] else 0
    output_action[3] = output_stop

    return output_action
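# Usage sketch (illustrative only): the PomdpInterface method names below are
# assumptions based on the docstrings above, not the actual API. A rollout repeatedly
# queries get_action with the latest DroneState and the tokenized instruction, and
# terminates once the thresholded pstop component of the action flips to 1.
def roll_out_policy(env, model, tokenized_instruction, max_steps=100):
    state = env.reset()  # hypothetical: obtain the initial DroneState
    for _ in range(max_steps):
        # action is a numpy array [x, y, theta, pstop], with pstop already binarized
        action = model.get_action(state, tokenized_instruction)
        if action[3] >= 1.0:
            break  # the model signalled STOP
        state = env.step(action)  # hypothetical: advance the simulator by one step
    return state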