def resolve_and_get_ground_truth_static_global(env_id, set_idx, seg_idx, map_size_px, world_size_px):
    """Look up an instruction segment and return its static global ground truth.

    The segment's "start_idx" and "end_idx" are forwarded to
    get_top_down_ground_truth_static_global. map_size_px and world_size_px are
    each passed twice (presumably once per image dimension - TODO confirm).

    :param env_id: environment the segment belongs to
    :param set_idx: instruction set index within the environment
    :param seg_idx: segment index within the instruction set
    :param map_size_px: map size forwarded to the ground-truth generator
    :param world_size_px: world size forwarded to the ground-truth generator
    :return: whatever get_top_down_ground_truth_static_global returns
    """
    segment = get_instruction_segment(env_id, set_idx, seg_idx)
    first_idx, last_idx = segment["start_idx"], segment["end_idx"]
    return get_top_down_ground_truth_static_global(
        env_id,
        first_idx,
        last_idx,
        map_size_px,
        map_size_px,
        world_size_px,
        world_size_px,
    )
def provider_trajectory_ground_truth(segment_data, data, kind="static"):
    """
    Data provider that gives top-down trajectory ground-truth labels.

    A label is produced only for timesteps whose sample is not None and which
    are selected by the planning mask; all other timesteps are skipped.

    :param segment_data: sequence (over timesteps) of samples, entries may be None. Each sample
        is read for its "metadata" dict and, when kind == "dynamic", for its "state".
    :param data: dict of data computed so far for this segment. Must contain "noisy_poses"
        when kind == "dynamic_noisy".
    :param kind: one of "static", "dynamic" or "dynamic_noisy"
    :return: [("traj_ground_truth", labels), ("plan_mask", obs_mask), ("firstseg_mask", firstseg_mask)]
        where labels is the stack (along a new first dimension) of the selected timesteps' labels.
    :raises Exception: if kind is not one of the supported values
    """
    # Fail fast on a bad kind instead of discovering it at the first planning timestep.
    if kind not in ("static", "dynamic", "dynamic_noisy"):
        raise Exception("Unknown trajectory ground truth kind")

    env_id = segment_data[0]["metadata"]["env_id"]

    # TODO: This could be more general than PVN model, but for now it's really not gonna be
    model_params = P.get_current_parameters()["ModelPVN"]["Stage1"]
    plan_every_n_steps = model_params["plan_every_n_steps"]
    #m_size = model_params["local_map_size"]
    m_size = model_params["global_map_size"]
    w_size = model_params["world_size_px"]

    # True for planning timesteps, False for the other timesteps
    obs_mask = get_obs_mask_every_n_and_segstart(plan_every_n_steps, segment_data)
    firstseg_mask = get_obs_mask_segstart(segment_data)

    # Consecutive timesteps usually belong to the same instruction segment, so the
    # segment lookup is memoized for the duration of this call.
    seg_cache = {}
    labels = []
    for timestep, sample in enumerate(segment_data):
        if sample is None or not obs_mask[timestep]:
            continue

        meta = sample["metadata"]
        seg_key = (meta["env_id"], meta["set_idx"], meta["seg_idx"])
        if seg_key not in seg_cache:
            seg_cache[seg_key] = get_instruction_segment(*seg_key)
        seg = seg_cache[seg_key]
        start_idx = seg["start_idx"]
        end_idx = seg["end_idx"]

        if kind == "dynamic":
            pos = sample["state"].state[9:12]
            labels_t = get_top_down_ground_truth_dynamic_global(
                env_id, start_idx, end_idx, pos, m_size, m_size, w_size, w_size)
        elif kind == "dynamic_noisy":
            assert "noisy_poses" in data, "Noisy poses must be computed before computing dynamic ground truth!"
            pos = data["noisy_poses"][timestep].position
            labels_t = get_top_down_ground_truth_dynamic_global(
                env_id, start_idx, end_idx, pos, m_size, m_size, w_size, w_size)
        else:  # kind == "static" (validated above)
            labels_t = get_top_down_ground_truth_static_global(
                env_id, start_idx, end_idx, m_size, m_size, w_size, w_size)

        # For now, use only the first label: append CxHxW
        labels.append(labels_t[0])
        # TODO: for natural language, we'll use the NL functions above, instead of the tlpt ones

    # create labels SxCxHxW
    labels = torch.stack(labels, dim=0)

    return [("traj_ground_truth", labels), ("plan_mask", obs_mask), ("firstseg_mask", firstseg_mask)]
def _segment_matches_auglen(self, segment):
    """Return whether a segment's instruction has the merge length required by self.aug_len.

    :param segment: sequence of samples; only the first sample is inspected, and it must
        carry "env_id", "seg_idx" and "set_idx" at its root level.
    :return: True if self.aug_len is falsy (no filtering requested) or if the instruction
        segment's "merge_len" equals self.aug_len. False otherwise, including when no
        instruction segment is found for the sample.
    """
    if not self.aug_len:
        return True

    head = segment[0]
    env_id, seg_idx, set_idx = head["env_id"], head["seg_idx"], head["set_idx"]
    instr_seg = get_instruction_segment(env_id, set_idx, seg_idx, all_instr=self.all_i)
    # get_instruction_segment returns None for segments without instructions;
    # such a segment cannot match any requested merge length.
    if instr_seg is None:
        return False
    return instr_seg["merge_len"] == self.aug_len
def provider_goal_pos_map(segment_data, data):
    """
    Data provider that gives, for every timestep, the goal location of the
    instruction segment that the timestep belongs to.

    The goal is the path point at the segment's "end_idx" (or the last path point
    when end_idx lies past the end of the path), converted by
    __get_goal_location_airsim. Timesteps whose sample is None get [0.0, 0.0].

    :param segment_data: segment dataset for which to provide data (sequence over timesteps,
        entries may be None; the first entry must not be None)
    :param data: data computed so far for this segment (not used here)
    :return: [("goal_loc", goal_loc_t)] - goal_loc_t is a float tensor built from the
        per-timestep goal locations
    """
    env_id = segment_data[0]["metadata"]["env_id"]
    path = load_path(env_id)

    # The goal only depends on the segment, so compute it once per (set_idx, seg_idx)
    # rather than once per timestep.
    goal_cache = {}
    goal_loc = []
    for sample in segment_data:
        if sample is None:
            goal_loc.append(np.asarray([0.0, 0.0]))
            continue

        meta = sample["metadata"]
        seg_key = (meta["set_idx"], meta["seg_idx"])
        if seg_key not in goal_cache:
            seg = get_instruction_segment(env_id, seg_key[0], seg_key[1])
            end_idx = seg["end_idx"]
            # Clamp to the final path point when the segment end runs past the path.
            end_pt = path[end_idx] if end_idx < len(path) else path[-1]
            goal_cache[seg_key] = __get_goal_location_airsim(end_pt)
        goal_loc.append(goal_cache[seg_key])

    goal_loc_t = torch.from_numpy(np.asarray(goal_loc)).float()
    return [("goal_loc", goal_loc_t)]
def provider_rot_top_down_images(segment_data, data):
    """
    Data provider that gives top-down environment images and trajectory labels,
    both generated through an affine transform derived from each segment's start
    point and yaw.

    Metadata entries are processed in order until the first None entry.

    :param segment_data: object exposing a "metadata" sequence; each entry holds
        "env_id", "set_idx" and "seg_idx"
    :param data: data computed so far for this segment (not used here)
    :return: [("top_down_images", tdimg_t), ("traj_ground_truth", tdlab_t)], each the
        concatenation along dim 0 of the per-entry results
    """
    metadata = segment_data.metadata
    env_id = metadata[0]["env_id"]
    path = load_path(env_id)
    env_image = load_env_img(env_id, 256, 256)

    images = []
    pooled_labels = []
    for entry in metadata:
        # The first missing entry ends the sequence.
        if entry is None:
            break

        segment = get_instruction_segment(env_id, entry["set_idx"], entry["seg_idx"])
        first_idx, last_idx = segment["start_idx"], segment["end_idx"]

        origin, yaw = tdd.get_start_pt_and_yaw(path, first_idx, 256, 256, 0)
        affine = tdd.get_affine_matrix(origin, yaw, 512, 512)

        image_t = tdd.gen_top_down_image(env_image, affine, 512, 512, 256, 256)
        raw_labels_t = tdd.gen_top_down_labels(
            path[first_idx:last_idx], affine, 512, 512, 256, 256, True, True)
        # Max-pool the labels with a kernel of 8.
        labels_t = F.max_pool2d(Variable(raw_labels_t), 8).data

        images.append(image_t)
        pooled_labels.append(labels_t)

    return [
        ("top_down_images", torch.cat(images, dim=0)),
        ("traj_ground_truth", torch.cat(pooled_labels, dim=0)),
    ]
def start_segment_rollout(self, env_id, set_idx, seg_idx):
    """Prepare this object for a rollout of one instruction segment.

    Loads the converted path of the environment, installs it with set_path and
    then selects the segment's [start_idx, end_idx] range with set_current_segment.

    :param env_id: environment to roll out in
    :param set_idx: instruction set index within the environment
    :param seg_idx: segment index within the instruction set
    """
    converted_path = load_and_convert_path(env_id)
    segment = get_instruction_segment(env_id, set_idx, seg_idx)

    self.set_path(converted_path)
    first_idx, last_idx = segment["start_idx"], segment["end_idx"]
    self.set_current_segment(first_idx, last_idx)
def single_segment_rollout(self, env_id, set_idx, seg_idx, do_sample, dagger_beta=0, rl_rollout=True):
    """Roll out the policy on a single instruction segment and collect per-step samples.

    md.IS_ROLLOUT is set to True for the duration of the rollout and is always
    reset to False afterwards, even if the rollout raises.

    :param env_id: environment to roll out in
    :param set_idx: instruction set index within the environment
    :param seg_idx: segment index within the instruction set
    :param do_sample: forwarded to the policy's get_action as `sample`
    :param dagger_beta: forwarded to self.choose_action when an oracle is present
    :param rl_rollout: forwarded to the policy's get_action; when True, each sample is
        additionally passed through self.sample_to_cpu
    :return: list of sample dicts, one per executed step, ending with the step that
        reported done
    """
    instruction_sets = self.all_instructions[env_id][set_idx]['instructions']

    # TODO: Get rid of this idiocy:
    md.IS_ROLLOUT = True
    try:
        instruction_set = get_instruction_segment(
            env_id, set_idx, seg_idx, all_instr=self.all_instructions)

        self.env.set_environment(env_id, instruction_set=instruction_sets, fast=True)
        self.env.set_current_segment(seg_idx)

        self.policy.start_sequence()
        if hasattr(self.policy, "start_segment_rollout"):
            self.policy.start_segment_rollout(env_id, set_idx, seg_idx)
        if self.oracle:
            self.oracle.start_segment_rollout(env_id, set_idx, seg_idx)

        string_instruction = instruction_set["instruction"]
        token_instruction = self.tokenize_string(string_instruction)

        # TODO: Support oracle (including setCurrentSegment, and setting the path)
        rollout_sample = []

        # Reset the drone to the segment starting position:
        state = self.env.reset(seg_idx)

        first = True
        while True:
            action, rl_stuff = self.policy.get_action(
                state, token_instruction, sample=do_sample, rl_rollout=rl_rollout)

            if self.oracle:
                ref_action, _ = self.oracle.get_action(state, token_instruction)
                exec_action = self.choose_action(action, ref_action, dagger_beta)
            else:
                ref_action = action
                exec_action = action

            next_state, extrinsic_reward, done, expired, oob = self.env.step(exec_action)

            # Calculate intrinsic reward (I don't like that this delays the loop)
            if hasattr(self.policy, "calc_intrinsic_rewards") and not self.no_reward:
                intrinsic_rewards = self.policy.calc_intrinsic_rewards(
                    next_state, action, done, first)
            else:
                intrinsic_rewards = {"x": 0}
            intrinsic_reward = sum(intrinsic_rewards.values())

            sample = {
                "instruction": string_instruction,
                "ref_action": ref_action,
                "pol_action": action,
                "action": exec_action,
                "state": state,
                "extrinsic_reward": extrinsic_reward,
                # NOTE(review): the out-of-bounds penalty is applied here but not to
                # "full_reward" below - confirm that this is intended.
                "intrinsic_reward": intrinsic_reward - (1.0 if oob else 0.0),
                "full_reward": extrinsic_reward + intrinsic_reward,
                "done": done,
                "expired": expired,
                "env_id": env_id,
                "set_idx": set_idx,
                "seg_idx": seg_idx,
            }
            sample = dict_merge(sample, rl_stuff)
            if not self.no_reward:
                sample = dict_merge(sample, intrinsic_rewards)
            rollout_sample.append(sample)

            # Multiprocessing has stopped playing nice with PyTorch cuda. Move sample to cpu first.
            if rl_rollout:
                self.sample_to_cpu(sample)

            state = next_state
            first = False
            if done:
                break
    finally:
        # Never leave the global rollout flag set if the rollout fails midway.
        md.IS_ROLLOUT = False

    return rollout_sample
def __getitem__(self, idx):
    """Load one training example (a segment, or a whole environment rollout) as a dict.

    The example is identified by self.sample_ids[idx] = (dataset_name, env_id, seg_idx).
    In segment-level mode, if the requested segment is not available in both the data
    and the full instruction set, a substitute segment is chosen (the only segment in
    the data, else a random one) or the example is skipped.

    :param idx: index into self.sample_ids
    :return: dict with keys "instr", "images", "states", "actions", "stops", "masks",
        "flags", "md", plus whatever the configured aux providers add; or None when the
        example has to be skipped (too short / empty, or no matching instruction).
    :raises NotImplementedError: if self.data is preloaded (that path is not supported)
    """
    self.prof.tick("out")

    # Serving from preloaded data is not supported.
    if self.data is not None:
        raise NotImplementedError("Not implemented and tested")

    dataset_name, env_id, seg_idx = self.sample_ids[idx]
    env_data = self.load_env_data(dataset_name, env_id)

    if self.segment_level:
        seg_data = []
        segs_in_data = set()
        for sample in env_data:
            # This is a hack around the dataset format change - some stuff used to be inside the metadata dict,
            # but is now moved into the root level
            if "metadata" not in sample:
                sample["metadata"] = sample
            # TODO: Set this at rollout time - we know which domain we're rolling out, but this can potentially be mixed up
            sample["metadata"]["domain"] = self.domain
            segs_in_data.add(sample["metadata"]["seg_idx"])

        # Keep the segments for which we have instructions
        segs_in_data_and_instructions = set()
        for _seg_idx in segs_in_data:
            if get_instruction_segment(env_id, 0, _seg_idx, all_instr=self.all_instr_full) is not None:
                segs_in_data_and_instructions.add(_seg_idx)

        if seg_idx not in segs_in_data_and_instructions:
            if DEBUG:
                print(f"Segment {env_id}::{seg_idx} not in (data)and(instructions)")
            # If there's a single segment in this entire dataset, just return that segment even if it's not a match.
            if len(segs_in_data) == 1:
                seg_data = env_data
                if DEBUG:
                    print(f" Only one seg in data ({segs_in_data}): returning that")
            # Otherwise return a random segment instead
            elif segs_in_data_and_instructions:
                seg_idx = random.choice(list(segs_in_data_and_instructions))
                if DEBUG:
                    print(f" Returning a random segment from (data)and(instructions): {seg_idx}")
            elif dataset_name == "real" and segs_in_data:
                seg_idx = random.choice(list(segs_in_data))
                if DEBUG:
                    print(f" REAL dataset. Returning a random seg from data: {seg_idx}")
            else:
                # No sample carries this index, so the collection loop below finds nothing.
                seg_idx = -1
                if DEBUG:
                    print(f" No segment found. Skipping example")

        if len(seg_data) == 0:
            if DEBUG:
                print(f" Grabing segment: {seg_idx}")
            for sample in env_data:
                if sample["metadata"]["seg_idx"] == seg_idx:
                    seg_data.append(sample)
            if DEBUG:
                print(f" Returning segment data of length: {len(seg_data)}")
    else:
        seg_data = env_data

    # I get a lot of Nones here in RL training because the dataset index is created based on different data than available!
    # TODO: in RL training, treat entire environment as a single segment and don't distinguish.
    # How? Check above
    # The emptiness check keeps seg_data[0] below from raising IndexError when min_seg_len is 0.
    if len(seg_data) == 0 or len(seg_data) < self.min_seg_len:
        print(f" None reason: len:{len(seg_data)} in {dataset_name}, env:{env_id}, seg:{seg_idx}")
        return None

    if len(seg_data) > self.traj_len:
        seg_data = seg_data[:self.traj_len]

    # From here on, identify the example by what the loaded data says it is.
    seg_idx = seg_data[0]["metadata"]["seg_idx"]
    set_idx = seg_data[0]["metadata"]["set_idx"]
    env_id = seg_data[0]["metadata"]["env_id"]

    instr = get_instruction_segment(env_id, set_idx, seg_idx, all_instr=self.all_instr)
    if instr is None and dataset_name != "real":
        return None

    instr = get_instruction_segment(env_id, set_idx, seg_idx, all_instr=self.all_instr_full)
    if instr is None:
        print(f"{dataset_name} Seg {env_id}:{set_idx}:{seg_idx} not present in FULL instruction data. WTF?")
        return None

    # Convert to tensors
    images_in = [s["state"].image for s in seg_data]
    states = [s["state"].state for s in seg_data]

    images_np = standardize_images(images_in)
    images = none_padded_seq_to_tensor(images_np)

    states = none_padded_seq_to_tensor(states)

    actions = none_padded_seq_to_tensor([s["ref_action"] for s in seg_data])

    stops = torch.FloatTensor([1.0 if s["done"] else 0.0 for s in seg_data])

    # 1.0 for samples that have a reference action, 0.0 otherwise
    mask = torch.FloatTensor([1.0 if s["ref_action"] is not None else 0.0 for s in seg_data])

    # This is a list, converted to tensor in collate_fn
    tok_instructions = [
        tokenize_instruction(s["instruction"], self.word2token) if s["instruction"] is not None else None
        for s in seg_data
    ]

    md = [s["metadata"] for s in seg_data]
    flag = md[0]["flag"] if "flag" in md[0] else None

    data = {
        "instr": tok_instructions,
        "images": images,
        "states": states,
        "actions": actions,
        "stops": stops,
        "masks": mask,
        "flags": flag,
        "md": md
    }

    self.prof.tick("getitem_core")
    # Each aux provider returns (key, value) pairs that are merged into the example.
    for aux_provider_name in self.aux_provider_names:
        aux_datas = resolve_data_provider(aux_provider_name)(seg_data, data)
        for key, value in aux_datas:
            data[key] = value
        self.prof.tick("getitem_" + aux_provider_name)

    return data