def __init__(self, env_list=None, instr_negatives=False, instr_negatives_similar_only=False,
             seg_level=False, img_scale=1, yaw_rand_range=0, pos_rand_range=0):
    """Initialize the dataset over environments and instruction segments.

    Args:
        env_list: list of environment ids to include (assumed iterable of keys
            into the instruction map — TODO confirm against callers).
        instr_negatives: if True, also load the similar-instruction map used
            to sample negative instruction examples.
        instr_negatives_similar_only: restrict negatives to similar instructions.
        seg_level: if True, flatten (env, set, segment) triples into
            ``self.seg_list`` so the dataset can be batched at segment level.
        img_scale: image scaling factor.
        yaw_rand_range: range for random yaw perturbation (units unverified).
        pos_rand_range: range for random position perturbation.
    """
    # If data is already loaded in memory, use it
    self.env_list = env_list
    self.train_instr, self.dev_instr, self.test_instr, corpus = get_all_instructions()
    self.all_instr = {**self.train_instr, **self.dev_instr, **self.test_instr}
    self.token2term, self.word2token = get_word_to_token_map(corpus)
    self.thesaurus = load_thesaurus()

    self.include_instr_negatives = instr_negatives
    if instr_negatives:
        self.similar_instruction_map = load_similar_instruction_map()
    self.instr_negatives_similar_only = instr_negatives_similar_only

    self.img_scale = img_scale
    self.yaw_rand_range = yaw_rand_range
    self.pos_rand_range = pos_rand_range
    self.pos_rand_image = 0

    # If the data is supposed to be at seg level (not nested envs + segs), then we
    # can support batching, but we need to correctly infer the dataset size.
    self.seg_level = seg_level
    if seg_level:
        self.seg_list = []
        for env in self.env_list:
            # Renamed loop variable from `set` — it shadowed the builtin set type.
            for set_idx, instr_set in enumerate(self.all_instr[env]):
                for seg_idx, _seg in enumerate(instr_set["instructions"]):
                    self.seg_list.append([env, set_idx, seg_idx])
def __init__(self, env_list=None, instr_negatives=False, instr_negatives_similar_only=False,
             seg_level=False, yaw_rand_range=0, img_w=512, img_h=512, map_w=None, map_h=None,
             incl_path=True, incl_endpoint=False, use_semantic_maps=False):
    """Initialize the map/image dataset over environments and instruction segments.

    Args:
        env_list: list of environment ids to include.
        instr_negatives: kept for interface compatibility; the similar-instruction
            map loading is currently disabled.
        instr_negatives_similar_only: restrict negatives to similar instructions.
        seg_level: if True, flatten (env, set, segment) triples into
            ``self.seg_list`` so the dataset can be batched at segment level.
        yaw_rand_range: range for random yaw perturbation (units unverified).
        img_w, img_h: image width/height in pixels.
        map_w, map_h: map width/height; default to the image size when None.
        incl_path: include the path in returned samples — presumably consumed
            downstream; verify against callers.
        incl_endpoint: include the endpoint in returned samples.
        use_semantic_maps: whether to use semantic maps instead of images.
    """
    # If data is already loaded in memory, use it
    self.cuda = False
    self.env_list = env_list
    self.train_instr, self.dev_instr, self.test_instr, corpus = get_all_instructions()
    self.all_instr = {**self.train_instr, **self.dev_instr, **self.test_instr}
    self.token2term, self.word2token = get_word_to_token_map(corpus)
    self.thesaurus = load_thesaurus()

    self.include_instr_negatives = instr_negatives
    # NOTE: loading of the similar-instruction map for negatives is disabled here
    # (unlike the sibling dataset); self.similar_instruction_map is never set.
    self.instr_negatives_similar_only = instr_negatives_similar_only

    self.use_semantic_maps = use_semantic_maps
    self.img_w = img_w
    self.img_h = img_h
    # Map size defaults to the image size unless explicitly overridden.
    if map_w is None:
        self.map_w = self.img_w
        self.map_h = self.img_h
    else:
        self.map_w = map_w
        self.map_h = map_h

    self.yaw_rand_range = yaw_rand_range
    self.latest_img_dbg = None
    self.latest_rot_img_dbg = None
    self.incl_endpoint = incl_endpoint
    self.incl_path = incl_path

    # If the data is supposed to be at seg level (not nested envs + segs), then we
    # can support batching, but we need to correctly infer the dataset size.
    self.seg_level = seg_level
    if seg_level:
        self.seg_list = []
        for env in self.env_list:
            # Renamed loop variable from `set` — it shadowed the builtin set type.
            for set_idx, instr_set in enumerate(self.all_instr[env]):
                for seg_idx, _seg in enumerate(instr_set["instructions"]):
                    self.seg_list.append([env, set_idx, seg_idx])

    # Fixed typo in the status message ("Initialzied" -> "Initialized").
    print("Initialized dataset!")
    print("   yaw range : " + str(self.yaw_rand_range))
    print("   map size: ", self.map_w, self.map_h)
    print("   img size: ", self.img_w, self.img_h)
def __init__(self):
    """Initialize NL auxiliary-label provider: thesaurus, instructions, token maps."""
    super(AuxLabelsNL, self).__init__()
    self.thesaurus = load_thesaurus()
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions()
    self.all_instructions = {
        **train_instructions,
        **dev_instructions,
        **test_instructions,
    }
    self.corpus = corpus
    # BUG FIX: the unpack targets were swapped here. The sibling dataset
    # constructors in this project unpack the same call as
    # `token2term, word2token = get_word_to_token_map(corpus)`, so the previous
    # `self.word2token, self.token2term = ...` left each attribute holding the
    # other map. Made consistent with the rest of the codebase.
    self.token2term, self.word2token = get_word_to_token_map(corpus)
def get_mentioned_landmarks_nl(str_instruction):
    """Find every landmark referenced by a natural-language instruction.

    Returns a pair ``(names, indices)``: the de-duplicated landmark names
    mentioned in the instruction and their corresponding integer indices.
    Returns two empty lists when no thesaurus is available.
    """
    thesaurus = load_thesaurus()
    if thesaurus is None:
        return [], []

    # Normalize the instruction and map each word to its thesaurus term.
    words = split_instruction(clean_instruction(str_instruction))
    terms = words_to_terms(words, thesaurus["word2term"])

    # Collect every landmark grounded by any of the mentioned terms (deduplicated).
    groundings = thesaurus["term_groundings"]
    names = list({
        landmark
        for term in terms
        for landmark in groundings[term]["landmarks"]
    })

    name2index = get_landmark_name_to_index()
    indices = [name2index[name] for name in names]
    return names, indices