def from_shortest_path(self, viewpoints=None, get_first_feat=False):
    """Roll the environment out along the teacher path and collect features.

    At every step the panoramic feature of the current observations and the
    feature of the candidate chosen by the teacher are recorded, then the
    teacher action is executed in the environment.

    :param viewpoints: [[], [], ....(batch_size)]. Only for dropout viewpoint.
        When given, the visited viewpoint id of every batch item is appended
        to the matching inner list at each step.
    :param get_first_feat: whether output the first feat. Not supported by
        this implementation (the corresponding return was disabled).
    :return: ``((img_feats, can_feats), length)`` where the features are
        stacked along dimension 1 (one entry per step) and ``length`` counts
        the steps taken by each batch item.
    :raises NotImplementedError: if ``get_first_feat`` is true.
    """
    if get_first_feat:
        # The original code reached a bare, undefined name here and crashed
        # with NameError only after the whole rollout had run. Fail fast,
        # before the environment is stepped or `viewpoints` is mutated.
        raise NotImplementedError(
            "get_first_feat=True is not supported by from_shortest_path")

    obs = self.env._get_obs()
    ended = np.array(
        [False] * len(obs))  # Indices match permutation of the model, not env
    length = np.zeros(len(obs), np.int64)
    img_feats = []
    can_feats = []

    while not ended.all():
        if viewpoints is not None:  # default None
            for i, ob in enumerate(obs):
                viewpoints[i].append(ob['viewpoint'])

        # One panoramic feature tensor per step
        # (original note: [batch, 36, 2176]).
        img_feats.append(self.listener._feature_variable(obs))

        teacher_action = self._teacher_action(obs, ended)
        teacher_action = teacher_action.cpu().numpy()
        for i, act in enumerate(teacher_action):
            # A negative action (ignore) or the index one past the last
            # candidate (stop) are both normalised to -1.
            if act < 0 or act == len(obs[i]['candidate']):
                teacher_action[i] = -1

        # The second returned value (object features at the teacher-selected
        # direction) is not used by this method.
        can_feat_i, _ = self._candidate_variable(obs, teacher_action)
        # Feature at the teacher-selected direction
        # (original note: [batch, 2176]).
        can_feats.append(can_feat_i)

        self.make_equiv_action(teacher_action, obs)
        length += (1 - ended)  # only still-running items get one more step
        ended[:] = np.logical_or(ended, (teacher_action == -1))
        obs = self.env._get_obs()

    img_feats = torch.stack(
        img_feats, 1).contiguous()  # [batch_size, total_steps, 36, 2176]
    can_feats = torch.stack(
        can_feats, 1).contiguous()  # [batch_size, total_steps, 2176]
    return (img_feats, can_feats), length
def get_input_feat(self, obs):
    """Build the per-step inputs for a batch of observations.

    :param obs: sequence of observation dicts providing ``'heading'`` and
        ``'elevation'``.
    :return: ``(input_a_t, candidate_feat, candidate_leng)``: the angle
        feature of every observation as a CUDA tensor, followed by the two
        values returned by ``self._candidate_variable(obs)``.
    """
    angle_buf = np.zeros((len(obs), args.angle_feat_size), np.float32)
    for row, ob in zip(angle_buf, obs):
        # `row` is a view into `angle_buf`, so this fills the buffer in place.
        row[:] = utils.angle_feature(ob['heading'], ob['elevation'])
    input_a_t = torch.from_numpy(angle_buf).cuda()

    candidate_feat, candidate_leng = self._candidate_variable(obs)
    return input_a_t, candidate_feat, candidate_leng
def make_simple_candidate(self, candidate, viewId):
    """Re-express candidates relative to the agent's current view heading.

    :param candidate: iterable of candidate dicts with ``'heading'``,
        ``'elevation'`` and ``'feature'`` entries.
    :param viewId: index of the current view; only ``viewId % 12`` is used,
        each unit standing for 30 degrees of heading.
    :return: a new list of shallow copies of the candidates, each with its
        ``'heading'`` made relative to the view heading and the matching
        angle feature appended to its ``'feature'``.
    """
    view_heading = (viewId % 12) * math.radians(30)

    rebased = []
    for cand in candidate:
        entry = cand.copy()
        rel_heading = cand['heading'] - view_heading
        rel_angle = utils.angle_feature(rel_heading, cand['elevation'])
        entry['heading'] = rel_heading
        entry['feature'] = np.concatenate((cand['feature'], rel_angle))
        rebased.append(entry)
    return rebased
def _candidate_variable(self, obs, actions):
    """Gather the feature of the selected candidate for every batch item.

    :param obs: sequence of observation dicts, each with a ``'candidate'``
        list.
    :param actions: one candidate index per observation; ``-1`` (ignore or
        stop) leaves that row as a zero vector.
    :return: CUDA float32 tensor with one row of ``2048 + 4`` values per
        observation: the candidate's image feature in the first
        ``args.feature_size`` columns and its angle feature in the last 4.
    """
    feats = np.zeros((len(obs), 2048 + 4), dtype=np.float32)
    for row, (ob, act) in enumerate(zip(obs, actions)):
        if act == -1:
            # Ignore or Stop --> keep the zero vector as the feature
            continue
        chosen = ob['candidate'][act]
        feats[row, :args.feature_size] = chosen['feature']  # Image feat
        feats[row, -4:] = utils.angle_feature(
            chosen['heading'], chosen['elevation'])  # Position feat
    return torch.from_numpy(feats).cuda()
def make_candidate(self, feature, scanId, viewpointId, viewId, obj_d_feat=None, obj_s_feat=None):
    """Build the list of navigable candidates for one viewpoint.

    On the first request for a ``scanId``/``viewpointId`` pair the simulator
    is stepped through 36 view indices and, for each adjacent viewpoint, the
    view in which it is angularly closest to the view centre is kept. A
    reduced record of each candidate is stored in
    ``self.buffered_state_dict``; later requests rebuild the candidates from
    that record without touching the simulator.

    :param feature: indexable by view index (0..35), giving the visual
        feature of each view.
    :param scanId: scan identifier passed to the simulator.
    :param viewpointId: viewpoint identifier passed to the simulator.
    :param viewId: current view index; ``viewId % 12`` times 30 degrees is
        subtracted from headings to make them relative to the agent.
    :param obj_d_feat: optional per-view object features, indexed by view
        index.
    :param obj_s_feat: currently unused; the code that consumed it is
        disabled.
    :return: list of candidate dicts.
    """
    def _loc_distance(loc):
        # Angular distance of a location from the centre of the current view.
        return np.sqrt(loc.rel_heading ** 2 + loc.rel_elevation ** 2)

    base_heading = (viewId % 12) * math.radians(30)
    adj_dict = {}
    long_id = "%s_%s" % (scanId, viewpointId)
    if long_id not in self.buffered_state_dict:
        for ix in range(36):
            # Start a new episode for view 0, then step the simulator once per
            # view index; every 12th step uses a different third argument.
            if ix == 0:
                self.sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
            elif ix % 12 == 0:
                self.sim.makeAction(0, 1.0, 1.0)
            else:
                self.sim.makeAction(0, 1.0, 0)

            state = self.sim.getState()
            assert state.viewIndex == ix

            # Heading and elevation for the viewpoint center
            heading = state.heading - base_heading
            elevation = state.elevation

            visual_feat = feature[ix]
            # NOTE(review): truthiness test; this would raise for a
            # multi-element numpy array and skip an empty container. Confirm
            # the expected type of obj_d_feat.
            if obj_d_feat:
                odf = obj_d_feat[ix]

            # (A disabled code path that selected the entries of obj_s_feat
            # belonging to this view index used to live here.)

            for j, loc in enumerate(state.navigableLocations[1:]):
                # if a loc is visible from multiple view, use the closest
                # view (in angular distance) as its representation
                distance = _loc_distance(loc)

                # Heading and elevation for the loc
                loc_heading = heading + loc.rel_heading
                loc_elevation = elevation + loc.rel_elevation
                angle_feat = utils.angle_feature(loc_heading, loc_elevation)
                if (loc.viewpointId not in adj_dict or
                        distance < adj_dict[loc.viewpointId]['distance']):
                    adj_dict[loc.viewpointId] = {
                        'heading': loc_heading,
                        'elevation': loc_elevation,
                        "normalized_heading": state.heading + loc.rel_heading,
                        'scanId':scanId,
                        'viewpointId': loc.viewpointId, # Next viewpoint id
                        'pointId': ix,
                        'distance': distance,
                        'idx': j + 1,  # offset by one because navigableLocations[0] is skipped
                        'feature': np.concatenate((visual_feat, angle_feat), -1),
                        'ang_feat': angle_feat
                    }
                    # NOTE(review): stored under 'obj_d_feat' here but under
                    # 'obj_feat' in the cached branch below; confirm which key
                    # consumers read.
                    if obj_d_feat:
                        adj_dict[loc.viewpointId]['obj_d_feat'] = odf
        candidate = list(adj_dict.values())
        # Cache only the view-independent fields; heading and features are
        # recomputed from them on later calls.
        self.buffered_state_dict[long_id] = [
            {key: c[key]
             for key in ['normalized_heading', 'elevation', 'scanId', 'viewpointId',
                         'pointId', 'idx']}
            for c in candidate
        ]
        return candidate
    else:
        candidate = self.buffered_state_dict[long_id]
        candidate_new = []
        for c in candidate:
            c_new = c.copy()
            ix = c_new['pointId']
            normalized_heading = c_new['normalized_heading']
            visual_feat = feature[ix]
            # Re-express the cached heading relative to the current view.
            loc_heading = normalized_heading - base_heading
            c_new['heading'] = loc_heading
            angle_feat = utils.angle_feature(c_new['heading'], c_new['elevation'])
            c_new['feature'] = np.concatenate((visual_feat, angle_feat), -1)
            c_new['ang_feat'] = angle_feat
            if obj_d_feat:
                c_new['obj_feat'] = obj_d_feat[ix]
            c_new.pop('normalized_heading')
            candidate_new.append(c_new)
        return candidate_new
def make_candidate(self, feature, scanId, viewpointId, viewId):
    """Build the list of navigable candidates for one viewpoint.

    The first request for a ``scanId``/``viewpointId`` pair steps the
    simulator through the 36 view indices and keeps, for every adjacent
    viewpoint, the view in which it lies angularly closest to the view
    centre. A reduced record is cached in ``self.buffered_state_dict`` and
    later requests are rebuilt from it without using the simulator.

    :param feature: indexable by view index (0..35), giving each view's
        visual feature.
    :param scanId: scan identifier passed to the simulator.
    :param viewpointId: viewpoint identifier passed to the simulator.
    :param viewId: current view index; ``viewId % 12`` times 30 degrees is
        subtracted from headings to make them agent-relative.
    :return: list of candidate dicts.
    """
    view_offset = (viewId % 12) * math.radians(30)
    cache_key = "%s_%s" % (scanId, viewpointId)

    if cache_key in self.buffered_state_dict:
        # Cached path: recompute only the view-dependent fields.
        rebuilt = []
        for record in self.buffered_state_dict[cache_key]:
            entry = record.copy()
            view_ix = entry['pointId']
            absolute_heading = entry['normalized_heading']
            view_feat = feature[view_ix]
            entry['heading'] = absolute_heading - view_offset
            ang = utils.angle_feature(entry['heading'], entry['elevation'])
            entry['feature'] = np.concatenate((view_feat, ang), -1)
            entry.pop('normalized_heading')
            rebuilt.append(entry)
        return rebuilt

    # Agent's current view [0-35] (set only when viewing angles are
    # discretized): [0-11] looking down, [12-23] looking at horizon,
    # [24-35] looking up. Sweep 0..35 and look up the accessible candidates.
    nearest = {}
    for view_ix in range(36):
        if view_ix == 0:
            # newEpisode(scanId, viewpointId, heading, elevation)
            self.sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
        else:
            # makeAction(index, heading, elevation): a different elevation
            # argument is passed on every 12th view.
            self.sim.makeAction(0, 1.0, 1.0 if view_ix % 12 == 0 else 0)

        state = self.sim.getState()
        assert state.viewIndex == view_ix

        # Heading and elevation of the view centre
        center_heading = state.heading - view_offset
        center_elevation = state.elevation
        view_feat = feature[view_ix]

        # Adjacent locations; entry 0 is skipped, hence indices start at 1.
        for nav_idx, loc in enumerate(state.navigableLocations[1:], start=1):
            ang_dist = np.sqrt(loc.rel_heading**2 + loc.rel_elevation**2)
            loc_heading = center_heading + loc.rel_heading
            loc_elevation = center_elevation + loc.rel_elevation
            ang = utils.angle_feature(loc_heading, loc_elevation)

            # If a location is visible from several views, keep the one in
            # which it is closest (in angular distance) to the centre.
            prior = nearest.get(loc.viewpointId)
            if prior is not None and not ang_dist < prior['distance']:
                continue
            nearest[loc.viewpointId] = {
                'heading': loc_heading,
                'elevation': loc_elevation,
                "normalized_heading": state.heading + loc.rel_heading,
                'scanId': scanId,
                'viewpointId': loc.viewpointId,  # Next viewpoint id
                'pointId': view_ix,
                'distance': ang_dist,
                'idx': nav_idx,
                'feature': np.concatenate((view_feat, ang), -1),
            }

    candidate = list(nearest.values())
    cached_fields = [
        'normalized_heading', 'elevation', 'scanId', 'viewpointId',
        'pointId', 'idx'
    ]
    self.buffered_state_dict[cache_key] = [
        {field: cand[field] for field in cached_fields} for cand in candidate
    ]
    return candidate
def make_candidate(self, feature, scanId, viewpointId, viewId):
    """Build the list of navigable candidates, with per-view text features.

    The first request for a ``scanId``/``viewpointId`` pair steps the
    simulator through the 36 view indices and keeps, for every adjacent
    viewpoint, the view in which it lies angularly closest to the view
    centre. A reduced record is cached in ``self.buffered_state_dict``;
    later requests are rebuilt from it without using the simulator. In both
    paths every candidate carries ``'obj_feat'`` / ``'obj_mask'`` taken from
    ``self.pano_caffee[scanId][viewpointId][view_index]``.

    :param feature: indexable by view index (0..35), giving each view's
        visual feature.
    :param scanId: scan identifier passed to the simulator.
    :param viewpointId: viewpoint identifier passed to the simulator.
    :param viewId: current view index; ``viewId % 12`` times 30 degrees is
        subtracted from headings to make them agent-relative.
    :return: list of candidate dicts. Candidates rebuilt from the cache keep
        their ``'normalized_heading'`` entry and have no ``'distance'``.
    """
    def _loc_distance(loc):
        # Angular distance of a location from the centre of the current view.
        return np.sqrt(loc.rel_heading**2 + loc.rel_elevation**2)

    base_heading = (viewId % 12) * math.radians(30)
    adj_dict = {}
    long_id = "%s_%s" % (scanId, viewpointId)
    if long_id not in self.buffered_state_dict:
        for ix in range(36):
            # Start a new episode for view 0, then step once per view index;
            # every 12th step uses a different third argument.
            if ix == 0:
                self.sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
            elif ix % 12 == 0:
                self.sim.makeAction(0, 1.0, 1.0)
            else:
                self.sim.makeAction(0, 1.0, 0)

            state = self.sim.getState()
            assert state.viewIndex == ix

            # Heading and elevation for the viewpoint center
            heading = state.heading - base_heading
            elevation = state.elevation

            visual_feat = feature[ix]

            # get adjacent locations
            for j, loc in enumerate(state.navigableLocations[1:]):
                # if a loc is visible from multiple view, use the closest
                # view (in angular distance) as its representation
                distance = _loc_distance(loc)

                # Heading and elevation for the loc
                loc_heading = heading + loc.rel_heading
                loc_elevation = elevation + loc.rel_elevation
                angle_feat = utils.angle_feature(loc_heading, loc_elevation)
                # (The left/right/front/back flags that used to be computed
                # here were never read, so that dead helper was removed.)
                if (loc.viewpointId not in adj_dict
                        or distance < adj_dict[loc.viewpointId]['distance']):
                    view_text = self.pano_caffee[scanId][viewpointId][ix]
                    adj_dict[loc.viewpointId] = {
                        'heading': loc_heading,
                        'elevation': loc_elevation,
                        "normalized_heading": state.heading + loc.rel_heading,
                        'scanId': scanId,
                        'viewpointId': loc.viewpointId,  # Next viewpoint id
                        'pointId': ix,
                        'distance': distance,
                        'idx': j + 1,  # navigableLocations[0] is skipped
                        'feature': np.concatenate((visual_feat, angle_feat), -1),
                        'obj_feat': view_text['text_feature'],
                        'obj_mask': view_text['text_mask'],
                    }
        candidate = list(adj_dict.values())
        # Cache only the view-independent fields; heading and features are
        # recomputed from them on later calls.
        self.buffered_state_dict[long_id] = [{
            key: c[key]
            for key in [
                'normalized_heading', 'elevation', 'scanId', 'viewpointId',
                'pointId', 'idx'
            ]
        } for c in candidate]
        return candidate
    else:
        candidate = self.buffered_state_dict[long_id]
        candidate_new = []
        for c in candidate:
            c_new = c.copy()
            ix = c_new['pointId']
            normalized_heading = c_new['normalized_heading']
            visual_feat = feature[ix]
            # Re-express the cached heading relative to the current view.
            loc_heading = normalized_heading - base_heading
            c_new['heading'] = loc_heading
            angle_feat = utils.angle_feature(c_new['heading'],
                                             c_new['elevation'])
            c_new['feature'] = np.concatenate((visual_feat, angle_feat), -1)
            view_text = self.pano_caffee[c_new['scanId']][viewpointId][ix]
            c_new['obj_feat'] = view_text['text_feature']
            c_new['obj_mask'] = view_text['text_mask']
            candidate_new.append(c_new)
        return candidate_new
def from_shortest_path(self, viewpoints=None, get_first_feat=False):
    """Roll out the teacher path and collect features, actions and masks.

    :param viewpoints: [[], [], ....(batch_size)]. Only for dropout viewpoint.
        When given, each batch item's visited viewpoint id is appended to the
        matching inner list at every step.
    :param get_first_feat: whether output the first feat.
    :return: ``((img_feats, can_feats, first_feat), length)`` when
        ``get_first_feat`` is true, otherwise ``((img_feats, can_feats,
        teacher_actions_1h, candidate_feats, candidate_masks), length)``.
        Per-step tensors are stacked along dimension 1.
    """
    obs = self.env._get_obs()
    batch = len(obs)
    ended = np.array(
        [False] * batch)  # Indices match permutation of the model, not env
    length = np.zeros(batch, np.int64)

    # Only the trailing angle slots of the first feature are filled.
    first_feat = np.zeros((batch, self.obs_dim), np.float32)
    for row, ob in enumerate(obs):
        first_feat[row, -args.angle_feat_size:] = utils.angle_feature(
            ob['heading'], ob['elevation'])
    first_feat = torch.from_numpy(first_feat).cuda()

    img_feats, can_feats = [], []
    step_actions, step_cand_feats, step_cand_masks = [], [], []

    while not ended.all():
        if viewpoints is not None:
            for visited, ob in zip(viewpoints, obs):
                visited.append(ob['viewpoint'])

        teacher_action = self._teacher_action(obs, ended).cpu().numpy()
        # Keep the raw action (before stop/ignore are folded into -1).
        # TODO: why last teacher action not -1
        step_actions.append(teacher_action.copy())

        # +1 is for the END candidate, whose feature stays all-zero.
        cand_lengths = [len(ob['candidate']) + 1 for ob in obs]
        cand_block = np.zeros((len(obs), max(cand_lengths), self.obs_dim))
        for row, ob in enumerate(obs):
            for col, cand in enumerate(ob['candidate']):
                cand_block[row, col, :] = cand['feature']
        step_cand_feats.append(torch.Tensor(cand_block).cuda())
        step_cand_masks.append(utils.length2mask(cand_lengths))

        img_feats.append(self._feature_variable(obs))

        for row, act in enumerate(teacher_action):
            # Ignore (negative) or Stop (one past the last candidate)
            if act < 0 or act == len(obs[row]['candidate']):
                teacher_action[row] = -1
        can_feats.append(self._candidate_variable(obs, teacher_action))

        self.make_equiv_action(teacher_action, obs)
        length += (1 - ended)
        ended[:] = np.logical_or(ended, (teacher_action == -1))
        obs = self.env._get_obs()

    # TODO: heading random ?
    # TODO: policy decoder behavior clone
    # TODO: state decoder mse
    # TODO: state decoder weight = 0 ?
    assert len(step_actions) == len(step_cand_feats) == len(step_cand_masks)

    # Widest candidate dimension over all steps.
    widest = max((feats.shape[1] for feats in step_cand_feats), default=0)

    # Single-column padding blocks: zeros for features, ones for masks.
    feat_pad_shape = list(step_cand_feats[0].shape)
    feat_pad_shape[1] = 1
    feat_pad = torch.zeros(tuple(feat_pad_shape)).cuda()
    mask_pad_shape = list(step_cand_masks[0].shape)
    mask_pad_shape[1] = 1
    mask_pad = torch.ones(tuple(mask_pad_shape)).bool().cuda()

    candidate_feats, candidate_masks, teacher_actions_1h = [], [], []
    for raw_action, feats, mask in zip(step_actions, step_cand_feats,
                                       step_cand_masks):
        missing = widest - feats.shape[1]
        missing_mask = widest - mask.shape[1]
        assert missing == missing_mask
        if missing > 0:
            feats = torch.cat([feats, feat_pad.repeat(1, missing, 1)], dim=1)
            mask = torch.cat([mask, mask_pad.repeat(1, missing)], dim=1)
        candidate_feats.append(feats)
        candidate_masks.append(mask)
        # convert teacher actions to one-hot vectors
        teacher_actions_1h.append(
            torch.nn.functional.one_hot(torch.LongTensor(raw_action),
                                        num_classes=widest).cuda())

    img_feats = torch.stack(
        img_feats, 1).contiguous()  # batch_size, max_len, 36, 2052
    can_feats = torch.stack(
        can_feats, 1).contiguous()  # batch_size, max_len, 2052
    teacher_actions_1h = torch.stack(teacher_actions_1h, 1).contiguous()
    candidate_feats = torch.stack(candidate_feats, 1).contiguous()
    candidate_masks = torch.stack(candidate_masks, 1).contiguous()

    if get_first_feat:
        return (img_feats, can_feats, first_feat), length
    return (img_feats, can_feats, teacher_actions_1h, candidate_feats,
            candidate_masks), length
def from_shortest_path(self,
                       viewpoints=None,
                       get_first_feat=False,
                       creator=None):
    """Roll out the teacher path, optionally rewriting features via `creator`.

    :param viewpoints: [[], [], ....(batch_size)]. Only for dropout viewpoint.
        When given, each batch item's visited viewpoint id is appended to the
        matching inner list at every step.
    :param get_first_feat: whether output the first feat.
    :param creator: [encoder, decoder]. When given, the encoder is run once
        on the sorted instruction batch and the decoder is run at every step
        to produce replacement panoramic features and a weight tensor.
    :return: ``((img_feats, can_feats, first_feat), length)`` when
        ``get_first_feat`` is true; otherwise ``((img_feats, can_feats),
        length)``, followed by the averaged weight regulariser when
        ``creator`` is given. Per-step tensors are stacked along dimension 1.
    """
    obs = self.env._get_obs()
    batch_size = len(obs)
    ended = np.array(
        [False] * len(obs))  # Indices match permutation of the model, not env
    length = np.zeros(len(obs), np.int64)
    img_feats = []
    can_feats = []

    if creator is not None:
        weights_reg = 0.
        cnt = 0
        seq, seq_mask, seq_lengths, perm_idx = self._sort_batch(obs)
        ctx_f, h_t_f, c_t_f = creator[0](seq, seq_lengths)
        # Invert the sorting permutation so the encoder outputs line up with
        # the environment's batch order again.
        inv_idx = [0 for _ in perm_idx]
        for sorted_pos, env_pos in enumerate(perm_idx):
            inv_idx[env_pos] = sorted_pos
        ctx_mask = seq_mask[inv_idx]
        ctx_f = ctx_f[inv_idx]
        h_t_f = h_t_f[inv_idx]
        c_t_f = c_t_f[inv_idx]
        h1_f = h_t_f
        # Random batch permutation used to pair every item with the features
        # of another item.
        rand_idx = list(range(batch_size))
        random.shuffle(rand_idx)

    # Only the trailing angle slots of the first feature are filled.
    first_feat = np.zeros(
        (len(obs), self.feature_size + self.args.angle_feat_size), np.float32)
    for i, ob in enumerate(obs):
        first_feat[i, -self.args.angle_feat_size:] = utils.angle_feature(
            ob['heading'], ob['elevation'])
    first_feat = torch.from_numpy(first_feat).cuda()

    while not ended.all():
        if viewpoints is not None:
            for i, ob in enumerate(obs):
                viewpoints[i].append(ob['viewpoint'])

        input_a_t, f_t_pano = self.get_input_feat(
            obs)  # Image features from obs

        teacher_action = self._teacher_action(obs, ended)
        teacher_action = teacher_action.cpu().numpy()
        for i, act in enumerate(teacher_action):
            # Ignore (negative) or Stop (one past the last candidate)
            if act < 0 or act == len(obs[i]['candidate']):
                teacher_action[i] = -1
        candidate_feat = self._candidate_variable(obs, teacher_action)

        if creator is not None:
            f_t_shuffle = f_t_pano[rand_idx]
            h_t_f, c_t_f, h1_f, f_t_pano, weights = creator[1](
                input_a_t, f_t_pano, f_t_shuffle, h1_f, c_t_f, ctx_f,
                ctx_mask)
            for i, ob in enumerate(obs):
                a = teacher_action[i]
                # NOTE(review): for a stop/ignore action (a == -1) this picks
                # the LAST candidate and overwrites the selected-candidate
                # feature with it -- confirm this is intended.
                c = ob['candidate'][a]
                idx = c['pointId']
                # Replace the visual part (all but the angle slots) with the
                # decoder's feature for the chosen view.
                candidate_feat[i, :-self.args.angle_feat_size] = f_t_pano[
                    i, idx, :-self.args.angle_feat_size]
            active = ~ended
            weights_reg += (weights.mean(1).sum(1) *
                            torch.from_numpy(active).float().cuda()).sum()
            # `np.float` was removed in NumPy 1.24; count the still-running
            # items as a plain Python float instead.
            cnt += float(active.sum())

        img_feats.append(f_t_pano)
        can_feats.append(candidate_feat)
        self.make_equiv_action(teacher_action, obs)
        length += (1 - ended)
        ended[:] = np.logical_or(ended, (teacher_action == -1))
        obs = self.env._get_obs()

    img_feats = torch.stack(
        img_feats, 1).contiguous()  # batch_size, max_len, 36, 2052
    can_feats = torch.stack(
        can_feats, 1).contiguous()  # batch_size, max_len, 2052

    if get_first_feat:
        return (img_feats, can_feats, first_feat), length
    if creator is not None:
        return (img_feats, can_feats), length, weights_reg / cnt
    return (img_feats, can_feats), length