Пример #1
0
    def from_shortest_path(self, viewpoints=None, get_first_feat=False):
        """Roll the batch forward along the teacher actions and record features.

        At every step the panoramic feature of the current observations and
        the feature of the teacher-selected candidate are collected, then the
        teacher action is executed in the environment. The loop stops once
        every episode in the batch has taken the stop action.

        :param viewpoints: optional list with one list per episode; when given,
            the viewpoint id of each step is appended to it in place.
        :param get_first_feat: returning the first-step feature is currently
            disabled in this variant; passing a true value raises.
        :return: ``((img_feats, can_feats), length)``. Both feature tensors are
            stacked along dim 1 (one slice per step); ``length`` holds the
            number of steps during which each episode was still active.
        :raises NotImplementedError: if ``get_first_feat`` is true.
        """
        obs = self.env._get_obs()
        ended = np.array(
            [False] *
            len(obs))  # Indices match permutation of the model, not env
        length = np.zeros(len(obs), np.int64)
        img_feats = []
        can_feats = []

        # First-step feature: zeros except for the trailing angle encoding of
        # the starting heading/elevation. It is not returned at present (see
        # the disabled branch at the end) but is kept so that branch can be
        # restored without further changes.
        first_feat = np.zeros(
            (len(obs), self.feature_size + args.angle_feat_size), np.float32)
        for i, ob in enumerate(obs):
            first_feat[i, -args.angle_feat_size:] = utils.angle_feature(
                ob['heading'], ob['elevation'])
        first_feat = torch.from_numpy(first_feat).cuda()

        while not ended.all():
            if viewpoints is not None:  # default None
                for i, ob in enumerate(obs):
                    viewpoints[i].append(ob['viewpoint'])

            # One entry per step; original note: [batch, 36, 2176] features.
            img_feats.append(self.listener._feature_variable(obs))
            teacher_action = self._teacher_action(obs, ended)
            teacher_action = teacher_action.cpu().numpy()

            # Collapse "ignore" (negative) and "stop" (index == number of
            # candidates) into the single sentinel -1.
            for i, act in enumerate(teacher_action):
                if act < 0 or act == len(obs[i]['candidate']):
                    teacher_action[i] = -1  # Stop Action

            # The second returned value (object features) is not used here.
            can_feat_i, _obj_feat_i = self._candidate_variable(
                obs, teacher_action)
            # One entry per step; original note: [batch, 2176] features at the
            # teacher-selected direction.
            can_feats.append(can_feat_i)

            self.make_equiv_action(teacher_action, obs)
            length += (1 - ended)  # only still-active episodes grow
            ended[:] = np.logical_or(ended, (teacher_action == -1))
            obs = self.env._get_obs()

        img_feats = torch.stack(
            img_feats, 1).contiguous()  # [batch_size, total_steps, 36, 2176]
        can_feats = torch.stack(
            can_feats, 1).contiguous()  # [batch_size, total_steps, 2176]

        if get_first_feat:
            # The original code had a bare undefined name here, which failed
            # with an uninformative NameError. Fail explicitly instead.
            # Previously intended result:
            #     return (img_feats, can_feats, first_feat), length
            raise NotImplementedError(
                "from_shortest_path: get_first_feat=True is disabled in this "
                "implementation")
        return (img_feats, can_feats), length
Пример #2
0
    def get_input_feat(self, obs):
        """Assemble the per-step inputs for a batch of observations.

        :param obs: sequence of observation dicts providing ``'heading'`` and
            ``'elevation'``.
        :return: ``(input_a_t, candidate_feat, candidate_leng)``: the CUDA
            tensor of per-observation angle features, followed by the two
            values returned by ``self._candidate_variable(obs)``.
        """
        angle_rows = np.zeros((len(obs), args.angle_feat_size), np.float32)
        for row, ob in enumerate(obs):
            # Encode the agent's current orientation.
            angle_rows[row] = utils.angle_feature(ob['heading'],
                                                  ob['elevation'])
        input_a_t = torch.from_numpy(angle_rows).cuda()

        # Panoramic image features are intentionally not computed here.
        candidate_feat, candidate_leng = self._candidate_variable(obs)
        return input_a_t, candidate_feat, candidate_leng
Пример #3
0
 def make_simple_candidate(self, candidate, viewId):
     """Re-express candidates relative to the heading of view ``viewId``.

     Each returned entry is a shallow copy of the input candidate whose
     ``'heading'`` has the view's base heading subtracted and whose
     ``'feature'`` is the original feature concatenated with the angle
     feature of the new heading and the candidate's elevation. The input
     dicts are not modified.
     """
     # Base heading of the view column: (viewId % 12) steps of 30 degrees.
     view_offset = (viewId % 12) * math.radians(30)

     def _rebase(cand):
         rebased = cand.copy()
         rel_heading = cand['heading'] - view_offset
         rebased['heading'] = rel_heading
         rebased['feature'] = np.concatenate(
             (cand['feature'],
              utils.angle_feature(rel_heading, cand['elevation'])))
         return rebased

     return [_rebase(cand) for cand in candidate]
Пример #4
0
 def _candidate_variable(self, obs, actions):
     """Gather the feature of the chosen candidate for every observation.

     :param obs: sequence of observation dicts with a ``'candidate'`` list.
     :param actions: per-observation candidate index, or ``-1`` for
         ignore/stop.
     :return: CUDA float tensor with one row of width ``2048 + 4`` per
         observation; rows whose action is ``-1`` stay all-zero.
     """
     feats = np.zeros((len(obs), 2048 + 4), dtype=np.float32)
     for row, (ob, act) in enumerate(zip(obs, actions)):
         if act == -1:
             # Ignore or Stop: leave the zero vector in place.
             continue
         chosen = ob['candidate'][act]
         feats[row, :args.feature_size] = chosen['feature']  # image part
         feats[row, -4:] = utils.angle_feature(
             chosen['heading'], chosen['elevation'])  # position part
     return torch.from_numpy(feats).cuda()
Пример #5
0
    def make_candidate(self, feature, scanId, viewpointId, viewId, obj_d_feat=None, obj_s_feat=None):
        """Build the list of navigable candidates for one viewpoint.

        On the first call for a ``(scanId, viewpointId)`` pair the simulator
        is swept through all 36 view indices and, for every navigable
        location, the view from which it has the smallest angular offset is
        kept. A reduced copy of the result is stored in
        ``self.buffered_state_dict``; later calls rebuild the candidates from
        that cache without touching the simulator.

        :param feature: indexable by view index; ``feature[ix]`` is the visual
            feature concatenated with the angle feature of each candidate.
        :param scanId: scan identifier passed to the simulator.
        :param viewpointId: viewpoint identifier passed to the simulator.
        :param viewId: current view index; ``viewId % 12`` selects the base
            heading in 30-degree steps.
        :param obj_d_feat: optional, indexable by view index; attached to the
            candidates when truthy.
        :param obj_s_feat: not used by the active code (only referenced in the
            commented-out block below).
        :return: list of candidate dicts.

        NOTE(review): the two branches do not return identical schemas. The
        fresh path keeps ``'distance'`` and ``'normalized_heading'`` and
        stores object features under ``'obj_d_feat'``; the cached path drops
        ``'normalized_heading'``, has no ``'distance'`` and uses the key
        ``'obj_feat'``. Confirm which key consumers read.
        """
        def _loc_distance(loc):
            # Angular offset of the location from the centre of the view.
            return np.sqrt(loc.rel_heading ** 2 + loc.rel_elevation ** 2)
        base_heading = (viewId % 12) * math.radians(30)
        adj_dict = {}
        long_id = "%s_%s" % (scanId, viewpointId)
        if long_id not in self.buffered_state_dict:
            for ix in range(36):
                # Start at view 0 with elevation -30 degrees, then step the
                # heading; every 12th view additionally steps the elevation.
                if ix == 0:
                    self.sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
                elif ix % 12 == 0:
                    self.sim.makeAction(0, 1.0, 1.0)
                else:
                    self.sim.makeAction(0, 1.0, 0)

                state = self.sim.getState()
                assert state.viewIndex == ix

                # Heading and elevation for the viewpoint center
                heading = state.heading - base_heading
                elevation = state.elevation

                visual_feat = feature[ix]
                # NOTE(review): truthiness test; this would raise for a
                # multi-element NumPy array. Confirm the type callers pass.
                if obj_d_feat:
                    odf = obj_d_feat[ix]

                # if obj_s_feat:
                #     num_obj = 0
                #     obj_index = []
                #     for n_obj, viewIndex in enumerate(obj_s_feat['concat_viewIndex']):
                #         if viewIndex == ix:
                #             num_obj += 1
                #             obj_index.append(n_obj)
                #     concat_angles_h = obj_s_feat['concat_angles_h'][obj_index]
                #     concat_angles_e = obj_s_feat['concat_angles_e'][obj_index]
                #     concat_feature = obj_s_feat['concat_feature'][obj_index]
                #     odf = concat_feature
                # The first navigable location is skipped, hence idx = j + 1.
                for j, loc in enumerate(state.navigableLocations[1:]):
                    # if a loc is visible from multiple views, use the closest
                    # view (in angular distance) as its representation
                    distance = _loc_distance(loc)

                    # Heading and elevation for the loc
                    loc_heading = heading + loc.rel_heading
                    loc_elevation = elevation + loc.rel_elevation
                    angle_feat = utils.angle_feature(loc_heading, loc_elevation)
                    if (loc.viewpointId not in adj_dict or
                            distance < adj_dict[loc.viewpointId]['distance']):
                        adj_dict[loc.viewpointId] = {
                            'heading': loc_heading,
                            'elevation': loc_elevation,
                            "normalized_heading": state.heading + loc.rel_heading,
                            'scanId':scanId,
                            'viewpointId': loc.viewpointId, # Next viewpoint id
                            'pointId': ix,
                            'distance': distance,
                            'idx': j + 1,
                            'feature': np.concatenate((visual_feat, angle_feat), -1),
                            'ang_feat': angle_feat
                        }
                        if obj_d_feat:
                            adj_dict[loc.viewpointId]['obj_d_feat'] = odf
            candidate = list(adj_dict.values())
            # Cache only the fields that do not depend on viewId or on the
            # feature arrays; everything else is recomputed on a cache hit.
            self.buffered_state_dict[long_id] = [
                {key: c[key]
                 for key in
                    ['normalized_heading', 'elevation', 'scanId', 'viewpointId',
                     'pointId', 'idx']}
                for c in candidate
            ]
            return candidate
        else:
            candidate = self.buffered_state_dict[long_id]
            candidate_new = []
            for c in candidate:
                # Work on a copy so the cached entry stays untouched.
                c_new = c.copy()
                ix = c_new['pointId']
                normalized_heading = c_new['normalized_heading']
                visual_feat = feature[ix]
                # Re-express the stored heading relative to the current view.
                loc_heading = normalized_heading - base_heading
                c_new['heading'] = loc_heading
                angle_feat = utils.angle_feature(c_new['heading'], c_new['elevation'])
                c_new['feature'] = np.concatenate((visual_feat, angle_feat), -1)
                c_new['ang_feat'] = angle_feat
                if obj_d_feat:
                    c_new['obj_feat'] = obj_d_feat[ix]
                c_new.pop('normalized_heading')
                candidate_new.append(c_new)
            return candidate_new
Пример #6
0
    def make_candidate(self, feature, scanId, viewpointId, viewId):
        """Return the navigable candidates seen from one viewpoint.

        The first request for a ``(scanId, viewpointId)`` pair sweeps the
        simulator through its 36 view indices and keeps, per reachable
        viewpoint, the view in which it has the smallest angular offset. The
        view-independent fields are cached in ``self.buffered_state_dict``;
        subsequent requests rebuild heading and feature from that cache.

        :param feature: indexable by view index (0-35).
        :param scanId: scan identifier.
        :param viewpointId: viewpoint identifier.
        :param viewId: current view index; only ``viewId % 12`` is used.
        :return: list of candidate dicts.
        """
        view_offset = (viewId % 12) * math.radians(30)
        cache_key = "%s_%s" % (scanId, viewpointId)

        if cache_key in self.buffered_state_dict:
            # Cache hit: recompute the view-dependent fields only.
            rebuilt = []
            for cached in self.buffered_state_dict[cache_key]:
                entry = cached.copy()
                point = entry['pointId']
                stored_heading = entry['normalized_heading']
                view_feat = feature[point]
                entry['heading'] = stored_heading - view_offset
                ang = utils.angle_feature(entry['heading'],
                                          entry['elevation'])
                entry['feature'] = np.concatenate((view_feat, ang), -1)
                entry.pop('normalized_heading')
                rebuilt.append(entry)
            return rebuilt

        # Cache miss: sweep all 36 discretized views. Per the original notes,
        # views 0-11 look down, 12-23 at the horizon and 24-35 up.
        nearest = {}
        for view_ix in range(36):
            if view_ix == 0:
                # newEpisode(scanId, viewpointId, heading, elevation)
                self.sim.newEpisode(scanId, viewpointId, 0,
                                    math.radians(-30))
            else:
                # makeAction(index, heading, elevation): always step the
                # heading, and also the elevation on every 12th view.
                tilt = 1.0 if view_ix % 12 == 0 else 0
                self.sim.makeAction(0, 1.0, tilt)

            state = self.sim.getState()
            assert state.viewIndex == view_ix

            # Orientation of the view centre relative to the current view.
            center_heading = state.heading - view_offset
            center_elevation = state.elevation
            view_feat = feature[view_ix]

            # The first navigable location is skipped, so indices start at 1.
            for nav_ix, loc in enumerate(state.navigableLocations[1:],
                                         start=1):
                ang_dist = np.sqrt(loc.rel_heading**2 + loc.rel_elevation**2)
                loc_heading = center_heading + loc.rel_heading
                loc_elevation = center_elevation + loc.rel_elevation
                ang = utils.angle_feature(loc_heading, loc_elevation)

                # Keep the view in which this location is closest to centre.
                known = nearest.get(loc.viewpointId)
                if known is None or ang_dist < known['distance']:
                    nearest[loc.viewpointId] = {
                        'heading': loc_heading,
                        'elevation': loc_elevation,
                        "normalized_heading": state.heading + loc.rel_heading,
                        'scanId': scanId,
                        'viewpointId': loc.viewpointId,  # Next viewpoint id
                        'pointId': view_ix,
                        'distance': ang_dist,
                        'idx': nav_ix,
                        'feature': np.concatenate((view_feat, ang), -1),
                    }

        candidate = list(nearest.values())
        cached_keys = ('normalized_heading', 'elevation', 'scanId',
                       'viewpointId', 'pointId', 'idx')
        self.buffered_state_dict[cache_key] = [
            {key: cand[key] for key in cached_keys} for cand in candidate
        ]
        return candidate
Пример #7
0
    def make_candidate(self, feature, scanId, viewpointId, viewId):
        """Build the navigable candidates for one viewpoint, with object text.

        On the first call for a ``(scanId, viewpointId)`` pair the simulator
        is swept through all 36 view indices; for each reachable viewpoint the
        view with the smallest angular offset is kept. A reduced copy is
        cached in ``self.buffered_state_dict`` and later calls rebuild the
        candidates from it. Every candidate also carries ``'obj_feat'`` and
        ``'obj_mask'`` taken from ``self.pano_caffee`` for its view index.

        :param feature: indexable by view index (0-35).
        :param scanId: scan identifier.
        :param viewpointId: viewpoint identifier.
        :param viewId: current view index; only ``viewId % 12`` is used.
        :return: list of candidate dicts. Candidates rebuilt from the cache
            keep ``'normalized_heading'`` and have no ``'distance'`` key.
        """
        def _loc_distance(loc):
            # Angular offset of the location from the centre of the view.
            return np.sqrt(loc.rel_heading**2 + loc.rel_elevation**2)

        base_heading = (viewId % 12) * math.radians(30)
        adj_dict = {}
        long_id = "%s_%s" % (scanId, viewpointId)
        if long_id not in self.buffered_state_dict:
            for ix in range(36):
                # Start at view 0 with elevation -30 degrees, then step the
                # heading; every 12th view additionally steps the elevation.
                if ix == 0:
                    self.sim.newEpisode(scanId, viewpointId, 0,
                                        math.radians(-30))
                elif ix % 12 == 0:
                    self.sim.makeAction(0, 1.0, 1.0)
                else:
                    self.sim.makeAction(0, 1.0, 0)

                state = self.sim.getState()
                assert state.viewIndex == ix

                # Heading and elevation for the viewpoint center
                heading = state.heading - base_heading
                elevation = state.elevation

                visual_feat = feature[ix]

                # get adjacent locations (the first entry is skipped, hence
                # idx = j + 1 below)
                for j, loc in enumerate(state.navigableLocations[1:]):
                    # if a loc is visible from multiple views, use the closest
                    # view (in angular distance) as its representation
                    distance = _loc_distance(loc)

                    # Heading and elevation for the loc
                    loc_heading = heading + loc.rel_heading
                    loc_elevation = elevation + loc.rel_elevation
                    angle_feat = utils.angle_feature(loc_heading,
                                                     loc_elevation)

                    if (loc.viewpointId not in adj_dict or
                            distance < adj_dict[loc.viewpointId]['distance']):
                        # Object text feature/mask stored for this view.
                        pano_entry = self.pano_caffee[scanId][viewpointId][ix]
                        adj_dict[loc.viewpointId] = {
                            'heading': loc_heading,
                            'elevation': loc_elevation,
                            "normalized_heading":
                            state.heading + loc.rel_heading,
                            'scanId': scanId,
                            'viewpointId': loc.viewpointId,  # Next viewpoint id
                            'pointId': ix,
                            'distance': distance,
                            'idx': j + 1,
                            'feature':
                            np.concatenate((visual_feat, angle_feat), -1),
                            'obj_feat': pano_entry['text_feature'],
                            'obj_mask': pano_entry['text_mask'],
                        }
            candidate = list(adj_dict.values())
            # Cache only the fields that do not depend on viewId or on the
            # feature arrays; everything else is recomputed on a cache hit.
            self.buffered_state_dict[long_id] = [{
                key: c[key]
                for key in [
                    'normalized_heading', 'elevation', 'scanId', 'viewpointId',
                    'pointId', 'idx'
                ]
            } for c in candidate]
            return candidate
        else:
            candidate = self.buffered_state_dict[long_id]
            candidate_new = []
            for c in candidate:
                # Work on a copy so the cached entry stays untouched.
                c_new = c.copy()
                ix = c_new['pointId']
                normalized_heading = c_new['normalized_heading']
                visual_feat = feature[ix]
                # Re-express the stored heading relative to the current view.
                loc_heading = normalized_heading - base_heading
                c_new['heading'] = loc_heading
                angle_feat = utils.angle_feature(c_new['heading'],
                                                 c_new['elevation'])
                c_new['feature'] = np.concatenate((visual_feat, angle_feat),
                                                  -1)
                pano_entry = self.pano_caffee[c_new['scanId']][viewpointId][ix]
                c_new['obj_feat'] = pano_entry['text_feature']
                c_new['obj_mask'] = pano_entry['text_mask']
                candidate_new.append(c_new)
            return candidate_new
Пример #8
0
    def from_shortest_path(self, viewpoints=None, get_first_feat=False):
        """Roll the batch along the teacher actions and collect training data.

        Besides the per-step panoramic and selected-candidate features, this
        variant also records the raw teacher actions (as one-hot vectors), the
        features of all candidates at every step and the matching masks, all
        padded to the largest candidate count seen over the rollout.

        :param viewpoints: [[], [], ....(batch_size)]. Only for dropout
            viewpoint; the visited viewpoint ids are appended in place.
        :param get_first_feat: whether to output the first feat
        :return: ``((img_feats, can_feats, first_feat), length)`` when
            ``get_first_feat`` is true, otherwise ``((img_feats, can_feats,
            teacher_actions_1h, candidate_feats, candidate_masks), length)``.
        """
        obs = self.env._get_obs()
        ended = np.array(
            [False] *
            len(obs))  # Indices match permutation of the model, not env
        length = np.zeros(len(obs), np.int64)
        img_feats = []
        can_feats = []
        teacher_actions = []
        teacher_actions_1h = []
        candidate_feats = []
        candidate_masks = []
        # First-step feature: zeros except for the trailing angle encoding.
        first_feat = np.zeros((len(obs), self.obs_dim), np.float32)
        for i, ob in enumerate(obs):
            first_feat[i, -args.angle_feat_size:] = utils.angle_feature(
                ob['heading'], ob['elevation'])
        first_feat = torch.from_numpy(first_feat).cuda()
        while not ended.all():
            if viewpoints is not None:
                for i, ob in enumerate(obs):
                    viewpoints[i].append(ob['viewpoint'])
            teacher_action = self._teacher_action(obs, ended)
            teacher_action = teacher_action.cpu().numpy()
            # TODO: why last teacher action not -1
            # The copy is stored before ignore/stop values are rewritten to
            # -1 below, so it keeps the raw teacher indices.
            teacher_actions.append(teacher_action.copy())
            candidate_length = [len(ob['candidate']) + 1
                                for ob in obs]  # +1 is for the end
            candidate_feat = np.zeros(
                (len(obs), max(candidate_length), self.obs_dim))
            # NOTE: The candidate_feat at len(ob['candidate']) is the feature for the END, which is zero in my implementation
            for i, ob in enumerate(obs):
                for j, c in enumerate(ob['candidate']):
                    candidate_feat[i, j, :] = c['feature']
            candidate_feats.append(torch.Tensor(candidate_feat).cuda())
            candidate_masks.append(utils.length2mask(candidate_length))
            img_feats.append(self._feature_variable(obs))
            # Collapse "ignore" (negative) and "stop" (index == number of
            # candidates) into the single sentinel -1.
            for i, act in enumerate(teacher_action):
                if act < 0 or act == len(
                        obs[i]['candidate']):  # Ignore or Stop
                    teacher_action[i] = -1  # Stop Action
            can_feats.append(self._candidate_variable(obs, teacher_action))
            self.make_equiv_action(teacher_action, obs)
            length += (1 - ended)  # only still-active episodes grow
            ended[:] = np.logical_or(ended, (teacher_action == -1))
            obs = self.env._get_obs()
            # TODO: heading random ?
            # TODO: policy decoder behavior clone
            # TODO: state decoder mse
            # TODO: state decoder weight = 0 ?

        assert len(teacher_actions) == len(candidate_feats) == len(
            candidate_masks)
        # Largest candidate count (including the END slot) over all steps.
        _max = 0
        for i in range(len(candidate_feats)):
            _max = max(_max, candidate_feats[i].shape[1])
        # Single-slot padding blocks: zeros for features, True for masks.
        shape_list = np.array(candidate_feats[0].shape)
        shape_list[1] = 1
        feat_pad_vec = torch.zeros(tuple(shape_list)).cuda()
        shape_list = np.array(candidate_masks[0].shape)
        shape_list[1] = 1
        mask_pad_vec = torch.ones(tuple(shape_list)).bool().cuda()
        for i in range(len(candidate_feats)):
            diff = _max - candidate_feats[i].shape[1]
            diff2 = _max - candidate_masks[i].shape[1]
            assert diff == diff2
            if diff > 0:
                # Right-pad this step up to _max candidates.
                candidate_feats[i] = torch.cat(
                    [candidate_feats[i],
                     feat_pad_vec.repeat(1, diff, 1)],
                    dim=1)
                candidate_masks[i] = torch.cat(
                    [candidate_masks[i],
                     mask_pad_vec.repeat(1, diff)], dim=1)
            # convert teacher actions to one-hot vectors
            # NOTE(review): one_hot requires non-negative class values; if
            # _teacher_action emits a negative ignore index for ended
            # episodes this call would fail. Confirm against _teacher_action.
            teacher_actions_1h.append(
                torch.nn.functional.one_hot(torch.LongTensor(
                    teacher_actions[i]),
                                            num_classes=_max).cuda())

        # All per-step lists are stacked along dim 1 (the step axis).
        img_feats = torch.stack(
            img_feats, 1).contiguous()  # batch_size, max_len, 36, 2052
        can_feats = torch.stack(can_feats,
                                1).contiguous()  # batch_size, max_len, 2052
        teacher_actions_1h = torch.stack(teacher_actions_1h, 1).contiguous()
        candidate_feats = torch.stack(candidate_feats, 1).contiguous()
        candidate_masks = torch.stack(candidate_masks, 1).contiguous()
        if get_first_feat:
            return (img_feats, can_feats, first_feat), length
        else:
            return (img_feats, can_feats, teacher_actions_1h, candidate_feats,
                    candidate_masks), length
Пример #9
0
    def from_shortest_path(self,
                           viewpoints=None,
                           get_first_feat=False,
                           creator=None):
        """Roll the batch along the teacher actions and record features.

        When ``creator`` is given, its encoder is run once on the sorted
        instruction batch and its decoder is applied at every step to produce
        modified panoramic features; the selected-candidate feature is then
        overwritten with the modified feature of the candidate's view, and a
        regularisation term over the decoder's weights is accumulated.

        :param viewpoints: [[], [], ....(batch_size)]. Only for dropout
            viewpoint; the visited viewpoint ids are appended in place.
        :param get_first_feat: whether to output the first feat
        :param creator: [encoder, decoder]
        :return: ``((img_feats, can_feats, first_feat), length)`` when
            ``get_first_feat`` is true; otherwise ``((img_feats, can_feats),
            length)``, with ``weights_reg / cnt`` appended as a third element
            when ``creator`` is given.
        """

        obs = self.env._get_obs()
        batch_size = len(obs)
        ended = np.array(
            [False] *
            len(obs))  # Indices match permutation of the model, not env
        length = np.zeros(len(obs), np.int64)
        img_feats = []
        can_feats = []
        if creator is not None:
            weights_reg = 0.
            cnt = 0
            seq, seq_mask, seq_lengths, perm_idx = self._sort_batch(obs)

            ctx_f, h_t_f, c_t_f = creator[0](seq, seq_lengths)
            # Invert the sort permutation so encoder outputs line up with the
            # original order of `obs`.
            inv_idx = [0 for _ in perm_idx]
            for sorted_pos, orig_pos in enumerate(perm_idx):
                inv_idx[orig_pos] = sorted_pos

            ctx_mask = seq_mask[inv_idx]
            ctx_f = ctx_f[inv_idx]
            h_t_f = h_t_f[inv_idx]
            c_t_f = c_t_f[inv_idx]

            h1_f = h_t_f

            # Random permutation of the batch, fixed for the whole rollout,
            # used to pair each episode with another episode's features.
            rand_idx = list(range(batch_size))
            random.shuffle(rand_idx)

        # First-step feature: zeros except for the trailing angle encoding.
        first_feat = np.zeros(
            (len(obs), self.feature_size + self.args.angle_feat_size),
            np.float32)
        for i, ob in enumerate(obs):
            first_feat[i, -self.args.angle_feat_size:] = utils.angle_feature(
                ob['heading'], ob['elevation'])
        first_feat = torch.from_numpy(first_feat).cuda()
        while not ended.all():
            if viewpoints is not None:
                for i, ob in enumerate(obs):
                    viewpoints[i].append(ob['viewpoint'])

            input_a_t, f_t_pano = self.get_input_feat(
                obs)  # Image features from obs

            teacher_action = self._teacher_action(obs, ended)
            teacher_action = teacher_action.cpu().numpy()
            # Collapse "ignore" (negative) and "stop" (index == number of
            # candidates) into the single sentinel -1.
            for i, act in enumerate(teacher_action):
                if act < 0 or act == len(
                        obs[i]['candidate']):  # Ignore or Stop
                    teacher_action[i] = -1  # Stop Action

            candidate_feat = self._candidate_variable(obs, teacher_action)

            if creator is not None:
                f_t_shuffle = f_t_pano[rand_idx]
                h_t_f, c_t_f, h1_f, f_t_pano, weights = creator[1](
                    input_a_t, f_t_pano, f_t_shuffle, h1_f, c_t_f, ctx_f,
                    ctx_mask)

                # NOTE(review): for stop/ignore actions `a` is -1, so this
                # picks the LAST candidate (and raises on an empty candidate
                # list). Confirm this is intended.
                for i, ob in enumerate(obs):
                    a = teacher_action[i]
                    c = ob['candidate'][a]
                    idx = c['pointId']
                    candidate_feat[i, :-self.args.angle_feat_size] = f_t_pano[
                        i, idx, :-self.args.angle_feat_size]

                # Only episodes that are still running contribute.
                weights_reg += (weights.mean(1).sum(1) *
                                torch.from_numpy(~ended).float().cuda()).sum()
                # np.float was removed in NumPy 1.24; np.float64 is the dtype
                # that alias resolved to.
                cnt += (~ended).astype(np.float64).sum()

            img_feats.append(f_t_pano)
            can_feats.append(candidate_feat)
            self.make_equiv_action(teacher_action, obs)
            length += (1 - ended)  # only still-active episodes grow
            ended[:] = np.logical_or(ended, (teacher_action == -1))
            obs = self.env._get_obs()
        img_feats = torch.stack(
            img_feats, 1).contiguous()  # batch_size, max_len, 36, 2052
        can_feats = torch.stack(can_feats,
                                1).contiguous()  # batch_size, max_len, 2052
        if get_first_feat:
            return (img_feats, can_feats, first_feat), length
        else:
            if creator is not None:
                return (img_feats, can_feats), length, weights_reg / cnt

            return (img_feats, can_feats), length