Example #1
    def __getitem__(self, idx):
        name = self._filenames[idx]

        # load states
        pose_i, pose_f, c_height_i, d_height_i, c_height_f, \
            d_height_f, object_mask, hole_mask, kit_minus_hole_mask, \
                kit_plus_hole_mask, all_corrs, rot_quant_indices = self._load_state(self._filenames[idx])

        # split heightmap into source and target
        c_height_s, c_height_t = self._split_heightmap(c_height_f)
        d_height_s, d_height_t = self._split_heightmap(d_height_f)
        self._H, self._W = c_height_t.shape[:2]

        # offset mask column indices to adjust for the heightmap split
        hole_mask[:, 1] = hole_mask[:, 1] - self._half
        kit_minus_hole_mask[:, 1] = kit_minus_hole_mask[:, 1] - self._half
        kit_plus_hole_mask[:, 1] = kit_plus_hole_mask[:, 1] - self._half
        for corr in all_corrs:
            corr[:, 1] = corr[:, 1] - self._half

        if self._background_subtract is not None:
            # idxs = np.vstack(np.where(d_height_s > self._background_subtract[0])).T
            # mask = np.zeros_like(d_height_s)
            # mask[idxs[:, 0], idxs[:, 1]] = 1
            # mask = misc.largest_cc(np.logical_not(mask))
            # idxs = np.vstack(np.where(mask == 1)).T
            mask = np.zeros_like(d_height_s)
            mask[kit_plus_hole_mask[:, 0], kit_plus_hole_mask[:, 1]] = 1
            idxs = np.vstack(np.where(mask == 1)).T
            mask[int(idxs[:, 0].min()):int(idxs[:, 0].max()), int(idxs[:, 1].min()):int(idxs[:, 1].max())] = 1
            mask = np.logical_not(mask)
            idxs = np.vstack(np.where(mask == 1)).T
            c_height_s[idxs[:, 0], idxs[:, 1]] = 0
            d_height_s[idxs[:, 0], idxs[:, 1]] = 0
            idxs = np.vstack(np.where(d_height_t > self._background_subtract[1])).T
            mask = np.zeros_like(d_height_s)
            mask[idxs[:, 0], idxs[:, 1]] = 1
            mask = misc.largest_cc(np.logical_not(mask))
            idxs = np.vstack(np.where(mask == 1)).T
            c_height_t[idxs[:, 0], idxs[:, 1]] = 0
            d_height_t[idxs[:, 0], idxs[:, 1]] = 0

        # partition correspondences into current and previous
        curr_corrs = all_corrs[-1]
        prev_corrs = all_corrs[:-1]

        # compute rotation about z-axis using initial and final pose
        gd_truth_rot = self._compute_relative_rotation(pose_i, pose_f)

        # center of rotation is the center of the kit
        self._uc = int((kit_plus_hole_mask[:, 0].max() + kit_plus_hole_mask[:, 0].min()) // 2)
        self._vc = int((kit_plus_hole_mask[:, 1].max() + kit_plus_hole_mask[:, 1].min()) // 2)

        if self._augment:
            shape = (self._W, self._H)
            source_corrs = curr_corrs[:, 0:2].astype("float64")
            target_corrs = curr_corrs[:, 2:4].astype("float64")

            # determine bounds on translation for source and target
            sources = [kit_plus_hole_mask]
            targets = [p[:, 2:4] for p in all_corrs]

            angle_s = np.radians(np.random.uniform(0, 360))
            tu_s, tv_s = self._sample_translation(sources, angle_s)
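            # Source augmentation as a composition of three affines: translate
            # the kit center to the origin, rotate by angle_s and shift by
            # (tu_s, tv_s), then translate back. cv2.warpAffine expects
            # (x, y) = (col, row) order, hence the (vc, uc) ordering below.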
            aff_1 = np.eye(3)
            aff_1[:2, 2] = [-self._vc, -self._uc]
            aff_2 = RotationMatrix.rotz(angle_s)
            aff_2[:2, 2] = [tu_s, tv_s]
            aff_3 = np.eye(3, 3)
            aff_3[:2, 2] = [self._vc, self._uc]
            affine_s = aff_3 @ aff_2 @ aff_1
            affine_s = affine_s[:2, :]

            c_height_s = cv2.warpAffine(c_height_s, affine_s, shape, flags=cv2.INTER_NEAREST)
            d_height_s = cv2.warpAffine(d_height_s, affine_s, shape, flags=cv2.INTER_NEAREST)
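            # Re-express the same transform in (row, col) order for the pixel
            # correspondences: swapping the axes negates the rotation angle and
            # swaps the translation components.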
            aff_1[:2, 2] = [-self._uc, -self._vc]
            aff_2 = RotationMatrix.rotz(-angle_s)
            aff_2[:2, 2] = [tv_s, tu_s]
            aff_3[:2, 2] = [self._uc, self._vc]
            affine_s = aff_3 @ aff_2 @ aff_1
            affine_s = affine_s[:2, :]
            source_corrs = (affine_s @ np.hstack((source_corrs, np.ones((len(source_corrs), 1)))).T).T

            # target affine transformation
            angle_t = 0
            tu_t, tv_t = 0, 0  # self._sample_translation(targets, angle_t)
            aff_1 = np.eye(3)
            aff_1[:2, 2] = [-self._vc, -self._uc]
            aff_2 = RotationMatrix.rotz(angle_t)
            aff_2[:2, 2] = [tu_t, tv_t]
            aff_3 = np.eye(3, 3)
            aff_3[:2, 2] = [self._vc, self._uc]
            affine_t = aff_3 @ aff_2 @ aff_1
            affine_t = affine_t[:2, :]

            c_height_t = cv2.warpAffine(c_height_t, affine_t, shape, flags=cv2.INTER_NEAREST)
            d_height_t = cv2.warpAffine(d_height_t, affine_t, shape, flags=cv2.INTER_NEAREST)
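            # Same (row, col) re-expression for the target correspondences.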
            aff_1[:2, 2] = [-self._uc, -self._vc]
            aff_2 = RotationMatrix.rotz(-angle_t)
            aff_2[:2, 2] = [tv_t, tu_t]
            aff_3[:2, 2] = [self._uc, self._vc]
            affine_t = aff_3 @ aff_2 @ aff_1
            affine_t = affine_t[:2, :]
            target_corrs = (affine_t @ np.hstack((target_corrs, np.ones((len(target_corrs), 1)))).T).T

            # remove invalid indices
            valid_target_idxs = self._get_valid_idxs(target_corrs, self._H, self._W)
            target_corrs = target_corrs[valid_target_idxs].astype("int64")
            source_corrs = source_corrs[valid_target_idxs].astype("int64")
            curr_corrs = np.hstack((source_corrs, target_corrs))

            # apply affine transformation to masks in source
            masks = [hole_mask, kit_plus_hole_mask, kit_minus_hole_mask]
            for i in range(len(masks)):
                ones = np.ones((len(masks[i]), 1))
                masks[i] = (affine_s @ np.hstack((masks[i], ones)).T).T
            hole_mask, kit_plus_hole_mask, kit_minus_hole_mask = masks

            # apply affine transformation to masks in target
            ones = np.ones((len(object_mask), 1))
            object_mask = (affine_t @ np.hstack((object_mask, ones)).T).T
            object_mask[:, 0] = np.clip(object_mask[:, 0], 0, self._H - 1)
            object_mask[:, 1] = np.clip(object_mask[:, 1], 0, self._W - 1)

        # recompute the kit mask center (it may have shifted during augmentation)
        self._uc = int((kit_plus_hole_mask[:, 0].max() + kit_plus_hole_mask[:, 0].min()) // 2)
        self._vc = int((kit_plus_hole_mask[:, 1].max() + kit_plus_hole_mask[:, 1].min()) // 2)

        if self._augment:
            gd_truth_rot = gd_truth_rot + np.degrees(angle_t) - np.degrees(angle_s)

        # quantize rotation
        curr_rot_idx = self._quantize_rotation(gd_truth_rot)
        curr_rot = self._rotations[curr_rot_idx]

        self._features_source = []
        self._features_target = []
        self._rot_idxs = []
        self._is_match = []

        # sample matches from all previous timesteps if not markovian
        if not self._markovian:
            for rot_idx, corrs in zip(rot_quant_indices, prev_corrs):
                self._process_correspondences(corrs, rot_idx)

        # sample matches from the current timestep
        self._process_correspondences(curr_corrs, curr_rot_idx, depth=d_height_t)

        # determine the number of non-matches to sample per rotation
        num_matches = 0
        for m in self._is_match:
            num_matches += len(m)
        num_non_matches = int(self._sample_ratio * num_matches / self._num_rotations)

        # convert masks to linear indices for sampling
        all_idxs_1d = np.arange(0, self._H * self._W)
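        # (assumed: sampling.make1d flattens (row, col) pairs to row * W + col,
        # so the set arithmetic below can separate object from background pixels)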
        object_target_1d = sampling.make1d(object_mask, self._W)
        background_target_1d = np.array(list(set(all_idxs_1d) - set(object_target_1d)))
        hole_source_1d = sampling.make1d(hole_mask, self._W)
        kit_minus_hole_source_1d = sampling.make1d(kit_minus_hole_mask, self._W)
        kit_plus_hole_source_1d = sampling.make1d(kit_plus_hole_mask, self._W)
        background_source_1d = np.array(list(set(all_idxs_1d) - set(kit_plus_hole_source_1d)))
        background_source_1d = sampling.remove_outliers(background_source_1d, kit_plus_hole_mask, self._W)

        # sample non-matches
        temp_idx = 0
        div_factor = 2
        for rot_idx in range(self._num_rotations):
            non_matches = []

            # # source: anywhere
            # # target: anywhere but the object
            # non_matches.append(sampling.sample_non_matches(
            #     1 * num_non_matches // div_factor,
            #     (self._H, self._W),
            #     -self._rotations[rot_idx],
            #     mask_target=background_target_1d,
            #     rotate=False)
            # )

            # # source: anywhere but the kit
            # # target: on the object
            # nm_idxs = sampling.sample_non_matches(
            #     1 * num_non_matches // div_factor,
            #     (self._H, self._W),
            #     -self._rotations[rot_idx],
            #     background_source_1d,
            #     object_target_1d,
            #     rotate=False,
            # )
            # non_matches.append(nm_idxs)

            # source: on the kit but not in the hole
            # target: on the object
            nm_idxs = sampling.sample_non_matches(
                1 * num_non_matches // div_factor,
                (self._H, self._W),
                -self._rotations[rot_idx],
                kit_minus_hole_source_1d,
                object_target_1d,
                cxcy=(self._uc, self._vc),
            )
            non_matches.append(nm_idxs)

            # # Here, I want to explicitly sample matches for the incorrect
            # # rotations to teach the network that these are in fact the
            # # wrong rotations. This is especially useful for the 180-degree
            # # rotated version of the correct rotation.
            # if rot_idx != curr_rot_idx:
            #     nm_idxs = self._process_correspondences(curr_corrs, rot_idx, False)
            #     subset_mask = np.random.choice(np.arange(len(nm_idxs)), replace=False, size=(1 * num_non_matches // div_factor))
            #     nm_idxs = nm_idxs[subset_mask]
            #     non_matches.append(nm_idxs)

            # source: in the hole
            # target: on the object
            if self._markovian:
                if rot_idx == curr_rot_idx:
                    nm_idxs = sampling.non_matches_from_matches(
                        1 * num_non_matches // div_factor,
                        (self._H, self._W),
                        -self._rotations[rot_idx],
                        hole_source_1d,
                        self._features_source[0],
                        self._features_target[0],
                        cxcy=(self._uc, self._vc),
                    )
                    non_matches.append(nm_idxs)
                else:
                    nm_idxs = sampling.sample_non_matches(
                        1 * num_non_matches // div_factor,
                        (self._H, self._W),
                        -self._rotations[rot_idx],
                        hole_source_1d,
                        object_target_1d,
                        cxcy=(self._uc, self._vc),
                    )
                    non_matches.append(nm_idxs)
            else:
                if rot_idx in rot_quant_indices:
                    non_matches.append(
                        sampling.non_matches_from_matches(
                            num_non_matches // div_factor,
                            (self._H, self._W),
                            -self._rotations[rot_idx],
                            hole_source_1d,
                            self._features_source[temp_idx],
                            self._features_target[temp_idx],
                        )
                    )
                    temp_idx += 1
                else:
                    non_matches.append(
                        sampling.sample_non_matches(
                            num_non_matches // div_factor,
                            (self._H, self._W),
                            -self._rotations[rot_idx],
                            hole_source_1d,
                            object_target_1d,
                        )
                    )
            non_matches = np.vstack(non_matches)
            self._features_source.append(non_matches[:, :2])
            self._features_target.append(non_matches[:, 2:])
            self._rot_idxs.append(np.repeat([rot_idx], len(non_matches)))
            self._is_match.append(np.repeat([0], len(non_matches)))

        # convert lists to numpy arrays
        self._features_source = np.concatenate(self._features_source)
        self._features_target = np.concatenate(self._features_target)
        self._rot_idxs = np.concatenate(self._rot_idxs)[..., np.newaxis]
        self._is_match = np.concatenate(self._is_match)[..., np.newaxis]

        # concatenate into 1 big array
        label = np.hstack(
            (
                self._features_source,
                self._features_target,
                self._rot_idxs,
                self._is_match,
            )
        )

        if self._num_channels == 2:
            c_height_s = c_height_s[..., np.newaxis]
            c_height_t = c_height_t[..., np.newaxis]
        else:  # clone the gray channel 3 times
            c_height_s = np.repeat(c_height_s[..., np.newaxis], 3, axis=-1)
            c_height_t = np.repeat(c_height_t[..., np.newaxis], 3, axis=-1)

        # ndarray -> tensor
        label_tensor = torch.LongTensor(label)

        # heightmaps -> tensor
        c_height_s = self._c_norm(self._transform(c_height_s))
        c_height_t = self._c_norm(self._transform(c_height_t))
        d_height_s = self._d_norm(self._transform(d_height_s[..., np.newaxis]))
        d_height_t = self._d_norm(self._transform(d_height_t[..., np.newaxis]))

        # concatenate height and depth into a 4-channel tensor
        source_img_tensor = torch.cat([c_height_s, d_height_s], dim=0)
        target_img_tensor = torch.cat([c_height_t, d_height_t], dim=0)

        # concatenate source and target into an 8-channel tensor
        img_tensor = torch.cat([source_img_tensor, target_img_tensor], dim=0)

        return img_tensor, label_tensor, (self._uc, self._vc)
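
A minimal usage sketch (an assumption, not part of the original source): this __getitem__ presumably belongs to a torch.utils.data.Dataset subclass, and since the label arrays vary in length per sample, a DataLoader would need a custom collate function instead of the default stacking.

import torch
from torch.utils.data import DataLoader

def list_collate(batch):
    # Stack the fixed-size image tensors; keep the variable-length label
    # tensors and the per-sample kit centers as plain lists.
    imgs = torch.stack([b[0] for b in batch], dim=0)   # (B, C, H, W), C = 4 or 8
    labels = [b[1] for b in batch]                     # (N_i, 6) LongTensors
    centers = [b[2] for b in batch]                    # (uc, vc) tuples
    return imgs, labels, centers

# Hypothetical names below; substitute the actual dataset class and arguments.
# dataset = MatchingDataset(...)
# loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=list_collate)
# for imgs, labels, centers in loader:
#     ...  # forward pass / descriptor loss
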
Example #2
            # load estimated pose
            estimated_pose = estimated_poses[folder_idx]

            if np.isnan(np.min(estimated_pose)):
                add_errors.append(np.nan)
                reproj_errors.append(np.nan)
                rotational_errors.append(np.nan)
                translational_errors.append(np.nan)
                continue

            # trim object mask
            depth_vals = depth_f[obj_mask[:, 0], obj_mask[:, 1]]
            valid_ds = depth_vals >= depth_vals.mean()
            mask = np.zeros_like(depth_f)
            mask[obj_mask[valid_ds][:, 0], obj_mask[valid_ds][:, 1]] = 1
            mask = misc.largest_cc(mask)
            valid_mask = np.vstack(np.where(mask == 1)).T
            tset = set([tuple(x) for x in valid_mask])
            for i in range(len(valid_ds)):
                is_valid = valid_ds[i]
                if is_valid:
                    tidx = obj_mask[i]
                    if tuple(tidx) not in tset:
                        valid_ds[i] = False
            obj_mask = obj_mask[valid_ds]

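            # Back-project the trimmed mask into a point cloud: stack (row, col)
            # with depth, swap to (x, y) pixel order, then scale by the heightmap
            # resolution and offset by the view bounds.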
            zs = depth_f[obj_mask[:, 0], obj_mask[:, 1]].reshape(-1, 1)
            obj_xyz = np.hstack([obj_mask, zs])
            obj_xyz[:, [0, 1]] = obj_xyz[:, [1, 0]]
            obj_xyz[:, 0] = (obj_xyz[:, 0] *
                             config.HEIGHTMAP_RES) + config.VIEW_BOUNDS[0, 0]
Example #3
    def __getitem__(self, idx):
        name = self._filenames[idx]

        # load state
        c_height, d_height, positives, kit_mask, c_height_i, d_height_i = self._load_state(
            name)

        # split heightmap into source and target
        c_height = self._split_heightmap(c_height)
        d_height = self._split_heightmap(d_height)
        c_height_i = self._split_heightmap(c_height_i)
        d_height_i = self._split_heightmap(d_height_i)
        self._H, self._W = c_height.shape[:2]

        pos_placement = []
        for pos in positives:
            rr, cc = circle(pos[0], pos[1], self.radius)
            pos_placement.append(np.vstack([rr, cc]).T)
        pos_placement = np.concatenate(pos_placement)

        # offset placement points to adjust for the heightmap split
        pos_placement[:, 1] = pos_placement[:, 1] - self._half
        kit_mask[:, 1] = kit_mask[:, 1] - self._half

        # center of rotation is the center of the kit
        self._uc = int((kit_mask[:, 0].max() + kit_mask[:, 0].min()) // 2)
        self._vc = int((kit_mask[:, 1].max() + kit_mask[:, 1].min()) // 2)

        if self._augment:
            shape = (self._W, self._H)
            angle = np.radians(np.random.uniform(0, 360))
            tu, tv = self._sample_translation([kit_mask], angle)
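            # Forward affine in (x, y) = (col, row) order for cv2.warpAffine:
            # center the kit, rotate by angle and translate by (tu, tv),
            # then un-center.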
            aff_1 = np.eye(3)
            aff_1[:2, 2] = [-self._vc, -self._uc]
            aff_2 = RotationMatrix.rotz(angle)
            aff_2[:2, 2] = [tu, tv]
            aff_3 = np.eye(3, 3)
            aff_3[:2, 2] = [self._vc, self._uc]
            affine = aff_3 @ aff_2 @ aff_1
            affine = affine[:2, :]
            c_height = cv2.warpAffine(c_height,
                                      affine,
                                      shape,
                                      flags=cv2.INTER_NEAREST)
            d_height = cv2.warpAffine(d_height,
                                      affine,
                                      shape,
                                      flags=cv2.INTER_NEAREST)

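            # Rebuild the transform in (row, col) order (negated angle, swapped
            # translation) so the placement points and kit mask move with the
            # warped heightmaps.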
            aff_1[:2, 2] = [-self._uc, -self._vc]
            aff_2 = RotationMatrix.rotz(-angle)
            aff_2[:2, 2] = [tv, tu]
            aff_3[:2, 2] = [self._uc, self._vc]
            affine = aff_3 @ aff_2 @ aff_1
            affine = affine[:2, :]
            pos_placement = (affine @ np.hstack(
                (pos_placement, np.ones((len(pos_placement), 1)))).T).T
            kit_mask = (affine @ np.hstack(
                (kit_mask, np.ones((len(kit_mask), 1)))).T).T

        # update center of rotation
        self._uc = int((kit_mask[:, 0].max() + kit_mask[:, 0].min()) // 2)
        self._vc = int((kit_mask[:, 1].max() + kit_mask[:, 1].min()) // 2)

        if self._background_subtract is not None:
            idxs = np.vstack(
                np.where(d_height > self._background_subtract[0])).T
            mask = np.zeros_like(d_height)
            mask[idxs[:, 0], idxs[:, 1]] = 1
            mask = misc.largest_cc(np.logical_not(mask))
            idxs = np.vstack(np.where(mask == 1)).T
            c_height[idxs[:, 0], idxs[:, 1]] = 0
            d_height[idxs[:, 0], idxs[:, 1]] = 0
            idxs = np.vstack(
                np.where(d_height_i > self._background_subtract[0])).T
            mask = np.zeros_like(d_height)
            mask[idxs[:, 0], idxs[:, 1]] = 1
            mask = misc.largest_cc(np.logical_not(mask))
            idxs = np.vstack(np.where(mask == 1)).T
            c_height_i[idxs[:, 0], idxs[:, 1]] = 0
            d_height_i[idxs[:, 0], idxs[:, 1]] = 0

        if self._num_channels == 2:
            c_height = c_height[..., np.newaxis]
            c_height_i = c_height_i[..., np.newaxis]
        else:  # clone the gray channel 3 times
            c_height = np.repeat(c_height[..., np.newaxis], 3, axis=-1)
            c_height_i = np.repeat(c_height_i[..., np.newaxis], 3, axis=-1)

        # convert heightmaps to tensors
        c_height = self._c_norm(self._transform(c_height))
        d_height = self._d_norm(self._transform(d_height[..., np.newaxis]))
        c_height_i = self._c_norm(self._transform(c_height_i))
        d_height_i = self._d_norm(self._transform(d_height_i[..., np.newaxis]))

        # concatenate height and depth into a 4-channel tensor
        # img_tensor = torch.cat([c_height, d_height], dim=0)
        img_tensor_i = torch.cat([c_height_i, d_height_i], dim=0)
        img_tensor = torch.cat([c_height, d_height], dim=0)
        img_tensor = torch.stack([img_tensor_i, img_tensor], dim=0)

        # add a column of ones (positive labels)
        pos_label = np.hstack((pos_placement, np.ones(
            (len(pos_placement), 1))))

        # generate negative labels
        neg_placement = np.vstack(self._sample_negative(pos_label)).T
        neg_label = np.hstack((neg_placement, np.zeros(
            (len(neg_placement), 1))))

        # stack positive and negative into a single array
        label = np.vstack((pos_label, neg_label))

        neg_placement_i = self._sample_free_negative(kit_mask)
        neg_label_i = np.hstack(
            (neg_placement_i, np.zeros((len(neg_placement_i), 1))))

        label_tensor_i = torch.LongTensor(neg_label_i)
        label_tensor_f = torch.LongTensor(label)
        label_tensor = [label_tensor_i, label_tensor_f]

        # convert suction points to tensors
        # label_tensor = torch.LongTensor(label)

        return img_tensor, label_tensor
Example #4
    def __getitem__(self, idx):
        name = self._filenames[idx]

        # load state
        c_height_i, d_height_i, c_height_f, d_height_f, \
            pos_suction_i, pos_suction_f, all_corrs, kit_mask = self._load_state(name)

        # split heightmap into source and target
        c_height_f = self._split_heightmap(c_height_f, False)
        d_height_f = self._split_heightmap(d_height_f, False)
        c_height_i = self._split_heightmap(c_height_i, True)
        d_height_i = self._split_heightmap(d_height_i, True)

        self._H, self._W = c_height_f.shape[:2]

        # offset indices to adjust for splitting
        pos_suction_i[:, 1] = pos_suction_i[:, 1] - self._half
        kit_mask[:, 1] = kit_mask[:, 1] - self._half

        pos_f = []
        for pos in pos_suction_f:
            rr, cc = circle(pos[0], pos[1], self._radius)
            pos_f.append(np.vstack([rr, cc]).T)
        pos_suction_f = np.concatenate(pos_f)
        pos_i = []
        for pos in pos_suction_i:
            rr, cc = circle(pos[0], pos[1], self._radius)
            pos_i.append(np.vstack([rr, cc]).T)
        pos_suction_i = np.concatenate(pos_i)

        for corr in all_corrs:
            corr[:, 1] = corr[:, 1] - self._half

        self._uc = int((kit_mask[:, 0].max() + kit_mask[:, 0].min()) // 2)
        self._vc = int((kit_mask[:, 1].max() + kit_mask[:, 1].min()) // 2)
        shape = (self._W, self._H)
        if self._augment:
            angle = np.radians(np.random.uniform(0, 360))
            tu, tv = 0, 0  # self._sample_translation([kit_mask], angle)
            aff_1 = np.eye(3)
            aff_1[:2, 2] = [-self._vc, -self._uc]
            aff_2 = RotationMatrix.rotz(angle)
            aff_2[:2, 2] = [tu, tv]
            aff_3 = np.eye(3, 3)
            aff_3[:2, 2] = [self._vc, self._uc]
            affine = aff_3 @ aff_2 @ aff_1
            affine = affine[:2, :]
            c_height_i = cv2.warpAffine(c_height_i,
                                        affine,
                                        shape,
                                        flags=cv2.INTER_NEAREST)
            d_height_i = cv2.warpAffine(d_height_i,
                                        affine,
                                        shape,
                                        flags=cv2.INTER_NEAREST)
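            # Same transform re-expressed in (row, col) order for the suction
            # points and kit mask.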
            aff_1[:2, 2] = [-self._uc, -self._vc]
            aff_2 = RotationMatrix.rotz(-angle)
            aff_2[:2, 2] = [tv, tu]
            aff_3[:2, 2] = [self._uc, self._vc]
            affine = aff_3 @ aff_2 @ aff_1
            affine = affine[:2, :]
            pos_suction_i = (affine @ np.hstack(
                (pos_suction_i, np.ones((len(pos_suction_i), 1)))).T).T
            kit_mask = (affine @ np.hstack(
                (kit_mask, np.ones((len(kit_mask), 1)))).T).T

            # augment obj heightmap
            angle = np.radians(np.random.uniform(0, 360))
            tu, tv = self._sample_translation(
                [p[:, 2:4].copy() for p in all_corrs], angle, False)
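            # The object half is rotated about the image center rather than
            # the kit center.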
            aff_1 = np.eye(3)
            aff_1[:2, 2] = [-self._W // 2, -self._H // 2]
            aff_2 = RotationMatrix.rotz(angle)
            aff_2[:2, 2] = [tu, tv]
            aff_3 = np.eye(3, 3)
            aff_3[:2, 2] = [self._W // 2, self._H // 2]
            affine = aff_3 @ aff_2 @ aff_1
            affine = affine[:2, :]
            c_height_f = cv2.warpAffine(c_height_f,
                                        affine,
                                        shape,
                                        flags=cv2.INTER_NEAREST)
            d_height_f = cv2.warpAffine(d_height_f,
                                        affine,
                                        shape,
                                        flags=cv2.INTER_NEAREST)
            aff_1[:2, 2] = [-self._H // 2, -self._W // 2]
            aff_2 = RotationMatrix.rotz(-angle)
            aff_2[:2, 2] = [tv, tu]
            aff_3[:2, 2] = [self._H // 2, self._W // 2]
            affine = aff_3 @ aff_2 @ aff_1
            affine = affine[:2, :]
            pos_suction_f = (affine @ np.hstack(
                (pos_suction_f, np.ones((len(pos_suction_f), 1)))).T).T

        if self._background_subtract is not None:
            idxs = np.vstack(
                np.where(d_height_i > self._background_subtract[0])).T
            mask = np.zeros_like(d_height_i)
            mask[idxs[:, 0], idxs[:, 1]] = 1
            mask = misc.largest_cc(mask)
            idxs = np.vstack(np.where(mask == 1)).T
            mask = np.zeros_like(d_height_i)
            mask[idxs[:, 0].min():idxs[:, 0].max(),
                 idxs[:, 1].min():idxs[:, 1].max()] = 1
            # mask = np.zeros_like(d_height_i)
            # mask[idxs[:, 0], idxs[:, 1]] = 1
            # mask = misc.largest_cc(np.logical_not(mask))
            idxs = np.vstack(np.where(mask == 0)).T
            c_height_i[idxs[:, 0], idxs[:, 1]] = 0
            d_height_i[idxs[:, 0], idxs[:, 1]] = 0
            idxs = np.vstack(
                np.where(d_height_f > self._background_subtract[1])).T
            mask = np.zeros_like(d_height_f)
            mask[idxs[:, 0], idxs[:, 1]] = 1
            mask = misc.largest_cc(np.logical_not(mask))
            idxs = np.vstack(np.where(mask == 1)).T
            c_height_f[idxs[:, 0], idxs[:, 1]] = 0
            d_height_f[idxs[:, 0], idxs[:, 1]] = 0

        if self._num_channels == 2:
            c_height_i = c_height_i[..., np.newaxis]
            c_height_f = c_height_f[..., np.newaxis]
        else:  # clone the gray channel 3 times
            c_height_i = np.repeat(c_height_i[..., np.newaxis], 3, axis=-1)
            c_height_f = np.repeat(c_height_f[..., np.newaxis], 3, axis=-1)

        # convert heightmaps to tensors
        c_height_i = self._c_norm(self._transform(c_height_i))
        c_height_f = self._c_norm(self._transform(c_height_f))
        d_height_i = self._d_norm(self._transform(d_height_i[..., np.newaxis]))
        d_height_f = self._d_norm(self._transform(d_height_f[..., np.newaxis]))

        # concatenate height and depth into a 4-channel tensor
        img_tensor_i = torch.cat([c_height_i, d_height_i], dim=0)
        img_tensor_f = torch.cat([c_height_f, d_height_f], dim=0)
        img_tensor = torch.stack([img_tensor_i, img_tensor_f], dim=0)

        # add a column of ones (positive labels)
        pos_label_i = np.hstack((pos_suction_i, np.ones(
            (len(pos_suction_i), 1))))
        pos_label_f = np.hstack((pos_suction_f, np.ones(
            (len(pos_suction_f), 1))))

        # generate negative labels
        neg_suction_i = np.vstack(self._sample_negative(pos_label_i)).T
        neg_label_i = np.hstack(
            (neg_suction_i, np.zeros((len(neg_suction_i), 1))))
        neg_suction_f = np.vstack(self._sample_negative(pos_label_f)).T
        neg_label_f = np.hstack(
            (neg_suction_f, np.zeros((len(neg_suction_f), 1))))

        # stack positive and negative into a single array
        label_i = np.vstack((pos_label_i, neg_label_i))
        label_f = np.vstack((pos_label_f, neg_label_f))

        # convert suction points to tensors
        label_tensor_i = torch.LongTensor(label_i)
        label_tensor_f = torch.LongTensor(label_f)
        label_tensor = [label_tensor_i, label_tensor_f]

        return img_tensor, label_tensor