Example #1
    def _prepare_test(self, index, img, label):
        # This returns cpu tensors.
        #   Image: 3D with channels last, float32, in range [0, 1] (normally done
        #     by ToTensor).
        #   Label map: 2D, flat int64, [0 ... self.gt_k - 1]
        # label is passed in canonical [0 ... 181] indexing

        assert (img.shape[:2] == label.shape)
        img = img.astype(np.float32)
        label = label.astype(np.int32)

        # shrink original images, for memory purposes, otherwise no point
        if self.pre_scale_all:
            assert (self.pre_scale_factor < 1.)
            img = cv2.resize(img,
                             dsize=None,
                             fx=self.pre_scale_factor,
                             fy=self.pre_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
            label = cv2.resize(label,
                               dsize=None,
                               fx=self.pre_scale_factor,
                               fy=self.pre_scale_factor,
                               interpolation=cv2.INTER_NEAREST)

        # center crop to input sz
        img, _ = pad_and_or_crop(img, self.input_sz, mode="centre")
        label, _ = pad_and_or_crop(label, self.input_sz, mode="centre")

        # finish
        if not self.no_sobel:
            img = custom_greyscale_numpy(img, include_rgb=self.include_rgb)

        img = img.astype(np.float32) / 255.
        img = torch.from_numpy(img).permute(2, 0, 1)

        if RENDER_DATA:
            render(label,
                   mode="label",
                   name=("test_data_label_pre_%d" % index))

        # convert to coarse if required, reindex to [0, gt_k -1], and get mask
        label, mask = self._filter_label(label)

        # mask if required
        if self.mask_input:
            masked = 1 - mask
            img[:, masked] = 0

        if RENDER_DATA:
            render(img, mode="image", name=("test_data_img_%d" % index))
            render(label,
                   mode="label",
                   name=("test_data_label_post_%d" % index))
            render(mask, mode="mask", name=("test_data_mask_%d" % index))

        # dataloader must return tensors (conversion forced in their code anyway)
        return img, torch.from_numpy(label), torch.from_numpy(
            mask.astype(np.uint8))
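
pad_and_or_crop is defined elsewhere in the repo. The sketch below is a minimal
stand-in, not the repo's implementation: it assumes the helper zero-pads up to
input_sz if needed, then takes a centre/random crop, and returns the crop offset
so a second array (the label) can be cropped at the same coordinates ("fixed").

import numpy as np

def pad_and_or_crop_sketch(x, sz, mode="centre", coords=None):
    # pad with zeros so both spatial dims are at least sz; channels, if any, stay last
    h, w = x.shape[:2]
    pad = [(0, max(sz - h, 0)), (0, max(sz - w, 0))] + [(0, 0)] * (x.ndim - 2)
    x = np.pad(x, pad, mode="constant")
    h, w = x.shape[:2]

    if mode == "centre":
        y0, x0 = (h - sz) // 2, (w - sz) // 2
    elif mode == "random":
        y0, x0 = np.random.randint(h - sz + 1), np.random.randint(w - sz + 1)
    else:  # "fixed": reuse the offset returned by a previous call
        y0, x0 = coords

    return x[y0:y0 + sz, x0:x0 + sz], (y0, x0)
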
Example #2
    def _prepare_train(self, index, img):
        # This returns gpu tensors.
        # (no label is passed to this function)

        img = img.astype(np.float32)

        # shrink original images, for memory purposes
        # or enlarge
        if self.pre_scale_all:
            img = cv2.resize(img,
                             dsize=None,
                             fx=self.pre_scale_factor,
                             fy=self.pre_scale_factor,
                             interpolation=cv2.INTER_LINEAR)

        # basic augmentation transforms for both img1 and img2
        if self.use_random_scale:
            # bilinear interp requires float img
            scale_factor = (np.random.rand() * (self.scale_max - self.scale_min)) + \
                           self.scale_min
            img = cv2.resize(img,
                             dsize=None,
                             fx=scale_factor,
                             fy=scale_factor,
                             interpolation=cv2.INTER_LINEAR)

        # random crop to input sz
        img, coords = pad_and_or_crop(img, self.input_sz, mode="random")

        # make img2 different from img1 (img)

        # tf_mat can be:
        # *A, from img2 to img1 (will be applied to img2's heatmap)-> img1 space
        #   input img1 tf: *tf.functional or pil.image
        #   input mask tf: *none
        #   output heatmap: *tf.functional (parallel), inverse of what is used
        #     for inputs, create inverse of this tf in [-1, 1] format

        # B, from img1 to img2 (will be applied to img1's heatmap)-> img2 space
        #   input img1 tf: pil.image
        #   input mask tf: pil.image (discrete)
        #   output heatmap: tf.functional, create copy of this tf in [-1,1] format

        # tf.functional tf_mat: translation is opposite to what we'd expect (+ve 1
        # is shift half towards left)
        # but rotation is correct (-sin in top right = counter clockwise)

        # flip is [[-1, 0, 0], [0, 1, 0], [0, 0, 1]]
        # img2 = flip(affine1_to_2(img1))
        # => img1_space = affine1_to_2^-1(flip^-1(img2_space))
        #               = affine2_to_1(flip^-1(img2_space))
        # so tf_mat_img2_to_1 = affine2_to_1 * flip^-1 (order matters as not diag)
        # flip^-1 = flip

        # no need to tf label, as we're doing option A, mask needed in img1 space
        # converting to PIL does not change underlying np datatype it seems

        # images are RGBIR. We don't want to jitter or greyscale the IR part
        img_ir = img[:, :, 3]
        img = img[:, :, :3]

        img1 = Image.fromarray(img.astype(np.uint8))

        # (img2) do jitter, no tf_mat change
        img2 = self.jitter_tf(img1)  # not in place, new memory
        img1 = np.array(img1)
        img2 = np.array(img2)

        # channels still last
        if not self.no_sobel:
            img1 = custom_greyscale_numpy(img1, include_rgb=self.include_rgb)
            img2 = custom_greyscale_numpy(img2, include_rgb=self.include_rgb)

        img1 = img1.astype(np.float32) / 255.
        img2 = img2.astype(np.float32) / 255.

        # concatenate IR back on before spatial warps
        # may be concatenating onto just greyscale image
        # grey/RGB underneath IR
        img_ir = img_ir.astype(np.float32) / 255.
        img1 = np.concatenate([img1, np.expand_dims(img_ir, axis=2)], axis=2)
        img2 = np.concatenate([img2, np.expand_dims(img_ir, axis=2)], axis=2)

        # convert both to channel-first tensor format
        # make them all cuda tensors now, except label, for optimality
        img1 = torch.from_numpy(img1).permute(2, 0, 1).cuda()
        img2 = torch.from_numpy(img2).permute(2, 0, 1).cuda()

        # (img2) do affine if nec, tf_mat changes
        if self.use_random_affine:
            affine_kwargs = {
                "min_rot": self.aff_min_rot,
                "max_rot": self.aff_max_rot,
                "min_shear": self.aff_min_shear,
                "max_shear": self.aff_max_shear,
                "min_scale": self.aff_min_scale,
                "max_scale": self.aff_max_scale
            }
            img2, affine1_to_2, affine2_to_1 = random_affine(
                img2, **affine_kwargs)  # tensors
        else:
            affine2_to_1 = torch.zeros([2, 3]).to(torch.float32).cuda()  # identity
            affine2_to_1[0, 0] = 1
            affine2_to_1[1, 1] = 1

        # (img2) do random flip, tf_mat changes
        if np.random.rand() > self.flip_p:
            img2 = torch.flip(img2, dims=[2])  # horizontal, along width

            # applied affine, then flip, new = flip * affine * coord
            # (flip * affine)^-1 is just flip^-1 * affine^-1.
            # No order swap, unlike functions...
            # hence top row is negated
            affine2_to_1[0, :] *= -1.

        # uint8 tensor as masks should be binary, also for consistency,
        # but converted to float32 in main loop because is used
        # multiplicatively in loss
        mask_img1 = torch.ones(self.input_sz,
                               self.input_sz).to(torch.uint8).cuda()

        if RENDER_DATA:
            render(img1, mode="image", name=("train_data_img1_%d" % index))
            render(img2, mode="image", name=("train_data_img2_%d" % index))
            render(affine2_to_1,
                   mode="matrix",
                   name=("train_data_affine2to1_%d" % index))
            render(mask_img1, mode="mask", name=("train_data_mask_%d" % index))

        return img1, img2, affine2_to_1, mask_img1
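
The flip bookkeeping above (negating the top row of affine2_to_1) can be checked
numerically. This is a standalone sanity check, not repo code; it assumes
random_affine / perform_affine_tf are built on F.affine_grid + F.grid_sample,
where the 2x3 matrix maps output coordinates to input sampling coordinates in
the [-1, 1] convention, and it uses a made-up smooth test image.

import math
import torch
import torch.nn.functional as F

def warp(x, theta):
    # x: 1 x C x H x W; theta: 2 x 3 sampling matrix (output coords -> input coords)
    grid = F.affine_grid(theta.unsqueeze(0), x.shape, align_corners=False)
    return F.grid_sample(x, grid, align_corners=False)

# smooth test image, so bilinear resampling error is negligible
ys, xs = torch.meshgrid(torch.linspace(0., 1., 64),
                        torch.linspace(0., 1., 64), indexing="ij")
img1 = torch.stack([xs, ys, xs * ys]).unsqueeze(0)

a = 0.3  # arbitrary rotation, radians
theta_make_img2 = torch.tensor([[math.cos(a), -math.sin(a), 0.],
                                [math.sin(a), math.cos(a), 0.]])
img2 = warp(img1, theta_make_img2)   # "img2 = affine1_to_2(img1)"
img2 = torch.flip(img2, dims=[3])    # then horizontal flip (width dim of NCHW)

# inverse rotation, then flip: same trick as in _prepare_train, negate the top row
theta_2_to_1 = torch.tensor([[math.cos(a), math.sin(a), 0.],
                             [-math.sin(a), math.cos(a), 0.]])
theta_2_to_1[0, :] *= -1.

recovered = warp(img2, theta_2_to_1)
centre = (slice(None), slice(None), slice(16, 48), slice(16, 48))
print((recovered[centre] - img1[centre]).abs().max())  # tiny: round trip recovers img1
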
Example #3
    def _prepare_test(self, index, img, label):
        # This returns cpu tensors.
        #   Image: 3D with channels last, float32, in range [0, 1] (normally done
        #     by ToTensor).
        #   Label map: 2D, flat int64, [0 ... self.gt_k - 1]
        # label is passed in canonical [0 ... 181] indexing

        assert (label is not None)

        assert (img.shape[:2] == label.shape)
        img = img.astype(np.float32)
        label = label.astype(np.int32)

        # shrink original images, for memory purposes, or magnify
        if self.pre_scale_all:
            img = cv2.resize(img,
                             dsize=None,
                             fx=self.pre_scale_factor,
                             fy=self.pre_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
            label = cv2.resize(label,
                               dsize=None,
                               fx=self.pre_scale_factor,
                               fy=self.pre_scale_factor,
                               interpolation=cv2.INTER_NEAREST)

        # center crop to input sz
        img, _ = pad_and_or_crop(img, self.input_sz, mode="centre")
        label, _ = pad_and_or_crop(label, self.input_sz, mode="centre")

        img_ir = img[:, :, 3]
        img = img[:, :, :3]

        # finish
        # may be concatenating onto just greyscale image
        if not self.no_sobel:
            img = custom_greyscale_numpy(img, include_rgb=self.include_rgb)

        img = img.astype(np.float32) / 255.

        img_ir = img_ir.astype(np.float32) / 255.
        # grey/RGB under IR
        img = np.concatenate([img, np.expand_dims(img_ir, axis=2)], axis=2)

        img = torch.from_numpy(img).permute(2, 0, 1)

        if RENDER_DATA:
            render(label,
                   mode="label",
                   name=("test_data_label_pre_%d" % index))

        # convert to coarse if required, reindex to [0, gt_k -1], and get mask
        label = self._filter_label(label)
        mask = torch.ones(self.input_sz, self.input_sz).to(torch.uint8)

        if RENDER_DATA:
            render(img, mode="image", name=("test_data_img_%d" % index))
            render(label,
                   mode="label",
                   name=("test_data_label_post_%d" % index))
            render(mask, mode="mask", name=("test_data_mask_%d" % index))

        # dataloader must return tensors (conversion forced in their code anyway)
        return img, torch.from_numpy(label), mask
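
custom_greyscale_numpy is another repo helper not shown here. A rough,
hypothetical stand-in, assuming it reduces the RGB channels to one grey channel
(channels last) and, when include_rgb is set, keeps the RGB channels alongside
it (the channel order within the output is a guess):

import numpy as np

def custom_greyscale_numpy_sketch(img, include_rgb=False):
    # img: H x W x 3; returns H x W x 1 (grey only) or H x W x 4 (RGB + grey)
    grey = (0.299 * img[:, :, 0] + 0.587 * img[:, :, 1] +
            0.114 * img[:, :, 2]).astype(img.dtype)
    grey = np.expand_dims(grey, axis=2)
    if include_rgb:
        return np.concatenate([img, grey], axis=2)
    return grey
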
Example #4
    def _prepare_train_single(self, index, img):
        # Returns one pair only, i.e. without transformed second image.
        # Used for standard CNN training (baselines).
        # This returns gpu tensors.
        # (no label is passed to this function)

        img = img.astype(np.float32)

        # shrink original images, for memory purposes
        # or enlarge
        if self.pre_scale_all:
            img = cv2.resize(img,
                             dsize=None,
                             fx=self.pre_scale_factor,
                             fy=self.pre_scale_factor,
                             interpolation=cv2.INTER_LINEAR)

        # basic augmentation transforms
        if self.use_random_scale:
            # bilinear interp requires float img
            scale_factor = (np.random.rand() * (self.scale_max - self.scale_min)) + \
                           self.scale_min
            img = cv2.resize(img,
                             dsize=None,
                             fx=scale_factor,
                             fy=scale_factor,
                             interpolation=cv2.INTER_LINEAR)

        # random crop to input sz
        img, coords = pad_and_or_crop(img, self.input_sz, mode="random")

        # converting to PIL does not change underlying np datatype it seems

        # images are RGBIR. We don't want to jitter or greyscale the IR part
        img_ir = img[:, :, 3]
        img = img[:, :, :3]

        img1 = Image.fromarray(img.astype(np.uint8))

        img1 = self.jitter_tf(img1)  # not in place, new memory
        img1 = np.array(img1)

        # channels still last
        if not self.no_sobel:
            img1 = custom_greyscale_numpy(img1, include_rgb=self.include_rgb)

        img1 = img1.astype(np.float32) / 255.

        # concatenate IR back on before spatial warps
        # may be concatenating onto just greyscale image
        # grey/RGB underneath IR
        img_ir = img_ir.astype(np.float32) / 255.
        img1 = np.concatenate([img1, np.expand_dims(img_ir, axis=2)], axis=2)

        # convert to channel-first tensor format
        # make them all cuda tensors now, except label, for optimality
        img1 = torch.from_numpy(img1).permute(2, 0, 1).cuda()

        if self.use_random_affine:
            affine_kwargs = {
                "min_rot": self.aff_min_rot,
                "max_rot": self.aff_max_rot,
                "min_shear": self.aff_min_shear,
                "max_shear": self.aff_max_shear,
                "min_scale": self.aff_min_scale,
                "max_scale": self.aff_max_scale
            }
            img1, _, _ = random_affine(img1, **affine_kwargs)  # tensors

        # do random flip
        if np.random.rand() > self.flip_p:
            img1 = torch.flip(img1, dims=[2])  # horizontal, along width

        # uint8 tensor as masks should be binary, also for consistency,
        # but converted to float32 in main loop because is used
        # multiplicatively in loss
        mask_img1 = torch.ones(self.input_sz,
                               self.input_sz).to(torch.uint8).cuda()

        if RENDER_DATA:
            render(img1, mode="image", name=("train_data_img1_%d" % index))
            render(mask_img1, mode="mask", name=("train_data_mask_%d" % index))

        return img1, mask_img1
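
jitter_tf is built in the dataset's constructor, which is not shown here. A
plausible stand-in, assuming it is a torchvision ColorJitter; the strengths
below are placeholders, not the repo's configured values:

import torchvision.transforms as tvt

# applied to a PIL image; returns a new image, the input is left untouched
jitter_tf = tvt.ColorJitter(brightness=0.4, contrast=0.4,
                            saturation=0.4, hue=0.125)
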
Example #5
    def _prepare_train_single(self, index, img, label):
        # Returns one pair only, i.e. without transformed second image.
        # Used for standard CNN training (baselines).
        # This returns gpu tensors.
        # label is passed in canonical [0 ... 181] indexing

        assert (img.shape[:2] == label.shape)
        img = img.astype(np.float32)
        label = label.astype(np.int32)

        # shrink original images, for memory purposes, otherwise no point
        if self.pre_scale_all:
            assert (self.pre_scale_factor < 1.)
            img = cv2.resize(img,
                             dsize=None,
                             fx=self.pre_scale_factor,
                             fy=self.pre_scale_factor,
                             interpolation=cv2.INTER_LINEAR)
            label = cv2.resize(label,
                               dsize=None,
                               fx=self.pre_scale_factor,
                               fy=self.pre_scale_factor,
                               interpolation=cv2.INTER_NEAREST)

        if self.use_random_scale:
            # bilinear interp requires float img
            scale_factor = (np.random.rand() * (self.scale_max - self.scale_min)) + \
                           self.scale_min
            img = cv2.resize(img,
                             dsize=None,
                             fx=scale_factor,
                             fy=scale_factor,
                             interpolation=cv2.INTER_LINEAR)
            label = cv2.resize(label,
                               dsize=None,
                               fx=scale_factor,
                               fy=scale_factor,
                               interpolation=cv2.INTER_NEAREST)

        # random crop to input sz
        img, coords = pad_and_or_crop(img, self.input_sz, mode="random")
        label, _ = pad_and_or_crop(label,
                                   self.input_sz,
                                   mode="fixed",
                                   coords=coords)

        _, mask_img1 = self._filter_label(label)
        # uint8 tensor as masks should be binary, also for consistency with
        # prepare_train, but converted to float32 in main loop because is used
        # multiplicatively in loss
        mask_img1 = torch.from_numpy(mask_img1.astype(np.uint8)).cuda()

        # converting to PIL does not change underlying np datatype it seems
        img1 = Image.fromarray(img.astype(np.uint8))

        img1 = self.jitter_tf(img1)  # not in place, new memory
        img1 = np.array(img1)

        # channels still last
        if not self.no_sobel:
            img1 = custom_greyscale_numpy(img1, include_rgb=self.include_rgb)

        img1 = img1.astype(np.float32) / 255.

        # convert to channel-first tensor format
        # make them all cuda tensors now, except label, for optimality
        img1 = torch.from_numpy(img1).permute(2, 0, 1).cuda()

        # mask if required
        if self.mask_input:
            masked = 1 - mask_img1
            img1[:, masked] = 0

        if self.use_random_affine:
            affine_kwargs = {
                "min_rot": self.aff_min_rot,
                "max_rot": self.aff_max_rot,
                "min_shear": self.aff_min_shear,
                "max_shear": self.aff_max_shear,
                "min_scale": self.aff_min_scale,
                "max_scale": self.aff_max_scale
            }
            img1, _, _ = random_affine(img1, **affine_kwargs)  # tensors

        if np.random.rand() > self.flip_p:
            img1 = torch.flip(img1, dims=[2])  # horizontal, along width

        if RENDER_DATA:
            render(img1, mode="image", name=("train_data_img1_%d" % index))
            render(mask_img1, mode="mask", name=("train_data_mask_%d" % index))

        return img1, mask_img1
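
random_affine is a further repo helper used above. The following is a minimal,
hypothetical version only: it assumes rotation/shear/scale are drawn uniformly
from the given ranges (units, e.g. radians vs degrees, are an assumption), that
the CHW tensor is warped with F.affine_grid + F.grid_sample, and that the
inverse matrix is returned as affine2_to_1 for later use in the loss.

import numpy as np
import torch
import torch.nn.functional as F

def random_affine_sketch(img, min_rot, max_rot, min_shear, max_shear,
                         min_scale, max_scale):
    # img: C x H x W float32 tensor (possibly CUDA)
    rot = np.random.uniform(min_rot, max_rot)
    shear = np.random.uniform(min_shear, max_shear)
    scale = np.random.uniform(min_scale, max_scale)

    rot_m = torch.tensor([[np.cos(rot), -np.sin(rot), 0.],
                          [np.sin(rot), np.cos(rot), 0.],
                          [0., 0., 1.]])
    shear_m = torch.tensor([[1., shear, 0.],
                            [0., 1., 0.],
                            [0., 0., 1.]])
    scale_m = torch.tensor([[scale, 0., 0.],
                            [0., scale, 0.],
                            [0., 0., 1.]])

    affine1_to_2 = (rot_m @ shear_m @ scale_m).to(torch.float32).to(img.device)
    affine2_to_1 = torch.inverse(affine1_to_2)

    # 2x3 theta maps output coords to input sampling coords ([-1, 1] convention)
    grid = F.affine_grid(affine1_to_2[:2].unsqueeze(0),
                         [1] + list(img.shape), align_corners=False)
    img = F.grid_sample(img.unsqueeze(0), grid, align_corners=False)[0]
    return img, affine1_to_2[:2], affine2_to_1[:2]
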
def IID_segmentation_loss_uncollapsed(x1_outs,
                                      x2_outs,
                                      all_affine2_to_1=None,
                                      all_mask_img1=None,
                                      lamb=1.0,
                                      half_T_side_dense=None,
                                      half_T_side_sparse_min=None,
                                      half_T_side_sparse_max=None):
    assert (x1_outs.requires_grad)
    assert (x2_outs.requires_grad)
    assert (not all_affine2_to_1.requires_grad)
    assert (not all_mask_img1.requires_grad)

    assert (x1_outs.shape == x2_outs.shape)

    # bring x2 back into x1's spatial frame
    x2_outs_inv = perform_affine_tf(x2_outs, all_affine2_to_1)

    if (half_T_side_sparse_min != 0) or (half_T_side_sparse_max != 0):
        x2_outs_inv = random_translation_multiple(
            x2_outs_inv,
            half_side_min=half_T_side_sparse_min,
            half_side_max=half_T_side_sparse_max)

    if RENDER:
        # indices added to each name by render()
        render(x1_outs, mode="image_as_feat", name="invert_img1_")
        render(x2_outs, mode="image_as_feat", name="invert_img2_pre_")
        render(x2_outs_inv, mode="image_as_feat", name="invert_img2_post_")
        render(all_mask_img1, mode="mask", name="invert_mask_")

    # zero out all irrelevant patches
    bn, k, h, w = x1_outs.shape
    all_mask_img1 = all_mask_img1.view(bn, 1, h, w)  # mult, already float32
    x1_outs = x1_outs * all_mask_img1  # broadcasts
    x2_outs_inv = x2_outs_inv * all_mask_img1

    # sum over everything except classes, by convolving x1_outs with x2_outs_inv
    # which is symmetric, so doesn't matter which one is the filter
    x1_outs = x1_outs.permute(1, 0, 2, 3).contiguous()  # k, ni, h, w
    x2_outs_inv = x2_outs_inv.permute(1, 0, 2, 3).contiguous()  # k, ni, h, w

    # k, k, 2 * half_T_side_dense + 1, 2 * half_T_side_dense + 1
    p_i_j = F.conv2d(x1_outs,
                     weight=x2_outs_inv,
                     padding=(half_T_side_dense, half_T_side_dense))

    # do expectation over each shift location in the T_side_dense *
    # T_side_dense box
    T_side_dense = half_T_side_dense * 2 + 1

    # T x T x k x k
    p_i_j = p_i_j.permute(2, 3, 0, 1)
    p_i_j = p_i_j / p_i_j.sum(dim=3, keepdim=True).sum(dim=2,
                                                       keepdim=True)  # norm

    # symmetrise, transpose the k x k part
    p_i_j = (p_i_j + p_i_j.permute(0, 1, 3, 2)) / 2.0

    # T x T x k x k
    p_i_mat = p_i_j.sum(dim=2, keepdim=True).repeat(1, 1, k, 1)
    p_j_mat = p_i_j.sum(dim=3, keepdim=True).repeat(1, 1, 1, k)

    # for log stability; tiny values cancelled out by mult with p_i_j anyway
    p_i_j[(p_i_j < EPS).data] = EPS
    p_i_mat[(p_i_mat < EPS).data] = EPS
    p_j_mat[(p_j_mat < EPS).data] = EPS

    # maximise information
    loss = (-p_i_j *
            (torch.log(p_i_j) - lamb * torch.log(p_i_mat) -
             lamb * torch.log(p_j_mat))).sum() / (T_side_dense * T_side_dense)

    # for analysis only
    loss_no_lamb = (-p_i_j *
                    (torch.log(p_i_j) - torch.log(p_i_mat) -
                     torch.log(p_j_mat))).sum() / (T_side_dense * T_side_dense)

    return loss, loss_no_lamb
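
A minimal usage sketch for the loss above (not from the repo): random per-pixel
softmax outputs, an identity geometric transform and an all-ones mask, i.e. the
two views already align. A GPU and the module-level RENDER flag being off are
assumed, matching the data preparation above.

import torch
import torch.nn.functional as F

bn, k, h, w = 4, 10, 32, 32
x1_outs = F.softmax(torch.randn(bn, k, h, w, device="cuda", requires_grad=True), dim=1)
x2_outs = F.softmax(torch.randn(bn, k, h, w, device="cuda", requires_grad=True), dim=1)

all_affine2_to_1 = torch.zeros(bn, 2, 3, device="cuda")  # identity transforms
all_affine2_to_1[:, 0, 0] = 1.
all_affine2_to_1[:, 1, 1] = 1.
all_mask_img1 = torch.ones(bn, h, w, device="cuda")  # float32, used multiplicatively

loss, loss_no_lamb = IID_segmentation_loss_uncollapsed(
    x1_outs, x2_outs,
    all_affine2_to_1=all_affine2_to_1, all_mask_img1=all_mask_img1,
    lamb=1.0, half_T_side_dense=1,
    half_T_side_sparse_min=0, half_T_side_sparse_max=0)
loss.backward()
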
def IID_segmentation_loss(x1_outs,
                          x2_outs,
                          all_affine2_to_1=None,
                          all_mask_img1=None,
                          lamb=1.0,
                          half_T_side_dense=None,
                          half_T_side_sparse_min=None,
                          half_T_side_sparse_max=None):
    assert (x1_outs.requires_grad)
    assert (x2_outs.requires_grad)
    assert (not all_affine2_to_1.requires_grad)
    assert (not all_mask_img1.requires_grad)

    assert (x1_outs.shape == x2_outs.shape)

    # bring x2 back into x1's spatial frame
    x2_outs_inv = perform_affine_tf(x2_outs, all_affine2_to_1)

    if (half_T_side_sparse_min != 0) or (half_T_side_sparse_max != 0):
        x2_outs_inv = random_translation_multiple(
            x2_outs_inv,
            half_side_min=half_T_side_sparse_min,
            half_side_max=half_T_side_sparse_max)

    if RENDER:
        # indices added to each name by render()
        render(x1_outs, mode="image_as_feat", name="invert_img1_")
        render(x2_outs, mode="image_as_feat", name="invert_img2_pre_")
        render(x2_outs_inv, mode="image_as_feat", name="invert_img2_post_")
        render(all_mask_img1, mode="mask", name="invert_mask_")

    # zero out all irrelevant patches
    bn, k, h, w = x1_outs.shape
    all_mask_img1 = all_mask_img1.view(bn, 1, h, w)  # mult, already float32
    x1_outs = x1_outs * all_mask_img1  # broadcasts
    x2_outs_inv = x2_outs_inv * all_mask_img1

    # sum over everything except classes, by convolving x1_outs with x2_outs_inv
    # which is symmetric, so doesn't matter which one is the filter
    x1_outs = x1_outs.permute(1, 0, 2, 3).contiguous()  # k, ni, h, w
    x2_outs_inv = x2_outs_inv.permute(1, 0, 2, 3).contiguous()  # k, ni, h, w

    # k, k, 2 * half_T_side_dense + 1, 2 * half_T_side_dense + 1
    p_i_j = F.conv2d(x1_outs,
                     weight=x2_outs_inv,
                     padding=(half_T_side_dense, half_T_side_dense))
    p_i_j = p_i_j.sum(dim=2, keepdim=False).sum(dim=2, keepdim=False)  # k, k

    # normalise, use sum, not bn * h * w * T_side * T_side, because we use a mask
    # also, some pixels did not have a completely unmasked box neighbourhood,
    # but it's fine - just less samples from that pixel
    current_norm = float(p_i_j.sum())
    p_i_j = p_i_j / current_norm

    # symmetrise
    p_i_j = (p_i_j + p_i_j.t()) / 2.

    # compute marginals
    p_i_mat = p_i_j.sum(dim=1).unsqueeze(1)  # k, 1
    p_j_mat = p_i_j.sum(dim=0).unsqueeze(0)  # 1, k

    # for log stability; tiny values cancelled out by mult with p_i_j anyway
    p_i_j[(p_i_j < EPS).data] = EPS
    p_i_mat[(p_i_mat < EPS).data] = EPS
    p_j_mat[(p_j_mat < EPS).data] = EPS

    # maximise information
    loss = (-p_i_j * (torch.log(p_i_j) - lamb * torch.log(p_i_mat) -
                      lamb * torch.log(p_j_mat))).sum()

    # for analysis only
    loss_no_lamb = (
        -p_i_j *
        (torch.log(p_i_j) - torch.log(p_i_mat) - torch.log(p_j_mat))).sum()

    return loss, loss_no_lamb
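
For reference, with lamb = 1 the quantity being minimised above is simply the
negative mutual information of the symmetrised k x k joint distribution p_i_j
over paired cluster assignments. A tiny standalone illustration of that formula:

import torch

p = torch.rand(5, 5)
p = (p + p.t()) / 2.
p = p / p.sum()                    # symmetric joint distribution, sums to 1
p_i = p.sum(dim=1, keepdim=True)   # marginal, k x 1
p_j = p.sum(dim=0, keepdim=True)   # marginal, 1 x k

mutual_info = (p * (torch.log(p) - torch.log(p_i) - torch.log(p_j))).sum()
print(mutual_info)  # >= 0; IID_segmentation_loss with lamb=1 returns its negative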