def learn_dictionary(X, n_filters, filter_size, n_sample=1000,
                     n_sample_patches=0, **kwargs):
    """
    Learn a dictionary of n_filters atoms from n_sample images from X.

    Square patches are extracted from a random subset of the images,
    standardised per pixel position, and fed to MiniBatchDictionaryLearning.
    The learned atoms are returned shaped like convolution filters.

    :param X: 4D array of images; axis 1 is read as the channel axis, so the
        layout is taken to be [n_images, channels, rows, cols]
    :param n_filters: number of dictionary atoms to learn
    :param filter_size: side length of the square patches / filters
    :param n_sample: number of images drawn at random (without replacement);
        capped at len(X)
    :param n_sample_patches: if > 0 and smaller than the number of extracted
        patches, only that many randomly chosen patches are used
    :param kwargs: forwarded to MiniBatchDictionaryLearning

    :return: float32 array of shape
        [n_filters, channels, filter_size, filter_size]
    """

    n_channels = X.shape[1]

    # subsample n_sample images randomly; asking for more images than exist
    # would make choice(replace=False) raise, so cap the request
    n_sample = min(n_sample, len(X))
    rand_idx = np.random.choice(len(X), n_sample, replace=False)

    # extract patches
    patch_size = (filter_size, filter_size)
    # PatchExtractor expects channels-last images. The channel axis has to be
    # *moved* with transpose; a plain reshape only reinterprets the buffer and
    # would scramble the pixels.
    images = X[rand_idx, ...].transpose(0, 2, 3, 1)
    patches = PatchExtractor(patch_size).transform(images)
    patches = patches.reshape(patches.shape[0], -1)

    # standardise every pixel position across patches
    patches -= np.mean(patches, axis=0)
    std = np.std(patches, axis=0)
    # constant positions have zero std; leave them at 0 instead of producing
    # NaNs through 0 / 0
    std[std == 0] = 1.0
    patches /= std

    if n_sample_patches > 0 and (n_sample_patches < len(patches)):
        np.random.shuffle(patches)
        patches = patches[:n_sample_patches, ...]

    # learn dictionary
    print('Learning dictionary for weight initialization...')

    dico = MiniBatchDictionaryLearning(n_components=n_filters, alpha=1, n_iter=1000, batch_size=10, shuffle=True,
                                       verbose=True, **kwargs)
    W = dico.fit(patches).components_
    # each atom is a flattened (rows, cols, channels) patch: restore that
    # layout first, then move channels in front of the spatial axes
    W = W.reshape(n_filters, filter_size, filter_size, n_channels)
    W = W.transpose(0, 3, 1, 2)

    print('Dictionary learned.')

    return np.ascontiguousarray(W, dtype=np.float32)
Exemplo n.º 2
0
    def __init__(self, train, patch_size=(9, 9)):
        """Fit a convolutional ZCA whitening kernel on the images of ``train``.

        Stores three attributes:

        * ``self.mean``    - per-channel mean as a tensor shaped [1, C, 1, 1]
        * ``self.kernel``  - whitening kernel shaped [C, C, patch_h, patch_w]
        * ``self.padding`` - (patch_h - 1, patch_w - 1)

        :param train: iterable of (image, label) pairs; every image is passed
            through ``transforms.ToTensor()`` (labels are ignored)
        :param patch_size: (rows, cols) tuple giving the size of the patches
            used to estimate the whitening transform
        """
        self.patch_size = patch_size
        patch_h = self.patch_size[0]
        patch_w = self.patch_size[1]

        # Stack all images channels-last: [n_images, rows, cols, channels].
        to_tensor = transforms.ToTensor()
        X = np.array(
            [to_tensor(image).permute(1, 2, 0).numpy() for image, _ in train]
        )

        # Centre the data per channel and keep the mean in broadcastable form.
        channel_mean = X.mean(axis=(0, 1, 2))
        X = X - channel_mean
        self.mean = torch.from_numpy(
            channel_mean.reshape(1, channel_mean.shape[0], 1, 1)
        )
        n_channels = X.shape[3]

        # 1. Sample random image patches (extraction capped by max_patches=250).
        extractor = PatchExtractor(patch_size=self.patch_size,
                                   max_patches=int(2.5e2))
        patches = extractor.transform(X)

        # 2. PCA on the flattened patches: eigenvectors V and eigenvalues D.
        pca = PCA()
        pca.fit(patches.reshape(len(patches), -1))

        # V reshaped to [n_components, channels, patch_h, patch_w].
        components = pca.components_.reshape(
            (-1,) + self.patch_size + (n_channels,)
        )
        eigenvectors = torch.from_numpy(
            components.transpose(0, 3, 1, 2).astype(X.dtype)
        )
        # D^{-1/2} as a float32 diagonal matrix.
        inv_sqrt_eigenvalues = torch.from_numpy(
            np.diag(1. / np.sqrt(pca.explained_variance_))
        ).float()

        # 3. Build the whitening kernel k. For each pair of colours (ci, cj):
        #    k[j, i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]
        #    where (x0, y0) is the centre pixel of the patch.
        x_0 = patch_h // 2
        y_0 = patch_w // 2
        kernel = torch.zeros((n_channels, n_channels, patch_h, patch_w))
        for j in range(n_channels):
            # The left factor depends only on the output channel j.
            centre = eigenvectors[:, j, x_0, y_0].contiguous().view(1, -1)
            left = torch.mm(centre, inv_sqrt_eigenvalues)
            for i in range(n_channels):
                right = eigenvectors[:, i, :, :].contiguous().view(
                    -1, patch_h * patch_w
                )
                kernel[j, i, :, :] = torch.mm(left, right).contiguous().view(
                    patch_h, patch_w
                )
        self.kernel = kernel
        self.padding = (patch_h - 1, patch_w - 1)
Exemplo n.º 3
0
def convolutional_zca(input, patch_size=(9, 9), max_patches=int(1e5)):
    """
    This is an implementation of the convolutional ZCA whitening presented by
    David Eigen in his phd thesis
    http://www.cs.nyu.edu/~deigen/deigen-thesis.pdf

    "Predicting Images using Convolutional Networks:
     Visual Scene Understanding with Pixel Maps"

    From paragraph 8.4:
    A simple adaptation of ZCA to convolutional application is to find the
    ZCA whitening transformation for a sample of local image patches across
    the dataset, and then apply this transform to every patch in a larger image.
    We then use the center pixel of each ZCA patch to create the conv-ZCA
    output image. The operations of applying local ZCA and selecting the center
    pixel can be combined into a single convolution kernel,
    resulting in the following algorithm
    (explained using RGB inputs and 9x9 kernel):

    1. Sample 10M random 9x9 image patches (each with 3 colors)
    2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    3. Optionally remove small eigenvalues, so V has shape [npca x 3 x 9 x 9].
    4. Construct the whitening kernel k:
        for each pair of colors (ci,cj),
        set k[j,i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]

    where (x0, y0) is the center pixel location (e.g. (5,5) for a 9x9 kernel)

    Step 3 (dropping small eigenvalues) is not performed here: all PCA
    components are kept.

    :param input: 4D tensor of shape [batch_size, rows, col, channels];
        it is not modified
    :param patch_size: size of the patches extracted from the dataset, either
        a single int (square patches) or a (rows, cols) pair
    :param max_patches: max number of patches extracted from the dataset

    :return: conv-zca whitened dataset
    """

    # Center the data per channel. Work on a new array so the caller's data
    # is left untouched (in-place subtraction also fails on integer input).
    mean = np.mean(input, axis=(0, 1, 2))
    input = input - mean

    n_imgs, h, w, n_channels = input.shape

    # Accept both a scalar side length and a (rows, cols) pair. Wrapping a
    # pair unconditionally would produce a tuple of tuples and break the
    # default argument.
    if np.isscalar(patch_size):
        patch_size = (patch_size, patch_size)
    else:
        patch_size = tuple(patch_size)

    patches = PatchExtractor(patch_size=patch_size,
                             max_patches=max_patches).transform(input)
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))

    # Transpose the components into theano convolution filter type
    dim = (-1,) + patch_size + (n_channels,)
    V = shared(pca.components_.reshape(dim).
               transpose(0, 3, 1, 2).astype(input.dtype))
    D = T.nlinalg.diag(1. / np.sqrt(pca.explained_variance_))

    # center pixel of the patch
    x_0 = patch_size[0] // 2
    y_0 = patch_size[1] // 2

    filter_shape = [n_channels, n_channels, patch_size[0], patch_size[1]]
    image_shape = [n_imgs, n_channels, h, w]
    kernel = T.zeros(filter_shape)
    VT = V.dimshuffle(2, 3, 1, 0)

    # V  : n_components x channels x rows x cols (243 x 3 x 9 x 9 for RGB, 9x9)
    # VT : rows x cols x channels x n_components (9 x 9 x 3 x 243)

    # build the kernel
    for i in range(n_channels):
        for j in range(n_channels):
            a = T.dot(VT[x_0, y_0, j, :], D).reshape([1, -1])
            b = V[:, i, :, :].reshape([-1, patch_size[0] * patch_size[1]])
            c = T.dot(a, b).reshape([patch_size[0], patch_size[1]])
            kernel = T.set_subtensor(kernel[j, i, :, :], c)

    kernel = kernel.astype(floatX)
    input = input.astype(floatX)
    input_images = T.tensor4(dtype=floatX)
    conv_whitening = conv2d(input_images.dimshuffle((0, 3, 1, 2)),
                            kernel,
                            input_shape=image_shape,
                            filter_shape=filter_shape,
                            border_mode='full')

    # Trim the border added by the 'full' convolution. A zero margin must map
    # to an open-ended slice: "0:-0" would select nothing.
    s_crop = [(patch_size[0] - 1) // 2,
              (patch_size[1] - 1) // 2]
    rows = slice(s_crop[0], -s_crop[0] or None)
    cols = slice(s_crop[1], -s_crop[1] or None)

    conv_whitening = conv_whitening[:, :, rows, cols]
    # back to channels-last
    conv_whitening = conv_whitening.dimshuffle(0, 2, 3, 1)
    f_convZCA = function([input_images], conv_whitening)

    return f_convZCA(input)
Exemplo n.º 4
0
    def __call__(self, _input):
        """Whiten a single image with a conv-ZCA kernel estimated from it.

        The image is centred per channel, a PCA is fitted on its patches of
        size ``self.patch_size``, the whitening kernel is assembled from the
        PCA result and the centred image is convolved with it.

        :param _input: 3D torch tensor; it is permuted with (1, 2, 0) before
            use, so it is presumably laid out [channels, rows, cols] -
            confirm against the callers
        :return: 3D tensor with the channel axis first, i.e. the whitened
            image with the batch axis dropped
        """
        patch_h = self.patch_size[0]
        patch_w = self.patch_size[1]

        # channels-last copy with a leading batch axis of size 1
        image = _input.permute(1, 2, 0).numpy()
        image = image.reshape(1,
                              image.shape[0],
                              image.shape[1],
                              image.shape[2])

        # centre the data per channel
        mean = image.mean(axis=(0, 1, 2))
        image = np.add(image, -mean)
        _, _, _, n_channels = image.shape

        # 1. Extract the image patches.
        patches = PatchExtractor(patch_size=self.patch_size).transform(image)
        # 2. Perform PCA on these to get eigenvectors V and eigenvalues D.
        pca = PCA()
        pca.fit(patches.reshape(patches.shape[0], -1))

        # V reshaped to [n_components, channels, patch_h, patch_w]
        dim = (-1,) + self.patch_size + (n_channels,)
        eigenvectors = torch.from_numpy(
            pca.components_.reshape(dim).transpose(0, 3, 1, 2).astype(
                image.dtype)
        )
        # D^{-1/2} as a float32 diagonal matrix (converted once, not per loop
        # iteration)
        inv_sqrt_eigenvalues = torch.from_numpy(
            np.diag(1. / np.sqrt(pca.explained_variance_))
        ).float()

        # 3. Construct the whitening kernel k:
        # for each pair of colors (ci,cj),
        # set k[j,i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]
        # where (x0, y0) is the center pixel location
        x_0 = patch_h // 2
        y_0 = patch_w // 2
        kernel = torch.zeros((n_channels, n_channels, patch_h, patch_w))
        for j in range(n_channels):
            # the left factor only depends on the output channel j
            centre = eigenvectors[:, j, x_0, y_0].contiguous().view(1, -1)
            left = torch.mm(centre, inv_sqrt_eigenvalues)
            for i in range(n_channels):
                right = eigenvectors[:, i, :, :].contiguous().view(
                    -1, patch_h * patch_w
                )
                kernel[j, i, :, :] = torch.mm(left, right).contiguous().view(
                    patch_h, patch_w
                )

        # pad by (patch - 1) on every side, then trim the extra border
        padding = (patch_h - 1), (patch_w - 1)
        input_tensor = torch.from_numpy(image).permute(0, 3, 1, 2)
        conv_whitening = torch.nn.functional.conv2d(
            input=input_tensor,
            weight=kernel,
            padding=padding
        )
        s_crop = [(patch_h - 1) // 2, (patch_w - 1) // 2]
        # A zero margin must become an open-ended slice: "0:-0" would select
        # nothing and return an empty tensor.
        rows = slice(s_crop[0], -s_crop[0] or None)
        cols = slice(s_crop[1], -s_crop[1] or None)
        conv_whitening = conv_whitening[:, :, rows, cols]

        # drop the batch axis of size 1
        return conv_whitening.view(conv_whitening.shape[1],
                                   conv_whitening.shape[2],
                                   conv_whitening.shape[3])
Exemplo n.º 5
0
def convolutional_zca(input, patch_size=(9, 9), max_patches=int(1e5)):
    """
    This is an implementation of the convolutional ZCA whitening presented by
    David Eigen in his phd thesis
    http://www.cs.nyu.edu/~deigen/deigen-thesis.pdf

    "Predicting Images using Convolutional Networks:
     Visual Scene Understanding with Pixel Maps"

    From paragraph 8.4:
    A simple adaptation of ZCA to convolutional application is to find the
    ZCA whitening transformation for a sample of local image patches across
    the dataset, and then apply this transform to every patch in a larger image.
    We then use the center pixel of each ZCA patch to create the conv-ZCA
    output image. The operations of applying local ZCA and selecting the center
    pixel can be combined into a single convolution kernel,
    resulting in the following algorithm
    (explained using RGB inputs and 9x9 kernel):

    1. Sample 10M random 9x9 image patches (each with 3 colors)
    2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    3. Optionally remove small eigenvalues, so V has shape [npca x 3 x 9 x 9].
    4. Construct the whitening kernel k:
        for each pair of colors (ci,cj),
        set k[j,i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]

    where (x0, y0) is the center pixel location (e.g. (5,5) for a 9x9 kernel)

    Step 3 (dropping small eigenvalues) is not performed here: all PCA
    components are kept.

    :param input: 4D tensor of shape [batch_size, rows, col, channels];
        it is not modified
    :param patch_size: size of the patches extracted from the dataset, either
        a single int (square patches) or a (rows, cols) pair
    :param max_patches: max number of patches extracted from the dataset

    :return: conv-zca whitened dataset
    """

    # Subtract the per-channel mean out of place: the caller's array stays
    # intact and integer inputs no longer trip over an in-place cast.
    mean = np.mean(input, axis=(0, 1, 2))
    input = input - mean

    n_imgs, h, w, n_channels = input.shape

    # A scalar means square patches; a pair is used as given. Duplicating a
    # pair would yield a tuple of tuples and make the default argument fail.
    if np.isscalar(patch_size):
        patch_size = (patch_size, patch_size)
    else:
        patch_size = tuple(patch_size)

    patches = PatchExtractor(patch_size=patch_size,
                             max_patches=max_patches).transform(input)
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))

    # Transpose the components into theano convolution filter type
    dim = (-1, ) + patch_size + (n_channels, )
    V = shared(
        pca.components_.reshape(dim).transpose(0, 3, 1, 2).astype(input.dtype))
    D = T.nlinalg.diag(1. / np.sqrt(pca.explained_variance_))

    # center pixel of the patch
    x_0 = patch_size[0] // 2
    y_0 = patch_size[1] // 2

    filter_shape = [n_channels, n_channels, patch_size[0], patch_size[1]]
    image_shape = [n_imgs, n_channels, h, w]
    kernel = T.zeros(filter_shape)
    VT = V.dimshuffle(2, 3, 1, 0)

    # V  : n_components x channels x rows x cols (243 x 3 x 9 x 9 for RGB, 9x9)
    # VT : rows x cols x channels x n_components (9 x 9 x 3 x 243)

    # build the kernel
    for i in range(n_channels):
        for j in range(n_channels):
            a = T.dot(VT[x_0, y_0, j, :], D).reshape([1, -1])
            b = V[:, i, :, :].reshape([-1, patch_size[0] * patch_size[1]])
            c = T.dot(a, b).reshape([patch_size[0], patch_size[1]])
            kernel = T.set_subtensor(kernel[j, i, :, :], c)

    kernel = kernel.astype(floatX)
    input = input.astype(floatX)
    input_images = T.tensor4(dtype=floatX)
    conv_whitening = conv2d(input_images.dimshuffle((0, 3, 1, 2)),
                            kernel,
                            input_shape=image_shape,
                            filter_shape=filter_shape,
                            border_mode='full')

    # Remove the border introduced by the 'full' convolution. With a margin
    # of zero the end index has to be None, since "0:-0" is an empty slice.
    s_crop = [(patch_size[0] - 1) // 2, (patch_size[1] - 1) // 2]
    rows = slice(s_crop[0], -s_crop[0] or None)
    cols = slice(s_crop[1], -s_crop[1] or None)

    conv_whitening = conv_whitening[:, :, rows, cols]
    # back to channels-last
    conv_whitening = conv_whitening.dimshuffle(0, 2, 3, 1)
    f_convZCA = function([input_images], conv_whitening)

    return f_convZCA(input)