Пример #1
0
def split_images(x, y=None, size=(128, 128), num_part=4):
    """
    Cut image arrays into patches with scikit-learn's ``PatchExtractor``.

    The same extractor settings (including ``random_state=0``) are used for
    ``x`` and for ``y``.

    Arguments:
        x: Numpy ndarray with images.
        y: Optional Numpy ndarray with images; skipped when ``None``.
        size: Tuple with two integer values, (height, width) of the resulting patches.
        num_part: Integer value, passed to the extractor as ``max_patches``.

    Returns:
        ``x_imgs`` alone when ``y`` is ``None``, otherwise the tuple
        ``(x_imgs, y_imgs)`` with the patches of the original images.
    """
    def _to_patches(images):
        # A fresh extractor per call, always seeded with 0.
        extractor = image.PatchExtractor(patch_size=size,
                                         max_patches=num_part,
                                         random_state=0)
        extracted = extractor.transform(images)
        # When the trailing dimension of the result differs from that of the
        # input (the grayscale case), append a singleton channel axis.
        if images.shape[-1] != extracted.shape[-1]:
            extracted = extracted[:, :, :, np.newaxis]
        return extracted

    x_imgs = _to_patches(x)
    if y is None:
        return x_imgs

    y_imgs = _to_patches(y)
    return x_imgs, y_imgs
def fast_represent(all_data,whitener,cb,W,K,spatial_pooling,random_state=666,verbose=False):
    """
    Encode every image as a spatially pooled codebook representation.

    For each spatial zone of each image, all dense W x W patches are
    extracted, passed through ``sampleNormalize`` and ``whitener.transform``,
    encoded with ``cb.fast_transform`` and summed per image. With (2, 2)
    pooling there are 4 zones; (1, 1) pools over the whole image; (2, 1) and
    (1, 2) are also possible.

    Arguments:
        all_data: images indexed as (image, x, y, channel). Must support
            ``len`` and slicing, and a slice must expose ``.shape``.
        whitener: pre-fit whitener (e.g. ZCA) exposing ``transform``.
        cb: pre-fit codebook (e.g. k-means) exposing ``fast_transform``; its
            output is expected to have K columns per patch.
        W: patch side length.
        K: number of centroids in ``cb``.
        spatial_pooling: tuple (zones_x, zones_y) with the number of zones
            along each image axis.
        random_state: forwarded to ``image.PatchExtractor``.
        verbose: when true, print the patch array shape of the first zone of
            every batch.

    Returns:
        float32 array of shape (len(all_data), K * zones_x * zones_y); the
        columns ``z*K:(z+1)*K`` hold the summed encoding of zone ``z``.
    """
    zones_x, zones_y = spatial_pooling[0], spatial_pooling[1]
    ex = image.PatchExtractor((W, W),random_state=random_state)
    all_representation = np.empty(shape=[len(all_data), K*zones_x*zones_y], dtype=np.float32)

    # Run in batches, due to memory constraints.
    BATCH = 500
    for start_img in range(0, len(all_data), BATCH):
        max_curr = min(len(all_data), start_img+BATCH)
        data = all_data[start_img:max_curr]

        # Number of patch positions per zone along each axis,
        # e.g. (32-30+1)/2 -> 1. The y size must come from axis 2; using
        # axis 1 for both is only correct for square images.
        zone_size_x = int((data.shape[1]-W+1)/zones_x)
        zone_size_y = int((data.shape[2]-W+1)/zones_y)

        # All steps are done separately for each spatial zone.
        zone_counter = 0
        for zone_x in range(0, zones_x*zone_size_x, zone_size_x):
            for zone_y in range(0, zones_y*zone_size_y, zone_size_y):
                # A window of zone_size + W - 1 pixels yields exactly
                # zone_size patch positions along that axis.
                patches = ex.transform(data[:,
                                            zone_x:zone_x+zone_size_x+W-1,
                                            zone_y:zone_y+zone_size_y+W-1,
                                            :])

                if verbose and zone_counter == 0:
                    print ('patches : ',patches.shape)

                # Flatten each patch (last three axes) into one row vector.
                # np.prod replaces np.product, which NumPy 2.0 removed.
                patches = patches.reshape(patches.shape[0], np.prod(patches.shape[-3:]))

                patches = sampleNormalize(patches)

                patches = whitener.transform(patches)

                representations = cb.fast_transform(patches)

                # representations is (#patches, K) for this zone; group the
                # rows by image and sum over the patches of each image.
                patches_per_images = int(patches.shape[0]//data.shape[0])
                sum_over_zone = representations.reshape(
                    data.shape[0], patches_per_images, representations.shape[1]).sum(axis=1)
                # Zone z occupies columns z*K:(z+1)*K, e.g. for K=100 and two
                # zones: 0:100 and 100:200.
                all_representation[start_img:max_curr, zone_counter*K:(1+zone_counter)*K] = sum_over_zone

                zone_counter += 1

    return all_representation
Пример #3
0
	def readin(self):
		"""Load images from the HDF5 file and store flattened patches.

		Reads the ``'van_hateren_good'`` dataset from ``self.im_path``,
		extracts patches of size ``self.X_shape`` and writes them, one
		flattened patch per row, to ``self.X_train``.
		"""
		print('Reading in image data...')
		with h5py.File(self.im_path, 'r') as h5_file:
			images = h5_file['van_hateren_good'][()]

		print('Extracting patches...')
		# Number of values in a single patch.
		patch_len = np.prod(self.X_shape)
		n_samples = round(self.completion * patch_len) * patch_len * 10
		# The total sample budget is split evenly across the images.
		extractor = image.PatchExtractor(
			patch_size = self.X_shape,
			max_patches = n_samples // images.shape[0],
			random_state = self.rng)
		patches = extractor.transform(images)
		self.X_train = patches.reshape((patches.shape[0], patch_len))
Пример #4
0
def ExtractPatches_KMeans(X, patch_size, max_patches, n_clusters, batch_size=50):
    ''' ExtractPatches_KMeans Method
            Extract patches from the input X and pass the complete set of patches to
            MiniBatchKMeans to learn a dictionary of filters

            Args:
                X: array of shape (Number of samples, Number of filters, Height, Width);
                   it is transposed to channels-last internally before patch extraction
                patch_size: (height, width) pair giving the size of patches to extract
                            from X (indexed as patch_size[0] and patch_size[1])
                max_patches: forwarded to sklearn's PatchExtractor as "max_patches",
                             else 'None' to indicate no maximum
                n_clusters: int number of centroids (filters) to learn via MiniBatchKMeans
                batch_size: int size of batches to use in MiniBatchKMeans (default 50)
            Returns:
                cluster centers of the fitted MiniBatchKMeans, reshaped to
                (-1, Number of filters, patch_size[0], patch_size[1])

    '''
    # Input Shape: (Number of samples, Number of Filters, Height, Width)
    # Reshape into: (Number of samples, Height, Width, Number of Filters)
    X = numpy.transpose(X, (0, 2, 3, 1))
    # Dimensions of X after the transpose; sz[3] is the number of filters
    sz = X.shape
    # Extract patches from each sample up to the maximum number of patches using sklearn's
    # PatchExtractor
    X = image.PatchExtractor(patch_size=patch_size, max_patches=max_patches).transform(X)
    # For later processing, ensure that X has 4 dimensions (add an additional last axis of
    # size 1 if there are fewer dimensions)
    if len(X.shape) <= 3:
        X = X[..., numpy.newaxis]
    # Local centering by subtracting the per-patch, per-channel mean
    X = X - numpy.reshape(numpy.mean(X, axis=(1, 2)), (-1, 1, 1, X.shape[-1]))
    # Local scaling by dividing by the standard deviation (1e-10 avoids division by zero)
    X = X / (numpy.reshape(numpy.std(X, axis=(1, 2)), (-1, 1, 1, X.shape[-1])) + 1e-10)
    # Back to channels-first: (Number of patches, Number of filters, Height, Width)
    X = X.transpose((0, 3, 1, 2))
    # Reshape X into a 2-D array for input into MiniBatchKMeans
    X = numpy.asarray(X.reshape(X.shape[0], -1), dtype=numpy.float32)
    # mat2gray is defined elsewhere; presumably it rescales X to the range 0..1 -- TODO confirm
    X = mat2gray(X)
    # Perform PCA whitening
    pca = PCA(whiten=True)
    X = pca.fit_transform(X)
    # Scale input samples individually to unit norm (using sklearn's "normalize")
    X = normalize(X)
    # Use "MiniBatchKMeans" on the extracted patches to find a dictionary of n_clusters
    # filters (centroids)
    km = MiniBatchKMeans(n_clusters=n_clusters,
                         batch_size=batch_size,
                         init_size=3 * n_clusters).fit(X).cluster_centers_
    # Reshape centroids into shape: (-1, Number of filters, Height, Width)
    return km.reshape(-1, sz[3], patch_size[0], patch_size[1])
def MiniBatchKMeansAutoConv(X,
                            patch_size,
                            max_patches,
                            n_clusters,
                            conv_orders,
                            batch_size=20):
    ''' MiniBatchKMeansAutoConv Method
            Extract patches from the input X, perform all specified orders of recursive
            autoconvolution to generate a richer set of patches, and pass the complete set
            of patches to MiniBatchKMeans to learn a dictionary of filters

            Args:
                X: (Number of samples, Number of filters, Height, Width)
                patch_size: (height, width) pair giving the size of patches to extract
                            from X (indexed as patch_size[0] and patch_size[1])
                max_patches: forwarded to sklearn's PatchExtractor as "max_patches"
                n_clusters: int number of centroids (filters) to learn via MiniBatchKMeans
                conv_orders: 1-D array of integers indicating which orders of recursive
                             autoconvolution to perform, with the set of possible orders
                             ranging from 0 to 3 inclusive
                batch_size: int size of batches, used both for the autoconvolution loop
                            and for MiniBatchKMeans
            Returns:
                cluster centers of the fitted MiniBatchKMeans, reshaped to
                (-1, Number of filters, patch_size[0], patch_size[1])

    '''
    sz = X.shape
    # Transpose to Shape: (Number of samples, Height, Width, Number of Filters) and extract
    # patches from each sample up to the maximum number of patches using sklearn's
    # PatchExtractor
    X = image.PatchExtractor(patch_size=patch_size,
                             max_patches=max_patches).transform(
                                 X.transpose((0, 2, 3, 1)))
    # For later processing, ensure that X has 4 dimensions (add an additional last axis of
    # size 1 if there are fewer dimensions)
    if len(X.shape) <= 3:
        X = X[..., numpy.newaxis]
    # Local centering by subtracting the mean
    X = X - numpy.reshape(numpy.mean(X, axis=(1, 2)), (-1, 1, 1, X.shape[-1]))
    # Local scaling by dividing by the standard deviation (1e-10 avoids division by zero)
    X = X / (numpy.reshape(numpy.std(X, axis=(1, 2)),
                           (-1, 1, 1, X.shape[-1])) + 1e-10)
    # Transpose to Shape: (Number of samples, Number of Filters, Height, Width)
    X = X.transpose((0, 3, 1, 2))
    # Number of batches determined by number of samples in X and batch size
    n_samples = len(X)
    n_batches = int(numpy.ceil(n_samples / float(batch_size)))
    # List to store patches modified by recursive autoconvolution
    autoconv_patches = []
    for batch in range(n_batches):
        # Obtain the samples corresponding to the current batch. The stop index of
        # numpy.arange is exclusive, so it must be clipped to n_samples (not
        # n_samples - 1); otherwise the last patch is never processed and the final
        # batch can be empty. Index-array selection keeps X_order a copy of the data.
        batch_start = batch * batch_size
        batch_stop = min(n_samples, batch_start + batch_size)
        X_order = X[numpy.arange(batch_start, batch_stop)]
        # conv_orders is an array containing the desired orders of recursive autoconvolution
        # (with 0 corresponding to no recursive autoconvolution and 3 corresponding to
        # recursive autoconvolution of order 3)
        for conv_order in conv_orders:
            if conv_order > 0:
                # Perform recursive autoconvolution using "autoconv2d"
                X_order = autoconv2d(X_order)
                # In order to perform recursive autoconvolution, the height and width
                # dimensions of X were doubled. Therefore, after recursive autoconvolution is
                # performed, reduce the height and width dimensions of X by 2.
                X_sampled = resize_batch(
                    X_order,
                    [int(numpy.round(s / 2.)) for s in X_order.shape[2:]])
                # From order 2 onwards the halved result is carried into the next order
                if conv_order > 1:
                    X_order = X_sampled
                # Resize X_sampled to the expected shape for MiniBatchKMeans
                if X_sampled.shape[2] != X.shape[2]:
                    X_sampled = resize_batch(X_sampled, X.shape[2:])
            else:
                X_sampled = X_order
            # Append the output of each order of recursive autoconvolution to "autoconv_patches"
            # in order to provide MiniBatchKMeans with a richer set of patches
            autoconv_patches.append(X_sampled)
        print('%d/%d ' % (batch, n_batches))
    X = numpy.concatenate(autoconv_patches)
    # Reshape X into a 2-D array for input into MiniBatchKMeans
    X = numpy.asarray(X.reshape(X.shape[0], -1), dtype=numpy.float32)
    # mat2gray is defined elsewhere; presumably it rescales X to the range 0..1 -- TODO confirm
    X = mat2gray(X)
    # Perform PCA whitening
    pca = PCA(whiten=True)
    X = pca.fit_transform(X)
    # Scale input sample vectors individually to unit norm (using sklearn's "normalize")
    X = normalize(X)
    # Use "MiniBatchKMeans" on the extracted patches to find a dictionary of n_clusters
    # filters (centroids)
    km = MiniBatchKMeans(n_clusters=n_clusters,
                         batch_size=batch_size,
                         init_size=3 * n_clusters).fit(X).cluster_centers_
    # Reshape centroids into shape: (-1, Number of filters, Height, Width)
    return km.reshape(-1, sz[1], patch_size[0], patch_size[1])
Пример #6
0
# skip

# 4.2.3.9. Performing out-of-core scaling with HashingVectorizer
# skip

# 4.2.3.10. Customizing the vectorizer classes
# skip

# 4.2.4. Image feature extraction
# 4.2.4.1. Patch extraction
from sklearn.feature_extraction import image
# Build a single 4x4 image with 3 channels filled with consecutive integers,
# then show its first channel.
one_image = np.arange(4*4*3).reshape((4,4,3))
print(one_image[:,:,0])
# Draw at most 2 patches of size 2x2 (seeded with random_state=0), then print
# the patch array shape and the first channel of every patch.
patches = image.extract_patches_2d(one_image,(2,2),max_patches=2,random_state=0)
print(patches.shape)
print(patches[:,:,:,0])
# Without max_patches, every 2x2 patch is extracted; a 4x4 image has 3*3 = 9
# such positions. Print the shape and the first channel of patch index 4.
patches = image.extract_patches_2d(one_image,(2,2))
print(patches.shape)
print(patches[4,:,:,0])
# Rebuild the image from the full set of patches and check that it matches
# the original exactly.
reconstructed = image.reconstruct_from_patches_2d(patches, (4, 4,3))
np.testing.assert_array_equal(one_image, reconstructed)
# PatchExtractor applies the same extraction to a stack of images; here a
# batch of five 4x4x3 images is cut into 2x2 patches.
five_images = np.arange(5*4*4*3).reshape(5,4,4,3)
patches = image.PatchExtractor((2,2)).transform(five_images)
print(patches.shape)



# 4.2.4.2. Connectivity graph of an image
# nothing