# Example no. 1
def vl_dsift(
			data, 
			step=-1, 
			bounds=None, 
			size=-1, 
			fast=True, 
			verbose=False, 
			norm=False):
	""" [F,D] = VL_DSIFT(I) calculates the Dense Histogram of Gradients (DSIFT) 
	descriptors for the image I. I must be grayscale in SINGLE format.\n\n
	
	In this implementation, a DSIFT descriptor is equivalent to a SIFT 
	descriptor (see VL_SIFT()). This function calculates quickly a large number
	of such descriptors, for a dense covering of the image with features of the
	same size and orientation.\n\n
	
	The function returns the frames F and the descriptors D. Since all frames
	have identical size and orientation, F has only two rows (for the X and Y
	center coordinates). The orientation is fixed to zero. The scale is related
	to the SIZE of the spatial bins, which by default is equal to 3 pixels (see
	below). If NS is the number of bins in each spatial direction (by default
	4), then a DSIFT keypoint covers a square patch of NS by SIZE pixels.\n\n
	
	@remark The size of a SIFT bin is equal to the magnification factor MAGNIF 
	(usually 3) by the scale of the SIFT keypoint. This means that the scale of
	the SIFT keypoints corresponding to the DSIFT descriptors is SIZE / MAGNIF. 
	
	@remark Although related, DSIFT is not the same as the HOG descriptor used 
	in [1]. This descriptor is equivalent to SIFT instead. 
		
	@param step    Extract a descriptor each STEP pixels.
	@param bounds  Frame extraction bounds; defaults to numpy.zeros(1, 'f')
	               (i.e. no explicit bounds) when not given.
	@param size    A spatial bin covers SIZE pixels.
	@param norm    Append the frames with the normalization factor applied to 
	               each descriptor. In this case, F has 3 rows and this value 
	               is the 3rd row. This information can be used to suppress
	               descriptors with low contrast.
	@param fast    Use a flat rather than Gaussian window. Much faster.
	@param verbose Be verbose. 
	"""
	# Fix: the previous default `bounds=numpy.zeros(1, 'f')` was a mutable
	# ndarray created once at definition time and shared across all calls
	# (classic mutable-default-argument pitfall). Use a None sentinel and
	# build a fresh array per call instead.
	if bounds is None:
		bounds = numpy.zeros(1, 'f')
	# The C backend expects Fortran (column-major) contiguous data.
	if not data.flags['F_CONTIGUOUS']:
		data = numpy.array(data, order='F')
		
	return _vlfeat.vl_dsift(data, step, bounds, size, fast, verbose, norm)
# Example no. 2
def vl_phow(I,
            verbose=False,
            fast=True,
            sizes=None,
            step=2,
            color='gray',
            floatDescriptors=False,
            magnif=6,
            windowSize=1.5,
            contrastThreshold=0.005):
    '''extracts PHOW features [1] from the image IM. 
    PHOW is simply dense SIFT applied at several resolutions.
    
    Parameters
    ----------
    
    data : float32 ndarray
        float32 image
        
    Returns
    -------
    
    frames : float64 ndarray with shape (4, n_descrs)
        Dense SIFT DoG keypoint frames
        
    descrs : float32 ndarray with shape (128, n_descrs)
        Dense SIFT descriptors at frames
    
    Options
    -------
    Verbose : false
      Set to true to turn on verbose output.
 
    Sizes : [4 6 8 10]
      Scales at which the dense SIFT features are extracted. Each
      value is used as bin size for the VL_DSIFT() function.
 
    Fast : true
      Set to false to turn off the fast SIFT features computation by
      VL_DSIFT().
 
    Step : 2
      Step (in pixels) of the grid at which the dense SIFT features
      are extracted.
 
    Color : 'gray'
      Choose between 'gray' (PHOW-gray), 'rgb', 'hsv', and 'opponent'
      (PHOW-color).
 
    ContrastThreshold : 0.005
      Contrast threshold below which SIFT features are mapped to
      zero. The input image is scaled to have intensity range in [0,1]
      (rather than [0,255]) and this value is compared to the
      descriptor norm as returned by VL_DSIFT().
 
    WindowSize : 1.5
      Size of the Gaussian window in units of spatial bins.
 
    Magnif : 6
      The image is smoothed by a Gaussian kernel of standard deviation
      SIZE / MAGNIF. Note that, in the standard SIFT descriptor, the
      magnification value is 3; here the default one is 6 as it seems
      to perform better in applications.
 
    FloatDescriptors : false
      If set to TRUE, the descriptors are returned in floating point
      format.
    '''
    # Fix: `sizes=[4, 6, 8, 10]` was a mutable default argument shared
    # across calls; use a None sentinel and build a fresh list per call.
    if sizes is None:
        sizes = [4, 6, 8, 10]

    opts = Options(verbose, fast, sizes, step, color, floatDescriptors, magnif,
                   windowSize, contrastThreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image I is float32, f_order
    #    I = np.asfortranarray(I, dtype=np.float32)

    # Extract the features
    imageSize = I.shape
    if I.ndim == 3:
        if imageSize[2] != 3:
            # "IndexError: tuple index out of range" if both if's are checked at the same time
            raise ValueError("Image data in unknown format/shape")
    # Convert the image to the requested color space; for 'gray' collapse to
    # a single channel, otherwise guarantee three channels.
    if opts.color == 'gray':
        numChannels = 1
        if (I.ndim != 2):
            I = rgb2gray(I)
    else:
        numChannels = 3
        if (I.ndim == 2):
            I = np.dstack([I, I, I])
        if opts.color == 'rgb':
            pass
        elif opts.color == 'hsv':
            I = rgb2hsv(I)
        elif opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of opponent
            # space and is the regular intesity (for compatibility with
            # the contrast thresholding).
            # Note also that the mean is added pack to the other two
            # components with a small multipliers for monochromatic
            # regions.

            mu = 0.3 * I[:, :, 0] + 0.59 * I[:, :, 1] + 0.11 * I[:, :, 2]
            alpha = 0.01
            I = np.dstack([
                mu, (I[:, :, 0] - I[:, :, 1]) / np.sqrt(2) + alpha * mu,
                (I[:, :, 0] + I[:, :, 1] - 2 * I[:, :, 2]) / np.sqrt(6) +
                alpha * mu
            ])
        else:
            raise ValueError('Color option ' + str(opts.color) +
                             ' not recognized')
    if opts.verbose:
        print('{0}: color space: {1}'.format('vl_phow', opts.color))
        print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0],
                                                  imageSize[1]))
        print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes))

    frames_all = []
    descrs_all = []
    for size_of_spatial_bins in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In pracrice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SZES are even or odd.

        off = np.floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = size_of_spatial_bins / float(opts.magnif)
        ims = vl_imsmooth(I, sigma)

        if opts.verbose:
            print('smooth sigma: %.2f' % sigma)
        # Ensure a trailing channel axis so the per-channel loop below works
        # for grayscale images too.
        if ims.ndim == 2:
            ims = ims[:, :, np.newaxis]

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            size_of_spatial_bins = int(size_of_spatial_bins)
            # vl_dsift does not accept numpy.int64 or similar
            f_temp, d_temp = vl_dsift(
                ims[:, :, k],
                step=dsiftOpts.step,
                size=size_of_spatial_bins,
                fast=dsiftOpts.fast,
                floatDescriptors=dsiftOpts.floatDescriptors,
                verbose=dsiftOpts.verbose,
                norm=dsiftOpts.norm,
                bounds=[off, off, maxint, maxint],
                windowSize=dsiftOpts.windowSize)
            if (not opts.floatDescriptors):
                d_temp = np.floor(d_temp)
            frames.append(f_temp)
            descrs.append(d_temp)

        # The contrast (descriptor norm, 3rd frame row) drives the
        # low-contrast suppression below; its definition depends on the
        # color space.
        if (opts.color == 'gray') or (opts.color == 'opponent'):
            contrast = frames[0][2, :]
        elif opts.color == 'rgb':
            contrast = np.mean(
                [frames[0][2, :], frames[1][2, :], frames[2][2, :]], axis=0)
        elif opts.color == 'hsv':
            contrast = frames[2][2, :]
        else:
            raise ValueError('Color option ' + str(opts.color) +
                             ' not recognized')

        # Fix: `xrange` is Python 2 only and raises NameError on Python 3;
        # the rest of this function already uses `range` and print().
        for k in range(numChannels):
            descrs[k][:, contrast < opts.contrastThreshold] = 0

        _pos = frames[0][0:2, :]  # x, y
        _contrast = contrast
        _binSize = size_of_spatial_bins * np.ones(frames[0].shape[1])

        frames_all.append(np.vstack((_pos, _contrast, _binSize)))
        # `np.vstack((descrs))` was equivalent but the extra parens looked
        # like an (unintended) tuple wrapper; pass the list directly.
        descrs_all.append(np.vstack(descrs))

    frames_all = np.hstack(frames_all)
    descrs_all = np.hstack(descrs_all)
    return frames_all, descrs_all