def Dense_SIFT_Extractor(images, data_name_code):
    """Compute dense SIFT descriptors for an image stack and save them to disk.

    Every image is passed to ``cy.vl_dsift`` with a 1-pixel step and a
    ``(4, 4, 8)`` geometry (4 * 4 * 8 = 128 values per descriptor). The
    descriptors of all images are stacked, reshaped to
    ``(number_of_images, descriptors_per_image, 128)`` and written to
    ``Fer2013_DenseSIFF_Descriptors.npy`` in the current directory.

    Args:
        images: Sequence (or array) of images, iterated along its first axis.
            All images must yield the same number of descriptors, otherwise
            the final reshape raises ``ValueError``.
        data_name_code: Currently unused; kept for interface compatibility.

    Raises:
        ValueError: If ``images`` is empty.
    """
    if len(images) == 0:
        raise ValueError("images must contain at least one image")

    # Collect the per-image descriptors and concatenate once at the end;
    # growing the array inside the loop copies all previous data each time.
    per_image = []
    for count, img in enumerate(images):
        _, desc = cy.vl_dsift(img, step=1, size=1, bounds=None,
                              window_size=1, norm=True, fast=True,
                              float_descriptors=True, geometry=(4, 4, 8),
                              verbose=1)
        per_image.append(desc)
        print("Image number " + str(count) + " in Stack")

    concate = np.concatenate(per_image)
    print(concate.shape)
    print("All Dense SIFT Descriptors Generated")

    filename = "Fer2013_DenseSIFF_Descriptors.npy"
    # -1 lets NumPy derive the descriptors-per-image count (2025 for the
    # image size this was written for) instead of hard-coding it.
    concate = np.reshape(concate, (len(images), -1, 128))
    np.save(filename, concate)
    print("Saved all Dense SIFT Descriptors as numpy array to Disk")
def calculate_sift_descriptors(image, step_size, cell_size, norm_threshold=0.0):
    """
    Calculate sift descriptors on single image.

    @param image: Image to extract sift descriptors from.
    @param step_size: Horizontal and vertical distance between keypoints in pixels.
    @param cell_size: Area size of one descriptor in pixels.
    @param norm_threshold: If greater than 0, vl_dsift is asked to return the
        descriptor norm as third frame component and every keypoint whose norm
        is not above this threshold is dropped.
    @return: Coordinates and descriptors for each keypoint (one row each).
        With a positive threshold only the first two frame columns are
        returned.
    """
    # we don't want to start directly in the corner of the page
    if cell_size == 8:
        off = 2.5
    elif cell_size == 5:
        off = 7.5
    else:
        off = 0.0

    bounds = np.array(
        (off, off, image.shape[0] - off, image.shape[1] - off), 'f')
    frames, desc = vlfeat.vl_dsift(image,
                                   bounds=bounds,
                                   step=step_size,
                                   size=cell_size,
                                   norm=(norm_threshold > 0.0))
    frames = frames.T
    desc = desc.T

    # throw away all descriptors with a magnitude <= norm_threshold
    if norm_threshold > 0.0:
        # A boolean mask keeps both results 2-D even when nothing survives
        # (a list comprehension would collapse to a shape-(0,) array).
        keep = frames[:, 2] > norm_threshold
        frames = frames[keep, :2]
        desc = desc[keep]

    return frames, desc
def process_image(imagename, resultname='temp.sift', dense=False):
    """Extract SIFT features from an image and store them in ``resultname``.

    With ``dense == False`` the external SIFT binary (``./sift`` on POSIX,
    ``siftWin32`` otherwise) is run with the image on stdin and
    ``resultname`` on stdout. Images whose name does not end in ``pgm`` are
    first converted to a grayscale thumbnail ``tmp.pgm`` (at most
    MAXSIZE x MAXSIZE).

    Otherwise the image is resized to 150x150, dense SIFT is computed with
    ``vlfeat.vl_dsift`` (step 10) and ``[locs.T, descriptors.T]`` is pickled
    to ``resultname``. If WRITE_VERBOSE is set, the pickle is then
    overwritten by an ASCII dump of the same data.

    Raises:
        IOError: If the external extraction produced no output, or if the
            image array has neither 2 nor 3 dimensions.
    """
    # Local import so this block does not depend on a file-level import.
    import subprocess

    print("working on  %s" % imagename)

    if dense == False:
        if imagename[-3:] != 'pgm':
            # create a pgm file, image is resized, if it is too big.
            # sift returns an error if more than 8000 features are found
            size = (MAXSIZE, MAXSIZE)
            im = Image.open(imagename).convert('L')
            im.thumbnail(size, Image.ANTIALIAS)
            im.save('tmp.pgm')
            imagename = 'tmp.pgm'

        # check if linux or windows
        if os.name == "posix":
            executable = "./sift"
            print("./sift <" + imagename + ">" + resultname)
        else:
            executable = "siftWin32"

        # Redirect through file objects instead of building a shell command
        # line: file names with spaces or shell metacharacters stay safe.
        launched = True
        with open(imagename, 'rb') as source:
            with open(resultname, 'wb') as target:
                try:
                    subprocess.call([executable], stdin=source, stdout=target)
                except OSError:
                    # Binary missing or not executable; reported below.
                    launched = False

        if not launched or os.path.getsize(resultname) == 0:
            raise IOError("extracting SIFT features failed " + resultname)
    else:
        import vlfeat

        # defines how dense the grid is
        size = (150, 150)
        step = 10

        im = Image.open(imagename).resize(size, Image.ANTIALIAS)
        im_array = numpy.asarray(im)
        if im_array.ndim == 3:
            im_gray = vlfeat.vl_rgb2gray(im_array)
        elif im_array.ndim == 2:
            im_gray = im_array
        else:
            raise IOError("Not enough dims found in image " + resultname)

        locs, int_descriptors = vlfeat.vl_dsift(im_gray, step=step,
                                                verbose=VERBOSE)
        nfeatures = int_descriptors.shape[1]
        # Two zero rows are appended below the location rows.
        padding = numpy.zeros((2, nfeatures))
        locs = numpy.vstack((locs, padding))
        descriptors = int_descriptors.astype('float')  # convert to float

        with open(resultname, 'wb') as f:
            cPickle.dump([locs.T, descriptors.T], f,
                         protocol=cPickle.HIGHEST_PROTOCOL)
        print("features saved in %s" % resultname)

        if WRITE_VERBOSE:
            # NOTE: this reopens resultname and replaces the pickle written
            # above with a plain-text dump.
            header = ' '.join([str(nfeatures), str(128)])
            with open(resultname, 'w') as f:
                f.write(header + "\n")
                for i in range(nfeatures):
                    f.write(' '.join(map(str, locs[:, i])) + "\n")
                    f.write(' '.join(map(str, descriptors[:, i])) + "\n")
def __init__(self, word_img, word, codebook):
    """Build a bag-of-features representation for one word image.

    Dense SIFT descriptors (step 5, cell size 5) are computed on
    ``word_img / 255``, each descriptor is assigned to its nearest codebook
    row (Euclidean distance), and three histograms are concatenated into
    ``self._bof_``: all descriptors, the first half and the second half of
    the descriptor sequence.

    Args:
        word_img: Image of the word.
        word: Value stored unchanged in ``self._word_``.
        codebook: 2-D array with one codebook entry per row.
    """
    self._word_img_ = word_img
    self._word_ = word
    self._bof_ = None

    # Compute the descriptors.
    # NOTE(review): under Python 2 with an integer image, ``/ 255`` is an
    # integer division -- confirm the intended scaling.
    step_size = 5
    cell_size = 5
    frames, desc = vlfeat.vl_dsift(word_img / 255,
                                   step=step_size,
                                   size=cell_size)
    # ``np.float`` was only an alias of the builtin and has been removed
    # from NumPy; ``float`` yields the same dtype.
    desc = np.array(desc, dtype=float)

    # Quantise the descriptors with the codebook: index of the nearest entry.
    dist_mat = distance.cdist(desc, codebook, 'euclidean')
    global_ = np.argmin(dist_mat, axis=1)

    # Floor division keeps the slice index an integer on Python 3 as well.
    half = len(global_) // 2
    left = global_[:half]
    right = global_[half:]

    # All three histograms must have the same length. A fixed minlength of
    # 4095 is not enough when the codebook has more entries and the largest
    # label occurs in only some of the parts.
    n_bins = max(4095, len(codebook))
    bof_g = np.bincount(global_, minlength=n_bins)
    bof_l = np.bincount(left, minlength=n_bins)
    bof_r = np.bincount(right, minlength=n_bins)

    self._bof_ = np.concatenate((bof_g, bof_l, bof_r), axis=0)
def calculate_histograms(images, clusters_mean):
    """Append a normalised visual-word histogram to every image.

    For each image, dense SIFT descriptors (size 8, step 8) are computed on
    ``image.img`` and turned into a histogram by ``get_histogram``. One value
    per cluster index is appended to ``image.histogram``: the count divided
    by the sum of all counts, or the raw count if that sum is zero.

    Args:
        images: Iterable of objects with ``img`` and ``histogram`` (list)
            attributes.
        clusters_mean: Sequence of cluster centres; only its length and its
            hand-over to ``get_histogram`` are used here.

    Returns:
        The same ``images`` object, with the histograms extended in place.
    """
    n_clusters = len(clusters_mean)
    for image in images:
        _, des = vl_dsift(image.img, size=8, step=8)
        histogram = get_histogram(des, clusters_mean)
        # ``values()`` exists on Python 2 and 3 (``itervalues`` is 2-only).
        his_sum = sum(histogram.values()) * 1.0
        for item in range(n_clusters):
            count = histogram[item]
            image.histogram.append(
                count / his_sum if his_sum != 0 else count)
    return images
def BOW(n_clusters=40):
    """Cluster the dense SIFT output of all training images with k-means.

    The arrays returned by ``vl_dsift`` (size 8, step 8) for every image in
    TRAIN_IMAGES are gathered row by row and fed to ``KMeans``.

    Args:
        n_clusters: Number of k-means clusters.

    Returns:
        The cluster centres as a list of lists.
    """
    descriptor_rows = []
    for train_image in TRAIN_IMAGES:
        _, descriptors = vl_dsift(train_image.img, size=8, step=8)
        descriptor_rows.extend(descriptors.tolist())

    model = KMeans(n_clusters=n_clusters).fit(np.array(descriptor_rows))
    return model.cluster_centers_.tolist()
def calculate_visual_words_for_query(self, query_image, visualize = False):
    """Assign every dense SIFT descriptor of the query image to a centroid.

    Args:
        query_image: Image handed to ``vlfeat.vl_dsift`` unchanged.
        visualize: If true, show the image with one coloured dot per
            descriptor position (colour encodes the assigned centroid).

    Returns:
        2-D array of centroid indices, filled column by column
        (Fortran order) with one row per distinct value in the second frame
        coordinate.
    """
    # Later processing steps require the image to hold float32 values.
    # NOTE: no conversion happens here -- presumably the caller provides it.
    im_arr = query_image

    # Compute the SIFT descriptors.
    frames, desc = vlfeat.vl_dsift(im_arr, step=self.step_size,
                                   size=self.cell_size)
    frames = frames.T
    desc = desc.T

    distance_matrix = scipy.spatial.distance.cdist(desc, self.centroids,
                                                   "euclidean")
    labels = np.argmin(distance_matrix, axis = 1)

    if visualize:
        draw_descriptor_cells = False
        fig = plt.figure()
        ax = fig.add_subplot(111)
        # The colormap defines how the intensity values are interpreted.
        # ``plt.get_cmap`` replaces ``cm.get_cmap``, which newer Matplotlib
        # releases no longer provide.
        ax.imshow(im_arr, cmap=plt.get_cmap('Greys_r'))
        # No ``ax.hold(True)``: the method was removed from Matplotlib and
        # adding to an existing axes is the default behaviour.
        ax.autoscale(enable=False)
        colormap = plt.get_cmap('jet')
        desc_len = self.cell_size * 4
        for (x, y), label in zip(frames, labels):
            color = colormap(label / float(self.n_centroids))
            circle = Circle((x, y), radius=1, fc=color, ec=color, alpha=1)
            ax.add_patch(circle)
            if draw_descriptor_cells:
                for p_factor in [0.25, 0.5, 0.75]:
                    offset_dyn = desc_len * (0.5 - p_factor)
                    offset_stat = desc_len * 0.5
                    line_h = Line2D((x - offset_stat, x + offset_stat),
                                    (y - offset_dyn, y - offset_dyn),
                                    alpha=0.08, lw=1)
                    line_v = Line2D((x - offset_dyn, x - offset_dyn),
                                    (y - offset_stat, y + offset_stat),
                                    alpha=0.08, lw=1)
                    ax.add_line(line_h)
                    ax.add_line(line_v)
        plt.show()

    # Centroids: a list of centroids (which are SIFT descriptors themselves).
    # Labels: for every SIFT descriptor the index of its assigned centroid.
    # ORDER: column-wise, top to bottom and left to right. Example:
    #   1 4 7
    #   2 5 8
    #   3 6 9
    labels = np.reshape(labels, (len(np.unique(frames[:, 1])), -1), order='F')
    return labels
def rgsift(image):
    """Dense SIFT on the gray, normalised-red and normalised-green channels.

    The red and green planes are divided by the per-pixel channel sum
    (plus 1e-5). ``vl_dsift`` (step 4, size 6) runs on each of the three
    planes and the transposed descriptors are stacked horizontally.

    Returns:
        ``(loc, descriptors)`` where ``loc`` is the location output of the
        last processed plane (green).
    """
    from skimage import img_as_float

    float_image = img_as_float(image)
    denominator = float_image.sum(axis=2) + 1e-5
    planes = (
        rgb2gray(image),
        float_image[:, :, 0] / denominator,
        float_image[:, :, 1] / denominator,
    )

    per_plane = []
    for plane in planes:
        loc, desc = vl_dsift(plane, step=4, size=6)
        per_plane.append(desc.T.copy())
    return loc, np.hstack(per_plane)
def extract_feature_vector(self):
    """Return dense SIFT descriptors of this object's image as float32 rows.

    The image is read from ``self.image.get_path()`` when ``self.memory`` is
    False, otherwise ``self.image`` is used directly. It is resized with
    ``resize(img, (300, 250))``, converted to gray (uint8), histogram
    equalised and passed to ``vl_dsift``.
    """
    if self.memory == False:
        source = imread(self.image.get_path())
    else:
        source = self.image

    resized = resize(source, (300, 250))
    gray = vl_rgb2gray(resized).astype('uint8')
    equalized = equalizeHist(gray)

    # vl_dsift parameters, passed positionally in this order below.
    step = 10
    bounds = -1
    size_of_spatial_bins = 8
    fast = False  # if set to True it uses a flat window rather than a Gaussian window
    verbose = False
    norm = False

    frames, descriptors = vl_dsift(equalized, step, bounds,
                                   size_of_spatial_bins, fast, verbose, norm)
    return descriptors.transpose().astype('float32')
def sift_descriptors(images, dataset):
    """Compute dense SIFT (step 3, size 4) for every named image.

    Args:
        images: Iterable of image identifiers understood by
            ``dataset.get_image``.
        dataset: Object providing ``get_image(identifier)``.

    Returns:
        ``(descs, coordinates)``: two lists with one entry per image, the
        transposed descriptor array and the coordinate output of
        ``vl_dsift``.
    """
    print("computing sift descriptors")
    descs, coordinates = [], []
    for name in images:
        print("processing image %s" % name)
        gray_image = rgb2gray(dataset.get_image(name))
        coords, sift = vl_dsift(gray_image, step=3, size=4)
        descs.append(sift.T)
        coordinates.append(coords)
    return descs, coordinates
def calculate_sift_descriptors(image, step_size, cell_size, norm_threshold=0.0):
    """
    Calculate sift descriptors on single image.

    @param image: Image to extract sift descriptors from.
    @param step_size: Horizontal and vertical distance between keypoints in pixels.
    @param cell_size: Area size of one descriptor in pixels.
    @param norm_threshold: If greater than 0, vl_dsift is asked to return the
        descriptor norm as third frame component and every keypoint whose norm
        is not above this threshold is dropped.
    @return: Coordinates and descriptors for each keypoint (one row each).
        With a positive threshold only the first two frame columns are
        returned.
    """
    # we don't want to start directly in the corner of the page
    if cell_size == 8:
        off = 2.5
    elif cell_size == 5:
        off = 7.5
    else:
        off = 0.0

    frames, desc = vlfeat.vl_dsift(
        image,
        bounds=np.array(
            (off, off, image.shape[0] - off, image.shape[1] - off), 'f'),
        step=step_size,
        size=cell_size,
        norm=(norm_threshold > 0.0))
    frames = frames.T
    desc = desc.T

    # throw away all descriptors with a magnitude <= norm_threshold
    if norm_threshold > 0.0:
        # Vectorised selection; unlike a list comprehension it also keeps
        # the arrays 2-D when no keypoint passes the threshold.
        strong = frames[:, 2] > norm_threshold
        frames = frames[strong, :2]
        desc = desc[strong]

    return frames, desc
def extract_RGB_SIFT_features(image, labels):
    """Pick one dense SIFT descriptor per superpixel and colour channel.

    Dense SIFT is computed separately on the three channels of ``image``.
    For every superpixel label ``0 .. max(labels)`` the feature whose
    location is closest to the superpixel centroid is selected (ties are
    broken randomly) and the 128 values of that feature from each channel
    are written side by side.

    Args:
        image: Array indexed as ``image[..., channel]`` with 3 channels.
        labels: Integer label image; label ``i`` marks superpixel ``i``.

    Returns:
        Array of shape ``(number_of_superpixels, 384)``.
    """
    n_sp = np.max(labels) + 1
    feat_descs = np.zeros((n_sp, 128 * 3))
    img_superpixel = np.zeros_like(labels, dtype='int')

    # extract SIFT features for each colour channel
    locations = []
    descriptors = []
    for channel in range(3):
        chan_loc, chan_desc = vl_dsift(image[..., channel], size=1,
                                       float_descriptors=True)
        locations.append(chan_loc)
        descriptors.append(chan_desc)

    # indices of all features (locations of channel 0 are used throughout)
    all_indices = np.arange(locations[0].shape[0])

    for sp in range(n_sp):
        # centroid of the sp'th superpixel
        img_superpixel[:] = labels == sp
        centroid = regionprops(img_superpixel)[0].centroid

        # squared distance of every feature location to the centroid
        sq_dist = np.sum((locations[0] - centroid)**2, axis=1)
        nearest = sq_dist.argmin()

        # if several features are equally close, pick one of them at random
        tied = sq_dist == sq_dist[nearest]
        if np.count_nonzero(tied) == 1:
            chosen = nearest
        else:
            chosen = np.random.choice(all_indices[tied])

        # each channel fills its own 128-wide slice of the feature vector
        for channel in range(3):
            start = channel * 128
            stop = (1 + channel) * 128
            feat_descs[sp, start:stop] = descriptors[channel][chosen, :]

    return feat_descs
def vl_phow(im, verbose=True, fast=True, sizes=[4, 6, 8, 10], step=2, color='rgb', floatdescriptors=False, magnif=6, windowsize=1.5, contrastthreshold=0.005):
    """Extract multi-scale dense SIFT descriptors (port of VLFeat's vl_phow.m).

    For every bin size in ``sizes`` the image is smoothed with
    ``vl_imsmooth`` (sigma = size / magnif), ``vl_dsift`` is run on each
    channel, descriptors at low-contrast locations are zeroed, and the
    results of all sizes are stacked horizontally.

    Args:
        im: Image; converted to a float32 array. A 3-D image must have
            exactly 3 channels.
        verbose: Print a short summary of the settings.
        fast, step, floatdescriptors, windowsize: Stored in ``Options`` and
            forwarded through ``DSiftOptions``.
        sizes: Spatial bin sizes, one extraction pass per entry.
        color: 'gray', 'rgb' or 'opponent'.
        magnif: Divides the bin size to obtain the smoothing sigma.
        contrastthreshold: Descriptor columns whose contrast value is below
            this are set to zero.

    Returns:
        ``(frames_all, descrs_all)``: per size, the first three frame rows of
        channel 0 plus a row holding the bin size, and the descriptors of
        all channels stacked along the first axis; both concatenated over
        all sizes with ``hstack``.

    Raises:
        ValueError: For a 3-D image without 3 channels or an unknown
            ``color`` option.
    """
    opts = Options(verbose, fast, sizes, step, color, floatdescriptors, magnif, windowsize, contrastthreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image is float, otherwise segfault
    im = array(im, 'float32')

    # Extract the features
    imageSize = shape(im)
    if im.ndim == 3:
        if imageSize[2] != 3:
            # "IndexError: tuple index out of range" if both if's are checked at the same time
            raise ValueError("Image data in unknown format/shape")
    if opts.color == 'gray':
        numChannels = 1
        # NOTE(review): this converts only when the image is already 2-D;
        # a 3-channel input is left unconverted here. Looks inverted
        # relative to the intent of an rgb->gray conversion -- confirm.
        if (im.ndim == 2):
            im = vl_rgb2gray(im)
    else:
        numChannels = 3
        # replicate a single-channel image into three identical channels
        if (im.ndim == 2):
            im = dstack([im, im, im])
        if opts.color == 'rgb':
            pass
        elif opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of opponent
            # space and is the regular intensity (for compatibility with
            # the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic
            # regions.
            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            im = dstack([ mu, (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu, (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu ])
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
    if opts.verbose:
        print('{0}: color space: {1}'.format('vl_phow', opts.color))
        print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0], imageSize[1]))
        print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes))

    frames_all = []
    descrs_all = []
    for size_of_spatial_bins in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or odd.
        off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = size_of_spatial_bins / float(opts.magnif)
        ims = vl_imsmooth(im, sigma)

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            size_of_spatial_bins = int(size_of_spatial_bins) # vl_dsift does not accept numpy.int64 or similar
            f_temp, d_temp = vl_dsift(data=ims[:, :, k], step=dsiftOpts.step, size=size_of_spatial_bins, fast=dsiftOpts.fast, verbose=dsiftOpts.verbose, norm=dsiftOpts.norm, bounds=[off, off, maxint, maxint])
            frames.append(f_temp)
            descrs.append(d_temp)
        frames = array(frames)
        descrs = array(descrs)
        # merge the channel axis into the descriptor axis
        d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]]
        descrs = descrs.reshape(d_new_shape)

        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded
        if (opts.color == 'gray') | (opts.color == 'opponent'):
            contrast = frames[0][2, :]
        elif opts.color == 'rgb':
            contrast = mean( [frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0)
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
        descrs[:, contrast < opts.contrastthreshold] = 0

        # save only x,y, and the scale
        frames_temp = array(frames[0][0:3, :])
        padding = array(size_of_spatial_bins * ones(frames[0][0].shape))
        frames_all.append(vstack([frames_temp, padding]))
        descrs_all.append(array(descrs))

    frames_all = hstack(frames_all)
    descrs_all = hstack(descrs_all)
    return frames_all, descrs_all
def process_image(imagename, resultname='temp.sift', dense=False):
    """Extract SIFT features from an image and store them in ``resultname``.

    With ``dense == False`` the external SIFT binary (``./sift`` on POSIX,
    ``siftWin32`` otherwise) is run with the image on stdin and
    ``resultname`` on stdout. Images whose name does not end in ``pgm`` are
    first converted to a grayscale thumbnail ``tmp.pgm`` (at most
    MAXSIZE x MAXSIZE).

    Otherwise the image is resized to 150x150, dense SIFT is computed with
    ``vlfeat.vl_dsift`` (step 10) and ``[locs.T, descriptors.T]`` is pickled
    to ``resultname``. If WRITE_VERBOSE is set, the pickle is then
    overwritten by an ASCII dump of the same data.

    Raises:
        IOError: If the image cannot be opened, the SIFT executable cannot
            be started, the extraction produced no output, or the image
            array has neither 2 nor 3 dimensions.
    """
    if dense == False:
        if imagename[-3:] != 'pgm':
            # create a pgm file, image is resized, if it is too big.
            # sift returns an error if more than 8000 features are found
            size = (MAXSIZE, MAXSIZE)
            im = Image.open(imagename).convert('L')
            im.thumbnail(size, Image.ANTIALIAS)
            im.save('tmp.pgm')
            imagename = 'tmp.pgm'

        # check if linux or windows
        if os.name == "posix":
            executable = "./sift"
        else:
            executable = "siftWin32"

        # Run the extraction without a shell: the image is fed on stdin and
        # the output file receives stdout, so file names containing spaces
        # or shell metacharacters cannot alter the command.
        try:
            source = open(imagename, 'rb')
        except (IOError, OSError):
            raise IOError("image " + imagename + " not found")

        launched = True
        with source:
            with open(resultname, 'wb') as target:
                try:
                    subprocess.call([executable], stdin=source, stdout=target)
                except OSError:
                    launched = False

        if not launched:
            os.remove(resultname)  # removing the empty result file
            raise IOError("SIFT executable not found")

        if os.path.getsize(resultname) == 0:
            raise IOError("extracting SIFT features failed " + resultname)
    else:
        import vlfeat

        # defines how dense the grid is
        size = (150, 150)
        step = 10

        im = Image.open(imagename).resize(size, Image.ANTIALIAS)
        im_array = numpy.asarray(im)
        if im_array.ndim == 3:
            im_gray = vlfeat.vl_rgb2gray(im_array)
        elif im_array.ndim == 2:
            im_gray = im_array
        else:
            raise IOError("Not enough dims found in image " + resultname)

        locs, int_descriptors = vlfeat.vl_dsift(im_gray, step=step,
                                                verbose=VERBOSE)
        nfeatures = int_descriptors.shape[1]
        # Two zero rows are appended below the location rows.
        padding = numpy.zeros((2, nfeatures))
        locs = numpy.vstack((locs, padding))
        descriptors = int_descriptors.astype('float')  # convert to float

        with open(resultname, 'wb') as f:
            cPickle.dump([locs.T, descriptors.T], f,
                         protocol=cPickle.HIGHEST_PROTOCOL)
        print("features saved in %s" % resultname)

        if WRITE_VERBOSE:
            # NOTE: this reopens resultname and replaces the pickle written
            # above with a plain-text dump.
            header = ' '.join([str(nfeatures), str(128)])
            with open(resultname, 'w') as f:
                f.write(header + "\n")
                for i in range(nfeatures):
                    f.write(' '.join(map(str, locs[:, i])) + "\n")
                    f.write(' '.join(map(str, descriptors[:, i])) + "\n")
def test_all(self):
    """End-to-end regression test of the vlfeat wrapper on 'roofs1.jpg'.

    Exercises rgb2gray, vl_imsmooth, vl_dsift, vl_kmeans, vl_gmm and
    vl_fisher in sequence. Results are compared against hard-coded
    reference values and, where the corresponding tab-separated text file
    exists in the working directory, against that file.
    """
    img = numpy.array(Image.open('roofs1.jpg'))

    # Test rgb2gray
    img_gray = rgb2gray(img)
    self.assertEqual(tuple(img_gray.shape), (478, 640))
    self.assertTrue( numpy.allclose(img_gray[:4, :4], numpy.array([[0.8973, 0.8973, 0.8973, 0.9052], [0.8973, 0.8973, 0.8973, 0.9052], [0.8973, 0.8973, 0.9021, 0.9061], [0.9013, 0.9013, 0.9061, 0.9100]]), atol=1e-4))
    if os.path.exists("img_gray.txt"):
        expected = numpy.loadtxt("img_gray.txt", delimiter='\t')
        self.assertTrue(numpy.allclose(img_gray, expected, atol=1e-4))

    # Test vl_imsmooth
    binsize = 8
    magnif = 3
    # NOTE(review): ``binsize / magnif`` is an integer division under
    # Python 2 but a true division under Python 3, which changes sigma --
    # confirm which one the reference values below were produced with.
    img_smooth = vlfeat.vl_imsmooth(img_gray, math.sqrt((binsize / magnif)**2 - 0.25), verbose=True)
    self.assertEqual(tuple(img_smooth.shape), (478, 640))
    self.assertTrue( numpy.allclose(img_smooth[:4, :4], numpy.array([[0.8998, 0.9013, 0.9034, 0.9057], [0.9000, 0.9015, 0.9035, 0.9057], [0.9002, 0.9017, 0.9036, 0.9057], [0.9005, 0.9020, 0.9038, 0.9058]]), atol=1e-4))
    if os.path.exists("img_smooth.txt"):
        expected = numpy.loadtxt("img_smooth.txt", delimiter='\t')
        self.assertTrue(numpy.allclose(img_smooth, expected, atol=1e-4))

    # Test vl_dsift
    frames, descrs = vlfeat.vl_dsift(img_smooth, size=binsize, verbose=True)
    # transpose to one column per feature and add 1 to every coordinate
    # (presumably to match 1-based reference data -- TODO confirm)
    frames = numpy.add(frames.transpose(), 1)
    descrs = descrs.transpose()
    self.assertEqual(tuple(frames.shape), (2, 279664))
    self.assertEqual(tuple(descrs.shape), (128, 279664))
    self.assertTrue( numpy.allclose(frames[:, :4], numpy.array([[13, 13, 13, 13], [13, 14, 15, 16]])))
    # reference values for the first descriptor (128 entries)
    self.assertTrue( numpy.allclose( descrs[:, 0], numpy.array([
        134, 35, 0, 0, 0, 0, 0, 5, 109, 9, 1, 0, 0, 0, 0, 61,
        7, 2, 32, 21, 0, 0, 1, 28, 2, 13, 111, 9, 0, 0, 0, 2,
        33, 134, 131, 0, 0, 0, 0, 0, 30, 92, 134, 0, 0, 0, 0, 19,
        11, 42, 134, 0, 0, 0, 1, 31, 6, 20, 124, 3, 0, 0, 0, 7,
        5, 134, 134, 0, 0, 0, 0, 0, 1, 94, 134, 0, 0, 0, 0, 0,
        0, 34, 134, 1, 0, 0, 0, 0, 0, 4, 134, 13, 0, 2, 2, 0,
        27, 53, 15, 0, 0, 0, 0, 1, 11, 48, 27, 2, 0, 0, 0, 0,
        0, 5, 28, 16, 1, 0, 0, 0, 0, 2, 13, 16, 4, 5, 4, 0 ])))
    if os.path.exists("dsift_frames.txt"):
        expected = numpy.loadtxt("dsift_frames.txt", delimiter='\t')
        self.assertTrue(numpy.allclose(frames, expected))
    if os.path.exists("dsift_descrs.txt"):
        expected = numpy.loadtxt("dsift_descrs.txt", delimiter='\t')
        self.assertTrue( numpy.linalg.norm(expected - descrs) < 28) # rounding errors?

    # Test vl_kmeans
    centers, assigns = vlfeat.vl_kmeans(numpy.array( [[1], [2], [3], [10], [11], [12]], dtype='f'), 2, ret_quantize=True, verbose=True)
    self.assertTrue(numpy.allclose(centers, numpy.array([[2], [11]])))
    self.assertTrue( numpy.allclose(assigns, numpy.array([0, 0, 0, 1, 1, 1])))
    centers, assigns = vlfeat.vl_kmeans(numpy.array( [[1, 0], [2, 0], [3, 0], [10, 1], [11, 1], [12, 1]], dtype='f'), 2, ret_quantize=True)
    self.assertTrue(numpy.allclose(centers, numpy.array([[11, 1], [2, 0]]))) # order swapped?
    self.assertTrue( numpy.allclose(assigns, numpy.array([1, 1, 1, 0, 0, 0])))

    # Test vl_gmm
    # without a reference data file, random data is used and only the
    # output shapes are checked
    if os.path.exists("gmm_data.txt"):
        data = numpy.loadtxt("gmm_data.txt", delimiter='\t').transpose()
    else:
        data = numpy.random.rand(5000, 2)
    means, covariances, priors, ll, posteriors = vlfeat.vl_gmm( data, 30, verbose=True, ret_loglikelihood=True, ret_posterior=True)
    self.assertEqual(tuple(means.shape), (30, 2))
    self.assertEqual(tuple(covariances.shape), (30, 2))
    self.assertEqual(tuple(priors.shape), (30, ))
    self.assertEqual(tuple(posteriors.shape), (5000, 30))
    if os.path.exists("gmm_means.txt"):
        expected = numpy.loadtxt("gmm_means.txt", delimiter='\t').transpose()
        self.assertTrue(numpy.allclose(means, expected, atol=1e-4))
    if os.path.exists("gmm_covariances.txt"):
        expected = numpy.loadtxt("gmm_covariances.txt", delimiter='\t').transpose()
        self.assertTrue(numpy.allclose(covariances, expected, atol=1e-4))
    if os.path.exists("gmm_priors.txt"):
        expected = numpy.loadtxt("gmm_priors.txt", delimiter='\t').transpose()
        self.assertTrue(numpy.allclose(priors, expected, atol=1e-4))
    if os.path.exists("gmm_posteriors.txt"):
        expected = numpy.loadtxt("gmm_posteriors.txt", delimiter='\t').transpose()
        self.assertTrue(numpy.allclose(posteriors, expected, atol=1e-3))

    # Test vl_fisher
    # encodes with the GMM parameters estimated above
    if os.path.exists("fisher_data.txt"):
        data = numpy.loadtxt("fisher_data.txt", delimiter='\t').transpose()
    else:
        data = numpy.random.rand(1000, 2)
    encoding = vlfeat.vl_fisher(data, means, covariances, priors, verbose=True)
    self.assertEqual(tuple(encoding.shape), (120, ))
    if os.path.exists("fisher_encoding.txt"):
        expected = numpy.loadtxt("fisher_encoding.txt", delimiter='\t').transpose()
        self.assertTrue(numpy.allclose(encoding, expected, atol=1e-4))
def block_feat(bk='', *varargin):
    """Feature-extraction block: compute and store descriptors per segment.

    Called without a block (``bk == ''``) it returns a freshly initialised
    block with default settings. Otherwise it loads the database block,
    computes the configured descriptor for every segment in ``db['segs']``
    and pickles the frames and descriptors to
    ``<prefix>/data/<seg>.f.pkl`` and ``<prefix>/data/<seg>.d.pkl``.

    Only the ``'dsift-color'`` descriptor is implemented: dense SIFT on the
    sum-normalised red and green channels, keeping features whose norm
    exceeds ``bk['dsift_minnorm']`` in either channel and whose support
    (scale * ``bk['rescale']``) lies inside the image.

    Args:
        bk: Block dictionary, or ``''`` to request a default block.
        *varargin: Unused.

    Returns:
        The (possibly updated) block dictionary.

    Raises:
        ValueError: If ``bk['descriptor']`` is not supported.
    """
    if bk == '':
        bk = bkinit('feat', 'db')
        bk['fetch'] = fetch__
        bk['rand_send'] = []
        bk['detector'] = 'sift'
        bk['descriptor'] = 'siftnosmooth'
        bk['ref_size'] = []
        bk['min_sigma'] = 0
        bk['max_num'] = np.inf
        bk['rescale'] = 6
        return bk

    # check/load inputs
    bk, dirty = bkbegin(bk)
    if not dirty:
        print('block_feat not dirty')
        return bk

    db = bkfetch(bk['db']['tag'], 'db')
    reduce_ = True

    for seg in db['segs']:
        print('process %s'%(seg['path']))

        # preprocess
        Iorig = imread(os.path.join(db['images_path'], seg['path']))
        I = img_as_float(Iorig)
        M, N, K = I.shape
        if len(bk['ref_size']):
            # bk is a dict (no attribute access) and the larger image side
            # is the builtin max of two scalars.
            # assumes ref_size is a one-element sequence -- TODO confirm
            rho = np.asarray(bk['ref_size'], dtype=float) / max(M, N)
        else:
            rho = 1
        Icolor = I

        # descriptor
        if bk['descriptor'] == 'dsift-color':
            # normalise every pixel by its channel sum (eps avoids 0/0)
            RGB = Icolor.sum(2) + np.finfo(float).eps
            Irgb = Icolor / np.stack((RGB, RGB, RGB), 2)

            fr, dr = vlfeat.vl_dsift(Irgb[:, :, 0],
                                     size=bk['dsift_size'],
                                     step=bk['dsift_step'],
                                     fast=True, norm=True)
            fr = fr.transpose()
            dr = dr.transpose()
            fg, dg = vlfeat.vl_dsift(Irgb[:, :, 1],
                                     size=bk['dsift_size'],
                                     step=bk['dsift_step'],
                                     fast=True, norm=True)
            fg = fg.transpose()
            dg = dg.transpose()

            d = np.concatenate((dr, dg), axis=0)
            # keep features with enough contrast in at least one channel
            keep1 = ((fr[2, :] > bk['dsift_minnorm'])
                     | (fg[2, :] > bk['dsift_minnorm']))
            f = fr[0:2, :]
            f = f[:, keep1]
            d = d[:, keep1]

            # NOTE(review): integer division under Python 2 when dsift_size
            # is an int -- confirm intended value.
            sigma = bk['dsift_size']*4/6
            # frame rows: x, y, scale (sigma), orientation (pi)
            f = np.concatenate((f,
                                sigma*np.ones((1, f.shape[1])),
                                np.pi*np.ones((1, f.shape[1]))), axis=0)

            # Support radius from the scale row (index 2); index 3 holds
            # the constant orientation and would give a radius of pi*rescale.
            R = f[2, :]*bk['rescale']
            keep2 = ((f[0, :]-R >= 0) & (f[0, :]+R <= N-1)
                     & (f[1, :]-R >= 0) & (f[1, :]+R <= M-1))
            f = f[:, keep2]
            d = d[:, keep2]
        else:
            # Previously only printed a message and then failed on the
            # undefined f/d below.
            raise ValueError('unknown descriptor %r' % (bk['descriptor'],))

        # post process: map coordinates and scale back to the original size
        f[0:2, :] = (f[0:2, :]-1)/rho+1
        f[2, :] = f[2, :]/rho

        # save
        path_d = os.path.join(glb.wrd['prefix'], 'data',
                              '%05d.d.pkl'%(seg['seg']))
        path_f = os.path.join(glb.wrd['prefix'], 'data',
                              '%05d.f.pkl'%(seg['seg']))
        with open(path_f, 'wb') as fh:
            pickle.dump(f, fh)
        with open(path_d, 'wb') as fh:
            pickle.dump(d, fh)

    if reduce_==True:
        bk = bkend(bk)
    return bk
def process_image(imagename, resultname='temp.sift', dense=False):
    """Extract SIFT features from an image and store them in ``resultname``.

    With ``dense == False`` the external SIFT binary (``./sift`` on POSIX,
    ``siftWin32`` otherwise) is run with the image on stdin and
    ``resultname`` on stdout. Images whose name does not end in ``pgm`` are
    first converted to a grayscale thumbnail ``tmp.pgm`` (at most
    MAXSIZE x MAXSIZE).

    Otherwise the image is resized to 150x150 (like in pinto2008, "why is
    vision hard"), dense SIFT is computed with ``vlfeat.vl_dsift`` (step 10)
    and ``[locs.T, descriptors.T]`` is pickled to ``resultname``. If
    WRITE_VERBOSE is set, the pickle is then overwritten by an ASCII dump of
    the same data.

    Raises:
        IOError: If the external extraction produced no output, or if the
            image array has neither 2 nor 3 dimensions.
    """
    # Local import so this block does not depend on a file-level import.
    import subprocess

    print("working on  %s" % imagename)

    if dense == False:
        if imagename[-3:] != 'pgm':
            # create a pgm file, image is resized, if it is too big.
            # sift returns an error if more than 8000 features are found
            size = (MAXSIZE, MAXSIZE)
            im = Image.open(imagename).convert('L')
            im.thumbnail(size, Image.ANTIALIAS)
            im.save('tmp.pgm')
            imagename = 'tmp.pgm'

        # check if linux or windows
        if os.name == "posix":
            sift_binary = "./sift"
            print("./sift <" + imagename + ">" + resultname)
        else:
            sift_binary = "siftWin32"

        # Feed the image on stdin and capture stdout in the result file
        # without going through a shell, so unusual file names are safe.
        started = True
        with open(imagename, 'rb') as image_file:
            with open(resultname, 'wb') as result_file:
                try:
                    subprocess.call([sift_binary], stdin=image_file,
                                    stdout=result_file)
                except OSError:
                    # Binary missing or not executable; reported below.
                    started = False

        if not started or os.path.getsize(resultname) == 0:
            raise IOError("extracting SIFT features failed " + resultname)
    else:
        import vlfeat

        size = (150, 150)
        step = 10

        im = Image.open(imagename).resize(size, Image.ANTIALIAS)
        im_array = numpy.asarray(im)
        if im_array.ndim == 3:
            im_gray = vlfeat.vl_rgb2gray(im_array)
        elif im_array.ndim == 2:
            im_gray = im_array
        else:
            raise IOError("Not enough dims found in image " + resultname)

        locs, int_descriptors = vlfeat.vl_dsift(im_gray, step=step,
                                                verbose=VERBOSE)
        nfeatures = int_descriptors.shape[1]
        # Two zero rows are appended below the location rows.
        padding = numpy.zeros((2, nfeatures))
        locs = numpy.vstack((locs, padding))
        descriptors = int_descriptors.astype('float')  # convert to float

        with open(resultname, 'wb') as f:
            cPickle.dump([locs.T, descriptors.T], f,
                         protocol=cPickle.HIGHEST_PROTOCOL)
        print("features saved in %s" % resultname)

        if WRITE_VERBOSE:
            # NOTE: this reopens resultname and replaces the pickle written
            # above with a plain-text dump.
            header = ' '.join([str(nfeatures), str(128)])
            with open(resultname, 'w') as f:
                f.write(header + "\n")
                for i in range(nfeatures):
                    f.write(' '.join(map(str, locs[:, i])) + "\n")
                    f.write(' '.join(map(str, descriptors[:, i])) + "\n")
# Times a dense SIFT extraction on a single 640x480 grayscale image.
import time

import cv2
import numpy as np
from vlfeat import vl_dsift

img = cv2.imread('../data/face.jpg')
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise IOError("could not read image '../data/face.jpg'")
img = cv2.resize(img, (640, 480))
# cv2.imread delivers BGR channel order, so convert from BGR (not RGB);
# otherwise the red and blue weights of the gray conversion are swapped.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = gray.astype('float32')
print(type(gray))
print(gray.shape)

start = time.time()
# NOTE(review): 8 and 16 are passed positionally; confirm that the second
# one lands on the intended parameter of this vl_dsift wrapper.
frame, desc = vl_dsift(gray, 8, 16, fast=True, verbose=True)
print('computing the dsift of the full image took {}'.format(time.time() - start))

a = np.transpose(desc)
print(a.shape[0] * a.shape[1])
def dsift(im, verbose=True, fast=True, sizes=[4, 6, 8, 10], step=2, color='rgb', floatdescriptors=False, magnif=6, windowsize=1.5, contrastthreshold=0.005):
    """Extract multi-scale dense SIFT descriptors (port of VLFeat's vl_phow.m).

    For every bin size in ``sizes`` the image is smoothed with
    ``vl_imsmooth`` (sigma = size / magnif), ``vl_dsift`` is run on each
    channel, descriptors at low-contrast locations are zeroed, and the
    results of all sizes are stacked horizontally.

    Args:
        im: Image; converted to a float32 array. A 3-D image must have
            exactly 3 channels.
        verbose: Stored in the options; the summary printout is disabled.
        fast, step, floatdescriptors, windowsize: Stored in ``Options`` and
            forwarded through ``DSiftOptions``.
        sizes: Spatial bin sizes, one extraction pass per entry.
        color: 'gray', 'rgb' or 'opponent'.
        magnif: Divides the bin size to obtain the smoothing sigma.
        contrastthreshold: Descriptor columns whose contrast value is below
            this are set to zero.

    Returns:
        ``(frames_all, descrs_all)``: per size, the first three frame rows of
        channel 0 plus a row holding the bin size, and the descriptors of
        all channels stacked along the first axis; both concatenated over
        all sizes with ``hstack``.

    Raises:
        ValueError: For a 3-D image without 3 channels or an unknown
            ``color`` option.
    """
    opts = Options(verbose, fast, sizes, step, color, floatdescriptors, magnif, windowsize, contrastthreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image is float, otherwise segfault
    im = array(im, 'float32')

    # Extract the features
    imageSize = shape(im)
    if im.ndim == 3:
        if imageSize[2] != 3:
            # "IndexError: tuple index out of range" if both if's are checked at the same time
            raise ValueError("Image data in unknown format/shape")
    if opts.color == 'gray':
        numChannels = 1
        # NOTE(review): this converts only when the image is already 2-D;
        # a 3-channel input is left unconverted here. Looks inverted
        # relative to the intent of an rgb->gray conversion -- confirm.
        if (im.ndim == 2):
            im = vl_rgb2gray(im)
    else:
        numChannels = 3
        # replicate a single-channel image into three identical channels
        if (im.ndim == 2):
            im = dstack([im, im, im])
        if opts.color == 'rgb':
            pass
        elif opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of opponent
            # space and is the regular intensity (for compatibility with
            # the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic
            # regions.
            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            im = dstack([mu, (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu, (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu])
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
    if opts.verbose:
        # placeholder statement: the summary printout has been disabled
        great='great'

    frames_all = []
    descrs_all = []
    for size_of_spatial_bins in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or odd.
        off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = size_of_spatial_bins / float(opts.magnif)
        ims = vl_imsmooth(im, sigma)

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            size_of_spatial_bins = int(size_of_spatial_bins) # vl_dsift does not accept numpy.int64 or similar
            f_temp, d_temp = vl_dsift(data=ims[:, :, k], step=dsiftOpts.step, size=size_of_spatial_bins, fast=dsiftOpts.fast, verbose=dsiftOpts.verbose, norm=dsiftOpts.norm, bounds=[off, off, maxint, maxint])
            frames.append(f_temp)
            descrs.append(d_temp)
        frames = array(frames)
        descrs = array(descrs)
        # merge the channel axis into the descriptor axis
        d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]]
        descrs = descrs.reshape(d_new_shape)

        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded
        if (opts.color == 'gray') | (opts.color == 'opponent'):
            contrast = frames[0][2, :]
        elif opts.color == 'rgb':
            contrast = mean([frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0)
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
        descrs[:, contrast < opts.contrastthreshold] = 0

        # save only x,y, and the scale
        frames_temp = array(frames[0][0:3, :])
        padding = array(size_of_spatial_bins * ones(frames[0][0].shape))
        frames_all.append(vstack([frames_temp, padding]))
        descrs_all.append(array(descrs))

    frames_all = hstack(frames_all)
    descrs_all = hstack(descrs_all)
    return frames_all, descrs_all
def denseSift(imgIn,verbose=False,border=[10,10],visu=False,dtype=numpy.float32,scale=None):
    """Compute a dense SIFT feature image with one descriptor per pixel.

    The image is optionally rescaled, summed over its third axis if it has
    one, mirror-extended by ``border`` pixels, and passed to
    ``vlfeat.vl_dsift``. The descriptors are scattered into a dense array at
    their frame coordinates and the border is cropped away again.

    :param imgIn: 2-D image, or 3-D image whose last axis is summed away.
    :param verbose: forwarded to ``vl_dsift``.
    :param border: ``[rows, cols]`` mirror margin added on each side. The
        default list is shared between calls but never mutated here.
    :param visu: if true, show every feature channel with matplotlib.
    :param dtype: dtype of the returned feature array.
    :param scale: optional resize factor applied before extraction; the result
        is resized back to the original spatial shape.
    :return: array of shape ``(rows, cols, numDescriptors)`` where
        ``numDescriptors`` is the first dimension of the descriptor matrix.
        Positions not addressed by any frame keep the initial value 1.
    :raises AssertionError: if the image is smaller than ``border`` or if the
        frames do not cover every non-border pixel.
    """

    def extendBorder(img, borderSize):
        # Mirror borderSize[1] columns onto both sides, then borderSize[0] rows.
        width = img.shape[1]
        mirrored = numpy.fliplr(img)
        leading = mirrored[:, width - borderSize[1]:width]
        trailing = mirrored[:, 0:borderSize[1]]
        assert leading.shape[1] == borderSize[1]
        assert trailing.shape[1] == borderSize[1]
        img = numpy.concatenate([leading, img, trailing], axis=1)

        height = img.shape[0]
        mirrored = numpy.flipud(img)
        leading = mirrored[height - borderSize[0]:height, :]
        trailing = mirrored[0:borderSize[0], :]
        assert leading.shape[0] == borderSize[0]
        assert trailing.shape[0] == borderSize[0]
        return numpy.concatenate([leading, img, trailing], axis=0)

    def frameToCoordinate(F):
        # Frame row 1 indexes the first array axis, row 0 the second.
        cX = numpy.floor(F[1, :]).astype(numpy.int32)
        cY = numpy.floor(F[0, :]).astype(numpy.int32)
        return (cX, cY)

    def checkCovering(shape, border, coord):
        # True when every pixel inside the border is hit by some frame.
        covering = numpy.zeros(shape)
        covering[coord] = 1
        inner = covering[border[0]:shape[0] - border[0],
                         border[1]:shape[1] - border[1]]
        return not (inner == 0).any()

    if scale is not None:
        shapeOrg = imgIn.shape[0:2]
        dx, dy = shapeOrg
        newShape = [int(float(dx) * scale), int(float(dy) * scale)]
        imgIn = vigra.sampling.resize(imgIn, newShape)

    if imgIn.ndim == 3:
        imgIn = numpy.sum(imgIn, axis=2)
    srcShape = imgIn.shape

    # extend image with mirror boundary conditions
    img = extendBorder(imgIn, border)
    img = numpy.require(img, dtype=numpy.float32)
    # shape is bigger than srcShape since the image has been extended
    shape = img.shape

    # call *EXTERNAL* code for dense sift
    F, D = vlfeat.vl_dsift(
        img,
        step=-1,
        bounds=numpy.zeros(1, 'f'),
        size=-1,
        fast=True,
        verbose=verbose,
        norm=False
    )

    # number of descriptor channels (128 by default)
    numDescriptors = D.shape[0]
    featureImg = numpy.ones([shape[0], shape[1], numDescriptors], dtype=dtype)
    coords = frameToCoordinate(F)

    # check that each pixel is covered
    assert checkCovering(shape, border, coords)

    # Scatter all descriptor channels in one vectorised assignment: row i of
    # D.T is the descriptor of frame i.
    featureImg[coords[0], coords[1], :] = D.T

    # UN-extend image
    featureImgSrcShape = featureImg[border[0]:border[0] + srcShape[0],
                                    border[1]:border[1] + srcShape[1], :].copy()
    assert featureImgSrcShape.shape[0] == srcShape[0]
    assert featureImgSrcShape.shape[1] == srcShape[1]
    assert featureImgSrcShape.shape[2] == numDescriptors

    # visualize results ?
    if visu:
        for fi in range(numDescriptors):
            print(fi)
            plt.imshow(featureImgSrcShape[:, :, fi].T, cmap=cm.Greys_r)
            plt.show()

    if scale is not None:
        featureImgSrcShape = vigra.sampling.resize(
            featureImgSrcShape, shapeOrg + (featureImgSrcShape.shape[2],))
    return featureImgSrcShape
def calculate_visual_words_for_document(self, document_image_filename, visualize = False):
    """Quantise the dense SIFT descriptors of a document image into visual words.

    Reads ``self.step_size``, ``self.cell_size`` and ``self.n_centroids``;
    stores the resulting codebook in ``self.centroids``.

    :param document_image_filename: path of the image to process.
    :param visualize: if true, show the input image and afterwards the
        descriptor centres coloured by their visual word.
    :return: ``(centroids, labels)`` where ``labels`` is reshaped in
        column-major order to a grid with one row per distinct frame
        y coordinate.
    """
    image = Image.open(document_image_filename)
    # Later processing steps need the image as float32 values.
    im_arr = np.asarray(image, dtype='float32')

    # The colormap defines how the intensity values are interpreted.
    if visualize:
        plt.imshow(im_arr, cmap=cm.get_cmap('Greys_r'))
        plt.show()

    print("Berechne Sift Deskriptoren")
    # Compute the SIFT descriptors.
    frames, desc = vlfeat.vl_dsift(im_arr, step=self.step_size, size=self.cell_size)
    frames = frames.T
    desc = desc.T

    # A bag-of-features representation of the image needs a visual
    # vocabulary, which is computed here by cluster analysis using Lloyd's
    # version of the k-means algorithm. Parameters are
    #  - the number of centroids (n_centroids), i.e. the size of the visual
    #    vocabulary / the number of visual words;
    #  - the initialisation (minit): 'points' picks random data points as
    #    the initial centroids.
    # kmeans2 returns two NumPy arrays:
    #  - the codebook, a row-wise matrix of centroids;
    #  - a vector with one index per descriptor in desc, referring to the
    #    most similar centroid of the codebook (labels).
    # Mapping descriptors to centroids (visual words) is called quantisation.
    print("Berechne Visual Words")
    centroids, labels = kmeans2(desc.astype(float), self.n_centroids, minit='points')
    print("Berechnung Visual Words abgeschlossen.")

    # Visualise the descriptors and their quantisation: for every descriptor
    # draw its centre and its 4x4 cells. The colour of the centre encodes the
    # index of the visual word in the codebook; some colours may therefore
    # look very similar. Drawing the 4x4 cells can be slow, so it can be
    # switched off with the draw_descriptor_cells flag.
    if visualize:
        draw_descriptor_cells = True
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.imshow(im_arr, cmap=cm.get_cmap('Greys_r'))
        # Axes.hold only exists on old Matplotlib releases, where it had to
        # be enabled explicitly; newer releases always keep earlier artists.
        if hasattr(ax, 'hold'):
            ax.hold(True)
        ax.autoscale(enable=False)
        colormap = cm.get_cmap('jet')
        desc_len = self.cell_size * 4
        for (x, y), label in zip(frames, labels):
            color = colormap(label / float(self.n_centroids))
            circle = Circle((x, y), radius=1, fc=color, ec=color, alpha=1)
            rect = Rectangle((x - desc_len / 2, y - desc_len / 2), desc_len, desc_len, alpha=0.08, lw=1)
            ax.add_patch(circle)
            if draw_descriptor_cells:
                for p_factor in [0.25, 0.5, 0.75]:
                    offset_dyn = desc_len * (0.5 - p_factor)
                    offset_stat = desc_len * 0.5
                    line_h = Line2D((x - offset_stat, x + offset_stat), (y - offset_dyn, y - offset_dyn), alpha=0.08, lw=1)
                    line_v = Line2D((x - offset_dyn , x - offset_dyn), (y - offset_stat, y + offset_stat), alpha=0.08, lw=1)
                    ax.add_line(line_h)
                    ax.add_line(line_v)
            ax.add_patch(rect)
        plt.show()

    # centroids: the list of centroids (these are SIFT descriptors, too).
    # labels: for every SIFT descriptor the index of its assigned centroid.
    # ORDER: column-wise, top to bottom and left to right. Example:
    #   1 4 7
    #   2 5 8
    #   3 6 9
    self.centroids = centroids
    labels = np.reshape(labels,(len(np.unique(frames[:,1])),-1),order='F')
    return centroids, labels
def parallelImageProcessing(patchSizeTraining, patchStepTraining, \
        scaleList, testPredictFolder, svmWeights, feStandardized, enableMSER, \
        imgPath):
    """Compute and cache the maximum classifier response map for one image.

    The image is rescaled (once per MSER bounding box or once per entry of
    ``scaleList``), dense SIFT descriptors are standardised and scored with
    a linear model, and each score is written back to the original-resolution
    map as a running maximum over the patch it belongs to. The map is saved
    as ``MaxPredict_<dir>_<name>.npy`` in ``testPredictFolder``; if that file
    already exists nothing is recomputed.

    :param patchSizeTraining: ``(height, width)`` of the training patches.
    :param patchStepTraining: ``(row, col)`` step between predictions.
    :param scaleList: scales passed to ``imresize`` when MSER is disabled.
    :param testPredictFolder: directory of the cached ``.npy`` response maps.
    :param svmWeights: 2-D weight matrix; only column 0 of the product is
        used, and its last row multiplies the appended bias column of ones.
    :param feStandardized: ``(mean, std)`` used to standardise descriptors.
    :param enableMSER: use MSER bounding boxes instead of ``scaleList``.
    :param imgPath: path of the image to process.
    :return: None; the result is written to disk.
    """
    start = time.time()
    dirName, fName = os.path.split(imgPath)
    fName = fName.split('.')[0]
    dirName = dirName.split('/')[-1]
    # Spacing in the messages reproduces the original comma-separated prints.
    print('Starting processing Image -  %s' % (imgPath,))
    imageBatchSize = 500
    # presumably 24 = 4 bins * size 8 - 8, the margin lost to the descriptor
    # support -- TODO confirm
    margin = 24
    minT = 0
    # save file names
    predictFileName = os.path.join(testPredictFolder, 'MaxPredict_' + dirName + '_' + fName + '.npy')
    meanFeatureValues = feStandardized[0]
    stdDevFeatureValues = feStandardized[1]

    if os.path.isfile(predictFileName):
        print('Data already present')
        # NOTE(review): both values are loaded but not used afterwards.
        imageArray = np.round(imread(imgPath, flatten=True)).astype(np.uint8)
        maxPredictImage = np.load(predictFileName)
    else:
        # open the image and load the image as array
        imageArray = np.round(imread(imgPath, flatten=True)).astype(np.uint8)
        orgImageSize = imageArray.shape
        if enableMSER:
            delta = 5
            minArea = 30
            maxArea = 90000
            maxVariation = 0.2
            minDiversity = 0.1
            # 1st column - width, 2nd column - height
            bboxesDetected = mserHelper.detectMSERBboxes(imgPath, delta, minArea, maxArea, maxVariation, minDiversity)
            numOfScales = bboxesDetected.shape[0]
            print('Number of scales detected by MSER in image -  %s  -  %s' % (fName, numOfScales))
            if numOfScales == 0:
                # NOTE(review): numOfScales stays 0, so the loop below never
                # reads this fallback box -- confirm the intended behaviour.
                bboxesDetected = [patchSizeTraining[0], patchSizeTraining[1]]
        else:
            numOfScales = len(scaleList)

        maxPredictImage = np.ones(imageArray.shape) * (-np.inf)
        for scaleRun in range(numOfScales):
            if enableMSER:
                # 1st column - width, 2nd column - height
                curBBoxes = bboxesDetected[scaleRun, :]
                widthRatio = float(patchSizeTraining[1]) / curBBoxes[0]
                heightRatio = float(patchSizeTraining[0]) / curBBoxes[1]
                rescaledImgSize = (int(orgImageSize[0] * heightRatio), int(orgImageSize[1] * widthRatio))
                rescaledImage = imresize(imageArray, rescaledImgSize, interp = 'bicubic')
                print('Computing the response for image  %s  size  %s' % (fName, rescaledImage.shape))
            else:
                curScale = scaleList[scaleRun]
                print('Computing the response for image  %s  at scale  %s' % (fName, curScale))
                # rescale the image based on ratio given
                rescaledImage = imresize(imageArray, curScale, interp = 'bicubic')
                widthRatio = curScale
                heightRatio = curScale

            if rescaledImage.shape[0] < 32 or rescaledImage.shape[1] < 32:
                continue

            txtPredictions = np.zeros((rescaledImage.shape[0] - margin, rescaledImage.shape[1] - margin))
            for rowImageRun in np.arange(0, rescaledImage.shape[0] - margin, imageBatchSize - margin):
                for colImageRun in np.arange(0, rescaledImage.shape[1] - margin, imageBatchSize - margin):
                    curImage = rescaledImage[rowImageRun:rowImageRun + imageBatchSize,
                                             colImageRun:colImageRun + imageBatchSize]
                    frames, siftArray = vlfeat.vl_dsift(curImage.astype(np.float32), size = 8, step = 1, verbose = False)
                    siftArray = siftArray.T.astype(np.float32)
                    siftArray -= meanFeatureValues
                    siftArray /= stdDevFeatureValues
                    frames = frames.T
                    minColumn = np.min(frames[:, 0])
                    maxColumn = np.max(frames[:, 0])
                    minRow = np.min(frames[:, 1])
                    maxRow = np.max(frames[:, 1])
                    # +1 because of zero indexing; frame coordinates are
                    # floats, but shapes and slice bounds must be integers.
                    numColumns = int(maxColumn - minColumn + 1)
                    numRows = int(maxRow - minRow + 1)
                    siftArray = np.hstack((siftArray, np.ones((siftArray.shape[0], 1))))
                    predict = np.dot(siftArray, svmWeights)
                    predictPart = predict[:, 0].reshape(numColumns, numRows).T
                    txtPredictions[rowImageRun:rowImageRun + numRows,
                                   colImageRun:colImageRun + numColumns] = predictPart

            # Smallest response seen so far; used to fill untouched pixels.
            minT = np.minimum(minT, txtPredictions.min())

            patchRows = int(np.floor(patchSizeTraining[0] / heightRatio))
            patchCols = int(np.floor(patchSizeTraining[1] / widthRatio))
            for rowIdx, colIdx in np.ndindex(*txtPredictions.shape):
                eachPredict = txtPredictions[rowIdx, colIdx]
                eachRowCord = int(np.floor((rowIdx * patchStepTraining[0]) / heightRatio))
                eachColCord = int(np.floor((colIdx * patchStepTraining[1]) / widthRatio))
                eachRowCordEnd = eachRowCord + patchRows
                eachColCordEnd = eachColCord + patchCols
                predPart = maxPredictImage[eachRowCord:eachRowCordEnd, eachColCord:eachColCordEnd]
                maxPredictImage[eachRowCord:eachRowCordEnd, eachColCord:eachColCordEnd] = np.maximum(predPart, eachPredict)

        # save the response
        maxPredictImage[maxPredictImage == -np.inf] = minT
        np.save(predictFileName, maxPredictImage)

    end = time.time()
    print('Finished processing -  %s  seconds -  %s' % (imgPath, end - start))
def compute(self, image, step, size):
    """Run fast dense SIFT on ``image`` and return its two results.

    :param image: image handed to ``vl_dsift`` unchanged.
    :param step: forwarded as the ``step`` keyword of ``vl_dsift``.
    :param size: forwarded as the ``size`` keyword of ``vl_dsift``.
    :return: the ``(keypoints, descriptors)`` pair produced by ``vl_dsift``.
    """
    keypoints, descriptors = vl_dsift(image, step=step, size=size, fast=True)
    return keypoints, descriptors
def _run_sift_binary(imagename, resultname):
    """Feed ``imagename`` to the external SIFT executable, writing ``resultname``."""
    # check if linux or windows
    executable = "./sift" if os.name == "posix" else "siftWin32"
    try:
        source = open(imagename, "rb")
    except (IOError, OSError):
        raise IOError("image " + imagename + " not found")
    launched = True
    try:
        with open(resultname, "wb") as sink:
            try:
                # Argument list plus explicit stdin/stdout instead of a shell
                # string, so unusual characters in paths cannot alter the
                # command.
                subprocess.call([executable], stdin=source, stdout=sink)
            except OSError:
                launched = False
    finally:
        source.close()
    if not launched:
        os.remove(resultname)  # remove the empty result file created above
        raise IOError("SIFT executable not found")
    if os.path.getsize(resultname) == 0:
        raise IOError("extracting SIFT features failed " + resultname)


def _extract_dense_sift(imagename, resultname):
    """Extract dense SIFT features with vlfeat and store them in ``resultname``."""
    import vlfeat

    # defines how dense the grid is
    size = (150, 150)
    step = 10

    im = Image.open(imagename).resize(size, Image.ANTIALIAS)
    im_array = numpy.asarray(im)
    if im_array.ndim == 3:
        im_gray = vlfeat.vl_rgb2gray(im_array)
    elif im_array.ndim == 2:
        im_gray = im_array
    else:
        raise IOError("Not enough dims found in image " + imagename)

    locs, int_descriptors = vlfeat.vl_dsift(im_gray, step=step, verbose=VERBOSE)
    nfeatures = int_descriptors.shape[1]
    # Two zero rows are appended below the rows returned by vl_dsift.
    padding = numpy.zeros((2, nfeatures))
    locs = numpy.vstack((locs, padding))
    header = " ".join([str(nfeatures), str(128)])
    descriptors = int_descriptors.astype("float")  # convert descriptors to float

    with open(resultname, "wb") as f:
        cPickle.dump([locs.T, descriptors.T], f, protocol=cPickle.HIGHEST_PROTOCOL)
    print("features saved in %s" % (resultname,))

    if WRITE_VERBOSE:
        # Replaces the pickle written above with a plain-text dump.
        lines = [header]
        for i in range(nfeatures):
            lines.append(" ".join(map(str, locs[:, i])))
            lines.append(" ".join(map(str, descriptors[:, i])))
        with open(resultname, "w") as f:
            f.write("\n".join(lines) + "\n")


def process_image(imagename, resultname="temp.sift", dense=False):
    """Extract SIFT features from an image and save them to ``resultname``.

    With ``dense`` false the external ``sift`` / ``siftWin32`` executable is
    run; images whose name does not end in ``pgm`` are first converted to a
    greyscale ``tmp.pgm`` no larger than ``MAXSIZE``. With ``dense`` true the
    image is resized to 150x150 and dense SIFT features are computed with
    vlfeat and pickled (or written as text when ``WRITE_VERBOSE`` is set).

    :param imagename: path of the input image.
    :param resultname: path of the output file.
    :param dense: select dense extraction via vlfeat.
    :raises IOError: if the image or the SIFT executable cannot be opened,
        if the executable produces no output, or if the image array is
        neither 2-D nor 3-D.
    """
    # Double space reproduces the output of the original print statement.
    print("working on  %s" % (imagename,))

    if dense:
        _extract_dense_sift(imagename, resultname)
        return

    if imagename[-3:] != "pgm":
        # create a pgm file, image is resized, if it is too big.
        # sift returns an error if more than 8000 features are found
        size = (MAXSIZE, MAXSIZE)
        im = Image.open(imagename).convert("L")
        im.thumbnail(size, Image.ANTIALIAS)
        im.save("tmp.pgm")
        imagename = "tmp.pgm"

    _run_sift_binary(imagename, resultname)
def _get_vectorised_sift_samples(archetype_config, dataloader):
    """Collect dense SIFT descriptors of all unmasked box-centre pixels.

    Every image of every batch is converted to greyscale and described with
    ``vlfeat.vl_dsift`` on a ``SIFT_STEP`` grid. Only descriptors whose box
    centre is unmasked are kept.

    :param archetype_config: provides ``dataloader_batch_sz``, ``input_sz``
        and ``in_channels``.
    :param dataloader: yields ``(imgs, labels, masks)`` for test data or
        ``(imgs, _, _, masks)`` for training data; ``imgs`` are channel-first
        floats in the [0, 1] range.
    :return: ``samples`` of shape ``(num_unmasked, SIFT_DLEN)`` in uint8, or
        ``(samples, samples_labels)`` when the last batch carried labels.
    """
    num_batches = len(dataloader)
    batch_sz = archetype_config.dataloader_batch_sz
    num_imgs_max = num_batches * batch_sz  # estimate
    img_sz = archetype_config.input_sz

    # cluster individual (box central) pixels
    desc_side = int(img_sz // SIFT_STEP)
    print("img sz %d, desc_side %d" % (img_sz, desc_side))
    sys.stdout.flush()

    descs_all = np.zeros((num_imgs_max, desc_side * desc_side, SIFT_DLEN),
                         dtype=np.uint8)
    masks_all = np.zeros((num_imgs_max, desc_side * desc_side), dtype=np.bool_)
    labels_all = None
    actual_num_imgs = 0
    # Stays False when the dataloader yields nothing, so the tail of the
    # function still has a defined value to branch on.
    store_labels = False

    # when descriptor matrix flattened, goes along rows first (rows change
    # slow). Floor division keeps the indices integral.
    centres = np.arange(desc_side) * SIFT_STEP + (SIFT_STEP // 2)
    central_inds_h = centres.reshape((desc_side, 1)).repeat(desc_side, axis=1)
    central_inds_w = centres.reshape((1, desc_side)).repeat(desc_side, axis=0)
    central_inds_h = central_inds_h.reshape(-1)
    central_inds_w = central_inds_w.reshape(-1)

    for b_i, batch in enumerate(dataloader):
        if len(batch) == 3:  # test dataloader
            store_labels = True
            if labels_all is None:
                labels_all = np.zeros((num_imgs_max, desc_side * desc_side),
                                      dtype=np.int32)
            imgs, labels, masks = batch
            labels = labels.cpu().numpy().astype(np.int32)
        else:  # training dataloader
            store_labels = False
            imgs, _, _, masks = batch

        # imgs currently channel first, [0-1] range, floats
        imgs = (imgs * 255.).permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
        masks = masks.cpu().numpy().astype(np.bool_)

        curr_batch_sz, h, w, c = imgs.shape
        assert (h == archetype_config.input_sz and
                w == archetype_config.input_sz and
                c == archetype_config.in_channels)
        # Only the final batch may be smaller than the configured size.
        if b_i < num_batches - 1:
            assert (batch_sz == curr_batch_sz)

        start = b_i * batch_sz
        for i in range(curr_batch_sz):
            grey_img = cv2.cvtColor(imgs[i, :, :, :], cv2.COLOR_RGB2GRAY)
            locs, descs = vlfeat.vl_dsift(grey_img, step=SIFT_STEP)
            descs = descs.transpose((1, 0))  # 40*40, 128
            descs = descs.reshape(-1, SIFT_DLEN)  # rows change slowest

            # get the corresponding box central mask/label
            offset = start + i
            descs_all[offset, :, :] = descs
            masks_all[offset, :] = masks[i][central_inds_h, central_inds_w]
            if store_labels:
                labels_all[offset, :] = labels[i][central_inds_h,
                                                  central_inds_w]

        actual_num_imgs += curr_batch_sz

    # Trim the over-allocated buffers to the images actually seen.
    descs_all = descs_all[:actual_num_imgs, :, :]
    masks_all = masks_all[:actual_num_imgs, :]
    num_unmasked = masks_all.sum()

    if store_labels:
        labels_all = labels_all[:actual_num_imgs, :]
        samples_labels = labels_all[masks_all].reshape(-1)
        assert (samples_labels.shape[0] == num_unmasked)

    samples = descs_all[masks_all, :].reshape(-1, SIFT_DLEN)
    assert (samples.shape[0] == num_unmasked)

    if not store_labels:
        return samples
    return samples, samples_labels