def spatial_pyramid_fisher(size_image, descriptors, coordinates_keypoints, k,
                           gmm, levels_pyramid):
    """Build a spatial-pyramid Fisher vector for one image.

    The first ``2 * d * k`` entries hold the Fisher vector of the whole
    image.  They are followed by one Fisher vector per grid cell of every
    pyramid level, in the order the levels are listed and, inside a level,
    with the first coordinate axis varying slowest.  Cells that receive no
    descriptor keep an all-zero slot.

    Every keypoint inside the image range is assigned to exactly one cell
    per level: cells are half-open ``[lower, upper)`` and the last cell of
    each axis also includes its upper border.  (The previous strict
    ``<``/``>`` tests silently discarded keypoints lying on a border.)

    Args:
        size_image: Image extent; ``size_image[0]`` bounds the first
            keypoint coordinate and ``size_image[1]`` the second.
        descriptors: 2-D array with one local descriptor per row.
        coordinates_keypoints: Sequence of ``(x, y)`` positions, one per
            descriptor row.
        k: Number of Gaussians in ``gmm``.
        gmm: GMM in the form accepted by ``ynumpy.fisher``.
        levels_pyramid: Sequence of ``(cells_axis0, cells_axis1)`` pairs,
            one per pyramid level.

    Returns:
        ``float32`` array of shape ``(1, total_cells * 2 * d * k)``.
    """
    # Number of cells per level; level 0 is the whole image.
    num_cells = [1] + [level[0] * level[1] for level in levels_pyramid]
    # Index of the first output slot belonging to each level.
    first_slot = np.cumsum([0] + num_cells[:-1])

    dim_vec = 2 * int(descriptors.shape[1]) * k
    fisher_vector = np.zeros((1, sum(num_cells) * dim_vec), dtype=np.float32)

    # First, the Fisher vector of the whole image.
    fisher_vector[0, 0:dim_vec] = ynumpy.fisher(
        gmm, descriptors, include=['mu', 'sigma'])

    coords = np.array([(pt[0], pt[1]) for pt in coordinates_keypoints],
                      dtype=np.float64).reshape(-1, 2)
    xs = coords[:, 0]
    ys = coords[:, 1]

    for level, grid in enumerate(levels_pyramid, start=1):
        # Cell borders along each axis for this level.
        x_edges = np.floor(np.linspace(0, size_image[0] - 1, num=grid[0] + 1))
        y_edges = np.floor(np.linspace(0, size_image[1] - 1, num=grid[1] + 1))

        inside = ((xs >= x_edges[0]) & (xs <= x_edges[-1]) &
                  (ys >= y_edges[0]) & (ys <= y_edges[-1]))
        # searchsorted(..., 'right') - 1 gives the cell whose lower border is
        # <= the coordinate; clipping folds the upper image border into the
        # last cell.
        ix = np.clip(np.searchsorted(x_edges, xs, side='right') - 1,
                     0, grid[0] - 1)
        iy = np.clip(np.searchsorted(y_edges, ys, side='right') - 1,
                     0, grid[1] - 1)
        cell_of = ix * grid[1] + iy

        for j in range(num_cells[level]):
            members = np.flatnonzero(inside & (cell_of == j))
            if members.size == 0:
                continue  # empty cell: its slot stays zero
            slot = first_slot[level] + j
            cell_descriptors = np.ascontiguousarray(descriptors[members],
                                                    dtype=np.float32)
            fisher_vector[0, dim_vec * slot:dim_vec * (slot + 1)] = \
                ynumpy.fisher(gmm, cell_descriptors, include=['mu', 'sigma'])

    return fisher_vector
def create_fisher_vector_unsaved(gmm_list, video_desc):
    """Return the concatenated, normalised Fisher vector of one video.

    Same computation as ``create_fisher_vector`` but nothing is written to
    disk.

    Args:
        gmm_list: One ``(gmm, mean, pca_transform)`` triple per descriptor
            channel, in the order traj, hog, hof, mbhx, mbhy.
            ``pca_transform`` may be ``None`` to skip the projection.
        video_desc: Object exposing the ``traj``, ``hog``, ``hof``, ``mbhx``
            and ``mbhy`` descriptor arrays of a single video.

    Returns:
        The per-channel Fisher vectors (each power- and L2-normalised),
        stacked with ``np.hstack`` and L2-normalised once more as a whole.
    """
    channels = (video_desc.traj, video_desc.hog, video_desc.hof,
                video_desc.mbhx, video_desc.mbhy)

    channel_fvs = []
    for channel, (gmm, mean, pca_transform) in zip(channels, gmm_list):
        centered = channel.astype('float32') - mean
        if pca_transform is None:
            encoded_input = centered
        else:
            encoded_input = np.dot(centered, pca_transform)

        fv = ynumpy.fisher(gmm, encoded_input, include=['mu', 'sigma'])
        # Signed square root, then L2 normalisation of this channel.
        fv = np.sign(fv) * (np.abs(fv)**0.5)
        fv /= np.sqrt(np.sum(fv**2))
        # A zero norm yields NaN; those entries are replaced by 100.
        fv[np.isnan(fv)] = 100
        channel_fvs.append(fv.T)

    output_fv = np.hstack(channel_fvs)
    output_fv /= np.sqrt(np.sum(output_fv**2))
    return output_fv
def test_system(test_filenames, test_labels, detector, stdSlr_features, pca,
                gmm, stdSlr, clf, options):
    """Encode the test images as Fisher vectors and score the classifier.

    Args:
        test_filenames: Paths of the test images.
        test_labels: Ground-truth labels, aligned with ``test_filenames``.
        detector: Object with ``detectAndCompute(gray, None)``.
        stdSlr_features: Scaler applied to descriptors before PCA (only used
            when ``options.apply_pca`` is set).
        pca: PCA model applied to descriptors (same condition).
        gmm: GMM in the form accepted by ``ynumpy.fisher``.
        stdSlr: Scaler applied to the Fisher vectors before classification.
        clf: Fitted classifier exposing ``score``.
        options: Settings object; reads ``apply_pca``, ``ncomp_pca``,
            ``kmeans``, ``apply_normalization`` and ``evaluation_measures``.

    Returns:
        Classification accuracy in percent.
    """
    # NOTE(review): 128 presumably matches the raw descriptor length
    # (e.g. SIFT) - confirm against the detector in use.
    num_features = options.ncomp_pca if options.apply_pca else 128

    fisher_test = np.zeros(
        (len(test_filenames), options.kmeans * num_features * 2),
        dtype=np.float32)

    for i, filename in enumerate(test_filenames):
        print('Reading image ' + filename)
        ima = cv2.imread(filename)
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
        kpt, des = detector.detectAndCompute(gray, None)

        if des is None or len(des) == 0:
            # No keypoint was detected: keep the all-zero row instead of
            # crashing in the transforms below.
            continue

        if options.apply_pca:
            des = stdSlr_features.transform(des)
            des = pca.transform(des)

        # The transforms may return float64; yael expects a C-contiguous
        # float32 matrix.
        des = np.ascontiguousarray(des, dtype=np.float32)
        fisher_test[i, :] = ynumpy.fisher(gmm, des, include=['mu', 'sigma'])

    if options.apply_normalization:
        fisher_test = applyNormalization(fisher_test, options)

    test_fisher_vectors_scaled = stdSlr.transform(fisher_test)
    accuracy = 100 * clf.score(test_fisher_vectors_scaled, test_labels)

    if options.evaluation_measures:
        final_issues(test_fisher_vectors_scaled, test_labels, clf, options)

    return accuracy
def calculateFV(img):
    """Fit a PCA and a diagonal GMM on ``img`` and return its Fisher vector.

    Args:
        img: Array-like of samples, one per row (converted with
            ``np.array``).

    Returns:
        The Fisher vector computed by ``ynumpy.fisher`` using only the
        derivative with respect to the means.
    """
    im_matrix_ = np.array(img)

    # k is used both as the PCA target size and as the number of Gaussians.
    k = 256

    # compute PCA and transform the samples
    pca_transform = myPCA(im_matrix_, k)
    im_matrix_ = pca_transform.transform(im_matrix_)

    # train GMM
    print("Start fitting GMM")
    GMM_ = GaussianMixture(n_components=k, covariance_type='diag',
                           verbose_interval=1)
    t1 = time.time()
    GMM_.fit(im_matrix_)
    print("GMM fit in {}".format(time.time() - t1))

    # Get GMM matrices and convert them to FP32 (from FP64)
    w_, mu_, sigma_ = GMM_.weights_, GMM_.means_, GMM_.covariances_
    gmm = (w_.astype('float32'), mu_.astype('float32'),
           sigma_.astype('float32'))

    print("Processing FV of image i")
    # The samples need the same float32 treatment as the GMM parameters:
    # yael expects a C-contiguous float32 matrix.
    samples = np.ascontiguousarray(im_matrix_, dtype=np.float32)
    # compute the Fisher vector, using only the derivative w.r.t mu
    fv = ynumpy.fisher(gmm, samples, include='mu')
    print("FV processed.")
    return fv
def EncodeSift(gmm, image_descs, pca_transform, mean):
    """Encode each image's descriptors as a normalised Fisher vector.

    Args:
        gmm: GMM in the form accepted by ``ynumpy.fisher``.
        image_descs: Iterable of per-image descriptor matrices.
        pca_transform: Projection matrix right-multiplied onto the centred
            descriptors.
        mean: Vector subtracted from every descriptor before projection.

    Returns:
        Matrix with one Fisher vector per image (derivative w.r.t. mu only),
        power-normalised and L2-normalised row by row.  Rows whose norm is
        zero become NaN and are then set to 100, which places images without
        descriptors far from the others.
    """
    per_image = [
        ynumpy.fisher(gmm, np.dot(desc - mean, pca_transform), include = 'mu')
        for desc in image_descs
    ]

    # One row per image; the normalisations act on the whole matrix at once.
    stacked = np.vstack(per_image)

    # Signed square root (power normalisation).
    stacked = np.sign(stacked) * np.abs(stacked) ** 0.5

    # Row-wise L2 normalisation.
    row_norms = np.sqrt(np.sum(stacked ** 2, 1))
    stacked /= row_norms.reshape(-1, 1)

    stacked[np.isnan(stacked)] = 100
    return stacked
def create_fisher_vector(gmm_list, video_desc, fisher_path):
    """Compute, save and return the Fisher vector of a single video.

    For each descriptor channel a Fisher vector (derivatives w.r.t. mu and
    sigma) is computed, power-normalised and L2-normalised.  The channel
    vectors are concatenated, the result is L2-normalised again and stored
    with ``np.savez`` under the key ``fish``.

    Args:
        gmm_list: One ``(gmm, mean, pca_transform)`` triple per channel, in
            the order traj, hog, hof, mbhx, mbhy.  ``pca_transform`` may be
            ``None``, in which case no projection is applied.
        video_desc: Single video-descriptors object (defined in
            IDT_feature.py according to the original notes) exposing
            ``traj``, ``hog``, ``hof``, ``mbhx`` and ``mbhy`` arrays.
        fisher_path: Full path of the file to write, e.g.
            'v_Archery_g01_c01.fisher.npz'.

    Returns:
        The concatenated, normalised Fisher vector.
    """
    vid_desc_list = [video_desc.traj, video_desc.hog, video_desc.hof,
                     video_desc.mbhx, video_desc.mbhy]

    fvs = []
    for descriptor, gmm_mean_pca in zip(vid_desc_list, gmm_list):
        gmm, mean, pca_transform = gmm_mean_pca

        # Centre the descriptors; each matrix is (X, DIM).
        descrip = descriptor.astype('float32') - mean

        # Debug trace of the model pieces.  The length of a missing PCA is
        # reported as 'None' instead of raising.
        print("%s %s %s" % (type(gmm), type(mean), type(pca_transform)))
        print("%s %s %s" % (
            len(gmm), len(mean),
            'None' if pca_transform is None else len(pca_transform)))

        # The former test `pca_transform.all != None` compared a bound
        # method with None and was therefore always true.
        if pca_transform is not None:
            descrip = np.dot(descrip, pca_transform)

        # compute the Fisher vector, using the derivative w.r.t mu and sigma
        fv = ynumpy.fisher(gmm, descrip, include=['mu', 'sigma'])

        # Normalisations are done on each channel individually:
        # power-normalisation followed by L2.
        fv = np.sign(fv) * (np.abs(fv) ** 0.5)
        norms = np.sqrt(np.sum(fv ** 2))
        fv /= norms

        # handle videos with 0 local descriptor
        # (100 = far away from "normal" vectors)
        fv[np.isnan(fv)] = 100

        print("Performing fvs")
        fvs.append(fv.T)

    output_fv = np.hstack(fvs)

    # L2 normalize the entire fv.
    norm = np.sqrt(np.sum(output_fv ** 2))
    output_fv /= norm

    np.savez(fisher_path, fish=output_fv)
    print(fisher_path)
    return output_fv
def get_distances(train_images):
    """Return pairwise Fisher-vector distances between the given images.

    SURF descriptors are extracted from every readable image, a PCA and a
    GMM are learnt on all of them, each image is encoded as a normalised
    Fisher vector (derivative w.r.t. mu) and ``ynumpy.knn`` is queried with
    every image against every image.

    Images that cannot be processed, or that yield no descriptor, are
    skipped, so the rows and columns of the result follow the order of the
    images that were kept.

    Args:
        train_images: Iterable of image file names.

    Returns:
        Matrix in which row ``i`` holds the distances reported for image
        ``i``, reordered by ``argsort`` of the neighbour indices.
        NOTE(review): assumes ``ynumpy.knn`` returns ``(indices,
        distances)``; under that assumption column ``j`` is the distance to
        image ``j`` - confirm.
    """
    surf = cv2.SURF(hessianThreshold=500, extended=True)

    image_descs = []
    for fname in train_images:
        try:
            img = cv2.imread(fname, 0)
            kp, des = surf.detectAndCompute(img, None)
        except Exception:
            # Unreadable / unprocessable image: best effort, skip it.
            continue
        if des is None or len(des) == 0:
            # No keypoints: a None entry would break np.vstack below.
            continue
        image_descs.append(des)

    all_desc = np.vstack(image_descs)

    k = 128  # number of Gaussians

    # PCA learnt on all descriptors.
    sample = all_desc.astype('float32')
    mean = sample.mean(axis=0)
    sample = sample - mean
    cov = np.dot(sample.T, sample)
    eigvals, eigvecs = np.linalg.eig(cov)
    perm = eigvals.argsort()  # increasing eigenvalue
    # Columns 32..127 of the sorted order; this assumes 128-dimensional
    # descriptors (SURF is created with extended=True above).
    pca_transform = eigvecs[:, perm[32:128]]
    sample = np.dot(sample, pca_transform)

    gmm = ynumpy.gmm_learn(sample, k)

    image_fvs = []
    for image_desc in image_descs:
        image_desc = np.dot(image_desc - mean, pca_transform)
        fv = ynumpy.fisher(gmm, image_desc, include='mu')
        image_fvs.append(fv)
    image_fvs = np.vstack(image_fvs)

    # Power normalisation, then row-wise L2; zero-norm rows become NaN and
    # are pushed far away with the value 100.
    image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5
    norms = np.sqrt(np.sum(image_fvs ** 2, 1))
    image_fvs /= norms.reshape(-1, 1)
    image_fvs[np.isnan(image_fvs)] = 100

    results, distances = ynumpy.knn(image_fvs, image_fvs,
                                    nnn=len(image_fvs))

    # Reorder each row of distances by neighbour index.
    order = np.argsort(results, axis=1)
    s_distances = distances * 0
    for i in range(distances.shape[0]):
        s_distances[i, :] = distances[i, order[i, :]]
    return s_distances
def create_fisher_vector(gmm_list, video_desc, fisher_path):
    """Compute, save and return the Fisher vector of a single video.

    For each descriptor channel a Fisher vector (derivatives w.r.t. mu and
    sigma) is computed, power-normalised and L2-normalised.  The channel
    vectors are concatenated, the result is L2-normalised again and stored
    with ``np.savez`` under the key ``fish``.

    Args:
        gmm_list: One ``(gmm, mean, pca_transform)`` triple per channel, in
            the order traj, hog, hof, mbhx, mbhy.  ``pca_transform`` may be
            ``None``, in which case no projection is applied.
        video_desc: Single video-descriptors object (defined in
            IDT_feature.py according to the original notes) exposing
            ``traj``, ``hog``, ``hof``, ``mbhx`` and ``mbhy`` arrays.
        fisher_path: Full path of the file to write, e.g.
            'v_Archery_g01_c01.fisher.npz'.

    Returns:
        The concatenated, normalised Fisher vector.
    """
    vid_desc_list = [video_desc.traj, video_desc.hog, video_desc.hof,
                     video_desc.mbhx, video_desc.mbhy]

    fvs = []
    for descriptor, gmm_mean_pca in zip(vid_desc_list, gmm_list):
        gmm, mean, pca_transform = gmm_mean_pca

        # Centre the descriptors; each matrix is (X, DIM).
        descrip = descriptor.astype('float32') - mean

        # Identity test: `!= None` on an ndarray is an elementwise
        # comparison whose truth value is ambiguous.
        if pca_transform is not None:
            descrip = np.dot(descrip, pca_transform)

        # compute the Fisher vector, using the derivative w.r.t mu and sigma
        fv = ynumpy.fisher(gmm, descrip, include=['mu', 'sigma'])

        # Normalisations are done on each channel individually:
        # power-normalisation followed by L2.
        fv = np.sign(fv) * (np.abs(fv) ** 0.5)
        norms = np.sqrt(np.sum(fv ** 2))
        fv /= norms

        # handle videos with 0 local descriptor
        # (100 = far away from "normal" vectors)
        fv[np.isnan(fv)] = 100
        fvs.append(fv.T)

    output_fv = np.hstack(fvs)

    # L2 normalize the entire fv.
    norm = np.sqrt(np.sum(output_fv ** 2))
    output_fv /= norm

    np.savez(fisher_path, fish=output_fv)
    print(fisher_path)
    return output_fv
def getFisherVectors(Train_descriptors, k, gmm):
    """Encode every image's descriptor matrix as a Fisher vector.

    Args:
        Train_descriptors: Sequence of 2-D descriptor matrices, one per
            image; the descriptor length is read from the first one.
        k: Number of Gaussians in ``gmm``.
        gmm: GMM in the form accepted by ``ynumpy.fisher``.

    Returns:
        ``float32`` matrix of shape ``(len(Train_descriptors), 2 * k * d)``
        holding the mu+sigma Fisher vector of each image.
    """
    print('Computing Fisher vectors')
    descriptor_dim = int(Train_descriptors[0].shape[1])
    init = time.time()

    fisher = np.zeros((len(Train_descriptors), k * descriptor_dim * 2),
                      dtype=np.float32)
    for row, image_descriptors in enumerate(Train_descriptors):
        fisher[row, :] = ynumpy.fisher(gmm, image_descriptors,
                                       include=['mu', 'sigma'])

    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')
    return fisher
def evaluate_test(clf, stdSl, test_images_filenames, k, detector, gmm,
                  n_components, labels=None):
    """Encode the test images as Fisher vectors and return the accuracy.

    Args:
        clf: Fitted classifier exposing ``score``.
        stdSl: Scaler applied to the Fisher vectors before scoring.
        test_images_filenames: Paths of the test images.
        k: Number of Gaussians in ``gmm``.
        detector: Detector handed to ``compute_dense``.
        gmm: GMM in the form accepted by ``ynumpy.fisher``.
        n_components: Descriptor dimensionality used to size the output.
        labels: Ground-truth labels aligned with ``test_images_filenames``.
            When omitted, the module-level ``test_labels`` is used, which is
            what this function always relied on before.

    Returns:
        Classification accuracy in percent.
    """
    if labels is None:
        # Backward-compatible fallback to the global the original code read.
        labels = test_labels

    fisher_test = np.zeros((len(test_images_filenames), k * n_components * 2),
                           dtype=np.float32)
    for i, filename in enumerate(test_images_filenames):
        print('Reading image ' + filename)
        kpt, des = compute_dense(filename, detector)
        fisher_test[i, :] = ynumpy.fisher(gmm, des, include=['mu', 'sigma'])

    accuracy = 100 * clf.score(stdSl.transform(fisher_test), labels)
    return accuracy
def GetKnn(ID):
    """Match the frames of video ``ID`` against its panorama images.

    Reads the ``.sift`` files of the frame and panorama folders, encodes
    every image as a normalised Fisher vector using the model stored in
    ``gmm_2step.npz`` and queries ``ynumpy.knn`` for 10 panorama neighbours
    per frame.  The first value returned by ``ynumpy.knn`` is saved as
    ``fisher_results`` in the panorama folder; nothing is returned.
    """
    print(ID)
    info = Info.GetVideoInfo(ID)

    def sift_names(folder):
        # Sorted names of the .sift files inside `folder`.
        return [name for name in sorted(os.listdir(folder))
                if name.endswith('.sift')]

    def load_descriptors(folder_key, names):
        # Descriptor matrix of every file; empty ones become a (0, 128) stub.
        loaded = []
        for name in names:
            desc = ReadSift.ReadSift(info[folder_key] + '/' + name)[1]
            if desc.size == 0:
                desc = np.zeros((0, 128), dtype='uint8')
            loaded.append(desc)
        return loaded

    frame_sift_lst = sift_names(info['frame_sift_path'])
    pano_sift_lst = sift_names(info['pano_sift_path'])

    frame_desc = load_descriptors('frame_sift_path', frame_sift_lst)
    pano_desc = load_descriptors('pano_sift_path', pano_sift_lst)

    data = np.load(Info.Config.ROOT_PATH + '/gmm_2step.npz')
    gmm = (data['a'], data['b'], data['c'])
    mean = data['mean']
    pca_transform = data['pca_transform']

    # Frames first, panoramas second, so the matrix can be split afterwards.
    image_fvs = np.vstack([
        ynumpy.fisher(gmm, np.dot(desc - mean, pca_transform), include='mu')
        for desc in frame_desc + pano_desc
    ])

    # Power normalisation, row-wise L2; NaN rows (no descriptors) -> 100.
    image_fvs = np.sign(image_fvs) * np.abs(image_fvs)**0.5
    row_norms = np.sqrt(np.sum(image_fvs**2, 1))
    image_fvs /= row_norms.reshape(-1, 1)
    image_fvs[np.isnan(image_fvs)] = 100

    num_frames = len(frame_sift_lst)
    frame_fvs = image_fvs[0:num_frames]
    pano_fvs = image_fvs[num_frames:]

    results, distances = ynumpy.knn(frame_fvs, pano_fvs, nnn=10)
    np.save(info['pano_path'] + '/fisher_results', results)
def getFisherForImage(filename):
    """Return the mu+sigma Fisher vector of one image.

    The descriptor type, the GMM and the optional PCA model are read from
    the module-level sequence ``data`` (indices 1, 2 and 4).

    Args:
        filename: Path of the image to encode.

    Returns:
        The Fisher vector computed by ``ynumpy.fisher``.
    """
    descriptor_type = data[1]
    gmm = data[2]
    computedPca = data[4]

    kpt, des = getKptDesForImage(filename, descriptor_type)

    # Identity test: `!= None` would dispatch to the object's own comparison.
    if computedPca is not None:
        des = computedPca.transform(des)

    des = np.float32(des)
    fisher_test = ynumpy.fisher(gmm, des, include=['mu', 'sigma'])
    return fisher_test
def transform(self, X):
    """Encode each image's descriptors as a Fisher vector.

    Args:
        X: Mapping with the keys ``'descriptors'``, ``'positions'`` and
            ``'imsizes'``; only the descriptors are used for the encoding.

    Returns:
        List with one Fisher vector (derivative w.r.t. mu) per entry of
        ``X['descriptors']``.
    """
    print('Getting Fisher Vector representation')
    init = time.time()

    descriptors = X['descriptors']
    # Looked up as before (a missing key still raises) but not used here.
    _positions = X['positions']
    _imsizes = X['imsizes']

    # Centre with the stored mean, project with the stored PCA, then encode.
    image_fvs = [
        ynumpy.fisher(self.gmm,
                      self.PCA.transform(image_desc - self.mean),
                      include='mu')
        for image_desc in descriptors
    ]

    end = time.time()
    print('\tDone in ' + str(end - init) + ' secs.')
    return image_fvs
def GetKnn(ID):
    """Find, for every frame of video ``ID``, its nearest panorama images.

    Descriptors are read from the ``.sift`` files of the frame and panorama
    folders, encoded as normalised Fisher vectors with the model stored in
    ``gmm_2step.npz`` and compared with ``ynumpy.knn`` (10 neighbours).  The
    first value returned by ``ynumpy.knn`` is written to ``fisher_results``
    in the panorama folder; the function returns nothing.
    """
    print(ID)
    info = Info.GetVideoInfo(ID)

    frame_sift_lst = [entry for entry in sorted(os.listdir(info['frame_sift_path']))
                      if entry.endswith('.sift')]
    pano_sift_lst = [entry for entry in sorted(os.listdir(info['pano_sift_path']))
                     if entry.endswith('.sift')]

    def read_descriptors(path):
        # Second item returned by ReadSift; an empty result becomes a
        # (0, 128) stub.
        desc = ReadSift.ReadSift(path)[1]
        if desc.size == 0:
            return np.zeros((0, 128), dtype = 'uint8')
        return desc

    frame_desc = [read_descriptors(info['frame_sift_path'] + '/' + entry)
                  for entry in frame_sift_lst]
    pano_desc = [read_descriptors(info['pano_sift_path'] + '/' + entry)
                 for entry in pano_sift_lst]

    data = np.load(Info.Config.ROOT_PATH + '/gmm_2step.npz')
    gmm = (data['a'], data['b'], data['c'])
    mean = data['mean']
    pca_transform = data['pca_transform']

    encoded = []
    for desc in frame_desc + pano_desc:
        projected = np.dot(desc - mean, pca_transform)
        encoded.append(ynumpy.fisher(gmm, projected, include = 'mu'))
    image_fvs = np.vstack(encoded)

    # Signed square root followed by row-wise L2 normalisation.
    image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5
    norms = np.sqrt(np.sum(image_fvs ** 2, 1))
    image_fvs /= norms.reshape(-1, 1)
    # Rows with zero norm turn into NaN; they are set to 100.
    image_fvs[np.isnan(image_fvs)] = 100

    split_at = len(frame_sift_lst)
    frame_fvs = image_fvs[0:split_at]
    pano_fvs = image_fvs[split_at:]

    results, distances = ynumpy.knn(frame_fvs, pano_fvs, nnn = 10)
    np.save(info['pano_path'] + '/fisher_results', results)
def compute_fisher_vectors(D, n_components, k):
    """Learn a GMM on ``D`` and build the training Fisher-vector matrix.

    Args:
        D: Matrix of descriptors (one per row) used to learn the GMM.
        n_components: Descriptor dimensionality, used to size the output.
        k: Number of Gaussians.

    Returns:
        Tuple ``(fisher, gmm)``.  ``fisher`` has one row per entry of the
        module-level ``Train_descriptors``.

    NOTE(review): every row is the Fisher vector of the *whole* matrix
    ``D``, exactly as in the original loop, which never used its index on
    the right-hand side.  A per-image encoding was probably intended -
    confirm before relying on this output.  The encoding is now computed
    once instead of once per row.
    """
    print('Computing gmm with ' + str(k) + ' centroids')
    init = time.time()
    samples = np.float32(D)
    gmm = ynumpy.gmm_learn(samples, k)
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    init = time.time()
    num_rows = len(Train_descriptors)
    fisher = np.zeros((num_rows, k * n_components * 2), dtype=np.float32)
    if num_rows:
        # Loop-invariant: computed once and broadcast to all rows.
        fisher[:] = ynumpy.fisher(gmm, samples, include=['mu', 'sigma'])
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')
    return (fisher, gmm)
# Encode every image of `image_range` as a Fisher vector and collect the
# vectors in `dataset`; optionally show the images in a 13 x 8 subplot grid.
# NOTE(review): `dataset`, `image_range`, `show`, `gmm`, `sift_directory` and
# `image_directory` are defined outside this excerpt.
queries = []

if show:
    fig = plt.figure(figsize=(10, 10))
    fig.canvas.set_window_title("100 image dataset")
    plot_idx = 1  # 1-based index of the next subplot

for i in image_range:
    filename = "%s/ukbench%05d.siftgeo" % (sift_directory, i)
    print(" " + filename + "\r")
    sys.stdout.flush()
    sift_descriptors, geometric_info = ynumpy.siftgeo_read(filename)

    # compute the Fisher vector using the GMM
    fv = ynumpy.fisher(gmm, sift_descriptors.astype('float32'))
    dataset.append(fv)

    if show:
        imagename = "%s/ukbench%05d.jpg" % (image_directory, i)
        im = Image.open(imagename)
        ax = plt.subplot(13, 8, plot_idx)
        ax.axis('off')
        plt.imshow(im)
        # redraw after every eighth image
        if i % 8 == 7:
            plt.draw()
        plot_idx += 1

# one row per image
dataset = numpy.vstack(dataset)
# Script comparing Fisher vectors of duplicated data (`a`), of the plain data
# (`b`) and of the sample-weighted variant `fisher_sw` (`c`).
import numpy as np
from yael import ynumpy

dat = np.load("test/py/test_fisher_dat.npy")
# NOTE(review): np.load on a pickle file unpickles its content; only use
# with trusted fixtures.
gmm = np.load("test/py/test_gmm.pickle")

# The two halves of the data (not used further below).
dat_a = dat[: len(dat) / 2]
dat_b = dat[len(dat) / 2 :]

# Fisher vector of the data stacked twice versus the data once.
a = ynumpy.fisher(gmm, np.vstack([dat, dat]).astype(np.float32), include="mu+sigma")
b = ynumpy.fisher(gmm, np.vstack([dat]).astype(np.float32), include="mu+sigma")

# Weight 4 for the first half of the samples, 2 for the second half.
# NOTE(review): np.vstack yields a (2, len(dat)/2) matrix rather than one
# weight per sample - confirm what fisher_sw expects.
sw_a = np.ones(len(dat) / 2) * 4
sw_b = np.ones(len(dat) / 2) * 2
c = ynumpy.fisher_sw(gmm, dat.astype(np.float32), np.vstack([sw_a, sw_b]).astype(np.float32), include="mu+sigma")

# print a - c
print a - b
# print b - c

# sw = np.array([(i + 5) % 10 for i in xrange(len(dat))])
# dat_weighted = np.vstack([np.vstack([dat[i]] * sw[i])
#                           for i in range(len(dat)) if sw[i] != 0])
# # np.ones(len(dat), dtype=np.float32)
# a = ynumpy.fisher(gmm, dat_weighted.astype(np.float32), include='mu+sigma')
# b = ynumpy.fisher_sw(gmm, dat.astype(np.float32), (sw).astype(np.float32), include='mu+sigma')
# print sw
# print sw * 0.1
def predict_fishergmm(gmm, des, options):
    """Return the mu+sigma Fisher vector of a single image's features.

    Args:
        gmm: GMM in the form accepted by ``ynumpy.fisher``.
        des: Features of one image; converted to float32 before encoding.
        options: Not used by this function.

    Returns:
        The Fisher vector computed by ``ynumpy.fisher``.
    """
    features = np.float32(des)
    return ynumpy.fisher(gmm, features, include=['mu', 'sigma'])
perm = eigvals.argsort() # sort by increasing eigenvalue pca_transform = eigvecs[:, perm[64:128]] # eigenvectors for the 64 last eigenvalues # transform sample with PCA (note that numpy imposes line-vectors, # so we right-multiply the vectors) sample = np.dot(sample, pca_transform) # train GMM gmm = ynumpy.gmm_learn(sample, k) image_fvs = [] for image_desc in image_descs: # apply the PCA to the image descriptor image_desc = np.dot(image_desc - mean, pca_transform) # compute the Fisher vector, using the derivative w.r.t mu and sigma fv = ynumpy.fisher(gmm, image_desc, include = 'mu, sigma') image_fvs.append(fv) # make one matrix with all FVs image_fvs = np.vstack(image_fvs) # normalizations are done on all descriptors at once # power-normalization image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5 # L2 normalize norms = np.sqrt(np.sum(image_fvs ** 2, 1)) image_fvs /= norms.reshape(-1, 1) # handle images with 0 local descriptor (100 = far away from "normal" images)
def generate_fisher(mirrored_features, gmm_results, gmm_object, ncomponents,
                    start_idxs, end_idxs):
    """
    Generate Fisher vector features for mirrored_features.

    One Fisher vector (gradients w.r.t. weights, means and variances) is
    computed per sliding window.  Windows whose start index is -1 are
    skipped and keep an all-zero row.

    :param mirrored_features: Features to compute Fisher vector features for.
    :param gmm_results: (weights, means, sigmas) from fitted GMM.
    :param gmm_object: GMM object from either yael or scikit-learn.
    :param ncomponents: Number of components used in Gaussian mixture model.
        Without yael the value is taken from ``len(weights)`` instead.
    :param start_idxs: Start indices of each sliding window.
    :param end_idxs: End indices (inclusive) of each sliding window.
    :return: fv_features: Fisher vector features for mirrored_features,
        passed through ``power_l2_normalize(..., power_normalize=False)``.
    """
    print('Generating Fisher vector features...')

    if not USE_YAEL:
        ws, mus, sigmas = gmm_results
        # Use the real component count *before* sizing the output so that
        # the row length always matches the vectors assembled below.
        ncomponents = len(ws)

    num_dims = np.size(mirrored_features, 1)
    fv_features = np.zeros(
        (len(start_idxs), 2 * ncomponents * num_dims + ncomponents - 1))

    for i in range(len(start_idxs)):
        if start_idxs[i] == -1:
            continue

        first, last = int(start_idxs[i]), int(end_idxs[i])
        if USE_YAEL:
            X = mirrored_features[first:last + 1, :].astype('float32')
            fv_features[i, :] = ynumpy.fisher(
                gmm_results, X, include=['w', 'mu', 'sigma'])
            continue

        X = mirrored_features[first:last + 1, :]
        num_samples = np.size(X, 0)

        # Posterior probability of every component for every sample.
        try:
            gammas = gmm_object.predict_proba(X)
        except Exception:
            # The GMM object cannot provide posteriors: compute them from
            # the raw parameters one sample at a time.
            gammas = np.zeros((np.size(X, 0), len(ws)))
            for obs in range(len(X)):
                gammas[obs, :] = compute_gmm_probs(X[obs, :], ws, mus, sigmas)

        # Weight gradients, expressed relative to component 0 (this is why
        # there are ncomponents - 1 of them).
        accus = np.sum(
            gammas[:, 1:] / ws[1:] - (gammas[:, 0] / ws[0])[:, np.newaxis],
            axis=0)
        grad_alpha = [
            accus[idx] / np.sqrt((1 / ws[idx + 1] + 1 / ws[0]))
            for idx in range(0, ncomponents - 1)
        ]
        # Mean gradients.
        grad_mu = [
            np.sqrt(sigmas[c, :] / (ws[c])) *
            np.dot(gammas[:, c], (X - mus[c]) / sigmas[c])
            for c in range(ncomponents)
        ]
        # Variance gradients.
        grad_sigma = [
            np.sqrt(1 / (2 * ws[c])) *
            np.dot(gammas[:, c], (X - mus[c])**2 / sigmas[c] - 1)
            for c in range(ncomponents)
        ]

        fv_features[i, :] = 1 / np.sqrt(num_samples) * np.concatenate(
            (grad_alpha, np.array(grad_mu).flatten(),
             np.array(grad_sigma).flatten()))

    # Normalize
    fv_features = power_l2_normalize(fv_features, power_normalize=False)
    return fv_features
#n_sifts = image_desc.shape[0] #for i in range(n_sifts): # if np.linalg.norm(image_desc[i], ord=1) == 0.0: # continue # image_desc[i] = np.sqrt(image_desc[i]/np.linalg.norm(image_desc[i], ord=1)) #n_sifts = image_desc.shape[0] #for i in range(n_sifts): # image_desc[i] = np.sign(image_desc[i]) * np.log(1.0 + np.abs(image_desc[i])) # apply the PCA to the image descriptor image_desc = np.dot(image_desc - mean, pca_transform) image_desc = image_desc.astype(np.float32) # compute the Fisher vector, using only the derivative w.r.t mu fv = ynumpy.fisher(gmm, image_desc, include=['mu', 'sigma']) features.append(fv) image_names.append(img_name) # make one matrix with all FVs features = np.vstack(features) # normalizations are done on all descriptors at once # power-normalization features = np.sign(features) * np.abs(features)**0.5 # L2 normalize #norms = np.sqrt(np.sum(image_fvs ** 2, 1)) #image_fvs /= norms.reshape(-1, 1)
def Pyramid_BoW_fisher(gmm, Image_info, x_part, y_part):
    """Build a pyramid of Fisher vectors for every image in ``Image_info``.

    For each image the descriptors are binned into a fine grid of
    ``x_part**2`` by ``y_part**2`` cells and an intermediate grid of
    ``x_part`` by ``y_part`` cells; the whole image, the intermediate cells
    and the fine cells are encoded and concatenated into one list per image.

    NOTE(review): several lines below look defective (see the inline
    notes); the code is left untouched because the intended behaviour
    cannot be confirmed from this block alone.
    """
    # NOTE(review): requires `gmm` to expose `.shape`, yet the same object is
    # passed to ynumpy.fisher below - confirm its type.
    k = gmm.shape[0]
    # Dimension of each vector = k * number of cells, in this case 21
    # (16 small cells, 4 quadrants and the whole image)
    visual_words = []
    #i = 0
    for img, label in Image_info:
        total_rows = x_part**2
        total_columns = y_part**2
        x_step = img.shape[0] / total_rows
        y_step = img.shape[1] / total_columns
        # NOTE(review): every cell starts as the integer 0, so the `.append`
        # below raises AttributeError as soon as a keypoint falls inside a
        # cell; the outer list also has total_columns entries although it is
        # indexed by row first.
        Q = [[0 for x in xrange(total_rows)] for y in xrange(total_columns)]
        Q_int = [[0 for w in xrange(x_part)] for z in xrange(y_part)]
        # classify the descriptors by the coordinates of the keypoint they
        # belong to
        for kpt, desc in zip(img.kpt, img.des):
            # note: shape is (num_rows, num_columns); point coordinate = (x, y)
            kpt = kpt.pt
            for row in xrange(total_rows):
                for column in xrange(total_columns):
                    # strict comparisons: keypoints exactly on a cell border
                    # match no cell
                    if kpt[0] < y_step * (column + 1) and kpt[
                            0] > y_step * column and kpt[1] < x_step * (
                                row + 1) and kpt[1] > x_step * row:
                        Q[row][column].append(desc.tolist())
        # Compose the intermediate level
        for row in xrange(x_part):
            for column in xrange(y_part):
                for sub_r in xrange(x_part):
                    for sub_c in xrange(y_part):
                        # NOTE(review): each pass overwrites the previous
                        # value, so only the last sub-cell is kept.
                        Q_int[row][column] = np.array(
                            Q[row * x_part + sub_r][column * y_part + sub_c])
                        #Q_int[row][column] = np.array(Q[row*x_part:row*x_part+(x_part),column*y_part:column*y_part+(y_part)])
        # For convenience, build a list with all the classified descriptors
        #Q = [img.des, Q1, Q2, Q3, Q4, np.array(Q11), np.array(Q12), np.array(Q13), np.array(Q14), np.array(Q21), np.array(Q22), np.array(Q23), np.array(Q24), np.array(Q31), np.array(Q32), np.array(Q33), np.array(Q34), np.array(Q41), np.array(Q42), np.array(Q43), np.array(Q44)]
        des_array = []
        # the descriptor values themselves are scaled by 0.25 / 0.25 / 0.5
        # before encoding
        des_array.append(0.25 * np.array(img.des))
        for arr_r in xrange(x_part):
            for arr_c in xrange(y_part):
                des_array.append(0.25 * np.array(Q_int[arr_r][arr_c]))
        for arr_r in xrange(total_rows):
            for arr_c in xrange(total_columns):
                des_array.append(0.5 * np.array(Q[arr_r][arr_c]))
        # Initialise the pyramid descriptor
        Pdesc = []
        for q in des_array:
            #Generate fisher vectors with each grid partition
            if len(q):
                #Fisher prediction
                Pdesc += ynumpy.fisher(gmm, q, include=['mu', 'sigma'])
                #Pdesc += np.bincount(codebook.predict(np.array(q)),minlength=k).tolist() just for BOW
            else:
                # NOTE(review): pads with k zeros; a mu+sigma Fisher vector
                # is presumably longer than k - confirm.
                Pdesc += np.zeros(k, dtype=np.int64).tolist()
        visual_words.append(Pdesc)
    return visual_words
def main(nfeatures=100, code_size=32, n_components=60, kernel='linear', C=1,
         reduction=None, features='sift', pyramid=False, grid_step=6):
    """Train and evaluate an SVM on (pyramid) Fisher vectors.

    Steps: load the dataset, extract descriptors for every pyramid level,
    optionally reduce them with PCA, learn the GMM codebook, encode train
    and test images as concatenated per-level Fisher vectors, train the SVM
    and print the final accuracy.

    Args:
        nfeatures: Forwarded to ``features_detector`` and
            ``compute_codebook``.
        code_size: Number of Gaussians of the codebook.
        n_components: Target dimensionality for the PCA reduction.
        kernel: SVM kernel; ``'pyramid_match'`` uses a precomputed
            ``spatialPyramidKernel`` matrix.
        C: SVM regularisation parameter.
        reduction: ``'pca'`` to reduce descriptors, anything else to skip.
        features: Descriptor type handed to the detector / codebook helpers.
        pyramid: Pyramid setting handed to the descriptor and kernel helpers.
        grid_step: Step handed to the detector / codebook helpers.

    Returns:
        None; the accuracy and timings are printed.
    """
    start = time.time()

    # read the train and test files
    (train_images_filenames, test_images_filenames,
     train_labels, test_labels) = get_dataset()

    # create the SIFT detector object
    SIFTdetector = features_detector(nfeatures, features, grid_step)

    # extract SIFT keypoints and descriptors
    # store descriptors in a python list of numpy arrays
    Train_descriptors, Train_label_per_descriptor = getDescriptors(
        SIFTdetector, train_images_filenames, train_labels, pyramid)
    Train_descriptors = np.asarray(Train_descriptors)

    # Transform everything to numpy arrays
    size_descriptors = Train_descriptors[0][0].shape[-1]
    # for D we only need the first level of the pyramid (because it already
    # contains all points)
    D = np.zeros(
        (np.sum([len(p[0]) for p in Train_descriptors]), size_descriptors),
        dtype=np.uint8)
    startingpoint = 0
    for image_levels in Train_descriptors:
        level0 = image_levels[0]
        D[startingpoint:startingpoint + len(level0)] = level0
        startingpoint += len(level0)

    if reduction == 'pca':
        D, pca_reducer = PCA_reduce(D, n_components)

    k = code_size
    # Compute Codebook
    gmm = compute_codebook(D, k, nfeatures, None, features, grid_step,
                           D.shape[1])

    # Length of one mu+sigma Fisher vector and number of pyramid levels.
    fv_dim = k * D.shape[1] * 2
    n_levels = Train_descriptors.shape[1]

    init = time.time()
    fisher = np.zeros((len(Train_descriptors), fv_dim * n_levels),
                      dtype=np.float32)
    for i in range(len(Train_descriptors)):
        for j in range(n_levels):
            des = Train_descriptors[i][j]  # descriptors of pyramid level j
            if reduction == 'pca':
                des = pca_reducer.transform(des)
            fisher[i, j * fv_dim:(j + 1) * fv_dim] = ynumpy.fisher(
                gmm, np.float32(des), include=['mu', 'sigma'])
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    # Train the SVM classifier
    stdSlr = StandardScaler().fit(fisher)
    D_scaled = stdSlr.transform(fisher)
    print('Training the SVM classifier...')
    if kernel == 'pyramid_match':
        ker_matrix = spatialPyramidKernel(D_scaled, D_scaled, fv_dim, pyramid)
        clf = svm.SVC(kernel='precomputed', C=C)
        clf.fit(ker_matrix, train_labels)
    else:
        clf = svm.SVC(kernel=kernel, C=C).fit(D_scaled, train_labels)
    print('Done!')

    # get all the test data and predict their labels
    fisher_test = np.zeros((len(test_images_filenames), fv_dim * n_levels),
                           dtype=np.float32)
    for i, filename in enumerate(test_images_filenames):
        print('Reading image ' + filename)
        ima = cv2.imread(filename)
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
        all_kpt, all_des = SIFTdetector.detect_compute(gray, pyramid)
        for j, des in enumerate(all_des):  # one entry per pyramid level
            if reduction == 'pca':
                des = pca_reducer.transform(des)
            fisher_test[i, j * fv_dim:(j + 1) * fv_dim] = ynumpy.fisher(
                gmm, np.float32(des), include=['mu', 'sigma'])

    # The earlier unconditional `clf.score` on the scaled features was
    # removed: its result was always overwritten below, and a classifier
    # trained on a precomputed kernel cannot score raw feature vectors.
    fisher_test = stdSlr.transform(fisher_test)
    if kernel == 'pyramid_match':
        predictMatrix = spatialPyramidKernel(fisher_test, D_scaled, fv_dim,
                                             pyramid)
        accuracy = 100 * clf.score(predictMatrix, test_labels)
    else:
        accuracy = 100 * clf.score(fisher_test, test_labels)

    print('Final accuracy: ' + str(accuracy))
    end = time.time()
    print('Done in ' + str(end - start) + ' secs.')
# Script comparing Fisher vectors of duplicated data (`a`), of the plain data
# (`b`) and of the sample-weighted variant `fisher_sw` (`c`).
import numpy as np
from yael import ynumpy

dat = np.load('test/py/test_fisher_dat.npy')
# NOTE(review): np.load on a pickle file unpickles its content; only use
# with trusted fixtures.
gmm = np.load('test/py/test_gmm.pickle')

# The two halves of the data (not used further below).
dat_a = dat[:len(dat) / 2]
dat_b = dat[len(dat) / 2:]

# Fisher vector of the data stacked twice versus the data once.
a = ynumpy.fisher(gmm, np.vstack([dat, dat]).astype(np.float32), include='mu+sigma')
b = ynumpy.fisher(gmm, np.vstack([dat]).astype(np.float32), include='mu+sigma')

# Weight 4 for the first half of the samples, 2 for the second half.
# NOTE(review): np.vstack yields a (2, len(dat)/2) matrix rather than one
# weight per sample - confirm what fisher_sw expects.
sw_a = np.ones(len(dat) / 2) * 4
sw_b = np.ones(len(dat) / 2) * 2
c = ynumpy.fisher_sw(gmm, dat.astype(np.float32), np.vstack([sw_a, sw_b]).astype(np.float32), include='mu+sigma')

# print a - c
print a - b
# print b - c

# sw = np.array([(i + 5) % 10 for i in xrange(len(dat))])
# dat_weighted = np.vstack([np.vstack([dat[i]] * sw[i])
#                           for i in range(len(dat)) if sw[i] != 0])
# # np.ones(len(dat), dtype=np.float32)
# a = ynumpy.fisher(gmm, dat_weighted.astype(np.float32), include='mu+sigma')
# Smoke test: k-means and GMM with 3 components on `v`, then the Fisher
# vector of two noisy copies of the first GMM mean.
# NOTE(review): `v` is defined outside this excerpt.
centroids = ynumpy.kmeans(v, 3)

print "result centroids ="
print centroids[:10, :]

print "gmm:"

gmm = ynumpy.gmm_learn(v, 3)

# the learnt GMM unpacks into three parts
(w, mu, sigma) = gmm

print "mu = "
print mu
print "sigma = "
print sigma

# two copies of the first mean vector
muc = numpy.vstack((mu[0, :],
                    mu[0, :]))
#                   mu[1, :],
#                   mu[1, :],
#                   mu[1, :]))

print "mu=", mu
# perturb the copies in place; positional arguments are loc=-0.02 and
# scale=0.02
muc += numpy.random.normal(-0.02, 0.02, size=muc.shape)
print "muc=", muc

# Fisher vector of the perturbed points, with ynumpy.fisher's default
# `include`
fish = ynumpy.fisher(gmm, muc)

print fish
def process(signature=None):
    """Compute PCA-reduced Fisher vectors for the dataset or one signature.

    With ``signature=None`` the whole ``feature_matrix.h5`` is processed:
    the GMM / PCA models are loaded, or trained and saved if they cannot be
    loaded, every descriptor is encoded as a Fisher vector, a second PCA is
    fitted on the Fisher vectors and everything is written to disk.

    With a ``signature`` the stored models are required; the signature is
    encoded and projected with the stored Fisher-vector PCA.

    Args:
        signature: Single descriptor (array-like) or ``None``.

    Returns:
        * ``"YAEL SCRIPT: Mission accomplished!"`` after processing the
          whole dataset,
        * ``"No needed data found. Abort."`` when a signature is given but
          the stored models cannot be loaded,
        * otherwise the reduced Fisher vector(s) as a nested list.
    """
    # make a big matrix with all image descriptors
    all_desc = []

    # handle the case it's requested to process the entire dataset
    if signature is None:
        h5f = h5py.File("feature_matrix.h5", 'r')
        try:
            feats = h5f['feature_matrix'][:]
        finally:
            h5f.close()
        # normalize input matrix to avoid GMM crash
        feats = normalize(feats, axis=1, norm='l2')
        # ensure that the descriptors are FP32 and put them in a matrix
        image_descs = np.array(feats).astype('float32')
        all_desc = np.vstack(image_descs)

    try:
        # if available, load GMM model and PCA
        h5f = h5py.File("GMM.h5", 'r')
        try:
            gmm = (np.array(h5f['gmm1']).astype('float32'),
                   np.array(h5f['gmm2']).astype('float32'),
                   np.array(h5f['gmm3']).astype('float32'))
        finally:
            # Always release the handle: a file left open in 'r' mode would
            # make the 'w' reopen in the fallback below fail.
            h5f.close()
        pca_transform = joblib.load('pca_transform_gmm.pkl')
        print("there are GMM and pca_transform")
    except Exception:
        # handle the case where there aren't the needed data to process.
        if signature is not None:
            error = "No needed data found. Abort."
            print(error)
            return error

        # in case it's needed to populate the DB
        print("there aren't GMM and pca_transform: computing.")

        # k is the GMM dimension
        k = 512
        n_sample = k * 100

        # choose n_sample descriptors at random
        sample_indices = np.random.choice(all_desc.shape[0], n_sample)
        sample = all_desc[sample_indices]

        # compute PCA and transform the samples
        pca_transform = myPCA(sample, k)
        sample = pca_transform.transform(sample)

        # train GMM
        print("Start fitting GMM")
        GMM_ = GaussianMixture(n_components=k, covariance_type='diag',
                               verbose_interval=1)
        t1 = time.time()
        GMM_.fit(sample)
        # The format operator belongs inside the call; outside it was
        # applied to the return value of print().
        print("GMM fit in %s" % (time.time() - t1))

        # Get GMM matrices and convert them to FP32 (from FP64)
        w_, mu_, sigma_ = GMM_.weights_, GMM_.means_, GMM_.covariances_
        gmm = (w_.astype('float32'), mu_.astype('float32'),
               sigma_.astype('float32'))

        # Save GMM
        h5f = h5py.File("GMM.h5", 'w')
        try:
            h5f.create_dataset('gmm1', data=gmm[0])
            h5f.create_dataset('gmm2', data=gmm[1])
            h5f.create_dataset('gmm3', data=gmm[2])
        finally:
            h5f.close()

        # Save PCA model
        joblib.dump(pca_transform, 'pca_transform_gmm.pkl')

    # compute FVS
    image_fvs = []
    if signature is not None:
        image_descs = np.array(signature)
        image_descs = image_descs.reshape(1, -1)
        image_descs = image_descs.astype('float32')

    for image_desc in image_descs:
        # apply the PCA to the image descriptor (centred on its own mean)
        image_desc = np.expand_dims(image_desc, axis=0)
        image_desc = pca_transform.transform(image_desc - image_desc.mean())
        # yael expects a C-contiguous float32 matrix
        image_desc = np.ascontiguousarray(image_desc, dtype=np.float32)
        # compute the Fisher vector, using only the derivative w.r.t mu
        fv = ynumpy.fisher(gmm, image_desc, include='mu')
        image_fvs.append(fv)
    print("FVS processed.")

    # make one matrix with all FVs
    image_fvs = np.vstack(image_fvs)

    # compute PCA to reduce FVs dimensionality (which is k^2)
    if signature is None:
        pca_transform2 = myPCA(image_fvs, dim=512)
        image_fvs = pca_transform2.transform(image_fvs)

        # Save FVS PCA
        joblib.dump(pca_transform2, 'pca_transform_fvs.pkl')

        # Save processed vectors that must be inserted in the DB
        h5f = h5py.File("image_fvs.h5", 'w')
        try:
            h5f.create_dataset('image_fvs', data=np.real(image_fvs))
        finally:
            h5f.close()

        print("YAEL SCRIPT: Mission accomplished!")
        return "YAEL SCRIPT: Mission accomplished!"

    pca_transform2 = joblib.load('pca_transform_fvs.pkl')
    image_fv = pca_transform2.transform(image_fvs)
    return image_fv.tolist()
def create_fisher_vector(gmm_list, video_desc, fv_file, fv_sqrt=False, fv_l2=False):
    """Build, optionally normalise, and save one Fisher vector per descriptor type.

    Args:
        gmm_list: sequence of ``(gmm, mean, pca_transform)`` triples, paired
            in order with the traj, hog, hof and mbh descriptors.
            ``pca_transform`` may be ``None`` to skip the projection.
        video_desc: object exposing ``traj``, ``hog``, ``hof`` and ``mbh``
            arrays (one descriptor per row).
        fv_file: output path; ``'.mat'`` is appended and the list is stored
            under the key ``'fv'``.
        fv_sqrt: apply signed square-root (power) normalisation.
        fv_l2: apply L2 normalisation.

    Returns:
        The list of Fisher vectors.  Descriptor types whose array is empty
        are skipped, so the list can be shorter than four entries.
    """
    vid_desc_list = [
        video_desc.traj,
        video_desc.hog,
        video_desc.hof,
        video_desc.mbh,
    ]

    fvs = []
    for descriptor, gmm_mean_pca in zip(vid_desc_list, gmm_list):
        if not descriptor.size:
            continue
        gmm, mean, pca_transform = gmm_mean_pca

        # Centre the descriptors, then project them if a PCA matrix is given.
        # Identity test on purpose: comparing an ndarray to None with `!=`
        # is evaluated elementwise and cannot be used as a condition.
        descrip = descriptor.astype('float32') - mean
        if pca_transform is not None:
            descrip = np.dot(descrip, pca_transform)

        # compute the Fisher vector, using the derivative w.r.t mu and sigma
        fv = ynumpy.fisher(gmm, descrip, include=['mu', 'sigma'])

        # normalizations are done on each descriptor individually
        if fv_sqrt:
            # power-normalization
            fv = np.sign(fv) * (np.abs(fv) ** 0.5)
        if fv_l2:
            # L2 normalize over the whole vector
            norms = np.sqrt(np.sum(fv ** 2))
            fv /= norms

        # A zero vector divided by its zero norm yields NaN; replace with a
        # value far away from "normal" vectors.
        fv[np.isnan(fv)] = 100

        fvs.append(fv.T)

    # Entries follow the traj, hog, hof, mbh order (minus skipped ones).
    scipy.io.savemat(fv_file + '.mat', mdict={'fv': fvs}, oned_as='row')
    print(fv_file)
    return fvs
def train_system(train_filenames, train_labels, detector, options):
    """Train the Fisher-vector + linear-SVM pipeline on a set of images.

    Args:
        train_filenames: paths of the training images.
        train_labels: one label per training image.
        detector: feature detector handed to the extraction helpers.
        options: settings object; the attributes read here are
            ``spatial_pyramids``, ``detector_options``, ``apply_pca``,
            ``ncomp_pca``, ``kmeans`` and ``apply_normalization``.

    Returns:
        ``(stdSlr_features, pca, gmm, stdSlr, clf)``: the descriptor scaler
        (left unfitted when ``options.apply_pca`` is false), the PCA model
        (``None`` without PCA), the GMM, the Fisher-vector scaler and the
        trained classifier.
    """
    # Read the images and extract the SIFT features.
    Train_descriptors = []
    for filename in train_filenames:
        print('Reading image ' + filename)
        ima = cv2.imread(filename)
        gray = cv2.cvtColor(ima, cv2.COLOR_BGR2GRAY)
        if options.spatial_pyramids:
            des = spatial_pyramid(gray, detector, options)
        else:
            des = extract_SIFT_features(gray, detector,
                                        options.detector_options)
        Train_descriptors.append(des)

    # Stack all descriptors once instead of growing the matrix per image.
    D = np.vstack(Train_descriptors)

    stdSlr_features = StandardScaler()
    pca = None
    if options.apply_pca:
        stdSlr_features = StandardScaler().fit(D)
        D = stdSlr_features.transform(D)
        pca = PCA(n_components=options.ncomp_pca)
        pca.fit(D)
        D = pca.transform(D)

    print('Computing gmm with ' + str(options.kmeans) + ' centroids')
    init = time.time()
    gmm = ynumpy.gmm_learn(np.float32(D), options.kmeans)
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    # Each Fisher vector holds mu and sigma gradients: 2 * k * d values,
    # where d is the width of the matrix the GMM was learnt on.
    num_features = D.shape[1]

    init = time.time()
    fisher = np.zeros(
        (len(Train_descriptors), options.kmeans * num_features * 2),
        dtype=np.float32)
    for i, descriptor in enumerate(Train_descriptors):
        if options.apply_pca:
            descriptor = stdSlr_features.transform(descriptor)
            descriptor = pca.transform(descriptor)
        # Same float32 cast as for gmm_learn above; scaler/PCA output is
        # not float32.
        descriptor = np.ascontiguousarray(descriptor, dtype=np.float32)
        fisher[i, :] = ynumpy.fisher(gmm, descriptor,
                                     include=['mu', 'sigma'])
    end = time.time()
    print('Done in ' + str(end - init) + ' secs.')

    if options.apply_normalization:
        fisher = applyNormalization(fisher, options)

    # Train a linear SVM classifier
    stdSlr = StandardScaler().fit(fisher)
    D_scaled = stdSlr.transform(fisher)
    print('Training the SVM classifier...')
    clf = svm.SVC(kernel='linear', C=1).fit(D_scaled, train_labels)
    print('Done!')

    return stdSlr_features, pca, gmm, stdSlr, clf
def _compute_vd_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path,
                            pca_reduction=False, treelike=True, clusters_path=None, verbose=False):
    """Compute and pickle videodarwin descriptors from per-frame Fisher vectors.

    For every train/test partition ``k`` and every video index in
    ``indices``, one file ``<feats_path>/<feat_t>-<k>/<videoname>.pkl`` is
    written per feature type.  Files that already exist are not recomputed.

    Args:
        tracklets_path: directory with one sub-directory per feature type
            plus ``obj``, each holding ``<videoname>.pkl``.
        intermediates_path: directory with the pickled models, named
            ``gmm[-|_pca-]<feat_t>-<k>.pkl``.
        videonames: names of all videos.
        traintest_parts: partitions; only their count is used here.
        indices: indices into ``videonames`` to process.
        feat_types: feature type names.
        feats_path: output root directory.
        pca_reduction: apply the cached PCA before encoding.
        treelike: if true store ``dict(tree=...)`` with one descriptor per
            tree node, otherwise ``dict(v=...)`` for the whole video.
        clusters_path: directory of cluster pickles; read only when
            ``treelike`` is true.
        verbose: print progress.
    """
    try:
        makedirs(feats_path)
    except OSError:
        pass  # most commonly: the directory already exists

    for k, _ in enumerate(traintest_parts):
        for feat_t in feat_types:
            try:
                makedirs(join(feats_path, feat_t + '-' + str(k)))
            except OSError:
                pass

        # pca and gmm models, loaded only if some video needs processing
        cache = None

        for i in indices:
            out_paths = dict(
                (feat_t, join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl'))
                for feat_t in feat_types)

            # FV computed for all feature types?
            if all(isfile(out_paths[feat_t]) for feat_t in feat_types):
                if verbose:
                    print('[_compute_vd_descriptors] %s -> OK' % videonames[i])
                continue

            if cache is None:
                cache = dict()
                for feat_t in feat_types:
                    model_name = ('gmm' + ('_pca-' if pca_reduction else '-')
                                  + feat_t + '-' + str(k) + '.pkl')
                    with open(join(intermediates_path, model_name), 'rb') as f:
                        cache[feat_t] = cPickle.load(f)

            start_time = time.time()

            # object features; column 0 is used as the frame id of each row
            with open(join(tracklets_path, 'obj', videonames[i] + '.pkl'), 'rb') as f:
                obj = cPickle.load(f)
            clusters = None
            if treelike:
                with open(join(clusters_path, videonames[i] + '.pkl'), 'rb') as f:
                    clusters = cPickle.load(f)

            for feat_t in feat_types:
                output_filepath = out_paths[feat_t]
                if isfile(output_filepath):
                    continue

                # load video tracklets' feature
                with open(join(tracklets_path, feat_t, videonames[i] + '.pkl'), 'rb') as f:
                    d = cPickle.load(f)
                if feat_t == 'trj':  # (special case)
                    d = convert_positions_to_displacements(d)

                if feat_t == 'mbh':
                    # L1-normalise the two halves of the descriptor separately
                    half = d.shape[1] // 2
                    dx = preprocessing.normalize(d[:, :half], norm='l1', axis=1)
                    dy = preprocessing.normalize(d[:, half:], norm='l1', axis=1)
                    d = np.hstack((dx, dy))
                else:
                    d = preprocessing.normalize(d, norm='l1', axis=1)

                d = rootSIFT(d)

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(d)  # reduce dimensionality

                d = np.ascontiguousarray(d, dtype=np.float32)  # required in many of Yael functions

                gmm = cache[feat_t]['gmm']
                fv_feats = INTERNAL_PARAMETERS['fv_repr_feats']

                if not treelike:
                    # one Fisher vector per frame, no normalization
                    # (it's done when computing darwin)
                    frames = obj[:, 0]
                    V = [ynumpy.fisher(gmm, d[np.where(frames == f)[0], :], include=fv_feats)
                         for f in np.unique(frames)]
                    vd = videodarwin.darwin(np.array(V))
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=vd), f)
                else:
                    # separately the descriptors of the tree nodes
                    vdtree = dict()
                    if len(clusters['tree']) == 1:
                        frames = obj[:, 0]
                        V = [ynumpy.fisher(gmm, d[np.where(frames == f)[0], :], include=fv_feats)
                             for f in np.unique(frames)]
                        vdtree[1] = videodarwin.darwin(np.array(V))
                    else:
                        T = reconstruct_tree_from_leafs(np.unique(clusters['int_paths']))
                        for parent_idx, children_inds in T.items():
                            node_inds = np.where(np.any(
                                [clusters['int_paths'] == idx for idx in children_inds], axis=0))[0]
                            node_frames = obj[node_inds, 0]
                            # The frame mask is relative to node_inds, so it
                            # must be mapped back through node_inds before
                            # indexing the full matrix d.
                            V = [ynumpy.fisher(gmm, d[node_inds[node_frames == f], :], include=fv_feats)
                                 for f in np.unique(node_frames)]
                            vdtree[parent_idx] = videodarwin.darwin(np.array(V))
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=vdtree), f)

            elapsed_time = time.time() - start_time
            if verbose:
                print('[_compute_vd_descriptors] %s -> DONE (in %.2f secs)' % (videonames[i], elapsed_time))
def _compute_vd_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path,
                            pca_reduction=False, treelike=True, clusters_path=None, verbose=False):
    """Compute and pickle videodarwin descriptors from per-frame Fisher vectors.

    For every train/test partition ``k`` and every video index in
    ``indices``, one file ``<feats_path>/<feat_t>-<k>/<videoname>.pkl`` is
    written per feature type.  Files that already exist are not recomputed.

    Args:
        tracklets_path: directory with one sub-directory per feature type
            plus ``obj``, each holding ``<videoname>.pkl``.
        intermediates_path: directory with the pickled models, named
            ``gmm[-|_pca-]<feat_t>-<k>.pkl``.
        videonames: names of all videos.
        traintest_parts: partitions; only their count is used here.
        indices: indices into ``videonames`` to process.
        feat_types: feature type names.
        feats_path: output root directory.
        pca_reduction: apply the cached PCA before encoding.
        treelike: if true store ``dict(tree=...)`` with one descriptor per
            tree node, otherwise ``dict(v=...)`` for the whole video.
        clusters_path: directory of cluster pickles; read only when
            ``treelike`` is true.
        verbose: print progress.
    """
    try:
        makedirs(feats_path)
    except OSError:
        pass  # most commonly: the directory already exists

    for k, _ in enumerate(traintest_parts):
        for feat_t in feat_types:
            try:
                makedirs(join(feats_path, feat_t + '-' + str(k)))
            except OSError:
                pass

        # pca and gmm models, loaded only if some video needs processing
        cache = None

        for i in indices:
            out_paths = dict(
                (feat_t, join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl'))
                for feat_t in feat_types)

            # FV computed for all feature types?
            if all(isfile(out_paths[feat_t]) for feat_t in feat_types):
                if verbose:
                    print('[_compute_vd_descriptors] %s -> OK' % videonames[i])
                continue

            if cache is None:
                cache = dict()
                for feat_t in feat_types:
                    model_name = ('gmm' + ('_pca-' if pca_reduction else '-')
                                  + feat_t + '-' + str(k) + '.pkl')
                    with open(join(intermediates_path, model_name), 'rb') as f:
                        cache[feat_t] = cPickle.load(f)

            start_time = time.time()

            # object features; column 0 is used as the frame id of each row
            with open(join(tracklets_path, 'obj', videonames[i] + '.pkl'), 'rb') as f:
                obj = cPickle.load(f)
            clusters = None
            if treelike:
                with open(join(clusters_path, videonames[i] + '.pkl'), 'rb') as f:
                    clusters = cPickle.load(f)

            for feat_t in feat_types:
                output_filepath = out_paths[feat_t]
                if isfile(output_filepath):
                    continue

                # load video tracklets' feature
                with open(join(tracklets_path, feat_t, videonames[i] + '.pkl'), 'rb') as f:
                    d = cPickle.load(f)
                if feat_t == 'trj':  # (special case)
                    d = convert_positions_to_displacements(d)

                if feat_t == 'mbh':
                    # L1-normalise the two halves of the descriptor separately
                    half = d.shape[1] // 2
                    dx = preprocessing.normalize(d[:, :half], norm='l1', axis=1)
                    dy = preprocessing.normalize(d[:, half:], norm='l1', axis=1)
                    d = np.hstack((dx, dy))
                else:
                    d = preprocessing.normalize(d, norm='l1', axis=1)

                d = rootSIFT(d)

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(d)  # reduce dimensionality

                d = np.ascontiguousarray(d, dtype=np.float32)  # required in many of Yael functions

                gmm = cache[feat_t]['gmm']
                fv_feats = INTERNAL_PARAMETERS['fv_repr_feats']

                if not treelike:
                    # one Fisher vector per frame, no normalization
                    # (it's done when computing darwin)
                    frames = obj[:, 0]
                    V = [ynumpy.fisher(gmm, d[np.where(frames == f)[0], :], include=fv_feats)
                         for f in np.unique(frames)]
                    vd = videodarwin.darwin(np.array(V))
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=vd), f)
                else:
                    # separately the descriptors of the tree nodes
                    vdtree = dict()
                    if len(clusters['tree']) == 1:
                        frames = obj[:, 0]
                        V = [ynumpy.fisher(gmm, d[np.where(frames == f)[0], :], include=fv_feats)
                             for f in np.unique(frames)]
                        vdtree[1] = videodarwin.darwin(np.array(V))
                    else:
                        T = reconstruct_tree_from_leafs(np.unique(clusters['int_paths']))
                        for parent_idx, children_inds in T.items():
                            node_inds = np.where(np.any(
                                [clusters['int_paths'] == idx for idx in children_inds], axis=0))[0]
                            node_frames = obj[node_inds, 0]
                            # The frame mask is relative to node_inds, so it
                            # must be mapped back through node_inds before
                            # indexing the full matrix d.
                            V = [ynumpy.fisher(gmm, d[node_inds[node_frames == f], :], include=fv_feats)
                                 for f in np.unique(node_frames)]
                            vdtree[parent_idx] = videodarwin.darwin(np.array(V))
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=vdtree), f)

            elapsed_time = time.time() - start_time
            if verbose:
                print('[_compute_vd_descriptors] %s -> DONE (in %.2f secs)' % (videonames[i], elapsed_time))
len(Train_descriptors[i])] = Train_descriptors[i] startingpoint += len(Train_descriptors[i]) k = 32 print 'Computing gmm with ' + str(k) + ' centroids' init = time.time() gmm = ynumpy.gmm_learn(np.float32(D), k) end = time.time() print 'Done in ' + str(end - init) + ' secs.' init = time.time() fisher = np.zeros((len(Train_descriptors), k * 128 * 2), dtype=np.float32) for i in xrange(len(Train_descriptors)): fisher[i, :] = ynumpy.fisher(gmm, Train_descriptors[i], include=['mu', 'sigma']) end = time.time() print 'Done in ' + str(end - init) + ' secs.' # Train a linear SVM classifier stdSlr = StandardScaler().fit(fisher) D_scaled = stdSlr.transform(fisher) print 'Training the SVM classifier...' clf = svm.SVC(kernel='linear', C=1).fit(D_scaled, train_labels) print 'Done!' # get all the test data and predict their labels fisher_test = np.zeros((len(test_images_filenames), k * 128 * 2),
def _compute_fv_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path,
                            pca_reduction=True, treelike=True, clusters_path=None):
    """Compute and pickle Fisher-vector descriptors for the selected videos.

    For every train/test partition ``k`` and every video index in
    ``indices``, one file ``<feats_path>/<feat_t>-<k>/<videoname>.pkl`` is
    written per feature type.  A video is skipped when the file for the
    LAST feature type already exists.

    Args:
        tracklets_path: directory with one sub-directory per feature type,
            each holding ``<videoname>.pkl``.
        intermediates_path: directory with the pickled models, named
            ``gmm[-|_pca-]<feat_t>-<k>.pkl``.
        videonames: names of all videos.
        traintest_parts: partitions; only their count is used here.
        indices: indices into ``videonames`` to process.
        feat_types: feature type names.
        feats_path: output root directory.
        pca_reduction: apply the cached PCA before encoding.
        treelike: if true store ``dict(tree=...)`` with one normalised
            Fisher vector per tree node, otherwise ``dict(v=...)`` with a
            single vector for the whole video.
        clusters_path: directory of cluster pickles; read only when
            ``treelike`` is true.
    """
    if not exists(feats_path):
        makedirs(feats_path)

    for k, _ in enumerate(traintest_parts):
        # cach'd pca and gmm
        cache = dict()
        for feat_t in feat_types:
            # Build every path with join() so the directory created here is
            # the one the output file is later written to, with or without
            # a trailing separator on the base paths.
            feat_dir = join(feats_path, feat_t + '-' + str(k))
            if not exists(feat_dir):
                makedirs(feat_dir)
            model_name = ('gmm' + ('_pca-' if pca_reduction else '-')
                          + feat_t + '-' + str(k) + '.pkl')
            with open(join(intermediates_path, model_name), 'rb') as f:
                cache[feat_t] = cPickle.load(f)

        # process videos
        for i in indices:
            # Only the last feature type's file is checked.
            output_filepath = join(feats_path, feat_types[-1] + '-' + str(k), videonames[i] + '.pkl')
            if isfile(output_filepath):
                print('%s -> OK' % output_filepath)
                continue

            start_time = time.time()

            clusters = None
            if treelike:
                with open(join(clusters_path, videonames[i] + '.pkl'), 'rb') as f:
                    clusters = cPickle.load(f)

            for feat_t in feat_types:
                # load video tracklets' feature
                with open(join(tracklets_path, feat_t, videonames[i] + '.pkl'), 'rb') as f:
                    d = cPickle.load(f)
                if feat_t == 'trj':  # (special case)
                    d = convert_positions_to_displacements(d)

                # pre-processing, see https://hal.inria.fr/hal-00873267v2/document
                d = rootSIFT(preprocessing.normalize(d, norm='l1', axis=1))

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(d)  # reduce dimensionality

                d = np.ascontiguousarray(d, dtype=np.float32)  # required in many of Yael functions

                output_filepath = join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl')
                gmm = cache[feat_t]['gmm']
                fv_feats = INTERNAL_PARAMETERS['fv_repr_feats']

                if not treelike:
                    fv = ynumpy.fisher(gmm, d, include=fv_feats)
                    fv = preprocessing.normalize(fv)
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=fv), f)
                else:
                    # separately the FVs of the tree nodes
                    T = reconstruct_tree_from_leafs(np.unique(clusters['int_paths']))
                    fvtree = dict()
                    for parent_idx, children_inds in T.items():
                        # rows of d whose path matches any child of this node
                        node_inds = np.where(np.any(
                            [clusters['int_paths'] == idx for idx in children_inds], axis=0))[0]
                        fv = ynumpy.fisher(gmm, d[node_inds, :], include=fv_feats)
                        # power + L2 normalisation, see
                        # https://www.robots.ox.ac.uk/~vgg/rg/papers/peronnin_etal_ECCV10.pdf
                        fvtree[parent_idx] = normalize(rootSIFT(fv, p=0.5), norm='l2')
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=fvtree), f)

            elapsed_time = time.time() - start_time
            print('%s -> DONE (in %.2f secs)' % (videonames[i], elapsed_time))
video_data = np.genfromtxt(DATASET_PATH + vname) # delete first ten columns video_data = video_data[:, 10:] video_data = video_data.astype('float32') # seperate data into different features video_data_traj = video_data[:, 0:30] video_data_hog = video_data[:, 30:126] video_data_hof = video_data[:, 126:234] video_data_mbh = video_data[:, 234:426] # apply the PCA to the image descriptor video_data_traj = np.dot(video_data_traj - mean_traj, pca_traj) video_data_hog = np.dot(video_data_hog - mean_hog, pca_hog) video_data_hof = np.dot(video_data_hof - mean_hof, pca_hof) video_data_mbh = np.dot(video_data_mbh - mean_mbh, pca_mbh) # compute the Fisher vector, using the derivative w.r.t mu and sigma fv_traj = ynumpy.fisher(gmm_traj, video_data_traj, include='mu, sigma') fv_hog = ynumpy.fisher(gmm_hog, video_data_hog, include='mu, sigma') fv_hof = ynumpy.fisher(gmm_hof, video_data_hof, include='mu, sigma') fv_mbh = ynumpy.fisher(gmm_mbh, video_data_mbh, include='mu, sigma') # concatenate the fisher vectors fv = np.concatenate((fv_traj, fv_hog, fv_hof, fv_mbh)) print fv.shape image_fvs.append(fv) # make one matrix with all FVs image_fvs = np.vstack(image_fvs) # normalizations are done on all descriptors at once # power-normalization image_fvs = np.sign(image_fvs) * np.abs(image_fvs)**0.5 # L2 normalize norms = np.sqrt(np.sum(image_fvs**2, 1))
def _compute_vd_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, indices, feat_types, feats_path,
                            pca_reduction=True, treelike=True, clusters_path=None):
    """Compute and pickle normalised videodarwin descriptors for the selected videos.

    For every train/test partition ``k`` and every video index in
    ``indices``, one file ``<feats_path>/<feat_t>-<k>/<videoname>.pkl`` is
    written per feature type.  A video is skipped when the file for the
    LAST feature type already exists.

    Args:
        tracklets_path: directory with one sub-directory per feature type
            plus ``obj``, each holding ``<videoname>.pkl``.
        intermediates_path: directory with the pickled models, named
            ``gmm[-|_pca-]<feat_t>-<k>.pkl``.
        videonames: names of all videos.
        traintest_parts: partitions; only their count is used here.
        indices: indices into ``videonames`` to process.
        feat_types: feature type names.
        feats_path: output root directory.
        pca_reduction: apply the cached PCA before encoding.
        treelike: if true store ``dict(tree=...)`` with one descriptor per
            tree node, otherwise ``dict(v=...)`` for the whole video.
        clusters_path: directory of cluster pickles; read only when
            ``treelike`` is true.
    """
    if not exists(feats_path):
        makedirs(feats_path)

    for k, _ in enumerate(traintest_parts):
        # cach'd pca and gmm
        cache = dict()
        for feat_t in feat_types:
            # Build every path with join() so the directory created here is
            # the one the output file is later written to, with or without
            # a trailing separator on the base paths.
            feat_dir = join(feats_path, feat_t + '-' + str(k))
            if not exists(feat_dir):
                makedirs(feat_dir)
            model_name = ('gmm' + ('_pca-' if pca_reduction else '-')
                          + feat_t + '-' + str(k) + '.pkl')
            with open(join(intermediates_path, model_name), 'rb') as f:
                cache[feat_t] = cPickle.load(f)

        # process videos
        for i in indices:
            # Only the last feature type's file is checked.
            output_filepath = join(feats_path, feat_types[-1] + '-' + str(k), videonames[i] + '.pkl')
            if isfile(output_filepath):
                print('%s -> OK' % output_filepath)
                continue

            start_time = time.time()

            # object features; column 0 is used as the frame id of each row
            with open(join(tracklets_path, 'obj', videonames[i] + '.pkl'), 'rb') as f:
                obj = cPickle.load(f)
            clusters = None
            if treelike:
                with open(join(clusters_path, videonames[i] + '.pkl'), 'rb') as f:
                    clusters = cPickle.load(f)

            for feat_t in feat_types:
                # load video tracklets' feature
                with open(join(tracklets_path, feat_t, videonames[i] + '.pkl'), 'rb') as f:
                    d = cPickle.load(f)
                if feat_t == 'trj':  # (special case)
                    d = convert_positions_to_displacements(d)

                # pre-processing, see https://hal.inria.fr/hal-00873267v2/document
                d = rootSIFT(preprocessing.normalize(d, norm='l1', axis=1))

                if pca_reduction:
                    d = cache[feat_t]['pca'].transform(d)  # reduce dimensionality

                d = np.ascontiguousarray(d, dtype=np.float32)  # required in many of Yael functions

                output_filepath = join(feats_path, feat_t + '-' + str(k), videonames[i] + '.pkl')
                gmm = cache[feat_t]['gmm']
                fv_feats = INTERNAL_PARAMETERS['fv_repr_feats']

                if not treelike:
                    # one Fisher vector per frame, no normalization
                    # (it's done when computing darwin)
                    frames = obj[:, 0]
                    V = [ynumpy.fisher(gmm, d[np.where(frames == f)[0], :], include=fv_feats)
                         for f in np.unique(frames)]
                    vd = normalize(videodarwin.darwin(np.array(V)))
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(v=vd), f)
                else:
                    # separately the descriptors of the tree nodes
                    T = reconstruct_tree_from_leafs(np.unique(clusters['int_paths']))
                    vdtree = dict()
                    for parent_idx, children_inds in T.items():
                        node_inds = np.where(np.any(
                            [clusters['int_paths'] == idx for idx in children_inds], axis=0))[0]
                        node_frames = obj[node_inds, 0]
                        # The frame mask is relative to node_inds, so it must
                        # be mapped back through node_inds before indexing
                        # the full matrix d.
                        V = [ynumpy.fisher(gmm, d[node_inds[node_frames == f], :], include=fv_feats)
                             for f in np.unique(node_frames)]
                        vdtree[parent_idx] = normalize(videodarwin.darwin(np.array(V)))
                    with open(output_filepath, 'wb') as f:
                        cPickle.dump(dict(tree=vdtree), f)

            elapsed_time = time.time() - start_time
            print('%s -> DONE (in %.2f secs)' % (videonames[i], elapsed_time))
# Demo fragment: print the first rows of previously computed centroids,
# learn a 3-component GMM on `v`, and compute the Fisher vector of two
# perturbed copies of the first GMM mean.
# NOTE(review): `centroids` and `v` are defined outside this excerpt.
print "result centroids ="
print centroids[:10,:]
print "gmm:"
gmm = ynumpy.gmm_learn(v, 3)
# the learnt model unpacks into three parts
(w, mu, sigma) = gmm
print "mu = "
print mu
print "sigma = "
print sigma
# two copies of the first mean row; the commented-out rows below were
# additional points that are currently disabled
muc = numpy.vstack((mu[0, :], mu[0, :]));
# mu[1, :],
# mu[1, :],
# mu[1, :]))
print "mu=", mu
# perturb the points with random noise (numpy.random.normal's first two
# arguments are loc and scale)
muc += numpy.random.normal(-0.02, 0.02, size = muc.shape)
print "muc=", muc
# no `include` argument is passed here, so ynumpy.fisher uses its default
fish = ynumpy.fisher(gmm, muc)
print fish
video_data = np.genfromtxt(DATASET_PATH + vname) # delete first ten columns video_data = video_data[:,10:] video_data = video_data.astype('float32') # seperate data into different features video_data_traj = video_data[:, 0:30] video_data_hog = video_data[:, 30:126] video_data_hof = video_data[:, 126:234] video_data_mbh = video_data[:, 234:426] # apply the PCA to the image descriptor video_data_traj = np.dot(video_data_traj - mean_traj, pca_traj) video_data_hog = np.dot(video_data_hog - mean_hog, pca_hog) video_data_hof = np.dot(video_data_hof - mean_hof, pca_hof) video_data_mbh = np.dot(video_data_mbh - mean_mbh, pca_mbh) # compute the Fisher vector, using the derivative w.r.t mu and sigma fv_traj = ynumpy.fisher(gmm_traj, video_data_traj, include = 'mu, sigma') fv_hog = ynumpy.fisher(gmm_hog, video_data_hog, include = 'mu, sigma') fv_hof = ynumpy.fisher(gmm_hof, video_data_hof, include = 'mu, sigma') fv_mbh = ynumpy.fisher(gmm_mbh, video_data_mbh, include = 'mu, sigma') # concatenate the fisher vectors fv = np.concatenate((fv_traj, fv_hog, fv_hof, fv_mbh)) print fv.shape image_fvs.append(fv) # make one matrix with all FVs image_fvs = np.vstack(image_fvs) # normalizations are done on all descriptors at once # power-normalization image_fvs = np.sign(image_fvs) * np.abs(image_fvs) ** 0.5 # L2 normalize norms = np.sqrt(np.sum(image_fvs ** 2, 1))
def getCrossVal(folds_num, folds_descriptors, start, nfeatures, code_size, kernel, C, output_layer, n_comps, reduction, decision, sampling_step, sampling_type):
    """Run a leave-one-fold-out evaluation and return the per-fold accuracies.

    Each fold in turn is held out for testing while the remaining folds
    train an SVM, either on raw descriptors (``decision == 'svm'``) or on
    Fisher vectors built per pyramid level (``decision == 'bow'``).  Any
    other ``decision`` prints a message and calls ``quit()``.

    ``folds_descriptors[j]`` must provide the keys ``'descriptors'`` and
    ``'label_per_descriptor'``.  ``start`` is accepted but not used.

    Returns a numpy array with one accuracy (in percent) per fold.
    """
    fold_scores = []
    for fold_i in range(folds_num):
        # Gather descriptors and labels from every fold but the held-out one.
        gathered = []
        train_labels = []
        for other in range(folds_num):
            if other == fold_i:
                continue
            gathered.extend(folds_descriptors[other]['descriptors'])
            train_labels.extend(folds_descriptors[other]['label_per_descriptor'])
        Train_descriptors = np.asarray(gathered)

        # D is filled from the first pyramid level of every image only.
        # NOTE(review): the buffer is uint8, so non-integer values are
        # truncated on assignment -- confirm this is intended.
        size_descriptors = Train_descriptors[0][0].shape[-1]
        n_rows = np.sum([len(p[0]) for p in Train_descriptors])
        D = np.zeros((n_rows, size_descriptors), dtype=np.uint8)
        offset = 0
        for img in Train_descriptors:
            stop = offset + len(img[0])
            D[offset:stop] = img[0]
            offset = stop

        if reduction == 'pca':
            D, pca_reducer = PCA_reduce(D, n_comps)

        if decision == 'bow':
            k = code_size
            # Compute Codebook
            gmm = compute_codebook(D, k, nfeatures, fold_i, output_layer,
                                   D.shape[1], sampling_step, sampling_type)
            init = time.time()
            n_levels = Train_descriptors.shape[1]
            fv_len = k * D.shape[1] * 2  # length of one level's Fisher vector
            samples = np.zeros((len(Train_descriptors), fv_len * n_levels),
                               dtype=np.float32)
            for i in range(len(Train_descriptors)):
                for level in range(n_levels):
                    des = Train_descriptors[i][level]
                    if reduction == 'pca':
                        des = pca_reducer.transform(des)
                    samples[i, level * fv_len:(level + 1) * fv_len] = ynumpy.fisher(
                        gmm, np.float32(des), include=['mu', 'sigma'])
            end = time.time()
            print('Done in ' + str(end - init) + ' secs.')
        elif decision == 'svm':
            samples = D
        else:
            print('wrong decision type use: bow or svm')
            quit()

        # Train the SVM classifier on standardised samples
        stdSlr = StandardScaler().fit(samples)
        D_scaled = stdSlr.transform(samples)
        print('Training the SVM classifier...')
        clf = svm.SVC(kernel=kernel, C=C).fit(D_scaled, train_labels)
        print('Done!')

        # Held-out fold: same encoding as for training
        test_labels = folds_descriptors[fold_i]['label_per_descriptor']
        test_images_desc = np.asarray(folds_descriptors[fold_i]['descriptors'])
        print(test_images_desc.shape)

        if decision == 'bow':
            n_test_levels = test_images_desc.shape[1]
            fv_len = k * D.shape[1] * 2
            fisher_test = np.zeros((len(test_images_desc), fv_len * n_test_levels),
                                   dtype=np.float32)
            for i in range(len(test_images_desc)):
                for level in range(n_test_levels):
                    des = test_images_desc[i][level]
                    if reduction == 'pca':
                        des = pca_reducer.transform(des)
                    fisher_test[i, level * fv_len:(level + 1) * fv_len] = ynumpy.fisher(
                        gmm, np.float32(des), include=['mu', 'sigma'])
            test_images_desc = fisher_test
        else:
            test_images_desc = test_images_desc.squeeze()
            if reduction == 'pca':
                test_images_desc = pca_reducer.transform(test_images_desc)

        test_images_desc = stdSlr.transform(test_images_desc)
        accuracy = 100 * clf.score(test_images_desc, test_labels)
        print('Fold ' + str(fold_i) + ' accuracy: ' + str(accuracy))
        fold_scores.append(accuracy)

    return np.asarray(fold_scores)
def main(nfeatures=100, code_size=512, n_components=60, kernel='linear', C=1, reduction=None, output_layer='fc2', decision='svm', sampling_step=4, sampling_type='default'):
    """Train and evaluate the classifier on the dataset's train/test split.

    Descriptors come from ``getDescriptors`` using a ``cnn_features`` model
    for ``output_layer``.  With ``decision == 'bow'`` they are encoded as
    per-level Fisher vectors; with any other value the raw descriptor
    matrix is classified directly.  ``reduction == 'pca'`` enables
    ``PCA_reduce`` with ``n_components``.  The final accuracy and the total
    run time are printed; nothing is returned.
    """
    start = time.time()

    # read the train and test files
    train_images_filenames, test_images_filenames, train_labels, test_labels = get_dataset()

    # create the CNN detector object
    cnn_model = cnn_features(output_layer)

    # descriptors are stored per image and per pyramid level
    Train_descriptors, _ = getDescriptors(
        cnn_model, train_images_filenames, train_labels, decision,
        sampling_step, sampling_type)
    Train_descriptors = np.asarray(Train_descriptors)

    # D is filled from the first pyramid level of every image only.
    # NOTE(review): the buffer is uint8, so non-integer values are truncated
    # on assignment -- confirm this is intended.
    size_descriptors = Train_descriptors[0][0].shape[-1]
    n_rows = np.sum([len(p[0]) for p in Train_descriptors])
    D = np.zeros((n_rows, size_descriptors), dtype=np.uint8)
    offset = 0
    for img in Train_descriptors:
        stop = offset + len(img[0])
        D[offset:stop] = img[0]
        offset = stop

    if reduction == 'pca':
        D, pca_reducer = PCA_reduce(D, n_components)

    if decision == 'bow':
        k = code_size
        # Compute Codebook
        gmm = compute_codebook(D, k, nfeatures, None, output_layer,
                               D.shape[1], sampling_step, sampling_type)
        init = time.time()
        n_levels = Train_descriptors.shape[1]
        fv_len = k * D.shape[1] * 2  # length of one level's Fisher vector
        samples = np.zeros((len(Train_descriptors), fv_len * n_levels),
                           dtype=np.float32)
        for i in range(len(Train_descriptors)):
            for level in range(n_levels):
                des = Train_descriptors[i][level]
                if reduction == 'pca':
                    des = pca_reducer.transform(des)
                samples[i, level * fv_len:(level + 1) * fv_len] = ynumpy.fisher(
                    gmm, np.float32(des), include=['mu', 'sigma'])
        end = time.time()
        print('Done in ' + str(end - init) + ' secs.')
    else:
        samples = D

    # Train the SVM classifier on standardised samples
    stdSlr = StandardScaler().fit(samples)
    D_scaled = stdSlr.transform(samples)
    print('Training the SVM classifier...')
    clf = svm.SVC(kernel=kernel, C=C).fit(D_scaled, train_labels)
    print('Done!')

    # Test descriptors are extracted the same way for both decision types.
    test_descriptors, _ = getDescriptors(
        cnn_model, test_images_filenames, test_labels, decision,
        sampling_step, sampling_type)
    test_descriptors = np.asarray(test_descriptors)

    if decision == 'bow':
        fv_len = k * D.shape[1] * 2
        # The buffer width uses the number of levels seen at training time.
        fisher_test = np.zeros(
            (len(test_descriptors), fv_len * Train_descriptors.shape[1]),
            dtype=np.float32)
        for i in range(len(test_descriptors)):
            for level in range(test_descriptors.shape[1]):
                des = test_descriptors[i][level]
                if reduction == 'pca':
                    des = pca_reducer.transform(des)
                fisher_test[i, level * fv_len:(level + 1) * fv_len] = ynumpy.fisher(
                    gmm, np.float32(des), include=['mu', 'sigma'])
        test_images_desc = fisher_test
    else:
        test_images_desc = test_descriptors.squeeze()
        if reduction == 'pca':
            test_images_desc = pca_reducer.transform(test_images_desc)

    test_images_desc = stdSlr.transform(test_images_desc)
    accuracy = 100 * clf.score(test_images_desc, test_labels)
    print('Final accuracy: ' + str(accuracy))

    end = time.time()
    print('Done in ' + str(end - start) + ' secs.')