def update_plabels(self, X, k=50, max_iter=20):
    """Recompute pseudo-labels and per-sample weights by label propagation.

    A kNN graph is built over the rows of ``X`` with a FAISS GPU
    inner-product index, symmetrically normalised, and one linear system
    per class is solved with conjugate gradient. The results are stored in
    ``self.p_labels``, ``self.p_weights`` and ``self.class_weights``.

    Args:
        X: 2-D feature array (rows are samples). It is L2-normalised
            **in place** by ``normalize_L2``.
            NOTE(review): FAISS presumably needs float32, C-contiguous
            input here -- confirm against the caller.
        k: number of neighbours kept per node (the search asks for
            ``k + 1`` and drops the first hit).
        max_iter: iteration cap handed to the conjugate-gradient solver.

    Returns:
        Fraction of samples (over all rows) whose propagated label equals
        ``self.all_labels``, measured before the labeled samples are
        overwritten with their true label.
    """
    print('Updating pseudo-labels...')
    alpha = 0.99
    labels = np.asarray(self.all_labels)
    labeled_idx = np.asarray(self.labeled_idx)
    num_classes = len(self.classes)

    # kNN search for the graph
    d = X.shape[1]
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    # The index is placed on the last visible CUDA device.
    flat_config.device = int(torch.cuda.device_count()) - 1
    index = faiss.GpuIndexFlatIP(res, d, flat_config)  # build the index

    normalize_L2(X)
    index.add(X)
    N = X.shape[0]

    c = time.time()
    knn_sim, knn_idx = index.search(X, k + 1)
    elapsed = time.time() - c
    print('kNN Search done in %d seconds' % elapsed)

    # Create the graph: drop the first hit of every row and cube the
    # remaining similarities.
    knn_sim = knn_sim[:, 1:]**3
    knn_idx = knn_idx[:, 1:]
    row_idx = np.arange(N)
    row_idx_rep = np.tile(row_idx, (k, 1)).T
    W = scipy.sparse.csr_matrix(
        (knn_sim.flatten('F'),
         (row_idx_rep.flatten('F'), knn_idx.flatten('F'))),
        shape=(N, N))
    W = W + W.T

    # Normalize the graph: zero diagonal, then D^-1/2 W D^-1/2.
    W = W - scipy.sparse.diags(W.diagonal())
    S = W.sum(axis=1)
    S[S == 0] = 1  # isolated nodes: avoid dividing by zero
    inv_sqrt_deg = np.array(1. / np.sqrt(S))
    inv_sqrt_deg = scipy.sparse.diags(inv_sqrt_deg.reshape(-1))
    Wn = inv_sqrt_deg * W * inv_sqrt_deg

    # Initialize the y vector for each class (eq 5 from the paper,
    # normalized with the class size) and apply label propagation
    Z = np.zeros((N, num_classes))
    A = scipy.sparse.eye(Wn.shape[0]) - alpha * Wn
    for i in range(num_classes):
        cur_idx = labeled_idx[np.where(labels[labeled_idx] == i)]
        if cur_idx.shape[0] == 0:
            # No labeled sample for this class: y would be all zeros, so
            # the solution of A f = y is zero as well. Skipping also avoids
            # the 1.0 / 0 that previously raised ZeroDivisionError.
            continue
        y = np.zeros((N, ))
        y[cur_idx] = 1.0 / cur_idx.shape[0]
        # NOTE(review): recent SciPy releases rename ``tol`` to ``rtol``;
        # confirm which SciPy version this project pins.
        f, _ = scipy.sparse.linalg.cg(A, y, tol=1e-6, maxiter=max_iter)
        Z[:, i] = f

    # Handle numerical errors
    Z[Z < 0] = 0

    # Compute the weight for each instance based on the entropy
    # (eq 11 from the paper)
    probs_l1 = F.normalize(torch.tensor(Z), 1).numpy()
    probs_l1[probs_l1 < 0] = 0
    entropy = scipy.stats.entropy(probs_l1.T)
    weights = 1 - entropy / np.log(num_classes)
    weights = weights / np.max(weights)
    p_labels = np.argmax(probs_l1, 1)

    # Compute the accuracy of pseudolabels for statistical purposes
    correct_idx = (p_labels == labels)
    acc = correct_idx.mean()

    # Labeled samples keep their ground-truth label with full confidence.
    p_labels[labeled_idx] = labels[labeled_idx]
    weights[labeled_idx] = 1.0

    self.p_weights = weights.tolist()
    self.p_labels = p_labels

    # Compute the weight for each class (inverse of its pseudo-label count)
    for i in range(num_classes):
        count = np.where(p_labels == i)[0].size
        # NOTE(review): a class that received no pseudo-label used to raise
        # ZeroDivisionError; it is now treated as having one member.
        # Confirm this is the desired weight for empty classes.
        self.class_weights[i] = (
            float(labels.shape[0]) / num_classes) / max(count, 1)

    return acc
def device(self):
    """Return a FAISS GPU resources object, or ``None`` when not on GPU.

    ``faiss`` is imported on every call, whether or not ``self.on_gpu``
    is set.
    """
    import faiss

    # For now, consider only one GPU, do not distribute the index
    if not self.on_gpu:
        return None
    return faiss.StandardGpuResources()
def clip_parameters(model, clip):
    """
    Clamp every parameter of `model` into [-clip, clip], in place.
    Nothing is done unless `clip` is strictly positive.
    """
    if not clip > 0:
        return
    for param in model.parameters():
        param.data.clamp_(-clip, clip)


def to_cuda(*args):
    """
    Return a list with each argument moved to CUDA; `None` entries are
    passed through unchanged.
    """
    moved = []
    for item in args:
        moved.append(item.cuda() if item is not None else None)
    return moved


def pad_tensor(tensor, n, pad_value=-1):
    """
    Return a tensor whose first dimension has length `n`: the leading rows
    are copied from `tensor`, the remaining ones are filled with
    `pad_value`. The result is built with `torch.ones` using the dtype of
    `tensor`.
    """
    out_shape = list(tensor.size())
    out_shape[0] = n
    padded = torch.ones(out_shape, dtype=tensor.dtype) * pad_value
    n_rows = tensor.size(0)
    padded[:n_rows] = tensor
    return padded


# Module-level FAISS GPU resources, created once at import time.
FAISS_RES = faiss.StandardGpuResources()
FAISS_RES.setDefaultNullStreamAllDevices()
FAISS_RES.setTempMemory(1200 * 1024 * 1024)
def main():
    """Average each query vector with its nearest database vectors.

    Loads pre-computed query and database vectors from ``vectore_dir``,
    searches the ``top_k`` database neighbours of every query with a FAISS
    GPU inner-product index, replaces each query by the mean of itself and
    those neighbours, and saves the result next to the inputs.

    Raises:
        ValueError: if ``args.datasets`` names a dataset that is not in
            ``datasets_names``.
    """
    args = parser.parse_args()

    # check if there are unknown datasets
    for dataset in args.datasets.split(','):
        if dataset not in datasets_names:
            raise ValueError(
                'Unsupported or unknown dataset: {}!'.format(dataset))

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    vectore_dir = 'best_model/se101_gem/model_epoch1/show_result'

    # NOTE(review): the returned config is not used below; the call is kept
    # in case configdataset validates the data root -- confirm and drop.
    configdataset("pet", get_data_root())

    print('>> query images...')
    qvecs = np.load(
        os.path.join(vectore_dir,
                     "pet_show_alldatabase_qvecs_ep1_resize_pca.npy"))
    print('>> database images...')
    vecs = np.load(
        os.path.join(vectore_dir,
                     "pet_show_alldatabase_vecs_ep1_resize_pca.npy"))

    start = time.time()
    print(vecs.shape, qvecs.shape)
    print(">> compute scores..")

    # Exact inner-product search on GPU 0.
    res = faiss.StandardGpuResources()
    dimension = vecs.shape[1]
    index_flat = faiss.IndexFlatIP(dimension)
    gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
    gpu_index_flat.add(np.ascontiguousarray(vecs))

    top_k = 20
    # Only the neighbour ids are needed; the similarities are discarded.
    _, I = gpu_index_flat.search(np.ascontiguousarray(qvecs), top_k)

    # Each output row is the mean of the query and its top_k neighbours.
    qe_qvecs = np.zeros((qvecs.shape), dtype=np.float32)
    for na in tqdm(range(qe_qvecs.shape[0])):
        qe_qvecs[na, :] = np.vstack(
            (qvecs[na, :][np.newaxis, :], vecs[I[na, :top_k], :])).mean(0)

    np.save(
        os.path.join(vectore_dir,
                     "pet_show_alldatabase_qvecs_ep1_resize_dba.npy"),
        qe_qvecs)

    print('>> time: {}'.format(htime(time.time() - start)))
def __init__(self,
             L_train,
             train_embeddings,
             gpu=False,
             metric='cosine',
             method='pytorch'):
    '''
    Initialize an instance of Epoxy.

    Args:
        L_train: The training matrix to look through to find nearest
            neighbors
        train_embeddings: embeddings of each item in L_train
        gpu: if True, build the FAISS index on GPU
        metric: 'cosine' or 'L2' (case-insensitive) -- prefer cosine
        method: 'pytorch', 'faiss', or 'sklearn'

    Raises:
        NotImplementedError: for an unknown metric or method.
    '''
    # The metric name is normalised once so that 'L2' and 'l2' are both
    # accepted. Previously the validation only accepted 'l2' while the
    # faiss branch only accepted 'L2', so L2 + faiss could never succeed.
    metric_key = metric.lower() if isinstance(metric, str) else metric

    self.L_train = L_train
    self.gpu = gpu
    self.metric = metric_key
    self.method = method
    self.preprocessed = False

    if metric_key not in ('cosine', 'l2'):
        raise NotImplementedError('Metric {} not supported'.format(metric))

    if method == 'faiss':
        if metric_key == 'cosine':
            # Copy because faiss.normalize_L2() modifies the original
            train_embeddings = np.copy(train_embeddings)
            # Normalize the vectors before adding to the index, then use
            # IndexFlatIP (inner product)
            faiss.normalize_L2(train_embeddings)
            make_index = faiss.IndexFlatIP
        else:
            make_index = faiss.IndexFlatL2

        d = train_embeddings.shape[1]
        m = L_train.shape[1]
        self.m = m

        # One index per column of L_train.
        label_fn_indexes = [make_index(d) for _ in range(m)]
        if gpu:
            res = faiss.StandardGpuResources()
            label_fn_indexes = [
                faiss.index_cpu_to_gpu(res, 0, x) for x in label_fn_indexes
            ]

        # Each index only holds the rows where its column is non-zero.
        support = []
        for i in range(m):
            support.append(np.argwhere(L_train[:, i] != 0).flatten())
            label_fn_indexes[i].add(train_embeddings[support[i]])

        self.label_fn_indexes = label_fn_indexes
        self.support = support
    elif method in ('sklearn', 'pytorch'):
        self.train_embeddings = train_embeddings
    else:
        raise NotImplementedError('Method {} not supported'.format(method))
def test_flat(self):
    """Create a flat L2 GPU index and add ``self.xb`` to it."""
    # NOTE(review): the resources object is passed as an unnamed temporary,
    # presumably to check that the index keeps it alive -- confirm before
    # hoisting it into a local.
    gpu_index = faiss.GpuIndexFlat(
        faiss.StandardGpuResources(),
        self.d,
        faiss.METRIC_L2,
    )
    gpu_index.add(self.xb)
def _build_ip_index(res, flat_config, feats):
    """Return a GPU inner-product index holding the rows of ``feats``."""
    index = faiss.GpuIndexFlatIP(res, feats.shape[1], flat_config)
    # NOTE(review): rows are added one at a time, as before; a single bulk
    # add would probably be faster -- confirm nothing relies on this.
    for feat in feats:
        index.add(np.expand_dims(feat, 0))
    return index


def _retrieval_accuracy(gpu_index, test_feats, test_labels, train_labels,
                        batch_size=100, max_rank=100, search_depth=1000):
    """Return the mean hit curve over ranks ``0 .. max_rank - 1``.

    Entry ``r`` is the fraction of test rows whose first correctly
    labelled neighbour appears at rank ``r`` or earlier.
    """
    hits = np.zeros((test_feats.shape[0], max_rank))
    for start in range(0, test_feats.shape[0], batch_size):
        batch = test_feats[start:start + batch_size, :]
        _, result_inds = gpu_index.search(batch.astype('float32'),
                                          search_depth)
        result_labels = train_labels[result_inds]
        for row in range(batch.shape[0]):
            matches = np.where(
                result_labels[row] == test_labels[start + row])[0]
            if len(matches) > 0 and matches[0] < max_rank:
                hits[start + row, matches[0]:] = 1
    return np.mean(hits, axis=0)


def main(pretrained_net, whichGPU):
    """Evaluate retrieval accuracy by hotel and by chain.

    Features, classes and image lists are read from HDF5 files under an
    output directory derived from ``pretrained_net``. For every test split
    the hit curve is saved to HDF5 and a few of its entries are printed.

    Args:
        pretrained_net: checkpoint path. Unless it contains 'ilsvrc2012',
            it must contain a 'ckpts' path component and a '-<iter>'
            suffix.
        whichGPU: id of the GPU that hosts the FAISS index (passed to
            ``int``).
    """
    if 'ilsvrc2012' not in pretrained_net:
        iterStr = pretrained_net.split('-')[-1]
        splitStr = pretrained_net.split('/')
        ckpts_pos = np.where(np.array(splitStr) == 'ckpts')[0][0]
        output_dir = os.path.join('/'.join(splitStr[:ckpts_pos]),
                                  'results_small', iterStr)
    else:
        iterStr = 'ilsvrc2012'
        output_dir = os.path.join('./output/ilsvrc2012/results_small',
                                  iterStr)

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = int(whichGPU)

    train_feats = load_h5('train_feats',
                          os.path.join(output_dir, 'trainFeats.h5'))
    train_classes = load_h5('train_classes',
                            os.path.join(output_dir, 'trainClasses.h5'))

    # ---- retrieval by hotel -------------------------------------------
    gpu_index = _build_ip_index(res, flat_config, train_feats)

    # Only the per-split output directories are read; the split names are
    # the sub-directory names.
    test_names = [
        'by_hotel', 'occluded0', 'occluded1', 'occluded2', 'occluded3'
    ]
    for test_name in test_names:
        test_output_dir = os.path.join(output_dir, test_name)
        test_feats = load_h5('test_feats',
                             os.path.join(test_output_dir, 'testFeats.h5'))
        test_classes = load_h5(
            'test_classes', os.path.join(test_output_dir, 'testClasses.h5'))

        average_accuracy = _retrieval_accuracy(gpu_index, test_feats,
                                               test_classes, train_classes)
        save_h5('average_retrieval_accuracy', average_accuracy, 'f',
                os.path.join(test_output_dir,
                             'average_retrieval_accuracy.h5'))
        # Single formatted string: prints the same text on Python 2 and 3.
        print('%s %s %s %s %s' %
              (iterStr, test_name, average_accuracy[0], average_accuracy[9],
               average_accuracy[99]))

    # ---- retrieval by chain -------------------------------------------
    import json
    with open('./input/test_set.json') as fp:
        jsonTestData = json.load(fp)
    with open('./input/train_set.json') as fp:
        jsonTrainData = json.load(fp)

    # Map hotel class -> chain id, skipping hotels without a chain (-1).
    # Training entries take precedence over test entries.
    cls_to_chain = {}
    for hotel in jsonTrainData:
        if jsonTrainData[hotel]['chainId'] != -1:
            cls_to_chain[int(hotel)] = jsonTrainData[hotel]['chainId']
    for hotel in jsonTestData:
        if jsonTestData[hotel]['chainId'] != -1 and int(
                hotel) not in cls_to_chain:
            cls_to_chain[int(hotel)] = jsonTestData[hotel]['chainId']

    # list(...) is required on Python 3, where a keys view is not a
    # sequence that numpy can compare against.
    by_chain_inds = np.where(
        np.in1d(train_classes, list(cls_to_chain.keys())))[0]

    # Release the by-hotel index before building the by-chain one.
    del gpu_index

    train_feats2 = train_feats[by_chain_inds, :]
    train_classes2 = train_classes[by_chain_inds]
    train_class_to_chain = np.array(
        [cls_to_chain[cls] for cls in train_classes2])

    gpu_index = _build_ip_index(res, flat_config, train_feats2)

    test_names = [
        'by_chain', 'by_chain_occluded0', 'by_chain_occluded1',
        'by_chain_occluded2', 'by_chain_occluded3'
    ]
    for test_name in test_names:
        test_output_dir = os.path.join(output_dir, test_name)
        test_feats = load_h5('test_feats',
                             os.path.join(test_output_dir, 'testFeats.h5'))
        test_classes = load_h5(
            'test_classes', os.path.join(test_output_dir, 'testClasses.h5'))
        test_class_to_chain = np.array(
            [cls_to_chain[cls] for cls in test_classes])

        average_chain_accuracy = _retrieval_accuracy(
            gpu_index, test_feats, test_class_to_chain, train_class_to_chain)
        save_h5(
            'average_chain_retrieval_accuracy', average_chain_accuracy, 'f',
            os.path.join(test_output_dir,
                         'average_chain_retrieval_accuracy.h5'))
        print('%s %s %s %s %s %s' %
              (iterStr, test_name, average_chain_accuracy[0],
               average_chain_accuracy[2], average_chain_accuracy[4],
               average_chain_accuracy[9]))
def get_nn_avg_dist(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.

    "Distance" here is the inner product between a query row and an
    embedding row; the `knn` largest values are averaged per query.
    Returns a 1-D numpy array with one value per query row.
    """
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        # CPU index only: the GPU variant was hard-disabled (`if False`)
        # in the previous version and has been removed as dead code.
        index = faiss.IndexFlatIP(emb.shape[1])
        index.add(emb)
        distances, _ = index.search(query, knn)
        return distances.mean(1)

    # Pure PyTorch fallback, processed in query batches of `bs` rows.
    bs = 1024
    all_distances = []
    emb = emb.transpose(0, 1).contiguous()
    for i in range(0, query.shape[0], bs):
        distances = query[i:i + bs].mm(emb)
        best_distances, _ = distances.topk(knn,
                                           dim=1,
                                           largest=True,
                                           sorted=True)
        all_distances.append(best_distances.mean(1).cpu())
    return torch.cat(all_distances).numpy()
def find_nearest_neighbors(x, queries=None, k=5, gpu_id=None):
    """
    Find k nearest neighbors for each of the n examples.
    Distances are computed using Squared Euclidean distance metric.

    Arguments:
    ----------
    x (ndarray): N examples to search within. [N x d]. Trailing
        dimensions are flattened.
    queries (ndarray): find nearest neighbor for each query example.
        [M x d] matrix. If None then find k nearest neighbors for each row
        of x (excluding self examples).
    k (int): number of nearest neighbors to find.
    gpu_id (int): use CPU if None else use GPU with the specified id.

    Return
    nns (ndarray): Indices of the nearest neighbors. [M x k]
    distances (ndarray): Distances to the nearest neighbors. [M x k]

    Raises
    ValueError: if gpu_id is neither None nor an int.
    """
    if gpu_id is not None and not isinstance(gpu_id, int):
        raise ValueError('gpu_id must be None or int')

    # The index is fed float32, C-contiguous data.
    x = np.ascontiguousarray(x.reshape(x.shape[0], -1), dtype=np.float32)

    # Will the queries themselves show up in the search results?
    remove_self = queries is None
    if remove_self:
        queries = x
        k += 1  # one extra hit so that the self match can be discarded
    else:
        # Give user-supplied queries the same treatment as x; previously
        # they were passed through unconverted.
        queries = np.asarray(queries)
        queries = np.ascontiguousarray(
            queries.reshape(queries.shape[0], -1), dtype=np.float32)

    d = x.shape[1]
    tic = time.time()
    if gpu_id is None:
        logging.debug('FAISS: cpu::find {} nearest neighbors'
                      .format(k - int(remove_self)))
        index = faiss.IndexFlatL2(d)
    else:
        logging.debug('FAISS: gpu[{}]::find {} nearest neighbors'
                      .format(gpu_id, k - int(remove_self)))
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id
        resources = faiss.StandardGpuResources()
        index = faiss.GpuIndexFlatL2(resources, d, cfg)

    index.add(x)
    distances, nns = index.search(queries, k)

    if remove_self:
        n_rows = nns.shape[0]
        # Exactly one column is dropped per row: the self match when it was
        # returned, otherwise the last (farthest) hit.
        drop = nns == np.arange(n_rows)[:, None]
        drop[~drop.any(axis=1), -1] = True
        keep = ~drop
        nns = nns[keep].reshape(n_rows, k - 1)
        distances = distances[keep].reshape(n_rows, k - 1)

    logging.debug(
        'FAISS: Neighbors search total elapsed time: {:.2f} sec'.format(
            time.time() - tic))
    return nns, distances
def _unit_rows(mat):
    """Scale each row of ``mat`` to unit L2 norm; zero rows are left as is."""
    denom = np.array(np.linalg.norm(mat, axis=1)).reshape(-1)
    denom[np.isclose(denom, 0)] = 1
    return np.einsum("i,ij->ij", 1 / denom, mat)


def find_nearest_neighbors(
    target, emb, k=5, metric="euclidean", gpu_id=None, exact=True
):
    """Find the nearest neighbors for each point.

    :param target: vectors for the points for which we find the nearest
        neighbors
    :type target: numpy.ndarray (num_targets, dim)
    :param emb: vectors for the points from which we find the nearest
        neighbors.
    :type emb: numpy.ndarray (num_entities, dim)
    :param k: Number of nearest neighbors, defaults to 5
    :type k: int, optional
    :param metric: Distance metric for finding nearest neighbors. Available
        metric `metric="euclidean"`, `metric="cosine"` , `metric="dotsim"`
    :type metric: str
    :param gpu_id: GPU to try first, defaults to device 0. The CPU index is
        used when ``k >= 2048`` or when moving to the GPU fails.
    :type gpu_id: int, optional
    :param exact: use a flat (exhaustive) index if True, otherwise an
        IVF-Flat index trained on ``emb``
    :type exact: bool, optional
    :raises NotImplementedError: for an unknown ``metric``
    :return: for every (target, neighbor) pair, the target row index, the
        neighbor ID in ``emb`` and the value reported by the index; each
        array has shape (num_targets, k)
    :rtype: nodes (numpy.ndarray), neighbors (numpy.ndarray),
        distances (numpy.ndarray)

    .. highlight:: python
    .. code-block:: python

        >>> import emlens
        >>> import numpy as np
        >>> emb = np.random.randn(100, 20)
        >>> target = np.random.randn(10, 20)
        >>> A = emlens.find_nearest_neighbors(target, emb, k = 10)
    """
    if metric not in ("euclidean", "cosine", "dotsim"):
        raise NotImplementedError(
            "does not support metric: {}".format(metric))

    # The index is fed float32, C-contiguous arrays. The previous check was
    # inverted: it copied only inputs that were already C-contiguous.
    emb = np.ascontiguousarray(emb, dtype=np.float32)
    target = np.ascontiguousarray(target, dtype=np.float32)

    if metric == "cosine":
        # Cosine similarity == inner product of unit-length vectors.
        emb = _unit_rows(emb)
        target = _unit_rows(target)

    if metric == "euclidean":
        flat_cls, faiss_metric = faiss.IndexFlatL2, faiss.METRIC_L2
    else:
        flat_cls, faiss_metric = (faiss.IndexFlatIP,
                                  faiss.METRIC_INNER_PRODUCT)

    # Find the nearest neighbors
    dim = emb.shape[1]
    if exact:
        index = flat_cls(dim)
    else:
        quantiser = flat_cls(dim)
        nlist = int(np.ceil(10 * np.sqrt(emb.shape[0])))
        index = faiss.IndexIVFFlat(quantiser, dim, nlist, faiss_metric)
        index.train(emb)

    if gpu_id is None:
        gpu_id = 0

    if k >= 2048:
        # if k is larger than that supported by GPU
        index.add(emb)
    else:
        try:
            res = faiss.StandardGpuResources()
            gpu_index = faiss.index_cpu_to_gpu(res, gpu_id, index)
            gpu_index.add(emb)
            index = gpu_index
        except (RuntimeError, AttributeError):
            # No usable GPU (or CPU-only faiss build): fall back to the
            # CPU index. `index` is only rebound after a successful GPU
            # add, so the retry never lands on a half-built GPU index.
            index.add(emb)

    distances, neighbors = index.search(target, k=k)

    assert distances.dtype == "float32"
    assert neighbors.dtype == "int64"

    # Row i of `nodes` is i repeated k times.
    nodes = np.repeat(
        np.arange(target.shape[0]).reshape((-1, 1)), k, axis=1).astype(int)
    neighbors = neighbors.astype(int)

    return nodes, neighbors, distances
def pool_kmean_init_gpu(self, seed=0, gpu_num=0, temperature=1):
    """Initialise the concept pool clusters with FAISS k-means on GPU.

    The transposed ``self.concept_pool`` is clustered into ``self.num_k``
    groups. The normalised centroids, per-cluster density and per-sample
    assignments are handed to ``self.structure_memory_bank``.

    TODO: clear up.

    Args:
        seed: seed given to the FAISS clustering object.
        gpu_num: GPU device id used for the flat L2 index.
        temperature: value the mean density is rescaled to.
    """
    print('performing kmeans clustering')
    results = {'im2cluster': [], 'centroids': [], 'density': []}

    data = np.ascontiguousarray(self.concept_pool.clone().cpu().numpy().T)
    dim = data.shape[1]
    n_clusters = int(self.num_k)

    # initialize faiss clustering parameters
    clus = faiss.Clustering(dim, n_clusters)
    clus.verbose = True
    clus.niter = 100
    clus.nredo = 10
    clus.seed = seed
    clus.max_points_per_centroid = 1000
    clus.min_points_per_centroid = 10

    res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = gpu_num
    index = faiss.GpuIndexFlatL2(res, dim, cfg)

    clus.train(data, index)

    # for each sample, find cluster distance and assignments
    dists, assign = index.search(data, 1)
    im2cluster = [int(row[0]) for row in assign]

    # get cluster centroids
    centroids = faiss.vector_to_array(clus.centroids).reshape(
        n_clusters, dim)

    # sample-to-centroid distances for each cluster
    dists_by_cluster = [[] for _ in range(n_clusters)]
    for sample_id, cluster_id in enumerate(im2cluster):
        dists_by_cluster[cluster_id].append(dists[sample_id][0])

    # concentration estimation (phi)
    density = np.zeros(n_clusters)
    for cluster_id, members in enumerate(dists_by_cluster):
        if len(members) > 1:
            density[cluster_id] = (
                np.asarray(members)**0.5).mean() / np.log(len(members) + 10)

    # if cluster only has one point, use the max to estimate its
    # concentration
    dmax = density.max()
    for cluster_id, members in enumerate(dists_by_cluster):
        if len(members) <= 1:
            density[cluster_id] = dmax

    # clamp extreme values for stability
    lower = np.percentile(density, 10)
    upper = np.percentile(density, 90)
    density = density.clip(lower, upper)
    print(density.mean())
    # scale the mean to temperature
    density = temperature * density / density.mean()

    # convert to Tensors for broadcast
    centroids = nn.functional.normalize(torch.Tensor(centroids), p=2, dim=1)
    im2cluster = torch.LongTensor(im2cluster)
    density = torch.Tensor(density)

    results['centroids'].append(centroids)
    results['density'].append(density)
    results['im2cluster'].append(im2cluster)

    del cfg, res, index, clus

    # rearrange
    self.structure_memory_bank(results)
    print("Finish kmean init...")
    del results
def _augment_start_vectors(start, max_norm, num_dummy_zeros):
    """Prepend the norm-completion column and append optional zero padding.

    The prepended value is ``sqrt(max(0, max_norm**2 - ||v||**2))`` for
    each row ``v``.
    """
    norms = np.linalg.norm(start, axis=1, keepdims=True)
    consts = np.sqrt(np.maximum(0.0, max_norm**2 - norms**2))
    start = np.concatenate([consts, start], axis=1)
    if num_dummy_zeros > 0:
        start = np.concatenate([
            start,
            np.zeros([start.shape[0], num_dummy_zeros], dtype=start.dtype)
        ], axis=1)
    return start


def add_to_index(dump_paths,
                 trained_index_path,
                 target_index_path,
                 idx2id_path,
                 max_norm,
                 para=False,
                 num_docs_per_add=1000,
                 num_dummy_zeros=0,
                 cuda=False,
                 fine_quant='SQ8',
                 offset=0,
                 norm_th=999,
                 ignore_ids=None):
    """Add the start vectors of HDF5 phrase dumps to a trained FAISS index.

    Vectors are dequantised with ``int8_to_float``, augmented by
    ``_augment_start_vectors`` and added in batches through
    ``add_with_offset``. The filled index is written to
    ``target_index_path`` and the id mappings to ``idx2id_path``.

    Args:
        dump_paths: paths of the HDF5 dumps to read.
        trained_index_path: path of the trained (empty) FAISS index.
        target_index_path: where the filled index is written.
        idx2id_path: HDF5 file receiving the doc/para/word id arrays.
        max_norm: norm bound used for the prepended completion column.
        para: if True, dumps are nested doc -> paragraph -> group;
            otherwise each doc group holds the vectors directly.
        num_docs_per_add: a batch is flushed whenever the document counter
            is a multiple of this value.
        num_dummy_zeros: number of zero columns appended to every vector.
        cuda: move the index to GPU 0 while adding (not for PQ).
        fine_quant: quantiser name; names starting with 'PQ' stay on CPU.
        offset: passed to ``add_with_offset`` and stored in the id file.
        norm_th: (non-para mode) vectors whose norm exceeds this are
            flagged invalid in the mask given to ``add_with_offset``.
        ignore_ids: (non-para mode) document ids to skip, or None.
    """
    idx2doc_id = []
    idx2para_id = []
    idx2word_id = []
    use_gpu = cuda and not fine_quant.startswith('PQ')

    dumps = [h5py.File(dump_path, 'r') for dump_path in dump_paths]
    try:
        print('reading %s' % trained_index_path)
        start_index = faiss.read_index(trained_index_path)

        if cuda:
            if use_gpu:
                res = faiss.StandardGpuResources()
                start_index = faiss.index_cpu_to_gpu(res, 0, start_index)
            else:
                print('PQ not supported on GPU; keeping CPU.')

        print('adding following dumps:')
        for dump_path in dump_paths:
            print(dump_path)

        if para:
            for di, phrase_dump in enumerate(tqdm(dumps, desc='dumps')):
                starts = []
                for i, (doc_idx, doc_group) in enumerate(
                        tqdm(phrase_dump.items(), desc='faiss indexing')):
                    for para_idx, group in doc_group.items():
                        num_vecs = group['start'].shape[0]
                        start = int8_to_float(group['start'][:],
                                              group.attrs['offset'],
                                              group.attrs['scale'])
                        starts.append(
                            _augment_start_vectors(start, max_norm,
                                                   num_dummy_zeros))
                        idx2doc_id.extend([int(doc_idx)] * num_vecs)
                        idx2para_id.extend([int(para_idx)] * num_vecs)
                        idx2word_id.extend(list(range(num_vecs)))
                    if len(starts) > 0 and i % num_docs_per_add == 0:
                        print('concatenating')
                        concat = np.concatenate(starts, axis=0)
                        print('adding')
                        add_with_offset(start_index, concat, offset)
                        print('done')
                        starts = []
                    if i % 100 == 0:
                        print('%d/%d' % (i + 1, len(phrase_dump.keys())))
                # np.concatenate raises on an empty list, so the leftover
                # batch is only flushed when something is pending.
                if starts:
                    print('adding leftover')
                    add_with_offset(start_index,
                                    np.concatenate(starts, axis=0), offset)
                    print('done')
        else:
            for di, phrase_dump in enumerate(tqdm(dumps, desc='dumps')):
                starts = []
                valids = []
                for i, (doc_idx, doc_group) in enumerate(
                        tqdm(phrase_dump.items(), desc='adding %d' % di)):
                    if ignore_ids is not None and doc_idx in ignore_ids:
                        continue
                    num_vecs = doc_group['start'].shape[0]
                    start = int8_to_float(doc_group['start'][:],
                                          doc_group.attrs['offset'],
                                          doc_group.attrs['scale'])
                    # Validity is judged on the norm before augmentation.
                    valid = np.linalg.norm(start, axis=1) <= norm_th
                    starts.append(
                        _augment_start_vectors(start, max_norm,
                                               num_dummy_zeros))
                    valids.append(valid)
                    idx2doc_id.extend([int(doc_idx)] * num_vecs)
                    idx2word_id.extend(range(num_vecs))
                    if len(starts) > 0 and i % num_docs_per_add == 0:
                        print('adding at %d' % (i + 1))
                        add_with_offset(start_index,
                                        np.concatenate(starts, axis=0),
                                        offset, np.concatenate(valids))
                        starts = []
                        valids = []
                # Guarding on `starts` also avoids using `i` when the dump
                # was empty or every document was ignored.
                if starts:
                    print('final adding at %d' % (i + 1))
                    add_with_offset(start_index,
                                    np.concatenate(starts, axis=0), offset,
                                    np.concatenate(valids))
    finally:
        # Close the dumps even when reading or adding fails.
        for dump in dumps:
            dump.close()

    if use_gpu:
        print('moving back to cpu')
        start_index = faiss.index_gpu_to_cpu(start_index)

    print('index ntotal: %d' % start_index.ntotal)
    idx2doc_id = np.array(idx2doc_id, dtype=np.int32)
    idx2para_id = np.array(idx2para_id, dtype=np.int32)
    idx2word_id = np.array(idx2word_id, dtype=np.int32)

    print('writing index and metadata')
    with h5py.File(idx2id_path, 'w') as f:
        g = f.create_group(str(offset))
        g.create_dataset('doc', data=idx2doc_id)
        g.create_dataset('para', data=idx2para_id)
        g.create_dataset('word', data=idx2word_id)
        g.attrs['offset'] = offset
    faiss.write_index(start_index, target_index_path)
    print('done')
def test_knn_gpu(self):
    """Check ``faiss.knn_gpu`` against a CPU ``IndexFlatL2`` ground truth.

    numpy arrays and torch tensors are tried in row-major and column-major
    layouts, all inside a non-default CUDA stream.
    """
    torch.manual_seed(10)

    d, nb, nq, k = 32, 1024, 10, 10
    res = faiss.StandardGpuResources()

    # make GT on torch cpu and test using IndexFlatL2
    xb = torch.rand(nb, d, dtype=torch.float32)
    xq = torch.rand(nq, d, dtype=torch.float32)

    cpu_index = faiss.IndexFlatL2(d)
    cpu_index.add(xb)
    gt_D, gt_I = cpu_index.search(xq, k)

    layouts = (True, False)

    # for the GPU, we'll use a non-default stream
    stream = torch.cuda.Stream()
    with torch.cuda.stream(stream):
        # test numpy inputs
        xb_np = xb.numpy()
        xq_np = xq.numpy()

        for xq_row_major in layouts:
            for xb_row_major in layouts:
                if xq_row_major:
                    xq_c = xq_np
                else:
                    xq_c = to_column_major_numpy(xq_np)
                    assert not xq_c.flags.contiguous

                if xb_row_major:
                    xb_c = xb_np
                else:
                    xb_c = to_column_major_numpy(xb_np)
                    assert not xb_c.flags.contiguous

                D, I = faiss.knn_gpu(res, xb_c, xq_c, k)

                self.assertTrue(torch.equal(torch.from_numpy(I), gt_I))
                max_err = (torch.from_numpy(D) - gt_D).abs().max()
                self.assertLess(max_err, 1e-4)

        # test torch (cpu, gpu) inputs
        for is_cuda in (True, False):
            for xq_row_major in layouts:
                for xb_row_major in layouts:
                    # when is_cuda is False, torch cpu tensors are tested
                    xq_c = xq.cuda() if is_cuda else xq
                    xb_c = xb.cuda() if is_cuda else xb

                    # NOTE(review): the column-major variants are derived
                    # from the CPU tensors xq / xb, also when is_cuda is
                    # True -- confirm this is intended.
                    if not xq_row_major:
                        xq_c = to_column_major_torch(xq)
                        assert not xq_c.is_contiguous()

                    if not xb_row_major:
                        xb_c = to_column_major_torch(xb)
                        assert not xb_c.is_contiguous()

                    D, I = faiss.knn_gpu(res, xb_c, xq_c, k)

                    self.assertTrue(torch.equal(I.cpu(), gt_I))
                    self.assertLess((D.cpu() - gt_D).abs().max(), 1e-4)

                    # test on subset
                    try:
                        # This internally uses the current pytorch stream
                        D, I = faiss.knn_gpu(res, xb_c, xq_c[6:8], k)
                    except TypeError:
                        if xq_row_major:
                            # a row-major query must not fail: real error
                            raise
                        # expected for a column-major query
                        continue

                    self.assertTrue(torch.equal(I.cpu(), gt_I[6:8]))
                    self.assertLess((D.cpu() - gt_D[6:8]).abs().max(),
                                    1e-4)
def _index_to_gpu(index, device_id):  # pragma: no cover
    """Return a GPU copy of ``index`` placed on device ``device_id``.

    A fresh ``faiss.StandardGpuResources`` object is created on each call.
    """
    gpu_resources = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(gpu_resources, device_id, index)
    return gpu_index
FEATURES_NUMBER = args.features PCA_FEATURES = args.pca train = args.train pca = args.pca != 0 gpu = not args.cpu features_dir = args.features_dir + "/" + args.net FEATURES_NPY = features_dir + '/*.npy' INDEX_FILENAME_PRE = args.results_dir + '/' + args.net.replace("-", "_") INDEX_FILENAME = INDEX_FILENAME_PRE + '.index' INDEX_FILENAME_PK = INDEX_FILENAME_PRE + '.pk' INDEX_FILENAME_PCA = INDEX_FILENAME_PRE + '.pca' + str(args.pca) res = faiss.StandardGpuResources() # use a single GPU co = faiss.GpuClonerOptions() # here we are using a 64-byte PQ, so we must set the lookup tables to # 16 bit float (this is due to the limited temporary memory). if args.float16: co.useFloat16 = True if os.path.exists(INDEX_FILENAME): cpu_index = faiss.read_index(INDEX_FILENAME) index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co) if gpu else cpu_index if pca: mat = faiss.read_VectorTransform(INDEX_FILENAME_PCA) # todo calculate it if not there with open(INDEX_FILENAME_PK, 'rb') as fp: index_dict = pickle.load(fp) else:
def run_kmeans(x, args):
    """Cluster ``x`` once per entry of ``args.num_cluster`` with FAISS.

    Args:
        x: data to be clustered; converted with ``x.numpy()``.
        args: namespace providing ``num_cluster`` (iterable of cluster
            counts), ``gpu``, ``temperature``, ``norm_p`` and
            ``centroid_sampling``.

    Returns:
        dict of lists with one entry per clustering run:
        ``'centroids'`` (normalised centroids), ``'density'``
        (concentration per cluster), ``'im2cluster'`` (cluster id per
        sample) and, when ``args.centroid_sampling`` is set,
        ``'sampled_protos'`` (per cluster, a 0-d long tensor holding the
        index of one randomly drawn member).

    Raises:
        IndexError: with ``args.centroid_sampling`` set, if a cluster has
            no member (``random.choice`` on an empty list).
    """
    x = x.numpy()

    print('performing kmeans clustering')
    results = {
        'im2cluster': [],
        'centroids': [],
        'density': [],
        'sampled_protos': []
    }

    for seed, num_cluster in enumerate(args.num_cluster):
        # initialize faiss clustering parameters
        d = x.shape[1]
        k = int(num_cluster)
        clus = faiss.Clustering(d, k)
        clus.verbose = True
        clus.niter = 20
        clus.nredo = 5
        clus.seed = seed
        clus.max_points_per_centroid = 1000
        clus.min_points_per_centroid = 10

        res = faiss.StandardGpuResources()
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = args.gpu
        index = faiss.GpuIndexFlatL2(res, d, cfg)

        clus.train(x, index)

        # for each sample, find cluster distance and assignments
        D, I = index.search(x, 1)
        im2cluster = [int(n[0]) for n in I]

        # get cluster centroids
        centroids = faiss.vector_to_array(clus.centroids).reshape(k, d)

        # sample-to-centroid distances and member indices for each cluster
        Dcluster = [[] for _ in range(k)]
        indices_per_cluster = [[] for _ in range(k)]
        for im, i in enumerate(im2cluster):
            Dcluster[i].append(D[im][0])
            indices_per_cluster[i].append(im)

        if args.centroid_sampling:
            # sample a random point from each cluster to act as a prototype
            # rather than the centroid; fails on an empty cluster
            sampled_protos = [
                random.choice(indices_per_cluster[i]) for i in range(k)
            ]

        # concentration estimation (phi)
        density = np.zeros(k)
        for i, dist in enumerate(Dcluster):
            if len(dist) > 1:
                density[i] = (np.asarray(dist)**0.5).mean() / np.log(
                    len(dist) + 10)

        # if cluster only has one point, use the max to estimate its
        # concentration
        dmax = density.max()
        for i, dist in enumerate(Dcluster):
            if len(dist) <= 1:
                density[i] = dmax

        # clamp extreme values for stability
        density = density.clip(np.percentile(density, 10),
                               np.percentile(density, 90))
        # scale the mean to temperature
        density = args.temperature * density / density.mean()

        # convert to cuda Tensors for broadcast
        centroids = torch.Tensor(centroids).cuda(args.gpu)
        centroids = nn.functional.normalize(centroids, p=args.norm_p, dim=1)

        if args.centroid_sampling:
            # torch.Tensor(<int>) allocates an uninitialised tensor of that
            # length; torch.tensor stores the sampled index itself.
            # NOTE(review): confirm consumers expect the member index here.
            sampled_protos = [
                torch.tensor(proto_id, dtype=torch.long).cuda(args.gpu)
                for proto_id in sampled_protos
            ]

        im2cluster = torch.LongTensor(im2cluster).cuda(args.gpu)
        density = torch.Tensor(density).cuda(args.gpu)

        results['centroids'].append(centroids)
        results['density'].append(density)
        results['im2cluster'].append(im2cluster)
        if args.centroid_sampling:
            results['sampled_protos'].append(sampled_protos)

    return results
def test_resources(self):
    """Move a flat L2 index to GPU 0 and add ``self.xb`` to it."""
    # this used to crash!
    # The resources object and the CPU index are passed as temporaries,
    # exactly as before.
    gpu_index = faiss.index_cpu_to_gpu(
        faiss.StandardGpuResources(),
        0,
        faiss.IndexFlatL2(self.d),
    )
    gpu_index.add(self.xb)
def test_sq_cpu_to_gpu(self):
    """An "SQfp16" index cloned to GPU must be a ``GpuIndexFlat``."""
    gpu_resources = faiss.StandardGpuResources()

    dim = 32
    cpu_index = faiss.index_factory(dim, "SQfp16")
    vectors = np.random.rand(1000, dim).astype(np.float32)
    cpu_index.add(vectors)

    moved = faiss.index_cpu_to_gpu(gpu_resources, 0, cpu_index)
    self.assertIsInstance(moved, faiss.GpuIndexFlat)
def test_ivfflat(self):
    """Create an IVF-Flat L2 GPU index and train it on ``self.xb``."""
    # NOTE(review): the resources object is passed as an unnamed temporary,
    # presumably to check that the index keeps it alive -- confirm before
    # hoisting it into a local.
    gpu_index = faiss.GpuIndexIVFFlat(
        faiss.StandardGpuResources(),
        self.d,
        self.nlist,
        faiss.METRIC_L2,
    )
    gpu_index.train(self.xb)
def compute_standard(self, opt, model, dataloader, evaltypes, device,
                     **kwargs):
    """Embed ``dataloader`` with ``model`` and evaluate every metric.

    Args:
        opt: options object; ``opt.n_classes`` is the number of k-means
            centroids.
        model: called on each input batch. May return a tensor, a dict keyed
            by evaltype, or a 2-tuple whose first element is one of those.
        dataloader: yields batches where ``inp[0]`` is the target tensor and
            ``inp[1]`` the input; ``dataloader.dataset.image_paths`` is
            forwarded in the extra infos.
        evaltypes: names of the embedding types to evaluate (deep-copied).
        device: device the input batches are moved to.
        **kwargs: accepted for interface compatibility; unused.

    Returns:
        ``(computed_metrics, extra_infos)``, both dicts keyed by evaltype.
        ``computed_metrics[evaltype]`` maps metric name to metric value;
        ``extra_infos[evaltype]`` holds the features, target labels and
        image paths.
    """
    evaltypes = copy.deepcopy(evaltypes)
    n_classes = opt.n_classes
    _ = model.eval()

    feature_colls = {key: [] for key in evaltypes}

    # Embed the whole dataset once, collecting features per evaltype.
    with torch.no_grad():
        target_labels = []
        final_iter = tqdm(dataloader, desc='Embedding Data...')
        for inp in final_iter:
            input_img, target = inp[1], inp[0]
            target_labels.extend(target.numpy().tolist())
            out = model(input_img.to(device))
            if isinstance(out, tuple):
                # The second element (auxiliary output) is not used here.
                out, _ = out

            # Include embeddings of all output features
            for evaltype in evaltypes:
                if isinstance(out, dict):
                    feature_colls[evaltype].extend(
                        out[evaltype].cpu().detach().numpy().tolist())
                else:
                    feature_colls[evaltype].extend(
                        out.cpu().detach().numpy().tolist())

        target_labels = np.hstack(target_labels).reshape(-1, 1)

    computed_metrics = {evaltype: {} for evaltype in evaltypes}
    extra_infos = {evaltype: {} for evaltype in evaltypes}

    faiss.omp_set_num_threads(self.pars.kernels)
    res = None
    torch.cuda.empty_cache()
    if self.pars.evaluate_on_gpu:
        res = faiss.StandardGpuResources()

    def build_flat_l2(dim):
        """Create a flat L2 index, moved to GPU 0 when GPU resources exist."""
        flat = faiss.IndexFlatL2(dim)
        if res is not None:
            flat = faiss.index_cpu_to_gpu(res, 0, flat)
        return flat

    for evaltype in evaltypes:
        features = np.vstack(feature_colls[evaltype]).astype('float32')

        if 'kmeans' in self.requires:
            cluster_idx = build_flat_l2(features.shape[-1])
            kmeans = faiss.Clustering(features.shape[-1], n_classes)
            kmeans.niter = 20
            kmeans.min_points_per_centroid = 1
            kmeans.max_points_per_centroid = 1000000000
            # Train Kmeans
            kmeans.train(features, cluster_idx)
            centroids = faiss.vector_float_to_array(
                kmeans.centroids).reshape(n_classes, features.shape[-1])

        if 'kmeans_nearest' in self.requires:
            # Uses the centroids produced by the 'kmeans' step above.
            faiss_search_index = build_flat_l2(centroids.shape[-1])
            faiss_search_index.add(centroids)
            _, computed_cluster_labels = faiss_search_index.search(
                features, 1)

        if 'nearest_features' in self.requires:
            faiss_search_index = build_flat_l2(features.shape[-1])
            faiss_search_index.add(features)

            max_kval = np.max([
                int(x.split('@')[-1]) for x in self.metric_names
                if 'recall' in x
            ])
            # One extra neighbour is requested because column 0 is dropped
            # below.
            _, k_closest_points = faiss_search_index.search(
                features, int(max_kval + 1))
            k_closest_classes = target_labels.reshape(-1)[
                k_closest_points[:, 1:]]

        if self.pars.evaluate_on_gpu:
            features = torch.from_numpy(features).to(self.pars.device)

        for metric in self.list_of_metrics:
            input_dict = {}
            if 'features' in metric.requires:
                input_dict['features'] = features
            if 'target_labels' in metric.requires:
                input_dict['target_labels'] = target_labels
            if 'kmeans' in metric.requires:
                input_dict['centroids'] = centroids
            if 'kmeans_nearest' in metric.requires:
                input_dict[
                    'computed_cluster_labels'] = computed_cluster_labels
            if 'nearest_features' in metric.requires:
                input_dict['k_closest_classes'] = k_closest_classes

            computed_metrics[evaltype][metric.name] = metric(**input_dict)

        extra_infos[evaltype] = {
            'features': features,
            'target_labels': target_labels,
            'image_paths': dataloader.dataset.image_paths,
            'query_image_paths': None,
            'gallery_image_paths': None
        }

    torch.cuda.empty_cache()
    return computed_metrics, extra_infos
def FlatGpu(config):
    """Benchmark building and searching flat L2 indexes on one GPU.

    Args:
        config: dict read for ``dimension``, ``db_size``, ``query_num``,
            ``top_k``, ``db_num``, ``temp_memory`` (0 disables temporary
            memory, -1 keeps the default, any other value is multiplied by
            1024 * 1024 and passed to ``setTempMemory``),
            ``test_batch_write`` and, when that is ``True``,
            ``write_batch_num``.

    Returns:
        list of the GPU indexes that were built (empty when ``db_num`` is
        0). In batch-write mode the function returns right after filling
        the indexes, without running the search benchmark.
    """
    print("FlatGpu, ", config)

    d = config['dimension']  # dimension
    nb = config['db_size']  # database size
    nq = config['query_num']  # nb of queries
    topk = config['top_k']
    search_repeat = 10

    res = faiss.StandardGpuResources()  # use a single GPU

    # temp memory
    if config["temp_memory"] == 0:
        res.noTempMemory()
    elif config["temp_memory"] != -1:
        res.setTempMemory(config["temp_memory"] * 1024 * 1024)

    index_list = []
    create_ave_duration = 0
    search_ave_duration = 0

    # Exact comparison kept on purpose: truthy non-bool values such as
    # strings must not enable batch mode.
    if config['test_batch_write'] == True:  # noqa: E712
        batch_write_ave_duration = 0
        batch_write_num = config['write_batch_num']
        batch_write_time = int(nb / config['write_batch_num'])
        print("batch_write_time = ", batch_write_time)

        for i in range(config['db_num']):
            index_flat = faiss.IndexFlatL2(d)  # build a flat (CPU) index
            # make it a flat GPU index
            gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
            batch_write_ave_one_lib = 0
            for j in range(batch_write_time):
                np.random.seed(i * batch_write_time + j)
                xb = np.random.random((batch_write_num, d)).astype('float32')
                xb[:, 0] += np.arange(batch_write_num) / 1000.
                begin_time = time.time()
                gpu_index_flat.add(xb)
                duration = time.time() - begin_time
                batch_write_ave_one_lib += duration
                batch_write_ave_duration += duration
            # No batch was written when db_size < write_batch_num; skip the
            # average instead of dividing by zero.
            if batch_write_time:
                print("batch_write_ave_one_lib = ",
                      (batch_write_ave_one_lib / batch_write_time) * 1000 *
                      1000, " us")
            index_list.append(gpu_index_flat)

        if index_list and batch_write_time:
            print("batch_write_ave_duration = ",
                  (batch_write_ave_duration / len(index_list) /
                   batch_write_time) * 1000 * 1000, " us")
        return index_list

    # Using a flat index
    for i in range(config['db_num']):
        np.random.seed(i)  # make reproducible
        xb = np.random.random((nb, d)).astype('float32')
        xb[:, 0] += np.arange(nb) / 1000.

        begin_time = time.time()
        index_flat = faiss.IndexFlatL2(d)  # build a flat (CPU) index
        # make it a flat GPU index
        gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
        gpu_index_flat.add(xb)  # add vectors to the index
        duration = time.time() - begin_time
        create_ave_duration += duration
        index_list.append(gpu_index_flat)

        if i == 0:
            # One small search on the first index before timing starts.
            gpu_index_flat.search(xb[:5], 4)

    # Nothing was built: return before any average divides by zero.
    if not index_list:
        return index_list

    print("craete ave duration = ", create_ave_duration / len(index_list),
          " s")

    for i in range(len(index_list)):
        for j in range(search_repeat):
            np.random.seed(i * search_repeat + j + config['db_num'])
            xq = np.random.random((nq, d)).astype('float32')
            xq[:, 0] += np.arange(nq) / 1000.

            begin_time = time.time()
            index_list[i].search(xq, topk)  # actual search
            duration = time.time() - begin_time
            search_ave_duration += duration

    print("search index aver time = ",
          search_ave_duration / len(index_list) / search_repeat, " s")

    return index_list
def compute_jaccard_distance(target_features,
                             k1=20,
                             k2=6,
                             print_flag=True,
                             search_option=0,
                             use_float16=False):
    """Build an (N, N) Jaccard distance matrix from k-reciprocal neighbours.

    Args:
        target_features: 2-D torch tensor with one row per sample.
        k1: neighbours retrieved per sample and used for the reciprocal
            sets (the half-size sets use ``int(np.around(k1 / 2))``).
        k2: neighbours averaged in the query-expansion step; that step is
            skipped when ``k2 == 1``.
        print_flag: print progress and the elapsed time.
        search_option: 0 and 1 search on GPU from torch tensors, 2 uses
            ``index_init_gpu``, anything else uses ``index_init_cpu``.
        use_float16: store the matrices as float16 instead of float32.

    Returns:
        numpy array of shape (N, N) with negative entries clamped to 0.
    """
    started = time.time()
    if print_flag:
        print('Computing jaccard distance...')

    ngpus = faiss.get_num_gpus()
    n_samples = target_features.size(0)
    mat_type = np.float16 if use_float16 else np.float32

    # --- initial k1-nearest-neighbour ranking -----------------------------
    if search_option == 0:
        # GPU + PyTorch CUDA Tensors (1)
        res = faiss.StandardGpuResources()
        res.setDefaultNullStreamAllDevices()
        _, initial_rank = search_raw_array_pytorch(res, target_features,
                                                   target_features, k1)
        initial_rank = initial_rank.cpu().numpy()
    elif search_option == 1:
        # GPU + PyTorch CUDA Tensors (2)
        res = faiss.StandardGpuResources()
        index = faiss.GpuIndexFlatL2(res, target_features.size(-1))
        index.add(target_features.cpu().numpy())
        _, initial_rank = search_index_pytorch(index, target_features, k1)
        res.syncDefaultStreamCurrentDevice()
        initial_rank = initial_rank.cpu().numpy()
    else:
        if search_option == 2:
            # GPU
            index = index_init_gpu(ngpus, target_features.size(-1))
        else:
            # CPU
            index = index_init_cpu(target_features.size(-1))
        index.add(target_features.cpu().numpy())
        _, initial_rank = index.search(target_features.cpu().numpy(), k1)

    # --- k-reciprocal neighbour sets (full and half size) -----------------
    half_k1 = int(np.around(k1 / 2))
    nn_k1, nn_k1_half = [], []
    for row in range(n_samples):
        nn_k1.append(k_reciprocal_neigh(initial_rank, row, k1))
        nn_k1_half.append(k_reciprocal_neigh(initial_rank, row, half_k1))

    # --- softmax-weighted membership matrix V -----------------------------
    V = np.zeros((n_samples, n_samples), dtype=mat_type)
    for row in range(n_samples):
        base_set = nn_k1[row]
        expanded = base_set
        for candidate in base_set:
            cand_set = nn_k1_half[candidate]
            shared = len(np.intersect1d(cand_set, base_set))
            # Merge a candidate's half-size set when more than 2/3 of it
            # already lies in the base set.
            if shared > 2 / 3 * len(cand_set):
                expanded = np.append(expanded, cand_set)

        expanded = np.unique(expanded)  # element-wise unique
        anchor = target_features[row].unsqueeze(0).contiguous()
        dist = 2 - 2 * torch.mm(anchor, target_features[expanded].t())
        weights = F.softmax(-dist, dim=1).view(-1).cpu().numpy()
        if use_float16:
            weights = weights.astype(mat_type)
        V[row, expanded] = weights

    del nn_k1, nn_k1_half

    # --- query expansion: average V over each sample's top-k2 neighbours --
    if k2 != 1:
        V_qe = np.zeros_like(V, dtype=mat_type)
        for row in range(n_samples):
            V_qe[row, :] = np.mean(V[initial_rank[row, :k2], :], axis=0)
        V = V_qe
        del V_qe

    del initial_rank

    # For every column, the rows holding a non-zero entry.
    inv_index = [np.where(V[:, col] != 0)[0] for col in range(n_samples)]

    jaccard_dist = np.zeros((n_samples, n_samples), dtype=mat_type)
    for row in range(n_samples):
        temp_min = np.zeros((1, n_samples), dtype=mat_type)
        for col in np.where(V[row, :] != 0)[0]:
            rows_hit = inv_index[col]
            temp_min[0, rows_hit] = temp_min[0, rows_hit] + np.minimum(
                V[row, col], V[rows_hit, col])

        jaccard_dist[row] = 1 - temp_min / (2 - temp_min)

    del inv_index, V

    jaccard_dist[jaccard_dist < 0] = 0.0
    if print_flag:
        print("Jaccard distance computing time cost: {}".format(time.time() -
                                                                 started))

    return jaccard_dist
def test_set_gpu_param(self):
    """Set ``nprobe`` on a GPU clone through ``GpuParameterSpace``."""
    index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    # Apply the parameter to the GPU clone, not to the CPU source index;
    # otherwise gpu_index is never touched and the GPU path goes untested.
    faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3)
def main(args):
    """Embed the novel and query sets and print nearest-neighbour precision.

    Every checkpoint in ``args.model`` is loaded into a feature model. The
    embedding of a batch is the element-wise maximum over all loaded
    models, for novel and query images alike. Query embeddings are then
    searched against the novel embeddings with an inner-product GPU index.

    Args:
        args: options object read for ``train_data_dir``, ``test_data_dir``,
            ``batch_size``, ``model`` (iterable of checkpoint paths),
            ``use_proto``, ``use_class`` and ``use_distill``.
    """
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    def as_numpy(values):
        """Return search results as a numpy array (tensor or array input)."""
        if torch.is_tensor(values):
            return values.cpu().numpy()
        return np.asarray(values)

    with torch.no_grad():
        trainset, train_loader = init_dataset(args.train_data_dir,
                                              args.batch_size)
        testset, test_loader = init_dataset(args.test_data_dir,
                                            args.batch_size)

        model_list = []
        for ckpt in args.model:
            if args.use_proto:
                model = init_protonet()
            elif args.use_class:
                class_model = models.resnet34(pretrained=False,
                                              num_classes=1000)
                class_model.load_state_dict(torch.load(ckpt))
                model = FeatModel(class_model)
            elif args.use_distill:
                class_model = models.resnet34(pretrained=False,
                                              num_classes=1000)
                model = DistillModel(class_model, 1000)
                model.load_state_dict(torch.load(ckpt))
                model = FeatModel(model)
            else:
                class_model = models.resnet34(pretrained=False)
                model = FeatModel(class_model)
                model.load_state_dict(torch.load(ckpt))

            # NOTE(review): no model.eval() is called in this function --
            # confirm the feature models do not need it for inference.
            model.to(device)
            model_list.append(model)

        def ensemble_features(batch):
            """Element-wise max of the embeddings from every loaded model."""
            stacked = torch.cat([m(batch).unsqueeze(0) for m in model_list])
            merged, _ = torch.max(stacked, 0)
            return merged

        feat_novel = torch.zeros((len(trainset), 512))
        label_novel = torch.zeros((len(trainset)))
        feat_query = torch.zeros((len(testset), 512))
        label_query = torch.zeros((len(testset)))

        print('Runing forward on noval images')
        for idx, batch in enumerate(tqdm(train_loader)):
            x, y = batch
            x, y = x.to(device), y.to(device)
            start_idx = idx * args.batch_size
            end_idx = min((idx + 1) * args.batch_size, len(trainset))
            feat_novel[start_idx:end_idx, :] = ensemble_features(x)
            label_novel[start_idx:end_idx] = y

        print('Runing forward on query images')
        for idx, batch in enumerate(tqdm(test_loader)):
            x, y = batch
            # Same device and same ensemble as for the novel images, so
            # both sets of embeddings are comparable.
            x, y = x.to(device), y.to(device)
            start_idx = idx * args.batch_size
            end_idx = min((idx + 1) * args.batch_size, len(testset))
            feat_query[start_idx:end_idx, :] = ensemble_features(x)
            label_query[start_idx:end_idx] = y

        labels0 = label_novel.data.cpu().numpy()
        labels1 = label_query.data.cpu().numpy()
        # same[q, n] is True when query q and novel n share a label;
        # r holds the query indices and c the novel indices of those pairs.
        same = labels0 == labels1[:, np.newaxis]
        r, c = np.where(same)

        res = faiss.StandardGpuResources()
        index = faiss.GpuIndexFlatIP(res, 512)
        index.add(feat_novel.data.cpu().numpy())

        # top 5 precision
        k5 = 5  # we want to see 5 nearest neighbors
        D5, I5 = search_index_pytorch(index, feat_query, k5)
        # Row-wise membership: a pair counts only if the novel index is
        # among the neighbours of its own query (np.isin would test against
        # the neighbours of every query at once).
        hits5 = (c.reshape(-1, 1) == as_numpy(I5)[r]).any(axis=1).sum()
        prec5 = hits5 / c.shape[0]

        # top 1 acc
        k1 = 1
        D1, I1 = search_index_pytorch(index, feat_query, k1)
        prec1 = (c.reshape(-1, 1) == as_numpy(I1)[r]).sum().item() / c.shape[0]

        print("top 5 precision {}".format(prec5))
        print("top 1 precision {}".format(prec1))
def add_vectors(
    self,
    vectors: Union[np.array, "Dataset"],
    column: Optional[str] = None,
    batch_size: int = 1000,
    train_size: Optional[int] = None,
    faiss_verbose: Optional[bool] = None,
):
    """
    Add vectors to the index, creating the faiss index first if needed.

    When the arrays live inside a column of ``vectors``, name it with the
    ``column`` argument. If ``train_size`` is given, the index is trained on
    the first ``train_size`` vectors before anything is added. Vectors are
    added in slices of ``batch_size``.
    """
    import faiss  # noqa: F811

    def pick(rows):
        """Return ``rows`` itself, or its ``column`` entry when one is set."""
        return rows if column is None else rows[column]

    # Create index
    if self.faiss_index is None:
        size = len(pick(vectors[0]))
        # The metric type is only passed along when one was configured.
        metric_args = () if self.metric_type is None else (self.metric_type, )
        if self.string_factory is None:
            index = faiss.IndexFlat(size, *metric_args)
        else:
            index = faiss.index_factory(size, self.string_factory,
                                        *metric_args)

        if self.device is not None and self.device > -1:
            self.faiss_res = faiss.StandardGpuResources()
            index = faiss.index_cpu_to_gpu(self.faiss_res, self.device,
                                           index)
        self.faiss_index = index
        logger.info("Created faiss index of type {}".format(
            type(self.faiss_index)))

    # Set verbosity level on the index and on whichever sub-objects exist
    if faiss_verbose is not None:
        self.faiss_index.verbose = faiss_verbose
        for attr_name in ("index", "quantizer", "clustering_index"):
            component = getattr(self.faiss_index, attr_name, None)
            if component is not None:
                component.verbose = faiss_verbose

    # Train
    if train_size is None:
        logger.info(
            "Ignored the training step of the faiss index as `train_size` is None."
        )
    else:
        train_vecs = pick(vectors[:train_size])
        logger.info("Training the index with the first {} vectors".format(
            len(train_vecs)))
        self.faiss_index.train(train_vecs)

    # Add vectors
    total = len(vectors)
    logger.info("Adding {} vectors to the faiss index".format(total))
    not_verbose = bool(logger.getEffectiveLevel() > WARNING)
    for start in tqdm(range(0, total, batch_size), disable=not_verbose):
        self.faiss_index.add(pick(vectors[start:start + batch_size]))
def do_cpu_to_gpu(self, index_key):
    """Clone a trained CPU index to the GPU and compare search results.

    Builds the index described by ``index_key`` on the small dataset,
    searches it on CPU, clones it to one GPU (and, when more than one GPU
    is reported, to two GPUs with and without sharding) and checks that
    the returned ids mostly agree with the CPU reference.
    """
    ts = [time.time()]

    def mark():
        """Record a timestamp for the timing printout."""
        ts.append(time.time())

    xt, xb, xq = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    mark()

    index.train(xt)
    mark()

    # adding some ids because there was a bug in this case
    custom_ids = np.arange(nb) * 3 + 12345
    index.add_with_ids(xb, custom_ids)
    mark()

    index.nprobe = 4
    Dref, Iref = index.search(xq, 10)
    mark()

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    mark()

    # Validate the layout of the memory info
    mem_info = res.getMemoryInfo()
    assert type(mem_info) == dict
    flat_data = mem_info[0]['FlatData']
    assert type(flat_data) == tuple
    assert type(flat_data[0]) == int
    assert type(flat_data[1]) == int

    gpu_index.setNumProbes(4)
    Dnew, Inew = gpu_index.search(xq, 10)
    mark()
    print('times:', [t - ts[0] for t in ts])

    # Give us some margin of error
    n_matching = (Iref == Inew).sum()
    self.assertGreaterEqual(n_matching, Iref.size - 50)

    if faiss.get_num_gpus() == 1:
        return

    for shard in (False, True):
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for _ in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)
        faiss.GpuParameterSpace().set_index_parameter(gpu_index, 'nprobe', 4)

        Dnew, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        n_matching = (Iref == Inew).sum()
        self.assertGreaterEqual(n_matching, Iref.size * 0.99)
# Parenthesized single-argument prints behave the same on Python 2 and 3.
print("cachefiles:")
print(preproc_cachefile)
print(cent_cachefile)
print(index_cachefile)

#################################################################
# Wake up GPUs
#################################################################

print("preparing resources for %d GPUs" % ngpu)

# One StandardGpuResources per GPU, kept alive at module level.
gpu_resources = []

for i in range(ngpu):
    res = faiss.StandardGpuResources()
    # A negative tempmem keeps the default temporary-memory setting.
    if tempmem >= 0:
        res.setTempMemory(tempmem)
    gpu_resources.append(res)


def make_vres_vdev(i0=0, i1=-1):
    """Return vectors of device ids and resources useful for gpu_multiple.

    Covers devices ``i0`` (inclusive) to ``i1`` (exclusive); ``i1 == -1``
    means up to ``ngpu``. Returns ``(vres, vdev)``: a
    ``faiss.GpuResourcesVector`` and a ``faiss.IntVector`` filled in the
    same order.
    """
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    if i1 == -1:
        i1 = ngpu
    for i in range(i0, i1):
        vdev.push_back(i)
        vres.push_back(gpu_resources[i])
    return vres, vdev
def test_bf_input_types(self):
    """Compare ``faiss.bfKnn`` with a CPU ``IndexFlatL2`` across data types.

    Three configurations run on the same ``GpuDistanceParams`` object, which
    is modified between calls: float32 data with int64 output indices,
    float32 data with int32 output indices, and float16 data with int64
    output indices.
    """
    d = 33
    k = 5
    nb = 1000
    nq = 10

    # make_t is defined elsewhere in this file; presumably it returns a
    # float32 array of the requested shape -- TODO confirm.
    xs = make_t(nb, d)
    qs = make_t(nq, d)

    res = faiss.StandardGpuResources()

    # Get ground truth using IndexFlat
    index = faiss.IndexFlatL2(d)
    index.add(xs)
    ref_d, ref_i = index.search(qs, k)

    # Output buffers that bfKnn fills through the pointers set below.
    out_d = np.empty((nq, k), dtype=np.float32)
    out_i = np.empty((nq, k), dtype=np.int64)

    # Try f32 data/queries, i64 out indices
    params = faiss.GpuDistanceParams()
    params.k = k
    params.dims = d
    params.vectors = faiss.swig_ptr(xs)
    params.numVectors = nb
    params.queries = faiss.swig_ptr(qs)
    params.numQueries = nq
    params.outDistances = faiss.swig_ptr(out_d)
    params.outIndices = faiss.swig_ptr(out_i)

    faiss.bfKnn(res, params)

    self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))
    # The count of matching ids can never exceed ref_i.size, so this
    # requires every id to match.
    self.assertGreaterEqual((out_i == ref_i).sum(), ref_i.size)

    # Try int32 out indices
    out_i32 = np.empty((nq, k), dtype=np.int32)
    params.outIndices = faiss.swig_ptr(out_i32)
    params.outIndicesType = faiss.IndicesDataType_I32

    faiss.bfKnn(res, params)

    self.assertEqual((out_i32 == ref_i).sum(), ref_i.size)

    # Try float16 data/queries, i64 out indices
    xs_f16 = xs.astype(np.float16)
    qs_f16 = qs.astype(np.float16)
    # The CPU reference is rebuilt from the float16-rounded values so both
    # sides see the same inputs.
    xs_f16_f32 = xs_f16.astype(np.float32)
    qs_f16_f32 = qs_f16.astype(np.float32)
    index.reset()
    index.add(xs_f16_f32)
    ref_d_f16, ref_i_f16 = index.search(qs_f16_f32, k)

    params.vectors = faiss.swig_ptr(xs_f16)
    params.vectorType = faiss.DistanceDataType_F16
    params.queries = faiss.swig_ptr(qs_f16)
    params.queryType = faiss.DistanceDataType_F16

    out_d_f16 = np.empty((nq, k), dtype=np.float32)
    out_i_f16 = np.empty((nq, k), dtype=np.int64)

    params.outDistances = faiss.swig_ptr(out_d_f16)
    params.outIndices = faiss.swig_ptr(out_i_f16)
    # Switch the index type back after the int32 run above.
    params.outIndicesType = faiss.IndicesDataType_I64

    faiss.bfKnn(res, params)

    # Up to 5 differing ids and a looser distance tolerance are accepted
    # for the float16 run.
    self.assertGreaterEqual((out_i_f16 == ref_i_f16).sum(),
                            ref_i_f16.size - 5)
    self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol=2e-3))
def fit(self, x_a, x_b, res=None):
    """
    Project both domains with aligned PCA, then iteratively estimate the
    linear transformation Q from A's projection to B's.

    :param x_a: Samples from A [m, d] (rows = samples)
    :param x_b: Samples from B [m, d] (rows = samples)
    :param res: Optional GPU resource for faiss. Can be used if called
        multiple times; a StandardGpuResources is created when omitted.
    :return: self, with ``pca_a``, ``pca_b``, ``Q`` and ``fitted`` set.
    """
    cfg = self.args
    print('Got {} samples in A and {} in B.'.format(x_a.shape[0],
                                                    x_b.shape[0]))
    tic = time.time()
    self.pca_a, self.pca_b = aligned_pca(x_a, x_b, comps=cfg.n_components)
    z_a = self.pca_a.transform(x_a)
    z_b = self.pca_b.transform(x_b)
    print('PCA representations: ', z_a.shape, z_b.shape, 'took:',
          time.time() - tic)

    Q = np.eye(cfg.n_components, dtype=np.float32)

    if res is None:
        res = faiss.StandardGpuResources()
    # Index over B's projection; it does not change between iterations.
    nbrs_b = faiss.GpuIndexFlatL2(res, cfg.n_components)
    nbrs_b.add(z_b)

    print('Learning {} transformation using {} sets:'.format(
        cfg.transform_type, cfg.pairing))
    for it in range(cfg.n_iters):
        tic = time.time()

        # Step 1 - Matching
        if cfg.pairing == 'paired':
            # Paired data has a fixed matching, so one pass is enough.
            if it > 0:
                break
            assert z_a.shape == z_b.shape
            A, B = z_a, z_b
        else:
            print('Iter {}: '.format(it), end='')
            # Find nearest-neighbors to z_A Q in B:
            d_qa_to_b, i_qa_to_b = nbrs_b.search(z_a @ Q, 1)
            i_qa_to_b = i_qa_to_b.squeeze()

            if cfg.matching != 'nn':
                # Find nearest-neighbors in the reverse direction, for
                # cycle-consistency:
                sel_b = np.unique(i_qa_to_b)
                assert len(sel_b) > 100, 'Only {} unique NNs'.format(
                    len(sel_b))
                nbrs_aQ = faiss.GpuIndexFlatL2(res, cfg.n_components)
                nbrs_aQ.add(z_a @ Q)
                _, back_idx = nbrs_aQ.search(z_b[sel_b], 1)
                # -1 marks samples of B that no sample of A selected.
                i_iqb_to_a = -np.ones(shape=[z_b.shape[0]], dtype=int)
                i_iqb_to_a[sel_b] = back_idx.squeeze()

                # Check for cycle-consistency
                cyc_consistent_a = i_iqb_to_a[i_qa_to_b] == np.arange(
                    len(i_qa_to_b))
                n_consistent = np.count_nonzero(cyc_consistent_a)
                if n_consistent < 1000:
                    # Too few consistent pairs: fall back to using all.
                    print('(only {} consisten pairs) '.format(n_consistent),
                          end='')
                    cyc_consistent_a = np.ones_like(cyc_consistent_a)
                A = z_a[cyc_consistent_a]
                B = z_b[i_qa_to_b[cyc_consistent_a]]
                print('{} B-NNs / {} consistent, mean NN l2 = {:.3f}. '.
                      format(len(sel_b),
                             np.count_nonzero(cyc_consistent_a),
                             np.mean(d_qa_to_b[cyc_consistent_a])),
                      end='')
            else:
                A = z_a
                B = z_b[i_qa_to_b]
                print('Found {} NNs. Mean NN l2 = {:.3f}. '.format(
                    len(np.unique(i_qa_to_b)), np.mean(d_qa_to_b)),
                      end='')

        # Step 2 - Mapping (updating Q):
        prev_Q = Q
        if cfg.transform_type == 'orthogonal':
            left, _singular, right = np.linalg.svd(A.T @ B)
            Q = left @ right
        else:
            Q = np.linalg.inv(A.T @ A) @ A.T @ B

        if np.allclose(Q, prev_Q):
            print('Converged - terminating ICP iterations.')
            break

        print('took {:.2f} sec.'.format(time.time() - tic))

    self.fitted = True
    self.Q = Q

    return self
def test_dist(self):
    """Compare all-pairwise ``faiss.bfKnn`` distances with a CPU ``IndexFlat``.

    For each listed metric the test runs once with float32 inputs and once
    with float16 inputs, reusing one ``GpuDistanceParams`` object per
    metric.
    """
    metrics = [
        faiss.METRIC_L2, faiss.METRIC_INNER_PRODUCT, faiss.METRIC_L1,
        faiss.METRIC_Linf, faiss.METRIC_Canberra, faiss.METRIC_BrayCurtis,
        faiss.METRIC_JensenShannon
    ]

    for metric in metrics:
        print(metric)
        d = 33
        k = 500

        # all pairwise distance should be the same as nb = k
        nb = k
        nq = 20

        # make_t is defined elsewhere in this file; presumably it returns a
        # float32 array of the requested shape -- TODO confirm.
        xs = make_t(nb, d)
        qs = make_t(nq, d)

        res = faiss.StandardGpuResources()

        # Get ground truth using IndexFlat
        index = faiss.IndexFlat(d, metric)
        index.add(xs)
        ref_d, _ = index.search(qs, k)

        # Output buffer that bfKnn fills through the pointer set below.
        out_d = np.empty((nq, k), dtype=np.float32)

        # Try f32 data/queries
        params = faiss.GpuDistanceParams()
        params.metric = metric
        params.k = -1  # all pairwise
        params.dims = d
        params.vectors = faiss.swig_ptr(xs)
        params.numVectors = nb
        params.queries = faiss.swig_ptr(qs)
        params.numQueries = nq
        params.outDistances = faiss.swig_ptr(out_d)

        faiss.bfKnn(res, params)

        # IndexFlat will sort the results, so we need to
        # do the same on our end
        out_d = np.sort(out_d, axis=1)

        # INNER_PRODUCT is in descending order, make sure it is the same
        # order
        if metric == faiss.METRIC_INNER_PRODUCT:
            ref_d = np.sort(ref_d, axis=1)

        print('f32', np.abs(ref_d - out_d).max())

        self.assertTrue(np.allclose(ref_d, out_d, atol=1e-5))

        # Try float16 data/queries
        xs_f16 = xs.astype(np.float16)
        qs_f16 = qs.astype(np.float16)
        # The CPU reference is rebuilt from the float16-rounded values so
        # both sides see the same inputs.
        xs_f16_f32 = xs_f16.astype(np.float32)
        qs_f16_f32 = qs_f16.astype(np.float32)
        index.reset()
        index.add(xs_f16_f32)
        ref_d_f16, _ = index.search(qs_f16_f32, k)

        params.vectors = faiss.swig_ptr(xs_f16)
        params.vectorType = faiss.DistanceDataType_F16
        params.queries = faiss.swig_ptr(qs_f16)
        params.queryType = faiss.DistanceDataType_F16

        out_d_f16 = np.empty((nq, k), dtype=np.float32)
        params.outDistances = faiss.swig_ptr(out_d_f16)

        faiss.bfKnn(res, params)

        # IndexFlat will sort the results, so we need to
        # do the same on our end
        out_d_f16 = np.sort(out_d_f16, axis=1)

        # INNER_PRODUCT is in descending order, make sure it is the same
        # order
        if metric == faiss.METRIC_INNER_PRODUCT:
            ref_d_f16 = np.sort(ref_d_f16, axis=1)

        print('f16', np.abs(ref_d_f16 - out_d_f16).max())

        # A looser tolerance is used for the float16 run.
        self.assertTrue(np.allclose(ref_d_f16, out_d_f16, atol=4e-3))