def _load_ann_index(index_filename: str, device: int) -> faiss.Index:
    """
    Load the ANN index from the given file and move it to the GPU.

    Parameters
    ----------
    index_filename : str
        The ANN index filename.
    device : int
        The GPU device on which to load the index.

    Returns
    -------
    faiss.Index
        The Faiss `Index`.
    """
    # https://github.com/facebookresearch/faiss/blob/2cce2e5f59a5047aa9a1729141e773da9bec6b78/benchs/bench_gpu_1bn.py#L608
    # logger.debug('Load the ANN index from file %s', index_filename)
    index_cpu = faiss.read_index(index_filename)
    res = faiss.StandardGpuResources()
    co = faiss.GpuClonerOptions()
    co.useFloat16 = True
    co.useFloat16CoarseQuantizer = False
    co.indicesOptions = faiss.INDICES_CPU
    co.reserveVecs = index_cpu.ntotal
    index = faiss.index_cpu_to_gpu(res, device, index_cpu, co)
    # Composite (sharded/replicated) indices expose their sub-indices via
    # at()/count(); set nprobe on each sub-index individually.
    if hasattr(index, 'at'):
        for i in range(index.count()):
            simple_index = faiss.downcast_index(index.at(i))
            simple_index.nprobe = min(math.ceil(simple_index.nlist / 2),
                                      config.num_probe)
    else:
        index.nprobe = min(math.ceil(index.nlist / 2), config.num_probe)
    return index
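# Usage sketch for _load_ann_index. The index filename and query array are
# hypothetical, and the function relies on module-level `faiss`, `math`, and
# `config.num_probe` as above.
import numpy as np

index = _load_ann_index('ann_index_charge2.faiss', device=0)
queries = np.random.rand(16, index.d).astype('float32')  # toy queries
distances, neighbors = index.search(queries, 10)  # 10 nearest neighbors each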
def convert_index_to_gpu(index, faiss_gpu_index, useFloat16=False):
    if type(faiss_gpu_index) == list and len(faiss_gpu_index) == 1:
        faiss_gpu_index = faiss_gpu_index[0]

    if isinstance(faiss_gpu_index, int):
        res = faiss.StandardGpuResources()
        res.setTempMemory(512 * 1024 * 1024)
        co = faiss.GpuClonerOptions()
        co.useFloat16 = useFloat16
        index = faiss.index_cpu_to_gpu(res, faiss_gpu_index, index, co)
    else:
        global gpu_resources
        if len(gpu_resources) == 0:
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(256 * 1024 * 1024)
                gpu_resources.append(res)

        assert isinstance(faiss_gpu_index, list)
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        co = faiss.GpuMultipleClonerOptions()
        co.shard = True
        co.useFloat16 = useFloat16
        for i in faiss_gpu_index:
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index
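# Usage sketch for convert_index_to_gpu with toy data. A module-level
# `gpu_resources = []` list is assumed (the function mutates it via `global`),
# as are two visible GPUs for the sharded variant: an int clones the index
# onto one device, a list of device ids shards it across all of them.
import numpy as np
import faiss

gpu_resources = []

xb = np.random.rand(10000, 128).astype('float32')
cpu_index = faiss.IndexFlatL2(128)
cpu_index.add(xb)

single_gpu = convert_index_to_gpu(cpu_index, 0, useFloat16=True)
sharded = convert_index_to_gpu(cpu_index, [0, 1], useFloat16=True)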
def __init__(self, target, nprobe=128, num_gpu=None, index_factory_str=None,
             verbose=False, mode='proxy', using_gpu=True):
    self._res_list = []

    found_gpu = len(os.environ['CUDA_VISIBLE_DEVICES'].split(","))
    if found_gpu == 0:
        raise RuntimeError(
            "No GPU found. Please export CUDA_VISIBLE_DEVICES")
    if num_gpu is None or num_gpu > found_gpu:
        num_gpu = found_gpu
    print('[faiss gpu] #GPU: {}'.format(num_gpu))

    size, dim = target.shape
    assert size > 0, "size: {}".format(size)
    if index_factory_str is None:
        index_factory_str = "IVF{},PQ{}".format(
            min(8192, 16 * round(np.sqrt(size))), 32)
    cpu_index = faiss.index_factory(dim, index_factory_str)
    cpu_index.nprobe = nprobe

    if mode == 'proxy':
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        co.usePrecomputed = False

        index = faiss.IndexProxy()
        for i in range(num_gpu):
            res = faiss.StandardGpuResources()
            self._res_list.append(res)
            sub_index = faiss.index_cpu_to_gpu(
                res, i, cpu_index, co) if using_gpu else cpu_index
            index.addIndex(sub_index)
    elif mode == 'shard':
        raise NotImplementedError
    else:
        raise KeyError("Unknown index mode")

    index = faiss.IndexIDMap(index)
    index.verbose = verbose

    # get nlist to decide how many samples are used for training
    nlist = int([
        item for item in index_factory_str.split(",") if 'IVF' in item
    ][0].replace("IVF", ""))

    # training
    if not index.is_trained:
        indexes_sample_for_train = np.random.randint(0, size, nlist * 256)
        index.train(target[indexes_sample_for_train])

    # add with ids
    target_ids = np.arange(0, size)
    index.add_with_ids(target, target_ids)
    self.index = index
def fit(self, X):
    X = X.astype(numpy.float32)
    self._index = faiss.index_factory(
        len(X[0]), "IVF%d,PQ64" % self._n_bits)
    res = faiss.StandardGpuResources()
    co = faiss.GpuClonerOptions()
    co.useFloat16 = True
    self._index = faiss.index_cpu_to_gpu(res, 0, self._index, co)
    self._index.train(X)
    self._index.add(X)
    self._index.setNumProbes(self._n_probes)
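# Note on setNumProbes(): it is only available on older faiss GPU index
# classes. On recent builds, a sketch of the equivalent (following the faiss
# GPU benchmark scripts) sets nprobe through GpuParameterSpace; the data,
# dimensions, and factory string below are toy values for illustration.
import numpy as np
import faiss

xb = np.random.rand(100000, 64).astype('float32')
cpu_index = faiss.index_factory(64, "IVF1024,PQ32")
res = faiss.StandardGpuResources()
gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
gpu_index.train(xb)
gpu_index.add(xb)
ps = faiss.GpuParameterSpace()
ps.set_index_parameter(gpu_index, 'nprobe', 32)  # instead of setNumProbes(32)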
def __init__(
        self,
        xt_path="/home/wenqingfu/project/faiss/sift1M/sift_learn.fvecs",
        xb_path="/home/wenqingfu/project/faiss/sift1M/sift_base.fvecs"):
    self.xt = self.fvecs_read(xt_path)
    self.xb = self.fvecs_read(xb_path)
    self.res = faiss.StandardGpuResources()
    self.co = faiss.GpuClonerOptions()
    self.co.useFloat16 = True
    self.co.usePrecomputed = False
def train_index(start_data, quantizer_path, trained_index_path, num_clusters,
                fine_quant='SQ4', cuda=False, hnsw=False):
    ds = start_data.shape[1]
    quantizer = faiss.IndexFlatIP(ds)

    # Used only for reimplementation
    if fine_quant == 'SQ4':
        start_index = faiss.IndexIVFScalarQuantizer(
            quantizer, ds, num_clusters, faiss.ScalarQuantizer.QT_4bit,
            faiss.METRIC_INNER_PRODUCT)
    # Default index type
    elif 'OPQ' in fine_quant:
        code_size = int(fine_quant[fine_quant.index('OPQ') + 3:])
        if hnsw:
            # faiss.IndexHNSWPQ takes integer arguments (d, pq_m, M), not a
            # factory string; build the HNSW graph with PQ storage through
            # the index factory instead.
            start_index = faiss.index_factory(
                ds, "HNSW32,PQ96", faiss.METRIC_INNER_PRODUCT)
        else:
            opq_matrix = faiss.OPQMatrix(ds, code_size)
            opq_matrix.niter = 10
            sub_index = faiss.IndexIVFPQ(quantizer, ds, num_clusters,
                                         code_size, 8,
                                         faiss.METRIC_INNER_PRODUCT)
            start_index = faiss.IndexPreTransform(opq_matrix, sub_index)
    elif 'none' in fine_quant:
        start_index = faiss.IndexFlatIP(ds)
    else:
        raise ValueError(fine_quant)

    start_index.verbose = False
    if cuda:
        # Convert to GPU index
        res = faiss.StandardGpuResources()
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        gpu_index = faiss.index_cpu_to_gpu(res, 0, start_index, co)
        gpu_index.verbose = False

        # Train on GPU and back to CPU
        gpu_index.train(start_data)
        start_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        start_index.train(start_data)

    # Make sure to set direct map again
    if 'none' not in fine_quant:
        index_ivf = faiss.extract_index_ivf(start_index)
        index_ivf.make_direct_map()
        index_ivf.set_direct_map_type(faiss.DirectMap.Hashtable)
    faiss.write_index(start_index, trained_index_path)
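# Usage sketch for train_index with toy data and hypothetical output paths
# (quantizer_path is accepted but unused by the function above):
import numpy as np

start_data = np.random.rand(50000, 768).astype('float32')
train_index(start_data, 'quantizer.faiss', 'trained.faiss',
            num_clusters=256, fine_quant='SQ4', cuda=False)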
def _init_faiss(self, ngpu, feat_len, tempIVF, tempPQ, tempNP):
    co = faiss.GpuClonerOptions()
    co.useFloat16 = True
    co.usePrecomputed = False

    # Setting up GPU resources
    self.res = [faiss.StandardGpuResources() for i in range(ngpu)]

    self.indexes = []
    for i in range(ngpu):
        index = faiss.index_factory(feat_len, tempIVF + "," + tempPQ)
        index.nprobe = tempNP
        index = faiss.index_cpu_to_gpu(self.res[i], i, index, co)
        self.indexes.append(index)

    self.index = faiss.IndexProxy()
    for sub_index in self.indexes:
        self.index.addIndex(sub_index)
def build_nn_search_index(item_vectors, embedding_dim, index_type,
                          nprobe=1, gpu=0):
    index = faiss.index_factory(embedding_dim, index_type,
                                faiss.METRIC_INNER_PRODUCT)
    index.nprobe = nprobe
    if th.cuda.is_available():
        res = faiss.StandardGpuResources()
        cloner_options = faiss.GpuClonerOptions()
        index = faiss.index_cpu_to_gpu(res, gpu, index, cloner_options)
    index.train(item_vectors)
    index.add(item_vectors)
    return index
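# Usage sketch for build_nn_search_index (toy data; `th` is assumed to be
# `import torch as th` in the surrounding module). Normalizing the vectors
# makes the inner-product metric behave like cosine similarity:
import numpy as np

item_vectors = np.random.rand(10000, 128).astype('float32')
item_vectors /= np.linalg.norm(item_vectors, axis=1, keepdims=True)
index = build_nn_search_index(item_vectors, 128, "IVF256,Flat", nprobe=8)
scores, ids = index.search(item_vectors[:5], 10)  # top-10 per query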
def fit(x_train):
    D = x_train.shape[1]
    co = faiss.GpuClonerOptions()
    co.useFloat16 = use_float16

    if metric in ["euclidean", "angular"]:
        if algorithm == "flat":
            index = faiss.IndexFlatL2(D)  # May be used as quantizer
            index = faiss.index_cpu_to_gpu(res, deviceId, index, co)
        elif algorithm == "ivfflat":
            quantizer = faiss.IndexFlatL2(D)  # the other index
            faiss_metric = (
                faiss.METRIC_L2
                if metric == "euclidean"
                else faiss.METRIC_INNER_PRODUCT
            )
            index = faiss.IndexIVFFlat(quantizer, D, nlist, faiss_metric)
            index = faiss.index_cpu_to_gpu(res, deviceId, index, co)

            assert not index.is_trained
            index.train(x_train)  # add vectors to the index
            assert index.is_trained
    else:
        raise NotImplementedError(f"The '{metric}' distance is not supported.")

    # Pre-processing:
    start = timer(use_torch=False)
    index.add(x_train)
    index.nprobe = nprobe
    elapsed = timer(use_torch=False) - start

    # Return an operator for actual KNN queries:
    def f(x_test):
        start = timer(use_torch=False)
        distances, indices = index.search(x_test, K)
        elapsed = timer(use_torch=False) - start
        return indices, elapsed

    return f, elapsed
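# Usage sketch for the fit() closure above. It is written against the
# enclosing benchmark scope, so res, deviceId, metric, algorithm, nlist,
# nprobe, K, use_float16, and timer are assumed to be defined there:
import numpy as np

x_train = np.random.rand(100000, 64).astype('float32')
x_test = np.random.rand(1000, 64).astype('float32')
knn, build_time = fit(x_train)
indices, query_time = knn(x_test)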
def _get_ann_index(self, charge: int) -> faiss.IndexIVF:
    """
    Get the ANN index for the specified charge.

    This allows on-demand loading of the ANN indices and prevents having
    to keep a large amount of index data in memory. The previously used
    index is cached to avoid reloading the same index (only a single index
    is cached to prevent using an excessive amount of memory). If no index
    for the specified charge is cached yet, this index is loaded from the
    disk.

    To prevent loading the same index multiple times (incurring a
    significant performance penalty) it is CRUCIAL that query spectrum
    processing is partitioned by charge so the previous index can be
    reused.

    Parameters
    ----------
    charge : int
        The charge for which the ANN index is retrieved.

    Returns
    -------
    faiss.IndexIVF
        The ANN index for the specified charge.
    """
    with self._ann_index_lock:
        if self._current_index[0] != charge:
            # Release memory reserved by the previous index.
            if self._current_index[1] is not None:
                self._current_index[1].reset()
            # Load the new index.
            logging.debug('Load the ANN index for charge %d', charge)
            index = faiss.read_index(self._ann_filenames[charge])
            if self._use_gpu:
                co = faiss.GpuClonerOptions()
                co.useFloat16 = True
                index = faiss.index_cpu_to_gpu(self._res, 0, index, co)
                index.setNumProbes(self._num_probe)
            else:
                index.nprobe = self._num_probe
            self._current_index = charge, index

        return self._current_index[1]
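# Sketch of the charge partitioning the docstring calls CRUCIAL, assumed to
# live in another method of the same class. `spectra` is a hypothetical
# iterable of query spectra with precursor_charge and vector attributes;
# grouping by charge means each ANN index is read from disk at most once.
import itertools
import numpy as np

spectra = sorted(spectra, key=lambda s: s.precursor_charge)
for charge, group in itertools.groupby(
        spectra, key=lambda s: s.precursor_charge):
    index = self._get_ann_index(charge)
    vectors = np.vstack([s.vector for s in group]).astype(np.float32)
    distances, neighbors = index.search(vectors, 5)  # hypothetical k = 5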
def train_index(start_data, quantizer_path, trained_index_path, num_clusters,
                fine_quant='SQ4', cuda=False, hnsw=False):
    ds = start_data.shape[1]
    quantizer = faiss.IndexFlatIP(ds)
    if fine_quant == 'SQ4':
        start_index = faiss.IndexIVFScalarQuantizer(
            quantizer, ds, num_clusters, faiss.ScalarQuantizer.QT_4bit,
            faiss.METRIC_INNER_PRODUCT)
    elif 'PQ' in fine_quant:
        code_size = int(fine_quant.split('_')[0][2:])
        bits_per_sub = int(fine_quant.split('_')[1])
        assert bits_per_sub == 8
        start_index = faiss.IndexIVFPQ(quantizer, ds, num_clusters,
                                       code_size, bits_per_sub,
                                       faiss.METRIC_INNER_PRODUCT)
    else:
        raise ValueError(fine_quant)

    start_index.verbose = True
    if cuda:
        # Convert to GPU index
        res = faiss.StandardGpuResources()
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        gpu_index = faiss.index_cpu_to_gpu(res, 0, start_index, co)
        gpu_index.verbose = True

        # Train on GPU and back to CPU
        gpu_index.train(start_data)
        start_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        start_index.train(start_data)

    # Make sure to set direct map again
    start_index.make_direct_map()
    start_index.set_direct_map_type(faiss.DirectMap.Hashtable)
    faiss.write_index(start_index, trained_index_path)
def get_idxs_and_dists(query_features, index_features, BS=32):
    import faiss
    from fastprogress.fastprogress import progress_bar  # assumed source of progress_bar

    res = faiss.StandardGpuResources()
    co = faiss.GpuClonerOptions()
    FEAT_DIM = index_features.shape[1]
    cpu_index = faiss.IndexFlatL2(FEAT_DIM)
    cpu_index.add(index_features)
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co)

    out_dists = np.zeros((len(query_features), 100), dtype=np.float32)
    out_idxs = np.zeros((len(query_features), 100), dtype=np.int32)
    NUM_QUERY = len(query_features)
    for ind in progress_bar(range(0, len(query_features), BS)):
        fin = ind + BS
        if fin > NUM_QUERY:
            fin = NUM_QUERY
        q_descs = query_features[ind:fin]
        D, I = index.search(q_descs, 100)
        out_dists[ind:fin] = D
        out_idxs[ind:fin] = I
    return out_idxs, out_dists
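# Usage sketch for get_idxs_and_dists with toy features (faiss expects
# contiguous float32 arrays); returns the top-100 neighbors per query:
import numpy as np

index_features = np.random.rand(50000, 512).astype('float32')
query_features = np.random.rand(1000, 512).astype('float32')
out_idxs, out_dists = get_idxs_and_dists(query_features, index_features, BS=64)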
def load_index(passage_embeddings, index_path, faiss_gpu_index, use_gpu):
    dim = passage_embeddings.shape[1]
    if index_path is None:
        index = faiss.index_factory(dim, "Flat", faiss.METRIC_INNER_PRODUCT)
        index.add(passage_embeddings)
    else:
        index = faiss.read_index(index_path)

    if faiss_gpu_index and use_gpu:
        if len(faiss_gpu_index) == 1:
            res = faiss.StandardGpuResources()
            res.setTempMemory(1024 * 1024 * 1024)
            co = faiss.GpuClonerOptions()
            if index_path:
                co.useFloat16 = True
            else:
                co.useFloat16 = False
            # index_cpu_to_gpu expects a device id, not a list.
            index = faiss.index_cpu_to_gpu(res, faiss_gpu_index[0], index, co)
        else:
            assert not index_path  # Only need one GPU for compressed index
            global gpu_resources
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(128 * 1024 * 1024)
                gpu_resources.append(res)

            assert isinstance(faiss_gpu_index, list)
            vres = faiss.GpuResourcesVector()
            vdev = faiss.IntVector()
            co = faiss.GpuMultipleClonerOptions()
            co.shard = True
            for i in faiss_gpu_index:
                vdev.push_back(i)
                vres.push_back(gpu_resources[i])
            index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index
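# Usage sketch for load_index (toy embeddings; a module-level
# `gpu_resources = []` list is assumed, as in the multi-GPU branch above):
import numpy as np

gpu_resources = []
passage_embeddings = np.random.rand(10000, 768).astype('float32')
index = load_index(passage_embeddings, None, [0], use_gpu=True)
scores, passage_ids = index.search(passage_embeddings[:3], 10)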
# the recall should be 1 at all times
print("k=%d %.3f ms, R@1 %.4f" % (k, t, r[1]))

#################################################################
# Approximate search experiment
#################################################################

print("============ Approximate search")

index = faiss.index_factory(d, "IVF4096,PQ64")
# faster, uses more memory
# index = faiss.index_factory(d, "IVF16384,Flat")

co = faiss.GpuClonerOptions()
# here we are using a 64-byte PQ, so we must set the lookup tables to
# 16 bit float (this is due to the limited temporary memory).
co.useFloat16 = True

index = faiss.index_cpu_to_gpu(res, 0, index, co)

print("train")
index.train(xt)
print("add vectors to index")
index.add(xb)
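# A hedged continuation sketch (not the original benchmark code): after
# training and adding, nprobe on the GPU-cloned IVF index is set through
# GpuParameterSpace and the index is searched with the demo's query set xq
# (assumed from the surrounding script).
ps = faiss.GpuParameterSpace()
ps.set_index_parameter(index, 'nprobe', 16)
D, I = index.search(xq, 100)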
def add_to_index(dump_paths, trained_index_path, target_index_path,
                 idx2id_path, num_docs_per_add=1000, cuda=False,
                 fine_quant='SQ4', offset=0, norm_th=999, ignore_ids=None):
    sidx2doc_id = []
    sidx2word_id = []
    dumps = [h5py.File(dump_path, 'r') for dump_path in dump_paths]

    print('reading %s' % trained_index_path)
    start_index = faiss.read_index(trained_index_path)
    start_index.make_direct_map()
    start_index.set_direct_map_type(faiss.DirectMap.Hashtable)

    if cuda:
        if 'PQ' in fine_quant:
            index_ivf = faiss.extract_index_ivf(start_index)
            quantizer = index_ivf.quantizer
            quantizer_gpu = faiss.index_cpu_to_all_gpus(quantizer)
            index_ivf.quantizer = quantizer_gpu
        else:
            res = faiss.StandardGpuResources()
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            start_index = faiss.index_cpu_to_gpu(res, 0, start_index, co)

    print('adding following dumps:')
    for dump_path in dump_paths:
        print(dump_path)

    start_total = 0
    start_total_prev = 0
    cnt = 0
    for di, phrase_dump in enumerate(tqdm(dumps, desc='dumps')):
        starts = []
        start_valids = []
        dump_length = len(phrase_dump)
        for i, (doc_idx, doc_group) in enumerate(
                tqdm(phrase_dump.items(), desc='adding %d' % di)):
            if ignore_ids is not None and doc_idx in ignore_ids:
                continue
            num_start = doc_group['start'].shape[0]
            if num_start == 0:
                continue
            cnt += 1

            start = int8_to_float(doc_group['start'][:],
                                  doc_group.attrs['offset'],
                                  doc_group.attrs['scale'])
            start_valid = np.linalg.norm(start, axis=1) <= norm_th

            starts.append(start)
            start_valids.append(start_valid)
            sidx2doc_id.extend([int(doc_idx)] * num_start)
            sidx2word_id.extend(range(num_start))
            start_total += num_start

            if len(starts) > 0 and ((i % num_docs_per_add == 0) or
                                    (i == dump_length - 1)):
                print('adding at %d' % (i + 1))
                add_with_offset(start_index, concat_vectors(starts),
                                concat_vectors(start_valids),
                                start_total_prev, offset)
                start_total_prev = start_total
                starts = []
                start_valids = []
        if len(starts) > 0:
            print('final adding at %d' % (i + 1))
            add_with_offset(start_index, concat_vectors(starts),
                            concat_vectors(start_valids),
                            start_total_prev, offset)
            start_total_prev = start_total
    print('number of docs', cnt)

    for dump in dumps:
        dump.close()

    if cuda:
        print('moving back to cpu')
        if 'PQ' in fine_quant:
            index_ivf.quantizer = quantizer
            del quantizer_gpu
        else:
            start_index = faiss.index_gpu_to_cpu(start_index)

    print('start_index ntotal: %d' % start_index.ntotal)
    print(start_total)

    sidx2doc_id = np.array(sidx2doc_id, dtype=np.int32)
    sidx2word_id = np.array(sidx2word_id, dtype=np.int32)

    print('writing index and metadata')
    with h5py.File(idx2id_path, 'w') as f:
        g = f.create_group(str(offset))
        g.create_dataset('doc', data=sidx2doc_id)
        g.create_dataset('word', data=sidx2word_id)
        g.attrs['offset'] = offset

    faiss.write_index(start_index, target_index_path)
    print('done')
def add_to_index(dump_paths, trained_index_path, target_index_path,
                 idx2id_path, num_docs_per_add=1000, cuda=False,
                 fine_quant='SQ4', offset=0, norm_th=999, ignore_ids=None,
                 avg_vec=None, std_vec=None, first_passage=False,
                 index_filter=-1e8):
    sidx2doc_id = []
    sidx2word_id = []
    dumps = [h5py.File(dump_path, 'r') for dump_path in dump_paths]

    # filter dumps
    if index_filter != -1e8:
        f_dumps = [
            h5py.File(dump_path.replace('/phrase/', '/filter/'), 'r')
            for dump_path in dump_paths
        ]

    print('reading %s' % trained_index_path)
    start_index = faiss.read_index(trained_index_path)
    if 'none' not in fine_quant:
        index_ivf = faiss.extract_index_ivf(start_index)
        index_ivf.make_direct_map()
        index_ivf.set_direct_map_type(faiss.DirectMap.Hashtable)

    if cuda:
        if 'PQ' in fine_quant:
            index_ivf = faiss.extract_index_ivf(start_index)
            quantizer = index_ivf.quantizer
            quantizer_gpu = faiss.index_cpu_to_all_gpus(quantizer)
            index_ivf.quantizer = quantizer_gpu
        else:
            res = faiss.StandardGpuResources()
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            start_index = faiss.index_cpu_to_gpu(res, 0, start_index, co)

    print('adding following dumps:')
    for dump_path in dump_paths:
        print(dump_path)

    start_total = 0
    start_total_prev = 0
    cnt = 0
    for di, phrase_dump in enumerate(tqdm(dumps, desc='dumps')):
        starts = []
        start_valids = []
        dump_length = len(phrase_dump)
        for i, (doc_idx, doc_group) in enumerate(
                tqdm(phrase_dump.items(), desc='adding %d' % di)):
            if ignore_ids is not None and doc_idx in ignore_ids:
                continue
            num_start = doc_group['start'].shape[0]
            if num_start == 0:
                continue
            cnt += 1

            # First passage only
            if first_passage:
                f2o_start = doc_group['f2o_start'][:]
                cut = sum(f2o_start < doc_group['len_per_para'][0])
                start = int8_to_float(doc_group['start'][:cut],
                                      doc_group.attrs['offset'],
                                      doc_group.attrs['scale'])
                num_start = start.shape[0]
            # Apply index filter
            elif index_filter != -1e8:
                o2f_start = {
                    orig: ft
                    for ft, orig in enumerate(doc_group['f2o_start'][:])
                }
                filter_start = f_dumps[di][doc_idx]['filter_start'][:]
                filter_end = f_dumps[di][doc_idx]['filter_end'][:]
                start_idxs, = np.where(filter_start > index_filter)
                end_idxs, = np.where(filter_end > index_filter)
                save_idx = set(np.concatenate([start_idxs, end_idxs]))
                save_idx = sorted(
                    [o2f_start[si] for si in save_idx if si in o2f_start])
                start = int8_to_float(doc_group['start'][save_idx],
                                      doc_group.attrs['offset'],
                                      doc_group.attrs['scale'])
                num_start = start.shape[0]
            else:
                start = int8_to_float(doc_group['start'][:],
                                      doc_group.attrs['offset'],
                                      doc_group.attrs['scale'])
            start_valid = np.linalg.norm(start, axis=1) <= norm_th

            starts.append(start)
            start_valids.append(start_valid)
            sidx2doc_id.extend([int(doc_idx)] * num_start)
            if index_filter == -1e8:
                sidx2word_id.extend(range(num_start))
            else:
                sidx2word_id.extend(save_idx)
            start_total += num_start

            if len(starts) > 0 and ((i % num_docs_per_add == 0) or
                                    (i == dump_length - 1)):
                print('adding at %d' % (i + 1))
                add_with_offset(
                    start_index,
                    concat_vectors(starts),
                    concat_vectors(start_valids),
                    start_total_prev,
                    offset,
                    fine_quant,
                )
                start_total_prev = start_total
                starts = []
                start_valids = []
        if len(starts) > 0:
            print('final adding at %d' % (i + 1))
            add_with_offset(
                start_index,
                concat_vectors(starts),
                concat_vectors(start_valids),
                start_total_prev,
                offset,
                fine_quant,
            )
            start_total_prev = start_total
    print('number of docs', cnt)

    for dump in dumps:
        dump.close()

    if cuda:
        print('moving back to cpu')
        if 'PQ' in fine_quant:
            index_ivf.quantizer = quantizer
            del quantizer_gpu
        else:
            start_index = faiss.index_gpu_to_cpu(start_index)

    print('start_index ntotal: %d' % start_index.ntotal)
    print(start_total)

    sidx2doc_id = np.array(sidx2doc_id, dtype=np.int32)
    sidx2word_id = np.array(sidx2word_id, dtype=np.int32)

    print('writing index and metadata')
    with h5py.File(idx2id_path, 'w') as f:
        g = f.create_group(str(offset))
        g.create_dataset('doc', data=sidx2doc_id)
        g.create_dataset('word', data=sidx2word_id)
        g.attrs['offset'] = offset

    faiss.write_index(start_index, target_index_path)
    print('done')