def build_faiss(self):
    """Build an IVF + 8-bit scalar-quantizer FAISS index in ``self.indexer``.

    ``self.p_embedding`` is converted to a dense float32 matrix, clustered
    with FAISS k-means, and the resulting centroids are loaded into a flat
    L2 index that serves as the coarse quantizer of an
    ``IndexIVFScalarQuantizer``.  The index is then trained on, and filled
    with, the same embeddings.
    """
    # FAISS build
    num_clusters = 16
    niter = 5

    # 1. Clustering
    # presumably p_embedding is a scipy sparse matrix (it exposes .toarray())
    p_emb = self.p_embedding.toarray().astype(np.float32)
    emb_dim = p_emb.shape[-1]
    index_flat = faiss.IndexFlatL2(emb_dim)

    clus = faiss.Clustering(emb_dim, num_clusters)
    clus.verbose = True
    clus.niter = niter
    clus.train(p_emb, index_flat)

    centroids = faiss.vector_float_to_array(clus.centroids)
    centroids = centroids.reshape(num_clusters, emb_dim)

    quantizer = faiss.IndexFlatL2(emb_dim)
    quantizer.add(centroids)

    # 2. SQ8 + IVF indexer (IndexIVFScalarQuantizer)
    # The 4th positional argument is the quantizer type, not the metric.
    # Passing METRIC_L2 there (numeric value 1) silently selected QT_4bit,
    # so both the quantizer type and the metric are spelled out here.
    self.indexer = faiss.IndexIVFScalarQuantizer(
        quantizer,
        quantizer.d,
        quantizer.ntotal,
        faiss.ScalarQuantizer.QT_8bit,
        faiss.METRIC_L2,
    )
    self.indexer.train(p_emb)
    self.indexer.add(p_emb)
def train_index(data, quantizer_path, trained_index_path, fine_quant='SQ8', cuda=False):
    """Train an IVF index on top of a stored coarse quantizer and save it.

    Args:
        data: training vectors handed to the index's ``train`` method.
        quantizer_path: path of the coarse quantizer, read with
            ``faiss.read_index``; its ``ntotal`` is used as the number of
            inverted lists.
        trained_index_path: destination path for the trained index.
        fine_quant: ``'SQ8'`` for an 8-bit scalar quantizer, or ``'PQ<m>'``
            for a product quantizer with ``m`` sub-quantizers of 8 bits.
        cuda: train on GPU 0 when possible.  PQ indexes are always trained
            on the CPU.

    Raises:
        ValueError: if ``fine_quant`` is neither ``'SQ8'`` nor ``'PQ<m>'``.
    """
    # Validate the spec first so a bad value fails before any disk access.
    if fine_quant == 'SQ8':
        pq_m = None
    elif fine_quant.startswith('PQ'):
        pq_m = int(fine_quant[2:])
    else:
        raise ValueError(fine_quant)

    quantizer = faiss.read_index(quantizer_path)
    if pq_m is None:
        # The 4th positional argument is the quantizer type, not the metric.
        # Passing METRIC_L2 there (numeric value 1) silently selected
        # QT_4bit instead of the 8-bit quantizer that 'SQ8' asks for.
        trained_index = faiss.IndexIVFScalarQuantizer(
            quantizer,
            quantizer.d,
            quantizer.ntotal,
            faiss.ScalarQuantizer.QT_8bit,
            faiss.METRIC_L2,
        )
    else:
        trained_index = faiss.IndexIVFPQ(quantizer, quantizer.d, quantizer.ntotal, pq_m, 8)

    train_on_gpu = cuda and pq_m is None
    if cuda and not train_on_gpu:
        print('PQ not supported on GPU; keeping CPU.')

    if train_on_gpu:
        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, trained_index)
        gpu_index.train(data)
        trained_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        # Also covers the cuda + PQ fallback, which previously skipped
        # training altogether and wrote an untrained index to disk.
        trained_index.train(data)

    faiss.write_index(trained_index, trained_index_path)
def subtest(self, mt):
    """Exercise IVF scalar-quantizer k-NN and range search for metric ``mt``.

    For every quantizer type the 10-NN result is compared with the reference
    intersection counts in ``self.ref_results`` and with the index variant
    built by ``subtest_add2col``.  Range search is then checked against the
    k-NN result, and against itself under parallel modes 1 and 2.
    """
    dim = 32
    n_lists = 64
    xt, xb, xq = get_dataset_2(dim, 1000, 2000, 200)

    # Brute-force search provides the ground-truth neighbours.
    exact = faiss.IndexFlat(dim, mt)
    exact.add(xb)
    _, gt_I = exact.search(xq, 10)

    coarse = faiss.IndexFlat(dim, mt)
    for qname in ('8bit', '4bit', '8bit_uniform', '4bit_uniform', 'fp16'):
        qtype = getattr(faiss.ScalarQuantizer, 'QT_' + qname)
        index = faiss.IndexIVFScalarQuantizer(coarse, dim, n_lists, qtype, mt)
        index.train(xt)
        index.add(xb)
        index.nprobe = 4  # hopefully more robust than 1
        D, I = index.search(xq, 10)

        ninter = faiss.eval_intersection(I, gt_I)
        print('(%d, %s): %d, ' % (mt, repr(qname), ninter))
        assert abs(ninter - self.ref_results[(mt, qname)]) <= 9

        D2, I2 = self.subtest_add2col(xb, xq, index, qname)
        assert np.all(I2 == I)

        # also test range search
        is_inner_product = mt == faiss.METRIC_INNER_PRODUCT
        last_rank_dist = D[:, -1]
        if is_inner_product:
            radius = float(last_rank_dist.max())
        else:
            radius = float(last_rank_dist.min())
        print('radius', radius)

        lims, _, I3 = index.range_search(xq, radius)

        # k-NN hits strictly inside the radius must match the range result.
        inside = (D2 > radius) if is_inner_product else (D2 < radius)
        n_expected = n_mismatch = 0
        for i in range(len(xq)):
            found = set(I3[lims[i]:lims[i + 1]])
            expected = set(I2[i, inside[i]])
            n_mismatch += len(found ^ expected)
            n_expected += len(expected)
        print('ndiff %d / %d' % (n_mismatch, n_expected))
        assert n_mismatch < n_expected * 0.01

        for pm in (1, 2):
            print('parallel_mode=%d' % pm)
            index.parallel_mode = pm
            lims4, _, I4 = index.range_search(xq, radius)
            print('sizes', lims4[1:] - lims4[:-1])
            for qno in range(len(lims) - 1):
                Iref = I3[lims[qno]:lims[qno + 1]]
                Inew = I4[lims4[qno]:lims4[qno + 1]]
                assert set(Iref) == set(Inew), \
                    "q %d ref %s new %s" % (qno, Iref, Inew)
def test_ScalarQuantizer(self):
    """Evaluate an IVF index with 8-bit scalar quantization via ``ev``."""
    coarse = faiss.IndexFlatL2(d)
    sq_index = faiss.IndexIVFScalarQuantizer(
        coarse, d, ncentroids, faiss.ScalarQuantizer.QT_8bit)
    sq_index.nprobe = kprobe

    scores = ev.evalres(ev.launch('IVF SQ', sq_index))

    # should give 0.234 0.236 0.236
    assert scores[10] > 0.235
def train_index(start_data, quantizer_path, trained_index_path, num_clusters,
                fine_quant='SQ4', cuda=False, hnsw=False):
    """Build, train and save an inner-product FAISS index.

    Args:
        start_data: 2-D array of training vectors; ``shape[1]`` is the
            vector dimension.
        quantizer_path: not used by this function.
        trained_index_path: where the trained index is written.
        num_clusters: number of inverted lists for the IVF variants.
        fine_quant: ``'SQ4'`` (IVF + 4-bit scalar quantizer), a string
            containing ``'OPQ<code_size>'`` (OPQ transform + IVFPQ, or the
            HNSW variant when ``hnsw`` is true), or a string containing
            ``'none'`` (flat inner-product index).
        cuda: train on GPU 0 with float16 enabled, then copy back to CPU.
        hnsw: select the HNSW variant of the OPQ branch.

    Raises:
        ValueError: if ``fine_quant`` matches none of the supported forms.
    """
    dim = start_data.shape[1]
    coarse = faiss.IndexFlatIP(dim)

    if fine_quant == 'SQ4':
        # Used only for reimplementation
        start_index = faiss.IndexIVFScalarQuantizer(
            coarse, dim, num_clusters,
            faiss.ScalarQuantizer.QT_4bit, faiss.METRIC_INNER_PRODUCT)
    elif 'OPQ' in fine_quant:
        # Default index type; the code size is whatever follows 'OPQ'.
        code_size = int(fine_quant.partition('OPQ')[2])
        if hnsw:
            # NOTE(review): this passes a factory string to the IndexHNSWPQ
            # constructor -- faiss.index_factory may have been intended;
            # confirm before relying on this branch.
            start_index = faiss.IndexHNSWPQ(
                dim, "HNSW32,PQ96", faiss.METRIC_INNER_PRODUCT)
        else:
            opq_matrix = faiss.OPQMatrix(dim, code_size)
            opq_matrix.niter = 10
            ivfpq = faiss.IndexIVFPQ(
                coarse, dim, num_clusters, code_size, 8,
                faiss.METRIC_INNER_PRODUCT)
            start_index = faiss.IndexPreTransform(opq_matrix, ivfpq)
    elif 'none' in fine_quant:
        start_index = faiss.IndexFlatIP(dim)
    else:
        raise ValueError(fine_quant)

    start_index.verbose = False

    if not cuda:
        start_index.train(start_data)
    else:
        # Convert to GPU index
        gpu_resources = faiss.StandardGpuResources()
        cloner_options = faiss.GpuClonerOptions()
        cloner_options.useFloat16 = True
        gpu_index = faiss.index_cpu_to_gpu(
            gpu_resources, 0, start_index, cloner_options)
        gpu_index.verbose = False

        # Train on GPU and back to CPU
        gpu_index.train(start_data)
        start_index = faiss.index_gpu_to_cpu(gpu_index)

    # Make sure to set direct map again
    if 'none' not in fine_quant:
        ivf = faiss.extract_index_ivf(start_index)
        ivf.make_direct_map()
        ivf.set_direct_map_type(faiss.DirectMap.Hashtable)

    faiss.write_index(start_index, trained_index_path)
def test_4variants_ivf(self):
    """Compare top-1 accuracy of IVFFlat against IVF scalar-quantizer variants.

    All indexes share one flat L2 coarse quantizer and are searched with
    ``nprobe = 4``; accuracy is the number of queries whose first result
    equals the exact nearest neighbour.
    """
    d, nt, nb, nq = 32, 2500, 5000, 400
    ncent = 64
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    # common quantizer
    quantizer = faiss.IndexFlatL2(d)

    # Exact search gives the reference nearest neighbour of each query.
    exact = faiss.IndexFlatL2(d)
    exact.add(xb)
    _, I_ref = exact.search(xq, 10)
    ref_top1 = I_ref[:, 0]

    def count_top1_hits(index):
        # Train, fill and query `index`; count the exact top-1 matches.
        index.nprobe = 4
        index.train(xt)
        index.add(xb)
        _, I = index.search(xq, 10)
        return (I[:, 0] == ref_top1).sum()

    flat = faiss.IndexIVFFlat(quantizer, d, ncent, faiss.METRIC_L2)
    flat.cp.min_points_per_centroid = 5  # quiet warning
    nok = {'flat': count_top1_hits(flat)}

    for qname in ('QT_4bit', 'QT_4bit_uniform', 'QT_8bit',
                  'QT_8bit_uniform', 'QT_fp16'):
        qtype = getattr(faiss.ScalarQuantizer, qname)
        sq_index = faiss.IndexIVFScalarQuantizer(
            quantizer, d, ncent, qtype, faiss.METRIC_L2)
        nok[qname] = count_top1_hits(sq_index)

    print(nok, nq)

    self.assertGreaterEqual(nok['flat'], nq * 0.6)

    # The tests below are a bit fragile, it happens that the
    # ordering between uniform and non-uniform are reverted,
    # probably because the dataset is small, which introduces
    # jitter
    expected_order = (
        ('flat', 'QT_8bit'),
        ('QT_8bit', 'QT_4bit'),
        ('QT_8bit', 'QT_8bit_uniform'),
        ('QT_4bit', 'QT_4bit_uniform'),
        ('QT_fp16', 'QT_8bit'),
    )
    for better, worse in expected_order:
        self.assertGreaterEqual(nok[better], nok[worse])
def subtest_8bit_direct(self, metric_type, d):
    """Check that QT_8bit_direct is lossless on data rescaled to 0..255.

    The flat and the IVF scalar-quantizer indexes must return exactly the
    same ids and distances as their unquantized counterparts.
    """
    xt, xb, xq = get_dataset_2(d, 500, 1000, 30)

    # rescale everything to get integer
    tmin, tmax = xt.min(), xt.max()

    def to_byte_range(x):
        scaled = np.floor((x - tmin) * 256 / (tmax - tmin))
        return np.clip(scaled, 0, 255)

    xt, xb, xq = (to_byte_range(a) for a in (xt, xb, xq))

    def check_identical(got, want):
        D, I = got
        Dref, Iref = want
        assert np.all(I == Iref)
        assert np.all(D == Dref)

    # Flat index versus flat scalar quantizer.
    flat_ref = faiss.IndexFlat(d, metric_type)
    flat_ref.add(xb)
    reference = flat_ref.search(xq, 10)

    flat_sq = faiss.IndexScalarQuantizer(
        d, faiss.ScalarQuantizer.QT_8bit_direct, metric_type)
    flat_sq.add(xb)
    check_identical(flat_sq.search(xq, 10), reference)

    # same, with IVF
    nlist = 64
    quantizer = faiss.IndexFlat(d, metric_type)

    ivf_ref = faiss.IndexIVFFlat(quantizer, d, nlist, metric_type)
    ivf_ref.nprobe = 4
    ivf_ref.train(xt)
    ivf_ref.add(xb)
    reference = ivf_ref.search(xq, 10)

    ivf_sq = faiss.IndexIVFScalarQuantizer(
        quantizer, d, nlist,
        faiss.ScalarQuantizer.QT_8bit_direct, metric_type)
    ivf_sq.nprobe = 4
    ivf_sq.by_residual = False
    ivf_sq.train(xt)
    ivf_sq.add(xb)
    check_identical(ivf_sq.search(xq, 10), reference)
def make_indices_copy_from_cpu(nlist, d, qtype, by_residual, metric, clamp):
    """Return a trained CPU IVF-SQ index and a GPU index copied from it.

    The CPU index is trained on, and filled with, 10000 vectors produced by
    ``make_t(10000, d, clamp)``.  The GPU index is constructed from the CPU
    index using GPU resources with temporary memory disabled.

    Returns:
        A ``(cpu_index, gpu_index)`` tuple.
    """
    vectors = make_t(10000, d, clamp)

    coarse = faiss.IndexFlat(d, metric)
    cpu_index = faiss.IndexIVFScalarQuantizer(
        coarse, d, nlist, qtype, metric, by_residual)
    cpu_index.train(vectors)
    cpu_index.add(vectors)

    gpu_resources = faiss.StandardGpuResources()
    gpu_resources.noTempMemory()
    gpu_index = faiss.GpuIndexIVFScalarQuantizer(gpu_resources, cpu_index)

    return cpu_index, gpu_index
def prepare_trained_index(preproc, coarse_quantizer, ncent, pqflat_str, is_gpu):
    """Create the IVF index described by ``pqflat_str`` and train it on CPU.

    Args:
        preproc: preprocessing object; ``d_out`` gives the index dimension
            and ``apply_py`` transforms the training vectors.
        coarse_quantizer: coarse quantizer for the Flat and PQ variants.
        ncent: number of inverted lists.
        pqflat_str: ``'Flat'``, a string containing ``'SQ<bits>'`` (``SQ16``
            selects fp16), or ``'PQ<m>x<log2kstar>'``.
        is_gpu: when equal to ``False`` and ``log2kstar == 4``, the
            fast-scan PQ index is used.

    Returns:
        The trained index.

    Relies on the module-level names ``fmetric``, ``useFloat16``, ``xt`` and
    ``sanitize``.
    """
    d = preproc.d_out

    if pqflat_str == 'Flat':
        print("making an IVFFlat index")
        idx_model = faiss.IndexIVFFlat(coarse_quantizer, d, ncent, fmetric)
    elif 'SQ' in pqflat_str:
        print("making a SQ index")
        # NOTE(review): this branch builds its own flat quantizer instead of
        # using coarse_quantizer, yet coarse_quantizer is still disowned
        # below -- confirm the intended ownership.
        if fmetric == faiss.METRIC_L2:
            quantizer = faiss.IndexFlatL2(d)
        elif fmetric == faiss.METRIC_INNER_PRODUCT:
            quantizer = faiss.IndexFlatIP(d)
        sq_bits = pqflat_str.split("SQ")[1]
        qt_name = "QT_fp16" if sq_bits == "16" else "QT_" + str(sq_bits) + "bit"
        qtype = getattr(faiss.ScalarQuantizer, qt_name)
        idx_model = faiss.IndexIVFScalarQuantizer(
            quantizer, d, ncent, qtype, fmetric)
    else:
        pq_spec = pqflat_str[2:].split("x")
        assert len(pq_spec) == 2, "use format PQ(m)x(log2kstar)"
        m, log2kstar = map(int, pq_spec)
        assert m < 56 or useFloat16, "PQ%d will work only with -float16" % m
        # `== False` is kept on purpose: it is not the same as `not is_gpu`
        # for values such as None.
        use_fast_scan = log2kstar == 4 and is_gpu == False  # noqa: E712
        if use_fast_scan:
            idx_model = faiss.IndexIVFPQFastScan(
                coarse_quantizer, d, ncent, m, log2kstar, fmetric)
            print(
                "making an IndexIVFPQFastScan index, m = %d, log2kstar = %d"
                % (m, log2kstar))
        else:
            idx_model = faiss.IndexIVFPQ(
                coarse_quantizer, d, ncent, m, log2kstar, fmetric)
            print("making an IVFPQ index, m = %d, log2kstar = %d"
                  % (m, log2kstar))

    # Hand ownership of the coarse quantizer over to the index.
    coarse_quantizer.this.disown()
    idx_model.own_fields = True

    # finish training on CPU
    started = time.time()
    print("Training vector codes")
    train_vectors = preproc.apply_py(sanitize(xt))
    idx_model.train(train_vectors)
    print(" done %.3f s" % (time.time() - started))

    return idx_model
def test_4variants(self):
    """Rank IVFFlat and four scalar-quantizer variants by top-1 accuracy.

    Uses seeded uniform random data; every index shares one flat L2 coarse
    quantizer and is searched with ``nprobe = 4``.
    """
    d, nt, nq, nb = 32, 1500, 200, 10000
    ncent = 128

    np.random.seed(123)

    def random_vectors(count):
        return np.random.random(size=(count, d)).astype('float32')

    # Draw order (train, query, base) determines the generated data.
    xt = random_vectors(nt)
    xq = random_vectors(nq)
    xb = random_vectors(nb)

    # common quantizer
    quantizer = faiss.IndexFlatL2(d)

    exact = faiss.IndexFlatL2(d)
    exact.add(xb)
    _, I_ref = exact.search(xq, 10)

    nok = {}
    labels = ['flat', 'QT_4bit', 'QT_4bit_uniform', 'QT_8bit', 'QT_8bit_uniform']
    for label in labels:
        if label == 'flat':
            index = faiss.IndexIVFFlat(quantizer, d, ncent, faiss.METRIC_L2)
        else:
            qtype = getattr(faiss.ScalarQuantizer, label)
            index = faiss.IndexIVFScalarQuantizer(
                quantizer, d, ncent, qtype, faiss.METRIC_L2)
        index.nprobe = 4
        index.train(xt)
        index.add(xb)
        _, I = index.search(xq, 10)
        # Count queries whose first result is the exact nearest neighbour.
        nok[label] = (I[:, 0] == I_ref[:, 0]).sum()

    print(nok)

    self.assertGreaterEqual(nok['flat'], nok['QT_8bit'])
    self.assertGreaterEqual(nok['QT_8bit'], nok['QT_4bit'])
    self.assertGreaterEqual(nok['QT_8bit'], nok['QT_8bit_uniform'])
    self.assertGreaterEqual(nok['QT_4bit'], nok['QT_4bit_uniform'])
def test_4variants_ivf(self):
    """Rank IVFFlat and four scalar-quantizer variants by top-1 accuracy.

    Data comes from ``get_dataset``; every index shares one flat L2 coarse
    quantizer and is searched with ``nprobe = 4``.
    """
    d = 32
    nt, nq, nb = 1500, 200, 10000
    ncent = 128
    xt, xb, xq = get_dataset(d, nb, nt, nq)

    # common quantizer
    quantizer = faiss.IndexFlatL2(d)

    # Exact nearest neighbours used as the reference.
    brute_force = faiss.IndexFlatL2(d)
    brute_force.add(xb)
    _, I_ref = brute_force.search(xq, 10)

    def top1_matches(candidate):
        # Train, fill and query `candidate`; count the exact top-1 matches.
        candidate.nprobe = 4
        candidate.train(xt)
        candidate.add(xb)
        _, found = candidate.search(xq, 10)
        return (found[:, 0] == I_ref[:, 0]).sum()

    nok = {}
    nok['flat'] = top1_matches(
        faiss.IndexIVFFlat(quantizer, d, ncent, faiss.METRIC_L2))

    for qname in ('QT_4bit', 'QT_4bit_uniform', 'QT_8bit', 'QT_8bit_uniform'):
        qtype = getattr(faiss.ScalarQuantizer, qname)
        nok[qname] = top1_matches(
            faiss.IndexIVFScalarQuantizer(
                quantizer, d, ncent, qtype, faiss.METRIC_L2))

    print(nok)

    for better, worse in (('flat', 'QT_8bit'),
                          ('QT_8bit', 'QT_4bit'),
                          ('QT_8bit', 'QT_8bit_uniform'),
                          ('QT_4bit', 'QT_4bit_uniform')):
        self.assertGreaterEqual(nok[better], nok[worse])
def train_index(start_data, quantizer_path, trained_index_path, num_clusters,
                fine_quant='SQ4', cuda=False, hnsw=False):
    """Build, train and save an inner-product IVF index.

    Args:
        start_data: 2-D array of training vectors; ``shape[1]`` is the
            vector dimension.
        quantizer_path: not used by this function.
        trained_index_path: where the trained index is written.
        num_clusters: number of inverted lists.
        fine_quant: ``'SQ4'`` for a 4-bit scalar quantizer, or a string of
            the form ``'PQ<code_size>_<bits>'`` where ``bits`` must be 8.
        cuda: train on GPU 0 with float16 enabled, then copy back to CPU.
        hnsw: not used by this function.

    Raises:
        ValueError: if ``fine_quant`` is neither ``'SQ4'`` nor contains
            ``'PQ'``.
    """
    dim = start_data.shape[1]
    coarse = faiss.IndexFlatIP(dim)

    if fine_quant == 'SQ4':
        start_index = faiss.IndexIVFScalarQuantizer(
            coarse, dim, num_clusters,
            faiss.ScalarQuantizer.QT_4bit, faiss.METRIC_INNER_PRODUCT)
    elif 'PQ' in fine_quant:
        fields = fine_quant.split('_')
        code_size = int(fields[0][2:])
        bits_per_sub = int(fields[1])
        assert bits_per_sub == 8
        start_index = faiss.IndexIVFPQ(
            coarse, dim, num_clusters, code_size, bits_per_sub,
            faiss.METRIC_INNER_PRODUCT)
    else:
        raise ValueError(fine_quant)

    start_index.verbose = True

    if not cuda:
        start_index.train(start_data)
    else:
        # Convert to GPU index
        gpu_resources = faiss.StandardGpuResources()
        cloner_options = faiss.GpuClonerOptions()
        cloner_options.useFloat16 = True
        gpu_index = faiss.index_cpu_to_gpu(
            gpu_resources, 0, start_index, cloner_options)
        gpu_index.verbose = True

        # Train on GPU and back to CPU
        gpu_index.train(start_data)
        start_index = faiss.index_gpu_to_cpu(gpu_index)

    # Make sure to set direct map again
    start_index.make_direct_map()
    start_index.set_direct_map_type(faiss.DirectMap.Hashtable)

    faiss.write_index(start_index, trained_index_path)
def subtest_add2col(self, xb, xq, index, qname):
    """Test with 2 additional dimensions to take also the non-SIMD
    codepath. We don't retrain anything but add 2 dims to the
    queries, the centroids and the trained ScalarQuantizer.

    Returns the result of a 10-NN search of the widened queries in the
    widened index.
    """
    _, d = xb.shape
    d2 = d + 2
    xb2 = self.add2columns(xb)
    xq2 = self.add2columns(xq)

    # Rebuild the coarse quantizer from the widened centroids.
    n_lists = index.nlist
    coarse = faiss.downcast_index(index.quantizer)
    coarse2 = faiss.IndexFlat(d2, index.metric_type)
    centroids = faiss.vector_to_array(coarse.xb).reshape(n_lists, d)
    coarse2.add(self.add2columns(centroids))

    index2 = faiss.IndexIVFScalarQuantizer(
        coarse2, d2, index.nlist, index.sq.qtype, index.metric_type)
    index2.nprobe = 4

    if qname in ('8bit', '4bit'):
        trained = faiss.vector_to_array(index.sq.trained).reshape(2, -1)
        nt = trained.shape[1]
        # 2 lines: vmins and vdiffs
        new_nt = int(nt * d2 / d)
        padding = np.zeros((2, new_nt - nt), dtype='float32')
        padding[1, :] = 1.0  # set vdiff to 1 to avoid div by 0
        trained2 = np.hstack((trained, padding))
        faiss.copy_array_to_vector(trained2.ravel(), index2.sq.trained)
    else:
        index2.sq.trained = index.sq.trained

    index2.is_trained = True
    index2.add(xb2)
    return index2.search(xq2, 10)
def subtest(self, mt):
    """Check IVF scalar-quantizer k-NN accuracy for metric ``mt``.

    For every quantizer type the 10-NN intersection with the exact result
    must be at least the reference count in ``self.ref_results`` minus 4,
    and the index variant built by ``subtest_add2col`` must return the same
    ids.
    """
    dim = 32
    n_lists = 64
    xt, xb, xq = get_dataset_2(dim, 1000, 2000, 200)

    # Brute-force search provides the ground-truth neighbours.
    exact = faiss.IndexFlat(dim, mt)
    exact.add(xb)
    _, gt_I = exact.search(xq, 10)

    coarse = faiss.IndexFlat(dim, mt)
    for qname in ('8bit', '4bit', '8bit_uniform', '4bit_uniform', 'fp16'):
        qtype = getattr(faiss.ScalarQuantizer, 'QT_' + qname)
        index = faiss.IndexIVFScalarQuantizer(coarse, dim, n_lists, qtype, mt)
        index.train(xt)
        index.add(xb)
        index.nprobe = 4  # hopefully more robust than 1
        _, I = index.search(xq, 10)

        ninter = faiss.eval_intersection(I, gt_I)
        print('(%d, %s): %d, ' % (mt, repr(qname), ninter))
        assert ninter >= self.ref_results[(mt, qname)] - 4

        _, I2 = self.subtest_add2col(xb, xq, index, qname)
        assert np.all(I2 == I)
# Every attribute of faiss.ScalarQuantizer whose name starts with 'QT_',
# as (name, value) pairs.
variants = [(name, getattr(faiss.ScalarQuantizer, name))
            for name in dir(faiss.ScalarQuantizer)
            if name.startswith('QT_')]

# Flat L2 coarse quantizer shared by the indexes built below.
quantizer = faiss.IndexFlatL2(d)
# quantizer.add(np.zeros((1, d), dtype='float32'))

# NOTE: the constant-false guard means this benchmark never runs.
if False:
    # 'flat' is the IVFFlat baseline; its qtype value (0) is not used.
    for name, qtype in [('flat', 0)] + variants:
        print("============== test", name)
        t0 = time.time()

        if name == 'flat':
            index = faiss.IndexIVFFlat(quantizer, d, ncent, faiss.METRIC_L2)
        else:
            index = faiss.IndexIVFScalarQuantizer(quantizer, d, ncent, qtype, faiss.METRIC_L2)

        index.nprobe = 16
        print("[%.3f s] train" % (time.time() - t0))
        index.train(xt)
        print("[%.3f s] add" % (time.time() - t0))
        index.add(xb)
        print("[%.3f s] search" % (time.time() - t0))
        D, I = index.search(xq, 100)
        print("[%.3f s] eval" % (time.time() - t0))

        # Fraction of queries whose first ground-truth id appears within
        # the first `rank` results.
        for rank in 1, 10, 100:
            n_ok = (I[:, :rank] == gt[:, :1]).sum()
            print("%.4f" % (n_ok / float(nq)), end=' ')
        print()
def __init__(self):
    """Create the IVF 8-bit scalar-quantizer index stored in ``self.index``.

    Reads ``self.d``, ``self.nlist`` and ``self.nprobe``, which must already
    be available on the instance when this runs.
    """
    coarse = faiss.IndexFlatL2(self.d)
    self.index = faiss.IndexIVFScalarQuantizer(
        coarse,
        self.d,
        self.nlist,
        faiss.ScalarQuantizer.QT_8bit,
    )
    self.index.nprobe = self.nprobe
print('apply random rotation')
# Random d x d rotation initialised with a fixed seed, applied to the
# centroids before they are indexed.
rrot = faiss.RandomRotationMatrix(d, d)
rrot.init(1234)
centroids = rrot.apply_py(centroids)

print('make HNSW index as quantizer')
# The rotated centroids are stored in an HNSW graph index that acts as the
# coarse quantizer.
quantizer = faiss.IndexHNSWFlat(d, 32)
quantizer.hnsw.efSearch = 1024
quantizer.hnsw.efConstruction = 200
quantizer.add(centroids)

print('build index')
# The same rotation is applied as a pre-transform in front of the IVF
# index with a 6-bit scalar quantizer.
index = faiss.IndexPreTransform(
    rrot,
    faiss.IndexIVFScalarQuantizer(quantizer, d, ncent, faiss.ScalarQuantizer.QT_6bit))


def ivecs_mmap(fname):
    """Memory-map ``fname`` as int32 rows and drop each row's first column.

    The first int32 of the file is read as the row width ``d``; the data is
    viewed as rows of ``d + 1`` values and the leading value of every row
    is discarded.
    """
    a = np.memmap(fname, dtype='int32', mode='r')
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:]


def fvecs_mmap(fname):
    """Same layout as ``ivecs_mmap``, with the payload viewed as float32."""
    return ivecs_mmap(fname).view('float32')


print('finish training index')
xt = fvecs_mmap(deep1bdir + 'learn.fvecs')
# Keep the first 256 * 1000 rows as a contiguous float32 array.
xt = np.ascontiguousarray(xt[:256 * 1000], dtype='float32')