def test_OIVFPQ(self):
    """Compare IVFPQ with and without an OPQ pre-transform.

    Both indexes are run through ``Randu10kUnbalanced`` and the scores
    returned by ``evalres`` are compared at keys 1, 10 and 100: the
    OPQ-wrapped index must score at least as high as the plain one.
    """
    # Inverted-file parameters shared by both indexes.
    n_lists = 50
    n_subq = 4
    bench = Randu10kUnbalanced()
    dim = bench.d

    # Baseline: plain IVFPQ.
    coarse_plain = faiss.IndexFlatL2(dim)
    plain_index = faiss.IndexIVFPQ(coarse_plain, dim, n_lists, n_subq, 8)
    plain_index.nprobe = 5
    e_ivfpq = bench.evalres(bench.launch('IVFPQ', plain_index))

    # Same IVFPQ configuration placed behind an OPQ transform.
    coarse_opq = faiss.IndexFlatL2(dim)
    inner_ivfpq = faiss.IndexIVFPQ(coarse_opq, dim, n_lists, n_subq, 8)
    inner_ivfpq.nprobe = 5
    rotation = faiss.OPQMatrix(dim, n_subq)
    rotation.niter = 10
    opq_index = faiss.IndexPreTransform(rotation, inner_ivfpq)
    e_oivfpq = bench.evalres(bench.launch('O+IVFPQ', opq_index))

    # The OPQ variant must not score lower than the baseline.
    for rank in (1, 10, 100):
        print(e_oivfpq[rank], e_ivfpq[rank])
        assert e_oivfpq[rank] >= e_ivfpq[rank]
def test_OPQ(self):
    """Compare a plain PQ index with the same PQ behind an OPQ transform.

    Both indexes are run through ``Randu10kUnbalanced``; the scores
    returned by ``evalres`` at keys 1, 10 and 100 must be strictly higher
    for the OPQ-wrapped index.
    """
    n_subq = 4
    bench = Randu10kUnbalanced()
    dim = bench.d

    # Baseline: product quantizer applied to the untransformed vectors.
    plain_pq = faiss.IndexPQ(dim, n_subq, 8)
    e_pq = bench.evalres(bench.launch('PQ', plain_pq))

    # Identical PQ configuration behind an OPQ matrix.
    inner_pq = faiss.IndexPQ(dim, n_subq, 8)
    rotation = faiss.OPQMatrix(dim, n_subq)
    rotation.niter = 10
    rotation.niter_pq = 4
    opq_index = faiss.IndexPreTransform(rotation, inner_pq)
    e_opq = bench.evalres(bench.launch('OPQ', opq_index))

    print('e_pq=%s' % e_pq)
    print('e_opq=%s' % e_opq)

    # OPQ has to beat plain PQ at every checked key.
    for rank in (1, 10, 100):
        assert e_opq[rank] > e_pq[rank]
def train_index(start_data, quantizer_path, trained_index_path, num_clusters,
                fine_quant='SQ4', cuda=False, hnsw=False):
    """Create an inner-product faiss index, train it and write it to disk.

    Args:
        start_data: training vectors; ``start_data.shape[1]`` is used as the
            vector dimension.
        quantizer_path: accepted for interface compatibility; not read here.
        trained_index_path: destination passed to ``faiss.write_index``.
        num_clusters: number of inverted lists for the IVF-based variants.
        fine_quant: ``'SQ4'``, a string containing ``'OPQ<code_size>'``, or a
            string containing ``'none'`` (flat index).
        cuda: when true, training runs on GPU 0 through a float16 clone and
            the result is copied back to CPU.
        hnsw: only consulted for the OPQ variant; selects ``IndexHNSWPQ``.

    Raises:
        ValueError: if ``fine_quant`` matches none of the supported forms.
    """
    dim = start_data.shape[1]
    coarse = faiss.IndexFlatIP(dim)
    metric = faiss.METRIC_INNER_PRODUCT

    if fine_quant == 'SQ4':
        index = faiss.IndexIVFScalarQuantizer(
            coarse, dim, num_clusters, faiss.ScalarQuantizer.QT_4bit, metric
        )
    elif 'OPQ' in fine_quant:
        # Everything after the first "OPQ" is the code size.
        code_size = int(fine_quant.split('OPQ', 1)[1])
        if not hnsw:
            rotation = faiss.OPQMatrix(dim, code_size)
            rotation.niter = 10
            ivfpq = faiss.IndexIVFPQ(coarse, dim, num_clusters, code_size, 8, metric)
            index = faiss.IndexPreTransform(rotation, ivfpq)
        else:
            # NOTE(review): this branch builds no IVF index, yet
            # extract_index_ivf is still called on the result below; the
            # constructor arguments also look like a factory string.
            # Confirm that this path is actually exercised.
            index = faiss.IndexHNSWPQ(dim, "HNSW32,PQ96", metric)
    elif 'none' in fine_quant:
        index = faiss.IndexFlatIP(dim)
    else:
        raise ValueError(fine_quant)

    index.verbose = False

    if not cuda:
        index.train(start_data)
    else:
        # Clone to GPU 0 with float16 storage, train there, copy back.
        gpu_resources = faiss.StandardGpuResources()
        cloner_opts = faiss.GpuClonerOptions()
        cloner_opts.useFloat16 = True
        on_gpu = faiss.index_cpu_to_gpu(gpu_resources, 0, index, cloner_opts)
        on_gpu.verbose = False
        on_gpu.train(start_data)
        index = faiss.index_gpu_to_cpu(on_gpu)

    # Every variant except the flat one gets its direct map (re)configured.
    uses_ivf = 'none' not in fine_quant
    if uses_ivf:
        ivf = faiss.extract_index_ivf(index)
        ivf.make_direct_map()
        ivf.set_direct_map_type(faiss.DirectMap.Hashtable)

    faiss.write_index(index, trained_index_path)
def train_preprocessor(preproc_str):
    """Build and train the vector transform described by ``preproc_str``.

    Supported descriptions (the final character of the string is always
    discarded before the numbers are parsed, so a one-character terminator
    is expected -- presumably a comma; confirm against the caller):

    * ``OPQ<m>[_<dout>]`` + terminator -> ``faiss.OPQMatrix(dim, m, dout)``,
      with ``dout`` defaulting to ``dim``.
    * ``PCAR<dout>`` + terminator -> ``faiss.PCAMatrix(dim, dout, 0, True)``.

    The function reads the module-level names ``dim``, ``xt`` and
    ``sanitize``; training is performed on ``sanitize(xt)``.

    Args:
        preproc_str: textual description of the pre-processing step.

    Returns:
        The trained transform object.

    Raises:
        ValueError: if ``preproc_str`` starts with neither ``OPQ`` nor
            ``PCAR`` (previously an ``assert False``, which is removed
            under ``python -O``), or if its numeric fields do not parse.
    """
    print("train preproc", preproc_str)
    t0 = time.time()
    if preproc_str.startswith('OPQ'):
        fields = preproc_str[3:-1].split('_')
        m = int(fields[0])
        dout = int(fields[1]) if len(fields) == 2 else dim
        preproc = faiss.OPQMatrix(dim, m, dout)
    elif preproc_str.startswith('PCAR'):
        dout = int(preproc_str[4:-1])
        preproc = faiss.PCAMatrix(dim, dout, 0, True)
    else:
        # Explicit exception: an assert would vanish under -O and leave
        # `preproc` unbound further down.
        raise ValueError("unsupported preprocessor: %r" % (preproc_str,))
    preproc.train(sanitize(xt))
    print("preproc train done in %.3f s" % (time.time() - t0))
    return preproc
def train_preprocessor(self, preproc_str_local, xt_local):
    """Return a trained vector transform, loading it from cache when possible.

    If ``self.preproc_cachefile`` is set and the file exists, the transform
    is read from it. Otherwise a new transform is built from
    ``preproc_str_local``, trained on (at most the first 100000000 rows of)
    ``xt_local`` and, when a cache path is configured, written to it.

    Supported descriptions, with or without one trailing non-digit
    terminator character (e.g. a comma):

    * ``OPQ<m>[_<dout>]`` -> ``faiss.OPQMatrix(d, m, dout)``, ``dout``
      defaulting to ``d``.
    * ``PCAR<dout>`` -> ``faiss.PCAMatrix(d, dout, 0, True)``.

    Args:
        preproc_str_local: textual description of the pre-processing step.
        xt_local: training vectors; ``xt_local.shape[1]`` is the input
            dimension ``d``.

    Returns:
        The trained (or loaded) transform object.

    Raises:
        ValueError: if the description starts with neither ``OPQ`` nor
            ``PCAR``, or if its numeric fields do not parse.
    """
    cache_path = self.preproc_cachefile
    if cache_path and os.path.exists(cache_path):
        print("load preproc ", cache_path)
        return faiss.read_VectorTransform(cache_path)

    print("train preproc", preproc_str_local)
    d = xt_local.shape[1]
    t0 = time.time()

    # The two branches used to disagree on whether the string carries a
    # trailing terminator (OPQ parsed [3:], PCAR parsed [4:-1]). Drop one
    # trailing non-digit character up front so both conventions parse.
    spec = preproc_str_local
    if spec and not spec[-1].isdigit():
        spec = spec[:-1]

    if spec.startswith('OPQ'):
        fields = spec[3:].split('_')
        m = int(fields[0])
        dout = int(fields[1]) if len(fields) == 2 else d
        preproc = faiss.OPQMatrix(d, m, dout)
    elif spec.startswith('PCAR'):
        dout = int(spec[4:])
        preproc = faiss.PCAMatrix(d, dout, 0, True)
    else:
        # Explicit exception: an assert would vanish under -O and leave
        # `preproc` unbound further down.
        raise ValueError(
            "unsupported preprocessor: %r" % (preproc_str_local,))

    preproc.train(indexfunctions.sanitize(xt_local[:100000000]))
    print("preproc train done in %.3f s" % (time.time() - t0))

    # Only persist when a cache location is actually configured; writing to
    # a falsy path would fail after the training had already been paid for.
    if cache_path:
        faiss.write_VectorTransform(preproc, cache_path)
    return preproc
# Each section below runs only when its key is present in `todo`; every
# section builds one quantizer and hands it to eval_quantizer.

# Local-search quantizer whose ICM encoder factory is the GPU one, created
# with the number of GPUs reported by faiss.
if 'lsq-gpu' in todo:
    lsq = faiss.LocalSearchQuantizer(d, M, nbits)
    ngpus = faiss.get_num_gpus()
    lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)
    lsq.verbose = True
    # NOTE(review): this call passes (lsq, xb, xt, 'lsq-gpu') whereas the
    # other sections pass (quantizer, xq, xb, gt, xt) -- confirm it matches
    # eval_quantizer's signature.
    eval_quantizer(lsq, xb, xt, 'lsq-gpu')

# Plain product quantizer on the original vectors.
if 'pq' in todo:
    pq = faiss.ProductQuantizer(d, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq, xb, gt, xt)

# OPQ: train the transform on xt, apply it to queries, database and training
# vectors, then evaluate a product quantizer on the transformed data.
if 'opq' in todo:
    # Round d up to the next multiple of M for the transform's output size.
    d2 = ((d + M - 1) // M) * M
    print("OPQ d2=", d2)
    opq = faiss.OPQMatrix(d, M, d2)
    opq.train(xt)
    xq2 = opq.apply(xq)
    xb2 = opq.apply(xb)
    xt2 = opq.apply(xt)
    pq = faiss.ProductQuantizer(d2, M, nbits)
    # NOTE(review): the banner reads "PQ" although this is the OPQ section.
    print("===== PQ")
    eval_quantizer(pq, xq2, xb2, gt, xt2)

# Product residual quantizer, evaluated for several max_beam_size values.
if 'prq' in todo:
    print(f"===== PRQ{nsplits}x{Msub}x{nbits}")
    prq = faiss.ProductResidualQuantizer(d, nsplits, Msub, nbits)
    variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32)]
    eval_quantizer(prq, xq, xb, gt, xt, variants=variants)

# The body of this section lies beyond the visible part of the source.
if 'plsq' in todo:
def index_factory(d: int, index_key: str, metric_type: int, ef_construction: Optional[int] = None):
    """Build a faiss index from ``index_key``, special-casing inner product.

    For any metric other than ``faiss.METRIC_INNER_PRODUCT`` the call is
    forwarded unchanged to ``faiss.index_factory``. For inner product, the
    key is matched (by substring search, in this order) against:

    1. ``OPQ<m>_<out_d>,IVF<n>,PQ<cs>[x<nbits>]``
    2. ``OPQ<m>_<out_d>,IVF<n>_HNSW<M>,PQ<cs>[x<nbits>]``
    3. ``Pad<out_d>,IVF<n>_HNSW<M>,PQ<cs>[x<nbits>]``
    4. ``HNSW<M>``
    5. exactly ``Flat``

    and the index is assembled by hand with the metric passed explicitly to
    every component. ``nbits`` defaults to 8 when the key carries no extra
    number.

    Args:
        d: input vector dimension.
        index_key: index description string.
        metric_type: faiss metric constant.
        ef_construction: when given and >= 1, written to
            ``hnsw.efConstruction`` of the HNSW coarse quantizer in forms 2
            and 3. NOTE(review): it is not applied to the stand-alone HNSW
            index of form 4 -- confirm whether that is intended.

    Returns:
        The constructed faiss index.

    Raises:
        ValueError: for inner product with a key matching none of the forms
            above.
    """
    if metric_type != faiss.METRIC_INNER_PRODUCT:
        return faiss.index_factory(d, index_key, metric_type)

    # All integers of the key, in order of appearance; each branch below
    # picks the positions that its own pattern guarantees.
    params = [int(x) for x in re.findall(r"\d+", index_key)]

    if re.search(r"OPQ\d+_\d+,IVF\d+,PQ\d+", index_key):
        M_OPQ, out_d, ncentroids, cs = params[0], params[1], params[2], params[3]
        # cs is the code size (in bytes when nbits == 8).
        nbits = params[4] if len(params) == 5 else 8
        quantizer = faiss.index_factory(out_d, "Flat", metric_type)
        assert quantizer.metric_type == metric_type
        index_ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, cs, nbits, metric_type)
        assert index_ivfpq.metric_type == metric_type
        # Hand ownership of the quantizer over to the IVF index.
        index_ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        opq_matrix = faiss.OPQMatrix(d, M=M_OPQ, d2=out_d)
        return faiss.IndexPreTransform(opq_matrix, index_ivfpq)

    if re.search(r"OPQ\d+_\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
        M_OPQ, out_d, ncentroids = params[0], params[1], params[2]
        M_HNSW, cs = params[3], params[4]
        nbits = params[5] if len(params) == 6 else 8
        quantizer = faiss.IndexHNSWFlat(out_d, M_HNSW, metric_type)
        if ef_construction is not None and ef_construction >= 1:
            quantizer.hnsw.efConstruction = ef_construction
        assert quantizer.metric_type == metric_type
        index_ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, cs, nbits, metric_type)
        assert index_ivfpq.metric_type == metric_type
        index_ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        opq_matrix = faiss.OPQMatrix(d, M=M_OPQ, d2=out_d)
        return faiss.IndexPreTransform(opq_matrix, index_ivfpq)

    if re.search(r"Pad\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
        out_d, ncentroids, M_HNSW, cs = params[0], params[1], params[2], params[3]
        nbits = params[4] if len(params) == 5 else 8
        remapper = faiss.RemapDimensionsTransform(d, out_d, True)
        quantizer = faiss.IndexHNSWFlat(out_d, M_HNSW, metric_type)
        if ef_construction is not None and ef_construction >= 1:
            quantizer.hnsw.efConstruction = ef_construction
        index_ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, cs, nbits, metric_type)
        index_ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        return faiss.IndexPreTransform(remapper, index_ivfpq)

    if re.search(r"HNSW\d+", index_key):
        M_HNSW = params[0]
        index = faiss.IndexHNSWFlat(d, M_HNSW, metric_type)
        assert index.metric_type == metric_type
        return index

    if index_key == "Flat":
        return faiss.index_factory(d, index_key, metric_type)

    # Built before raising, presumably so that deleting the `raise` (as the
    # message suggests) leaves a working fallback that returns this index.
    index = faiss.index_factory(d, index_key, metric_type)
    raise ValueError((
        "Be careful, faiss might not create what you expect when using the "
        "inner product similarity metric, remove this line to try it anyway. "
        "Happened with index_key: " + str(index_key)))
    return index
x = x.view('int32') y = np.ones((d, 1), dtype='int32') * w x = np.concatenate([y, x], -1).reshape(-1) x.tofile(fname) def cvecs_write(x, fname): x = x.astype('uint8') x.tofile(fname) x = fvecs_read("sift/sift_base.fvecs") # x = x[:10000, :] n, d = x.shape m = 8 opq = faiss.OPQMatrix(d, 8) # help(opq) opq.train(x) A = faiss.vector_to_array(opq.A).reshape(d, d) print(A.shape) # print(A) xt = opq.apply_py(x) # print(((np.dot(x[0], A.T) - xt[0])**2).sum()) # print(x[0, :10]) # print(xt[0, :10]) print(xt.shape) pq = faiss.ProductQuantizer(d, 8, 8) pq.train(xt) codes = pq.compute_codes(x) cen = faiss.vector_to_array(pq.centroids) cen = cen.reshape(pq.M, pq.ksub, pq.dsub)