def test_wrapped_quantizer_HNSW(self):
    """Check recall of a binary IVF index that uses an HNSW coarse quantizer.

    Float centroids are learned on the training set expanded to -1/+1
    values, stored in an ``IndexHNSWFlat``, wrapped with
    ``IndexBinaryFromFloat`` and used as the quantizer of an
    ``IndexBinaryIVF``. Its results are compared against an exhaustive
    ``IndexBinaryFlat`` built on the same database vectors.
    """
    # NOTE(review): this is a process-wide setting and is not restored here;
    # presumably it is meant to make the run reproducible -- confirm.
    faiss.omp_set_num_threads(1)

    def bin2float2d(v):
        """Expand a 2-D array of bytes into a float32 array of -1/+1 bits.

        Each byte yields eight values, least-significant bit first.
        """
        n, nbytes = v.shape
        vf = ((v.reshape(-1, 1) >> np.arange(8)) & 1).astype("float32")
        vf *= 2
        vf -= 1
        return vf.reshape(n, nbytes * 8)

    d = 256
    nt = 12800
    nb = 10000
    nq = 500

    (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

    index_ref = faiss.IndexBinaryFlat(d)
    index_ref.add(xb)

    nlist = 256
    clus = faiss.Clustering(d, nlist)
    clus_index = faiss.IndexFlatL2(d)

    # One vectorised conversion instead of a Python loop per bit.
    xt_f = bin2float2d(xt)
    clus.train(xt_f, clus_index)

    # Copy the centroid vector into a (n_centroids, d) array in one call.
    centroids = faiss.vector_to_array(clus.centroids).reshape(-1, d)
    hnsw_quantizer = faiss.IndexHNSWFlat(d, 32)
    hnsw_quantizer.add(centroids)
    hnsw_quantizer.is_trained = True
    wrapped_quantizer = faiss.IndexBinaryFromFloat(hnsw_quantizer)

    assert nlist == hnsw_quantizer.ntotal
    assert nlist == wrapped_quantizer.ntotal
    assert wrapped_quantizer.is_trained

    index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
                                 hnsw_quantizer.ntotal)
    index.nprobe = 128

    assert index.is_trained

    index.add(xb)

    D_ref, I_ref = index_ref.search(xq, 10)
    D, I = index.search(xq, 10)

    # NOTE: membership is evaluated on the distance arrays (D_ref / D),
    # not on the returned ids.
    recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
        / float(D_ref.shape[0])

    assert recall > 0.77, "recall = %g" % recall
def test_ivf_flat2(self):
    """Check that a partial-probe binary IVF search mostly matches ``self.Dref``.

    An 8-list ``IndexBinaryIVF`` is trained on ``self.xt``, filled with
    ``self.xb`` and searched with ``nprobe = 4``. The number of distance
    entries equal to the reference ``self.Dref`` must exceed a lower bound.
    """
    d = self.xq.shape[1] * 8  # bytes per vector -> bits

    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, 8)
    index.cp.min_points_per_centroid = 5    # quiet warning
    index.nprobe = 4
    index.train(self.xt)
    index.add(self.xb)
    Divfflat, _ = index.search(self.xq, 10)

    # With only half of the lists probed, centroids that tie in distance to
    # a query can be picked in a different order depending on the heap
    # implementation, so the exact number of matching entries is not stable.
    # Assert a lower bound instead of one exact count.
    self.assertGreater((self.Dref == Divfflat).sum(), 4100)
def test_ivf_flat_empty(self):
    """Search a trained binary IVF index that contains no vectors.

    For both settings of ``use_heap`` every returned label must be -1 and
    every returned distance must be the int32 maximum.
    """
    nbits = self.xq.shape[1] * 8
    empty_index = faiss.IndexBinaryIVF(faiss.IndexBinaryFlat(nbits), nbits, 8)
    empty_index.train(self.xt)

    int32_max = 2147483647  # NOTE(hoss): int32_t max

    for heap_mode in (True, False):
        empty_index.use_heap = heap_mode
        distances, labels = empty_index.search(self.xq, 10)
        assert np.all(labels == -1)
        assert np.all(distances == int32_max)
def test_ivf_flat_exhaustive(self):
    """Probing all 8 lists must reproduce the reference distances exactly.

    The index is built with 8 inverted lists and searched with
    ``nprobe = 8``; the distances are compared with ``self.Dref``.
    """
    nbits = self.xq.shape[1] * 8

    coarse = faiss.IndexBinaryFlat(nbits)
    ivf = faiss.IndexBinaryIVF(coarse, nbits, 8)
    ivf.cp.min_points_per_centroid = 5  # quiet warning
    ivf.nprobe = 8  # equal to the number of lists
    ivf.train(self.xt)
    ivf.add(self.xb)

    ivf_distances, _ = ivf.search(self.xq, 10)

    np.testing.assert_array_equal(self.Dref, ivf_distances)
def train():
    """Train a binary IVF index on all stored vectors and write it to disk.

    The vectors come from ``get_all_data()``. The number of inverted lists
    is the rounded square root of the number of vectors. The trained (still
    empty) index is written to ``./trained_import.index``.

    Exits the process when there is no data to train on.
    """
    # Local import: ``exit()`` is injected by the ``site`` module for
    # interactive use and is not guaranteed to exist in every run mode.
    import sys

    all_data = np.array(get_all_data())
    if len(all_data) == 0:
        print("No images. exit()")
        sys.exit()

    # assumes each row is a 32-byte (256-bit) code -- TODO confirm
    d = 32 * 8
    centroids = round(sqrt(all_data.shape[0]))
    print(f'centroids: {centroids}')

    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, centroids)
    index.nprobe = 8
    index.train(all_data)
    faiss.write_index_binary(index, "./trained_import.index")
def test_wrapped_quantizer_HNSW(self):
    """Check recall of a binary IVF index that uses an HNSW coarse quantizer.

    Centroids are clustered in float space on the -1/+1 expansion of the
    training codes, added to an ``IndexHNSWFlat``, and exposed to
    ``IndexBinaryIVF`` through ``IndexBinaryFromFloat``. The result is
    compared with an exhaustive ``IndexBinaryFlat`` search.
    """

    def bin2float2d(codes):
        """Turn a 2-D byte array into float32 -1/+1 values, LSB first."""
        rows, nbytes = codes.shape
        # (rows, nbytes, 1) >> (8,) broadcasts to (rows, nbytes, 8).
        bits = (codes[:, :, np.newaxis] >> np.arange(8)) & 1
        signed = bits.astype("float32") * 2 - 1
        return signed.reshape(rows, nbytes * 8)

    d, nt, nb, nq = 256, 12800, 10000, 500
    nlist = 256

    xt, xb, xq = make_binary_dataset(d, nb, nt, nq)

    # Exhaustive reference index.
    exact_index = faiss.IndexBinaryFlat(d)
    exact_index.add(xb)

    # Learn float centroids on the expanded training set.
    clustering = faiss.Clustering(d, nlist)
    assign_index = faiss.IndexFlatL2(d)
    clustering.train(bin2float2d(xt), assign_index)
    centroid_matrix = faiss.vector_to_array(clustering.centroids)
    centroid_matrix = centroid_matrix.reshape(-1, clustering.d)

    # HNSW over the centroids, wrapped so it can act as a binary quantizer.
    hnsw = faiss.IndexHNSWFlat(d, 32)
    hnsw.add(centroid_matrix)
    hnsw.is_trained = True
    binary_view = faiss.IndexBinaryFromFloat(hnsw)

    assert nlist == hnsw.ntotal
    assert nlist == binary_view.ntotal
    assert binary_view.is_trained

    ivf = faiss.IndexBinaryIVF(binary_view, d, hnsw.ntotal)
    ivf.nprobe = 128
    assert ivf.is_trained
    ivf.add(xb)

    D_ref, _ = exact_index.search(xq, 10)
    D, _ = ivf.search(xq, 10)

    # Membership is evaluated on the distance arrays, as in the reference.
    hits = sum(ref_row[0] in row[:10] for ref_row, row in zip(D_ref, D))
    recall = hits / float(D_ref.shape[0])

    assert recall >= 0.77, "recall = %g" % recall
def test_ivf_flat2(self):
    """Check that a partial-probe binary IVF search mostly matches ``self.Dref``.

    Four of the eight inverted lists are probed; more than 4100 entries of
    the distance matrix must equal the reference distances.
    """
    nbits = self.xq.shape[1] * 8

    coarse = faiss.IndexBinaryFlat(nbits)
    ivf = faiss.IndexBinaryIVF(coarse, nbits, 8)
    ivf.cp.min_points_per_centroid = 5  # quiet warning
    ivf.nprobe = 4
    ivf.train(self.xt)
    ivf.add(self.xb)

    ivf_distances, _ = ivf.search(self.xq, 10)

    # Some centroids are equidistant from the query points.
    # So the answer will depend on the implementation of the heap.
    matching = (self.Dref == ivf_distances).sum()
    self.assertGreater(matching, 4100)
def test_ivf_reconstruction(self):
    """Reconstruct stored vectors through both direct-map types.

    First an index filled with sequential ids is checked with an ``Array``
    direct map; then a second index filled with custom ids is checked with
    a ``Hashtable`` direct map. Every 13th database vector is compared.
    """
    nbits = self.xq.shape[1] * 8
    sampled = range(0, len(self.xb), 13)

    coarse = faiss.IndexBinaryFlat(nbits)
    ivf = faiss.IndexBinaryIVF(coarse, nbits, 8)
    ivf.cp.min_points_per_centroid = 5  # quiet warning
    ivf.nprobe = 4
    ivf.train(self.xt)
    ivf.add(self.xb)
    ivf.set_direct_map_type(faiss.DirectMap.Array)

    for pos in sampled:
        np.testing.assert_array_equal(ivf.reconstruct(pos), self.xb[pos])

    # try w/ hashtable
    # The second index shares the quantizer populated above; no train call
    # is made on it.
    ivf = faiss.IndexBinaryIVF(coarse, nbits, 8)
    rng = np.random.RandomState(123)
    custom_ids = rng.choice(10000, size=len(self.xb), replace=False)
    ivf.add_with_ids(self.xb, custom_ids)
    ivf.set_direct_map_type(faiss.DirectMap.Hashtable)

    for pos in sampled:
        stored = ivf.reconstruct(int(custom_ids[pos]))
        np.testing.assert_array_equal(stored, self.xb[pos])
def init_index():
    """Load (or create) the global binary index and fill it with stored hashes.

    Tries to read ``trained.index``. If that fails, falls back to a minimal
    one-list ``IndexBinaryIVF`` trained on a single all-zero vector. Then
    every record returned by ``get_all_data()`` is added, using element 0 of
    the record as the id and element 1 as the vector.
    """
    global index
    try:
        index = faiss.read_index_binary("trained.index")
    except Exception:
        # Best-effort fallback when the trained index cannot be loaded.
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not swallowed.
        d = 32 * 8
        quantizer = faiss.IndexBinaryFlat(d)
        index = faiss.IndexBinaryIVF(quantizer, d, 1)
        index.nprobe = 1
        index.train(np.array([np.zeros(32)], dtype=np.uint8))

    all_data = get_all_data()
    if len(all_data) != 0:
        # The arrays are only needed when there is something to add.
        image_ids = np.array([np.int64(x[0]) for x in all_data])
        phashes = np.array([x[1] for x in all_data])
        index.add_with_ids(phashes, image_ids)
    print("Index is ready")
def train():
    """Train a binary IVF index on all imported descriptors and save it.

    Element 1 of every record returned by ``import_get_all_data()`` is
    concatenated along axis 0 to form the training set. The number of
    inverted lists is the rounded square root of the number of descriptors.
    The trained (still empty) index is written to ``./trained_import.index``.

    Exits the process when there is no data to train on.
    """
    # Local import: ``exit()`` is injected by the ``site`` module for
    # interactive use and is not guaranteed to exist in every run mode.
    import sys

    all_data = import_get_all_data()
    if len(all_data) == 0:
        print("No images. exit()")
        sys.exit()

    all_descriptors = np.concatenate([x[1] for x in all_data], axis=0)

    # assumes each descriptor is 61 bytes (488 bits) -- TODO confirm
    d = 61 * 8
    centroids = round(sqrt(all_descriptors.shape[0]))
    print(f'centroids: {centroids}')

    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, centroids)
    index.nprobe = 8
    index.train(all_descriptors)
    faiss.write_index_binary(index, "./trained_import.index")
def train():
    """Train a binary IVF index on the stored features of every image.

    For each id from ``get_all_ids()`` the features are fetched with
    ``get_akaze_features_by_id`` and passed through ``convert_array``; the
    results are concatenated along axis 0. The number of inverted lists is
    the rounded square root of the number of descriptors. The trained
    (still empty) index is written to ``./trained.index``.

    Exits the process when there are no images.
    """
    # Local import: ``exit()`` is injected by the ``site`` module for
    # interactive use and is not guaranteed to exist in every run mode.
    import sys

    all_ids = get_all_ids()
    if len(all_ids) == 0:
        print("No images. exit()")
        sys.exit()

    # ``image_id`` instead of ``id`` so the builtin is not shadowed.
    all_descriptors = np.concatenate(
        [convert_array(get_akaze_features_by_id(image_id))
         for image_id in all_ids],
        axis=0,
    )

    # assumes each descriptor is 61 bytes (488 bits) -- TODO confirm
    d = 61 * 8
    centroids = round(sqrt(all_descriptors.shape[0]))
    print(f'centroids: {centroids}')

    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, centroids)
    index.nprobe = 8
    index.train(all_descriptors)
    faiss.write_index_binary(index, "./trained.index")
def test_ivf_range(self):
    """Cross-check ``range_search`` against a 10-nearest-neighbour search.

    The radius is one more than the median distance of the 10th neighbour.
    For a query whose 10th neighbour is strictly inside the radius, all 10
    neighbours must appear in the range result. Otherwise the range result
    must be exactly the neighbours whose distance is below the radius.
    """
    nbits = self.xq.shape[1] * 8

    coarse = faiss.IndexBinaryFlat(nbits)
    ivf = faiss.IndexBinaryIVF(coarse, nbits, 8)
    ivf.cp.min_points_per_centroid = 5  # quiet warning
    ivf.nprobe = 4
    ivf.train(self.xt)
    ivf.add(self.xb)

    knn_dist, knn_ids = ivf.search(self.xq, 10)
    radius = int(np.median(knn_dist[:, -1]) + 1)

    # lims[q]:lims[q + 1] delimits the results of query q in range_ids.
    lims, _, range_ids = ivf.range_search(self.xq, radius)

    for q in range(len(self.xq)):
        in_range = set(range_ids[lims[q]:lims[q + 1]])
        if knn_dist[q, -1] < radius:
            self.assertTrue(set(knn_ids[q]) <= in_range)
        else:
            within = knn_ids[q, knn_dist[q, :] < radius]
            self.assertTrue(set(within) == in_range)
def init_index():
    """Load (or create) the global binary index and add every image's features.

    Tries to read ``trained.index``. If that fails, falls back to a minimal
    one-list ``IndexBinaryIVF`` trained on a single all-zero vector. For each
    id from ``get_all_ids()`` the converted features are added under a fresh
    run of consecutive point ids starting at the global ``POINT_ID``, and the
    module-level maps between point ids and image ids are updated.
    """
    global index, POINT_ID
    try:
        index = faiss.read_index_binary("trained.index")
    except Exception:
        # temporary index
        # Best-effort fallback; ``Exception`` rather than a bare ``except``
        # so KeyboardInterrupt and SystemExit are not swallowed.
        d = 61 * 8
        quantizer = faiss.IndexBinaryFlat(d)
        index = faiss.IndexBinaryIVF(quantizer, d, 1)
        index.nprobe = 1
        index.train(np.array([np.zeros(61)], dtype=np.uint8))

    all_ids = get_all_ids()
    for image_id in tqdm(all_ids):
        features = convert_array(get_akaze_features_by_id(image_id))
        # One consecutive int64 id per feature row of this image.
        point_ids = np.arange(start=POINT_ID,
                              stop=POINT_ID + len(features),
                              dtype=np.int64)
        for point_id in point_ids:
            point_id_to_image_id_map[point_id] = image_id
        image_id_to_point_ids_map[image_id] = point_ids
        POINT_ID += len(features)
        index.add_with_ids(features, point_ids)
    print("Index is ready")
def test_ivf_flat(self):
    """A binary IVF index must give identical results after a disk round trip.

    The index is trained, filled and searched. It is then written with
    ``write_index_binary``, read back with ``read_index_binary`` and
    searched again. Labels and distances must be identical.
    """
    d = self.xq.shape[1] * 8  # bytes per vector -> bits

    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, 8)
    index.cp.min_points_per_centroid = 5    # quiet warning
    index.nprobe = 4
    index.train(self.xt)
    index.add(self.xb)
    D, I = index.search(self.xq, 3)

    # mkstemp creates the file and leaves it in place, so the name cannot be
    # taken by someone else before use, and os.remove() in ``finally`` always
    # has a file to delete and cannot mask an earlier failure.
    fd, tmpnam = tempfile.mkstemp()
    os.close(fd)
    try:
        faiss.write_index_binary(index, tmpnam)

        index2 = faiss.read_index_binary(tmpnam)

        D2, I2 = index2.search(self.xq, 3)

        assert (I2 == I).all()
        assert (D2 == D).all()

    finally:
        os.remove(tmpnam)
def test_binary_ivf(self):
    """Train a binary IVF index whose quantizer is only a temporary object.

    The ``IndexBinaryFlat`` quantizer is created inline and never bound to
    a local name; a garbage collection is forced before ``train`` is called.
    ``dbin`` and ``xtbin`` are not defined in this function.
    """
    # Keep the quantizer inline: presumably the point is to verify that the
    # IVF index keeps its quantizer alive by itself -- confirm.
    index = faiss.IndexBinaryIVF(faiss.IndexBinaryFlat(dbin), dbin, 10)
    gc.collect()
    index.train(xtbin)
import faiss
import numpy as np

# Minimal binary IVF example: three 4-byte (32-bit) vectors, one per row.
objects = np.array(
    [
        [1, 1, 2, 1],
        [5, 4, 6, 5],
        [1, 2, 1, 2],
    ],
    dtype=np.uint8,
)

# 32-bit vectors split over 2 inverted lists.
quantizer = faiss.IndexBinaryFlat(32)
index = faiss.IndexBinaryIVF(quantizer, 32, 2)

# The same vectors are used for training, as database and as queries.
index.train(objects)
index.add(objects)

# Ask for 3 neighbours per query, then print distances followed by ids.
distances, ids = index.search(objects, 3)
print(distances, ids, sep="\n")
def create_cpu(dim):
    """Build a binary IVF index with a flat binary coarse quantizer.

    Args:
        dim: dimension passed to both the quantizer and the IVF index.

    Returns:
        A new ``faiss.IndexBinaryIVF``. Nothing is trained or added here.
        The number of inverted lists comes from the name ``centroids``,
        which is resolved outside this function.
    """
    coarse_quantizer = faiss.IndexBinaryFlat(dim)
    ivf_index = faiss.IndexBinaryIVF(coarse_quantizer, dim, centroids)
    return ivf_index