def search_knn(xq, xb, k, distance_type=faiss.METRIC_L2):
    """Brute-force k-nearest-neighbour search using faiss, without an index.

    Args:
        xq: 2-D query matrix of shape (nq, d).
        xb: 2-D database matrix of shape (nb, d).
        k: number of results to return per query.
        distance_type: ``faiss.METRIC_L2`` (default) or
            ``faiss.METRIC_INNER_PRODUCT``.

    Returns:
        A tuple ``(D, I)`` where ``D`` is a float32 array of shape (nq, k)
        holding the scores written by faiss and ``I`` is an int64 array of
        shape (nq, k) holding the matching row ids of ``xb``.

    Raises:
        AssertionError: if ``xq`` and ``xb`` have different dimensionality.
        ValueError: if ``distance_type`` is not one of the two supported
            metrics.

    NOTE(review): raw pointers to ``xq`` and ``xb`` are handed to faiss, so
    both are presumably expected to be C-contiguous float32 arrays -- confirm
    against callers.
    """
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2, "xq and xb must have the same dimensionality"

    # Choose the heap flavour and the matching faiss routine up front: a
    # max-heap is paired with the L2 search and a min-heap with the
    # inner-product search.
    if distance_type == faiss.METRIC_L2:
        heaps = faiss.float_maxheap_array_t()
        knn = faiss.knn_L2sqr
    elif distance_type == faiss.METRIC_INNER_PRODUCT:
        heaps = faiss.float_minheap_array_t()
        knn = faiss.knn_inner_product
    else:
        # Without this branch the function would fall through and return the
        # uninitialized np.empty buffers below as if they were real results.
        raise ValueError(
            "unsupported distance_type: %r (expected faiss.METRIC_L2 or "
            "faiss.METRIC_INNER_PRODUCT)" % (distance_type,)
        )

    # Output buffers; faiss writes into them through the heap structure, so
    # they need no initialization here.
    I = np.empty((nq, k), dtype='int64')
    D = np.empty((nq, k), dtype='float32')

    heaps.k = k
    heaps.nh = nq
    heaps.val = faiss.swig_ptr(D)
    heaps.ids = faiss.swig_ptr(I)

    knn(faiss.swig_ptr(xq), faiss.swig_ptr(xb), d, nq, nb, heaps)
    return D, I
def __init__(self, nq, k):
    """Allocate the result buffers and wrap them in a faiss heap array.

    nq: number of query vectors
    k: number of results per query
    """
    self.nq = nq
    self.k = k

    # One row per query, one column per retained result.
    shape = (nq, k)
    self.I = np.zeros(shape, dtype='int64')
    self.D = np.zeros(shape, dtype='float32')

    # A min-heap is used here instead of a max-heap: with cosine similarity
    # the most similar (i.e. closest) vectors score 1, whereas with
    # distances the closest score is 0.
    heap_array = faiss.float_minheap_array_t()
    heap_array.nh = nq
    heap_array.k = k
    # The heap array operates directly on the memory of self.D / self.I.
    heap_array.ids = faiss.swig_ptr(self.I)
    heap_array.val = faiss.swig_ptr(self.D)
    heap_array.heapify()
    self.heaps = heap_array