def readXvecs(filename, dim, count, tonumpy=True):
    """Read ``count`` vectors of dimension ``dim`` from an ``*vecs`` file.

    The reader is selected from the text after the last ``.`` in
    ``filename`` (surrounding whitespace is ignored for this check only):

    * ``bvecs``  -- loaded with ``yael.b2fvecs_read`` into a yael float buffer.
    * ``fvecs``  -- loaded with ``yael.fvecs_read`` into a yael float buffer.
    * ``ivecs``  -- loaded with ``yael.ivecs_fread`` into a yael int buffer.
    * ``i8vecs`` -- parsed with NumPy; every record is a 4-byte header (which
      is skipped) followed by ``dim`` signed 8-bit values.

    Args:
        filename: Path of the file to read.
        dim: Number of components per vector.
        count: Number of vectors to read.
        tonumpy: For the yael-backed formats, convert the buffer to a
            ``(count, dim)`` NumPy array and free the yael buffer.  When
            false the raw yael buffer is returned and is *not* freed here.
            Ignored for ``i8vecs``, which always yields a NumPy array.

    Returns:
        A ``(count, dim)`` array (float32 for ``i8vecs``), or the raw yael
        buffer when ``tonumpy`` is false for a yael-backed format.

    Raises:
        ValueError: If the extension is not recognised, or an ``i8vecs``
            file holds fewer than ``count`` complete records.
    """
    extension = filename.strip().split('.')[-1]
    shape = (count, dim)

    if extension == 'i8vecs':
        # One record = 4 header bytes + dim payload bytes.  Read all records
        # in a single call and drop the header columns instead of looping.
        record = 4 + dim
        with open(filename, 'rb') as handle:
            raw = np.fromfile(handle, dtype=np.int8, count=count * record)
        if raw.size != count * record:
            raise ValueError(
                'Expected %d records of %d bytes in %r, got %d bytes'
                % (count, record, filename, raw.size))
        return raw.reshape(count, record)[:, 4:].astype('float32')

    if extension in ('bvecs', 'fvecs'):
        points = yael.fvec_new(dim * count)
        if extension == 'bvecs':
            yael.b2fvecs_read(filename, dim, count, points)
        else:
            yael.fvecs_read(filename, dim, count, points)
        to_numpy = yael.fvec_to_numpy
    elif extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Binary mode, and close the handle as soon as yael is done with it.
        with open(filename, 'rb') as handle:
            yael.ivecs_fread(handle, points, count, dim)
        to_numpy = yael.ivec_to_numpy
    else:
        raise ValueError('Bad file extension!')

    if not tonumpy:
        return points
    result = to_numpy(points, shape)
    # Mirrors the original order: convert first, then release the yael buffer.
    yael.free(points)
    return result
def readXvecs(filename, dim, count, tonumpy=True):
    """Read ``count`` vectors of dimension ``dim`` from an ``*vecs`` file.

    The reader is selected from the text after the last ``.`` in
    ``filename`` (surrounding whitespace is ignored for this check only):

    * ``bvecs``  -- loaded with ``yael.b2fvecs_read`` into a yael float buffer.
    * ``fvecs``  -- loaded with ``yael.fvecs_read`` into a yael float buffer.
    * ``ivecs``  -- loaded with ``yael.ivecs_fread`` into a yael int buffer.
    * ``i8vecs`` -- parsed with NumPy; every record is a 4-byte header (which
      is skipped) followed by ``dim`` signed 8-bit values.

    Args:
        filename: Path of the file to read.
        dim: Number of components per vector.
        count: Number of vectors to read.
        tonumpy: For the yael-backed formats, convert the buffer to a
            ``(count, dim)`` NumPy array and free the yael buffer.  When
            false the raw yael buffer is returned and is *not* freed here.
            Ignored for ``i8vecs``, which always yields a NumPy array.

    Returns:
        A ``(count, dim)`` array (float32 for ``i8vecs``), or the raw yael
        buffer when ``tonumpy`` is false for a yael-backed format.

    Raises:
        ValueError: If the extension is not recognised, or an ``i8vecs``
            file holds fewer than ``count`` complete records.
    """
    extension = filename.strip().split('.')[-1]
    shape = (count, dim)

    if extension == 'i8vecs':
        # One record = 4 header bytes + dim payload bytes.  Read all records
        # in a single call and drop the header columns instead of looping.
        record = 4 + dim
        with open(filename, 'rb') as handle:
            raw = np.fromfile(handle, dtype=np.int8, count=count * record)
        if raw.size != count * record:
            raise ValueError(
                'Expected %d records of %d bytes in %r, got %d bytes'
                % (count, record, filename, raw.size))
        return raw.reshape(count, record)[:, 4:].astype('float32')

    if extension in ('bvecs', 'fvecs'):
        points = yael.fvec_new(dim * count)
        if extension == 'bvecs':
            yael.b2fvecs_read(filename, dim, count, points)
        else:
            yael.fvecs_read(filename, dim, count, points)
        to_numpy = yael.fvec_to_numpy
    elif extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Binary mode, and close the handle as soon as yael is done with it.
        with open(filename, 'rb') as handle:
            yael.ivecs_fread(handle, points, count, dim)
        to_numpy = yael.ivec_to_numpy
    else:
        raise ValueError('Bad file extension!')

    if not tonumpy:
        return points
    result = to_numpy(points, shape)
    # Mirrors the original order: convert first, then release the yael buffer.
    yael.free(points)
    return result
#!/usr/bin/env python
# Times one call to yael.knn_thread on randomly generated vectors and prints
# the returned indices together with the elapsed wall-clock time.
from yael import yael
import time

n = 20000   # number of vectors
nq = 1000   # number of vectors in q (the second random set)
d = 128     # dimensionality of the vectors
nt = 2      # number of threads to use
k = 1       # number of nn returned

v = yael.fvec_new_rand(d * n)    # random set of vectors
q = yael.fvec_new_rand(d * nq)

idx = yael.ivec_new(nq * k)
# NOTE(review): dis is allocated but never passed to knn_thread below --
# confirm whether it is still needed.
dis = yael.fvec_new(nq * k)

t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw int buffer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# range (not xrange) so the script also runs under Python 3.
print([idx[i] for i in range(nq * k)])
# The timed call is knn_thread, so report it as such.
print('knn performed in %.3fs' % (t2 - t1))
# Times one call to yael.knn_thread on randomly generated vectors and prints
# the returned indices together with the elapsed wall-clock time.
from yael import yael
import time

n = 20000   # number of vectors
nq = 1000   # number of vectors in q (the second random set)
d = 128     # dimensionality of the vectors
nt = 2      # number of threads to use
k = 1       # number of nn returned

v = yael.fvec_new_rand(d * n)    # random set of vectors
q = yael.fvec_new_rand(d * nq)

idx = yael.ivec_new(nq * k)
# NOTE(review): dis is allocated but never passed to knn_thread below --
# confirm whether it is still needed.
dis = yael.fvec_new(nq * k)

t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw int buffer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# Single-argument print(...) and range behave the same on Python 2 and 3.
print([idx[i] for i in range(nq * k)])
# The timed call is knn_thread, so report it as such.
print('knn performed in %.3fs' % (t2 - t1))
# Times one call to yael.kmeans on randomly generated vectors and prints the
# per-centroid assignment counts together with the elapsed wall-clock time.
from yael import yael
import time

k = 100      # number of cluster to create
d = 128      # dimensionality of the vectors
n = 10000    # number of vectors
nt = 10      # number of threads to use
v = yael.fvec_new_rand(d * n)    # random set of vectors
niter = 30   # number of iterations
redo = 1     # number of redo

# Reference call kept from the original source (looks like the Matlab
# interface -- TODO confirm):
#   [centroids, dis, assign] = yael_kmeans (v, 100, 'nt', 2, 'niter', 25);

centroids = yael.fvec_new(d * k)   # output: centroids
dis = yael.fvec_new(n)             # point-to-cluster distance
assign = yael.ivec_new(n)          # quantization index of each point
nassign = yael.ivec_new(k)         # output: number of vectors assigned to each centroid

# Wrap the raw int buffer so it can be indexed from Python after the call.
nassign = yael.IntArray.acquirepointer(nassign)

t1 = time.time()
# NOTE(review): the literal 0 is passed through unchanged from the original;
# its meaning is not visible here -- check the yael.kmeans signature.
yael.kmeans(d, n, k, niter, v, nt, 0, redo, centroids, dis, assign, nassign)
t2 = time.time()

# Single-argument print(...) and range behave the same on Python 2 and 3.
print([nassign[i] for i in range(k)])
print('kmeans performed in %.3fs' % (t2 - t1))