def readXVecsFromOpenedFile(file, dim, count, extension):
    """
    Reads `count` vectors of dimension `dim` from an already opened file.

    Inputs
    ------
    file: open file object
        Passed unchanged to the yael reader.

    dim: int
        Dimensionality of each vector.

    count: int
        Number of vectors to read.

    extension: str
        'bvecs' (read with `yael.b2fvecs_fread`) or 'fvecs' (read with
        `yael.fvecs_fread`).

    Output
    ------
    a: array [count, dim]
        The vectors as produced by `yael.fvec_to_numpy`.

    Raises
    ------
    ValueError
        If `extension` is neither 'bvecs' nor 'fvecs'.

    """
    if extension not in ('bvecs', 'fvecs'):
        # ValueError derives from Exception, so callers that catch the
        # generic Exception raised previously keep working.
        raise ValueError('Bad file extension!')

    points = yael.fvec_new(dim * count)
    try:
        if extension == 'bvecs':
            yael.b2fvecs_fread(file, points, count)
        else:
            yael.fvecs_fread(file, points, count, dim)
        # NOTE(review): the buffer is freed right after the conversion, so
        # fvec_to_numpy is presumably copying the data -- confirm.
        return yael.fvec_to_numpy(points, (count, dim))
    finally:
        # Release the C buffer even when the read or the conversion fails.
        yael.free(points)
def descs_to_sstats(xx, gmm):
    """
    Turns a set of descriptors into sufficient statistics under a GMM.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Descriptors, one per row. A 1-D input is promoted to a single row.

    gmm: yael.gmm instance
        Mixture of Gaussians; `gmm.k` and `gmm.d` give the number of
        clusters and the dimensionality.

    Output
    ------
    sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
        float32 vector holding, in this order, the averaged posteriors,
        the posterior-weighted first moment of `xx` and the
        posterior-weighted second moment of `xx`, each divided by the
        number of descriptors.

    """
    xx = np.atleast_2d(xx)
    nr_descs = xx.shape[0]
    nr_clusters = gmm.k
    nr_dims = gmm.d

    # Let yael fill a C buffer with the posteriors, then view it as NxK.
    nr_entries = nr_descs * nr_clusters
    posteriors_c = fvec_new(nr_entries)
    gmm_compute_p(
        nr_descs, numpy_to_fvec_ref(xx), gmm, posteriors_c, GMM_FLAGS_W)
    posteriors = fvec_to_numpy(posteriors_c, nr_entries).reshape(
        nr_descs, nr_clusters)
    yael.free(posteriors_c)

    # Layout of the output: [0, K) | [K, K + KD) | [K + KD, K + 2KD).
    first_end = nr_clusters + nr_clusters * nr_dims
    second_end = nr_clusters + 2 * nr_clusters * nr_dims

    sstats = np.zeros(second_end, dtype=np.float32)
    sstats[:nr_clusters] = np.sum(posteriors, 0) / nr_descs
    sstats[nr_clusters:first_end] = dot(posteriors.T, xx).flatten() / nr_descs
    sstats[first_end:second_end] = (
        dot(posteriors.T, xx ** 2).flatten() / nr_descs)
    return sstats
def descs_to_sstats(xx, gmm):
    """
    Converts the descriptors to sufficient statistics.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors. A single 1-D descriptor is
        accepted and treated as one row.

    gmm: yael.gmm instance
        Mixture of Gaussian object.

    Output
    ------
    Q_sum: array [nr_clusters, ]
        Averaged posterior probabilities, as float32.

    """
    # Without this, a 1-D descriptor would report N = nr_dimensions and yael
    # would be asked to process that many rows.
    xx = np.atleast_2d(xx)
    K = gmm.k
    N = xx.shape[0]

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
    finally:
        # Release the C buffer even if yael rejects the input.
        yael.free(Q_yael)

    # Compute statistics. Column sums are taken with numpy explicitly so the
    # result does not depend on which `sum` is in scope.
    Q_sum = np.sum(Q, axis=0) / N  # 1xK
    return np.array(Q_sum, dtype=np.float32)
def descs_to_sstats(xx, gmm):
    """
    Converts the descriptors to sufficient statistics.

    Inputs
    ------
    xx: array [nr_descs, nr_dimensions]
        Data matrix containing the descriptors. A 1-D input is promoted to
        a single row.

    gmm: yael.gmm instance
        Mixture of Gaussian object.

    Output
    ------
    sstats: array [nr_clusters + 2 * nr_clusters * nr_dimensions, ]
        Concatenation of the averaged posterior probabilities `Q_sum`, the
        first moment `Q_xx` and second-order moment `Q_xx_2`, as float32.

    Raises
    ------
    ValueError
        If the descriptors do not have `gmm.d` columns.

    """
    xx = np.atleast_2d(xx)
    N = xx.shape[0]
    K = gmm.k
    D = gmm.d

    # The result layout below needs exactly D columns; fail before the data
    # is handed to the C routine rather than after.
    if xx.shape[1] != D:
        raise ValueError(
            'xx has %d columns but the gmm has dimension %d'
            % (xx.shape[1], D))

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)  # NxK
    finally:
        # Release the C buffer even if yael rejects the input.
        yael.free(Q_yael)

    # Compute statistics.
    sstats = np.zeros(K + 2 * K * D, dtype=np.float32)
    sstats[:K] = np.sum(Q, 0) / N  # 1xK
    sstats[K:K + K * D] = dot(Q.T, xx).flatten() / N  # 1xKD
    sstats[K + K * D:K + 2 * K * D] = dot(Q.T, xx ** 2).flatten() / N  # 1xKD
    return sstats
def gmm_predict_proba(xx, gmm):
    """Returns the posterior probabilities computed by yael, reshaped to
    [xx.shape[0], gmm.k]."""
    nr_points, nr_clusters = xx.shape[0], gmm.k
    nr_entries = nr_points * nr_clusters

    # yael writes the posteriors into a flat C buffer.
    buf = yael.fvec_new(nr_entries)
    yael.gmm_compute_p(
        nr_points, yael.numpy_to_fvec_ref(xx), gmm, buf, yael.GMM_FLAGS_W)

    flat = yael.fvec_to_numpy(buf, nr_entries)
    proba = flat.reshape(nr_points, nr_clusters)
    yael.free(buf)
    return proba
def readXvecs(filename, dim, count, tonumpy=True):
    """
    Reads `count` vectors of dimension `dim` from a *vecs file.

    The format is chosen from the text after the last '.' in `filename`:
    'bvecs', 'fvecs', 'i8vecs' or 'ivecs'.

    Inputs
    ------
    filename: str
        Path of the file to read.

    dim: int
        Dimensionality of each vector.

    count: int
        Number of vectors to read.

    tonumpy: bool, optional
        When True (default) the data is returned as a numpy array and the
        yael buffer is freed. When False the raw yael buffer is returned
        and the caller owns it. Ignored for 'i8vecs', which always returns
        a numpy array.

    Output
    ------
    points: array [count, dim] or yael buffer
        float32 array for 'i8vecs'; otherwise whatever
        `yael.fvec_to_numpy` / `yael.ivec_to_numpy` produce, or the raw
        buffer when `tonumpy` is False.

    Raises
    ------
    ValueError
        For an unknown extension, or when an 'i8vecs' file holds fewer
        than `count` records.

    """
    extension = filename.strip().split('.')[-1]

    if extension == 'i8vecs':
        # Each record is a 4-byte header followed by `dim` int8 values.
        record = dim + 4
        with open(filename, 'rb') as stream:
            raw = np.fromfile(stream, dtype=np.int8, count=count * record)
        if raw.size != count * record:
            raise ValueError(
                'Expected %d records of %d bytes, file is too short!'
                % (count, record))
        # Drop the headers and convert to a compact float32 matrix.
        return np.ascontiguousarray(
            raw.reshape(count, record)[:, 4:], dtype=np.float32)

    if extension not in ('bvecs', 'fvecs', 'ivecs'):
        # ValueError derives from Exception, so callers that catch the
        # generic Exception raised previously keep working.
        raise ValueError('Bad file extension!')

    if extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        to_numpy = yael.ivec_to_numpy
    else:
        points = yael.fvec_new(dim * count)
        to_numpy = yael.fvec_to_numpy

    hand_over = False  # True once the caller becomes the buffer's owner
    try:
        if extension == 'bvecs':
            yael.b2fvecs_read(filename, dim, count, points)
        elif extension == 'fvecs':
            yael.fvecs_read(filename, dim, count, points)
        else:
            # Binary mode, and the handle is closed as soon as yael is done.
            with open(filename, 'rb') as stream:
                yael.ivecs_fread(stream, points, count, dim)

        if not tonumpy:
            hand_over = True
            return points
        return to_numpy(points, (count, dim))
    finally:
        # Free the C buffer unless it was handed to the caller; this also
        # covers a failing read or conversion.
        if not hand_over:
            yael.free(points)
def readXvecs(filename, dim, count, tonumpy=True):
    """
    Reads `count` vectors of dimension `dim` from a *vecs file.

    The reader is selected from the text after the last '.' in `filename`.

    Inputs
    ------
    filename: str
        Path of the file; must end in 'bvecs', 'fvecs', 'i8vecs' or
        'ivecs'.

    dim: int
        Dimensionality of each vector.

    count: int
        Number of vectors to read.

    tonumpy: bool, optional
        True (default): convert to a numpy array and free the yael buffer.
        False: return the raw yael buffer, which the caller must free.
        The 'i8vecs' reader always returns a numpy array.

    Output
    ------
    points: array [count, dim] or yael buffer

    Raises
    ------
    ValueError
        If the extension is not one of the supported ones.

    """
    extension = filename.strip().split('.')[-1]

    if extension == 'bvecs':
        points = yael.fvec_new(dim * count)
        yael.b2fvecs_read(filename, dim, count, points)
        if tonumpy:
            a = yael.fvec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points

    if extension == 'fvecs':
        points = yael.fvec_new(dim * count)
        yael.fvecs_read(filename, dim, count, points)
        if tonumpy:
            a = yael.fvec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points

    if extension == 'i8vecs':
        points = np.zeros((count, dim), dtype='float32')
        # Binary mode; the handle is closed when the block exits.
        with open(filename, 'rb') as stream:
            for row in points:
                stream.read(4)  # skip the 4-byte header of the record
                # Assigning into the float32 row performs the conversion.
                row[:] = np.fromfile(stream, np.int8, dim)
        return points

    if extension == 'ivecs':
        points = yael.ivec_new(dim * count)
        # Binary mode; the handle is closed once yael has finished reading.
        with open(filename, 'rb') as stream:
            yael.ivecs_fread(stream, points, count, dim)
        if tonumpy:
            a = yael.ivec_to_numpy(points, (count, dim))
            yael.free(points)
            return a
        return points

    # ValueError derives from Exception, so callers that catch the generic
    # Exception raised previously keep working.
    raise ValueError('Bad file extension!')
def descs_to_spatial_sstats(xx, ll, gmm):
    """
    Computes spatial statistics from descriptors and their position.

    Inputs
    ------
    xx: array [N, D], required
        N D-dimensional descriptors from a video (usually, after they are
        processed with PCA).

    ll: array [N, 3], required
        Descriptor locations in a video; on each row, we have the triplet
        (x, y, t).

    gmm: instance of yael object gmm
        Gaussian mixture object.

    Output
    ------
    ss: array [K + 2 * 3 * K, ]
        Sufficient statistics in the form of a float32 vector that
        concatenates (i) the averaged posteriors, (ii) an expected value of
        the locations ll under the posterior distribution Q and (iii) the
        second-order moment of the locations ll under the posterior
        distribution Q.

    Raises
    ------
    ValueError
        If `xx` and `ll` do not have the same number of rows.

    """
    N = ll.shape[0]
    K = gmm.k

    # N is taken from `ll` but yael reads N rows from `xx`; reject a
    # mismatch before the data is handed to the C routine.
    if xx.shape[0] != N:
        raise ValueError(
            'xx has %d rows but ll has %d' % (xx.shape[0], N))

    # Compute posterior probabilities using yael.
    Q_yael = fvec_new(N * K)
    try:
        gmm_compute_p(N, numpy_to_fvec_ref(xx), gmm, Q_yael, GMM_FLAGS_W)
        Q = fvec_to_numpy(Q_yael, N * K).reshape(N, K)
    finally:
        # Release the C buffer even if yael rejects the input.
        yael.free(Q_yael)

    # Compute statistics.
    Q_sum = sum(Q, 0) / N  # 1xK
    Q_ll = dot(Q.T, ll).flatten() / N  # 1x3K
    Q_ll_2 = dot(Q.T, ll ** 2).flatten() / N  # 1x3K
    return np.array(hstack((Q_sum, Q_ll, Q_ll_2)), dtype=np.float32)
def descs_to_spatial_sstats(xx, ll, gmm):
    """
    Builds location statistics weighted by the GMM posteriors.

    Inputs
    ------
    xx: array [N, D], required
        Descriptors, one per row; they are the input to the posterior
        computation.

    ll: array [N, 3], required
        Location of each descriptor as an (x, y, t) triplet.

    gmm: instance of yael object gmm
        Gaussian mixture object; `gmm.k` is the number of clusters K.

    Output
    ------
    ss: float32 array
        Concatenation of the averaged posteriors, the posterior-weighted
        mean of `ll` and the posterior-weighted mean of `ll ** 2`, the last
        two flattened.

    """
    nr_points = ll.shape[0]
    nr_clusters = gmm.k
    nr_entries = nr_points * nr_clusters

    # Posteriors come back from yael in a flat C buffer.
    posteriors_c = fvec_new(nr_entries)
    gmm_compute_p(
        nr_points, numpy_to_fvec_ref(xx), gmm, posteriors_c, GMM_FLAGS_W)
    posteriors = fvec_to_numpy(posteriors_c, nr_entries).reshape(
        nr_points, nr_clusters)
    yael.free(posteriors_c)

    # The three pieces of the output, each averaged over the points.
    weights = sum(posteriors, 0) / nr_points
    first_moment = dot(posteriors.T, ll).flatten() / nr_points
    second_moment = dot(posteriors.T, ll ** 2).flatten() / nr_points

    stacked = hstack((weights, first_moment, second_moment))
    return np.array(stacked, dtype=np.float32)
#!/usr/bin/env python
# Demo: time a multi-threaded nearest-neighbour search over random vectors.
from yael import yael
import time

n = 20000   # number of vectors
nq = 1000   # number of query vectors
d = 128     # dimensionality of the vectors
nt = 2      # number of threads to use
k = 1       # number of nn returned

v = yael.fvec_new_rand(d * n)    # random set of vectors
q = yael.fvec_new_rand(d * nq)   # random set of queries
idx = yael.ivec_new(nq * k)      # output buffer handed to knn_thread

# The distance buffer that used to be allocated here was never passed to
# knn_thread, so it has been dropped.

t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw pointer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# `range` instead of `xrange` so the script also runs under Python 3.
print([idx[i] for i in range(nq * k)])
# The timed call is the knn search, not kmeans.
print('knn performed in %.3fs' % (t2 - t1))
# Demo: time a multi-threaded nearest-neighbour search over random vectors.
from yael import yael
import time

n = 20000   # number of vectors
nq = 1000   # number of query vectors
d = 128     # dimensionality of the vectors
nt = 2      # number of threads to use
k = 1       # number of nn returned

v = yael.fvec_new_rand(d * n)    # random set of vectors
q = yael.fvec_new_rand(d * nq)   # random set of queries
idx = yael.ivec_new(nq * k)      # output buffer handed to knn_thread

# The distance buffer that used to be allocated here was never passed to
# knn_thread, so it has been dropped.

t1 = time.time()
yael.knn_thread(nq, n, d, k, v, q, idx, nt)
t2 = time.time()

# Wrap the raw pointer so it can be indexed from Python.
idx = yael.IntArray.acquirepointer(idx)

# Single-argument print(...) and range(...) behave the same under
# Python 2 and Python 3.
print([idx[i] for i in range(nq * k)])
# The timed call is the knn search, not kmeans.
print('knn performed in %.3fs' % (t2 - t1))
# Demo: time yael's k-means on a random set of vectors.
from yael import yael
import time

k = 100      # number of cluster to create
d = 128      # dimensionality of the vectors
n = 10000    # number of vectors
nt = 10      # number of threads to use
v = yael.fvec_new_rand(d * n)   # random set of vectors
niter = 30   # number of iterations
redo = 1     # number of redo

# Kept for reference; this looks like the equivalent call through yael's
# Matlab interface:
# [centroids, dis, assign] = yael_kmeans (v, 100, 'nt', 2, 'niter', 25);

centroids = yael.fvec_new(d * k)   # output: centroids
dis = yael.fvec_new(n)             # point-to-cluster distance
assign = yael.ivec_new(n)          # quantization index of each point
nassign = yael.ivec_new(k)         # output: number of vectors assigned to each centroid

# Wrapped before the call so the counts can be indexed from Python afterwards.
nassign = yael.IntArray.acquirepointer(nassign)

t1 = time.time()
# NOTE(review): the meaning of the literal 0 in the seventh position is not
# visible here -- check it against yael's kmeans signature.
yael.kmeans(d, n, k, niter, v, nt, 0, redo, centroids, dis, assign, nassign)
t2 = time.time()

# Single-argument print(...) and range(...) behave the same under
# Python 2 and Python 3.
print([nassign[i] for i in range(k)])
print('kmeans performed in %.3fs' % (t2 - t1))