def _gmm_to_numpy(gmm):
    """Copy the parameter vectors of a yael GMM object into numpy arrays.

    Reads ``gmm.d``, ``gmm.k``, ``gmm.w``, ``gmm.mu`` and ``gmm.sigma``.

    Returns a tuple ``(w, mu, sigma)`` where ``w`` holds ``k`` values and
    ``mu`` / ``sigma`` are reshaped to ``(k, d)``.
    (Presumably mixture weights, means and variances -- confirm against yael.)
    """
    dim, n_comp = gmm.d, gmm.k
    n_values = dim * n_comp

    weights = yael.fvec_to_numpy(gmm.w, n_comp)
    # mu and sigma are stored flat; expose them as one row per component.
    means = yael.fvec_to_numpy(gmm.mu, n_values).reshape((n_comp, dim))
    sigmas = yael.fvec_to_numpy(gmm.sigma, n_values).reshape((n_comp, dim))
    return weights, means, sigmas
def siftgeo_read(filename):
    """Read a siftgeo file through yael and return ``(v, meta)`` numpy arrays.

    ``v`` is reshaped to ``(n, d)`` and ``meta`` to ``(n, 9)``, where ``n`` is
    the count returned by ``yael.bvecs_new_from_siftgeo`` and ``d`` is the
    dimension it reports.

    When the reader reports zero entries, two placeholder arrays of shape
    ``(1, 0)`` are returned (uint8 and float32 respectively).

    Raises IOError if the reader returns a negative count.
    """
    # The C reader returns its results through output pointers, so hand it
    # one-slot pointer arrays plus a 2-int vector for the two dimensions.
    desc_slot = yael.BytePtrArray(1)
    meta_slot = yael.FloatPtrArray(1)
    dims = yael.ivec(2)

    n = yael.bvecs_new_from_siftgeo(filename,
                                    dims, desc_slot.cast(),
                                    dims.plus(1), meta_slot.cast())

    if n < 0:
        raise IOError("cannot read " + filename)

    if n == 0:
        # Note: `[] * 9` is still `[]`, so both placeholders are (1, 0).
        v = numpy.array([[]], dtype=numpy.uint8)
        meta = numpy.array([[] * 9], dtype=numpy.float32)
        return v, meta

    # Wrap the raw buffers so the Python objects own them.
    desc_vec = yael.bvec.acquirepointer(desc_slot[0])
    meta_vec = yael.fvec.acquirepointer(meta_slot[0])

    d, d_meta = dims[0], dims[1]
    assert d_meta == 9

    v = yael.bvec_to_numpy(desc_vec, n * d).reshape((n, d))
    meta = yael.fvec_to_numpy(meta_vec, n * d_meta).reshape((n, d_meta))
    return v, meta
def flushBuffer(x, y, t):
    """Encode the descriptors buffered in cell (x, y, t) and reset the cell.

    For every entry of ``parts`` the first ``cnt[x, y, t]`` buffered rows are
    sliced to columns ``cutFrom..cutTo`` (inclusive) and passed to
    ``yael.gmm_fisher``. The per-part outputs are flattened, concatenated and
    scaled by ``sqrt(count)``.

    Side effects: adds the count to ``ndescr[x, y, t]`` and sets
    ``cnt[x, y, t]`` to 0.
    """
    count = int(cnt[x, y, t])
    encoded = []
    for cutFrom, cutTo, fvSize, gmm, _ in parts:
        # cutTo is an inclusive column index, hence the +1 on the slice end.
        desc = np.ascontiguousarray(buffer[x, y, t, :count, cutFrom:(cutTo + 1)])
        # NOTE(review): this buffer comes from fvec_new_0 and is not released
        # anywhere in this function -- confirm who owns it.
        fv = yael.fvec_new_0(fvSize)
        yael.gmm_fisher(count,
                        yael.FloatArray.acquirepointer(yael.numpy_to_fvec(desc)),
                        gmm, flags, fv)
        encoded.append(yael.fvec_to_numpy(fv, fvSize).flatten())

    ndescr[x, y, t] += count
    cnt[x, y, t] = 0
    return np.sqrt(count) * np.hstack(tuple(encoded))
def fvecs_read(filename, nmax=-1):
    """Read vectors from an fvecs file through yael into a 2-D numpy array.

    Parameters:
        filename: path of the file to read.
        nmax: when negative (default) the whole file is read with
            ``yael.fvecs_new_read``; otherwise the value is passed to
            ``yael.fvecs_new_fread_max`` as the read limit.

    Returns an array of shape ``(n, d)``; ``(0, 0)`` when the reader reports
    zero vectors.

    Raises IOError when the reader reports ``n == -1``.
    """
    if nmax < 0:
        (fvecs, n, d) = yael.fvecs_new_read(filename)
    else:
        # The file holds binary data: open it in binary mode and close the
        # handle as soon as the C reader is done with it.
        with open(filename, "rb") as f:
            (fvecs, n, d) = yael.fvecs_new_fread_max(f, nmax)

    if n == -1:
        raise IOError("could not read " + filename)
    elif n == 0:
        # No vectors were read, so force the dimension to 0 for the reshape.
        d = 0

    fvecs = yael.fvec.acquirepointer(fvecs)
    # TODO find a way to avoid copy
    a = yael.fvec_to_numpy(fvecs, n * d)
    return a.reshape((n, d))
def flushBuffer(x, y, t):
    """Encode the descriptors buffered in cell (x, y, t) and reset the cell.

    Each entry of ``parts`` selects a column range ``cutFrom..cutTo``
    (inclusive) of the first ``cnt[x, y, t]`` buffered rows; that slice is
    passed to ``yael.gmm_fisher`` with the entry's GMM. The flattened outputs
    are concatenated and multiplied by ``sqrt(count)``.

    Side effects: ``ndescr[x, y, t]`` grows by the count and
    ``cnt[x, y, t]`` is reset to 0.
    """
    n_buffered = int(cnt[x, y, t])
    vectors = []
    for cutFrom, cutTo, fvSize, gmm, _ in parts:
        # Slice end is cutTo + 1 because cutTo is inclusive.
        block = buffer[x, y, t, :n_buffered, cutFrom:(cutTo + 1)]
        desc = np.ascontiguousarray(block)
        # NOTE(review): the fvec_new_0 allocation is never released in this
        # function -- confirm ownership.
        out = yael.fvec_new_0(fvSize)
        yael.gmm_fisher(
            n_buffered,
            yael.FloatArray.acquirepointer(yael.numpy_to_fvec(desc)),
            gmm, flags, out)
        vectors.append(yael.fvec_to_numpy(out, fvSize).flatten())

    ndescr[x, y, t] += n_buffered
    cnt[x, y, t] = 0
    return np.sqrt(n_buffered) * np.hstack(tuple(vectors))
def load_features(filename, file_format, total_nuse, dimension, lsh, index_folder, offset=0, run_index='n'):
    """Load feature vectors in chunks and either collect or index them.

    The range ``[offset, offset + total_nuse)`` is read in chunks of at most
    10,000,000 vectors via ``yutils.load_vectors_fmt``.

    Parameters:
        filename: file to read from.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: number of vectors requested.
        dimension: number of components per vector.
        lsh: index object passed to ``index`` / ``save_index``.
        index_folder: when not None (and ``run_index == 'y'``) the index is
            saved after every chunk.
        offset: position of the first vector to load.
        run_index: ``'y'`` feeds every (flat, un-reshaped) chunk to ``index``
            instead of accumulating it.

    Returns:
        An array of shape ``(loaded, dimension)`` when ``run_index != 'y'``;
        None when ``run_index == 'y'`` or when no chunk was loaded.

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    chunk_size = 10000000
    end = total_nuse + offset
    collected = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2.
        print("loading from " + str(feature_idx_begin))
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        n_loaded = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(
                feature_vecs, n_loaded * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(
                feature_vecs, n_loaded * dimension)
        else:
            raise ValueError("unsupported file_format: " + str(file_format))

        if run_index != 'y':
            collected.append(
                part_np_feature_vecs.reshape((n_loaded, dimension)))
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += n_loaded

    if run_index == 'y' or not collected:
        return None

    # Chunks are gathered in a list and joined once: testing an ndarray with
    # `!= None` is an elementwise comparison and cannot be used in an `if`.
    if len(collected) == 1:
        np_feature_vecs = collected[0]
    else:
        np_feature_vecs = numpy.concatenate(collected)
    print(np_feature_vecs.shape)
    return np_feature_vecs
def partial_pca(mat, nev=6, nt=1):
    """Compute a partial PCA of the rows of ``mat`` through yael.

    Parameters:
        mat: 2-D array, validated by ``_check_row_float32``.
        nev: number of components requested from ``yael.fmat_new_pca_part``.
        nt: accepted but not used by this function.

    Returns ``(avg, singvals, pcamat)``: the column-wise mean of ``mat``, a
    float32 array of length ``nev`` filled in by yael, and the result of
    ``yael.fvec_to_numpy`` called with shape ``(nev, d)``.
    """
    _check_row_float32(mat)
    n, d = mat.shape

    # Center the data on its column means before calling the C routine.
    avg = mat.mean(axis=0)
    centered = mat - avg[numpy.newaxis, :]

    # Output buffer that the C routine writes into.
    singvals = numpy.empty(nev, dtype=numpy.float32)

    raw_pca = yael.fmat_new_pca_part(d, n, nev,
                                     yael.numpy_to_fvec_ref(centered),
                                     yael.numpy_to_fvec_ref(singvals))
    assert raw_pca != None

    # Take ownership of the returned buffer, then convert it to numpy.
    owned_pca = yael.fvec.acquirepointer(raw_pca)
    return avg, singvals, yael.fvec_to_numpy(owned_pca, (nev, d))
def partial_pca(mat, nev=6, nt=1):
    """Run yael's partial PCA on the mean-centered rows of ``mat``.

    Parameters:
        mat: 2-D array; checked with ``_check_row_float32`` first.
        nev: number of components passed to ``yael.fmat_new_pca_part``.
        nt: unused in this function.

    Returns a tuple ``(avg, singvals, pcamat)`` where ``avg`` is
    ``mat.mean(axis=0)``, ``singvals`` is a float32 array of length ``nev``
    written by yael, and ``pcamat`` comes from ``yael.fvec_to_numpy`` with
    shape argument ``(nev, d)``.
    """
    _check_row_float32(mat)
    n_rows, n_cols = mat.shape

    avg = mat.mean(axis=0)
    zero_mean = mat - avg[numpy.newaxis, :]
    singvals = numpy.empty(nev, dtype=numpy.float32)

    mat_ref = yael.numpy_to_fvec_ref(zero_mean)
    sv_ref = yael.numpy_to_fvec_ref(singvals)
    pca_ptr = yael.fmat_new_pca_part(n_cols, n_rows, nev, mat_ref, sv_ref)
    assert pca_ptr != None

    # Wrap the returned pointer so Python owns it before copying it out.
    pca_vec = yael.fvec.acquirepointer(pca_ptr)
    pcamat = yael.fvec_to_numpy(pca_vec, (nev, n_cols))
    return avg, singvals, pcamat
def fvecs_read(filename, nmax=-1, c_contiguous=True):
    """Read vectors from an fvecs file into a 2-D float32 array.

    Each record in the file is one int32 dimension followed by that many
    float32 components.

    Parameters:
        filename: path of the file to read.
        nmax: when negative (default) the whole file is parsed with numpy;
            otherwise the value is passed to ``yael.fvecs_new_fread_max`` as
            the read limit.
        c_contiguous: numpy path only -- when True the result is copied so it
            is C-contiguous; when False a strided view is returned.

    Returns an array of shape ``(n, d)``; ``(0, 0)`` for an empty file or
    when the yael reader reports zero vectors.

    Raises IOError when records have differing dimensions (numpy path) or
    when the yael reader reports ``n == -1``.
    """
    if nmax < 0:
        fv = numpy.fromfile(filename, dtype=numpy.float32)
        if fv.size == 0:
            # Same dtype as every other return path of this function.
            return numpy.zeros((0, 0), dtype=numpy.float32)
        # The first 4 bytes of the file are the int32 dimension.
        dim = fv.view(numpy.int32)[0]
        assert dim > 0
        fv = fv.reshape(-1, 1 + dim)
        # Every record must carry the same dimension header.
        if not (fv.view(numpy.int32)[:, 0] == dim).all():
            raise IOError("non-uniform vector sizes in " + filename)
        fv = fv[:, 1:]
        if c_contiguous:
            fv = fv.copy()
        return fv

    # The file holds binary data: open it in binary mode and close the handle
    # as soon as the C reader is done with it.
    with open(filename, "rb") as f:
        (fvecs, n, d) = yael.fvecs_new_fread_max(f, nmax)
    if n == -1:
        raise IOError("could not read " + filename)
    elif n == 0:
        d = 0
    fvecs = yael.fvec.acquirepointer(fvecs)
    # TODO find a way to avoid copy
    a = yael.fvec_to_numpy(fvecs, n * d)
    return a.reshape((n, d))
def fvecs_read(filename, nmax=-1, c_contiguous=True):
    """Load an fvecs file as a 2-D float32 numpy array.

    The file is a sequence of records, each made of one int32 dimension
    followed by that many float32 values.

    Parameters:
        filename: path of the file to read.
        nmax: negative (default) parses the whole file with numpy; any other
            value is handed to ``yael.fvecs_new_fread_max`` as the read limit.
        c_contiguous: numpy path only -- copy the result so that it is
            C-contiguous (True) or return a strided view (False).

    Returns an ``(n, d)`` array; ``(0, 0)`` for an empty file or when the
    yael reader reports zero vectors.

    Raises IOError if the records do not all share one dimension (numpy
    path) or if the yael reader reports ``n == -1``.
    """
    if nmax < 0:
        fv = numpy.fromfile(filename, dtype=numpy.float32)
        if fv.size == 0:
            # Keep the dtype identical to the non-empty return paths.
            return numpy.zeros((0, 0), dtype=numpy.float32)
        # Reinterpret the leading 4 bytes as the int32 dimension header.
        dim = fv.view(numpy.int32)[0]
        assert dim > 0
        fv = fv.reshape(-1, 1 + dim)
        headers = fv.view(numpy.int32)[:, 0]
        if not (headers == dim).all():
            raise IOError("non-uniform vector sizes in " + filename)
        # Drop the header column, keeping only the components.
        fv = fv[:, 1:]
        if c_contiguous:
            fv = fv.copy()
        return fv

    # Binary data: open in binary mode and release the handle once the C
    # reader has returned.
    with open(filename, "rb") as f:
        (fvecs, n, d) = yael.fvecs_new_fread_max(f, nmax)
    if n == -1:
        raise IOError("could not read " + filename)
    elif n == 0:
        d = 0
    fvecs = yael.fvec.acquirepointer(fvecs)
    # TODO find a way to avoid copy
    a = yael.fvec_to_numpy(fvecs, n * d)
    return a.reshape((n, d))
def load_features(filename, file_format, total_nuse, dimension, offset=0):
    """Load feature vectors in chunks and return them as one 2-D array.

    The range ``[offset, offset + total_nuse)`` is read in chunks of at most
    10,000,000 vectors via ``yutils.load_vectors_fmt``.

    Parameters:
        filename: file to read from.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: number of vectors requested.
        dimension: number of components per vector.
        offset: position of the first vector to load.

    Returns:
        An array of shape ``(loaded, dimension)``, or None when no chunk was
        loaded (``total_nuse <= 0``).

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    chunk_size = 10000000
    end = total_nuse + offset
    collected = []

    for feature_idx_begin in range(offset, end, chunk_size):
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2.
        print("loading from " + str(feature_idx_begin))
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        n_loaded = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(
                feature_vecs, n_loaded * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(
                feature_vecs, n_loaded * dimension)
        else:
            raise ValueError("unsupported file_format: " + str(file_format))

        collected.append(part_np_feature_vecs.reshape((n_loaded, dimension)))

    if not collected:
        return None

    # Chunks are gathered in a list and joined once: testing an ndarray with
    # `!= None` is an elementwise comparison and cannot be used in an `if`.
    if len(collected) == 1:
        np_feature_vecs = collected[0]
    else:
        np_feature_vecs = numpy.concatenate(collected)
    print(np_feature_vecs.shape)
    return np_feature_vecs
def load_features(filename, file_format, total_nuse, dimension, lsh, index_folder, offset=0, run_index='n'):
    """Read feature vectors chunk by chunk, collecting or indexing them.

    Vectors in ``[offset, offset + total_nuse)`` are loaded through
    ``yutils.load_vectors_fmt`` in chunks of at most 10,000,000.

    Parameters:
        filename: file to read from.
        file_format: ``'fvecs'`` or ``'bvecs'``.
        total_nuse: number of vectors requested.
        dimension: number of components per vector.
        lsh: index object handed to ``index`` / ``save_index``.
        index_folder: when not None (and ``run_index == 'y'``) the index is
            saved after each chunk.
        offset: position of the first vector to load.
        run_index: ``'y'`` passes each flat (un-reshaped) chunk to ``index``
            instead of accumulating it.

    Returns:
        An ``(loaded, dimension)`` array when ``run_index != 'y'``; None when
        ``run_index == 'y'`` or when no chunk was loaded.

    Raises:
        ValueError: if ``file_format`` is not ``'fvecs'`` or ``'bvecs'``.
    """
    chunk_size = 10000000
    end = total_nuse + offset
    chunks = []
    actual_total_nuse = 0

    for feature_idx_begin in range(offset, end, chunk_size):
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2.
        print("loading from " + str(feature_idx_begin))
        nuse = min(chunk_size, end - feature_idx_begin)
        (feature_vecs, actual_nuse) = yutils.load_vectors_fmt(
            filename, file_format, dimension, nuse, feature_idx_begin,
            verbose=True)
        n_loaded = int(actual_nuse)

        if file_format == 'fvecs':
            part_np_feature_vecs = yael.fvec_to_numpy(
                feature_vecs, n_loaded * dimension)
        elif file_format == 'bvecs':
            part_np_feature_vecs = yael.bvec_to_numpy(
                feature_vecs, n_loaded * dimension)
        else:
            raise ValueError("unsupported file_format: " + str(file_format))

        if run_index != 'y':
            chunks.append(
                part_np_feature_vecs.reshape((n_loaded, dimension)))
        else:
            # for CUDA-based batch indexing, skip the reshaping
            index(lsh, part_np_feature_vecs, actual_total_nuse)
            del part_np_feature_vecs
            if index_folder is not None:
                save_index(lsh, index_folder, feature_idx_begin)

        actual_total_nuse += n_loaded

    if run_index == 'y' or not chunks:
        return None

    # Join the chunks once at the end; an ndarray compared with `!= None`
    # yields an elementwise result that cannot drive an `if`.
    if len(chunks) == 1:
        np_feature_vecs = chunks[0]
    else:
        np_feature_vecs = numpy.concatenate(chunks)
    print(np_feature_vecs.shape)
    return np_feature_vecs