def build(self, pardic=None):
    """Train a coarse quantizer, then build the parent encoder on residuals.

    The training vectors are clustered into ``coarsek`` coarse centroids,
    each vector has its assigned coarse centroid subtracted from it, and
    the resulting residuals are handed to the parent class's ``build``.

    Entries read from ``pardic``:
        vals: training data (required). It must provide ``copy()`` and
            ``astype()``; it is never modified in place by this method.
        coarsek: the number of coarse centroids, default 1024.
        Any other entries are passed through unchanged to the parent
        ``build``.

    Side effects:
        Adds ``'coa_centroids'`` and ``'coarsek'`` to ``self.ecdat``
        (``self.ecdat`` is expected to exist once the parent ``build``
        has returned).

    The caller's ``pardic`` is left untouched: the residuals are stored in
    a shallow copy of the dict instead of overwriting ``pardic['vals']``.
    """
    # Shallow-copy the parameters so that replacing 'vals' with residuals
    # below is not visible to the caller.
    pardic = dict(pardic)

    # training data (private copy: it is turned into residuals in place)
    vals = pardic['vals'].copy()
    # the number of coarse centroids
    coarsek = pardic.get('coarsek', 1024)

    logging.info('Building coarse quantizer - BEGIN')
    coa_centroids = kmeans(vals.astype(np.float32), coarsek, niter=100)
    # cids is used to index rows of coa_centroids, one entry per vector
    cids = pq_kmeans_assign(coa_centroids, vals)
    logging.info('Building coarse quantizer - DONE')

    # Replace every vector by its residual w.r.t. its coarse centroid.
    vals -= coa_centroids[cids, :]
    pardic['vals'] = vals

    super(IVFPQEncoder, self).build(pardic)

    self.ecdat['coa_centroids'] = coa_centroids
    self.ecdat['coarsek'] = coarsek
def build(self, pardic=None):
    """Train one k-means codebook per subspace and store them in ``self.ecdat``.

    The columns of the training data are split into ``nsubq`` consecutive
    groups of ``dsub = dim // nsubq`` columns, and ``kmeans`` is run on
    each group with ``ksub = 2 ** nsubqbits`` centroids.

    Entries read from ``pardic``:
        vals: training data (required); indexed as a 2-D array whose
            second axis is the vector dimension.
        nsubq: the number of subquantizers (required).
        nsubqbits: the number of bits of each subquantizer, default 8.
        blksize: the number of items in one block, default 16384.

    Side effects:
        Sets ``self.ecdat`` to a dict with the keys ``'nsubq'``,
        ``'ksub'``, ``'dsub'``, ``'blksize'`` and ``'centroids'`` (a list
        holding one ``kmeans`` result per subquantizer).
    """
    # training data
    vals = pardic['vals']
    # the number of subquantizers
    nsubq = pardic['nsubq']
    # the number bits of each subquantizer
    nsubqbits = pardic.get('nsubqbits', 8)
    # the number of items in one block
    blksize = pardic.get('blksize', 16384)

    # vector dimension
    dim = vals.shape[1]
    # dimension of the subvectors to quantize; floor division keeps this an
    # int, which is required because it is used as a slice bound below
    dsub = dim // nsubq
    if dim % nsubq:
        logging.warning(
            "vector dimension %d is not divisible by nsubq %d; "
            "the last %d dimension(s) are ignored",
            dim, nsubq, dim % nsubq)
    # number of centroids per subquantizer
    ksub = 2 ** nsubqbits

    # Initializing indexer data
    ecdat = dict()
    ecdat['nsubq'] = nsubq
    ecdat['ksub'] = ksub
    ecdat['dsub'] = dsub
    ecdat['blksize'] = blksize
    ecdat['centroids'] = [None] * nsubq

    logging.info("Building codebooks in subspaces - BEGIN")
    for q in range(nsubq):
        logging.info("\tsubspace %d/%d", q, nsubq)
        # C-contiguous float32 copy/view of the q-th group of columns
        vs = np.require(vals[:, q * dsub:(q + 1) * dsub],
                        requirements='C', dtype=np.float32)
        ecdat['centroids'][q] = kmeans(vs, ksub, niter=100)
    logging.info("Building codebooks in subspaces - DONE")

    self.ecdat = ecdat
def build(self, pardic=None):
    """Build the per-subspace codebooks and publish them as ``self.ecdat``.

    The training data's columns are cut into ``nsubq`` equally sized,
    consecutive slices; each slice is clustered by ``kmeans`` into
    ``2 ** nsubqbits`` centroids.

    Entries read from ``pardic``:
        vals: training data (required); must expose ``shape[1]`` and
            support 2-D slicing.
        nsubq: the number of subquantizers (required).
        nsubqbits: the number of bits of each subquantizer, default 8.
        blksize: the number of items in one block, default 16384.

    Side effects:
        Replaces ``self.ecdat`` with a new dict containing ``'nsubq'``,
        ``'ksub'``, ``'dsub'``, ``'blksize'`` and ``'centroids'`` (one
        ``kmeans`` result per subquantizer, in subspace order).
    """
    # training data
    vals = pardic['vals']
    # the number of subquantizers
    nsubq = pardic['nsubq']
    # the number bits of each subquantizer
    nsubqbits = pardic.get('nsubqbits', 8)
    # the number of items in one block
    blksize = pardic.get('blksize', 16384)

    # vector dimension
    dim = vals.shape[1]
    # Dimension of the subvectors to quantize. Integer division is needed:
    # true division would yield a float, which cannot be used for slicing.
    dsub, leftover = divmod(dim, nsubq)
    if leftover:
        logging.warning(
            "vector dimension %d is not divisible by nsubq %d; "
            "the last %d dimension(s) are ignored",
            dim, nsubq, leftover)
    # number of centroids per subquantizer
    ksub = 2 ** nsubqbits

    logging.info("Building codebooks in subspaces - BEGIN")
    centroids = []
    for q in range(nsubq):
        logging.info("\tsubspace %d/%d", q, nsubq)
        begin = q * dsub
        end = begin + dsub
        # kmeans is fed a C-contiguous float32 array for this subspace
        vs = np.require(vals[:, begin:end],
                        requirements='C', dtype=np.float32)
        centroids.append(kmeans(vs, ksub, niter=100))
    logging.info("Building codebooks in subspaces - DONE")

    # Initializing indexer data; assigned only after training succeeded
    self.ecdat = {
        'nsubq': nsubq,
        'ksub': ksub,
        'dsub': dsub,
        'blksize': blksize,
        'centroids': centroids,
    }