def _cluster(self, descriptors, numClusters, maxiter=0, numruns=20):
    """
    Cluster the L{descriptors} using k-means clustering.

    This method uses the mpi_kmeans library from
    U{http://mloss.org/software/view/48/}.

    As a side effect, L{numruns} is stored in C{self.numRuns}.

    @param descriptors: The SIFT descriptors which are to be clustered.
        Iterated item by item; every item is stacked row-wise into one
        2-D array with 128 columns.
    @type descriptors: C{numpy.ndarray}
    @param numClusters: The number of clusters used for k-means clustering.
    @type numClusters: int
    @param maxiter: The maximum iterations of one k-means run. "0" means
        there is no iteration limit.
    @type maxiter: int
    @param numruns: The number of k-means runs.
    @type numruns: int

    @return: The codebook (the first value returned by
        C{mpi_kmeans.kmeans}).
    @rtype: C{numpy.ndarray}

    @raise ValueError: If fewer descriptors than L{numClusters} are
        available.
    """
    self.numRuns = numruns

    # Stack all descriptors from all images into one 2-D array with a single
    # vstack call. Stacking inside a loop would re-copy the growing array on
    # every iteration. The empty (0, 128) seed keeps the call valid when no
    # descriptors are given at all.
    seed = numpy.empty((0, 128), dtype=numpy.uint8)
    stackedDescriptors = numpy.vstack([seed] + list(descriptors))

    numFeatures = numpy.size(stackedDescriptors, 0)
    if numFeatures < numClusters:
        raise ValueError("Cannot compute a codebook with %i clusters from only %i features."
                         % (numClusters, numFeatures))

    # Perform clustering
    recognosco.logger.info("Performing kmeans clustering with %i cluster and %i features",
                           numClusters, numFeatures)
    # The descriptors are converted to C doubles before being handed to the
    # k-means routine.
    result = mpi_kmeans.kmeans(numpy.array(stackedDescriptors, dtype=ctypes.c_double),
                               numClusters, maxiter, numruns)

    # Only the codebook (first element) is needed. Indexing instead of
    # unpacking a fixed number of values keeps this working whether kmeans
    # returns three values or four (as _clusterPerClass expects).
    return result[0]
def _clusterPerClass(self, descPerClass, numClusters, maxiter, numruns, alpha, masterCBs):
    """
    Cluster the descriptors using k-means clustering B{per class} and merge
    the resulting per-class codebooks into one codebook.

    This method uses the mpi_kmeans library from
    U{http://mloss.org/software/view/48/}.

    As a side effect, the number of rows of the merged codebook is stored
    in C{self.numCluster}.

    @param descPerClass: Maps a class label to a sequence whose element at
        index 1 holds the stacked SIFT descriptors of that class (element 0
        is not used here).
    @type descPerClass: dict
    @param numClusters: The number of clusters used for k-means clustering
        of each class.
    @type numClusters: int
    @param maxiter: The maximum iterations of one k-means run. "0" means
        there is no iteration limit.
    @type maxiter: int
    @param numruns: The number of k-means runs.
    @type numruns: int
    @param alpha: Passed through unchanged to C{recognosco.mergecb.merge};
        presumably a merge threshold -- confirm against that module.
    @param masterCBs: The number of per-class codebooks, taken in the
        iteration order of the internal per-class dictionary, whose
        clusters are counted as master clusters.
    @type masterCBs: int

    @return: The merged codebook.
    @rtype: C{numpy.ndarray}

    @raise ValueError: If a class has fewer descriptors than
        L{numClusters}.
    @raise IndexError: If L{masterCBs} exceeds the number of classes.
    """
    cbPerClass = dict()
    for label, stackedDesc in descPerClass.items():
        classDescriptors = stackedDesc[1]
        numFeatures = numpy.size(classDescriptors, 0)
        if numFeatures < numClusters:
            raise ValueError("Cannot compute a codebook with %i clusters from only %i features."
                             % (numClusters, numFeatures))

        # Perform clustering for every category separately
        recognosco.logger.info("Performing kmeans clustering with %i cluster"
                               " and %i features for class '%s'",
                               numClusters, numFeatures, label)
        # NOTE(review): unlike _cluster, the descriptors are passed on without
        # an explicit conversion to C doubles -- confirm the caller already
        # provides them in that dtype.
        codebook, _, _, variance = mpi_kmeans.kmeans(classDescriptors, numClusters,
                                                     maxiter, numruns)
        cbPerClass[label] = (codebook, variance)

    # Materialise the values once: dictionary views cannot be indexed on
    # Python 3, and a list behaves the same on Python 2.
    classCodebooks = list(cbPerClass.values())

    # Count the clusters of the first masterCBs codebooks; these form the
    # master codebook the remaining ones are compared with.
    masterCBsize = sum(numpy.size(classCodebooks[cbInd][0], 0)
                       for cbInd in range(masterCBs))
    recognosco.logger.info("Using the first %i clusters as master clusters.", masterCBsize)

    # Stack all codebooks / variances with one call each. The empty seeds
    # fix the result dtype and keep the calls valid when there are no
    # classes. The builtin float replaces the removed numpy.float alias.
    codebooks = numpy.vstack([numpy.empty((0, 128), float)]
                             + [cb for cb, _ in classCodebooks])
    variances = numpy.hstack([numpy.empty((0), float)]
                             + [var for _, var in classCodebooks])

    mergedCB = recognosco.mergecb.merge(codebooks, variances, masterCBsize, alpha)
    recognosco.logger.info("Merged CB has %d cluster.", numpy.size(mergedCB, 0))
    self.numCluster = numpy.size(mergedCB, 0)

    return mergedCB