Пример #1
0
    def _cluster(self, descriptors, numClusters, maxiter=0, numruns=20):
        """
        Cluster the L{descriptors} using k-means clustering. This method uses the mpi_kmeans library from U{http://mloss.org/software/view/48/}.

        All descriptor arrays are stacked into one 2-D array, converted to
        C{double} and handed to C{mpi_kmeans.kmeans}. As a side effect the
        number of runs is stored in C{self.numRuns}.

        @param descriptors:  The SIFT descriptors which are to be clustered; an iterable of arrays
                             (one per image) that can be stacked onto a 128-column array.
        @type descriptors:   C{numpy.ndarray}

        @param numClusters:      The number of clusters used for k-means clustering.
        @type numClusters:       int

        @param maxiter:         The maximum iterations of one k-means run. "0" means there is no iteration limit.
        @type maxiter:          int

        @param numruns:         The number of k-means runs.
        @type numruns:          int

        @return:    The codebook.
        @rtype:     C{numpy.ndarray}

        @raise ValueError:  If fewer descriptors than L{numClusters} are available.
        """
        self.numRuns = numruns

        # Stack all descriptors from all images into one 2-D array with a
        # single vstack call; growing the array inside a loop would re-copy
        # every previously stacked row on each iteration. The empty (0, 128)
        # seed keeps the result well-defined when no descriptors are given.
        seed = numpy.empty((0, 128), dtype=numpy.uint8)
        stackedDescriptors = numpy.vstack([seed] + list(descriptors))
        numFeatures = numpy.size(stackedDescriptors, 0)

        if numFeatures < numClusters:
            raise ValueError("Cannot compute a codebook with %i clusters from only %i features." % (numClusters, numFeatures))

        # Perform clustering
        recognosco.logger.info("Performing kmeans clustering with %i cluster and %i features", numClusters, numFeatures)
        result = mpi_kmeans.kmeans(numpy.array(stackedDescriptors, dtype=ctypes.c_double), numClusters, maxiter, numruns)

        # Only the codebook (first element) is needed. Indexing instead of
        # unpacking keeps this working regardless of how many additional
        # values the kmeans wrapper returns.
        # NOTE(review): _clusterPerClass unpacks four values from the same
        # call - confirm the arity of the installed mpi_kmeans wrapper.
        return result[0]
Пример #2
0
    def _clusterPerClass(self, descPerClass, numClusters, maxiter, numruns, alpha, masterCBs):
        """
        Cluster the L{descPerClass} using k-means clustering B{per class}. This method uses the mpi_kmeans library from U{http://mloss.org/software/view/48/}.

        Every class gets its own codebook. All class codebooks and their
        variances are then stacked and merged into a single codebook by
        C{recognosco.mergecb.merge}. As a side effect the size of the merged
        codebook is stored in C{self.numCluster}.

        @param descPerClass:  Mapping from class label to an indexable whose element at index 1
                              holds the stacked SIFT descriptors of that class.
        @type descPerClass:   dict

        @param numClusters:      The number of clusters used for k-means clustering.
        @type numClusters:       int

        @param maxiter:         The maximum iterations of one k-means run. "0" means there is no iteration limit.
        @type maxiter:          int

        @param numruns:         The number of k-means runs.
        @type numruns:          int

        @param alpha:           Passed through unchanged to C{recognosco.mergecb.merge}
                                (presumably a merge threshold - confirm against that module).

        @param masterCBs:       The number of per-class codebooks, taken in the iteration order of
                                L{descPerClass}, whose clusters are counted as master clusters.
        @type masterCBs:        int

        @return:    The merged codebook.
        @rtype:     C{numpy.ndarray}

        @raise ValueError:  If a class has fewer descriptors than L{numClusters}.
        @raise IndexError:  If L{masterCBs} exceeds the number of classes.
        """
        cbPerClass = {}
        for label, stackedDesc in descPerClass.items():
            classDesc = stackedDesc[1]
            numFeatures = numpy.size(classDesc, 0)
            if numFeatures < numClusters:
                raise ValueError("Cannot compute a codebook with %i clusters from only %i features." % (numClusters, numFeatures))

            # Perform clustering for every category separately
            recognosco.logger.info("Performing kmeans clustering with %i cluster"
                                   " and %i features for class '%s'", numClusters,
                                   numFeatures, label)
            codebook, _, _, variance = mpi_kmeans.kmeans(classDesc, numClusters, maxiter, numruns)
            cbPerClass[label] = (codebook, variance)

        # Dict views cannot be indexed on Python 3, so materialise the
        # (codebook, variance) pairs once. Using the same list for counting
        # and for stacking also guarantees both steps see the same order.
        classResults = list(cbPerClass.values())

        # Count the clusters of the first masterCBs codebooks; they form the
        # master part of the stacked codebook handed to the merge step.
        masterCBsize = 0
        for cbInd in range(masterCBs):
            masterCBsize += numpy.size(classResults[cbInd][0], 0)

        recognosco.logger.info("Using the first %i clusters as master clusters.", masterCBsize)

        # Stack everything with one call each instead of growing the arrays
        # per class. The empty seeds keep shape and dtype defined when there
        # are no classes. numpy.float64 replaces the numpy.float alias, which
        # no longer exists in current NumPy releases.
        codebooks = numpy.vstack([numpy.empty((0, 128), numpy.float64)] + [cb for cb, _ in classResults])
        variances = numpy.hstack([numpy.empty((0), numpy.float64)] + [var for _, var in classResults])

        mergedCB = recognosco.mergecb.merge(codebooks, variances, masterCBsize, alpha)

        mergedSize = numpy.size(mergedCB, 0)
        recognosco.logger.info("Merged CB has %d cluster.", mergedSize)
        self.numCluster = mergedSize

        return mergedCB