Example #1
def clusteringParamSearch(trainPath, trainClasses, trainCount, clusterRange, ptRange, etRange, fisherRange):

    trainImgs = im.loadImages(trainPath, trainClasses, trainCount)
    trainLabels = im.loadLabels(trainPath, trainClasses, trainCount)

    for peakVal in ptRange:
        for edgeVal in etRange:

            # Extract SIFT features
            rec.logger.info("SIFT Parameter. Peak: %f Edge: %f", peakVal, edgeVal)
            trainDesc, numTrainingDesc = im.extractFeatures(trainImgs, peakVal, edgeVal)

            for clusterParam in clusterRange:
                for fDist in fisherRange:

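                    # Record the current parameter combination on the learner
                    # so it is persisted together with the clustering results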
                    learner = cl.Learner()
                    learner.peakThreshold = peakVal
                    learner.edgeThreshold = edgeVal
                    learner.numTrainingDesc = numTrainingDesc

                    rec.logger.info("Cluster parameter. Cluster: %i MinDist %f", clusterParam, fDist)
                    learner.averageSilhouetteCoefficient(trainLabels, trainDesc, clusterParam, fDist)
                    learner.saveToDB()
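
A minimal usage sketch for this grid search; the module aliases (im, cl, rec) and the Learner API come from the snippet above, while the path, class names, and parameter ranges below are hypothetical:

# Every combination of these values is evaluated and stored in the DB
clusteringParamSearch(
    trainPath="data/train",                 # hypothetical image directory
    trainClasses=["cup", "book", "phone"],  # hypothetical class names
    trainCount=50,                          # images loaded per class
    clusterRange=[50, 100, 200],            # candidate codebook sizes
    ptRange=[5.0, 10.0, 15.0],              # SIFT peak thresholds
    etRange=[5.0, 7.5, 10.0],               # SIFT edge thresholds
    fisherRange=[0.5, 1.0, 2.0])            # minimum-distance values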
Example #2
    def train(self, featOrImgs, labels, numCluster, peakThreshold=10.0, edgeThreshold=10.0, maxiter=0, numruns=20):
        """
        Perform k-means clustering and vector quantize all features. 
        The codebook and the histograms of visual words are stored as members.
        
        If L{featOrImgs} is a list of images, SIFT features are extracted
        before all other operations are done.
                
        @param featOrImgs:    Either a list of SIFT descriptor arrays or a list of images.
        @type featOrImgs:     [numpy.ndarray] or [Image.Image]
    
        @param labels:        A list of labels corresponding to the list of descriptors/images.
        @type labels:         [string]
    
        @param numCluster:    The number of clusters used for k-means clustering.
        @type numCluster:     int
        
        @param peakThreshold:   The peak threshold used during SIFT feature extraction.
                              This argument is ignored if L{featOrImgs} is a list of SIFT features.
        @type peakThreshold:    float

        @param edgeThreshold:   The edge threshold used during SIFT feature extraction.
                              This argument is ignored if L{featOrImgs} is a list of SIFT features.
        @type edgeThreshold:    float
    
        @param maxiter:       The maximum iterations of one k-means run. 
                              "0" means there is no iteration limit.
        @type maxiter:        int
    
        @param numruns:       The number of k-means runs.
        @type numruns:        int
        """
        self.numCluster = numCluster

        if isinstance(featOrImgs[0], Image.Image):
            self.peakThreshold = peakThreshold
            self.edgeThreshold = edgeThreshold

            descriptors = im.extractFeatures(featOrImgs, peakThreshold, edgeThreshold)
        else:
            descriptors = featOrImgs

        starttime = time.time()

        # Compute codebook from all sift descriptors of all images
        self.codebook = self._cluster(descriptors, numCluster, maxiter, numruns)

        # Get the histograms as a list of numpy arrays
        tempHistograms = _buildHistograms(self.codebook, descriptors)

        # Create orange domain
        classes = _getAllClasses(labels)
        self.strClasses = str(classes)
        histogramDomain = [orange.FloatVariable('a%i' % x) for x in xrange(len(tempHistograms[0]))]
        classDomain = [orange.EnumVariable("class", values=orange.StringList(classes))]
        self.domain = orange.Domain(histogramDomain + classDomain)

        # Create orange exampleTable
        self.histograms = _convertToOrangeDataSet(tempHistograms, self.domain, labels)

        endtime = time.time()
        self.trainingTime = endtime - starttime
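
A short usage sketch, assuming this train() method lives on the cl.Learner class seen in Example #1; the file names and labels are placeholders:

import Image  # PIL, as expected by the isinstance() check above

imgs = [Image.open(name) for name in ["cup0.png", "book0.png"]]  # hypothetical files
labels = ["cup", "book"]                                          # one label per image

learner = cl.Learner()
# SIFT features are extracted internally because we pass images; a
# 100-word codebook is built with 20 k-means runs and no iteration limit.
learner.train(imgs, labels, numCluster=100,
              peakThreshold=10.0, edgeThreshold=10.0,
              maxiter=0, numruns=20)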
Example #3
    def classificationAccuracy(self, histOrImgs, labels=None, confThr=0.0,
                               peakThr=None, edgeThr=None, nu=0.6, gamma=2.0, doCrossVal=False):
        """
        Classify test data and (optionally) perform a cross validation on the training data.

        @param histOrImgs:    Either an Orange example table of visual word histograms
                              or an iterator of images.
        @type histOrImgs:     orange.ExampleTable or iterator of Image.Image
        
        @param labels:        A list of labels corresponding to the descriptors/images.
                              Required if L{histOrImgs} is an iterator of images.
        @type labels:         [string]
    
        @param nu:            The S{nu}-Parameter of the support vector machine.
        @type nu:             float

        @param gamma:         The S{gamma}-Parameter of the RBF-Kernel.
        @type gamma:          float
        
        @param confThr:      Classifications with a confidence below this threshold
                             are rejected. 1.0: everything is rejected,
                             0.0: nothing is rejected.
        @type confThr:       float

        @param peakThr:      A SIFT parameter. Sensible values: 0.0 < x < 30.0.
                             Defaults to the learner's peak threshold if None.
        @type peakThr:       float

        @param edgeThr:      A SIFT parameter. Sensible values: 0.0 < x < 10.0.
                             Defaults to the learner's edge threshold if None.
        @type edgeThr:       float

        @param doCrossVal:   Whether to perform a 10-fold cross validation on the
                             training data.
        @type doCrossVal:    bool

        @rtype:               (float, float)
        @return:              The cross validation accuracy (-1.0 if L{doCrossVal}
                              is False) and the test data classification accuracy.
        """
        if self.learner is None:
            raise ValueError("Learner has to be loaded before classification can be done.")

        # Set SIFT member variables (so they get stored in the DB if requested)
        if peakThr is None:
            self.peakThreshold = self.learner.peakThreshold
        else:
            self.peakThreshold = peakThr

        if edgeThr is None:
            self.edgeThreshold = self.learner.edgeThreshold
        else:
            self.edgeThreshold = edgeThr

        # If we've been given an images iterator, extract features and vector quantize
        if isinstance(histOrImgs, collections.Iterator):
            if labels is None:
                raise ValueError("If argument 'histOrImgs' is an iterator of images, \
                                  the argument 'lables' must not be None.")

            desc, self.numTestDesc = im.extractFeatures(histOrImgs, self.peakThreshold, self.edgeThreshold)

            recognosco.logger.info("Found %i features/image on average.", self.numTestDesc / len(desc))

            tmpHistograms = _buildHistograms(self.learner.codebook, desc)
            histograms = _convertToOrangeDataSet(tmpHistograms, self.learner.domain, labels)
        else:
            histograms = histOrImgs


        values = histograms.domain.classVar.values
        self.values = values
        length = len(values)
        self.confusion = numpy.zeros((length, length), int)
        starttime = time.time()

        self.nu = nu
        self.gamma = gamma

        svm = orange.SVMLearner()
        svm.svm_type = orange.SVMLearner.Nu_SVC
        svm.nu = nu
        svm.gamma = gamma
        svm.kernel_type = orange.SVMLearner.RBF
        svm.probability = True

        recognosco.logger.debug("Training Support Vector Machine...")
        self.classifier = svm(self.learner.histograms)
        recognosco.logger.debug("Done...")

        crossVal = 0.0
        if doCrossVal:
            crossVal = orngTest.crossValidation([svm], self.learner.histograms, folds=10)

        numCorrectClassified = 0.0
        numClassified = 0.0
        for i in range(len(histograms)):
            c = self.classifier(histograms[i])
            recognosco.logger.info("Has the class: %s", histograms[i].getclass())
            recognosco.logger.info("Classified as: %s", c)

            prob = self.classifier(histograms[i], self.classifier.GetProbabilities)
            conf = self.__getConfidence(prob)
            recognosco.logger.info("Confidence: %f", conf)
            if conf < confThr:
                recognosco.logger.info("Rejected classification (Threshold: %.2f)", confThr)
                continue

            numClassified += 1.0
            predicted = values.index(str(c))
            actual = values.index(str(histograms[i].getclass()))
            self.confusion[predicted][actual] += 1

            if c == histograms[i].getclass():
                numCorrectClassified += 1.0

        endtime = time.time()
        # Stringify the confusion matrix so it can be stored in the DB
        self.confusion = str(self.confusion)
        self.clAccuracy = numCorrectClassified / numClassified if numClassified else 0.0
        self.fracClassified = numClassified / len(histograms)

        if doCrossVal:
            self.cvAccuracy = orngStat.CA(crossVal)[0]
            recognosco.logger.info("Cross validation accuracy: %s", self.cvAccuracy)
        else:
            self.cvAccuracy = -1.0

        recognosco.logger.info("Classification accuracy of test data: %s", self.clAccuracy)
        self.testTime = endtime - starttime
        return (self.cvAccuracy, self.clAccuracy)
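
A hedged usage sketch; the Recognizer wrapper class, testImageList, and testLabels are assumptions (any object holding a trained learner and a prepared test set would do), only the classificationAccuracy() signature is taken from the snippet:

recognizer = Recognizer()        # hypothetical wrapper exposing this method
recognizer.learner = learner     # trained Learner from Example #2

testImgs = iter(testImageList)   # must be an iterator, not a plain list
cvAcc, testAcc = recognizer.classificationAccuracy(
    testImgs, labels=testLabels,
    confThr=0.1,          # reject predictions below 10% confidence
    nu=0.6, gamma=2.0,    # Nu-SVC / RBF kernel parameters
    doCrossVal=True)      # also 10-fold cross-validate on the training data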
Example #4
    def classify(self, images, nu=0.6, gamma=4.0, confidenceThreshold=0.03, minFeatures=5):
        """
        Classify test data and return the label(s).

        @param images:          A single image or a list of images.
        @type images:           Image.Image or [Image.Image]
        
        @param nu:              The S{nu}-Parameter of the support vector machine.
        @type nu:               float

        @param gamma:           The S{gamma}-Parameter of the RBF-Kernel.
        @type gamma:            float
        
        @param confidenceThreshold:    Classifications with a confidence below this
                                       threshold are rejected. 1.0: everything is
                                       rejected, 0.0: nothing is rejected.
        @type confidenceThreshold:     float
        
        @param minFeatures:     The minimum number of SIFT features that should be
                                extracted from every image; the SIFT thresholds are
                                relaxed until at least this many are found.
        @type minFeatures:      int

        @return:                The labels which result from the classification
        @rtype:                 [string]
        """
        if self.learner is None:
            raise ValueError("Learner has to be loaded before classification can be done.")

        if not isinstance(images, collections.Iterable):
            images = [images]

        # We have no labels, so set them to None
        inputLabels = [None for i in range(len(images))]

        # Use the same SIFT parameters as during training, but relax them
        # if they yield too few features
        self.numTestDesc = 0
        peak = self.learner.peakThreshold
        edge = self.learner.edgeThreshold
        peakDecr = 1.0
        edgeInc = 1.0
        descIter1, descIter2 = itertools.tee(im.extractFeatures(images, peak, edge))
        for desc in descIter1:
            self.numTestDesc += numpy.size(desc, 0)

        recognosco.logger.info("Found %i features.", self.numTestDesc)

        while self.numTestDesc < minFeatures and peak >= 0.0 and edge <= 10.0:
            # Adjust SIFT parameters
            peakDecr = peakDecr - 0.2
            peak = self.learner.peakThreshold * peakDecr

            edgeInc = edgeInc + 0.2
            edge = self.learner.edgeThreshold * edgeInc

            recognosco.logger.warning("Less than %i SIFT features have been extracted. "
                                      "Adjusting SIFT parameters (peak: %f, edge: %f).",
                                      minFeatures, peak, edge)

            descIter1, descIter2 = itertools.tee(im.extractFeatures(images, peak, edge))

            self.numTestDesc = 0
            for desc in descIter1:
                self.numTestDesc += numpy.size(desc, 0)

            recognosco.logger.info("Found %i features.", self.numTestDesc)

        if self.numTestDesc < minFeatures:
            recognosco.logger.error("Less than %i SIFT features have been extracted. Aborting.", minFeatures)
            return ["<not enough features>"]

        tmpHistograms = _buildHistograms(self.learner.codebook, descIter2)
        histograms = _convertToOrangeDataSet(tmpHistograms, self.learner.domain, inputLabels)

        self.nu = nu
        self.gamma = gamma

        svm = orange.SVMLearner()
        svm.svm_type = orange.SVMLearner.Nu_SVC
        svm.nu = nu
        svm.gamma = gamma
        svm.kernel_type = orange.SVMLearner.RBF
        svm.probability = True

        classifier = svm(self.learner.histograms)

        outputLabels = []
        for i in range(len(histograms)):
            c = classifier(histograms[i])
            prob = classifier(histograms[i], classifier.GetProbabilities)
            conf = self.__getConfidence(prob)

            if conf < confidenceThreshold:
                recognosco.logger.info("Rejected classification. Nearest match:"
                                       " %s (Confidence: %.2f%%)", c, conf * 100.0)
                continue

            recognosco.logger.info("Classified as: %s. Confidence: %.2f%%",
                                   c, conf * 100.0)
            outputLabels.append(str(c))

        return outputLabels
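
Finally, a minimal sketch of calling classify() on a single image; the Recognizer wrapper and query file name are assumptions carried over from the sketch after Example #3:

recognizer = Recognizer()        # hypothetical wrapper exposing this method
recognizer.learner = learner     # trained Learner with codebook and histograms

result = recognizer.classify(Image.open("query.png"),  # single image gets wrapped in a list
                             nu=0.6, gamma=4.0,
                             confidenceThreshold=0.03,  # reject below 3% confidence
                             minFeatures=5)             # relax SIFT thresholds if fewer
print(result)  # e.g. ["cup"], or [] if the classification was rejected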