示例#1
0
    def train(self, data, **args):
        """
        Select the best classifier from ``self.param`` by stratified
        cross-validation and keep a copy of it in ``self.classifier``.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
        """

        Classifier.train(self, data, **args)

        maxSuccessRate = 0
        bestClassifier = None
        classifierIdx = 0
        # propagate the CV settings to the parameter object's CV routine
        args['numFolds'] = self.numFolds
        args['foldsToPerform'] = self.foldsToPerform

        for r in self.param.stratifiedCV(data, **args):
            successRate = getattr(r, self.measure)
            # accept the first classifier unconditionally: otherwise, when no
            # classifier exceeds a success rate of 0, bestClassifier stays
            # None and indexing self.param.classifiers[None] below raises
            if bestClassifier is None or successRate > maxSuccessRate:
                bestClassifier = classifierIdx
                maxSuccessRate = successRate
            classifierIdx += 1

        self.log.maxSuccessRate = maxSuccessRate

        # store a copy of the winning classifier, not the original
        self.classifier = self.param.classifiers[bestClassifier].__class__(
            self.param.classifiers[bestClassifier])

        if 'train' not in args or args['train'] is True:
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = str(
            self.classifier.__class__(self.classifier))
示例#2
0
    def train(self, data, **args):
        """Train one copy of the base classifier per feature-subset size;
        the sizes considered are the powers of two strictly below the
        number of features in the data."""

        Classifier.train(self, data, **args)

        # subset sizes: 1, 2, 4, ... < data.numFeatures
        sizes = []
        size = 1
        while size < data.numFeatures:
            sizes.append(size)
            size *= 2

        # one fresh copy of the base classifier per subset size
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for _ in sizes]

        # rank all features once, on a copy of the feature selector
        selector = self.featureSelector.__class__(self.featureSelector)
        ranking = selector.rank(data)

        for classifier, size in zip(self.classifiers, sizes):
            # train on a copy of the data restricted to the top `size` features
            subset = data.__class__(data)
            subset.keepFeatures(ranking[:size])
            classifier.train(subset)
            classifier.log.numFeatures = subset.numFeatures

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.trainingTime = self.getTrainingTime()
示例#3
0
    def train(self, data, **args):
        """Train one copy of the base classifier per feature-subset size;
        the sizes considered are the powers of two strictly below the
        number of features in the data."""

        Classifier.train(self, data, **args)

        # subset sizes: 1, 2, 4, ... < data.numFeatures
        sizes = []
        size = 1
        while size < data.numFeatures:
            sizes.append(size)
            size *= 2

        # one fresh copy of the base classifier per subset size
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for _ in sizes]

        # rank all features once, on a copy of the feature selector
        selector = self.featureSelector.__class__(self.featureSelector)
        ranking = selector.rank(data)

        for classifier, size in zip(self.classifiers, sizes):
            # train on a copy of the data restricted to the top `size` features
            subset = data.__class__(data)
            subset.keepFeatures(ranking[:size])
            classifier.train(subset)
            classifier.log.numFeatures = subset.numFeatures

        self.classifier.log.trainingTime = self.getTrainingTime()
示例#4
0
    def train(self, data, **args):
        """
        Two-stage model selection for a Gaussian-kernel SVM: first choose
        the kernel width (gamma) with C fixed at ``self.Clow``, then choose
        C using the selected gamma.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
          - `vdata` - data to use for testing instead of using cross-validation
            (not implemented yet)
        """
        Classifier.train(self, data, **args)

        # stage 1: select gamma at a fixed, low value of C
        gammaSelector = ModelSelector(
            Param(svm.SVM(ker.Gaussian(), C=self.Clow),
                  'kernel.gamma', self.gamma),
            measure=self.measure,
            numFolds=self.numFolds)
        gammaSelector.train(data)
        bestGamma = gammaSelector.classifier.kernel.gamma

        # stage 2: select C using the gamma chosen above
        cSelector = ModelSelector(
            Param(svm.SVM(ker.Gaussian(gamma=bestGamma)), 'C', self.C),
            measure=self.measure,
            numFolds=self.numFolds)
        cSelector.train(data)

        # keep a copy of the winning classifier
        self.classifier = cSelector.classifier.__class__(cSelector.classifier)

        if 'train' not in args or args['train'] is True:
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = self.classifier.__class__(
            self.classifier)
示例#5
0
    def train(self, data, **args):
        """
        Two-stage model selection for a Gaussian-kernel SVM: first choose
        the kernel width (gamma) with C fixed at ``self.Clow``, then choose
        C using the selected gamma.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
          - `vdata` - data to use for testing instead of using cross-validation
            (not implemented yet)
        """
        Classifier.train(self, data, **args)

        # stage 1: select gamma at a fixed, low value of C
        gammaSelector = ModelSelector(
            Param(svm.SVM(ker.Gaussian(), C=self.Clow),
                  'kernel.gamma', self.gamma),
            measure=self.measure,
            numFolds=self.numFolds)
        gammaSelector.train(data)
        bestGamma = gammaSelector.classifier.kernel.gamma

        # stage 2: select C using the gamma chosen above
        cSelector = ModelSelector(
            Param(svm.SVM(ker.Gaussian(gamma=bestGamma)), 'C', self.C),
            measure=self.measure,
            numFolds=self.numFolds)
        cSelector.train(data)

        # keep a copy of the winning classifier
        self.classifier = cSelector.classifier.__class__(cSelector.classifier)

        if 'train' not in args or args['train'] is True:
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = self.classifier.__class__(
            self.classifier)
示例#6
0
    def train(self, data, **args):
        """
        Select the best classifier from ``self.param`` by stratified
        cross-validation and keep a copy of it in ``self.classifier``.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
        """

        Classifier.train(self, data, **args)

        maxSuccessRate = 0
        bestClassifier = None
        classifierIdx = 0
        # propagate the CV settings to the parameter object's CV routine
        args['numFolds'] = self.numFolds
        args['foldsToPerform'] = self.foldsToPerform

        for r in self.param.stratifiedCV(data, **args):
            successRate = getattr(r, self.measure)
            # accept the first classifier unconditionally: otherwise, when no
            # classifier exceeds a success rate of 0, bestClassifier stays
            # None and indexing self.param.classifiers[None] below raises
            if bestClassifier is None or successRate > maxSuccessRate:
                bestClassifier = classifierIdx
                maxSuccessRate = successRate
            classifierIdx += 1

        self.log.maxSuccessRate = maxSuccessRate

        # store a copy of the winning classifier, not the original
        self.classifier = self.param.classifiers[bestClassifier].__class__(
            self.param.classifiers[bestClassifier])

        if 'train' not in args or args['train'] is True:
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = str(
            self.classifier.__class__(self.classifier))
示例#7
0
    def train(self, data, **args) :
        """Train the classifier on `data`.

        This base implementation only performs the bookkeeping done by
        Classifier.train and records the elapsed training time; subclasses
        are expected to add the actual training logic."""

        Classifier.train(self, data, **args)

        # this should be the last command in the train function
        # if you redefine the "test" function you can follow the code in
        # assess.test to save the testingTime
        self.log.trainingTime = self.getTrainingTime()
示例#8
0
    def train(self, data, **args):
        """Train each sub-classifier on the corresponding component dataset
        of a DataAggregate container.

        :Raises: ValueError if `data` is not a DataAggregate instance.
        """
        Classifier.train(self, data, **args)
        if data.__class__.__name__ != 'DataAggregate':
            # raise-with-comma is Python 2-only syntax; the call form
            # works under both Python 2 and 3
            raise ValueError('train requires a DataAggregate dataset')

        # each classifier is trained on its own component dataset
        for classifier, subData in zip(self.classifiers, data.datas):
            classifier.train(subData, **args)
        self.log.trainingTime = self.getTrainingTime()
示例#9
0
    def train(self, data, **args):
        """Train every preprocessing stage in the chain on the data in
        order, then train the final classifier and record training time."""

        Classifier.train(self, data, **args)

        # each stage of the chain is trained (and may transform the data)
        for stage in self.chain:
            stage.train(data, **args)

        self.classifier.train(data, **args)
        self.log.trainingTime = self.getTrainingTime()
示例#10
0
    def train(self, data, **args):
        """Train each sub-classifier on the corresponding component dataset
        of a DataAggregate container.

        :Raises: ValueError if `data` is not a DataAggregate instance.
        """
        Classifier.train(self, data, **args)
        if data.__class__.__name__ != 'DataAggregate':
            # raise-with-comma is Python 2-only syntax; the call form
            # works under both Python 2 and 3
            raise ValueError('train requires a DataAggregate dataset')

        # each classifier is trained on its own component dataset
        for classifier, subData in zip(self.classifiers, data.datas):
            classifier.train(subData, **args)
        self.log.trainingTime = self.getTrainingTime()
示例#11
0
    def train(self, data, **args):
        """Train every preprocessing stage in the chain on the data in
        order, then train the final classifier and record training time."""

        Classifier.train(self, data, **args)

        # each stage of the chain is trained (and may transform the data)
        for stage in self.chain:
            stage.train(data, **args)

        self.classifier.train(data, **args)
        self.log.trainingTime = self.getTrainingTime()
示例#12
0
    def train(self, data, **args):
        """Run feature selection on the data, then train the classifier on
        the reduced data, logging the surviving feature set."""

        Classifier.train(self, data, **args)

        # the selector modifies `data` in place
        self.featureSelector.select(data, **args)
        # record what survived selection (copy the ID list, not a reference)
        self.classifier.log.numFeatures = data.numFeatures
        self.classifier.log.features = data.featureID[:]

        self.classifier.train(data, **args)
        self.classifier.log.trainingTime = self.getTrainingTime()
示例#13
0
    def train(self, data, **args):
        """Run feature selection on the data, then train the classifier on
        the reduced data, logging the surviving feature set."""

        Classifier.train(self, data, **args)

        # the selector modifies `data` in place
        self.featureSelector.select(data, **args)
        # record what survived selection (copy the ID list, not a reference)
        self.classifier.log.numFeatures = data.numFeatures
        self.classifier.log.features = data.featureID[:]

        self.classifier.train(data, **args)
        self.classifier.log.trainingTime = self.getTrainingTime()
示例#14
0
    def load(self, fileName, data):
        """Load a trained multi-class classifier from a file.

        Also provide the data on which the classifier was trained.  It
        assumes the underlying binary classifier is an SVM.

        :Parameters:
          - `fileName` - path of the file the SVMs were saved to
          - `data` - the dataset the classifier was trained on
        """
        from PyML import svm
        Classifier.train(self, data)
        numClasses = self.labels.numClasses
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for i in range(numClasses)]
        # the original left the file handle open; `with` guarantees it is
        # closed even if loading one of the SVMs fails
        with open(fileName) as file_handle:
            for i in range(numClasses):
                datai = data.__class__(data,
                                       deepcopy=self.classifier.deepcopy)
                datai = oneAgainstRest(datai, data.labels.classLabels[i])
                self.classifiers[i] = svm.loadSVM(file_handle, datai)
示例#15
0
    def train(self, data, **args):
        """Train k one-against-rest classifiers, one per class.

        :Raises: ValueError if the data has fewer than three classes.
        """
        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses
        if numClasses <= 2:
            # raise-with-comma is Python 2-only syntax; the call form
            # works under both Python 2 and 3
            raise ValueError('Not a multi class problem')
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for i in range(numClasses)]
        for i in range(numClasses):
            # make a copy of the data; this is done in case the classifier
            # modifies the data
            datai = data.__class__(data, deepcopy=self.classifier.deepcopy)
            datai = oneAgainstRest(datai, data.labels.classLabels[i])
            self.classifiers[i].train(datai)

        self.log.trainingTime = self.getTrainingTime()
示例#16
0
    def load(self, fileName, data):
        """Load a trained multi-class classifier from a file.

        Also provide the data on which the classifier was trained.  It
        assumes the underlying binary classifier is an SVM.

        :Parameters:
          - `fileName` - path of the file the SVMs were saved to
          - `data` - the dataset the classifier was trained on
        """
        from PyML import svm
        Classifier.train(self, data)
        numClasses = self.labels.numClasses
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for i in range(numClasses)]
        # the original left the file handle open; `with` guarantees it is
        # closed even if loading one of the SVMs fails
        with open(fileName) as file_handle:
            for i in range(numClasses):
                datai = data.__class__(data,
                                       deepcopy=self.classifier.deepcopy)
                datai = oneAgainstRest(datai, data.labels.classLabels[i])
                self.classifiers[i] = svm.loadSVM(file_handle, datai)
示例#17
0
    def train(self, data, **args):
        """Train k(k-1)/2 pairwise (one-vs-one) classifiers.

        :Raises: ValueError if the data has fewer than three classes.
        """
        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses

        if numClasses <= 2:
            # raise-with-comma is Python 2-only syntax; the call form
            # works under both Python 2 and 3
            raise ValueError('Not a multi class problem')

        # upper-triangular matrix of classifiers, one per class pair (i, j)
        self.classifiers = misc.matrix((numClasses, numClasses))
        for i in range(numClasses - 1):
            for j in range(i + 1, numClasses):
                self.classifiers[i][j] = self.classifier.__class__(
                    self.classifier)
                # restrict a copy of the data to classes i and j
                dataij = data.__class__(data,
                                        deepcopy=self.classifier.deepcopy,
                                        classID=[i, j])
                self.classifiers[i][j].train(dataij)
        self.log.trainingTime = self.getTrainingTime()
示例#18
0
    def train(self, data, **args):
        """Train k one-against-rest classifiers, one per class.

        :Raises: ValueError if the data has fewer than three classes.
        """
        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses
        if numClasses <= 2:
            # raise-with-comma is Python 2-only syntax; the call form
            # works under both Python 2 and 3
            raise ValueError('Not a multi class problem')
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for i in range(numClasses)]
        for i in range(numClasses):
            # make a copy of the data; this is done in case the classifier
            # modifies the data
            datai = data.__class__(data, deepcopy=self.classifier.deepcopy)
            datai = oneAgainstRest(datai, data.labels.classLabels[i])
            self.classifiers[i].train(datai)

        self.log.trainingTime = self.getTrainingTime()
示例#19
0
    def train(self, data, **args):
        """Train k(k-1)/2 pairwise (one-vs-one) classifiers.

        :Raises: ValueError if the data has fewer than three classes.
        """
        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses

        if numClasses <= 2:
            # raise-with-comma is Python 2-only syntax; the call form
            # works under both Python 2 and 3
            raise ValueError('Not a multi class problem')

        # upper-triangular matrix of classifiers, one per class pair (i, j)
        self.classifiers = misc.matrix((numClasses, numClasses))
        for i in range(numClasses - 1):
            for j in range(i + 1, numClasses):
                self.classifiers[i][j] = self.classifier.__class__(
                    self.classifier)
                # restrict a copy of the data to classes i and j
                dataij = data.__class__(data,
                                        deepcopy=self.classifier.deepcopy,
                                        classID=[i, j])
                self.classifiers[i][j].train(dataij)
        self.log.trainingTime = self.getTrainingTime()