示例#1
0
    def __init__(self, arg, **args):
        """
        :Parameters:
          - `arg` - another ModelSelector or a Param object

        :Keywords:
          - `measure` - which measure of accuracy to use for selecting the
            best classifier (default = 'balancedSuccessRate')
            supported measures are: 'balancedSuccessRate', 'successRate',
            'roc', 'roc50' (you can substitute any number instead of 50)
          - `numFolds` - number of CV folds to use when performing model selection
          - `foldsToPerform` - the number of folds to actually perform

        Raises ValueError when `arg` is neither a ModelSelector of this
        class nor a Param object.
        """

        Classifier.__init__(self, **args)

        if arg.__class__ == self.__class__:
            # copy constructor: duplicate the other selector's state
            self.param = arg.param.__class__(arg.param)
            self.measure = arg.measure
            self.numFolds = arg.numFolds
        elif arg.__class__.__name__.find('Param') >= 0:
            # a Param object: copy it via its own copy constructor
            self.param = arg.__class__(arg)
        else:
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('wrong type of input for ModelSelector')

        # filled in by train() once the best model has been chosen
        self.classifier = None
示例#2
0
    def __init__(self, arg, **args) :
        """
        :Parameters:
          - `arg` - another ModelSelector or a Param object

        :Keywords:
          - `measure` - which measure of accuracy to use for selecting the
            best classifier (default = 'balancedSuccessRate')
            supported measures are: 'balancedSuccessRate', 'successRate',
            'roc', 'roc50' (you can substitute any number instead of 50)
          - `numFolds` - number of CV folds to use when performing model selection
          - `foldsToPerform` - the number of folds to actually perform

        Raises ValueError when `arg` is neither a ModelSelector of this
        class nor a Param object.
        """

        Classifier.__init__(self, **args)

        if arg.__class__ == self.__class__ :
            # copy constructor: duplicate the other selector's state
            self.param = arg.param.__class__(arg.param)
            self.measure = arg.measure
            self.numFolds = arg.numFolds
        elif arg.__class__.__name__.find('Param') >= 0 :
            # a Param object: copy it via its own copy constructor
            self.param = arg.__class__(arg)
        else :
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('wrong type of input for ModelSelector')

        # filled in by train() once the best model has been chosen
        self.classifier = None
示例#3
0
    def train(self, data, **args) :
        """
        Two-stage model selection for a Gaussian-kernel SVM: pick the
        kernel width (gamma) with C held at its low value, then pick C
        with the chosen gamma fixed, and keep the winning classifier.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
          - `vdata` - data to use for testing instead of using cross-validation
            (not implemented yet)
        """
        Classifier.train(self, data, **args)

        # stage 1: select gamma at the low C value
        widthSelector = ModelSelector(
            Param(svm.SVM(ker.Gaussian(), C = self.Clow),
                  'kernel.gamma', self.gamma),
            measure = self.measure,
            numFolds = self.numFolds)
        widthSelector.train(data)
        bestGamma = widthSelector.classifier.kernel.gamma

        # stage 2: select C with the chosen kernel width fixed
        penaltySelector = ModelSelector(
            Param(svm.SVM(ker.Gaussian(gamma = bestGamma)), 'C', self.C),
            measure = self.measure,
            numFolds = self.numFolds)
        penaltySelector.train(data)

        self.classifier = penaltySelector.classifier.__class__(
            penaltySelector.classifier)

        if 'train' not in args or args['train'] is True :
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = self.classifier.__class__(self.classifier)
示例#4
0
    def train(self, data, **args) :
        """
        Cross-validate every parameter setting and keep the classifier
        that scores highest under ``self.measure``.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
        """

        Classifier.train(self, data, **args)

        # the CV routine reads the fold configuration from the keywords
        args['numFolds'] = self.numFolds
        args['foldsToPerform'] = self.foldsToPerform

        bestIdx = None
        bestScore = 0
        for idx, result in enumerate(self.param.stratifiedCV(data, **args)) :
            score = getattr(result, self.measure)
            if score > bestScore :
                bestIdx = idx
                bestScore = score

        self.log.maxSuccessRate = bestScore

        winner = self.param.classifiers[bestIdx]
        self.classifier = winner.__class__(winner)

        if 'train' not in args or args['train'] is True :
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = str(self.classifier.__class__(self.classifier))
示例#5
0
    def train(self, data, **args):
        """
        Evaluate each candidate parameter setting by stratified
        cross-validation and retain the one with the best score under
        ``self.measure``.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
        """

        Classifier.train(self, data, **args)

        # fold configuration is passed through the keyword dictionary
        args['numFolds'] = self.numFolds
        args['foldsToPerform'] = self.foldsToPerform

        topScore = 0
        winnerIdx = None
        for candidateIdx, cvResult in enumerate(
                self.param.stratifiedCV(data, **args)):
            candidateScore = getattr(cvResult, self.measure)
            if candidateScore > topScore:
                winnerIdx = candidateIdx
                topScore = candidateScore

        self.log.maxSuccessRate = topScore

        chosen = self.param.classifiers[winnerIdx]
        self.classifier = chosen.__class__(chosen)

        if 'train' not in args or args['train'] is True:
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = str(
            self.classifier.__class__(self.classifier))
示例#6
0
    def train(self, data, **args):
        """Train one copy of the classifier per feature-subset size.

        Subset sizes are the powers of two strictly below
        ``data.numFeatures``; features are ranked once and each classifier
        is trained on the top-ranked subset of the corresponding size.
        """
        Classifier.train(self, data, **args)

        # powers of two below the total feature count
        sizes = []
        count = 1
        while count < data.numFeatures:
            sizes.append(count)
            count *= 2

        self.classifiers = []
        for _ in sizes:
            self.classifiers.append(self.classifier.__class__(self.classifier))

        # rank the features once using a private copy of the selector
        selector = self.featureSelector.__class__(self.featureSelector)
        ranked = selector.rank(data)

        for idx, size in enumerate(sizes):
            subset = data.__class__(data)
            subset.keepFeatures(ranked[:size])
            self.classifiers[idx].train(subset)
            self.classifiers[idx].log.numFeatures = subset.numFeatures

        self.classifier.log.trainingTime = self.getTrainingTime()
示例#7
0
    def train(self, data, **args):
        """
        Gaussian-kernel SVM model selection in two passes: gamma is
        chosen first (with C at its low value), then C is chosen with
        that gamma fixed; the resulting classifier is kept.

        :Keywords:
          - `train` - boolean - whether to train the best classifier
            (default: True)
          - `vdata` - data to use for testing instead of using cross-validation
            (not implemented yet)
        """
        Classifier.train(self, data, **args)

        # pass 1: search over gamma
        gammaSearch = ModelSelector(Param(svm.SVM(ker.Gaussian(), C=self.Clow),
                                          'kernel.gamma', self.gamma),
                                    measure=self.measure,
                                    numFolds=self.numFolds)
        gammaSearch.train(data)

        # pass 2: search over C using the selected gamma
        chosenWidth = gammaSearch.classifier.kernel.gamma
        cSearch = ModelSelector(Param(svm.SVM(ker.Gaussian(gamma=chosenWidth)),
                                      'C', self.C),
                                measure=self.measure,
                                numFolds=self.numFolds)
        cSearch.train(data)

        self.classifier = cSearch.classifier.__class__(cSearch.classifier)

        if 'train' not in args or args['train'] is True:
            self.classifier.train(data, **args)

        self.classifier.log.trainingTime = self.getTrainingTime()
        self.classifier.log.classifier = self.classifier.__class__(
            self.classifier)
示例#8
0
    def train(self, data, **args):
        """Train a family of classifiers, one per feature-count.

        The feature counts are 1, 2, 4, ... up to (but excluding)
        ``data.numFeatures``; a single ranking of the features is
        computed and each classifier sees only its top-ranked slice.
        """
        Classifier.train(self, data, **args)

        # doubling sequence of candidate feature counts
        featureCounts = []
        size = 1
        while size < data.numFeatures:
            featureCounts.append(size)
            size *= 2

        self.classifiers = [self.classifier.__class__(self.classifier)
                            for _ in featureCounts]

        # a fresh copy of the selector produces the ranking
        ranker = self.featureSelector.__class__(self.featureSelector)
        ordering = ranker.rank(data)

        for position, numToKeep in enumerate(featureCounts):
            reduced = data.__class__(data)
            reduced.keepFeatures(ordering[:numToKeep])
            self.classifiers[position].train(reduced)
            self.classifiers[position].log.numFeatures = reduced.numFeatures

        self.classifier.log.trainingTime = self.getTrainingTime()
示例#9
0
    def train(self, data, **args) :
        """Minimal ``train``: delegate to the base class, then record the
        elapsed training time."""

        Classifier.train(self, data, **args)

        # keep this as the final statement of train; if you redefine the
        # "test" function you can follow the code in assess.test to save
        # the testingTime
        self.log.trainingTime = self.getTrainingTime()
示例#10
0
    def train(self, data, **args) :
        """Train each component of the chain on ``data`` in order, then
        the final classifier, and record the total training time."""

        Classifier.train(self, data, **args)

        for stage in self.chain :
            stage.train(data, **args)

        self.classifier.train(data, **args)
        self.log.trainingTime = self.getTrainingTime()
示例#11
0
    def train(self, data, **args):
        """Run every chain component's ``train`` on the data (in chain
        order), then train the terminal classifier and log the time."""

        Classifier.train(self, data, **args)

        for link in self.chain:
            link.train(data, **args)

        self.classifier.train(data, **args)
        self.log.trainingTime = self.getTrainingTime()
示例#12
0
    def __init__ (self, arg) :
        """Copy-construct from another instance of this class, or build
        from a list of classifiers (each copied via its own copy
        constructor)."""

        Classifier.__init__(self)
        if arg.__class__ == self.__class__ :
            source = arg.classifiers
        elif type(arg) == type([]) :
            source = arg
        else :
            # unrecognized argument: leave self.classifiers unset,
            # matching the original fall-through behavior
            source = None
        if source is not None :
            self.classifiers = [member.__class__(member) for member in source]
示例#13
0
    def train(self, data, **args) :
        """Train one sub-classifier per dataset of a DataAggregate.

        :Parameters:
          - `data` - a DataAggregate dataset; classifier i is trained on
            ``data.datas[i]``

        Raises ValueError when ``data`` is not a DataAggregate.
        """

        Classifier.train(self, data, **args)
        if not data.__class__.__name__ == 'DataAggregate' :
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('train requires a DataAggregate dataset')

        for i in range(len(self.classifiers)) :
            self.classifiers[i].train(data.datas[i], **args)
        self.log.trainingTime = self.getTrainingTime()
示例#14
0
    def train(self, data, **args):
        """Train one sub-classifier per dataset of a DataAggregate.

        :Parameters:
          - `data` - a DataAggregate dataset; classifier i is trained on
            ``data.datas[i]``

        Raises ValueError when ``data`` is not a DataAggregate.
        """

        Classifier.train(self, data, **args)
        if not data.__class__.__name__ == 'DataAggregate':
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('train requires a DataAggregate dataset')

        for i in range(len(self.classifiers)):
            self.classifiers[i].train(data.datas[i], **args)
        self.log.trainingTime = self.getTrainingTime()
示例#15
0
    def train(self, data, **args) :
        """Apply feature selection to ``data`` in place, log the features
        that survive, then train the wrapped classifier on the reduced
        dataset and record its training time."""

        Classifier.train(self, data, **args)

        self.featureSelector.select(data, **args)
        # record what the selector kept (featureID is copied by slicing)
        self.classifier.log.features = data.featureID[:]
        self.classifier.log.numFeatures = data.numFeatures

        self.classifier.train(data, **args)
        self.classifier.log.trainingTime = self.getTrainingTime()
示例#16
0
    def train(self, data, **args):
        """Reduce ``data`` via the feature selector (mutating it), note
        which features remain, then train the underlying classifier on
        the reduced data."""

        Classifier.train(self, data, **args)

        self.featureSelector.select(data, **args)
        # snapshot the surviving features before training
        self.classifier.log.features = data.featureID[:]
        self.classifier.log.numFeatures = data.numFeatures

        self.classifier.train(data, **args)
        self.classifier.log.trainingTime = self.getTrainingTime()
示例#17
0
    def __init__(self, classifier, **args) :
        """Wrap a classifier, storing a copy of it (or of its wrapped
        classifier when the argument is already an instance of this class).

        :Parameters:
          - `classifier` - a classifier object, another instance of this
            class, or a string (in which case construction stops early)

        Raises ValueError when the argument is not a classifier.
        """

        Classifier.__init__(self, classifier, **args)
        # NOTE: the original line below was indented with a hard tab
        # (mixed tabs/spaces); normalized to spaces
        if type(classifier) == type('') : return
        if (not hasattr(classifier, 'type')) or classifier.type != 'classifier' :
            # call-form raise works under both Python 2 and Python 3
            raise ValueError('argument should be a classifier')
        if classifier.__class__ == self.__class__ :
            self.classifier = classifier.classifier.__class__(
                classifier.classifier)
        else :
            self.classifier = classifier.__class__(classifier)
示例#18
0
    def __init__(self, classifier, **args):
        """Wrap a classifier, storing a copy of it (or of its wrapped
        classifier when the argument is already an instance of this class).

        :Parameters:
          - `classifier` - a classifier object, another instance of this
            class, or a string (in which case construction stops early)

        Raises ValueError when the argument is not a classifier.
        """

        Classifier.__init__(self, classifier, **args)
        if type(classifier) == type(''): return
        if (not hasattr(classifier,
                        'type')) or classifier.type != 'classifier':
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('argument should be a classifier')
        if classifier.__class__ == self.__class__:
            self.classifier = classifier.classifier.__class__(
                classifier.classifier)
        else:
            self.classifier = classifier.__class__(classifier)
示例#19
0
    def __init__(self, arg):
        """Initialize from either another instance of this class (its
        classifiers are copied) or from a plain list of classifiers."""

        Classifier.__init__(self)
        if arg.__class__ == self.__class__:
            pool = arg.classifiers
        elif type(arg) == type([]):
            pool = arg
        else:
            # fall through without setting self.classifiers, as before
            pool = None
        if pool is not None:
            self.classifiers = [item.__class__(item) for item in pool]
示例#20
0
    def load(self, fileName, data) :
        """load a trained classifier from a file.  Also provide the data on which
        the classifier was trained.  It assumes the underlying binary classifier is
        an SVM

        :Parameters:
          - `fileName` - path of the file holding the trained SVMs
          - `data` - the dataset the classifier was trained on
        """

        from PyML import svm
        Classifier.train(self, data)
        numClasses = self.labels.numClasses
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for i in range(numClasses)]
        file_handle = open(fileName)
        try :
            for i in range(numClasses) :
                datai = data.__class__(data, deepcopy = self.classifier.deepcopy)
                datai = oneAgainstRest(datai, data.labels.classLabels[i])
                self.classifiers[i] = svm.loadSVM(file_handle, datai)
        finally :
            # the original leaked the file handle; close it even if
            # loadSVM raises
            file_handle.close()
示例#21
0
    def train(self, data, **args) :
        '''train k classifiers, one per class, each in a
        one-against-the-rest setting

        Raises ValueError when the problem has two or fewer classes.
        '''
        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses
        if numClasses <= 2:
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('Not a multi class problem')
        self.classifiers = [self.classifier.__class__(self.classifier)
                            for i in range(numClasses)]
        for i in range(numClasses) :
            # make a copy of the data; this is done in case the classifier
            # modifies the data
            datai = data.__class__(data, deepcopy = self.classifier.deepcopy)
            datai = oneAgainstRest(datai, data.labels.classLabels[i])
            self.classifiers[i].train(datai)

        self.log.trainingTime = self.getTrainingTime()
示例#22
0
    def train(self, data, **args) :
        '''train k(k-1)/2 classifiers, one per unordered pair of classes

        Raises ValueError when the problem has two or fewer classes.
        '''

        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses

        if numClasses <= 2:
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('Not a multi class problem')

        self.classifiers = misc.matrix((numClasses, numClasses))
        for i in range(numClasses - 1) :
            for j in range(i+1, numClasses) :
                self.classifiers[i][j] = self.classifier.__class__(self.classifier)
                # restrict the data to the two classes of this pair
                dataij=data.__class__(data, deepcopy = self.classifier.deepcopy,
                                      classID = [i,j])
                self.classifiers[i][j].train(dataij)
        self.log.trainingTime = self.getTrainingTime()
示例#23
0
    def load(self, fileName, data):
        """load a trained classifier from a file.  Also provide the data on which
        the classifier was trained.  It assumes the underlying binary classifier is
        an SVM

        :Parameters:
          - `fileName` - path of the file holding the trained SVMs
          - `data` - the dataset the classifier was trained on
        """

        from PyML import svm
        Classifier.train(self, data)
        numClasses = self.labels.numClasses
        self.classifiers = [
            self.classifier.__class__(self.classifier)
            for i in range(numClasses)
        ]
        file_handle = open(fileName)
        try:
            for i in range(numClasses):
                datai = data.__class__(data, deepcopy=self.classifier.deepcopy)
                datai = oneAgainstRest(datai, data.labels.classLabels[i])
                self.classifiers[i] = svm.loadSVM(file_handle, datai)
        finally:
            # the original leaked the file handle; close it even if
            # loadSVM raises
            file_handle.close()
示例#24
0
    def train(self, data, **args):
        '''train k classifiers, one per class, each in a
        one-against-the-rest setting

        Raises ValueError when the problem has two or fewer classes.
        '''
        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses
        if numClasses <= 2:
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('Not a multi class problem')
        self.classifiers = [
            self.classifier.__class__(self.classifier)
            for i in range(numClasses)
        ]
        for i in range(numClasses):
            # make a copy of the data; this is done in case the classifier modifies the data
            datai = data.__class__(data, deepcopy=self.classifier.deepcopy)
            datai = oneAgainstRest(datai, data.labels.classLabels[i])
            self.classifiers[i].train(datai)

        self.log.trainingTime = self.getTrainingTime()
示例#25
0
    def __init__(self, arg1, arg2 = None) :
        """Construct from another instance (copy constructor), or from a
        classifier and a featureSelector supplied in either order.

        :Parameters:
          - `arg1` - another instance of this class, a classifier, or a
            featureSelector
          - `arg2` - a classifier or featureSelector (whichever `arg1`
            is not); ignored in the copy-constructor case

        Raises ValueError when an argument is neither a classifier nor
        a featureSelector.
        """

        Classifier.__init__(self)

        if arg1.__class__ == self.__class__ :
            other = arg1
            self.classifier = other.classifier.__class__(other.classifier)
            self.featureSelector = other.featureSelector.__class__(
                other.featureSelector)
        else :
            for arg in (arg1, arg2) :
                if arg.type == 'classifier' :
                    self.classifier = arg.__class__(arg)
                elif arg.type == 'featureSelector' :
                    self.featureSelector = arg.__class__(arg)
                else :
                    # call-form raise works under both Python 2 and 3
                    raise ValueError(
                        'argument should be either classifier or featureSelector')
示例#26
0
    def __init__(self, arg = None, **args) :
        """
        :Parameters:
          - `arg` - an existing ModelSelector to copy settings from

        :Keywords:
          - `C` - list of candidate values for C
          - `gamma` - list of candidate values for gamma
          - `measure` - accuracy measure used for picking the best
            classifier (default = 'balancedSuccessRate'); supported:
            'balancedSuccessRate', 'successRate', 'roc', 'roc50'
            (another number may replace the 50)
          - `numFolds` - number of CV folds used during model selection
        """

        Classifier.__init__(self, arg, **args)

        # no model chosen yet; training fills this in
        self.classifier = None
示例#27
0
    def __init__(self, arg1, arg2=None):
        """Construct from another instance (copy constructor), or from a
        classifier and a featureSelector supplied in either order.

        :Parameters:
          - `arg1` - another instance of this class, a classifier, or a
            featureSelector
          - `arg2` - a classifier or featureSelector (whichever `arg1`
            is not); ignored in the copy-constructor case

        Raises ValueError when an argument is neither a classifier nor
        a featureSelector.
        """

        Classifier.__init__(self)

        if arg1.__class__ == self.__class__:
            other = arg1
            self.classifier = other.classifier.__class__(other.classifier)
            self.featureSelector = other.featureSelector.__class__(
                other.featureSelector)
        else:
            for arg in (arg1, arg2):
                if arg.type == 'classifier':
                    self.classifier = arg.__class__(arg)
                elif arg.type == 'featureSelector':
                    self.featureSelector = arg.__class__(arg)
                else:
                    # call-form raise works under both Python 2 and 3
                    raise ValueError(
                        'argument should be either classifier or featureSelector')
示例#28
0
    def __init__(self, arg=None, **args):
        """
        :Parameters:
          - `arg` - an existing ModelSelector whose settings are copied

        :Keywords:
          - `C` - list of candidate values for C
          - `gamma` - list of candidate values for gamma
          - `measure` - accuracy measure used for choosing the best
            classifier (default = 'balancedSuccessRate'); supported:
            'balancedSuccessRate', 'successRate', 'roc', 'roc50'
            (another number may replace the 50)
          - `numFolds` - number of CV folds used during model selection
        """

        Classifier.__init__(self, arg, **args)

        # selected model is assigned by train()
        self.classifier = None
示例#29
0
    def __init__(self, arg) :
        """
        :Parameters:
          - `arg` - a Chain object or a list of objects, each of which
            implements 'train', 'test' and has a copy constructor
        """
        Classifier.__init__(self)

        if arg.__class__ == self.__class__ :
            # copy constructor
            other = arg
            self.classifier = other.classifier.__class__(other.classifier)
            self.chain = [link.__class__(link) for link in other.chain]

        elif type(arg) == type([]) :
            # last list element is the classifier; the rest form the chain
            tail = arg[-1]
            self.classifier = tail.__class__(tail)
            self.chain = [arg[i].__class__(arg[i])
                          for i in range(len(arg) - 1)]
示例#30
0
    def train(self, data, **args):
        '''train k(k-1)/2 classifiers, one per unordered pair of classes

        Raises ValueError when the problem has two or fewer classes.
        '''

        Classifier.train(self, data, **args)
        numClasses = self.labels.numClasses

        if numClasses <= 2:
            # call-form raise works under both Python 2 and Python 3,
            # unlike the removed "raise E, msg" statement form
            raise ValueError('Not a multi class problem')

        self.classifiers = misc.matrix((numClasses, numClasses))
        for i in range(numClasses - 1):
            for j in range(i + 1, numClasses):
                self.classifiers[i][j] = self.classifier.__class__(
                    self.classifier)
                # restrict the data to the two classes of this pair
                dataij = data.__class__(data,
                                        deepcopy=self.classifier.deepcopy,
                                        classID=[i, j])
                self.classifiers[i][j].train(dataij)
        self.log.trainingTime = self.getTrainingTime()
示例#31
0
    def __init__(self, arg):
        """
        :Parameters:
          - `arg` - a Chain object or a list of objects, each of which
            implements 'train', 'test' and has a copy constructor
        """
        Classifier.__init__(self)

        if arg.__class__ == self.__class__:
            # copy constructor: duplicate classifier and every chain link
            source = arg
            self.classifier = source.classifier.__class__(source.classifier)
            self.chain = [step.__class__(step) for step in source.chain]

        elif type(arg) == type([]):
            # the final element becomes the classifier; everything before
            # it becomes the processing chain
            final = arg[-1]
            self.classifier = final.__class__(final)
            self.chain = [arg[k].__class__(arg[k])
                          for k in range(len(arg) - 1)]
示例#32
0
    def __init__(self, arg = None, **args) :

        Classifier.__init__(self, **args)