Пример #1
0
    def iterGridSearchSVM(self,
                          c_info=None,
                          g_info=None,
                          fold=5,
                          probability=False,
                          compensation=True):
        """Cross-validate an RBF SVM on every point of a log2(C) x log2(gamma) grid.

        This is a generator; nothing runs until the first item is requested.

        Parameters:
            c_info: optional sequence whose first three items are
                (begin, end, step) for log2(C). Defaults to (-5, 15, 2) when
                it is None or has fewer than three items.
            g_info: same as ``c_info`` but for log2(gamma); the default is
                (-15, 3, 2).
            fold: passed unchanged to ``svm.cross_validation``.
            probability: truthy values set ``probability=1`` on the SVM
                parameters, otherwise 0.
            compensation: when true, the weights returned by
                ``self._calculateCompensation(labels)`` are attached to every
                parameter set.

        Yields:
            Tuples ``(n, l2c, l2g, conf)`` where ``n`` is the total number of
            grid points, ``l2c`` / ``l2g`` are the current exponents and
            ``conf`` is the ``ConfusionMatrix`` built from the cross-validation
            predictions.

        Raises:
            ZeroDivisionError: if a step is zero (the same exception type the
                former ``/ step`` computation produced).
        """
        def grid_axis(info, default):
            # Only honour the caller's range when all three values are present.
            if info is not None and len(info) >= 3:
                begin, end, step = info[:3]
            else:
                begin, end, step = default
            # Accept reversed bounds and negative steps.
            if end < begin:
                begin, end = end, begin
            step = abs(step)
            if step == 0:
                raise ZeroDivisionError('grid step must be non-zero')
            # Enumerate the axis by repeated addition so the reported total
            # always equals the number of points that are really visited,
            # even for float steps or steps that do not divide the range.
            values = []
            value = begin
            while value <= end:
                values.append(value)
                value += step
            return values

        c_values = grid_axis(c_info, (-5, 15, 2))
        g_values = grid_axis(g_info, (-15, 3, 2))

        labels, samples = self.getData(normalize=True)
        problem = svm.svm_problem(labels, samples)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)

        # Exact integer count of grid points.
        n = len(c_values) * len(g_values)

        for l2c in c_values:
            for l2g in g_values:
                param = svm.svm_parameter(kernel_type=svm.RBF,
                                          C=2.**l2c,
                                          gamma=2.**l2g,
                                          probability=1 if probability else 0)
                if compensation:
                    param.weight = weight
                    param.weight_label = weight_label
                    param.nr_weight = len(weight)

                # Build a real list: map() would be a one-shot iterator on
                # Python 3.
                predictions = [
                    int(p)
                    for p in svm.cross_validation(problem, param, fold)
                ]

                conf = ConfusionMatrix.from_lists(labels, predictions,
                                                  self.class_names.keys())
                yield n, l2c, l2g, conf
Пример #2
0
    def iterGridSearchSVM(self, c_info=None, g_info=None, fold=5,
                          probability=False, compensation=True):
        """Iterate over an RBF-SVM grid search in log2(C) / log2(gamma) space.

        Being a generator, the search (including argument checks) starts
        when the first item is requested.

        Parameters:
            c_info: optional sequence; its first three items are taken as
                (begin, end, step) for log2(C). Falls back to (-5, 15, 2)
                when None or shorter than three items.
            g_info: likewise for log2(gamma), falling back to (-15, 3, 2).
            fold: forwarded to ``svm.cross_validation``.
            probability: converted to 1/0 for ``svm.svm_parameter``.
            compensation: if true, the result of
                ``self._calculateCompensation(labels)`` is set as
                ``weight`` / ``weight_label`` on each parameter object.

        Yields:
            ``(n, l2c, l2g, conf)``: the total number of grid points, the
            two exponents of the current point and the ``ConfusionMatrix``
            computed from the cross-validated predictions and ``self.l2nl``.

        Raises:
            ZeroDivisionError: for a zero step, matching the exception type
                raised by the previous ``/ step`` based count.
        """
        def _log2_grid(info, fallback):
            begin, end, step = fallback
            if info is not None and len(info) >= 3:
                begin, end, step = info[:3]
            if end < begin:
                # Tolerate bounds given in the wrong order.
                begin, end = end, begin
            step = abs(step)
            if step == 0:
                raise ZeroDivisionError('grid step must be non-zero')
            # Walk the axis exactly as the search does, so that its length is
            # the true point count (a plain division is fractional whenever
            # the step does not divide the range).
            axis = []
            current = begin
            while current <= end:
                axis.append(current)
                current += step
            return axis

        c_axis = _log2_grid(c_info, (-5, 15, 2))
        g_axis = _log2_grid(g_info, (-15, 3, 2))

        labels, samples = self.getData(normalize=True)
        problem = svm.svm_problem(labels, samples)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)

        n = len(c_axis) * len(g_axis)

        for l2c in c_axis:
            for l2g in g_axis:
                param = svm.svm_parameter(kernel_type=svm.RBF,
                                          C=2.**l2c, gamma=2.**l2g,
                                          probability=1 if probability else 0)
                if compensation:
                    param.weight = weight
                    param.weight_label = weight_label
                    param.nr_weight = len(weight)

                predictions = svm.cross_validation(problem, param, fold)
                # A list (not a lazy map object) so it can be consumed more
                # than once on any Python version.
                predictions = [int(value) for value in predictions]

                conf = ConfusionMatrix.from_lists(labels, predictions,
                                                  self.l2nl)
                yield n, l2c, l2g, conf
Пример #3
0
def test(word, documents):
    """Cross-validate a linear SVM that predicts whether a document has `word`.

    Documents whose entry at ``reverse_map[word]`` is truthy are the positive
    class. The remaining documents are randomly down-sampled to at most five
    times the number of positives and form the negative class. The entry for
    `word` is zeroed in every (copied) observation so the classifier cannot
    read the answer directly.

    The fraction of negatives in the sample is printed as a baseline.

    Parameters:
        word: key into the module-level ``reverse_map``.
        documents: iterable of objects supporting ``copy()`` and item
            access/assignment by ``reverse_map[word]``.

    Returns:
        The fraction of observations whose cross-validated prediction equals
        the true category.
    """
    import random

    import svm

    feature = reverse_map[word]
    docs = [d.copy() for d in documents if d[feature]]
    nondocs = [d.copy() for d in documents if not d[feature]]
    nondocs = random.sample(nondocs, min(5 * len(docs), len(nondocs)))
    # Single-argument print() behaves the same on Python 2 and 3.
    print(float(len(nondocs)) / (len(docs) + len(nondocs)))
    cats = [1] * len(docs) + [0] * len(nondocs)
    obs = docs + nondocs
    for observation in obs:
        observation[feature] = 0.
    # shuffle() needs a mutable sequence; zip() is lazy on Python 3.
    zobs = list(zip(obs, cats))
    random.shuffle(zobs)
    obs, cats = zip(*zobs)
    params = svm.svm_parameter(C=1, kernel_type=svm.LINEAR)
    problem = svm.svm_problem(cats, obs)
    target = svm.cross_validation(problem, params, 20)
    # Compare every prediction with its label. The previous code iterated
    # over the label values (0/1) and used them as indices, so it only ever
    # looked at the first two predictions.
    hits = sum(1 for predicted, actual in zip(target, cats)
               if predicted == actual)
    return hits / float(len(cats))
Пример #4
0
def test(word, documents):
    """Measure how well a linear SVM detects `word` from the other features.

    Positive examples are the documents with a truthy value at
    ``reverse_map[word]``; negatives are a random sample of the other
    documents, capped at five times the number of positives. All examples
    are copies with the `word` entry reset to 0.0 before training.

    Prints the share of negative examples in the sampled data set.

    Parameters:
        word: key looked up in the module-level ``reverse_map``.
        documents: iterable of objects that provide ``copy()`` and can be
            indexed and assigned at ``reverse_map[word]``.

    Returns:
        Cross-validated accuracy: matching predictions divided by the number
        of examples.
    """
    import random

    import svm

    index = reverse_map[word]
    positives = [d.copy() for d in documents if d[index]]
    negatives = [d.copy() for d in documents if not d[index]]
    negatives = random.sample(negatives,
                              min(5 * len(positives), len(negatives)))
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print(float(len(negatives)) / (len(positives) + len(negatives)))

    cats = [1] * len(positives) + [0] * len(negatives)
    obs = positives + negatives
    for example in obs:
        example[index] = 0.

    # Materialise the pairs: random.shuffle cannot shuffle a zip iterator.
    pairs = list(zip(obs, cats))
    random.shuffle(pairs)
    obs, cats = zip(*pairs)

    params = svm.svm_parameter(C=1, kernel_type=svm.LINEAR)
    problem = svm.svm_problem(cats, obs)
    target = svm.cross_validation(problem, params, 20)

    # Pair predictions with labels position by position. Indexing with the
    # label values themselves (the old `for i in cats`) compared only
    # target[0] and target[1] over and over.
    correct = sum(1 for guess, truth in zip(target, cats) if guess == truth)
    return correct / float(len(cats))
Пример #5
0
    def __Linear_Search__(self, C_min, C_steps, C_step_by=1.):
        """Pick the C with the lowest cross-validation error on a log2 grid.

        Per the original comment this is a utility used by Parameter_Search().

        The candidates are ``2 ** (C_min + k * C_step_by)`` for
        ``k = 0 .. C_steps - 1``. For each candidate ``self.svm_params['C']``
        is overwritten and ``svm.cross_validation`` is run with
        ``self.folds``; the error is the mean absolute difference between the
        predictions and ``self.training_labels``.

        Parameters:
            C_min: exponent (base 2) of the first candidate.
            C_steps: number of candidates.
            C_step_by: exponent increment between candidates.

        Returns:
            The candidate C with the smallest error (the first one on ties).

        Side effects:
            ``self.svm_params['C']`` is left at the last candidate tried.
        """
        param_grid = 2 ** (np.arange(C_steps, dtype=float) * C_step_by + C_min)
        error_grid = np.zeros(len(param_grid))

        for i, candidate in enumerate(param_grid):
            self.svm_params['C'] = float(candidate)

            CV_predictions = svm.cross_validation(self.svm_problem, svm.svm_parameter(**self.svm_params), self.folds)

            # float() keeps this a true division even for integer labels on
            # Python 2.
            deviation = np.abs(np.asarray(CV_predictions) - self.training_labels)
            error_grid[i] = deviation.sum() / float(len(self.training_labels))

        # np.argmin returns the first minimum, like the former
        # mlab.find(...)[0]; mlab.find no longer exists in matplotlib.
        return param_grid[np.argmin(error_grid)]
Пример #6
0
 def search(self):
     """Evaluate every grid point by cross-validation and keep the best one.

     Adapted from the LIBSVM grid search tool. ``self.calculate_jobs()``
     supplies lines of ``(c, g)`` pairs (log2 of C and gamma). Each pair is
     scored by the fraction of cross-validation predictions equal to
     ``self.targets``. Every result is passed to ``self._save_points`` and
     ``self._redraw`` is called after each line.

     Returns:
         An ``svm_parameter`` built from ``self.params`` after C and gamma
         have been set to the best-scoring grid point.
     """
     jobs = self.calculate_jobs()
     scores = []
     for line in jobs:
         for (c, g) in line:
             # run cross-validation for this point
             self.setParams(C=2 ** c, gamma=2 ** g)
             param = svm_parameter(**self.params)
             cvresult = array(cross_validation(self.problem, param, self.crossval))
             corr, = where(cvresult == self.targets)
             res = (c, g, float(corr.size) / self.targets.size)
             scores.append(res)
             self._save_points(res)
         self._redraw(scores)
     scores = array(scores)
     # Rows are (log2C, log2g, accuracy): rank by the accuracy column and
     # read the exponents from the first two columns. Ranking by column 0
     # and slicing 1: picked the largest log2C and then used
     # (log2g, accuracy) as if they were (log2C, log2g).
     best = scores[scores[:, 2].argmax(), :2]
     self.setParams(C=2 ** best[0], gamma=2 ** best[1])
     logging.info("best log2C=%12.7g, log2g=%11.7g " % (best[0], best[1]))
     param = svm_parameter(**self.params)
     return param
Пример #7
0
 def search(self):
     """Run the grid search and return parameters for the best grid point.

     Adapted from the LIBSVM grid search tool. For every ``(c, g)`` pair in
     the lines returned by ``self.calculate_jobs()`` the SVM is
     cross-validated with ``C = 2 ** c`` and ``gamma = 2 ** g``. The score
     is the share of predictions that equal ``self.targets``. Results are
     handed to ``self._save_points``; ``self._redraw`` runs once per line.

     Returns:
         ``svm_parameter(**self.params)`` after ``self.setParams`` has been
         called with the highest-scoring C and gamma.
     """
     jobs = self.calculate_jobs()
     scores = []
     for line in jobs:
         for (c, g) in line:
             # run cross-validation for this point
             self.setParams(C=2 ** c, gamma=2 ** g)
             param = svm_parameter(**self.params)
             cvresult = array(cross_validation(self.problem, param, self.crossval))
             corr, = where(cvresult == self.targets)
             res = (c, g, float(corr.size) / self.targets.size)
             scores.append(res)
             self._save_points(res)
         self._redraw(scores)
     scores = array(scores)
     # Each row is (log2C, log2g, accuracy). Select the row with the best
     # accuracy (column 2); the old code maximised column 0 and then took
     # columns 1-2, i.e. it returned a wrong point with shifted columns.
     best_row = scores[:, 2].argmax()
     best = scores[best_row, :2]
     self.setParams(C=2 ** best[0], gamma=2 ** best[1])
     logging.info("best log2C=%12.7g, log2g=%11.7g " % (best[0], best[1]))
     param = svm_parameter(**self.params)
     return param
Пример #8
0
    def __Search__(self, C_min, C_steps,  gamma_min, gamma_steps, C_step_by=1., gamma_step_by=1.):
        """Pick the (C, gamma) pair with the lowest cross-validation error.

        Per the original comment this is a utility used by Parameter_Search().

        Candidates are ``2 ** (C_min + k * C_step_by)`` for
        ``k = 0 .. C_steps - 1`` and likewise for gamma. For every
        combination ``self.svm_params['C']`` and ``['gamma']`` are
        overwritten and ``svm.cross_validation`` is run with ``self.folds``.
        The error is the mean absolute difference between the predictions
        and ``self.training_labels``.

        Parameters:
            C_min, gamma_min: base-2 exponents of the first candidates.
            C_steps, gamma_steps: number of candidates per axis.
            C_step_by, gamma_step_by: exponent increments per step.

        Returns:
            Tuple ``(C, gamma)`` with the smallest error. Ties resolve to the
            first minimum in row-major order (gamma is the slow axis).

        Side effects:
            ``self.svm_params`` keeps the last combination tried.
        """
        C_values = 2 ** (np.arange(C_steps, dtype=float) * C_step_by + C_min)
        gamma_values = 2 ** (np.arange(gamma_steps, dtype=float) * gamma_step_by + gamma_min)
        # Rows follow gamma, columns follow C (same layout as before).
        error_grid = np.zeros((len(gamma_values), len(C_values)))

        for row, gamma in enumerate(gamma_values):
            for col, C in enumerate(C_values):
                self.svm_params['C'] = float(C)
                self.svm_params['gamma'] = float(gamma)

                CV_predictions = svm.cross_validation(self.svm_problem, svm.svm_parameter(**self.svm_params), self.folds)

                # float() keeps this a true division even for integer labels
                # on Python 2.
                deviation = np.abs(np.asarray(CV_predictions) - self.training_labels)
                error_grid[row, col] = deviation.sum() / float(len(self.training_labels))

        # argmin works on the flattened grid and returns the first minimum;
        # unravel_index turns it back into (row, col). This replaces
        # mlab.find, which no longer exists in matplotlib.
        row, col = np.unravel_index(np.argmin(error_grid), error_grid.shape)

        return (C_values[col], gamma_values[row])
Пример #9
0
    def search(self, cmin=None, cmax=None):
        """Recursively refine the parameter grid and evaluate it.

        Each call scores the points returned by ``self.refineGrid(cmin,
        cmax)``, shrinks the search window around the best point of this
        round, increments ``self.depth`` and calls itself again. Once
        ``self.depth`` exceeds ``self.maxdepth`` the best point recorded in
        ``self.allPts`` / ``self.allScores`` is applied via
        ``self.setParams`` and the resulting ``svm_parameter`` is returned.

        Parameters:
            cmin, cmax: lower / upper bounds of the current window. When
                ``cmin`` is None both are taken from ``self.usermin`` and
                ``self.usermax``.
        """
        # --- termination: depth budget used up -------------------------
        if self.depth > self.maxdepth:
            winner = self.allPts[self.allScores.argmax(), :]
            logging.info("best log2C=%12.7g, log2g=%11.7g " %
                         (winner[0], winner[1]))
            self.setParams(C=2**winner[0], gamma=2**winner[1])
            final_param = svm_parameter(**self.params)
            logging.info("Grid search completed! Final parameters:")
            logging.info(repr(final_param))
            return final_param

        # --- build the grid for the current window ---------------------
        if cmin is None:
            # first call: start from the user supplied range
            cmin, cmax = array(self.usermin), array(self.usermax)
        points = self.refineGrid(cmin, cmax)

        # --- score every grid point ------------------------------------
        scores = []
        isnew = array([True] * self.nPts)
        for i in range(self.nPts):
            known = self._findIndex(points[i, :])
            if known >= 0:
                # seen in an earlier round: reuse the stored score
                isnew[i] = False
                scores.append(self.allScores[known])
            else:
                log2c, log2g = points[i, 0], points[i, 1]
                self.setParams(C=2**log2c, gamma=2**log2g)
                param = svm_parameter(**self.params)
                cvresult = array(
                    cross_validation(self.problem, param, self.crossval))
                # score is the fraction of correct predictions
                (hits,) = where(cvresult == self.targets)
                accuracy = float(hits.size) / self.targets.size
                scores.append(accuracy)
                self._save_points((points[i, 0], points[i, 1], accuracy))
        scores = array(scores)

        # --- shrink the window to half its width around the best point,
        #     keeping it inside the user range ---------------------------
        newctr = points[scores.argmax(), :].copy()
        newdiff = (cmax - cmin) / 4.0
        for i in range(self.nPars):
            lowest_centre = self.usermin[i] + newdiff[i]
            highest_centre = self.usermax[i] - newdiff[i]
            newctr[i] = min([max([newctr[i], lowest_centre]), highest_centre])
        cmin = newctr - newdiff
        cmax = newctr + newdiff
        logging.info("depth:\t%3d\tcrange:\t%g\tscore:\t%g" %
                     (self.depth, cmax[0] - cmin[0], scores.max()))

        # --- remember the points evaluated for the first time ----------
        if self.depth == 0:
            self.allPts = points[isnew, :].copy()
            self.allScores = scores[isnew].copy()
        else:
            self.allPts = append(self.allPts, points[isnew, :], axis=0)
            self.allScores = append(self.allScores, scores[isnew], axis=0)

        # --- optional live plot ----------------------------------------
        if self.plotflag:
            import pylab as p
            xs, ys = self.allPts[:, 0], self.allPts[:, 1]
            if self.depth == 0:
                self.oPlot = p.plot(xs, ys, 'o')[0]
            # push the accumulated points into the existing line object
            self.oPlot.set_data(self.allPts[:, 0], self.allPts[:, 1])
            p.draw()

        # --- next refinement level -------------------------------------
        self.depth += 1
        return self.search(cmin, cmax)
Пример #10
0
    def search(self, cmin=None, cmax=None):
        """Iterate grid refinement and evaluation by recursion.

        While ``self.depth`` does not exceed ``self.maxdepth``, the grid from
        ``self.refineGrid(cmin, cmax)`` is cross-validated (points already
        known to ``self._findIndex`` reuse their stored score). The window is
        then halved around the round's best point and the method calls
        itself with ``self.depth`` incremented. Afterwards the overall best
        point is applied through ``self.setParams`` and returned as an
        ``svm_parameter``.

        Parameters:
            cmin, cmax: current window bounds; if ``cmin`` is None the
                bounds are initialised from ``self.usermin`` /
                ``self.usermax``.
        """
        if self.depth > self.maxdepth:
            # out of refinement levels: report and return the best point
            top = self.allScores.argmax()
            best = self.allPts[top, :]
            logging.info("best log2C=%12.7g, log2g=%11.7g " % (best[0], best[1]))
            self.setParams(C=2 ** best[0], gamma=2 ** best[1])
            result = svm_parameter(**self.params)
            logging.info("Grid search completed! Final parameters:")
            logging.info(repr(result))
            return result

        if cmin is None:
            # no window supplied, fall back to the initial range
            cmin = array(self.usermin)
            cmax = array(self.usermax)
        points = self.refineGrid(cmin, cmax)

        # n-fold cross-validation score for each grid point
        isnew = array([True] * self.nPts)
        scores = []
        for k in range(self.nPts):
            idx = self._findIndex(points[k, :])
            if idx >= 0:
                # already evaluated earlier
                isnew[k] = False
                scores.append(self.allScores[idx])
            else:
                self.setParams(C=2 ** points[k, 0], gamma=2 ** points[k, 1])
                param = svm_parameter(**self.params)
                raw = cross_validation(self.problem, param, self.crossval)
                cvresult = array(raw)
                # share of predictions that match the targets
                (matching,) = where(cvresult == self.targets)
                share = float(matching.size) / self.targets.size
                scores.append(share)
                self._save_points((points[k, 0], points[k, 1], share))

        scores = array(scores)

        # New window: centred on the best point, half as wide as the old
        # one, and shifted if necessary so that it stays inside the
        # original user range.
        newdiff = (cmax - cmin) / 4.0
        newctr = points[scores.argmax(), :].copy()
        for axis in range(self.nPars):
            floor = self.usermin[axis] + newdiff[axis]
            ceiling = self.usermax[axis] - newdiff[axis]
            clamped_low = max([newctr[axis], floor])
            newctr[axis] = min([clamped_low, ceiling])
        cmin = newctr - newdiff
        cmax = newctr + newdiff
        logging.info("depth:\t%3d\tcrange:\t%g\tscore:\t%g" % (self.depth, cmax[0] - cmin[0], scores.max()))

        # keep only first-time evaluations in the global record
        if self.depth == 0:
            self.allPts = points[isnew, :].copy()
            self.allScores = scores[isnew].copy()
        else:
            self.allPts = append(self.allPts, points[isnew, :], axis=0)
            self.allScores = append(self.allScores, scores[isnew], axis=0)

        if self.plotflag:
            import pylab as p

            if self.depth == 0:
                line = p.plot(self.allPts[:, 0], self.allPts[:, 1], "o")
                self.oPlot = line[0]
            # refresh the plotted data with all points collected so far
            self.oPlot.set_data(self.allPts[:, 0], self.allPts[:, 1])
            p.draw()

        # descend one refinement level
        self.depth += 1
        return self.search(cmin, cmax)