def iterGridSearchSVM(self, c_info=None, g_info=None, fold=5,
                      probability=False, compensation=True):
    # Exhaustive grid search over log2(C) and log2(gamma) for an RBF-kernel SVM,
    # yielding an n-fold cross-validation confusion matrix for every grid point.
    swap = lambda a, b: (b, a)

    if c_info is not None and len(c_info) >= 3:
        c_begin, c_end, c_step = c_info[:3]
    else:
        c_begin, c_end, c_step = -5, 15, 2
    if c_end < c_begin:
        c_begin, c_end = swap(c_begin, c_end)
    c_step = abs(c_step)

    if g_info is not None and len(g_info) >= 3:
        g_begin, g_end, g_step = g_info[:3]
    else:
        g_begin, g_end, g_step = -15, 3, 2
    if g_end < g_begin:
        g_begin, g_end = swap(g_begin, g_end)
    g_step = abs(g_step)

    labels, samples = self.getData(normalize=True)
    problem = svm.svm_problem(labels, samples)

    if compensation:
        weight, weight_label = self._calculateCompensation(labels)

    # total number of grid points (reported back with every yielded result)
    n = (c_end - c_begin) / c_step + 1
    n *= (g_end - g_begin) / g_step + 1

    l2c = c_begin
    while l2c <= c_end:
        l2g = g_begin
        while l2g <= g_end:
            param = svm.svm_parameter(kernel_type=svm.RBF,
                                      C=2.**l2c, gamma=2.**l2g,
                                      probability=1 if probability else 0)
            if compensation:
                param.weight = weight
                param.weight_label = weight_label
                param.nr_weight = len(weight)
            predictions = svm.cross_validation(problem, param, fold)
            predictions = map(int, predictions)
            conf = ConfusionMatrix.from_lists(labels, predictions,
                                              self.class_names.keys())
            yield n, l2c, l2g, conf
            l2g += g_step
        l2c += c_step

def iterGridSearchSVM(self, c_info=None, g_info=None, fold=5,
                      probability=False, compensation=True):
    # Variant of the generator above; identical apart from the label list
    # handed to ConfusionMatrix.from_lists (self.l2nl instead of
    # self.class_names.keys()).
    swap = lambda a, b: (b, a)

    if c_info is not None and len(c_info) >= 3:
        c_begin, c_end, c_step = c_info[:3]
    else:
        c_begin, c_end, c_step = -5, 15, 2
    if c_end < c_begin:
        c_begin, c_end = swap(c_begin, c_end)
    c_step = abs(c_step)

    if g_info is not None and len(g_info) >= 3:
        g_begin, g_end, g_step = g_info[:3]
    else:
        g_begin, g_end, g_step = -15, 3, 2
    if g_end < g_begin:
        g_begin, g_end = swap(g_begin, g_end)
    g_step = abs(g_step)

    labels, samples = self.getData(normalize=True)
    problem = svm.svm_problem(labels, samples)

    if compensation:
        weight, weight_label = self._calculateCompensation(labels)

    n = (c_end - c_begin) / c_step + 1
    n *= (g_end - g_begin) / g_step + 1

    l2c = c_begin
    while l2c <= c_end:
        l2g = g_begin
        while l2g <= g_end:
            param = svm.svm_parameter(kernel_type=svm.RBF,
                                      C=2.**l2c, gamma=2.**l2g,
                                      probability=1 if probability else 0)
            if compensation:
                param.weight = weight
                param.weight_label = weight_label
                param.nr_weight = len(weight)
            predictions = svm.cross_validation(problem, param, fold)
            predictions = map(int, predictions)
            conf = ConfusionMatrix.from_lists(labels, predictions, self.l2nl)
            yield n, l2c, l2g, conf
            l2g += g_step
        l2c += c_step

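# Minimal sketch of how the generator above might be consumed to pick the best
# (log2 C, log2 gamma) pair.  `learner` and the ConfusionMatrix attribute
# `accuracy` are hypothetical names, not defined by the snippets here.
best_score, best_l2c, best_l2g = -1.0, None, None
for n, l2c, l2g, conf in learner.iterGridSearchSVM(c_info=(-5, 15, 2),
                                                   g_info=(-15, 3, 2), fold=5):
    score = conf.accuracy  # hypothetical overall-accuracy attribute
    if score > best_score:
        best_score, best_l2c, best_l2g = score, l2c, l2g
print 'best C=2**%d, gamma=2**%d (CV accuracy %.3f)' % (best_l2c, best_l2g, best_score)
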
def test(word, documents):
    # Estimate how well the remaining features predict the presence of `word`
    # via 20-fold cross-validation with a linear SVM.  `reverse_map` (word ->
    # feature index) is assumed to exist at module level.
    import svm, random
    docs = [d.copy() for d in documents if d[reverse_map[word]]]
    nondocs = [d.copy() for d in documents if not d[reverse_map[word]]]
    # keep at most a 5:1 ratio of negative to positive documents
    nondocs = random.sample(nondocs, min(5 * len(docs), len(nondocs)))
    print float(len(nondocs)) / (len(docs) + len(nondocs))
    cats = [1 for i in docs] + [0 for i in nondocs]
    obs = docs + nondocs
    # blank out the target word itself so the classifier cannot cheat
    for i in xrange(len(obs)):
        obs[i][reverse_map[word]] = 0.
    zobs = zip(obs, cats)
    random.shuffle(zobs)
    obs, cats = zip(*zobs)
    params = svm.svm_parameter(C=1, kernel_type=svm.LINEAR)
    problem = svm.svm_problem(cats, obs)
    target = svm.cross_validation(problem, params, 20)
    # fraction of cross-validation predictions that match the true labels
    return sum(target[i] == cats[i] for i in xrange(len(cats))) / float(len(cats))

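# Toy driver for test() above.  The corpus layout is an assumption: each
# document is a dict mapping feature index -> value, and reverse_map maps a
# word to its feature index (both names and values here are hypothetical).
reverse_map = {'svm': 1, 'kernel': 2}
documents = [{1: 1., 2: 0.}, {1: 0., 2: 1.}] * 50
print test('svm', documents)
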
def __Linear_Search__(self, C_min, C_steps, C_step_by=1.):
    # Utility function used by Parameter_Search() to find the best C for a
    # fixed kernel: scan C over powers of two and keep the value with the
    # lowest cross-validation error.  Assumes module-level `import numpy as np`,
    # `from matplotlib import mlab` and the libsvm `svm` bindings.
    param_grid = np.array([C for C in 2 ** (np.arange(C_steps, dtype=float) * C_step_by + C_min)])
    error_grid = np.zeros(len(param_grid))
    for i in range(len(param_grid)):
        self.svm_params['C'] = float(param_grid[i])
        CV_predictions = np.asarray(svm.cross_validation(self.svm_problem,
                                                         svm.svm_parameter(**self.svm_params),
                                                         self.folds))
        error = sum(abs(CV_predictions - self.training_labels)) / len(self.training_labels)
        error_grid[i] = error
    best = mlab.find(error_grid == error_grid.flatten().min())
    C = param_grid[best][0]
    return C

def search(self): """ iterate successive parameter grid refinement and evaluation; adapted from LIBSVM grid search tool """ jobs = self.calculate_jobs() scores = [] for line in jobs: for (c, g) in line: # run cross-validation for this point self.setParams(C=2 ** c, gamma=2 ** g) param = svm_parameter(**self.params) cvresult = array(cross_validation(self.problem, param, self.crossval)) corr, = where(cvresult == self.targets) res = (c, g, float(corr.size) / self.targets.size) scores.append(res) self._save_points(res) self._redraw(scores) scores = array(scores) best = scores[scores[:, 0].argmax(), 1:] self.setParams(C=2 ** best[0], gamma=2 ** best[1]) logging.info("best log2C=%12.7g, log2g=%11.7g " % (best[0], best[1])) param = svm_parameter(**self.params) return param
def __Search__(self, C_min, C_steps, gamma_min, gamma_steps, C_step_by=1., gamma_step_by=1.):
    # Utility function used by Parameter_Search() to find the best (C, gamma)
    # pair: full grid over powers of two, scored by cross-validation error.
    # param_grid[row, col] holds (C, gamma); rows index gamma, columns index C.
    param_grid = np.array([[(C, gamma)
                            for C in 2 ** (np.arange(C_steps, dtype=float) * C_step_by + C_min)]
                           for gamma in 2 ** (np.arange(gamma_steps, dtype=float) * gamma_step_by + gamma_min)])
    error_grid = np.zeros(shape=param_grid.shape[0:2])
    for row in range(param_grid.shape[0]):
        for col in range(param_grid.shape[1]):
            self.svm_params['C'] = float(param_grid[row, col, 0])
            self.svm_params['gamma'] = float(param_grid[row, col, 1])
            CV_predictions = np.asarray(svm.cross_validation(self.svm_problem,
                                                             svm.svm_parameter(**self.svm_params),
                                                             self.folds))
            error = sum(abs(CV_predictions - self.training_labels)) / len(self.training_labels)
            error_grid[row, col] = error
    # flat index of the lowest error, converted back to (row, col)
    best = mlab.find(error_grid == error_grid.flatten().min())
    row = best // C_steps
    col = best % C_steps
    (C, gamma) = param_grid[row, col][0].flatten()
    return (C, gamma)

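# Hypothetical sketch of the Parameter_Search() driver both utility methods
# refer to; the method body, the ranges and the kernel_type switch are
# assumptions, not taken from the snippets above.
def Parameter_Search(self):
    if self.svm_params.get('kernel_type') == svm.LINEAR:
        # linear kernel: only C needs tuning
        self.svm_params['C'] = self.__Linear_Search__(C_min=-5, C_steps=11, C_step_by=2.)
    else:
        # RBF (or other) kernel: tune C and gamma jointly
        C, gamma = self.__Search__(C_min=-5, C_steps=11,
                                   gamma_min=-15, gamma_steps=10,
                                   C_step_by=2., gamma_step_by=2.)
        self.svm_params['C'], self.svm_params['gamma'] = C, gamma
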
def search(self, cmin=None, cmax=None):
    """ iterate parameter grid refinement and evaluation recursively """
    if self.depth > self.maxdepth:
        # maximum search depth reached - finish up
        best = self.allPts[self.allScores.argmax(), :]
        logging.info("best log2C=%12.7g, log2g=%11.7g " % (best[0], best[1]))
        self.setParams(C=2 ** best[0], gamma=2 ** best[1])
        param = svm_parameter(**self.params)
        logging.info("Grid search completed! Final parameters:")
        logging.info(repr(param))
        return param

    # generate DOE gridpoints using current range
    if cmin is None:
        # use initial values, if none given
        cmin = array(self.usermin)
        cmax = array(self.usermax)
    points = self.refineGrid(cmin, cmax)

    # calculate scores for all grid points using n-fold cross-validation
    scores = []
    isnew = array([True] * self.nPts)
    for i in range(self.nPts):
        idx = self._findIndex(points[i, :])
        if idx >= 0:
            # point already exists
            isnew[i] = False
            scores.append(self.allScores[idx])
        else:
            # new point, run cross-validation
            self.setParams(C=2 ** points[i, 0], gamma=2 ** points[i, 1])
            param = svm_parameter(**self.params)
            cvresult = array(cross_validation(self.problem, param, self.crossval))
            # save cross validation result as "% correct"
            corr, = where(cvresult == self.targets)
            corr = float(corr.size) / self.targets.size
            scores.append(corr)
            self._save_points((points[i, 0], points[i, 1], corr))
    scores = array(scores)

    # find max and new ranges by halving the old ones, whereby
    # entire search region must lie within original search range
    newctr = points[scores.argmax(), :].copy()
    newdiff = (cmax - cmin) / 4.0
    for i in range(self.nPars):
        newctr[i] = min([max([newctr[i], self.usermin[i] + newdiff[i]]),
                         self.usermax[i] - newdiff[i]])
    cmin = newctr - newdiff
    cmax = newctr + newdiff
    logging.info("depth:\t%3d\tcrange:\t%g\tscore:\t%g" %
                 (self.depth, cmax[0] - cmin[0], scores.max()))

    # append points and scores to the full list
    if self.depth == 0:
        self.allPts = points[isnew, :].copy()
        self.allScores = scores[isnew].copy()
    else:
        self.allPts = append(self.allPts, points[isnew, :], axis=0)
        self.allScores = append(self.allScores, scores[isnew], axis=0)

    if self.plotflag:
        import pylab as p
        if self.depth == 0:
            self.oPlot = p.plot(self.allPts[:, 0], self.allPts[:, 1], 'o')[0]
        # insert new data into plot
        self.oPlot.set_data(self.allPts[:, 0], self.allPts[:, 1])
        p.draw()

    # recursively call ourselves
    self.depth += 1
    return self.search(cmin, cmax)

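# Hypothetical driver for the recursive DOE search above; the class name and
# constructor arguments are assumptions, not defined in these snippets, and
# svm_model(problem, param) refers to the old libsvm Python bindings.
searcher = GridSearchDOE(problem, targets,
                         usermin=(-5, -15), usermax=(15, 3),
                         crossval=5, maxdepth=4)
param = searcher.search()          # recursive refinement, returns a tuned svm_parameter
model = svm_model(problem, param)  # train the final model with the tuned parameters
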