def calculate_race():
    """Train an RBF SVM on the first 50 samples and score it on the rest.

    Reads the module-level ``data`` and ``question2b_race_truth.truth``.

    Returns:
        tuple: ``(cv, accuracy)`` where ``cv`` is the value returned by
        ``svmutil.svm_train`` with 10-fold cross-validation switched on and
        ``accuracy`` is the number of hold-out hits divided by 50, times 100.
    """
    train_labels = []
    train_samples = []
    # Collect the leading samples (indices 0..49) with their truth labels.
    for idx, sample in enumerate(data):
        train_labels.append(question2b_race_truth.truth[idx])
        train_samples.append(sample)
        if idx == 49:
            break

    prob = svmutil.svm_problem(train_labels, train_samples)

    # First pass: cross-validation only.
    cv_param = svmutil.svm_parameter('-t 2 -c 4')
    cv_param.cross_validation = 1
    cv_param.nr_fold = 10
    cv = svmutil.svm_train(prob, cv_param)

    # Second pass: the actual model used for the hold-out predictions.
    model = svmutil.svm_train(prob, svmutil.svm_parameter('-t 2 -c 4'))

    hits = 0
    for idx, sample in enumerate(data):
        if idx < 50:
            # Training samples are not scored.
            continue
        x0, max_idx = gen_svm_nodearray(sample)
        if libsvm.svm_predict(model, x0) == question2b_race_truth.truth[idx]:
            hits += 1
    return cv, hits / float(50) * 100
def TrainSvmRbf2(Y, X, sweep_c=range(-5,5), sweep_g=range(-5,5)):
    """Grid-search C and gamma for a class-weighted RBF SVM, then train it.

    Every pair (C, gamma) = (2**c_pow, 2**g_pow) is scored with
    ``CrossValidate``; the pair with the highest positive-class accuracy is
    used to train the returned model on all of ``Y``/``X``.

    Args:
        Y: List of labels; ``Y.count`` tallies the ``-1`` and ``1`` entries.
        X: Training vectors handed to ``svm.svm_problem`` together with ``Y``.
        sweep_c: Exponents of 2 to try for C.
        sweep_g: Exponents of 2 to try for gamma.

    Returns:
        The model produced by ``svm.svm_train`` for the best pair.

    Raises:
        ZeroDivisionError: If ``Y`` holds no ``-1`` or no ``1`` label.
    """
    num_negatives = float(Y.count(-1))
    num_positives = float(Y.count(1))
    # Class weights are inversely proportional to the class sizes.
    weight_neg = 100 / num_negatives
    weight_pos = 100 / num_positives
    option_template = '-t 2 -c %f -g %f -w-1 %f -w1 %f -q'

    best_c = -1
    best_g = -1
    best_acc = -1
    for c_pow in sweep_c:
        for g_pow in sweep_g:
            current_c = np.power(2.0, c_pow)
            current_g = np.power(2.0, g_pow)
            param = svm.svm_parameter(
                option_template % (current_c, current_g, weight_neg, weight_pos))
            current_pos_acc, current_neg_acc = CrossValidate(Y, X, param)
            # Model selection is driven by the positive-class accuracy only.
            current_acc = current_pos_acc
            print('c = %f, g = %f, cv acc = %f, neg acc = %f'
                  % (current_c, current_g, current_acc, current_neg_acc))
            if best_acc < current_acc:
                best_acc = current_acc
                best_c = current_c
                best_g = current_g

    prob = svm.svm_problem(Y, X)
    param = svm.svm_parameter(
        option_template % (best_c, best_g, weight_neg, weight_pos))
    return svm.svm_train(prob, param)
def TrainSvmLinear2(Y, X, sweep_c=range(-2,18)):
    """Pick C for a class-weighted linear SVM via ``CrossValidate`` and train.

    Args:
        Y: List of labels; ``Y.count`` tallies the ``1`` and ``-1`` entries.
        X: Training vectors.
        sweep_c: Exponents of 2 to try for C.

    Returns:
        The model trained on all data with the C that gave the highest
        positive-class accuracy.
    """
    num_positives = float(Y.count(1))
    num_negatives = float(Y.count(-1))

    def build_param(c_value):
        # Linear kernel, quiet, classes weighted by inverse frequency.
        return svm.svm_parameter(
            '-t 0 -c %f -w-1 %f -w1 %f -q'
            % (c_value, 100/num_negatives, 100/num_positives))

    best_c, best_acc = -1, -1
    for exponent in sweep_c:
        candidate_c = np.power(2.0, exponent)
        pos_acc, neg_acc = CrossValidate(Y, X, build_param(candidate_c))
        print('%f, %f, %f' % (candidate_c, pos_acc, neg_acc))
        if best_acc < pos_acc:
            best_acc, best_c = pos_acc, candidate_c

    problem = svm.svm_problem(Y, X)
    svm_model = svm.svm_train(problem, build_param(best_c))
    # Training-set prediction; its result is not used further here.
    p_labs, p_acc, p_vals = svm.svm_predict(Y, X, svm_model, '-q')
    return svm_model
def TrainSvmLinear(Y, X, sweep_c=range(-2,8)):
    """Pick C for a class-weighted linear SVM by 5-fold CV, then train.

    Args:
        Y: List of labels; ``Y.count`` tallies the ``1`` and ``-1`` entries.
        X: Training vectors.
        sweep_c: Exponents of 2 to try for C.

    Returns:
        The model trained on all of ``Y``/``X`` with the best C found.

    Raises:
        ZeroDivisionError: If ``Y`` holds no ``-1`` or no ``1`` label.
    """
    num_positives = float(Y.count(1))
    num_negatives = float(Y.count(-1))
    weight_neg = 100 / num_negatives
    weight_pos = 100 / num_positives

    # The problem does not depend on C, so build it once.
    prob = svm.svm_problem(Y, X)

    best_c = -1
    best_acc = -1
    for c_pow in sweep_c:
        current_c = np.power(2.0, c_pow)
        param = svm.svm_parameter(
            '-v 5 -t 0 -c %f -w-1 %f -w1 %f -q'
            % (current_c, weight_neg, weight_pos))
        # With -v, svm_train returns the cross-validation score.
        current_acc = svm.svm_train(prob, param)
        print('%f, %f' % (current_c, current_acc))
        if best_acc < current_acc:
            best_acc = current_acc
            best_c = current_c

    # Train the final model once, on all data, with the winning C.
    param = svm.svm_parameter(
        '-t 0 -c %f -w-1 %f -w1 %f -q' % (best_c, weight_neg, weight_pos))
    return svm.svm_train(prob, param)
def train(self):
    """Train RBF and linear SVMs over four rotations of ``self.images``.

    Each round calls ``self.convert()``, appends two RBF models to
    ``self.mr`` and two linear models to ``self.ml`` (histogram problem
    first, vector problem second), then rotates ``self.images`` left by one.
    """
    # (destination list name, index into self.problem, libsvm options)
    schedule = (
        ('mr', 0, "-t 2 -b 1 -c 1 -g 0.001"),   # rbf, hist
        ('mr', 1, "-t 2 -b 1 -c 0.1 -g 0.001"), # rbf, vector
        ('ml', 0, "-t 0 -b 1 -c 0.1"),          # linear, hist
        ('ml', 1, "-t 0 -b 1 -c 0.01"),         # linear, vector
    )
    for _ in range(4):
        self.convert()
        for attr_name, problem_idx, options in schedule:
            parameter = svmutil.svm_parameter(options)
            model = svmutil.svm_train(self.problem[problem_idx], parameter)
            getattr(self, attr_name).append(model)
        # Move the first image to the end for the next round.
        self.images = self.images[1:] + self.images[:1]
def main(path, k):
    """Run fold-wise linear-SVM evaluation over pickled ``*WC`` files.

    For each fold index, files in ``path`` whose name ends in ``'WC'`` are
    split into a test set (running file count modulo ``k`` equals the fold
    index) and a training set. A model is trained, saved to ``'n.model'``
    and evaluated on the test set.

    Args:
        path: Directory containing the pickled feature files. The label of a
            file is the integer at position 1 of its name.
        k: Modulus used to assign files to folds.

    NOTE(review): the per-fold accuracies (``prabs``) and test-set sizes
    (``lns``) are collected but not returned in the visible code.
    """
    prabs = []
    lns = []
    # NOTE(review): range(0, k - 1) visits only k-1 of the k residue
    # classes; confirm whether the last fold is skipped on purpose.
    for kk in range(0, k - 1):
        testLabel = []
        trainPoint = []
        trainLabel = []
        testPoint = []
        wcCount = 0
        for u in os.listdir(path):
            if u[-2:] != 'WC':
                continue
            wcCount += 1
            # NOTE: pickle.load can execute arbitrary code; only point this
            # at trusted directories.
            with open(os.path.join(path, u), 'rb') as wc_file:
                WC = pickle.load(wc_file)
            if wcCount % k == 0 + kk:
                testLabel.append(int(u[1]))
                testPoint.append(WC)
            else:
                trainLabel.append(int(u[1]))
                trainPoint.append(WC)
        lns.append(len(testLabel))
        prob = svmutil.svm_problem(trainLabel, trainPoint)
        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model('n.model', m)
        p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1')
        prabs.append(p_acc[0])
def kfold(data, labels, k):
    """Estimate SVM accuracy over 10 random hold-out splits.

    Each round draws ``len(data) // k`` random indices as the test set,
    trains a sigmoid-kernel SVM (``-t 3 -c 4 -b 1``) on the remainder, saves
    it to ``'n.model'`` and records the test accuracy.

    Args:
        data: Indexable by an index array and offering ``tolist()`` on the
            result (presumably a numpy array -- confirm against callers).
        labels: Same requirements as ``data``.
        k: Divisor determining the test-set size.

    Returns:
        The mean accuracy over the 10 rounds, or 0 when ``svmutil`` cannot
        be imported.
    """
    try:
        import svmutil
    except Exception:
        # Best effort: libsvm may be absent or fail to load its shared
        # library. Unlike a bare ``except``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return 0

    prabs = []
    fold_size = len(data) // k
    for _ in range(0, 10):
        picks = np.random.choice(len(data), fold_size, replace=False)
        rest = np.setdiff1d(range(0, len(data)), picks)

        testLabel = labels[picks].tolist()
        testPoint = data[picks].tolist()
        trainLabel = labels[rest].tolist()
        trainPoint = data[rest].tolist()

        prob = svmutil.svm_problem(trainLabel, trainPoint)
        param = svmutil.svm_parameter('-t 3 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model('n.model', m)
        p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1')
        prabs.append(p_acc[0])

    mean_accuracy = sum(prabs) / float(len(prabs))
    print(mean_accuracy)
    print('std' + str(np.std(prabs)))
    return mean_accuracy
def train(self,x,y):
    """Train a libsvm model from labels ``y`` and samples ``x``.

    The option string is assembled from the instance attributes ``type``,
    ``kernel``, ``degree``, ``gamma``, ``coef0``, ``c``, ``nu``, ``p``,
    ``eps`` and ``prob``, plus ``-v`` / ``-q`` when ``self.v`` / ``self.q``
    are non-zero. The string is printed and the result of ``svm_train`` is
    stored in ``self.model``.

    Returns:
        True, always.
    """
    problem = su.svm_problem(y, x)

    option_values = (
        self.type, self.kernel, self.degree, self.gamma, self.coef0,
        self.c, self.nu, self.p, self.eps, self.prob,
    )
    pieces = [
        "-s %d -t %d -d %d -g %f -r %f -c %f -n %f -p %f -e %f -b %d"
        % option_values
    ]
    if self.v != 0:
        pieces.append(" -v %d" % self.v)
    if self.q != 0:
        pieces.append(" -q")
    para = "".join(pieces)

    print(para)
    self.model = su.svm_train(problem, su.svm_parameter(para))
    return True
def train(cls, featuresets, params="-t 0 -q"):
    """Build a classifier from labelled feature dictionaries.

    Args:
        featuresets: List of ``(featuredict, label)`` pairs.
        params: Option string handed to ``svmutil.svm_parameter``.

    Returns:
        An instance of ``cls`` built from the feature index, the label index
        and the trained model.
    """
    feature_names = set()
    label_names = set()
    for featuredict, label in featuresets:
        feature_names.update(set(featuredict.keys()))
        label_names.add(label)

    # Indices are 1-based and follow the sorted order of the names.
    featureindex = dict(
        (name, position)
        for position, name in enumerate(sorted(feature_names), 1))
    labelindex = dict(
        (name, position)
        for position, name in enumerate(sorted(label_names), 1))

    vectors, labels = cls.featuresets_to_svm(featureindex, labelindex,
                                             featuresets)
    model = svmutil.svm_train(svmutil.svm_problem(labels, vectors),
                              svmutil.svm_parameter(params))
    return cls(featureindex, labelindex, model)
def _lib_train_libsvm(user_tfidf, num_pos, num_neg, ignore):
    """Train a libsvm model on the converted tf-idf data.

    The inputs go through ``_convert_to_sparse_matrix``, which also returns
    the positive/negative counts used to build the label list (positives
    first, labelled 1; negatives after, labelled -1). The kernel is chosen
    by the module-level ``KERNEL_NUMBER``.

    Returns:
        The value returned by ``svm_train``.
    """
    converted = _convert_to_sparse_matrix(user_tfidf, num_pos, num_neg, ignore)
    sparse_user_tfidf, num_pos, num_neg = converted

    labels = [1] * num_pos + [-1] * num_neg
    parameter = svm_parameter("-t %d" % KERNEL_NUMBER)
    problem = svm_problem(labels, sparse_user_tfidf)
    return svm_train(problem, parameter)
def trainSVM(trainMatrix, trainCategory):
    """Train a linear SVM (C = 10) and give models a ``predict`` method.

    As a side effect, ``svm.svm_model.predict`` is (re)assigned so that
    ``model.predict(x)`` returns the first predicted label for the single
    sample ``x``.

    Returns:
        The model returned by ``svm.svm_train``.
    """
    def _predict_single(self, x):
        # svm_predict wants label/sample lists; use a dummy label of 0.
        return svm.svm_predict([0], [x], self)[0][0]

    svm.svm_model.predict = _predict_single

    problem = svm.svm_problem(trainCategory, trainMatrix)
    settings = svm.svm_parameter()
    settings.kernel_type = svm.LINEAR
    settings.C = 10
    return svm.svm_train(problem, settings)
def get_cross_val(x, y, x_val, y_val, gamma_c):
    """Train an RBF SVM on ``(x, y)`` and score it on ``(x_val, y_val)``.

    ``gamma_c.C`` and ``gamma_c.gamma`` supply the ``-c`` and ``-g`` options.
    The trained model is also written to the file ``"model"``.

    Returns:
        The first element of the accuracy tuple from ``svm_predict``.
    """
    problem = svmutil.svm_problem(y, x)
    options = '-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma)
    model = svmutil.svm_train(problem, svmutil.svm_parameter(options))
    svmutil.svm_save_model("model", model)
    _labels, accuracy, _values = svmutil.svm_predict(y_val, x_val, model)
    return accuracy[0]
def getSvmParam(cross_validation_only = False):
    """Return a quiet, linear-kernel ``svm_parameter`` with C = 100.

    Args:
        cross_validation_only: When true, 10-fold cross-validation is also
            switched on in the returned parameter object.
    """
    param = svmutil.svm_parameter()
    param.parse_options('-q')  # quiet

    settings = []
    if cross_validation_only:
        settings.append(('cross_validation', True))
        settings.append(('nr_fold', 10))
    settings.append(('kernel_type', svmutil.LINEAR))
    settings.append(('C', 10.0 ** 2.0))

    for field_name, value in settings:
        setattr(param, field_name, value)
    return param
def __init__(self, data=None, labels=None, kernel=svmutil.RBF, c=10):
    """Configure the SVM and train immediately when data is supplied.

    Args:
        data: Training samples; omitted or ``None`` means "no data yet".
        labels: Labels matching ``data``; omitted or ``None`` means empty.
        kernel: libsvm kernel type constant.
        c: Value assigned to the parameter object's ``C``.

    When ``data`` is empty, ``self.problem`` and ``self.model`` are ``None``.
    """
    # Fresh lists per instance: a shared ``[]`` default would be stored on
    # every instance and leak mutations between them.
    if data is None:
        data = []
    if labels is None:
        labels = []

    self.__svmparam__ = svmutil.svm_parameter('-q')
    self.__svmparam__.kernel_type = kernel
    self.__svmparam__.C = c
    self.c = c
    self.data = data
    self.labels = labels
    if len(data) > 0:
        self.problem = svmutil.svm_problem(labels, data)
        self.model = svmutil.svm_train(self.problem, self.__svmparam__, '-q')
    else:
        self.problem = None
        self.model = None
def multiclass_train(valid_labels, labels, data, svm_parameters=None):
    """Train one one-against-all SVM per entry of ``valid_labels``.

    Args:
        valid_labels: Labels to build a binary model for.
        labels: Label of every sample in ``data``.
        data: Training samples.
        svm_parameters: Container indexed by label value (``container[label]``)
            giving the ``svm_parameter`` for that label. When ``None``, a
            default ``svm_parameter()`` is created for every label.

    Returns:
        List of trained models, in the order of ``valid_labels``.
    """
    if svm_parameters is None:
        # Key the defaults by label: a positional list only lines up with
        # ``svm_parameters[label]`` when the labels happen to be 0..n-1.
        svm_parameters = dict(
            (label, svmutil.svm_parameter()) for label in valid_labels)

    models = []
    for label in valid_labels:
        oaa_labels = relabel_one_against_all(labels, label)
        prob = svmutil.svm_problem(oaa_labels, data)
        models.append(svmutil.svm_train(prob, svm_parameters[label]))
    return models
def compute_auc_kern(data, labels, k=10, C=1.0, kern=6, gamma=None):
    '''Return the mean ROC AUC of an SVM over stratified k-fold splits.

    This is an interface to the extended libsvm implementation with new
    kernels.

    Args:
        data: Samples, indexable by the fold index arrays.
        labels: Labels, indexable by the fold index arrays.
        k: Number of folds.
        C: Value for the ``-c`` option.
        kern: Kernel number for the ``-t`` option.
        gamma: Value for the ``-g`` option; the option is omitted when None.

    Returns:
        The sum of the per-fold AUC values divided by ``k``.
    '''
    kv = cross_validation.StratifiedKFold(labels, n_folds=k, random_state=1)

    options = "-t %i -c %.410f -q" % (kern, C)
    if gamma is not None:
        # %s keeps fractional values intact; the previous %i silently
        # truncated e.g. 0.5 to 0. Integer gammas format as before.
        options += " -g %s" % gamma

    s = 0.0
    for train_index, test_index in kv:
        x_train = data[train_index].tolist()
        y_train = labels[train_index].tolist()
        labels_test = labels[test_index]
        x_test = data[test_index].tolist()
        y_test = labels_test.tolist()

        prob = svmutil.svm_problem(y_train, x_train)
        model = svmutil.svm_train(prob, svmutil.svm_parameter(options))
        p_label, p_acc, p_val = svmutil.svm_predict(y_test, x_test, m=model)
        s += roc_auc_score(labels_test, p_val)
    return s / k
def cSvmTrainSet(self):
    """Build the training set from ``data/`` and train a linear SVM.

    Files in ``data`` whose name matches ``<legalName>-<digit>.rec`` are
    loaded with target 1, all others with target -1. The combined set is
    written to ``data_format/<legalName>.mat``, read back with
    ``svm_read_problem`` and used for training. The model is stored in
    ``self.model`` and printed.
    """
    file_pattern = re.compile('^%s-\d.rec' % self.legalName)
    dataMat, labelMat = [], []
    for fdata in os.listdir('data'):
        target = 1 if file_pattern.match(fdata) else -1
        data, label = loadDataSet('data/' + fdata, target)
        dataMat += data
        labelMat += label

    formatted_path = 'data_format/%s.mat' % self.legalName
    libSvmFormatSaveInFile(dataMat, labelMat, formatted_path)
    # TODO: multithreading
    y, x = svmutil.svm_read_problem('data_format/%s.mat' % self.legalName)
    problem = svmutil.svm_problem(y, x, isKernel = True)
    self.model = svmutil.svm_train(problem, svmutil.svm_parameter('-t 0 '))
    print(self.model)
def __init__(self, c=None, gamma=None, filename=None, neighbours=3, verbose=0):
    """Load a saved classifier or prepare RBF parameters for training.

    Args:
        c: Soft-margin parameter C; required when ``filename`` is not given.
        gamma: RBF kernel parameter; required when ``filename`` is not given.
        filename: Path of a saved libsvm model to load instead.
        neighbours: Stored on the instance as ``self.neighbours``.
        verbose: When truthy, the load message is printed.

    Raises:
        ValueError: If no filename is given and ``c`` or ``gamma`` is None.
    """
    self.neighbours = neighbours
    self.verbose = verbose

    if filename:
        # If a filename is given, load a model from the given filename
        if verbose:
            print('Loading classifier from "%s"...' % filename)
        self.model = svm_load_model(filename)
    elif c is None or gamma is None:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("Please specify both C and gamma.")
    else:
        self.param = svm_parameter()
        self.param.C = c  # Soft margin
        self.param.kernel_type = RBF  # Radial kernel type
        self.param.gamma = gamma  # Parameter for radial kernel
        self.model = None
def main():
    """Fit an RBF nu-SVR to per-item feature files and print its fit.

    Command line: ``-r/--resdir`` (results directory) and ``-f/--feature``
    (feature name). Scores come from ``<resdir>/scores.txt``; the feature
    vector of item ``i`` (1-based) from ``<resdir>/features/<feature>/<i>.txt``.
    The model is saved to ``<resdir>/svr.model``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--resdir', type=str, required=True,
                        help="Results directory")
    parser.add_argument('-f', '--feature', type=str, required=True,
                        help='feature to use to learn')
    args = parser.parse_args()

    SCORES_FPATH = os.path.join(args.resdir, 'scores.txt')
    FEAT_DIR = os.path.join(args.resdir, 'features', args.feature)

    scores = np.fromfile(SCORES_FPATH, sep='\n')
    feats = [
        np.fromfile(os.path.join(FEAT_DIR, str(item) + '.txt'),
                    sep='\n').tolist()
        for item in range(1, len(scores) + 1)
    ]
    print('Read all features')

    params = svmutil.svm_parameter('-s 4 -t 2')
    problem = svmutil.svm_problem(scores, feats)
    model = svmutil.svm_train(problem, params)
    svmutil.svm_save_model(os.path.join(args.resdir, 'svr.model'), model)
    print(svmutil.svm_predict(scores, feats, model))
def main(path):
    """Train a linear SVM on every pickled ``*WC`` file and self-evaluate.

    Args:
        path: Directory containing the pickled feature files. The label of a
            file is the integer at position 1 of its name.

    Returns:
        The accuracy tuple returned by ``svmutil.svm_predict`` when the
        model is applied to its own training data. The model is also saved
        to ``'n.model'``.
    """
    label = []
    points = []
    for u in os.listdir(path):
        if u[-2:] != 'WC':
            continue
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # trusted directories.
        with open(os.path.join(path, u), 'rb') as wc_file:
            points.append(pickle.load(wc_file))
        label.append(int(u[1]))

    prob = svmutil.svm_problem(label, points)
    param = svmutil.svm_parameter('-t 0 -c 4 -b 1')
    m = svmutil.svm_train(prob, param)
    svmutil.svm_save_model('n.model', m)
    p_label, p_acc, p_val = svmutil.svm_predict(label, points, m, '-b 1')
    return p_acc
def prob2_to_4():
    """Print the transformed points and the polynomial-kernel SVM solution.

    Problem 2 prints a transform of the seven fixed points. Problem 3 trains
    a degree-2 polynomial-kernel SVM with a very large C and prints each
    support vector's index and alpha, the sum of the alphas, ``rho`` and the
    coefficients of the resulting quadratic (xx, xy, yy, x, y, 1).
    """
    x = np.array([
        [ 1,  0],
        [ 0,  1],
        [ 0, -1],
        [-1,  0],
        [ 0,  2],
        [ 0, -2],
        [-2,  0]
    ])
    y = np.array([-1, -1, -1, 1, 1, 1, 1])

    print("===prob 2===")
    xf = np.fliplr(x.copy())
    print(xf*xf - 2*x + np.array([[3,-3]]))

    print("===prob 3===")
    problem = SVM.svm_problem(y.tolist(), x.tolist())
    # very large C for hard margin
    settings = SVM.svm_parameter('-t 1 -c 100 -d 2 -r 1 -g 1')
    m = SVM.svm_train(problem, settings)

    sumA = 0
    poly = [0] * 6  # xx, xy, yy, x, y, 1
    for i in range(m.l):
        idx = m.sv_indices[i]
        alphay = m.sv_coef[0][i]
        alpha = abs(m.sv_coef[0][i])
        print("{:d} {:+1.2f}".format(idx, alpha))
        sumA += alpha
        v = x[idx-1]  # sv_indices are 1-based
        contributions = (
            alphay*v[0]*v[0],
            alphay*v[1]*v[0]*2,
            alphay*v[1]*v[1],
            alphay*v[0]*2,
            alphay*v[1]*2,
        )
        for slot, term in enumerate(contributions):
            poly[slot] += term
        # The constant term needs no alphay*1 because Sum(alphay) = 0.
    poly[5] -= m.rho[0]

    print("Sum of alpha is {:1.3f}\nb = {}".format(sumA, m.rho[0]))
    print("{:+2.2f}xx {:+2.2f}xy {:+2.2f}yy {:+2.2f}x {:+2.2f}y {:+2.2f}".format(*poly))
def bench_svm(X, Y):
    """Time libsvm's own Python wrappers on ``X``/``Y``.

    The timed section covers building the problem, training with
    ``svm_type=0`` and ``kernel_type=2``, and predicting on ``X``. The
    elapsed time is appended to the module-level ``svm_results``.
    """
    import svmutil

    samples = X.tolist()
    targets = Y.tolist()

    gc.collect()

    # start time
    started = datetime.now()
    problem = svmutil.svm_problem(targets, samples)
    settings = svmutil.svm_parameter()
    settings.svm_type = 0
    settings.kernel_type = 2
    model = svmutil.svm_train(problem, settings)
    svmutil.svm_predict([0]*len(samples), samples, model)
    elapsed = datetime.now() - started
    # stop time

    svm_results.append(elapsed.seconds + elapsed.microseconds/mu_second)
def svm_learning_curve(x, y): m = len(y) n = len(x) steep = m / 100; training_examples = [] train_accuracy = [] validation_accuracy = [] for i in range(steep, m, steep): prob = svmutil.svm_problem(y[:i], x[:i]) param = svmutil.svm_parameter('-t 2 -q -c 0.01') m = svmutil.svm_train(prob, param) p_label_train, p_acc_train, p_val_train = svmutil.svm_predict(y[:i], x[:i], m) p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y[i:], x[i:], m) print p_acc_train[0], "\t", p_acc_validation[0], "\n" training_examples.append(i) train_accuracy.append(p_acc_train[0]) validation_accuracy.append(p_acc_validation[0]) return training_examples, train_accuracy, validation_accuracy
def __init__(self, kernel=None, classifier=None, probability=True, params=None, input_dim=None, output_dim=None, dtype=None):
    """Set up the libsvm parameter object and the underlying node.

    kernel -- The kernel to use
    classifier -- The type of the SVM
    params -- a dict of parameters to be passed to the svm_parameter
    probability -- Must be set to True, if algorithms based on probability
                   shall be used.

    Raises mdp.NodeException for a key of ``params`` that is not listed in
    the parameter object's ``_names`` and AttributeError for a listed name
    the parameter object has no attribute for.
    """
    params = params if params else {}

    # initialise the parameter and be quiet
    self.parameter = libsvmutil.svm_parameter("-q")
    if probability:
        # allow for probability estimates
        self.parameter.probability = 1

    super(LibSVMClassifier, self).__init__(input_dim=input_dim,
                                           output_dim=output_dim,
                                           dtype=dtype)
    if kernel:
        self.set_kernel(kernel)
    if classifier:
        self.set_classifier(classifier)

    # set all other parameters
    for name, value in params.items():
        if name not in self.parameter._names:
            # check that the name is a valid parameter
            raise mdp.NodeException(
                "'{}' is not a valid parameter for libsvm".format(name))
        if not hasattr(self.parameter, name):
            raise AttributeError(
                "'svm_parameter' has no attribute {}".format(name))
        setattr(self.parameter, name, value)
# NOTE(review): the statements up to the first ``return`` are the tail of a
# function whose header lies outside this view; they look like the loop body
# of a learning-curve routine -- confirm against the full file.
        p_label_train, p_acc_train, p_val_train = svmutil.svm_predict(y[:i], x[:i], m)
        p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y[i:], x[i:], m)
        print p_acc_train[0], "\t", p_acc_validation[0], "\n"
        training_examples.append(i)
        train_accuracy.append(p_acc_train[0])
        validation_accuracy.append(p_acc_validation[0])
    return training_examples, train_accuracy, validation_accuracy


def get_cross_val(x, y, x_val, y_val, gamma_c):
    """Train an RBF SVM on ``(x, y)`` and score it on ``(x_val, y_val)``.

    ``gamma_c.C`` and ``gamma_c.gamma`` supply the ``-c`` and ``-g`` options.
    The trained model is also written to the file ``"model"``.

    Returns:
        The first element of the accuracy tuple from ``svm_predict``.
    """
    prob = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma))
    m = svmutil.svm_train(prob, param)
    svmutil.svm_save_model("model", m)
    p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y_val, x_val, m)
    return p_acc_validation[0]


if __name__ == '__main__':
    # Script entry point: train one RBF model with fixed hyper-parameters on
    # the whole data file and save it to "model".
    y, x = svmutil.svm_read_problem("char_recon_shuffled.db")
    gamma = 1.0 / (2.0 * (3.0 ** 7) ** 2)
    C = 3.0 ** 3.0
    prob = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(C, gamma))
    m = svmutil.svm_train(prob, param)
    svmutil.svm_save_model("model", m)
def _complete_training(self, debug=False):
    """ Iterate over the complete data to get the initial model

    Steps, in order: optionally read the complexity from a YAML file,
    compute the default gamma, assemble the libsvm option string from the
    instance attributes, optionally transform the labels, train the model
    and finally record solver information and delete the training data.

    The ``debug`` argument is not read by this body; the method consults
    ``self.debug`` instead.
    """
    ########## read complexities file if given ##########
    if self.complexities_path is not None:
        import yaml
        # NOTE(review): the file object is never closed, and yaml.load is
        # called without an explicit Loader -- unsafe on untrusted files.
        complexities_file=open(self.complexities_path, 'r')
        complexities = yaml.load(complexities_file)
        # nr of channels = nr of features (==dim) / features_per_channel
        if not 'features_per_channel' in complexities:
            complexities['features_per_channel'] = 1
        self.complexity = complexities[
            round(self.dim/complexities['features_per_channel'])]
        self._log("Read complexity %s from file. Dimension is %s" %
                  (self.complexity, self.dim), level=logging.INFO)

    # not compatible with regression!
    # self._log("Instances of Class %s: %s, %s: %s" \
    #           % (self.classes[0],
    #              self.labels.count(self.classes.index(self.classes[0])),
    #              self.classes[1],
    #              self.labels.count(self.classes.index(self.classes[1]))))
    # instead this?:
    self._log("Performing training of SVM.")

    ########## Calculation of default gamma ##########
    self.calculate_gamma()

    self.num_samples = len(self.samples)

    # nr_weight is the number of elements in the array weight_label and
    # weight. Each weight[i] corresponds to weight_label[i], meaning that
    # the penalty of class weight_label[i] is scaled by a factor of
    # weight[i]. If you do not want to change penalty for any of the
    # classes, just set nr_weight to 0.

    ########## preparation of the libsvm command ##########
    # for probability output add "-b 1" to options
    options = \
        "-c %.42f -d %d -g %.42f -r %.42f -n %.42f -p %.42f -e %.20f -m %.42f" % \
        (self.complexity, self.exponent, self.gamma,
         self.offset, self.nu, self.epsilon, self.tolerance, 1000)
    # use 1000MB instead of 100MB (default)
    # options += " -b 1" un-comment this for probabilistic output!
    if self.multinomial:
        options += " -b 1"
    # One -w<i> option per entry of self.weight, keyed by its position.
    for i,w in enumerate(self.weight):
        options += " -w%d %.42f" % (i, w)
    if self.kernel_type == 'LINEAR':
        options += " -t 0"
    elif self.kernel_type == 'POLY':
        options += " -t 1"
    elif self.kernel_type == 'RBF':
        options += " -t 2"
    elif self.kernel_type == 'SIGMOID':
        options += " -t 3"
    else:
        # Unknown kernel names fall back to the linear kernel.
        self.kernel_type = 'LINEAR'
        options += " -t 0"
        warnings.warn("Kernel unknown! Precomputed Kernels are not " +
                      "yet implemented. Linear Kernel used.")
        # PRECOMPUTED: kernel values in training_set_file
        # (not yet implemented)
    if self.svm_type == 'C-SVC':
        options += " -s 0"
    elif self.svm_type == 'nu-SVR':
        options += " -s 1"
    elif self.svm_type == 'one-class SVM':
        options += " -s 2"
    elif self.svm_type == 'epsilon-SVR':
        options += " -s 3"
    else:
        # Unknown SVM types fall back to C-SVC.
        options += " -s 0"
        self.svm_type = 'C-SVC'
        warnings.warn("SVM-type unknown. C-SVC will be used!")
    if not self.debug:
        options += " -q"
        self._log("Libsvm is now quiet!")

    # Keep the option string without "-i" so it can be retried if the
    # installed libsvm rejects that option.
    old_libsvm_options = options
    if self.max_iterations != 0:
        options += " -i %d" % self.max_iterations
    try:
        param = svmutil.svm_parameter(options)
    except ValueError:
        param = svmutil.svm_parameter(old_libsvm_options)
        self._log(
            "Using max_iterations is not supported by the standard " +
            "LIBSVM. Change your Python path to our customized version!",
            level=logging.CRITICAL)

    # transform labels with *label_function*
    if self.str_label_function is not None:
        # NOTE(review): eval executes arbitrary code; str_label_function
        # must come from a trusted configuration.
        self.label_function = eval(self.str_label_function)
        self.labels = self.label_function(self.labels)

    # build the classifier
    # h = [map(float,list(data)) for data in self.samples]
    problem = svmutil.svm_problem(self.labels, [
        map(float, list(data)) for data in self.samples])
    model = svmutil.svm_train(problem, param)

    if not self.multinomial:
        if (self.svm_type == 'C-SVC' or self.svm_type == 'one-class SVM') \
                and self.kernel_type == 'LINEAR':
            # NOTE(review): self.model is not assigned in this branch;
            # presumably calculate_classification_vector stores what is
            # needed -- confirm.
            self.calculate_classification_vector(model)
            if self.debug:
                # This calculation is needed for further analysis
                self.calculate_slack_variables(model)
                print "LIBSVM Parameter:"
                self.print_variables()
        else:
            # Slack variables are the same no matter which kernel is used
            # This method is mainly used to reduce the number of samples
            # being stored later on.
            if self.debug:
                self.calculate_slack_variables(model)
            self.model = model
    else:
        self.model = model
        # Slack variables are the same no matter which kernel is used
        # This method is mainly used to reduce the number of samples
        # being stored later on.

    # read number of iterations needed to solve the problem
    if self.max_iterations != 0:
        try:
            predictor_iterations = model.get_num_iterations()
            self.classifier_information["~~Solver_Iterations~~"] = \
                predictor_iterations
            if predictor_iterations == 0 or \
                    predictor_iterations == numpy.Inf:
                self.classifier_information["~~SVM_Converged~~"] = False
            else:
                self.classifier_information["~~SVM_Converged~~"] = True
        except:
            # NOTE(review): bare except also hides unrelated errors.
            warnings.warn("Could not read state of the LibSVM Solver " +
                          "from the C-Library!")
    try:
        self.classifier_information["~~offset~~"] = self.b
        self.classifier_information["~~w0~~"] = self.w[0]
        self.classifier_information["~~w1~~"] = self.w[1]
    except:
        # Best effort: any failure to read self.b / self.w is ignored.
        pass
    self.delete_training_data()
# use targets to train one svm for each hidden neuron print "Training SVMs..." probs = [] params = [] svms = [] ws = [] bs = [] werrs = 0 for n in range(hl.n_out): print "Hidden neuron: %d" % n, print " Problem...", if n == 0: probs.append(svmutil.svm_problem(svm_targets[n], svm_inputs)) else: probs.append(svmutil.svm_problem(svm_targets[n], None, tmpl=probs[0])) params.append(svmutil.svm_parameter("-q -s 0 -t 0 -c 100")) print " Training...", svms.append(svmutil.svm_train(probs[n], params[n])) print " Saving...", svmutil.svm_save_model("hidden%04d.svm" % n, svms[n]) print " Testing..." # get weights from SVM w, b = get_svm_weights(svms[n], hl.n_in) ws.append(w) bs.append(b) # test model predv = numpy.dot(w, trsx.T) + b pred = numpy.sign(predv) pos = 0
#gamma_vals = np.linspace(0.4,1,3) #gamma_vals = [0.05,0.03,0.01] #c_vals = [math.pow(2, x) for x in range(100,104, 1)] #gamma_vals = [math.pow(2, x) for x in range(100,104,1)] #c_vals = [0.1,1] #gamma_vals = [0.1,2] print "c", c_vals print "gamma", gamma_vals redo = False if redo == True: accs = np.zeros((len(c_vals), len(gamma_vals))) for c_index in range(len(c_vals)): for gamma_index in range(len(gamma_vals)): param = svmutil.svm_parameter('-s 0 -t 2 -c {0} -g {1}'.format(c_vals[c_index], gamma_vals[gamma_index])) #accuracy = svmutil.svm_train(prob, param) model = svmutil.svm_train(prob, param) predicted_labels, accuracy, decision_vals = svmutil.svm_predict(paramsellabels, paramseldata, model) accs[gamma_index, c_index] = accuracy[0] gr = open("grid_results", 'w') pickle.dump(accs, gr) gr.close() else: gr = open("grid_results", 'r') accs = pickle.load(gr) gr.close() caxis, gaxis = np.meshgrid(np.log(c_vals), np.log(gamma_vals))
# # print prediction_accuracy(predicted_labels, testinglabels)
#===========================================================================

# Combined training + testing sets.
ttlabels = []
ttdata = []
for label_part, data_part in ((traininglabels, trainingdata),
                              (testinglabels, testingdata)):
    ttlabels.extend(label_part)
    ttdata.extend(data_part)

prob = svmutil.svm_problem(traininglabels, trainingdata)
#prob = svmutil.svm_problem(traininglabels.extend(testinglabels), trainingdata.extend(testingdata))
param = svmutil.svm_parameter(
    '-s 0 -t 2 -c {0} -g {1}'.format(np.exp(0.5), np.exp(-3.56)))
#accuracy = svmutil.svm_train(prob, param)
model = svmutil.svm_train(prob, param)
predicted_labels, accuracy, decision_vals = svmutil.svm_predict(
    testinglabels, testingdata, model)

# constructing a confusion matrix
svm_confusion = make_confusion(testinglabels, predicted_labels)
print(svm_confusion)
with open('svm_conf', 'w') as svm_conf_file:
    write_confusion(svm_confusion, svm_conf_file)
def train(self, positive_classes, negatives):
    """
    Train the supervised SVM classifier model.

    The class label ``negative`` is reserved for the negative class.

    If a model is already loaded, we will raise an exception in order to
    prevent accidental overwrite.

    NOTE: This abstract method provides generalized error checking and
    should be called via ``super`` in implementing methods.

    Side effects visible in this body: ``self.svm_label_map`` and
    ``self.svm_model`` are replaced; when ``self.svm_label_map_fp`` /
    ``self.svm_model_fp`` are set, the label map is pickled and the model is
    saved to those paths.

    :param positive_classes: Dictionary mapping positive class labels to
        iterables of DescriptorElement training examples.
    :type positive_classes:
        dict[collections.Hashable,
             collections.Iterable[smqtk.representation.DescriptorElement]]

    :param negatives: Iterable of negative DescriptorElement examples.
    :type negatives:
        collections.Iterable[smqtk.representation.DescriptorElement]

    :raises ValueError: The ``negative`` label was found in the
        ``positive_classes`` dictionary. This is reserved for the negative
        example class.
    :raises ValueError: There were no positive or negative examples.
    :raises RuntimeError: A model already exists in this instance.
        Following through with training would overwrite this model.
        Throwing an exception for information protection.

    """
    super(LibSvmClassifier, self).train(positive_classes, negatives)

    # Offset from 0 for positive class labels to use
    # - not using label of 0 because we think libSVM wants positive labels
    CLASS_LABEL_OFFSET = 1

    # Stuff for debug reporting
    # Default is quiet libSVM ("-q") and no progress-report interval; both
    # are relaxed when the logger is at DEBUG level or lower.
    etm_ri = None
    param_debug = {"-q": ""}
    if self._log.getEffectiveLevel() <= logging.DEBUG:
        etm_ri = 1.0
        param_debug = {}

    # Form libSVM problem input values
    self._log.debug("Formatting problem input")
    train_labels = []
    train_vectors = []
    train_group_sizes = []  # number of examples per positive class
    self.svm_label_map = {}
    # Making SVM label assignment deterministic to alphabetic order
    for i, l in enumerate(sorted(positive_classes), CLASS_LABEL_OFFSET):
        # Map integer SVM label to semantic label
        self.svm_label_map[i] = l

        self._log.debug("-- class %d (%s)", i, l)
        # requires a sequence, so making the iterable ``g`` a tuple
        g = positive_classes[l]
        if not isinstance(g, collections.Sequence):
            g = tuple(g)

        train_group_sizes.append(float(len(g)))
        x = elements_to_matrix(g, report_interval=etm_ri)
        x = self._norm_vector(x)
        train_labels.extend([i] * x.shape[0])
        train_vectors.extend(x.tolist())
        del g, x

    self._log.debug("-- negatives (-1)")
    # Map integer SVM label to semantic label
    self.svm_label_map[-1] = self.NEGATIVE_LABEL
    # requires a sequence, so making the iterable ``negatives`` a tuple
    if not isinstance(negatives, collections.Sequence):
        negatives = tuple(negatives)
    negatives_size = float(len(negatives))
    x = elements_to_matrix(negatives, report_interval=etm_ri)
    x = self._norm_vector(x)
    train_labels.extend([-1] * x.shape[0])
    train_vectors.extend(x.tolist())
    del negatives, x

    self._log.debug(
        "Training elements: %d labels, %d vectors "
        "(should be the same)",
        len(train_labels), len(train_vectors)
    )

    self._log.debug("Forming train params")
    #: :type: dict
    params = deepcopy(self.train_params)
    params.update(param_debug)
    # Only need to calculate positive class weights when C-SVC type
    if "-s" not in params or int(params["-s"]) == 0:
        for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
            # Weight is the negatives-to-class size ratio, floored at 1.0.
            params["-w" + str(i)] = max(1.0, negatives_size / float(n))

    self._log.debug("Making parameters obj")
    svm_params = svmutil.svm_parameter(self._gen_param_string(params))
    self._log.debug("Creating SVM problem")
    svm_problem = svm.svm_problem(train_labels, train_vectors)
    self._log.debug("Training SVM model")
    self.svm_model = svmutil.svm_train(svm_problem, svm_params)
    self._log.debug("Training SVM model -- Done")

    if self.svm_label_map_fp:
        self._log.debug("saving file -- labels -- %s",
                        self.svm_label_map_fp)
        with open(self.svm_label_map_fp, "wb") as f:
            cPickle.dump(self.svm_label_map, f)
    if self.svm_model_fp:
        self._log.debug("saving file -- model -- %s", self.svm_model_fp)
        svmutil.svm_save_model(self.svm_model_fp, self.svm_model)
def train(self, class_examples=None, **kwds):
    """
    Train the supervised classifier model.

    If a model is already loaded, we will raise an exception in order to
    prevent accidental overwrite.

    If the same label is provided to both ``class_examples`` and ``kwds``,
    the examples given to the reference in ``kwds`` will prevail.

    Side effects visible in this body: ``self.svm_label_map`` and
    ``self.svm_model`` are replaced; when ``self.svm_label_map_fp`` /
    ``self.svm_model_fp`` are set, the label map is pickled and the model is
    saved to those paths.

    :param class_examples: Dictionary mapping class labels to iterables of
        DescriptorElement training examples.
    :type class_examples:
        dict[collections.Hashable,
             collections.Iterable[smqtk.representation.DescriptorElement]]

    :param kwds: Keyword assignment of labels to iterables of
        DescriptorElement training examples.
    :type kwds:
        dict[str,
             collections.Iterable[smqtk.representation.DescriptorElement]]

    :raises ValueError: There were no class examples provided.
    :raises ValueError: Less than 2 classes were given.
    :raises RuntimeError: A model already exists in this instance.
        Following through with training would overwrite this model.
        Throwing an exception for information protection.

    """
    class_examples = \
        super(LibSvmClassifier, self).train(class_examples, **kwds)

    # Offset from 0 for positive class labels to use
    # - not using label of 0 because we think libSVM wants positive labels
    CLASS_LABEL_OFFSET = 1

    # Stuff for debug reporting
    # Default is quiet libSVM ('-q') and no progress-report interval; both
    # are relaxed when the logger is at DEBUG level or lower.
    etm_ri = None
    param_debug = {'-q': ''}
    if self._log.getEffectiveLevel() <= logging.DEBUG:
        etm_ri = 1.0
        param_debug = {}

    # Form libSVM problem input values
    self._log.debug("Formatting problem input")
    train_labels = []
    train_vectors = []
    train_group_sizes = []  # number of examples per class
    self.svm_label_map = {}
    # Making SVM label assignment deterministic to alphabetic order
    for i, l in enumerate(sorted(class_examples), CLASS_LABEL_OFFSET):
        # Map integer SVM label to semantic label
        self.svm_label_map[i] = l

        self._log.debug('-- class %d (%s)', i, l)
        # requires a sequence, so making the iterable ``g`` a tuple
        g = class_examples[l]
        if not isinstance(g, collections.Sequence):
            g = tuple(g)

        train_group_sizes.append(float(len(g)))
        x = elements_to_matrix(g, report_interval=etm_ri)
        x = self._norm_vector(x)
        train_labels.extend([i] * x.shape[0])
        train_vectors.extend(x.tolist())
        del g, x

    # Internal invariant: labels and vectors are built in lockstep above.
    assert len(train_labels) == len(train_vectors), \
        "Count mismatch between parallel labels and descriptor vectors " \
        "being sent to libSVM (%d != %d)" \
        % (len(train_labels), len(train_vectors))

    self._log.debug("Forming train params")
    #: :type: dict
    params = deepcopy(self.train_params)
    params.update(param_debug)
    # Calculating class weights for C-SVC SVM
    if '-s' not in params or int(params['-s']) == 0:
        total_examples = sum(train_group_sizes)
        for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
            # weight is the ratio of between number of other-class examples
            # to the number of examples in this class, floored at 1.0.
            other_class_examples = total_examples - n
            w = max(1.0, other_class_examples / float(n))
            params['-w' + str(i)] = w
            self._log.debug("-- class '%s' weight: %s",
                            self.svm_label_map[i], w)

    self._log.debug("Making parameters obj")
    svm_params = svmutil.svm_parameter(self._gen_param_string(params))
    self._log.debug("Creating SVM problem")
    svm_problem = svm.svm_problem(train_labels, train_vectors)
    self._log.debug("Training SVM model")
    self.svm_model = svmutil.svm_train(svm_problem, svm_params)
    self._log.debug("Training SVM model -- Done")

    if self.svm_label_map_fp:
        self._log.debug("saving file -- labels -- %s",
                        self.svm_label_map_fp)
        with open(self.svm_label_map_fp, 'wb') as f:
            cPickle.dump(self.svm_label_map, f, -1)
    if self.svm_model_fp:
        self._log.debug("saving file -- model -- %s", self.svm_model_fp)
        svmutil.svm_save_model(self.svm_model_fp, self.svm_model)