def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Train a one-vs-one linear multiclass machine and predict the test set.

    A LibLinear machine (solver L2R_L2LOSS_SVC, bias enabled) is used as the
    base learner of a LinearMulticlassMachine driven by
    MulticlassOneVsOneStrategy.

    fm_train_real          -- training feature matrix, wrapped in RealFeatures
    fm_test_real           -- test feature matrix, wrapped in RealFeatures
    label_train_multiclass -- training labels, wrapped in MulticlassLabels
    label_test_multiclass  -- test labels; when not None the accuracy of the
                              test predictions is printed
    width, C               -- accepted for signature compatibility; not read
                              by this function
    epsilon                -- value passed to LibLinear.set_epsilon

    Returns the predicted labels for fm_test_real (get_labels() output).
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # Predict on the test features explicitly.  Calling apply() without an
    # argument would classify the features the machine was trained on, and
    # those predictions would then be scored against the *test* labels below.
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Compare random-dense and random-sparse ECOC multiclass machines.

    Both machines share one LibLinear base learner (L2R_L2LOSS_SVC, bias
    enabled, epsilon as given) and decode with ECOCHDDecoder.  `width` and
    `C` are accepted but not read.

    Returns a tuple (sparse_predictions, dense_predictions) holding the
    labels predicted for fm_test_real.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    # Both strategies are built up front, dense first, as in the original.
    dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    dense_machine = LinearMulticlassMachine(dense_strategy, train_features, base_svm, train_labels)
    dense_machine.train()
    dense_prediction = dense_machine.apply(test_features)
    out_dense = dense_prediction.get_labels()

    sparse_machine = LinearMulticlassMachine(sparse_strategy, train_features, base_svm, train_labels)
    sparse_machine.train()
    sparse_prediction = sparse_machine.apply(test_features)
    out_sparse = sparse_prediction.get_labels()

    return out_sparse, out_dense
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Compare native one-vs-rest with its ECOC formulation.

    Trains a LinearMulticlassMachine with MulticlassOneVsRestStrategy and a
    second one with ECOCStrategy(ECOCOVREncoder, ECOCHDDecoder), both on the
    same LibLinear base learner, predicts fm_test_real with each, and prints
    how many predictions differ.  `width` and `C` are accepted but not read.

    Returns a tuple (ecoc_predictions, ovr_predictions).
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCHDDecoder, MulticlassOneVsRestStrategy

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    # Native one-vs-rest.
    ovr_machine = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), train_features, base_svm, train_labels)
    ovr_machine.train()
    out_mc = ovr_machine.apply(test_features).get_labels()

    # One-vs-rest expressed as an ECOC codebook.
    ecoc_machine = LinearMulticlassMachine(
        ECOCStrategy(ECOCOVREncoder(), ECOCHDDecoder()),
        train_features, base_svm, train_labels)
    ecoc_machine.train()
    out_ecoc = ecoc_machine.apply(test_features).get_labels()

    mismatches = (out_mc != out_ecoc).sum()
    if mismatches != 0:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (mismatches, len(out_mc)))
    else:
        print("Same results for OvR and ECOCOvR")

    return out_ecoc, out_mc
def modelselection_grid_search_linear_modular(traindat=traindat, label_traindat=label_traindat):
    """Grid-search C1 and C2 of a LibLinear machine with 10-fold cross-validation.

    The best parameter combination found by GridSearchModelSelection is
    applied to the classifier and cross-validation is run once more.
    Nothing is returned.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # Parameter tree: root -> C1, C2, each with values built from the
    # range [-2, 2] using R_EXP.
    root = ModelSelectionParameters()

    node_c1 = ModelSelectionParameters("C1")
    root.append_child(node_c1)
    node_c1.build_values(-2.0, 2.0, R_EXP)

    node_c2 = ModelSelectionParameters("C2")
    root.append_child(node_c2)
    node_c2.build_values(-2.0, 2.0, R_EXP)

    # Training data.
    train_features = RealFeatures(traindat)
    train_labels = Labels(label_traindat)

    # Machine whose parameters are selected; list what model selection can tune.
    machine = LibLinear(L2R_L2LOSS_SVC)
    machine.print_modsel_params()

    # Stratified 10-fold splitting scored by accuracy.
    splitter = StratifiedCrossValidationSplitting(train_labels, 10)
    criterion = ContingencyTableEvaluation(ACCURACY)
    cross_validation = CrossValidation(machine, train_features, train_labels, splitter, criterion)

    # Exhaustive search over the parameter tree.
    grid_search = GridSearchModelSelection(root, cross_validation)
    best_parameters = grid_search.select_model()

    # Re-evaluate with the winning combination applied.
    best_parameters.apply_to_machine(machine)
    result = cross_validation.evaluate()
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """Multiclass classification with a data-dependent (discriminant) ECOC code.

    An ECOCDiscriminantEncoder is given the training features and labels and
    50 SFFS iterations, paired with ECOCHDDecoder, and used as the strategy
    of a LinearMulticlassMachine over LibLinear.  When label_test_multiclass
    is not None the test accuracy is printed.  `lawidth` and `C` are not read.

    Returns the labels predicted for fm_test_real.
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    # The discriminant encoder needs the training data to build its code.
    discriminant_encoder = ECOCDiscriminantEncoder()
    discriminant_encoder.set_features(train_features)
    discriminant_encoder.set_labels(train_labels)
    discriminant_encoder.set_sffs_iterations(50)

    ecoc_strategy = ECOCStrategy(discriminant_encoder, ECOCHDDecoder())
    machine = LinearMulticlassMachine(ecoc_strategy, train_features, base_svm, train_labels)
    machine.train()

    label_pred = machine.apply(test_features)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        reference = MulticlassLabels(label_test_multiclass)
        accuracy = MulticlassAccuracy().evaluate(label_pred, reference)
        print('Accuracy = %.4f' % accuracy)

    return out
def train_svm(feats_train, labels, C=1):
    """Train and return a bias-free LibLinear machine.

    The machine is built from (C, feats_train, labels), switched to the
    L2R_L2LOSS_SVC solver, given an epsilon of 1e-3 and trained with the
    bias term disabled.
    """
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL

    stopping_epsilon = 1e-3

    machine = LibLinear(C, feats_train, labels)
    machine.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    machine.set_epsilon(stopping_epsilon)
    machine.set_bias_enabled(False)
    machine.train()
    return machine
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Train a LinearMulticlassMachine using the ONE_VS_ONE_STRATEGY constant.

    The base learner is LibLinear (L2R_L2LOSS_SVC, bias enabled, epsilon as
    given).  `width` and `C` are accepted but not read.

    Returns the labels produced by calling apply() without arguments.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, ONE_VS_REST_STRATEGY, ONE_VS_ONE_STRATEGY

    train_features = RealFeatures(fm_train_real)
    # NOTE(review): the test features are wrapped but never handed to
    # apply() below -- confirm whether predicting on them was intended.
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    machine = LinearMulticlassMachine(ONE_VS_ONE_STRATEGY, train_features, base_svm, train_labels)
    machine.train()

    prediction = machine.apply()
    return prediction.get_labels()
def classifier_multiclass_ecoc_ovr(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """Compare native one-vs-rest with ECOC one-vs-rest (LLB decoder).

    Both LinearMulticlassMachine instances share one LibLinear base learner.
    The number of differing test predictions is printed, and, when
    label_test_multiclass is not None, so is the accuracy of each machine.
    `lawidth` and `C` are accepted but not read.

    Returns a tuple (ecoc_predictions, ovr_predictions).
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    # Native one-vs-rest.
    ovr_machine = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), train_features, base_svm, train_labels)
    ovr_machine.train()
    label_mc = ovr_machine.apply(test_features)
    out_mc = label_mc.get_labels()

    # One-vs-rest codebook decoded with the LLB decoder.
    ecoc_machine = LinearMulticlassMachine(
        ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder()),
        train_features, base_svm, train_labels)
    ecoc_machine.train()
    label_ecoc = ecoc_machine.apply(test_features)
    out_ecoc = label_ecoc.get_labels()

    mismatches = (out_mc != out_ecoc).sum()
    if mismatches != 0:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (mismatches, len(out_mc)))
    else:
        print("Same results for OvR and ECOCOvR")

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        reference = MulticlassLabels(label_test_multiclass)
        scorer = MulticlassAccuracy()
        acc_mc = scorer.evaluate(label_mc, reference)
        acc_ecoc = scorer.evaluate(label_ecoc, reference)
        print('Normal OVR Accuracy = %.4f' % acc_mc)
        print('ECOC OVR Accuracy = %.4f' % acc_ecoc)

    return out_ecoc, out_mc
def create_svm(param, data, lab):
    """
    create SVM object with standard settings

    A LibLinear machine (dual L2-loss solver by default, switched to L2R_LR
    when flags["solver_type"] == "L2R_LR") is created when
    flags["svm_type"] == "liblineardual"; otherwise an SVMLight machine is
    created.  The machine is then passed through set_svm_parameters.

    @param param: parameter object; its "flags" mapping and "cost" are read
    @param data: kernel or feature object (for kernelized/linear svm)
    @param lab: label object

    @return: svm object
    """

    flags = param.flags

    # "in" replaces dict.has_key(), which no longer exists on Python 3.
    if "svm_type" in flags and flags["svm_type"] == "liblineardual":
        print("creating LibLinear object")
        svm = LibLinear(param.cost, data, lab)
        svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)

        # optional override of the solver type
        if "solver_type" in flags and flags["solver_type"] == "L2R_LR":
            print("setting linear solver type to: L2R_LR")
            svm.set_liblinear_solver_type(L2R_LR)
    else:
        print("creating SVMLight object")
        svm = SVMLight(param.cost, data, lab)

    return set_svm_parameters(svm, param)
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """Compare random-dense and random-sparse ECOC machines, with accuracy.

    Both machines share one LibLinear base learner and decode with
    ECOCHDDecoder.  When label_test_multiclass is not None the accuracy of
    each machine on the test set is printed.  `lawidth` and `C` are not read.

    Returns a tuple (sparse_predictions, dense_predictions).
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_svm = LibLinear(L2R_L2LOSS_SVC)
    base_svm.set_epsilon(epsilon)
    base_svm.set_bias_enabled(True)

    # Both strategies are built up front, dense first, as in the original.
    dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    dense_machine = LinearMulticlassMachine(dense_strategy, train_features, base_svm, train_labels)
    dense_machine.train()
    label_dense = dense_machine.apply(test_features)
    out_dense = label_dense.get_labels()

    sparse_machine = LinearMulticlassMachine(sparse_strategy, train_features, base_svm, train_labels)
    sparse_machine.train()
    label_sparse = sparse_machine.apply(test_features)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        reference = MulticlassLabels(label_test_multiclass)
        scorer = MulticlassAccuracy()
        acc_dense = scorer.evaluate(label_dense, reference)
        acc_sparse = scorer.evaluate(label_sparse, reference)
        print('Random Dense Accuracy = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
def classifier_liblinear_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Train a two-class LibLinear machine and classify the test features.

    fm_train_real        -- training feature matrix, wrapped in RealFeatures
    fm_test_real         -- test feature matrix, wrapped in RealFeatures
    label_train_twoclass -- training labels, wrapped in Labels
    C                    -- first argument of the LibLinear constructor
    epsilon              -- value passed to LibLinear.set_epsilon

    Returns a tuple (predictions, svm, predicted_labels) where predictions
    is the object returned by svm.apply() on the test features and
    predicted_labels is its get_labels() output.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    # Fixed seed so repeated runs are comparable.
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = Labels(label_train_twoclass)

    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    # Classify the test set once; the earlier version ran apply() twice and
    # threw the first result away.
    svm.set_features(feats_test)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def classifier_liblinear_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Fit LibLinear (dual L2-loss solver) on the training data and predict the test data.

    fm_train_real        -- training feature matrix, wrapped in RealFeatures
    fm_test_real         -- test feature matrix, wrapped in RealFeatures
    label_train_twoclass -- training labels, wrapped in Labels
    C                    -- first argument of the LibLinear constructor
    epsilon              -- value passed to LibLinear.set_epsilon

    Returns (predictions, svm, predictions.get_labels()).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    # Seed the toolbox RNG with a fixed value.
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = Labels(label_train_twoclass)

    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    # A single apply() is enough: the result of the previously duplicated
    # call was discarded, so it only cost an extra prediction pass.
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
# Script-level fragment of an ECOC encoder/decoder benchmark.
# It collects the names in `Classifier` matching ECOC.+Encoder / ECOC.+Decoder
# that pass nonabstract_class(), wraps traindat/testdat/label_traindat (and
# label_testdat when it is not None), creates a LibLinear base classifier with
# bias enabled, and prints a table header.
# NOTE(review): the definition of run_ecoc() is cut off after
# encoder.set_features(fea_train); the remainder and the names Classifier,
# nonabstract_class, RealFeatures, MulticlassLabels, LibLinear and
# L2R_L2LOSS_SVC are not visible in this fragment.
import re encoders = [x for x in dir(Classifier) if re.match(r'ECOC.+Encoder', x) and nonabstract_class(x)] decoders = [x for x in dir(Classifier) if re.match(r'ECOC.+Decoder', x) and nonabstract_class(x)] fea_train = RealFeatures(traindat) fea_test = RealFeatures(testdat) gnd_train = MulticlassLabels(label_traindat) if label_testdat is None: gnd_test = None else: gnd_test = MulticlassLabels(label_testdat) base_classifier = LibLinear(L2R_L2LOSS_SVC) base_classifier.set_bias_enabled(True) print('Testing with %d encoders and %d decoders' % (len(encoders), len(decoders))) print('-' * 70) format_str = '%%15s + %%-10s %%-10%s %%-10%s %%-10%s' print((format_str % ('s', 's', 's')) % ('encoder', 'decoder', 'codelen', 'time', 'accuracy')) def run_ecoc(ier, idr): encoder = getattr(Classifier, encoders[ier])() decoder = getattr(Classifier, decoders[idr])() # whether encoder is data dependent if hasattr(encoder, 'set_labels'): encoder.set_labels(gnd_train) encoder.set_features(fea_train)
def classifier_multiclass_ecoc(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """Run every instantiable ECOC encoder/decoder pair found in shogun.Classifier.

    For each pair a LinearMulticlassMachine over a shared LibLinear base
    learner (L2R_L2LOSS_SVC, bias enabled) is trained and applied to the test
    features; accuracy is computed when test labels were supplied.  The
    timing and accuracy values are computed but not reported, and nothing is
    returned.  `lawidth`, `C` and `epsilon` are not read.
    """
    import shogun.Classifier as Classifier
    from shogun.Classifier import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Evaluation import MulticlassAccuracy
    from shogun.Features import RealFeatures, MulticlassLabels

    def nonabstract_class(name):
        # A class counts as abstract when calling it raises TypeError.
        try:
            getattr(Classifier, name)()
        except TypeError:
            return False
        return True

    encoders = [candidate for candidate in dir(Classifier)
                if re.match(r'ECOC.+Encoder', candidate) and nonabstract_class(candidate)]
    decoders = [candidate for candidate in dir(Classifier)
                if re.match(r'ECOC.+Decoder', candidate) and nonabstract_class(candidate)]

    fea_train = RealFeatures(fm_train_real)
    fea_test = RealFeatures(fm_test_real)
    gnd_train = MulticlassLabels(label_train_multiclass)
    gnd_test = None
    if label_test_multiclass is not None:
        gnd_test = MulticlassLabels(label_test_multiclass)

    base_classifier = LibLinear(L2R_L2LOSS_SVC)
    base_classifier.set_bias_enabled(True)

    def run_ecoc(encoder_name, decoder_name):
        # Returns (number of binary machines, accuracy or None).
        encoder = getattr(Classifier, encoder_name)()
        decoder = getattr(Classifier, decoder_name)()

        # Data-dependent encoders expose set_labels/set_features.
        if hasattr(encoder, 'set_labels'):
            encoder.set_labels(gnd_train)
            encoder.set_features(fea_train)

        machine = LinearMulticlassMachine(ECOCStrategy(encoder, decoder), fea_train, base_classifier, gnd_train)
        machine.train()
        label_pred = machine.apply(fea_test)

        acc = None
        if gnd_test is not None:
            acc = MulticlassAccuracy().evaluate(label_pred, gnd_test)

        return (machine.get_num_machines(), acc)

    for encoder_name in encoders:
        for decoder_name in decoders:
            t_begin = time.clock()
            codelen, acc = run_ecoc(encoder_name, decoder_name)
            if acc is not None:
                acc_fmt = '.4f'
            else:
                acc_fmt = 's'
                acc = 'N/A'
            t_elapse = time.clock() - t_begin
# Fragment of a larger routine (enclosing definition not visible here).
# It obtains negative descriptors via sel_descs(), randomly keeps as many of
# them as there are rows in `descs`, stacks positives and negatives as
# float64, normalises them with norm_descs(), trains LibLinear(1, ...) on
# +1 / -1 labels, calls visualize_classes(), and pickles the trained machine
# to 'last_classifier.pkl'.
# NOTE(review): random.shuffle(sel) is applied to the result of range(), which
# is only a mutable list on Python 2 -- confirm the target interpreter.
# NOTE(review): the pickle file is opened with mode 'w' and closed manually;
# consider a `with` block and binary mode -- confirm.
#if use_masks2>0: #descs_neg, origdescs_neg = sel_descs(config, [3], ratio_keep, use_masks2=2) #visualize_descs(pylab.imread(config.img_filename(3)), origdescs_neg) descs_neg, origdescs_neg = sel_descs(config, imnos, ratio_keep, use_masks2=2) #SVM npos = descs.shape[0] sel = range(descs_neg.shape[0]) random.shuffle(sel) sel = sel[:npos] feats = np.vstack((descs.astype('float64'), descs_neg[sel].astype('float64'))) feats = norm_descs(feats) feats = RealFeatures(feats.T) labels = Labels(np.hstack((np.ones((1,descs.shape[0])), -1*np.ones((1,len(sel)))))[0]) svm = LibLinear(1, feats, labels) #k = Chi2Kernel(feats,feats, 1.0, 100) #svm = LibSVM(1, k, labels) svm.train() visualize_classes(svm, config, range(max(imnos),config.nimg)) print "Writting SVM" pf=open('last_classifier.pkl','w') cPickle.dump(svm, pf) pf.close() #VISUALIZE CLASSES #pylab.ion() #for i in range(len(centroids[0])): #pylab.figure(2)
def features_director_dot_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Train LibLinear on RealFeatures and on NumpyFeatures built from the same data.

    The computed dot-feature matrices of both training feature objects are
    printed.  Each machine uses the L2R_L2LOSS_SVC_DUAL solver with bias
    enabled and is applied to its test features twice (the first result is
    discarded).  NumpyFeatures is expected to be defined outside this
    function.

    Returns (predictions, svm, predictions.get_labels()) for the
    RealFeatures machine only.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    # Native feature objects.
    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = BinaryLabels(label_train_twoclass)

    # Director-backed feature objects over the same matrices.
    dfeats_train = NumpyFeatures(fm_train_real)
    dfeats_test = NumpyFeatures(fm_test_real)
    dlabels = BinaryLabels(label_train_twoclass)

    print(feats_train.get_computed_dot_feature_matrix())
    print(dfeats_train.get_computed_dot_feature_matrix())

    # --- machine on native features ---
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    svm.apply().get_labels()
    predictions = svm.apply()

    # --- machine on director features (single-threaded) ---
    dfeats_train.__disown__()
    dfeats_train.parallel.set_num_threads(1)

    dsvm = LibLinear(C, dfeats_train, dlabels)
    dsvm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    dsvm.set_epsilon(epsilon)
    dsvm.set_bias_enabled(True)
    dsvm.train()

    dfeats_test.__disown__()
    dfeats_test.parallel.set_num_threads(1)
    dsvm.set_features(dfeats_test)
    dsvm.apply().get_labels()
    dpredictions = dsvm.apply()

    return predictions, svm, predictions.get_labels()
def SVMLinear(traindata, trainlabs, testdata, C=1.0, eps=1e-5, threads=1,
              getw=False, useLibLinear=False, useL1R=False):
    """
    Linear SVM classification through shogun (SVMOcas by default, LibLinear
    on request), with a one-versus-all scheme for more than two classes.

    NOTE: the training and testing data should both be scaled such that each
    dimension ranges from 0 to 1

    traindata = n by d training data array
    trainlabs = n-length training data label vector (should be normalized so
                labels range from 0 to c-1, where c is the number of classes)
    testdata = m by d array of data to test
    C = SVM regularization constant
    eps = precision parameter passed to set_epsilon
    threads = number of threads to use
    getw = whether or not to return the learned weight vector from the SVM
           (only honoured for 2-class problems)
    useLibLinear = multiclass only: use LibLinear instead of SVMOcas
    useL1R = wrap traindata transposed; with useLibLinear, select the L1R_LR
             solver instead of L2R_L2LOSS_SVC

    returns:
    m-length vector containing the predicted labels of the instances in
    testdata; if the problem is 2-class and getw == True, a tuple of that
    vector and svm.get_w()
    """
    numc = trainlabs.max() + 1

    # The L1-regularised solver takes the training matrix transposed.
    if useL1R:
        trainfeats = wrapFeatures(traindata.T, sparse=False)
    else:
        trainfeats = wrapFeatures(traindata, sparse=False)
    testfeats = wrapFeatures(testdata, sparse=False)

    if numc > 2:
        num_test = testdata.shape[0]
        preds = np.zeros(num_test, dtype=np.int32)
        # Best score seen so far per test instance; starts at -inf so the
        # first class always wins.
        best_scores = np.zeros(num_test)
        best_scores.fill(-np.inf)

        for cls in range(numc):
            # +1 for the current class, -1 for everything else.
            one_vs_all = np.where(trainlabs == cls, 1.0, -1.0)
            labels = BinaryLabels(one_vs_all)

            if not useLibLinear:
                svm = SVMOcas(C, trainfeats, labels)
            else:
                svm = LibLinear(C, trainfeats, labels)
                # L1 regularisation on logistic loss, or the L2/L2-loss SVC.
                solver = L1R_LR if useL1R else L2R_L2LOSS_SVC
                svm.set_liblinear_solver_type(solver)

            svm.set_epsilon(eps)
            svm.parallel.set_num_threads(threads)
            svm.set_bias_enabled(True)
            svm.train()

            scores = svm.apply(testfeats).get_labels()
            improved = scores > best_scores
            preds[improved] = cls
            best_scores[improved] = scores[improved]

        return preds
    else:
        # Two classes: map label 0 to -1 and train a single SVMOcas.
        signed = trainlabs.copy()
        signed[signed == 0] = -1
        labels = Labels(np.float64(signed))

        svm = SVMOcas(C, trainfeats, labels)
        svm.set_epsilon(eps)
        svm.parallel.set_num_threads(threads)
        svm.set_bias_enabled(True)
        svm.train()

        # Map the outputs back to {0, 1}.
        res = svm.classify(testfeats).get_labels()
        res[res > 0] = 1
        res[res <= 0] = 0

        if getw == True:
            return res, svm.get_w()
        return res
def features_director_dot_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Exercise a Python-implemented DirectorDotFeatures subclass with LibLinear.

    If DirectorDotFeatures or RealVector cannot be imported a hint is printed
    and None is returned.  Otherwise LibLinear (L2R_L2LOSS_SVC_DUAL, bias
    enabled) is trained once on RealFeatures and once on the numpy-backed
    director features; each machine is applied to its test features twice
    (the first result is discarded).

    Returns (predictions, svm, predictions.get_labels()) for the
    RealFeatures machine only.
    """
    try:
        from shogun.Features import DirectorDotFeatures
        from shogun.Library import RealVector
    except ImportError:
        print("recompile shogun with --enable-swig-directors")
        return

    class NumpyFeatures(DirectorDotFeatures):
        """Dot features backed by a numpy matrix with one column per vector."""

        # class-level placeholder, replaced per instance in __init__
        data = numpy.empty((1, 1))

        def __init__(self, d):
            DirectorDotFeatures.__init__(self)
            self.data = d

        # --- overloaded methods ---

        def add_to_dense_sgvec(self, alpha, vec_idx1, vec2, abs):
            column = self.data[:, vec_idx1]
            if abs:
                column = numpy.abs(column)
            # in-place update: the caller's vector must be modified
            vec2 += alpha * column

        def dot(self, vec_idx1, df, vec_idx2):
            other = df.get_computed_dot_feature_vector(vec_idx2)
            return numpy.dot(self.data[:, vec_idx1], other)

        def dense_dot_sgvec(self, vec_idx1, vec2):
            return numpy.dot(self.data[:, vec_idx1], vec2[0:vec2.vlen])

        def get_num_vectors(self):
            return self.data.shape[1]

        def get_dim_feature_space(self):
            return self.data.shape[0]

    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    # Native feature objects.
    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = BinaryLabels(label_train_twoclass)

    # Director-backed feature objects over the same matrices.
    dfeats_train = NumpyFeatures(fm_train_real)
    dfeats_test = NumpyFeatures(fm_test_real)
    dlabels = BinaryLabels(label_train_twoclass)

    print(feats_train.get_computed_dot_feature_matrix())
    print(dfeats_train.get_computed_dot_feature_matrix())

    # --- machine on native features ---
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    svm.apply().get_labels()
    predictions = svm.apply()

    # --- machine on director features (single-threaded) ---
    dfeats_train.__disown__()
    dfeats_train.parallel.set_num_threads(1)

    dsvm = LibLinear(C, dfeats_train, dlabels)
    dsvm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    dsvm.set_epsilon(epsilon)
    dsvm.set_bias_enabled(True)
    dsvm.train()

    dfeats_test.__disown__()
    dfeats_test.parallel.set_num_threads(1)
    dsvm.set_features(dfeats_test)
    dsvm.apply().get_labels()
    dpredictions = dsvm.apply()

    return predictions, svm, predictions.get_labels()
def modelselection_grid_search_liblinear_modular(traindat=traindat, label_traindat=label_traindat):
    """Grid-search C1 and C2 of a LibLinear machine with 10-fold cross-validation.

    Autolock is switched off on the CrossValidation instance.  The best
    parameter combination found is applied to the classifier and
    cross-validation is run once more.  Nothing is returned.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # Parameter tree: root -> C1, C2, each with values built from the
    # range [-2, 2] using R_EXP.
    root = ModelSelectionParameters()

    node_c1 = ModelSelectionParameters("C1")
    root.append_child(node_c1)
    node_c1.build_values(-2.0, 2.0, R_EXP)

    node_c2 = ModelSelectionParameters("C2")
    root.append_child(node_c2)
    node_c2.build_values(-2.0, 2.0, R_EXP)

    # Training data.
    train_features = RealFeatures(traindat)
    train_labels = Labels(label_traindat)

    # Machine whose parameters are selected.
    machine = LibLinear(L2R_L2LOSS_SVC)

    # Stratified 10-fold splitting scored by accuracy.
    splitter = StratifiedCrossValidationSplitting(train_labels, 10)
    criterion = ContingencyTableEvaluation(ACCURACY)
    cross_validation = CrossValidation(machine, train_features, train_labels, splitter, criterion)
    cross_validation.set_autolock(False)

    # Exhaustive search over the parameter tree.
    grid_search = GridSearchModelSelection(root, cross_validation)
    best_parameters = grid_search.select_model()

    # Re-evaluate with the winning combination applied.
    best_parameters.apply_to_machine(machine)
    result = cross_validation.evaluate()
def liblinear():
    """Train LibLinear on sparse features and print the predicted test labels.

    Reads the module-level fm_train_real, fm_test_real and
    label_train_twoclass.  Uses C=0.9, epsilon=1e-5, one thread, and an
    enabled bias term.
    """
    print('LibLinear')

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear

    # Dense -> sparse conversion for the training set.
    dense = RealFeatures(fm_train_real)
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense)

    # Dense -> sparse conversion for the test set.
    dense = RealFeatures(fm_test_real)
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense)

    C = 0.9
    epsilon = 1e-5
    num_threads = 1
    labels = Labels(label_train_twoclass)

    svm = LibLinear(C, sparse_train, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(sparse_test)
    print(svm.classify().get_labels())
# Fragment of a larger routine (enclosing definition not visible here).
# It converts examples_presvm to a transposed float64 matrix, wraps it in
# RealFeatures with labels from labels_presvm, trains a LibLinear machine
# (cost 1, max 10000 iterations, bias disabled) and a LibSVM machine on a
# LinearKernel (cost 1, bias disabled), then copies the LibLinear weight
# vector into a freshly constructed LibLinear via set_w() without calling
# train() on it.
############################################# # compute pre-svm ############################################# # create real-valued features as first step examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64) examples_presvm = numpy.transpose(examples_presvm) feat_presvm = RealFeatures(examples_presvm) lab_presvm = Labels(numpy.array(labels_presvm)) wdk_presvm = LinearKernel(feat_presvm, feat_presvm) presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm) presvm_liblinear.set_max_iterations(10000) presvm_liblinear.set_bias_enabled(False) presvm_liblinear.train() presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm) #presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm) #presvm_libsvm.io.set_loglevel(MSG_DEBUG) presvm_libsvm.set_bias_enabled(False) presvm_libsvm.train() my_w = presvm_liblinear.get_w() presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm) presvm_liblinear.set_w(my_w)