def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """
    Compare a plain one-vs-rest machine with an ECOC one-vs-rest machine.

    Both machines wrap the same LibLinear(L2R_L2LOSS_SVC) base learner, are
    trained on the training features/labels and applied to the test
    features. One line is printed saying whether the two prediction vectors
    are identical and, if not, how many entries differ.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to Labels
    @param width: not used by this function
    @param C: not used by this function
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: tuple (out_ecoc, out_mc) with the ECOC predictions first
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCHDDecoder, MulticlassOneVsRestStrategy

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    # base binary learner shared by both multiclass machines
    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # plain one-vs-rest
    ovr_machine = LinearMulticlassMachine(
        MulticlassOneVsRestStrategy(), train_features, base_learner, train_labels)
    ovr_machine.train()
    ovr_prediction = ovr_machine.apply(test_features)
    out_mc = ovr_prediction.get_labels()

    # one-vs-rest expressed as an ECOC strategy
    ovr_as_ecoc = ECOCStrategy(ECOCOVREncoder(), ECOCHDDecoder())
    ecoc_machine = LinearMulticlassMachine(
        ovr_as_ecoc, train_features, base_learner, train_labels)
    ecoc_machine.train()
    ecoc_prediction = ecoc_machine.apply(test_features)
    out_ecoc = ecoc_prediction.get_labels()

    mismatches = (out_mc != out_ecoc).sum()
    if mismatches != 0:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (mismatches, len(out_mc)))
    else:
        print("Same results for OvR and ECOCOvR")

    return out_ecoc, out_mc
def create_svm(param, data, lab):
    """
    create SVM object with standard settings

    A LibLinear machine is created when param.flags["svm_type"] equals
    "liblineardual"; otherwise an SVMLight machine is created. The new
    machine is then passed through set_svm_parameters.

    @param param: parameter object; param.flags (a mapping) and param.cost
                  are read here
    @param data: kernel or feature object (for kernelized/linear svm)
    @param lab: label object

    @return: svm object, as returned by set_svm_parameters(svm, param)
    """

    flags = param.flags

    # create SVM
    # ("key in mapping" replaces dict.has_key, which no longer exists in
    # Python 3; print(...) with one argument behaves the same on 2 and 3)
    if "svm_type" in flags and flags["svm_type"] == "liblineardual":

        print("creating LibLinear object")
        svm = LibLinear(param.cost, data, lab)
        svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)

        # set solver type: an explicit "solver_type" flag overrides the
        # default chosen above
        if "solver_type" in flags and flags["solver_type"] == "L2R_LR":
            print("setting linear solver type to: L2R_LR")
            svm.set_liblinear_solver_type(L2R_LR)

    else:

        print("creating SVMLight object")
        svm = SVMLight(param.cost, data, lab)

    return set_svm_parameters(svm, param)
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """
    Train a one-vs-one linear multiclass machine and predict the test set.

    The base learner is LibLinear(L2R_L2LOSS_SVC) with bias enabled. When
    test labels are supplied, the multiclass accuracy of the predictions is
    printed.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to MulticlassLabels
    @param label_test_multiclass: test labels or None; None skips evaluation
    @param width: not used by this function
    @param C: not used by this function
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: predicted labels for the test features (result of get_labels())
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # Predict on the test features. The previous argument-less apply() left
    # feats_test unused and scored training-set predictions against the
    # test labels below.
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def modelselection_grid_search_linear_modular(traindat=traindat, label_traindat=label_traindat):
    """
    Grid-search the C1 and C2 parameters of a LibLinear classifier.

    Candidate values for both parameters are built with
    build_values(-2.0, 2.0, R_EXP). Each combination is scored by 10-fold
    stratified cross-validation with ACCURACY as criterion. The best
    combination is applied to the classifier and cross-validation is run
    once more. Nothing is returned.

    @param traindat: training feature matrix, passed to RealFeatures
    @param label_traindat: training labels, passed to Labels
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # parameter tree: one child node per parameter, each with the same range
    search_space = ModelSelectionParameters()
    for parameter_name in ("C1", "C2"):
        node = ModelSelectionParameters(parameter_name)
        search_space.append_child(node)
        node.build_values(-2.0, 2.0, R_EXP)

    # training data
    feats = RealFeatures(traindat)
    targets = Labels(label_traindat)

    # the machine being tuned
    machine = LibLinear(L2R_L2LOSS_SVC)

    # list every parameter the machine registers for model selection
    machine.print_modsel_params()

    # cross-validation: 10 stratified folds, judged by accuracy
    folds = StratifiedCrossValidationSplitting(targets, 10)
    criterion = ContingencyTableEvaluation(ACCURACY)
    cross_validation = CrossValidation(machine, feats, targets, folds, criterion)

    # run the grid search
    grid_search = GridSearchModelSelection(search_space, cross_validation)
    best_combination = grid_search.select_model()

    # configure the machine with the winner and evaluate it once more
    best_combination.apply_to_machine(machine)
    result = cross_validation.evaluate()
def features_director_dot_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """
    Train LibLinear on RealFeatures and again on NumpyFeatures.

    The same matrices are wrapped once as RealFeatures and once as
    NumpyFeatures (a class defined elsewhere in this file). The computed
    dot feature matrix of both training sets is printed, then a LibLinear
    machine (L2R_L2LOSS_SVC_DUAL, bias enabled) is trained and applied for
    each of the two feature types.

    @param fm_train_real: training feature matrix
    @param fm_test_real: test feature matrix
    @param label_train_twoclass: training labels, passed to BinaryLabels
    @param C: cost value passed to the LibLinear constructor
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: tuple (predictions, svm, predictions.get_labels()) of the
             RealFeatures machine; the NumpyFeatures results are not returned
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = BinaryLabels(label_train_twoclass)

    dfeats_train = NumpyFeatures(fm_train_real)
    dfeats_test = NumpyFeatures(fm_test_real)
    dlabels = BinaryLabels(label_train_twoclass)

    # print(...) with a single argument prints the same text on Python 2
    # and Python 3; the former print statements were Python-2-only
    print(feats_train.get_computed_dot_feature_matrix())
    print(dfeats_train.get_computed_dot_feature_matrix())

    # machine on the RealFeatures
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    # result of this first apply() is discarded, as in the original
    svm.apply().get_labels()
    predictions = svm.apply()

    # machine on the NumpyFeatures
    # NOTE(review): __disown__() presumably hands ownership of the Python
    # object to the wrapped library before it is used there - confirm.
    dfeats_train.__disown__()
    dfeats_train.parallel.set_num_threads(1)
    dsvm = LibLinear(C, dfeats_train, dlabels)
    dsvm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    dsvm.set_epsilon(epsilon)
    dsvm.set_bias_enabled(True)
    dsvm.train()

    dfeats_test.__disown__()
    dfeats_test.parallel.set_num_threads(1)
    dsvm.set_features(dfeats_test)
    dsvm.apply().get_labels()
    dpredictions = dsvm.apply()

    return predictions, svm, predictions.get_labels()
def train_svm(feats_train, labels, C=1):
    """
    Train a LibLinear machine without bias term and return it.

    The solver type is L2R_L2LOSS_SVC and epsilon is fixed to 1e-3.

    @param feats_train: training features passed to LibLinear
    @param labels: training labels passed to LibLinear
    @param C: cost value passed to LibLinear (default 1)

    @return: the trained LibLinear object
    """
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL

    machine = LibLinear(C, feats_train, labels)
    machine.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    machine.set_epsilon(1e-3)
    machine.set_bias_enabled(False)
    machine.train()

    return machine
def classifier_multiclass_ecoc_ovr(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """
    Compare plain one-vs-rest with ECOC one-vs-rest (ECOCLLBDecoder).

    Both machines share one LibLinear(L2R_L2LOSS_SVC) base learner, are
    trained on the training data and applied to the test features. A line
    is printed saying whether the predictions agree; when test labels are
    given, the accuracy of both machines is printed as well.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to MulticlassLabels
    @param label_test_multiclass: test labels or None; None skips evaluation
    @param lawidth: not used by this function
    @param C: not used by this function
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: tuple (out_ecoc, out_mc) with the ECOC predictions first
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    # base binary learner shared by both multiclass machines
    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # plain one-vs-rest
    ovr_machine = LinearMulticlassMachine(
        MulticlassOneVsRestStrategy(), train_features, base_learner, train_labels)
    ovr_machine.train()
    label_mc = ovr_machine.apply(test_features)
    out_mc = label_mc.get_labels()

    # one-vs-rest expressed as an ECOC strategy
    ovr_as_ecoc = ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())
    ecoc_machine = LinearMulticlassMachine(
        ovr_as_ecoc, train_features, base_learner, train_labels)
    ecoc_machine.train()
    label_ecoc = ecoc_machine.apply(test_features)
    out_ecoc = label_ecoc.get_labels()

    mismatches = (out_mc != out_ecoc).sum()
    if mismatches != 0:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (mismatches, len(out_mc)))
    else:
        print("Same results for OvR and ECOCOvR")

    # nothing to evaluate against: return the raw predictions
    if label_test_multiclass is None:
        return out_ecoc, out_mc

    from shogun.Evaluation import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy()
    acc_mc = accuracy.evaluate(label_mc, truth)
    acc_ecoc = accuracy.evaluate(label_ecoc, truth)
    print('Normal OVR Accuracy = %.4f' % acc_mc)
    print('ECOC OVR Accuracy = %.4f' % acc_ecoc)

    return out_ecoc, out_mc
def classifier_multiclass_ecoc_random(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """
    Run ECOC multiclass machines with random dense and random sparse codes.

    Both machines use ECOCHDDecoder and share one LibLinear(L2R_L2LOSS_SVC)
    base learner. They are trained on the training data and applied to the
    test features; when test labels are given, both accuracies are printed.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to MulticlassLabels
    @param label_test_multiclass: test labels or None; None skips evaluation
    @param lawidth: not used by this function
    @param C: not used by this function
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: tuple (out_sparse, out_dense) with the sparse predictions first
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    # base binary learner shared by both multiclass machines
    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # both strategies are created up front, dense first
    dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    # random dense code
    dense_machine = LinearMulticlassMachine(
        dense_strategy, train_features, base_learner, train_labels)
    dense_machine.train()
    label_dense = dense_machine.apply(test_features)
    out_dense = label_dense.get_labels()

    # random sparse code
    sparse_machine = LinearMulticlassMachine(
        sparse_strategy, train_features, base_learner, train_labels)
    sparse_machine.train()
    label_sparse = sparse_machine.apply(test_features)
    out_sparse = label_sparse.get_labels()

    # nothing to evaluate against: return the raw predictions
    if label_test_multiclass is None:
        return out_sparse, out_dense

    from shogun.Evaluation import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy()
    acc_dense = accuracy.evaluate(label_dense, truth)
    acc_sparse = accuracy.evaluate(label_sparse, truth)
    print('Random Dense Accuracy = %.4f' % acc_dense)
    print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """
    Run an ECOC multiclass machine with a discriminant encoder.

    The ECOCDiscriminantEncoder is given the training features and labels
    and 50 SFFS iterations; decoding uses ECOCHDDecoder. The base learner
    is LibLinear(L2R_L2LOSS_SVC) with bias enabled. When test labels are
    given, the accuracy on the test features is printed.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to MulticlassLabels
    @param label_test_multiclass: test labels or None; None skips evaluation
    @param lawidth: not used by this function
    @param C: not used by this function
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: predicted labels for the test features (result of get_labels())
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    # base binary learner
    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # the encoder gets the training data before it is put into a strategy
    discriminant_encoder = ECOCDiscriminantEncoder()
    discriminant_encoder.set_features(train_features)
    discriminant_encoder.set_labels(train_labels)
    discriminant_encoder.set_sffs_iterations(50)

    ecoc_strategy = ECOCStrategy(discriminant_encoder, ECOCHDDecoder())

    ecoc_machine = LinearMulticlassMachine(
        ecoc_strategy, train_features, base_learner, train_labels)
    ecoc_machine.train()
    label_pred = ecoc_machine.apply(test_features)
    out = label_pred.get_labels()

    # nothing to evaluate against: return the raw predictions
    if label_test_multiclass is None:
        return out

    from shogun.Evaluation import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    acc = MulticlassAccuracy().evaluate(label_pred, truth)
    print('Accuracy = %.4f' % acc)

    return out
def evaluation_cross_validation_classification(traindat=traindat, label_traindat=label_traindat):
    """
    Cross-validate a LibLinear classifier and print the result.

    Uses 5-fold stratified splitting repeated 10 times with ACCURACY as
    criterion. The mean is printed, and the confidence interval as well
    when the result reports one. Nothing is returned.

    @param traindat: training feature matrix, passed to RealFeatures
    @param label_traindat: training labels, passed to Labels
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # training data
    features = RealFeatures(traindat)
    labels = Labels(label_traindat)

    # classifier
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # splitting strategy for 5 fold cross-validation; the stratified variant
    # is used here because this is a classification problem
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(classifier, features, labels,
                                       splitting_strategy, evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually
    # needed for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # perform cross-validation and print results
    result = cross_validation.evaluate()

    # Each line is formatted into ONE string first so that print(...) emits
    # the same text on Python 2 and Python 3 as the former multi-argument
    # print statements did on Python 2.
    print("mean: %s" % (result.mean,))
    if result.has_conf_int:
        print("[ %s , %s ] with alpha= %s" % (
            result.conf_int_low, result.conf_int_up, result.conf_int_alpha))
def solver_dcd_shogun_debug(C, all_xt, all_lt, task_indicator, M, L):
    """
    use standard LibLinear for debugging purposes

    Trains a single LibLinear machine (L2R_L1LOSS_SVC_DUAL, no bias, debug
    log level) on all examples and returns its weight vector.

    @param C: cost, passed as set_C(C, C)
    @param all_xt: examples; string examples are turned into hashed features
                   via create_hashed_features_wdk(xt, 8), anything else is
                   wrapped as RealFeatures(xt.T)
    @param all_lt: labels, one per example
    @param task_indicator: task id per example (converted to int32)
    @param M: array-like with a .shape; only checked against L.shape
    @param L: array-like with a .shape; L.shape[0] is the number of tasks

    @return: tuple (W, 42, 42) where W is a one-element list holding
             svm.get_w(); the two 42 values are constant placeholders
    @raise AssertionError: if the input sizes are inconsistent
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    # isinstance (not an exact type comparison) is required here: elements
    # of a numpy string array are numpy string scalars, which subclass str
    # but are not exactly str, so "type(...) == str" sent string data to
    # the RealFeatures branch.
    if isinstance(xt[0], str):
        feat = create_hashed_features_wdk(xt, 8)
    else:
        feat = RealFeatures(xt.T)

    lab = Labels(lt)

    # set up machinery
    svm = LibLinear()
    svm.set_liblinear_solver_type(L2R_L1LOSS_SVC_DUAL)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # invoke training
    svm.set_labels(lab)
    svm.train(feat)

    # get model parameters
    W = [svm.get_w()]

    return W, 42, 42
def classifier_liblinear_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """
    Train a two-class LibLinear machine and classify the test features.

    The random generator is initialised with 17, the solver type is
    L2R_L2LOSS_SVC_DUAL and the bias term is enabled.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_twoclass: training labels, passed to Labels
    @param C: cost value passed to the LibLinear constructor
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: tuple (predictions, svm, predictions.get_labels())
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_twoclass)

    machine = LibLinear(C, train_features, train_labels)
    machine.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    machine.set_epsilon(epsilon)
    machine.set_bias_enabled(True)
    machine.train()

    # switch to the test features; the machine is applied twice and the
    # result of the first application is thrown away
    machine.set_features(test_features)
    discarded = machine.apply()
    discarded.get_labels()
    predictions = machine.apply()

    predicted_values = predictions.get_labels()
    return predictions, machine, predicted_values
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """
    Run ECOC multiclass machines with random dense and random sparse codes.

    Both machines use ECOCHDDecoder and share one LibLinear(L2R_L2LOSS_SVC)
    base learner. They are trained on the training data and applied to the
    test features.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to Labels
    @param width: not used by this function
    @param C: not used by this function
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: tuple (out_sparse, out_dense) with the sparse predictions first
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_multiclass)

    # base binary learner shared by both multiclass machines
    base_learner = LibLinear(L2R_L2LOSS_SVC)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # both strategies are created up front, dense first
    dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    # random dense code
    dense_machine = LinearMulticlassMachine(
        dense_strategy, train_features, base_learner, train_labels)
    dense_machine.train()
    dense_prediction = dense_machine.apply(test_features)
    out_dense = dense_prediction.get_labels()

    # random sparse code
    sparse_machine = LinearMulticlassMachine(
        sparse_strategy, train_features, base_learner, train_labels)
    sparse_machine.train()
    sparse_prediction = sparse_machine.apply(test_features)
    out_sparse = sparse_prediction.get_labels()

    return out_sparse, out_dense
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """
    Train a one-vs-one linear multiclass machine and predict the test set.

    The base learner is LibLinear(L2R_L2LOSS_SVC) with bias enabled.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to MulticlassLabels
    @param width: not used by this function
    @param C: not used by this function
    @param epsilon: value handed to LibLinear.set_epsilon

    @return: predicted labels for the test features (result of get_labels())
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # Predict on the test features. The previous argument-less apply() left
    # feats_test unused and returned predictions for the training features.
    out = mc_classifier.apply(feats_test).get_labels()

    return out
def get_presvm(B=2.0):
    """
    Check that a saved and re-loaded pre-SVM yields the same domain
    adaptation SVM outputs as the in-memory one.

    Steps, all on hard-coded toy data:
      1. train a LibLinear "pre-SVM" (no bias, max 10000 iterations),
      2. train a DomainAdaptationSVMLinear on top of it,
      3. save the pre-SVM to /tmp/svm with helper.save, load it back with
         helper.load and train a second DomainAdaptationSVMLinear on the
         loaded copy,
      4. save and load the first domain adaptation SVM via /tmp/dasvm,
      5. classify the test examples with both machines and print every
         pair of outputs that differs by more than 0.001.

    "classification agrees." is printed only when no such pair was found.

    @param B: passed as last argument to DomainAdaptationSVMLinear

    @return: None; results are only printed
    """

    examples_presvm = [
        numpy.array([2.1788894, 3.89163458, 5.55086917, 6.4022742, 3.14964751, -0.4622959, 5.38538904, 5.9962938, 6.29690849]),
        numpy.array([2.1788894, 3.89163458, 5.55086917, 6.4022742, 3.14964751, -0.4622959, 5.38538904, 5.9962938, 6.29690849]),
        numpy.array([0.93099452, 0.38871617, 1.57968949, 1.25672527, -0.8123137, 0.20786586, 1.378121, 1.15598866, 0.80265343]),
        numpy.array([0.68705535, 0.15144113, -0.81306157, -0.7664577, 1.16452945, -0.2712956, 0.483094, -0.16302007, -0.39094812]),
        numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497, -0.30425755, 1.63157052, 1.15990427, 0.63801465]),
        numpy.array([0.68705535, 0.15144113, -0.81306157, -0.7664577, 1.16452945, -0.2712956, 0.483094, -0.16302007, -0.39094812]),
        numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497, -0.30425755, 1.63157052, 1.15990427, 0.63801465]),
        numpy.array([-0.98028302, -0.23974489, 2.1687206, 1.99338824, -0.67070205, -0.33167281, 1.3500379, 1.34915685, 1.13747975]),
        numpy.array([0.67109612, 0.12662017, -0.48254886, -0.49091898, 1.31522237, -0.34108933, 0.57832179, -0.01992828, -0.26581628]),
        numpy.array([0.3193611, 0.44903416, 3.62187778, 4.1490827, 1.58832961, 1.95583397, 1.36836023, 1.92521945, 2.41114998]),
    ]
    labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0]

    examples = [
        numpy.array([-0.49144487, -0.19932263, -0.00408188, -0.21262012, 0.14621013, -0.50415481, 0.32317317, -0.00317602, -0.21422637]),
        numpy.array([0.0511817, -0.04226666, -0.30454651, -0.38759116, 0.31639514, 0.32558471, 0.49364473, 0.04515591, -0.06963456]),
        numpy.array([-0.30324369, -0.11909251, -0.03210278, -0.2779561, 1.31488853, -0.33165365, 0.60176018, -0.00384946, -0.15603975]),
        numpy.array([0.59282756, -0.0039991, -0.26028983, -0.26722552, 1.63314995, -0.51199338, 0.33340685, -0.0170519, -0.19211039]),
        numpy.array([-0.18338766, -0.07783465, 0.42019824, 0.201753, 2.01160098, 0.33326111, 0.75591909, 0.36631525, 0.1761829]),
        numpy.array([0.10273793, -0.02189574, 0.91092358, 0.74827973, 0.51882902, -0.1286531, 0.64463658, 0.67468349, 0.55587266]),
        numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]),
        numpy.array([-0.22494375, -0.15492964, 0.28017737, 0.29794467, 0.96403895, 0.43880289, 0.08053425, 0.07456818, 0.12102371]),
        numpy.array([-0.18161417, -0.17692039, 0.19554942, -0.00785625, 1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483]),
        numpy.array([-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256, 0.94205875, 0.40162798, 0.00327328, -0.24107393]),
    ]
    labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0]

    examples_test = [
        numpy.array([-0.45159799, -0.11401394, 1.28574573, 1.09144306, 0.92253119, -0.47230164, 0.77032486, 0.83047366, 0.74768906]),
        numpy.array([0.42613105, 0.0092778, -0.78640296, -0.71632445, 0.41154244, 0.88380309, 0.19475759, -0.14195876, -0.30479425]),
        numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]),
        numpy.array([0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017, -0.1339607, -0.1956124, -0.2428358, -0.25761213]),
        numpy.array([1.23679696, 0.18753081, -0.25593329, -0.12051991, 0.64976989, -0.17184101, 0.14951337, 0.01988587, -0.0356698]),
        numpy.array([1.03355002, 0.05316195, -0.97905368, -0.75482121, 0.28673776, 2.27142733, 0.02654739, -0.31109851, -0.44555277]),
        numpy.array([-0.53662325, -0.21434756, -0.12105795, -0.27531257, 0.66947047, 0.05474302, -0.00717455, -0.17700575, -0.22253444]),
        numpy.array([0.11272632, -0.12674826, -0.49736457, -0.51445609, 0.88518932, -0.51558669, -0.12000557, -0.32973613, -0.38488736]),
        numpy.array([0.8372111, 0.06972199, -1.00454229, -0.79869642, 1.19376333, -0.40160273, -0.25122157, -0.46417918, -0.50234858]),
        numpy.array([-0.36325018, -0.12206184, 0.10525247, -0.15663416, 1.03616948, -0.51699463, 0.59566286, 0.35363369, 0.10545559]),
    ]

    #############################################
    #    compute pre-svm
    #############################################

    # create real-valued features as first step
    # (the list of row vectors is transposed before it goes to RealFeatures)
    examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
    examples_presvm = numpy.transpose(examples_presvm)

    feat_presvm = RealFeatures(examples_presvm)
    lab_presvm = Labels(numpy.array(labels_presvm))
    # kept from the original; the kernel is not used below
    wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

    presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
    presvm_liblinear.set_max_iterations(10000)
    presvm_liblinear.set_bias_enabled(False)
    presvm_liblinear.train()

    #############################################
    #    compute linear term manually
    #############################################

    examples = numpy.array(examples, dtype=numpy.float64)
    examples = numpy.transpose(examples)

    feat = RealFeatures(examples)
    lab = Labels(numpy.array(labels))

    dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
    dasvm_liblinear.set_bias_enabled(False)
    dasvm_liblinear.train()

    # round-trip the pre-SVM through helper.save/helper.load and build a
    # second domain adaptation SVM on the loaded copy
    helper.save("/tmp/svm", presvm_liblinear)
    presvm_pickle = helper.load("/tmp/svm")

    dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B)
    dasvm_pickle.set_bias_enabled(False)
    dasvm_pickle.train()

    # round-trip the domain adaptation SVM as well; the loaded object is
    # not used afterwards
    helper.save("/tmp/dasvm", dasvm_liblinear)
    dasvm_pickle2 = helper.load("/tmp/dasvm")

    #############################################
    #    load test data
    #############################################

    examples_test = numpy.array(examples_test, dtype=numpy.float64)
    examples_test = numpy.transpose(examples_test)
    feat_test = RealFeatures(examples_test)

    # check if pickled and unpickled classifiers behave the same
    out1 = dasvm_liblinear.classify(feat_test).get_labels()
    out2 = dasvm_pickle.classify(feat_test).get_labels()

    # compare outputs
    # A plain comparison replaces the former "assert inside try / bare
    # except": that construct is disabled under "python -O" and swallowed
    # unrelated exceptions. "not (x <= tol)" keeps NaN differences reported.
    all_agree = True
    for value1, value2 in zip(out1, out2):
        if not abs(value1 - value2) <= 0.001:
            print("(%.5f, %.5f)" % (value1, value2))
            all_agree = False

    # only claim agreement when no differing pair was printed above
    if all_agree:
        print("classification agrees.")
if re.match(r'ECOC.+Encoder', x) and nonabstract_class(x) ] decoders = [ x for x in dir(Classifier) if re.match(r'ECOC.+Decoder', x) and nonabstract_class(x) ] fea_train = RealFeatures(traindat) fea_test = RealFeatures(testdat) gnd_train = MulticlassLabels(label_traindat) if label_testdat is None: gnd_test = None else: gnd_test = MulticlassLabels(label_testdat) base_classifier = LibLinear(L2R_L2LOSS_SVC) base_classifier.set_bias_enabled(True) print('Testing with %d encoders and %d decoders' % (len(encoders), len(decoders))) print('-' * 70) format_str = '%%15s + %%-10s %%-10%s %%-10%s %%-10%s' print((format_str % ('s', 's', 's')) % ('encoder', 'decoder', 'codelen', 'time', 'accuracy')) def run_ecoc(ier, idr): encoder = getattr(Classifier, encoders[ier])() decoder = getattr(Classifier, decoders[idr])() # whether encoder is data dependent
]) ] ############################################# # compute pre-svm ############################################# # create real-valued features as first step examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64) examples_presvm = numpy.transpose(examples_presvm) feat_presvm = RealFeatures(examples_presvm) lab_presvm = Labels(numpy.array(labels_presvm)) wdk_presvm = LinearKernel(feat_presvm, feat_presvm) presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm) presvm_liblinear.set_max_iterations(10000) presvm_liblinear.set_bias_enabled(False) presvm_liblinear.train() presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm) #presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm) #presvm_libsvm.io.set_loglevel(MSG_DEBUG) presvm_libsvm.set_bias_enabled(False) presvm_libsvm.train() my_w = presvm_liblinear.get_w() presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm) presvm_liblinear.set_w(my_w)
def classifier_multiclass_ecoc(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """
    Train an ECOC multiclass machine for every encoder/decoder pair.

    Encoders and decoders are discovered by name in shogun.Classifier
    (names matching ECOC.+Encoder / ECOC.+Decoder whose constructor can be
    called without arguments). Each pair is trained on the training data
    with a shared LibLinear(L2R_L2LOSS_SVC) base learner and applied to the
    test features; accuracy is computed when test labels are given.

    NOTE(review): in the code visible here the per-pair values (codelen,
    acc, acc_fmt, t_elapse) are computed but not used further, and nothing
    is returned - presumably a reporting step followed the timing
    originally; confirm.

    @param fm_train_real: training feature matrix, passed to RealFeatures
    @param fm_test_real: test feature matrix, passed to RealFeatures
    @param label_train_multiclass: training labels, passed to MulticlassLabels
    @param label_test_multiclass: test labels or None; None skips evaluation
    @param lawidth: not used by this function
    @param C: not used by this function
    @param epsilon: not used by this function
    """
    import shogun.Classifier as Classifier
    from shogun.Classifier import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Evaluation import MulticlassAccuracy
    from shogun.Features import RealFeatures, MulticlassLabels

    # time.clock() no longer exists in Python >= 3.8; use perf_counter
    # where available and fall back to clock on interpreters without it
    timer = getattr(time, 'perf_counter', None) or time.clock

    def nonabstract_class(name):
        # a class counts as abstract when calling it raises TypeError
        try:
            getattr(Classifier, name)()
        except TypeError:
            return False
        return True

    encoders = [x for x in dir(Classifier)
                if re.match(r'ECOC.+Encoder', x) and nonabstract_class(x)]
    decoders = [x for x in dir(Classifier)
                if re.match(r'ECOC.+Decoder', x) and nonabstract_class(x)]

    fea_train = RealFeatures(fm_train_real)
    fea_test = RealFeatures(fm_test_real)
    gnd_train = MulticlassLabels(label_train_multiclass)
    if label_test_multiclass is None:
        gnd_test = None
    else:
        gnd_test = MulticlassLabels(label_test_multiclass)

    base_classifier = LibLinear(L2R_L2LOSS_SVC)
    base_classifier.set_bias_enabled(True)

    #print('Testing with %d encoders and %d decoders' % (len(encoders), len(decoders)))
    #print('-' * 70)
    #format_str = '%%15s + %%-10s  %%-10%s %%-10%s %%-10%s'
    #print((format_str % ('s', 's', 's')) % ('encoder', 'decoder', 'codelen', 'time', 'accuracy'))

    def run_ecoc(ier, idr):
        # returns (number of trained machines, accuracy or None)
        encoder = getattr(Classifier, encoders[ier])()
        decoder = getattr(Classifier, decoders[idr])()

        # whether encoder is data dependent
        if hasattr(encoder, 'set_labels'):
            encoder.set_labels(gnd_train)
            encoder.set_features(fea_train)

        strategy = ECOCStrategy(encoder, decoder)
        classifier = LinearMulticlassMachine(strategy, fea_train, base_classifier, gnd_train)
        classifier.train()
        label_pred = classifier.apply(fea_test)
        if gnd_test is not None:
            evaluator = MulticlassAccuracy()
            acc = evaluator.evaluate(label_pred, gnd_test)
        else:
            acc = None

        return (classifier.get_num_machines(), acc)

    for ier in range(len(encoders)):
        for idr in range(len(decoders)):
            t_begin = timer()
            (codelen, acc) = run_ecoc(ier, idr)
            # pick a format for the accuracy column: number or 'N/A'
            if acc is None:
                acc_fmt = 's'
                acc = 'N/A'
            else:
                acc_fmt = '.4f'

            t_elapse = timer() - t_begin
def classifier_perceptron_graphical(n=100, distance=5, learn_rate=1., max_iter=1000, num_threads=1, seed=None, nperceptrons=5):
    """
    Plot decision boundaries of several perceptrons and of a linear SVM.

    Two 2D Gaussian clouds of n points each are drawn, one of them shifted
    by `distance`. A perceptron is trained nperceptrons times from random
    weights and bias, and every resulting boundary is drawn into two
    figures. A LibLinear SVM (C=1, epsilon=1e-3, bias enabled) is then
    trained on the same data and its boundary is added to the second
    figure. After every training the value of min_distance(w, b,
    feats_train) is printed. Finally both figures are shown with
    plt.show().

    @param n: number of points per class
    @param distance: offset added to the first cloud
    @param learn_rate: value handed to Perceptron.set_learn_rate
    @param max_iter: value handed to Perceptron.set_max_iter
    @param num_threads: not used by this function
    @param seed: seed passed to np.random.seed
    @param nperceptrons: number of perceptron runs

    @return: the Perceptron object (in the state left by the last run)
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import Perceptron, LibLinear, L2R_L2LOSS_SVC
    from modshogun import MSG_INFO

    # 2D data
    _DIM = 2

    # To get the nice message that the perceptron has converged
    dummy = BinaryLabels()
    # dummy.io.set_loglevel(MSG_INFO)

    np.random.seed(seed)

    # Produce some (probably) linearly separable training data by hand
    # Two Gaussians at a far enough distance
    X = np.array(np.random.randn(_DIM, n)) + distance
    Y = np.array(np.random.randn(_DIM, n))
    label_train_twoclass = np.hstack((np.ones(n), -np.ones(n)))

    fm_train_real = np.hstack((X, Y))
    feats_train = RealFeatures(fm_train_real)
    labels = BinaryLabels(label_train_twoclass)

    perceptron = Perceptron(feats_train, labels)
    perceptron.set_learn_rate(learn_rate)
    perceptron.set_max_iter(max_iter)
    # the starting weights and bias are set by hand inside the loop below
    perceptron.set_initialize_hyperplane(False)

    # Find limits for visualization
    x_min = min(np.min(X[0, :]), np.min(Y[0, :]))
    x_max = max(np.max(X[0, :]), np.max(Y[0, :]))

    y_min = min(np.min(X[1, :]), np.min(Y[1, :]))
    y_max = max(np.max(X[1, :]), np.max(Y[1, :]))

    fig1, axes1 = plt.subplots(1, 1)
    fig2, axes2 = plt.subplots(1, 1)

    # range (not the Python-2-only xrange) keeps this runnable on Python 3
    for _ in range(nperceptrons):
        # Initialize randomly weight vector and bias
        perceptron.set_w(np.random.random(2))
        perceptron.set_bias(np.random.random())

        # Run the perceptron algorithm
        perceptron.train()

        # Construct the hyperplane for visualization
        # Equation of the decision boundary is w^T x + b = 0,
        # i.e. y = -(w[0]*x + b) / w[1]
        b = perceptron.get_bias()
        w = perceptron.get_w()

        hx = np.linspace(x_min - 1, x_max + 1)

        axes1.plot(hx, -1 / w[1] * (w[0] * hx + b))
        axes2.plot(hx, -1 / w[1] * (w[0] * hx + b), alpha=0.5)

        print('minimum distance with perceptron is %f' % min_distance(w, b, feats_train))

    # linear SVM on the same data, for comparison
    C = 1
    epsilon = 1e-3
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    b = svm.get_bias()
    w = svm.get_w()

    print('minimum distance with svm is %f' % min_distance(w, b, feats_train))

    hx = np.linspace(x_min - 1, x_max + 1)

    axes2.plot(hx, -1 / w[1] * (w[0] * hx + b), 'k', linewidth=2.0)

    # Plot the two-class data
    axes1.scatter(X[0, :], X[1, :], s=40, marker='o', facecolors='none', edgecolors='b')
    axes1.scatter(Y[0, :], Y[1, :], s=40, marker='s', facecolors='none', edgecolors='r')

    axes2.scatter(X[0, :], X[1, :], s=40, marker='o', facecolors='none', edgecolors='b')
    axes2.scatter(Y[0, :], Y[1, :], s=40, marker='s', facecolors='none', edgecolors='r')

    # Customize the plot
    axes1.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
    axes1.set_title('Rosenblatt\'s Perceptron Algorithm')
    axes1.set_xlabel('x')
    axes1.set_ylabel('y')

    axes2.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
    axes2.set_title('Support Vector Machine')
    axes2.set_xlabel('x')
    axes2.set_ylabel('y')

    plt.show()

    return perceptron
def classifier_non_separable_svm(n=100, m=10, distance=5, seed=None):
    '''
    Train and plot a linear SVM on two Gaussian clouds with swapped labels.

    n is the number of examples per class and m is the number of examples
    per class whose label is swapped, so that the data cannot be separated
    by a line. The decision boundary of a LibLinear SVM (C=1, epsilon=1e-3,
    bias enabled) is drawn together with the data and shown via plt.show().

    The trained svm object is returned.
    '''
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # 2D data
    _DIM = 2

    # created but not used afterwards
    dummy = BinaryLabels()

    np.random.seed(seed)

    # two Gaussian clouds; the first one is shifted by `distance`
    X = np.array(np.random.randn(_DIM, n)) + distance
    Y = np.array(np.random.randn(_DIM, n))

    # labels: the last m points of each cloud get the other class' label
    labels_for_X = (np.ones(n - m), -np.ones(m))
    labels_for_Y = (-np.ones(n - m), np.ones(m))
    label_train_twoclass = np.hstack(labels_for_X + labels_for_Y)

    fm_train_real = np.hstack((X, Y))
    feats_train = RealFeatures(fm_train_real)
    labels = BinaryLabels(label_train_twoclass)

    # linear SVM
    svm = LibLinear(1, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    svm.set_epsilon(1e-3)
    svm.set_bias_enabled(True)
    svm.train()

    # hyperplane parameters
    bias = svm.get_bias()
    weights = svm.get_w()

    # axis limits: extreme coordinates over both clouds
    x_lo = min(np.min(X[0, :]), np.min(Y[0, :]))
    x_hi = max(np.max(X[0, :]), np.max(Y[0, :]))
    y_lo = min(np.min(X[1, :]), np.min(Y[1, :]))
    y_hi = max(np.max(X[1, :]), np.max(Y[1, :]))

    grid_x = np.linspace(x_lo - 1, x_hi + 1)
    # computed as in the original, but not used for plotting
    hy = -weights[1] / weights[0] * grid_x

    # decision boundary: weights[0]*x + weights[1]*y + bias = 0
    boundary_y = -1 / weights[1] * (weights[0] * grid_x + bias)
    plt.plot(grid_x, boundary_y, 'k', linewidth=2.0)

    # the two classes, by their (partly swapped) labels
    is_positive = label_train_twoclass == +1
    plt.scatter(fm_train_real[0, is_positive], fm_train_real[1, is_positive],
                s=40, marker='o', facecolors='none', edgecolors='b')

    is_negative = label_train_twoclass == -1
    plt.scatter(fm_train_real[0, is_negative], fm_train_real[1, is_negative],
                s=40, marker='s', facecolors='none', edgecolors='r')

    # axes, title and labels
    plt.axis([x_lo - 1, x_hi + 1, y_lo - 1, y_hi + 1])
    plt.title('SVM with non-linearly separable data')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

    return svm