def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    """Run stratified 5-fold cross-validation (3 runs) of a binary MKL
    classifier and collect the learned kernel weights of every fold.

    Parameters
    ----------
    traindat : real-valued training feature matrix (module-level default)
    label_traindat : binary labels for the training data (module-level default)
    """
    from modshogun import CrossValidation
    from modshogun import CrossValidationMKLStorage
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import LibSVM, MKLClassification

    # training data: the same features appended three times, one per subkernel
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # combined kernel of Gaussians with different widths
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # MKL on top of LibSVM; due to a mem-bug, interleaved optimization
    # is not possible, so it is disabled explicitly
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # stratified splitting keeps the class balance in every fold
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # accuracy as the evaluation criterion
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance with MKL weight storage attached
    cross_validation = CrossValidation(svm, comb_features, labels,
                                       splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    # MKL weights recorded for every fold of every run
    weights = mkl_storage.get_mkl_weights()
def make_combined_feature (astringv, minseq = 3, maxseq = 8):
    """Build a CombinedFeatures holding one string sub-feature per sequence
    length in [minseq, maxseq]; also return the raw sub-features as a list."""
    from modshogun import CombinedFeatures

    combined = CombinedFeatures()
    raw_feats = []
    for seqlen in range(minseq, maxseq + 1):
        # one sub-feature per order, built by the module-level helper
        sub = make_string_feature(astringv, start=seqlen - 1, order=seqlen)
        raw_feats.append(sub)
        combined.append_feature_obj(sub)
    return combined, raw_feats
def construct_features(features):
    """Combine WD features over the whole sequences with spectrum features
    over the left (first 15 symbols) and right (remaining) halves."""
    whole = [seq for seq in features]
    left = [seq[0:15] for seq in features]
    right = [seq[15:] for seq in features]

    combined = CombinedFeatures()
    combined.append_feature_obj(get_wd_features(whole))
    combined.append_feature_obj(get_spectrum_features(left, order=3))
    combined.append_feature_obj(get_spectrum_features(right, order=3))
    return combined
def runShogunSVMDNACombinedSpectrumKernel(train_xt, train_lt, test_xt):
    """Train a LibSVM on a combination of spectrum kernels (one per K in the
    module-level KList) over DNA strings.

    Returns (train_labels, test_labels, train_decisions, test_decisions).
    """
    ##################################################
    # set up svm
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # Iterate through the K's and make a spectrum kernel for each
    for K in KList:
        charfeat_train = StringCharFeatures(train_xt, DNA)
        current_feats_train = StringWordFeatures(DNA)
        current_feats_train.obtain_from_char(charfeat_train, K-1, K, GAP, False)
        preproc = SortWordString()
        preproc.init(current_feats_train)
        current_feats_train.add_preprocessor(preproc)
        current_feats_train.apply_preprocessor()
        feats_train.append_feature_obj(current_feats_train)

        charfeat_test = StringCharFeatures(test_xt, DNA)
        current_feats_test = StringWordFeatures(DNA)
        current_feats_test.obtain_from_char(charfeat_test, K-1, K, GAP, False)
        # reuse the preprocessor fitted on the training words
        current_feats_test.add_preprocessor(preproc)
        current_feats_test.apply_preprocessor()
        feats_test.append_feature_obj(current_feats_test)

        current_kernel = CommWordStringKernel(10, False)
        kernel.append_kernel(current_kernel)

    kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
    labels = BinaryLabels(train_lt)

    # run svm model; parenthesized prints are valid in both Python 2 and 3
    print("Ready to train!")
    kernel.init(feats_train, feats_train)
    svm = LibSVM(SVMC, kernel, labels)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.train()

    # predictions
    print("Making predictions!")
    out1DecisionValues = svm.apply(feats_train)
    out1 = out1DecisionValues.get_labels()
    kernel.init(feats_train, feats_test)
    out2DecisionValues = svm.apply(feats_test)
    out2 = out2DecisionValues.get_labels()
    return out1, out2, out1DecisionValues, out2DecisionValues
def evaluation_cross_validation_mkl_weight_storage(
        traindat=traindat, label_traindat=label_traindat):
    """Cross-validate a binary MKL classifier (5-fold, stratified, 3 runs)
    while recording the per-fold MKL kernel weights."""
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import CrossValidationPrintOutput
    from modshogun import CrossValidationMKLStorage
    from modshogun import ContingencyTableEvaluation, ACCURACY
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import BinaryLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import LibSVM, MKLClassification

    # combined features: the same data appended once per subkernel
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    for _ in range(3):
        comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # combined kernel made of Gaussians with different widths
    kernel = CombinedKernel()
    for width in (0.1, 1, 2):
        kernel.append_kernel(GaussianKernel(10, width))

    # MKL over LibSVM; interleaved optimization disabled (mem-bug workaround)
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # 5-fold stratified splitting
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # accuracy as the evaluation criterion
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation machinery with MKL weight storage attached
    cross_validation = CrossValidation(
        svm, comb_features, labels, splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    # run it and fetch the recorded weights
    result = cross_validation.evaluate()
    weights = mkl_storage.get_mkl_weights()
def kernel_combined_custom_poly_modular(train_fname=traindat, test_fname=testdat, train_label_fname=label_traindat):
    """Train a LibSVM on a combined kernel mixing a precomputed custom
    (degree-3 poly) kernel with a live degree-2 PolyKernel, then rebuild the
    combination for the test data and classify it.

    Returns (kernel_matrix_after_test_init, combined_kernel).
    """
    from modshogun import CombinedFeatures, RealFeatures, BinaryLabels
    from modshogun import CombinedKernel, PolyKernel, CustomKernel
    from modshogun import LibSVM, CSVFile

    # --- training side ---
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    # precompute a degree-3 poly kernel matrix and wrap it as a CustomKernel
    tfeats = RealFeatures(CSVFile(train_fname))
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    kernel.append_kernel(CustomKernel(tkernel.get_kernel_matrix()))

    # live degree-2 poly subkernel over the raw features
    feats_train.append_feature_obj(RealFeatures(CSVFile(train_fname)))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    labels = BinaryLabels(CSVFile(train_label_fname))
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    # --- prediction side: same construction against the test data ---
    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(CSVFile(test_fname))
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    kernel.append_kernel(CustomKernel(tkernel.get_kernel_matrix()))

    feats_pred.append_feature_obj(RealFeatures(CSVFile(test_fname)))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.apply()

    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
def kernel_combined_custom_poly_modular (train_fname = traindat,test_fname = testdat,train_label_fname=label_traindat):
    """Binary classification with a combined kernel: one precomputed custom
    (degree-3 poly) part plus one live degree-2 poly part, trained on the
    train CSV and re-assembled for the test CSV.

    Returns (kernel_matrix_after_test_init, combined_kernel).
    """
    from modshogun import CombinedFeatures, RealFeatures, BinaryLabels
    from modshogun import CombinedKernel, PolyKernel, CustomKernel
    from modshogun import LibSVM, CSVFile

    def poly_custom(lhs, rhs):
        # precomputed degree-3 poly kernel wrapped as a CustomKernel
        pk = PolyKernel(10, 3)
        pk.init(lhs, rhs)
        return CustomKernel(pk.get_kernel_matrix())

    tfeats = RealFeatures(CSVFile(train_fname))

    # training-side combined kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    kernel.append_kernel(poly_custom(tfeats, tfeats))
    feats_train.append_feature_obj(RealFeatures(CSVFile(train_fname)))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    labels = BinaryLabels(CSVFile(train_label_fname))
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    # prediction-side combined kernel and features
    pfeats = RealFeatures(CSVFile(test_fname))
    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()
    kernel.append_kernel(poly_custom(tfeats, pfeats))
    feats_pred.append_feature_obj(RealFeatures(CSVFile(test_fname)))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.apply()

    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
def evaluation_cross_validation_multiclass_storage (traindat=traindat, label_traindat=label_traindat):
    """Run stratified 3-fold cross-validation (3 runs) of a multiclass MKL
    machine, storing per-fold ROC curves and binary F1 evaluations.

    Returns (roc, evaluation_result) for run 0, fold 0, class 0.
    """
    from modshogun import CrossValidation
    from modshogun import CrossValidationMulticlassStorage
    from modshogun import MulticlassAccuracy, F1Measure
    from modshogun import StratifiedCrossValidationSplitting
    from modshogun import MulticlassLabels
    from modshogun import RealFeatures, CombinedFeatures
    from modshogun import GaussianKernel, CombinedKernel
    from modshogun import MKLMulticlass
    from modshogun import Math

    # fixed seed so the random splits are reproducible
    Math.init_random(1)

    # training data: the same features appended three times, one per subkernel
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = MulticlassLabels(label_traindat)

    # combined kernel of Gaussians with different widths
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # multiclass MKL machine
    svm = MKLMulticlass(1.0, kernel, labels)
    svm.set_kernel(kernel)

    # stratified 3-fold splitting keeps class balance in every fold
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 3)

    # multiclass accuracy as the evaluation criterion
    evaluation_criterium = MulticlassAccuracy()

    # cross-validation instance
    cross_validation = CrossValidation(svm, comb_features, labels,
                                       splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # storage for per-fold multiclass ROCs and binary F1 measures
    multiclass_storage = CrossValidationMulticlassStorage()
    multiclass_storage.append_binary_evaluation(F1Measure())
    cross_validation.add_cross_validation_output(multiclass_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    roc_0_0_0 = multiclass_storage.get_fold_ROC(0, 0, 0)
    auc_0_0_0 = multiclass_storage.get_fold_evaluation_result(0, 0, 0, 0)
    return roc_0_0_0, auc_0_0_0
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Load the trained MKL model (downloading it if missing) and predict
    labels for new samples described by four extracted-feature files.

    Returns the predicted label vector.
    """
    # parenthesized prints are valid in both Python 2 and 3
    print('reading extracted features')
    graph_feature = read_feature_data(graph_file)
    # NOTE(review): 'grtaph' looks like a typo, but the string must match the
    # filename written at training time -- confirm before renaming the file.
    graph_feature = get_normalized_given_max_min(graph_feature, 'models/grtaph_max_size')
    cons_feature = read_feature_data(cons_file)
    cons_feature = get_normalized_given_max_min(cons_feature, 'models/cons_max_size')
    CC_feature = read_feature_data(tri_file)
    CC_feature = get_normalized_given_max_min(CC_feature, 'models/tri_max_size')
    ATOS_feature = read_feature_data(other_feature_file)
    ATOS_feature = get_normalized_given_max_min(ATOS_feature, 'models/alu_max_size')

    width, C, epsilon, num_threads, mkl_epsilon, mkl_norm = 0.5, 1.2, 1e-5, 1, 0.001, 3.5

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # One Gaussian subkernel per feature group; the training-side features
    # are deserialized from the files saved at training time. This loop
    # replaces four copy-pasted blocks from the original.
    feature_groups = [
        (graph_feature, "models/graph.dat"),
        (cons_feature, "models/cons.dat"),
        (CC_feature, "models/tri.dat"),
        (ATOS_feature, "models/alu.dat"),
    ]
    for test_data, train_store in feature_groups:
        subkfeats_train = RealFeatures()
        subkfeats_test = RealFeatures(np.transpose(np.array(test_data)))
        subkernel = GaussianKernel(10, width)
        feats_test.append_feature_obj(subkfeats_test)
        fstream = SerializableAsciiFile(train_store, "r")
        status = subkfeats_train.load_serializable(fstream)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(subkernel)

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print('downloading model file')
        url_add = 'http://rth.dk/resources/mirnasponge/data/mkl.dat'
        urllib.urlretrieve(url_add, model_file)

    print('loading trained model')
    fstream = SerializableAsciiFile("models/mkl.dat", "r")
    new_mkl = MKLClassification()
    status = new_mkl.load_serializable(fstream)

    print('model predicting')
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()
    return y_out
def mkl_multiclass_modular (fm_train_real, fm_test_real, label_train_multiclass,
                            width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    """Train a multiclass MKL machine over Gaussian, linear, and degree-2
    polynomial kernels and return the labels predicted for the test data."""
    from modshogun import CombinedFeatures, RealFeatures, MulticlassLabels
    from modshogun import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # one (train-features, test-features, subkernel) triple per base kernel
    for subkernel in (GaussianKernel(10, width), LinearKernel(), PolyKernel(10, 2)):
        feats_train.append_feature_obj(RealFeatures(fm_train_real))
        feats_test.append_feature_obj(RealFeatures(fm_test_real))
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)
    mkl = MKLMulticlass(C, kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)
    mkl.train()

    # re-init the kernel against the test features before predicting
    kernel.init(feats_train, feats_test)
    out = mkl.apply().get_labels()
    return out
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Predict labels for new samples with the pre-trained MKL model,
    fetching the model file over HTTP if it is not present locally.

    Returns the predicted label vector.
    """
    # parenthesized prints are valid in both Python 2 and 3
    print("reading extracted features")
    graph_feature = read_feature_data(graph_file)
    # NOTE(review): "grtaph" looks like a typo, but the string must match the
    # filename written at training time -- confirm before renaming the file.
    graph_feature = get_normalized_given_max_min(graph_feature, "models/grtaph_max_size")
    cons_feature = read_feature_data(cons_file)
    cons_feature = get_normalized_given_max_min(cons_feature, "models/cons_max_size")
    CC_feature = read_feature_data(tri_file)
    CC_feature = get_normalized_given_max_min(CC_feature, "models/tri_max_size")
    ATOS_feature = read_feature_data(other_feature_file)
    ATOS_feature = get_normalized_given_max_min(ATOS_feature, "models/alu_max_size")

    width, C, epsilon, num_threads, mkl_epsilon, mkl_norm = 0.5, 1.2, 1e-5, 1, 0.001, 3.5

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # One Gaussian subkernel per feature group; training-side features are
    # deserialized from files saved at training time. This loop replaces
    # four copy-pasted blocks from the original.
    for test_data, train_store in [
            (graph_feature, "models/graph.dat"),
            (cons_feature, "models/cons.dat"),
            (CC_feature, "models/tri.dat"),
            (ATOS_feature, "models/alu.dat")]:
        subkfeats_train = RealFeatures()
        subkfeats_test = RealFeatures(np.transpose(np.array(test_data)))
        subkernel = GaussianKernel(10, width)
        feats_test.append_feature_obj(subkfeats_test)
        fstream = SerializableAsciiFile(train_store, "r")
        status = subkfeats_train.load_serializable(fstream)
        feats_train.append_feature_obj(subkfeats_train)
        kernel.append_kernel(subkernel)

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print("downloading model file")
        url_add = "http://rth.dk/resources/mirnasponge/data/mkl.dat"
        urllib.urlretrieve(url_add, model_file)

    print("loading trained model")
    fstream = SerializableAsciiFile("models/mkl.dat", "r")
    new_mkl = MKLClassification()
    status = new_mkl.load_serializable(fstream)

    print("model predicting")
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    y_out = new_mkl.apply().get_labels()
    return y_out
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """Run an MKL-SVM combining one spectrum kernel per k in range(K1, K2, -1).

    Returns (train_predictions, test_predictions, resampled_train_labels).
    """
    ##################################################
    # Take all examples (random resample with replacement)
    idxs = np.random.randint(1, 14000, 14000)
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # build and preprocess training word features
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i - 1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()

        # build testing word features, reusing the fitted preprocessor
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i - 1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()

        # BUG FIX: append the preprocessed word features (what the
        # CommWordStringKernel operates on, as in the other spectrum-kernel
        # function in this file), not the raw char features
        feats_train.append_feature_obj(feats_train_k1)
        feats_test.append_feature_obj(feats_test_k1)

        # append spectrum kernel
        # NOTE(review): elsewhere in this file the 2nd CommWordStringKernel
        # argument is the boolean use_sign (False); passing i here looks
        # suspicious -- confirm against the Shogun API.
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    '''
    Uncomment this for Multiple Weighted degree kernels and comment the
    multiple spectrum kernel block above instead

    ##################### Multiple Weighted Degree Kernel #####################
    for i in range(K1,K2,-1):
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_train.append_feature_obj(charfeat_train)
        feats_test.append_feature_obj(charfeat_test)
        kernel1 = WeightedDegreePositionStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel1.set_shifts(SHIFT*np.ones(len(train_xt[0]), dtype=np.int32))
        kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64))
        kernel.append_kernel(kernel1)
    '''

    ##################### Training #########################
    # parenthesized prints are valid in both Python 2 and 3
    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  # 1, 2, 3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()
    return out1, out2, train_lt
def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat):
    """Train a binary MKL classifier on a combined (custom + poly) kernel,
    rebuild the combination for the test data and classify it.

    Returns (predicted_labels, test_side_combined_kernel).
    """
    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()
    mkl.set_mkl_norm(1)  # which norm to use for MKL (2, 3 also possible)
    mkl.set_C(1, 1)      # cost (neg, pos)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train()

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # classify ONCE and reuse the result (the original called apply() twice,
    # discarding the first result and re-running inference in the return)
    mkl.set_kernel(kernel)
    out = mkl.apply()
    return out, kernel
def mkl(train_features, train_labels, test_features, test_labels, width=5,
        C=1.2, epsilon=1e-2, mkl_epsilon=0.001, mkl_norm=2):
    """Train a multiclass MKL machine over Gaussian, linear and degree-2
    polynomial kernels, then print training and test error percentages."""
    from modshogun import CombinedKernel, CombinedFeatures
    from modshogun import GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass, MulticlassAccuracy

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # one (train, test, subkernel) triple per base kernel
    for subkernel in (GaussianKernel(10, width), LinearKernel(), PolyKernel(10, 2)):
        feats_train.append_feature_obj(train_features)
        feats_test.append_feature_obj(test_features)
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    mkl = MKLMulticlass(C, kernel, train_labels)
    mkl.set_epsilon(epsilon)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)
    mkl.train()

    train_output = mkl.apply()
    # re-init against the test features before predicting on them
    kernel.init(feats_train, feats_test)
    test_output = mkl.apply()

    evaluator = MulticlassAccuracy()
    # parenthesized prints are valid in both Python 2 and 3
    print('MKL training error is %.4f' % (
        (1 - evaluator.evaluate(train_output, train_labels)) * 100))
    print('MKL test error is %.4f' % (
        (1 - evaluator.evaluate(test_output, test_labels)) * 100))
def kernel_combined_modular (fm_train_real=traindat,fm_test_real=testdat,fm_train_dna=traindna,fm_test_dna=testdna ):
    """Combine a Gaussian kernel on real features with fixed-degree and
    local-alignment string kernels on DNA strings.

    Returns (train_kernel_matrix, test_kernel_matrix, combined_kernel).
    """
    from modshogun import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
    from modshogun import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # Gaussian subkernel on the real-valued features
    feats_train.append_feature_obj(RealFeatures(fm_train_real))
    feats_test.append_feature_obj(RealFeatures(fm_test_real))
    kernel.append_kernel(GaussianKernel(10, 1.1))

    # fixed-degree (degree 3) string subkernel on the DNA strings
    feats_train.append_feature_obj(StringCharFeatures(fm_train_dna, DNA))
    feats_test.append_feature_obj(StringCharFeatures(fm_test_dna, DNA))
    kernel.append_kernel(FixedDegreeStringKernel(10, 3))

    # local-alignment string subkernel on the DNA strings
    feats_train.append_feature_obj(StringCharFeatures(fm_train_dna, DNA))
    feats_test.append_feature_obj(StringCharFeatures(fm_test_dna, DNA))
    kernel.append_kernel(LocalAlignmentStringKernel(10))

    kernel.init(feats_train, feats_train)
    km_train = kernel.get_kernel_matrix()
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def mkl(train_features, train_labels, test_features, test_labels, width=5, C=1.2, epsilon=1e-2, mkl_epsilon=0.001, mkl_norm=2):
    """Multiclass MKL over Gaussian, linear and degree-2 polynomial kernels;
    prints the training and test error percentages."""
    from modshogun import CombinedKernel, CombinedFeatures
    from modshogun import GaussianKernel, LinearKernel, PolyKernel
    from modshogun import MKLMulticlass, MulticlassAccuracy

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # Gaussian subkernel
    feats_train.append_feature_obj(train_features)
    feats_test.append_feature_obj(test_features)
    kernel.append_kernel(GaussianKernel(10, width))

    # linear subkernel
    feats_train.append_feature_obj(train_features)
    feats_test.append_feature_obj(test_features)
    kernel.append_kernel(LinearKernel())

    # degree-2 polynomial subkernel
    feats_train.append_feature_obj(train_features)
    feats_test.append_feature_obj(test_features)
    kernel.append_kernel(PolyKernel(10, 2))

    kernel.init(feats_train, feats_train)

    mkl = MKLMulticlass(C, kernel, train_labels)
    mkl.set_epsilon(epsilon)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)
    mkl.train()

    train_output = mkl.apply()
    # re-init against the test features before predicting on them
    kernel.init(feats_train, feats_test)
    test_output = mkl.apply()

    evaluator = MulticlassAccuracy()
    # parenthesized prints are valid in both Python 2 and 3
    print('MKL training error is %.4f' % ((1-evaluator.evaluate(train_output, train_labels))*100))
    print('MKL test error is %.4f' % ((1-evaluator.evaluate(test_output, test_labels))*100))
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """MKL-SVM over multiple spectrum kernels, one per k in range(K1, K2, -1).

    Returns (train_predictions, test_predictions, resampled_train_labels).
    """
    ##################################################
    # Take all examples (random resample with replacement)
    idxs = np.random.randint(1, 14000, 14000)
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # training word features, sorted by the fitted preprocessor
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i-1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()

        # testing word features, reusing the same preprocessor
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i-1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()

        # BUG FIX: append the preprocessed word features (what the
        # CommWordStringKernel operates on, as in the other spectrum-kernel
        # function in this file), not the raw char features
        feats_train.append_feature_obj(feats_train_k1)
        feats_test.append_feature_obj(feats_test_k1)

        # append spectrum kernel
        # NOTE(review): elsewhere in this file the 2nd CommWordStringKernel
        # argument is the boolean use_sign (False); passing i here looks
        # suspicious -- confirm against the Shogun API.
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    '''
    Uncomment this for Multiple Weighted degree kernels and comment the
    multiple spectrum kernel block above instead

    ##################### Multiple Weighted Degree Kernel #####################
    for i in range(K1,K2,-1):
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_train.append_feature_obj(charfeat_train)
        feats_test.append_feature_obj(charfeat_test)
        kernel1 = WeightedDegreePositionStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel1.set_shifts(SHIFT*np.ones(len(train_xt[0]), dtype=np.int32))
        kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64))
        kernel.append_kernel(kernel1)
    '''

    ##################### Training #########################
    # parenthesized prints are valid in both Python 2 and 3
    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  # 1, 2, 3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()
    return out1, out2, train_lt