def svm_train_classify(trimat_tr, seqids_tr, labels_tr, fullmat_te, options, icv):
    """Train an SVM from a precomputed triangular kernel matrix, then classify
    a test split given a precomputed full (train-vs-test) kernel matrix.

    trimat_tr  -- lower-triangular training kernel matrix for CustomKernel
    seqids_tr  -- training sequence ids (used only when saving fold results)
    labels_tr  -- training labels
    fullmat_te -- full kernel matrix used at classification time
    options    -- parsed options; reads svmC, epsilon, weight, alphaprefix
    icv        -- cross-validation fold index
    Returns the predicted labels as a plain Python list.
    """
    sys.stderr.write('..kernel building..\n')
    kernel=CustomKernel()
    kernel.set_triangle_kernel_matrix_from_triangle(trimat_tr)
    sys.stderr.write('..svm learning..\n')
    svm = svm_learn(kernel, labels_tr, options.svmC, options.epsilon, options.weight)
    # publish the learned bias through the module-level global for callers
    global g_svm_bias
    g_svm_bias = svm.get_bias()
    # optionally dump per-fold alphas/support vectors; the helper's name
    # contains a typo ("taining") but it is defined elsewhere under that name
    if (options.alphaprefix != "") and (len(seqids_tr) > 0):
        save_cv_taining_result(svm, options, seqids_tr, icv)
    sys.stderr.write('..svm classifying..\n')
    # reuse the same CustomKernel object, now loaded with the test matrix
    kernel.set_full_kernel_matrix_from_full(fullmat_te)
    ###################################################
    #for testing
    #alphas = svm.get_alphas()
    #svids = svm.get_support_vectors()
    #for j in xrange(len(preds)):
    #    p = svm.get_bias()
    #    for i in xrange(len(alphas)):
    #        p += (alphas[i]*fullmat_te[int(svids[i]),j])
    #    print preds[j], p
    #sys.exit(0)
    ###################################################
    return svm.classify().get_labels().tolist()
def runSVM(options, args): """ set global variable """ if (options.ktype == 1 or options.ktype == 5 or options.ktype == 6) and (options.kmerlen <= 8): global g_kmers global g_rcmap g_kmers = generate_kmers(options.kmerlen) g_rcmap = generate_rcmap_table(options.kmerlen, g_kmers) print 'Read genome sequence.\n' genome = preprocessGenome(args[0], options.subs) print 'Get sliding window.\n' seqs, sids = sliding_window(genome, options.window, options.step) print 'Get features and kernel.\n' if options.ktype == 1: get_features = get_spectrum_features get_kernel = get_spectrum_kernel elif options.ktype == 2: get_features = get_weighted_spectrum_features get_kernel = get_weighted_spectrum_kernel elif options.ktype == 3: get_features = get_char_features get_kernel = get_wd_kernel elif options.ktype == 5: get_features = get_char_features get_kernel = get_gaussian_kernel elif options.ktype == 6: get_features = get_char_features get_kernel = get_linear_kernel if options.ktype == 4: print 'This is custom kernel.\n' npos = len(sids) nneg = 0 fullmat = get_full_matrix(options.matrixFile, npos, nneg) kernel = CustomKernel() kernel.set_full_kernel_matrix_from_full(fullmat) else: feats = get_features(seqs, options) kernel = get_kernel(feats, options) print '\nSVM training.\n' svm = svm_learn(kernel, options) processSVMOutput(svm, sids, options)
def kernel_combined_custom_poly_modular(fm_train_real=traindat, fm_test_real=testdat, fm_label_twoclass=label_traindat):
    """Train a LibSVM on a combined kernel (precomputed poly as CustomKernel
    plus a live PolyKernel), then rebuild the combined kernel on the test
    features and classify. Returns the test kernel matrix and the kernel.
    """
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    # ----- training phase -----
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()

    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K))

    feats_train = CombinedFeatures()
    subkfeats_train = RealFeatures(fm_train_real)
    feats_train.append_feature_obj(subkfeats_train)

    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_train)

    labels = Labels(fm_label_twoclass)
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    # ----- prediction phase: same combination, built against the test data -----
    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(fm_test_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    kernel.append_kernel(CustomKernel(tkernel.get_kernel_matrix()))

    subkfeats_test = RealFeatures(fm_test_real)
    feats_pred.append_feature_obj(subkfeats_test)
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.classify()

    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
def classifier_custom_kernel_modular (C=1,dim=7):
    """Train a LibSVM on a CustomKernel filled with random data.

    C   -- SVM regularisation constant
    dim -- number of examples / kernel matrix dimension
    Returns (svm, out): the trained classifier and its predicted labels.
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Kernel import CustomKernel
    from shogun.Classifier import LibSVM
    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    # deterministic inputs for a given (C, dim)
    seed((C, dim))
    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    # NOTE(review): symdata (symmetric, diagonal-boosted) is built but unused;
    # the kernel below is fed the raw, non-symmetric `data`. Confirm whether
    # symdata was intended before changing it.
    symdata = data * data.T + diag(ones(dim))

    kernel = CustomKernel()
    kernel.set_full_kernel_matrix_from_full(data)
    labels = BinaryLabels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()

    # bug fix: svm.apply() was called twice; reuse the first result
    predictions = svm.apply()
    out = predictions.get_labels()
    return svm, out
def classifier_custom_kernel_modular(C=1, dim=7):
    """Train a LibSVM on a CustomKernel filled with random data.

    C   -- SVM regularisation constant
    dim -- number of examples / kernel matrix dimension
    Returns (svm, out): the trained classifier and its predicted labels.
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import CustomKernel
    from shogun.Classifier import LibSVM
    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    # deterministic inputs for a given (C, dim)
    seed((C, dim))
    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    # NOTE(review): symdata is built but unused; the kernel below is fed the
    # raw, non-symmetric `data`. Confirm whether symdata was intended.
    symdata = data * data.T + diag(ones(dim))

    kernel = CustomKernel()
    kernel.set_full_kernel_matrix_from_full(data)
    labels = Labels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()

    # bug fix: svm.apply() was called twice; reuse the first result
    predictions = svm.apply()
    out = predictions.get_labels()
    return svm, out
def custom (): print 'Custom' from numpy.random import rand from numpy import array from shogun.Features import RealFeatures from shogun.Kernel import CustomKernel dim=7 data=rand(dim, dim) feats=RealFeatures(data) symdata=data+data.T lowertriangle=array([symdata[(x,y)] for x in xrange(symdata.shape[1]) for y in xrange(symdata.shape[0]) if y<=x]) kernel=CustomKernel() kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle) km_triangletriangle=kernel.get_kernel_matrix() kernel.set_triangle_kernel_matrix_from_full(symdata) km_fulltriangle=kernel.get_kernel_matrix() kernel.set_full_kernel_matrix_from_full(data) km_fullfull=kernel.get_kernel_matrix()
def svm_train_classify(trimat_tr, seqids_tr, labels_tr, fullmat_te, options, icv):
    """Train an SVM from a precomputed triangular kernel matrix, then
    classify a test split using a precomputed full kernel matrix.

    trimat_tr  -- lower-triangular training kernel matrix for CustomKernel
    seqids_tr  -- training sequence ids (used when saving fold results)
    labels_tr  -- training labels
    fullmat_te -- full kernel matrix used at classification time
    options    -- parsed options; reads svmC, epsilon, weight, alphaprefix
    icv        -- cross-validation fold index
    Returns the predicted labels as a plain Python list.
    """
    sys.stderr.write('..kernel building..\n')
    custom_kernel = CustomKernel()
    custom_kernel.set_triangle_kernel_matrix_from_triangle(trimat_tr)

    sys.stderr.write('..svm learning..\n')
    model = svm_learn(custom_kernel, labels_tr, options.svmC, options.epsilon, options.weight)

    # publish the learned bias through the module-level global
    global g_svm_bias
    g_svm_bias = model.get_bias()

    # optionally dump per-fold alphas/support vectors
    if options.alphaprefix != "" and len(seqids_tr) > 0:
        save_cv_taining_result(model, options, seqids_tr, icv)

    sys.stderr.write('..svm classifying..\n')
    # reuse the same CustomKernel object, now holding the test matrix
    custom_kernel.set_full_kernel_matrix_from_full(fullmat_te)

    predictions = model.classify().get_labels()
    return predictions.tolist()
def mkl_binclass_modular(fm_train_real=traindat, fm_test_real=testdat, fm_label_twoclass=label_traindat):
    """Train an MKL binary classifier on a combined kernel (a precomputed
    poly matrix wrapped in CustomKernel plus a live PolyKernel), rebuild the
    combination for the test data and classify.

    Returns (predictions, test combined kernel).
    """
    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1) #2,3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w=kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    # bug fix: apply() was invoked twice (first result was discarded);
    # run the classifier once and return that result
    out = mkl.apply()
    return out, kernel
# --- externally modified kernel experiment (top-level script section) ---
# NOTE(review): wdk, N, labels, relate_tasks, QP, CustomKernel, LibSVM and
# lab are defined earlier in this script -- confirm against the full source.

# scale every kernel entry by the task-relatedness of examples i and j
km = wdk.get_kernel_matrix()
for i in xrange(N):
    for j in xrange(N):
        km[i,j] = km[i,j]*relate_tasks(i,j)
#km = km*1.0
print km

#precompute kernel matrix using shogun
# solve the SVM dual directly as a QP to obtain a reference objective value
y = numpy.array(labels)
K = numpy.transpose(y.flatten() * (km*y.flatten()).transpose())
f = -numpy.ones(N)
C = 1.0

# Important!! QP does not accept ndarray as a type, it must be an array
p = QP(K, f, Aeq=y, beq=0, lb=numpy.zeros(N), ub=C*numpy.ones(N))
r = p.solve('cvxopt_qp', iprint = 0)
#print "cvxopt objective:", r.ff
print "externally modified kernel. objective:", r.ff

# train shogun's LibSVM on the same precomputed matrix for comparison
ck = CustomKernel()
ck.set_full_kernel_matrix_from_full(km)
#
svm = LibSVM(1, ck, lab)
svm.train()
print "externally modified kernel. objective:", svm.get_objective()
################################################################## km = wdk.get_kernel_matrix() for i in xrange(N): for j in xrange(N): km[i, j] = km[i, j] * relate_tasks(i, j) #km = km*1.0 print km #precompute kernel matrix using shogun y = numpy.array(labels) K = numpy.transpose(y.flatten() * (km * y.flatten()).transpose()) f = -numpy.ones(N) C = 1.0 # Important!! QP does not accept ndarray as a type, it must be an array p = QP(K, f, Aeq=y, beq=0, lb=numpy.zeros(N), ub=C * numpy.ones(N)) r = p.solve('cvxopt_qp', iprint=0) #print "cvxopt objective:", r.ff print "externally modified kernel. objective:", r.ff ck = CustomKernel() ck.set_full_kernel_matrix_from_full(km) # svm = LibSVM(1, ck, lab) svm.train() print "externally modified kernel. objective:", svm.get_objective()
def statistics_quadratic_time_mmd (m,dim,difference):
    """Quadratic-time MMD two-sample test on two mean-shifted Gaussians.

    m          -- number of samples drawn from each distribution
    dim        -- data dimensionality
    difference -- mean shift between the distributions p and q

    Returns (type I errors, type II errors, bootstrap p-value,
    spectrum p-value, gamma p-value).
    """
    from shogun.Features import RealFeatures
    from shogun.Features import MeanShiftDataGenerator
    from shogun.Kernel import GaussianKernel, CustomKernel
    from shogun.Statistics import QuadraticTimeMMD
    from shogun.Statistics import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
    from shogun.Mathematics import Statistics, IntVector, RealVector, Math

    # init seed for reproducability
    Math.init_random(1)

    # streaming data generators for two mean-shifted distributions
    # (example counts kept low to make things fast)
    gen_p = MeanShiftDataGenerator(0, dim)
    gen_q = MeanShiftDataGenerator(difference, dim)

    # stream some data from each generator
    feat_p = gen_p.get_streamed_features(m)
    feat_q = gen_q.get_streamed_features(m)

    # kernel fixed a-priori; usually one would do kernel selection here
    width = 10
    kernel = GaussianKernel(10, width)

    # the constructor copies p and q and does not reference them
    mmd = QuadraticTimeMMD(kernel, feat_p, feat_q)

    # test level for all the p-value comparisons below
    alpha = 0.05

    # bootstrap null approximation (slow; use >= 250 iterations in practice)
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(3)
    p_value_boot = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "bootstrap: p!=q: ", p_value_boot<alpha

    # spectrum method: consistent but sometimes breaks; BIASED statistic only.
    # Use >= 250 null samples in practice and monitor type I error.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_SPECTRUM)
    mmd.set_num_eigenvalues_spectrum(3)
    # NOTE: "sepctrum" is the actual (misspelled) shogun API method name
    mmd.set_num_samples_sepctrum(250)
    p_value_spectrum = mmd.perform_test()
    #print "spectrum: p!=q: ", p_value_spectrum<alpha

    # gamma heuristic: quick but NOT guaranteed; BIASED statistic only
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_GAMMA)
    p_value_gamma = mmd.perform_test()
    #print "gamma: p!=q: ", p_value_gamma<alpha

    # estimate type I and II errors (use many more trials in practice);
    # testing should happen on different data than kernel selection, but a
    # fixed kernel is used here
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(5)
    num_trials = 5
    type_I_errors = RealVector(num_trials)
    type_II_errors = RealVector(num_trials)
    inds = int32(array([x for x in range(2*m)]))  # numpy
    p_and_q = mmd.get_p_and_q()

    # precompute the kernel once so the permutation loop is fast
    kernel.init(p_and_q, p_and_q)
    precomputed = CustomKernel(kernel)
    mmd.set_kernel(precomputed)
    for i in range(num_trials):
        # permuting the joint sample effectively makes p == q, so a
        # rejection here is a type I error
        inds = random.permutation(inds)  # numpy permutation
        precomputed.add_row_subset(inds)
        precomputed.add_col_subset(inds)
        type_I_errors[i] = mmd.perform_test() > alpha
        precomputed.remove_row_subset()
        precomputed.remove_col_subset()

        # on the unpermuted data this measures the type II error
        type_II_errors[i] = mmd.perform_test() > alpha

    # bug fix: type_II_errors was computed but never returned -- the original
    # returned type_I_errors.get() twice
    return type_I_errors.get(), type_II_errors.get(), p_value_boot, p_value_spectrum, p_value_gamma,
def kernel_custom_modular(dim=7):
    """Exercise CustomKernel's three matrix setters, first with float64
    inputs, then again after casting the random data to float32.

    Returns the final full kernel matrix and the kernel object.
    """
    from numpy.random import rand, seed
    from numpy import array, float32
    from shogun.Features import RealFeatures
    from shogun.Kernel import CustomKernel

    seed(17)
    data = rand(dim, dim)
    feats = RealFeatures(data)
    symdata = data + data.T
    # row-major lower triangle (diagonal included) of the symmetric matrix
    lowertriangle = array([symdata[(r, c)]
                           for r in range(symdata.shape[1])
                           for c in range(symdata.shape[0])
                           if c <= r])

    kernel = CustomKernel()

    def snapshot(setter, matrix):
        # push `matrix` into the kernel via `setter`, then read it back
        setter(matrix)
        return kernel.get_kernel_matrix()

    # once with float64's
    km_triangletriangle = snapshot(kernel.set_triangle_kernel_matrix_from_triangle, lowertriangle)
    km_fulltriangle = snapshot(kernel.set_triangle_kernel_matrix_from_full, symdata)
    km_fullfull = snapshot(kernel.set_full_kernel_matrix_from_full, symdata)

    # now once with float32's
    data = array(data, dtype=float32)
    km_triangletriangle = snapshot(kernel.set_triangle_kernel_matrix_from_triangle, lowertriangle)
    km_fulltriangle = snapshot(kernel.set_triangle_kernel_matrix_from_full, symdata)
    km_fullfull = snapshot(kernel.set_full_kernel_matrix_from_full, symdata)

    return km_fullfull, kernel
def kernel_custom_modular (dim=7):
    """Exercise CustomKernel's three matrix setters, first with float64
    inputs and then again after casting `data` to float32; returns the last
    full kernel matrix together with the kernel object.
    """
    from numpy.random import rand, seed
    from numpy import array, float32
    from shogun.Features import RealFeatures
    from shogun.Kernel import CustomKernel

    seed(17)
    data=rand(dim, dim)
    feats=RealFeatures(data)
    symdata=data+data.T
    # row-major lower triangle (diagonal included) of the symmetric matrix
    lowertriangle=array([symdata[(x,y)] for x in range(symdata.shape[1]) for y in range(symdata.shape[0]) if y<=x])
    kernel=CustomKernel()

    # once with float64's
    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle=kernel.get_kernel_matrix()
    kernel.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle=kernel.get_kernel_matrix()
    kernel.set_full_kernel_matrix_from_full(symdata)
    km_fullfull=kernel.get_kernel_matrix()

    # now once with float32's
    # NOTE(review): only `data` is re-cast here; lowertriangle and symdata
    # are still the float64 arrays from above -- confirm this is intended.
    data=array(data,dtype=float32)
    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle=kernel.get_kernel_matrix()
    kernel.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle=kernel.get_kernel_matrix()
    kernel.set_full_kernel_matrix_from_full(symdata)
    km_fullfull=kernel.get_kernel_matrix()
    return km_fullfull,kernel
def _train(self, train_data, param): """ training procedure using training examples and labels @param train_data: Data relevant to SVM training @type train_data: dict<str, list<instances> > @param param: Parameters for the training procedure @type param: ParameterSvm """ # merge data sets data = PreparedMultitaskData(train_data, shuffle=False) # create shogun data objects base_wdk = shogun_factory.create_kernel(data.examples, param) kernel_matrix = base_wdk.get_kernel_matrix() lab = shogun_factory.create_labels(data.labels) # fetch taxonomy from parameter object taxonomy = param.taxonomy.data # create name to leaf map nodes = taxonomy.get_all_nodes() ######################################################## print "creating a kernel for each node:" ######################################################## # assemble combined kernel from shogun.Kernel import CombinedKernel, CustomKernel combined_kernel = CombinedKernel() # indicator to which task each example belongs task_vector = data.task_vector_names for node in nodes: print "creating kernel for ", node.name # fetch sub-tree leaf_names = [leaf.name for leaf in node.get_leaves()] print "masking all entries other than:", leaf_names # init matrix kernel_matrix_node = numpy.zeros(kernel_matrix.shape) # fill matrix for node for (i, task_lhs) in enumerate(task_vector): for (j, task_rhs) in enumerate(task_vector): # only copy values, if both tasks are present in subtree if task_lhs in leaf_names and task_rhs in leaf_names: kernel_matrix_node[i,j] = kernel_matrix[i,j] # create custom kernel kernel_node = CustomKernel() kernel_node.set_full_kernel_matrix_from_full(kernel_matrix_node) # append custom kernel to CombinedKernel combined_kernel.append_kernel(kernel_node) print "------" print "subkernel weights:", combined_kernel.get_subkernel_weights() svm = None print "using MKL:", (param.transform >= 1.0) if param.transform >= 1.0: num_threads = 4 svm = MKLClassification() svm.set_mkl_norm(param.transform) 
svm.set_solver_type(ST_GLPK) #DIRECT) #NEWTON)#ST_CPLEX) svm.set_C(param.cost, param.cost) svm.set_kernel(combined_kernel) svm.set_labels(lab) svm.parallel.set_num_threads(num_threads) #svm.set_linadd_enabled(False) #svm.set_batch_computation_enabled(False) svm.train() print "subkernel weights (after):", combined_kernel.get_subkernel_weights() else: # create SVM (disable unsupported optimizations) svm = SVMLight(param.cost, combined_kernel, lab) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) svm.train() ######################################################## print "svm objective:" print svm.get_objective() ######################################################## # wrap up predictors svms = {} # use a reference to the same svm several times for task_id in train_data.keys(): svms[task_id] = svm return svms
# --- standalone CustomKernel + LibSVM demo (top-level script) ---
from numpy import *
from numpy.random import rand
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import CustomKernel
from shogun.Classifier import LibSVM

# SVM regularisation constant and problem size
C=1
dim=7

# random +/-1 labels and a random kernel matrix
lab=sign(2*rand(dim) - 1)
data=rand(dim, dim)
# NOTE(review): symdata is computed but never used -- the kernel below is
# fed the raw, non-symmetric `data`; confirm whether symdata was intended.
symdata=data*data.T

kernel=CustomKernel()
kernel.set_full_kernel_matrix_from_full(data)
labels=Labels(lab)
svm=LibSVM(C, kernel, labels)
svm.train()
out=svm.classify().get_labels()