def kernel_combined_custom_poly_modular(
    train_fname=traindat,
    test_fname=testdat,
    train_label_fname=label_traindat,
):
    """Train a LibSVM on a combined kernel, then rebuild that kernel for test data.

    The combined kernel has two sub-kernels: a ``CustomKernel`` holding the
    matrix of a ``PolyKernel(10, 3)`` and a ``PolyKernel(10, 2)`` that is fed
    from the combined features.

    Args:
        train_fname: file name handed to ``CSVFile`` for the training features.
        test_fname: file name handed to ``CSVFile`` for the test features.
        train_label_fname: file name handed to ``CSVFile`` for the training
            labels (loaded as ``BinaryLabels``).

    Returns:
        Tuple ``(km, kernel)`` where ``kernel`` is the combined kernel
        initialised with the training features on the left-hand side and the
        test features on the right-hand side, and ``km`` is its kernel matrix.
    """
    from modshogun import (
        BinaryLabels,
        CombinedFeatures,
        CombinedKernel,
        CSVFile,
        CustomKernel,
        LibSVM,
        PolyKernel,
        RealFeatures,
    )

    def poly3_as_custom(lhs, rhs):
        # Evaluate PolyKernel(10, 3) on (lhs, rhs) and freeze the matrix.
        poly3 = PolyKernel(10, 3)
        poly3.init(lhs, rhs)
        return CustomKernel(poly3.get_kernel_matrix())

    # ---- training ----
    train_kernel = CombinedKernel()
    combined_train = CombinedFeatures()

    raw_train = RealFeatures(CSVFile(train_fname))
    train_kernel.append_kernel(poly3_as_custom(raw_train, raw_train))

    # Only the PolyKernel(10, 2) sub-kernel gets a feature object appended.
    combined_train.append_feature_obj(RealFeatures(CSVFile(train_fname)))
    train_kernel.append_kernel(PolyKernel(10, 2))
    train_kernel.init(combined_train, combined_train)

    train_labels = BinaryLabels(CSVFile(train_label_fname))
    svm = LibSVM(1.0, train_kernel, train_labels)
    svm.train()

    # ---- prediction ----
    pred_kernel = CombinedKernel()
    combined_pred = CombinedFeatures()

    raw_test = RealFeatures(CSVFile(test_fname))
    pred_kernel.append_kernel(poly3_as_custom(raw_train, raw_test))

    combined_pred.append_feature_obj(RealFeatures(CSVFile(test_fname)))
    pred_kernel.append_kernel(PolyKernel(10, 2))
    pred_kernel.init(combined_train, combined_pred)

    svm.set_kernel(pred_kernel)
    svm.apply()  # result intentionally not returned, as in the original

    return pred_kernel.get_kernel_matrix(), pred_kernel
def word_kernel(words):
    """Build a custom kernel from pairwise string similarities of ``words``.

    Entry ``(i, j)`` is ``difflib.SequenceMatcher(None, words[i],
    words[j]).ratio()``. The ratio is computed once per unordered pair (with
    the lower-index word first) and mirrored, so the matrix is symmetric.

    The previous implementation filled only the upper triangle and then
    averaged the matrix with its transpose, which silently halved every
    off-diagonal similarity while leaving the diagonal untouched.

    Args:
        words: indexable sequence of strings (or other sequences accepted by
            ``difflib.SequenceMatcher``).

    Returns:
        A ``CustomKernel`` wrapping the ``len(words) x len(words)`` matrix.
    """
    count = len(words)
    similarity = np.zeros([count, count])
    # range() works on both Python 2 and 3 (xrange exists only on Python 2).
    for i in range(count):
        for j in range(i, count):
            ratio = difflib.SequenceMatcher(None, words[i], words[j]).ratio()
            similarity[i, j] = ratio
            similarity[j, i] = ratio
    return CustomKernel(similarity)
def classifier_custom_kernel_modular(C=1, dim=7):
    """Train and apply a LibSVM on a random precomputed (custom) kernel.

    Random labels in {-1, 0, +1} (``sign`` of uniform values) and a random
    ``dim x dim`` matrix are drawn after seeding numpy's RNG with ``(C, dim)``.
    The kernel matrix is the symmetric ``data * data.T + I``.

    The previous implementation computed that symmetric matrix but then passed
    the raw, non-symmetric ``data`` to the kernel, and ran ``svm.apply()``
    twice while discarding the first result.

    Args:
        C: first argument of ``LibSVM``; also part of the RNG seed.
        dim: number of examples (side length of the kernel matrix); also part
            of the RNG seed.

    Returns:
        Tuple ``(svm, out)``: the trained machine and the labels obtained from
        ``svm.apply().get_labels()``.
    """
    from modshogun import BinaryLabels, CustomKernel, LibSVM
    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    seed((C, dim))

    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    # Element-wise product with the transpose is symmetric; the identity is
    # added on the diagonal.
    symdata = data * data.T + diag(ones(dim))

    kernel = CustomKernel()
    kernel.set_full_kernel_matrix_from_full(symdata)

    labels = BinaryLabels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()

    predictions = svm.apply()
    out = predictions.get_labels()
    return svm, out
def classifier_custom_kernel_modular(C=1, dim=7):
    """Fit a LibSVM to a randomly generated custom kernel and predict with it.

    numpy's RNG is seeded with ``(C, dim)``; labels are the ``sign`` of
    uniform values shifted to ``[-1, 1)`` and the kernel matrix is
    ``data * data.T + I`` for a random ``dim x dim`` matrix ``data``.

    Earlier, the symmetric matrix was built but the non-symmetric ``data``
    was handed to the kernel instead, and ``svm.apply()`` was executed twice
    with the first result unused.

    Args:
        C: passed to ``LibSVM`` as its first argument; also seeds the RNG.
        dim: number of examples / side length of the kernel matrix; also
            seeds the RNG.

    Returns:
        Tuple ``(svm, out)`` with the trained machine and the labels taken
        from its prediction via ``get_labels()``.
    """
    from modshogun import BinaryLabels, CustomKernel, LibSVM
    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    seed((C, dim))

    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    # data * data.T is an element-wise product, hence symmetric.
    symdata = data * data.T + diag(ones(dim))

    kernel = CustomKernel()
    kernel.set_full_kernel_matrix_from_full(symdata)

    svm = LibSVM(C, kernel, BinaryLabels(lab))
    svm.train()

    predictions = svm.apply()
    return svm, predictions.get_labels()
def kernel_custom_modular(dim=7):
    """Exercise the ``CustomKernel`` matrix setters and index-based sub-kernels.

    A random symmetric ``dim x dim`` matrix (numpy seed 17) is loaded into a
    ``CustomKernel`` in three ways (triangle-from-triangle,
    triangle-from-full, full-from-full), a sub-kernel is extracted via
    ``IndexFeatures``, and the three loads are then repeated.

    Args:
        dim: side length of the random matrix.

    Returns:
        Tuple ``(km_fullfull, kernel, km_sub_kernel)``: the kernel matrix read
        back after the final full-from-full load, the kernel object, and the
        matrix read back after initialising the kernel with 3 row indices and
        2 column indices.
    """
    from numpy.random import rand, seed
    from numpy import array, float32, int32
    from modshogun import RealFeatures
    from modshogun import CustomKernel
    from modshogun import IndexFeatures

    seed(17)
    data = rand(dim, dim)
    feats = RealFeatures(data)
    symdata = data + data.T

    # Lower triangle (diagonal included), row by row.
    lowertriangle = array(
        [symdata[row, col] for row in range(dim) for col in range(row + 1)]
    )

    kernel = CustomKernel()

    def load_every_way():
        # Run all three setters in turn; hand back the last matrix read.
        kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
        kernel.get_kernel_matrix()
        kernel.set_triangle_kernel_matrix_from_full(symdata)
        kernel.get_kernel_matrix()
        kernel.set_full_kernel_matrix_from_full(symdata)
        return kernel.get_kernel_matrix()

    # first pass
    load_every_way()

    # sub-kernel selected by row / column indices
    rows = IndexFeatures(array([0, 1, 2], dtype=int32))
    cols = IndexFeatures(array([0, 1], dtype=int32))
    kernel.init(rows, cols)
    km_sub_kernel = kernel.get_kernel_matrix()

    kernel.remove_all_col_subsets()
    kernel.remove_all_row_subsets()

    # second pass, labelled "float32" in the original.
    # NOTE(review): only `data` is converted; `symdata` and `lowertriangle`
    # passed to the kernel are still the float64 arrays built above, so this
    # pass repeats the first one. Confirm whether float32 inputs were intended.
    data = array(data, dtype=float32)
    km_fullfull = load_every_way()

    return km_fullfull, kernel, km_sub_kernel
def statistics_quadratic_time_mmd(m, dim, difference):
    """Run a quadratic-time MMD two-sample test with several null approximations.

    Samples are streamed from two ``MeanShiftDataGenerator`` instances, a
    fixed ``GaussianKernel`` is used, and p-values are computed with the
    BOOTSTRAP, MMD2_SPECTRUM and MMD2_GAMMA null approximations. Type I and
    type II error indicators are then collected over a few trials using a
    precomputed kernel.

    Two defects of the previous version are fixed here:
    * the second returned element was ``type_I_errors`` again, so the type II
      errors were never returned;
    * the type I indicator was ``p > alpha``, although (as the original
      comments state) rejecting, i.e. ``p < alpha``, is the type I error when
      p and q are mixed.

    ``random``, ``int32`` and ``array`` are module-level names; the original
    comments mark them as numpy's.

    Args:
        m: number of samples streamed from each generator.
        dim: second argument of both ``MeanShiftDataGenerator`` instances.
        difference: first argument of the generator for q (p uses 0).

    Returns:
        Tuple ``(type_I_errors, type_II_errors, p_value_boot,
        p_value_spectrum, p_value_gamma)``; the two error entries are the
        contents of the ``RealVector`` objects obtained via ``get()``.
    """
    from modshogun import RealFeatures
    from modshogun import MeanShiftDataGenerator
    from modshogun import GaussianKernel, CustomKernel
    from modshogun import QuadraticTimeMMD
    from modshogun import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
    from modshogun import Statistics, IntVector, RealVector, Math

    # init seeds for reproducibility
    Math.init_random(1)
    random.seed(17)

    # number of examples kept low in order to make things fast

    # streaming data generators for mean shift distributions
    gen_p = MeanShiftDataGenerator(0, dim)
    gen_q = MeanShiftDataGenerator(difference, dim)

    # stream some data from the generators
    feat_p = gen_p.get_streamed_features(m)
    feat_q = gen_q.get_streamed_features(m)

    # set kernel a priori; usually one would do some kernel selection
    width = 10
    kernel = GaussianKernel(10, width)

    # create quadratic time mmd instance. Note that this constructor
    # copies p and q and does not reference them
    mmd = QuadraticTimeMMD(kernel, feat_p, feat_q)

    # test level: the null hypothesis is rejected if a p-value is smaller
    alpha = 0.05

    # bootstrapping (slow, not the most reliable way; consider precomputing
    # the kernel when using it, see below). In practice, use at least 250
    # iterations.
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(3)
    p_value_boot = mmd.perform_test()

    # spectrum method. Use at least 250 samples from null. This is consistent
    # but sometimes breaks, always monitor type I error. Only works with the
    # BIASED statistic.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_SPECTRUM)
    mmd.set_num_eigenvalues_spectrum(3)
    # Method name kept exactly as spelled in the original call; verify it
    # against the installed Shogun version before "correcting" the spelling.
    mmd.set_num_samples_sepctrum(250)
    p_value_spectrum = mmd.perform_test()

    # gamma method. A quick hack which works most of the time but is NOT
    # guaranteed to. Only works with the BIASED statistic.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_GAMMA)
    p_value_gamma = mmd.perform_test()

    # compute type I and II errors (use many more trials in practice).
    # Type I error is not necessary if one uses bootstrapping. We do it here
    # anyway, but note that this is an efficient way of computing it.
    # Also note that testing has to happen on different data than kernel
    # selection, but we used a fixed kernel here.
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(5)
    num_trials = 5
    type_I_errors = RealVector(num_trials)
    type_II_errors = RealVector(num_trials)
    inds = int32(array(range(2 * m)))
    p_and_q = mmd.get_p_and_q()

    # use a precomputed kernel to be faster
    kernel.init(p_and_q, p_and_q)
    precomputed = CustomKernel(kernel)
    mmd.set_kernel(precomputed)

    for i in range(num_trials):
        # Shuffling rows/columns mixes p and q, which effectively means p=q;
        # rejecting (p-value below alpha) is then a type I error.
        inds = random.permutation(inds)
        precomputed.add_row_subset(inds)
        precomputed.add_col_subset(inds)
        type_I_errors[i] = mmd.perform_test() < alpha
        precomputed.remove_row_subset()
        precomputed.remove_col_subset()

        # on the unshuffled data, failing to reject is a type II error
        type_II_errors[i] = mmd.perform_test() > alpha

    return (
        type_I_errors.get(),
        type_II_errors.get(),
        p_value_boot,
        p_value_spectrum,
        p_value_gamma,
    )
def statistics_quadratic_time_mmd(m, dim, difference):
    """Run a quadratic-time MMD two-sample test with several null approximations.

    Samples are streamed from two ``MeanShiftDataGenerator`` instances, a
    fixed ``GaussianKernel`` is used, and p-values are computed with the
    PERMUTATION, MMD2_SPECTRUM and MMD2_GAMMA null approximations. Type I and
    type II error indicators are then collected over a few trials using a
    precomputed kernel.

    Two defects of the previous version are fixed here:
    * the second returned element was ``type_I_errors`` again, so the type II
      errors were never returned;
    * the type I indicator was ``p > alpha``, although (as the original
      comments state) rejecting, i.e. ``p < alpha``, is the type I error when
      p and q are mixed.

    ``random``, ``int32`` and ``array`` are module-level names; the original
    comments mark them as numpy's.

    Args:
        m: number of samples streamed from each generator.
        dim: second argument of both ``MeanShiftDataGenerator`` instances.
        difference: first argument of the generator for q (p uses 0).

    Returns:
        Tuple ``(type_I_errors, type_II_errors, p_value_null,
        p_value_spectrum, p_value_gamma)``; the two error entries are the
        contents of the ``RealVector`` objects obtained via ``get()``.
    """
    from modshogun import RealFeatures
    from modshogun import MeanShiftDataGenerator
    from modshogun import GaussianKernel, CustomKernel
    from modshogun import QuadraticTimeMMD
    from modshogun import PERMUTATION, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, BIASED_DEPRECATED
    from modshogun import Statistics, IntVector, RealVector, Math

    # init seeds for reproducibility
    Math.init_random(1)
    random.seed(17)

    # number of examples kept low in order to make things fast

    # streaming data generators for mean shift distributions
    gen_p = MeanShiftDataGenerator(0, dim)
    gen_q = MeanShiftDataGenerator(difference, dim)

    # stream some data from the generators
    feat_p = gen_p.get_streamed_features(m)
    feat_q = gen_q.get_streamed_features(m)

    # set kernel a priori; usually one would do some kernel selection
    width = 10
    kernel = GaussianKernel(10, width)

    # create quadratic time mmd instance. Note that this constructor
    # copies p and q and does not reference them
    mmd = QuadraticTimeMMD(kernel, feat_p, feat_q)

    # test level: the null hypothesis is rejected if a p-value is smaller
    alpha = 0.05

    # permutation (slow, not the most reliable way; consider precomputing
    # the kernel when using it, see below). In practice, use at least 250
    # iterations.
    mmd.set_null_approximation_method(PERMUTATION)
    mmd.set_num_null_samples(3)
    p_value_null = mmd.perform_test()

    # spectrum method. Use at least 250 samples from null. This is consistent
    # but sometimes breaks, always monitor type I error.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_SPECTRUM)
    mmd.set_num_eigenvalues_spectrum(3)
    mmd.set_num_samples_spectrum(250)
    p_value_spectrum = mmd.perform_test()

    # gamma method. A quick hack which works most of the time but is NOT
    # guaranteed to. Only works with the BIASED_DEPRECATED statistic.
    mmd.set_statistic_type(BIASED_DEPRECATED)
    mmd.set_null_approximation_method(MMD2_GAMMA)
    p_value_gamma = mmd.perform_test()

    # compute type I and II errors (use many more trials in practice).
    # Type I error is not necessary if one uses permutation. We do it here
    # anyway, but note that this is an efficient way of computing it.
    # Also note that testing has to happen on different data than kernel
    # selection, but we used a fixed kernel here.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(PERMUTATION)
    mmd.set_num_null_samples(5)
    num_trials = 5
    type_I_errors = RealVector(num_trials)
    type_II_errors = RealVector(num_trials)
    inds = int32(array(range(2 * m)))
    p_and_q = mmd.get_p_and_q()

    # use a precomputed kernel to be faster
    kernel.init(p_and_q, p_and_q)
    precomputed = CustomKernel(kernel)
    mmd.set_kernel(precomputed)

    for i in range(num_trials):
        # Shuffling rows/columns mixes p and q, which effectively means p=q;
        # rejecting (p-value below alpha) is then a type I error.
        inds = random.permutation(inds)
        precomputed.add_row_subset(inds)
        precomputed.add_col_subset(inds)
        type_I_errors[i] = mmd.perform_test() < alpha
        precomputed.remove_row_subset()
        precomputed.remove_col_subset()

        # on the unshuffled data, failing to reject is a type II error
        type_II_errors[i] = mmd.perform_test() > alpha

    return (
        type_I_errors.get(),
        type_II_errors.get(),
        p_value_null,
        p_value_spectrum,
        p_value_gamma,
    )
def mkl_binclass_modular(
    fm_train_real=traindat,
    fm_test_real=testdat,
    fm_label_twoclass=label_traindat,
):
    """Train an ``MKLClassification`` machine on a combined kernel and apply it.

    The combined kernel consists of a ``CustomKernel`` holding a precomputed
    ``PolyKernel(10, 3)`` matrix and a ``PolyKernel(10, 2)`` evaluated on the
    combined features. The same structure is rebuilt for the test data before
    the machine is applied.

    Args:
        fm_train_real: training data wrapped in ``RealFeatures``.
        fm_test_real: test data wrapped in ``RealFeatures``.
        fm_label_twoclass: training labels wrapped in ``BinaryLabels``.

    Returns:
        Tuple ``(result, kernel)``: the value of ``mkl.apply()`` and the
        combined kernel initialised on (train, test) features.
    """
    # ---- precompute PolyKernel(10, 3) matrices for train and test ----
    train_feats = RealFeatures(fm_train_real)
    poly3 = PolyKernel(10, 3)
    poly3.init(train_feats, train_feats)
    gram_train = poly3.get_kernel_matrix()

    test_feats = RealFeatures(fm_test_real)
    poly3.init(train_feats, test_feats)
    gram_test = poly3.get_kernel_matrix()

    # ---- training: combined features and matching combined kernel ----
    combined_train = CombinedFeatures()
    combined_train.append_feature_obj(RealFeatures(fm_train_real))

    train_kernel = CombinedKernel()
    train_kernel.append_kernel(CustomKernel(gram_train))
    train_kernel.append_kernel(PolyKernel(10, 2))
    train_kernel.init(combined_train, combined_train)

    train_labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()
    mkl.set_mkl_norm(1)  # which norm to use for MKL (original note: 2, 3)
    mkl.set_C(1, 1)  # cost (neg, pos)
    mkl.set_kernel(train_kernel)
    mkl.set_labels(train_labels)
    mkl.train()

    # ---- testing: same structure, train features vs. test features ----
    combined_pred = CombinedFeatures()
    combined_pred.append_feature_obj(RealFeatures(fm_test_real))

    pred_kernel = CombinedKernel()
    pred_kernel.append_kernel(CustomKernel(gram_test))
    pred_kernel.append_kernel(PolyKernel(10, 2))
    pred_kernel.init(combined_train, combined_pred)

    mkl.set_kernel(pred_kernel)
    mkl.apply()  # first call's result is discarded, as in the original
    result = mkl.apply()
    return result, pred_kernel
def kernel_custom_modular(dim=7):
    """Load a random symmetric matrix into a ``CustomKernel`` in three ways.

    The matrix (numpy seed 17, ``data + data.T``) is set via
    triangle-from-triangle, triangle-from-full and full-from-full, reading the
    kernel matrix back after each step; the sequence is executed twice.

    Args:
        dim: side length of the random matrix.

    Returns:
        Tuple ``(km_fullfull, kernel)``: the kernel matrix read back after the
        final full-from-full load, and the kernel object.
    """
    from numpy.random import rand, seed
    from numpy import array, float32
    from modshogun import RealFeatures
    from modshogun import CustomKernel

    seed(17)
    data = rand(dim, dim)
    feats = RealFeatures(data)
    symdata = data + data.T

    # Lower triangle (diagonal included), row by row.
    tri_values = []
    for row in range(dim):
        for col in range(row + 1):
            tri_values.append(symdata[row, col])
    lowertriangle = array(tri_values)

    kernel = CustomKernel()

    # first pass
    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    kernel.get_kernel_matrix()
    kernel.set_triangle_kernel_matrix_from_full(symdata)
    kernel.get_kernel_matrix()
    kernel.set_full_kernel_matrix_from_full(symdata)
    kernel.get_kernel_matrix()

    # second pass, labelled "float32" in the original.
    # NOTE(review): only `data` is converted; the arrays handed to the kernel
    # (`symdata`, `lowertriangle`) are still the float64 ones built above.
    # Confirm whether float32 inputs were intended.
    data = array(data, dtype=float32)
    kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    kernel.get_kernel_matrix()
    kernel.set_triangle_kernel_matrix_from_full(symdata)
    kernel.get_kernel_matrix()
    kernel.set_full_kernel_matrix_from_full(symdata)
    km_fullfull = kernel.get_kernel_matrix()

    return km_fullfull, kernel
def kernel_custom_modular(dim=7):
    """Drive the ``CustomKernel`` setters and an index-based sub-kernel.

    A symmetric ``dim x dim`` matrix (numpy seed 17, ``data + data.T``) is
    loaded via triangle-from-triangle, triangle-from-full and full-from-full.
    The kernel is then initialised with ``IndexFeatures`` for 3 rows and
    2 columns to read a sub-kernel, the subsets are removed, and the three
    loads are run a second time.

    Args:
        dim: side length of the random matrix.

    Returns:
        Tuple ``(km_fullfull, kernel, km_sub_kernel)``: the matrix read back
        after the last full-from-full load, the kernel object, and the
        sub-kernel matrix.
    """
    from numpy.random import rand, seed
    from numpy import array, float32, int32
    from modshogun import RealFeatures
    from modshogun import CustomKernel
    from modshogun import IndexFeatures

    seed(17)
    data = rand(dim, dim)
    feats = RealFeatures(data)
    symdata = data + data.T

    # Lower triangle (diagonal included), row by row.
    lowertriangle = array(
        [symdata[r, c] for r in range(dim) for c in range(r + 1)]
    )

    kernel = CustomKernel()

    # (setter, argument) pairs, applied in this order on every pass.
    load_steps = (
        (kernel.set_triangle_kernel_matrix_from_triangle, lowertriangle),
        (kernel.set_triangle_kernel_matrix_from_full, symdata),
        (kernel.set_full_kernel_matrix_from_full, symdata),
    )

    # first pass
    for setter, values in load_steps:
        setter(values)
        kernel.get_kernel_matrix()

    # sub-kernel selected by row / column indices
    row_features = IndexFeatures(array([0, 1, 2], dtype=int32))
    col_features = IndexFeatures(array([0, 1], dtype=int32))
    kernel.init(row_features, col_features)
    km_sub_kernel = kernel.get_kernel_matrix()

    kernel.remove_all_col_subsets()
    kernel.remove_all_row_subsets()

    # second pass, labelled "float32" in the original.
    # NOTE(review): only `data` is converted; the arrays in `load_steps` are
    # still the float64 ones built above. Confirm whether float32 inputs were
    # intended.
    data = array(data, dtype=float32)
    km_fullfull = None
    for setter, values in load_steps:
        setter(values)
        km_fullfull = kernel.get_kernel_matrix()

    return km_fullfull, kernel, km_sub_kernel