def statistics_kmm(n, d):
    from modshogun import RealFeatures
    from modshogun import GaussianKernel
    from modshogun import KernelMeanMatching
    from modshogun import Math
    from numpy import random, array, int32

    # init seed for reproducibility
    Math.init_random(1)
    random.seed(1)

    data = random.randn(d, n)

    # create shogun feature representation
    features = RealFeatures(data)

    # use a kernel width of sigma=2, which is 8 in SHOGUN's parametrization
    # k(x,y)=exp(-||x-y||^2 / tau), in contrast to the standard
    # k(x,y)=exp(-||x-y||^2 / (2*sigma^2)), so tau=2*sigma^2
    kernel = GaussianKernel(10, 8)
    kernel.init(features, features)

    kmm = KernelMeanMatching(kernel,
        array([0, 1, 2, 3, 7, 8, 9], dtype=int32),
        array([4, 5, 6], dtype=int32))
    w = kmm.compute_weights()
    #print w
    return w
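# The width comment above is easy to misread, so here is the conversion as a
# minimal plain-Python sketch (added for illustration, not part of the
# original example):
def sigma_to_tau(sigma):
    # SHOGUN expects tau with k(x,y)=exp(-||x-y||^2 / tau); textbooks usually
    # write k(x,y)=exp(-||x-y||^2 / (2*sigma^2)), hence tau = 2*sigma^2
    return 2.0 * sigma ** 2

assert sigma_to_tau(2) == 8  # the width passed to GaussianKernel above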
def regression_svrlight_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat,
        width=1.2, C=1, epsilon=1e-5, tube_epsilon=1e-2, num_threads=3):
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    try:
        from modshogun import SVRLight
    except ImportError:
        print('No support for SVRLight available.')
        return

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = RegressionLabels(label_train)

    svr = SVRLight(C, epsilon, kernel, labels)
    svr.set_tube_epsilon(tube_epsilon)
    svr.parallel.set_num_threads(num_threads)
    svr.train()

    kernel.init(feats_train, feats_test)
    out = svr.apply().get_labels()

    return out, kernel
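# A hypothetical driver for the function above (not part of the original
# example). It assumes modular SHOGUN's convention of one example per column,
# i.e. feature matrices of shape (dim, num_vectors):
def demo_regression_svrlight():
    from numpy import random, sin
    random.seed(0)
    X_train = random.rand(1, 20) * 6   # 20 one-dimensional examples
    X_test = random.rand(1, 10) * 6
    y_train = sin(X_train)[0]          # one label per training example
    return regression_svrlight_modular(X_train, X_test, y_train)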
def kernel_io_modular(train_fname=traindat, test_fname=testdat, width=1.9):
    from modshogun import RealFeatures, GaussianKernel, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))

    kernel = GaussianKernel(feats_train, feats_train, width)
    km_train = kernel.get_kernel_matrix()
    f = CSVFile("tmp/gaussian_train.csv", "w")
    kernel.save(f)
    del f

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    f = CSVFile("tmp/gaussian_test.csv", "w")
    kernel.save(f)
    del f

    # clean up
    import os
    os.unlink("tmp/gaussian_test.csv")
    os.unlink("tmp/gaussian_train.csv")

    return km_train, km_test, kernel
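# One pitfall in the function above: CSVFile opens files under tmp/ for
# writing but does not create the directory. A small wrapper one might use
# (hypothetical, not from the original example):
def demo_kernel_io(train_fname, test_fname):
    import os
    if not os.path.isdir("tmp"):
        os.mkdir("tmp")  # kernel.save() needs tmp/ to exist
    return kernel_io_modular(train_fname, test_fname)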
def classifier_multiclassmachine_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibSVM()
    classifier.set_epsilon(epsilon)
    #print labels.get_labels()
    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(), kernel, classifier, labels)
    mc_classifier.train()

    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out
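# A hypothetical driver for the one-vs-rest machine above (not in the
# original example): three well-separated Gaussian blobs, labelled 0, 1, 2.
def demo_multiclassmachine():
    from numpy import random, hstack, zeros, ones
    random.seed(0)
    X_train = hstack([random.randn(2, 10) + shift for shift in (0.0, 4.0, 8.0)])
    y_train = hstack([zeros(10), ones(10), 2 * ones(10)])
    X_test = random.randn(2, 5) + 4.0  # should mostly be assigned class 1
    return classifier_multiclassmachine_modular(X_train, X_test, y_train)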
def kernel_gaussian_modular(train_fname=traindat, test_fname=testdat, width=1.3):
    from modshogun import RealFeatures, GaussianKernel, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))

    kernel = GaussianKernel(feats_train, feats_train, width)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
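# For reference, the same kind of kernel matrix computed directly in numpy.
# This is a sketch of the formula only, using the parametrization
# k(x,y)=exp(-||x-y||^2 / width) noted in statistics_kmm above, with one
# example per column; it is not SHOGUN internals.
def gaussian_kernel_matrix(X, Z, width):
    from numpy import exp, newaxis
    # squared euclidean distances between every column of X and every column of Z
    d2 = (X ** 2).sum(0)[:, newaxis] + (Z ** 2).sum(0)[newaxis, :] - 2 * X.T.dot(Z)
    return exp(-d2 / width)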
def kernel_sparse_gaussian_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.1):
    from modshogun import SparseRealFeatures
    from modshogun import GaussianKernel

    feats_train = SparseRealFeatures(fm_train_real)
    feats_test = SparseRealFeatures(fm_test_real)

    kernel = GaussianKernel(feats_train, feats_train, width)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
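# A hypothetical driver (not in the original example). Modular SHOGUN's
# SparseRealFeatures is typically constructed from a scipy.sparse matrix in
# compressed-sparse-column form, again one example per column; treat the
# exact accepted input types as an assumption to verify against your build.
def demo_sparse_gaussian():
    from numpy import random
    from scipy.sparse import csc_matrix
    random.seed(0)
    train = csc_matrix(random.randn(3, 10))
    test = csc_matrix(random.randn(3, 5))
    return kernel_sparse_gaussian_modular(train, test)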
def regression_kernel_ridge_modular(n=100, n_test=100,
        x_range=6, x_range_test=10, noise_var=0.5, width=1, tau=1e-6, seed=1):
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import KernelRidgeRegression
    from numpy import random, array, sin

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    # (these settings override the corresponding keyword arguments above)
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)
    krr = KernelRidgeRegression(tau, kernel, labels)
    krr.train(feats_train)

    kernel.init(feats_train, feats_test)
    out = krr.apply().get_labels()

    # plot results
    #plot(X[0],Y[0],'x') # training observations
    #plot(X_test[0],Y_test[0],'-') # ground truth of test
    #plot(X_test[0],out, '-') # mean predictions of test
    #legend(["training", "ground truth", "mean predictions"])
    #show()

    return out, kernel, krr
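# What KernelRidgeRegression computes can be written out in a few lines of
# numpy: the standard kernel ridge closed form alpha = (K + tau*I)^{-1} y,
# with predictions K_cross^T alpha. Included as a reference sketch of the
# math, not of SHOGUN internals.
def krr_closed_form(K_train, y, K_cross, tau):
    from numpy import eye
    from numpy.linalg import solve
    # solve (K + tau*I) alpha = y for the dual coefficients
    alpha = solve(K_train + tau * eye(K_train.shape[0]), y)
    # K_cross is the (n_train, n_test) train/test kernel matrix
    return K_cross.T.dot(alpha)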
def regression_libsvr_modular(svm_c=1, svr_param=0.1, n=100, n_test=100,
        x_range=6, x_range_test=10, noise_var=0.5, width=1, seed=1):
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR
    from numpy import random, array, sin

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    # (these settings override the corresponding keyword arguments above)
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range
    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # two svr models: epsilon and nu
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.train()
    svr_nu = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    svr_nu.train()

    # predictions
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    out1_nu = svr_nu.apply().get_labels()
    out2_nu = svr_nu.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel
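# Since the function above generates its own data, a sanity check is a
# one-liner away. The sketch below (hypothetical, not from the original
# example) checks that the two prediction paths, apply() on the
# already-initialized kernel and apply(feats_test), agree for each model.
def check_libsvr_outputs():
    from numpy import allclose
    out1_eps, out2_eps, out1_nu, out2_nu, _ = regression_libsvr_modular()
    assert allclose(out1_eps, out2_eps)
    assert allclose(out1_nu, out2_nu)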
def classifier_multiclasslibsvm_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import MulticlassLibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    svm = MulticlassLibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    kernel.init(feats_train, feats_test)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
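# A common follow-up to the function above is scoring the predictions.
# SHOGUN's modular interface ships a MulticlassAccuracy evaluator; a sketch,
# assuming ground-truth test labels are available as a MulticlassLabels object:
def multiclass_accuracy(predictions, ground_truth):
    from modshogun import MulticlassAccuracy
    # fraction of correctly predicted labels
    return MulticlassAccuracy().evaluate(predictions, ground_truth)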
def statistics_quadratic_time_mmd(m, dim, difference):
    from modshogun import MeanShiftDataGenerator
    from modshogun import GaussianKernel, CustomKernel
    from modshogun import QuadraticTimeMMD
    from modshogun import PERMUTATION, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, BIASED_DEPRECATED
    from modshogun import RealVector, Math
    from numpy import random, array, int32

    # init seed for reproducibility
    Math.init_random(1)
    random.seed(17)

    # number of examples kept low in order to make things fast

    # streaming data generators for mean shift distributions
    gen_p = MeanShiftDataGenerator(0, dim)
    #gen_p.parallel.set_num_threads(1)
    gen_q = MeanShiftDataGenerator(difference, dim)

    # stream some data from the generators
    feat_p = gen_p.get_streamed_features(m)
    feat_q = gen_q.get_streamed_features(m)

    # set kernel a-priori. usually one would do some kernel selection. See
    # other examples for this.
    width = 10
    kernel = GaussianKernel(10, width)

    # create quadratic time mmd instance. Note that this constructor
    # copies p and q and does not reference them
    mmd = QuadraticTimeMMD(kernel, feat_p, feat_q)

    # perform test: compute p-value and test if null-hypothesis is rejected for
    # a test level of 0.05
    alpha = 0.05

    # using permutation (slow, not the most reliable way. Consider pre-
    # computing the kernel when using it, see below).
    # Also, in practice, use at least 250 iterations
    mmd.set_null_approximation_method(PERMUTATION)
    mmd.set_num_null_samples(3)
    p_value_null = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "permutation: p!=q: ", p_value_null<alpha

    # using spectrum method. Use at least 250 samples from null.
    # This is consistent but sometimes breaks, always monitor type I error.
    # See tutorial for the number of eigenvalues to use.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_SPECTRUM)
    mmd.set_num_eigenvalues_spectrum(3)
    mmd.set_num_samples_spectrum(250)
    p_value_spectrum = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "spectrum: p!=q: ", p_value_spectrum<alpha

    # using gamma method. This is a quick hack, which works most of the time
    # but is NOT guaranteed to. See tutorial for details.
    # Only works with the BIASED_DEPRECATED statistic
    mmd.set_statistic_type(BIASED_DEPRECATED)
    mmd.set_null_approximation_method(MMD2_GAMMA)
    p_value_gamma = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "gamma: p!=q: ", p_value_gamma<alpha

    # compute type I and II errors (use many more trials in practice).
    # Computing the type I error is not necessary if one uses permutation; we
    # do it here anyway, but note that this is an efficient way of computing it.
    # Also note that testing has to happen on different data than kernel
    # selection, but the linear time mmd does this implicitly and we used a
    # fixed kernel here.
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(PERMUTATION)
    mmd.set_num_null_samples(5)
    num_trials = 5
    type_I_errors = RealVector(num_trials)
    type_II_errors = RealVector(num_trials)
    inds = int32(array([x for x in range(2 * m)]))  # numpy
    p_and_q = mmd.get_p_and_q()

    # use a precomputed kernel to be faster
    kernel.init(p_and_q, p_and_q)
    precomputed = CustomKernel(kernel)
    mmd.set_kernel(precomputed)
    for i in range(num_trials):
        # permuting the joint samples effectively means that p=q,
        # so rejecting here is a type I error
        inds = random.permutation(inds)  # numpy permutation
        precomputed.add_row_subset(inds)
        precomputed.add_col_subset(inds)
        type_I_errors[i] = mmd.perform_test() > alpha
        precomputed.remove_row_subset()
        precomputed.remove_col_subset()

        # on normal data, this gives the type II error
        type_II_errors[i] = mmd.perform_test() > alpha

    return type_I_errors.get(), type_II_errors.get(), p_value_null, p_value_spectrum, p_value_gamma
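# A hypothetical driver for the test above (not part of the original
# example): m samples from each of two dim-dimensional distributions whose
# means differ by 0.5.
def demo_quadratic_time_mmd():
    type_I, type_II, p_perm, p_spectrum, p_gamma = statistics_quadratic_time_mmd(100, 3, 0.5)
    # with a real mean shift present, the p-values should tend to be small
    return p_perm, p_spectrum, p_gamma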