Example #1
def statistics_kmm(n, d):
    from modshogun import RealFeatures
    from modshogun import DataGenerator
    from modshogun import GaussianKernel, MSG_DEBUG
    from modshogun import KernelMeanMatching
    from modshogun import Math
    from numpy import random, array, int32

    # init seed for reproducibility
    Math.init_random(1)
    random.seed(1)

    data = random.randn(d, n)

    # create shogun feature representation
    features = RealFeatures(data)

    # use a kernel width of sigma=2, which is 8 in SHOGUN's parametrization,
    # which is k(x,y)=exp(-||x-y||^2 / tau), in contrast to the standard
    # k(x,y)=exp(-||x-y||^2 / (2*sigma^2)), so tau=2*sigma^2
    kernel = GaussianKernel(10, 8)
    kernel.init(features, features)

    kmm = KernelMeanMatching(kernel, array([0, 1, 2, 3, 7, 8, 9], dtype=int32),
                             array([4, 5, 6], dtype=int32))
    w = kmm.compute_weights()
    #print w
    return w
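A minimal driver for the example above, assuming modshogun and NumPy are installed; the hard-coded index sets mean the call only makes sense with at least 10 samples:

# KMM weights for 10 two-dimensional points: indices 0-3 and 7-9 are
# reweighted so their kernel mean matches that of indices 4-6
weights = statistics_kmm(10, 2)
print(weights)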
Example #2
def statistics_kmm (n,d):
	from modshogun import RealFeatures
	from modshogun import DataGenerator
	from modshogun import GaussianKernel, MSG_DEBUG
	from modshogun import KernelMeanMatching
	from modshogun import Math
	from numpy import random, array, int32

	# init seed for reproducibility
	Math.init_random(1)
	random.seed(1)

	data = random.randn(d,n)

	# create shogun feature representation
	features=RealFeatures(data)

	# use a kernel width of sigma=2, which is 8 in SHOGUN's parametrization,
	# which is k(x,y)=exp(-||x-y||^2 / tau), in contrast to the standard
	# k(x,y)=exp(-||x-y||^2 / (2*sigma^2)), so tau=2*sigma^2
	kernel=GaussianKernel(10,8)
	kernel.init(features,features)

	kmm = KernelMeanMatching(kernel,array([0,1,2,3,7,8,9],dtype=int32),array([4,5,6],dtype=int32))
	w = kmm.compute_weights()
	#print w
	return w
Example #3
def regression_svrlight_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat, \
				    width=1.2,C=1,epsilon=1e-5,tube_epsilon=1e-2,num_threads=3):

	from modshogun import RegressionLabels, RealFeatures
	from modshogun import GaussianKernel
	try:
		from modshogun import SVRLight
	except ImportError:
		print('No support for SVRLight available.')
		return

	feats_train=RealFeatures(fm_train)
	feats_test=RealFeatures(fm_test)

	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=RegressionLabels(label_train)

	svr=SVRLight(C, epsilon, kernel, labels)
	svr.set_tube_epsilon(tube_epsilon)
	svr.parallel.set_num_threads(num_threads)
	svr.train()

	kernel.init(feats_train, feats_test)
	out = svr.apply().get_labels()

	return out, kernel
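A call sketch, assuming SVRLight was compiled into the Shogun build; the toy matrices below stand in for traindat/testdat, which the snippet's defaults reference but which are loaded elsewhere:

from numpy import random
X_train = random.rand(2, 20)   # 20 two-dimensional training points
X_test = random.rand(2, 10)
y_train = random.rand(20)      # regression targets
result = regression_svrlight_modular(X_train, X_test, y_train)
if result is not None:         # None when SVRLight is unavailable
    out, kernel = result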
Example #4
def kernel_io_modular(train_fname=traindat, test_fname=testdat, width=1.9):
    from modshogun import RealFeatures, GaussianKernel, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))

    kernel = GaussianKernel(feats_train, feats_train, width)
    km_train = kernel.get_kernel_matrix()
    f = CSVFile("tmp/gaussian_train.csv", "w")
    kernel.save(f)
    del f

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    f = CSVFile("tmp/gaussian_test.csv", "w")
    kernel.save(f)
    del f

    # clean up
    import os

    os.unlink("tmp/gaussian_test.csv")
    os.unlink("tmp/gaussian_train.csv")

    return km_train, km_test, kernel
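Note that the example writes into a local tmp/ directory and expects it to exist. A hedged driver (the CSV paths are placeholders for real feature files):

import os
os.makedirs("tmp", exist_ok=True)  # the example assumes this directory exists
km_train, km_test, kernel = kernel_io_modular("train.csv", "test.csv", width=1.9)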
Example #5
def classifier_multiclassmachine_modular(fm_train_real=traindat,
                                         fm_test_real=testdat,
                                         label_train_multiclass=label_traindat,
                                         width=2.1,
                                         C=1,
                                         epsilon=1e-5):
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibSVM()
    classifier.set_epsilon(epsilon)
    #print labels.get_labels()
    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(),
                                            kernel, classifier, labels)
    mc_classifier.train()

    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out
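A driver sketch with synthetic three-class data; the shapes and class count are illustrative, and MulticlassLabels expects float64 labels:

from numpy import random
X_train = random.rand(2, 30)
X_test = random.rand(2, 10)
y_train = random.randint(0, 3, 30).astype(float)  # labels 0..2 as doubles
pred = classifier_multiclassmachine_modular(X_train, X_test, y_train)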
Example #6
def kernel_gaussian_modular (train_fname=traindat,test_fname=testdat, width=1.3):
	from modshogun import RealFeatures, GaussianKernel, CSVFile

	feats_train=RealFeatures(CSVFile(train_fname))
	feats_test=RealFeatures(CSVFile(test_fname))

	kernel=GaussianKernel(feats_train, feats_train, width)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
Example #7
def kernel_gaussian_modular(train_fname=traindat,
                            test_fname=testdat,
                            width=1.3):
    from modshogun import RealFeatures, GaussianKernel, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))

    kernel = GaussianKernel(feats_train, feats_train, width)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
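Every listing on this page repeats the same train/test pattern: construct the kernel on the training features, read off the training kernel matrix, then call kernel.init(feats_train, feats_test) before computing test-side values. The same pattern without the CSV loading (the feature matrices are illustrative):

from numpy import random
from modshogun import RealFeatures, GaussianKernel

feats_train = RealFeatures(random.rand(2, 15))
feats_test = RealFeatures(random.rand(2, 5))

kernel = GaussianKernel(feats_train, feats_train, 1.3)
km_train = kernel.get_kernel_matrix()   # 15 x 15
kernel.init(feats_train, feats_test)
km_test = kernel.get_kernel_matrix()    # 15 x 5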
Example #8
def kernel_sparse_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.1):
	from modshogun import SparseRealFeatures
	from modshogun import GaussianKernel

	feats_train=SparseRealFeatures(fm_train_real)
	feats_test=SparseRealFeatures(fm_test_real)

	kernel=GaussianKernel(feats_train, feats_train, width)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
Example #9
def kernel_sparse_gaussian_modular(fm_train_real=traindat,
                                   fm_test_real=testdat,
                                   width=1.1):
    from modshogun import SparseRealFeatures
    from modshogun import GaussianKernel

    feats_train = SparseRealFeatures(fm_train_real)
    feats_test = SparseRealFeatures(fm_test_real)

    kernel = GaussianKernel(feats_train, feats_train, width)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
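A call sketch; it assumes, as the Shogun Python interface historically did, that SparseRealFeatures accepts scipy.sparse matrices in compressed-sparse-column form:

from numpy import random
from scipy.sparse import csc_matrix
X_train = csc_matrix(random.rand(2, 20))
X_test = csc_matrix(random.rand(2, 8))
km_train, km_test, kernel = kernel_sparse_gaussian_modular(X_train, X_test, width=1.1)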
Example #10
def classifier_multiclasslibsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
	from modshogun import RealFeatures, MulticlassLabels
	from modshogun import GaussianKernel
	from modshogun import MulticlassLibSVM

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=MulticlassLabels(label_train_multiclass)

	svm=MulticlassLibSVM(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.train()

	kernel.init(feats_train, feats_test)
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()
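A driver sketch mirroring the multiclass data used above (toy values, three classes):

from numpy import random
X_train = random.rand(2, 30)
X_test = random.rand(2, 10)
y_train = random.randint(0, 3, 30).astype(float)
predictions, svm, labels = classifier_multiclasslibsvm_modular(X_train, X_test, y_train)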
Example #11
def regression_libsvr_modular (svm_c=1, svr_param=0.1, n=100,n_test=100, \
		x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):

	from modshogun import RegressionLabels, RealFeatures
	from modshogun import GaussianKernel
	from modshogun import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR
	from numpy import random, array, sin

	# reproducible results
	random.seed(seed)

	# easy regression data: one dimensional noisy sine wave
	# note: these assignments shadow the corresponding arguments
	n=15
	n_test=100
	x_range_test=10
	noise_var=0.5
	X=random.rand(1,n)*x_range

	X_test=array([[float(i)/n_test*x_range_test for i in range(n_test)]])
	Y_test=sin(X_test)
	Y=sin(X)+random.randn(n)*noise_var

	# shogun representation
	labels=RegressionLabels(Y[0])
	feats_train=RealFeatures(X)
	feats_test=RealFeatures(X_test)

	kernel=GaussianKernel(feats_train, feats_train, width)

	# two svr models: epsilon and nu
	svr_epsilon=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
	svr_epsilon.train()
	svr_nu=LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
	svr_nu.train()

	# predictions
	kernel.init(feats_train, feats_test)
	out1_epsilon=svr_epsilon.apply().get_labels()
	out2_epsilon=svr_epsilon.apply(feats_test).get_labels()
	out1_nu=svr_nu.apply().get_labels()
	out2_nu=svr_nu.apply(feats_test).get_labels()

	return out1_epsilon,out2_epsilon,out1_nu,out2_nu,kernel
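Because the body overrides n, n_test, x_range_test and noise_var and synthesises its own sine-wave data, a bare call is enough to exercise both SVR variants:

out1_eps, out2_eps, out1_nu, out2_nu, kernel = regression_libsvr_modular(svm_c=1, svr_param=0.1)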
Example #12
def regression_kernel_ridge_modular (n=100,n_test=100, \
  x_range=6,x_range_test=10,noise_var=0.5,width=1, tau=1e-6, seed=1):

    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import KernelRidgeRegression
    from numpy import random, array, sin

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    krr = KernelRidgeRegression(tau, kernel, labels)
    krr.train(feats_train)

    kernel.init(feats_train, feats_test)
    out = krr.apply().get_labels()

    # plot results
    #plot(X[0],Y[0],'x') # training observations
    #plot(X_test[0],Y_test[0],'-') # ground truth of test
    #plot(X_test[0],out, '-') # mean predictions of test
    #legend(["training", "ground truth", "mean predictions"])
    #show()

    return out, kernel, krr
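As with the LibSVR example, the defaults generate their own data, so a bare call suffices:

out, kernel, krr = regression_kernel_ridge_modular(tau=1e-6, seed=1)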
Example #13
def regression_kernel_ridge_modular (n=100,n_test=100, \
		x_range=6,x_range_test=10,noise_var=0.5,width=1, tau=1e-6, seed=1):

	from modshogun import RegressionLabels, RealFeatures
	from modshogun import GaussianKernel
	from modshogun import KernelRidgeRegression
	from numpy import random, array, sin

	# reproducible results
	random.seed(seed)

	# easy regression data: one dimensional noisy sine wave
	n=15
	n_test=100
	x_range_test=10
	noise_var=0.5
	X=random.rand(1,n)*x_range

	X_test=array([[float(i)/n_test*x_range_test for i in range(n_test)]])
	Y_test=sin(X_test)
	Y=sin(X)+random.randn(n)*noise_var

	# shogun representation
	labels=RegressionLabels(Y[0])
	feats_train=RealFeatures(X)
	feats_test=RealFeatures(X_test)

	kernel=GaussianKernel(feats_train, feats_train, width)

	krr=KernelRidgeRegression(tau, kernel, labels)
	krr.train(feats_train)

	kernel.init(feats_train, feats_test)
	out = krr.apply().get_labels()

	# plot results
	#plot(X[0],Y[0],'x') # training observations
	#plot(X_test[0],Y_test[0],'-') # ground truth of test
	#plot(X_test[0],out, '-') # mean predictions of test
	#legend(["training", "ground truth", "mean predictions"])
	#show()

	return out,kernel,krr
Example #14
def regression_libsvr_modular (svm_c=1, svr_param=0.1, n=100,n_test=100, \
  x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):

    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR
    from numpy import random, array, sin

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # two svr models: epsilon and nu
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.train()
    svr_nu = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    svr_nu.train()

    # predictions
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    out1_nu = svr_nu.apply().get_labels()
    out2_nu = svr_nu.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel
Example #15
def classifier_multiclassmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
	from modshogun import RealFeatures, MulticlassLabels
	from modshogun import GaussianKernel
	from modshogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=MulticlassLabels(label_train_multiclass)

	classifier = LibSVM()
	classifier.set_epsilon(epsilon)
	#print labels.get_labels()
	mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(),kernel,classifier,labels)
	mc_classifier.train()

	kernel.init(feats_train, feats_test)
	out = mc_classifier.apply().get_labels()
	return out
Example #16
def classifier_multiclasslibsvm_modular(fm_train_real=traindat,
                                        fm_test_real=testdat,
                                        label_train_multiclass=label_traindat,
                                        width=2.1,
                                        C=1,
                                        epsilon=1e-5):
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import MulticlassLibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    svm = MulticlassLibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    kernel.init(feats_train, feats_test)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
Example #17
def kernel_io_modular(train_fname=traindat, test_fname=testdat, width=1.9):
    from modshogun import RealFeatures, GaussianKernel, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))

    kernel = GaussianKernel(feats_train, feats_train, width)
    km_train = kernel.get_kernel_matrix()
    f = CSVFile("tmp/gaussian_train.csv", "w")
    kernel.save(f)
    del f

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    f = CSVFile("tmp/gaussian_test.csv", "w")
    kernel.save(f)
    del f

    #clean up
    import os
    os.unlink("tmp/gaussian_test.csv")
    os.unlink("tmp/gaussian_train.csv")

    return km_train, km_test, kernel
Example #18
def statistics_quadratic_time_mmd(m, dim, difference):
    from modshogun import RealFeatures
    from modshogun import MeanShiftDataGenerator
    from modshogun import GaussianKernel, CustomKernel
    from modshogun import QuadraticTimeMMD
    from modshogun import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
    from modshogun import Statistics, IntVector, RealVector, Math
    from numpy import random, array, int32

    # init seed for reproducibility
    Math.init_random(1)
    random.seed(17)

    # number of examples kept low in order to make things fast

    # streaming data generator for mean shift distributions
    gen_p = MeanShiftDataGenerator(0, dim)
    #gen_p.parallel.set_num_threads(1)
    gen_q = MeanShiftDataGenerator(difference, dim)

    # stream some data from generator
    feat_p = gen_p.get_streamed_features(m)
    feat_q = gen_q.get_streamed_features(m)

    # set kernel a-priori. usually one would do some kernel selection. See
    # other examples for this.
    width = 10
    kernel = GaussianKernel(10, width)

    # create quadratic time mmd instance. Note that this constructor
    # copies p and q and does not reference them
    mmd = QuadraticTimeMMD(kernel, feat_p, feat_q)

    # perform test: compute p-value and test if null-hypothesis is rejected for
    # a test level of 0.05
    alpha = 0.05

    # using bootstrapping (slow and not the most reliable way; consider pre-
    # computing the kernel when reusing it, see below).
    # Also, in practice, use at least 250 iterations
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(3)
    p_value_boot = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "bootstrap: p!=q: ", p_value_boot<alpha

    # using spectrum method. Use at least 250 samples from null.
    # This is consistent but sometimes breaks, always monitor type I error.
    # See tutorial for the number of eigenvalues to use.
    # Only works with BIASED statistic
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_SPECTRUM)
    mmd.set_num_eigenvalues_spectrum(3)
    mmd.set_num_samples_spectrum(250)
    p_value_spectrum = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "spectrum: p!=q: ", p_value_spectrum<alpha

    # using gamma method. This is a quick hack, which works most of the time
    # but is NOT guaranteed to. See tutorial for details.
    # Only works with BIASED statistic
    mmd.set_statistic_type(BIASED)
    mmd.set_null_approximation_method(MMD2_GAMMA)
    p_value_gamma = mmd.perform_test()
    # reject if p-value is smaller than test level
    #print "gamma: p!=q: ", p_value_gamma<alpha

    # compute type I and II error (use many more trials in practice).
    # Type I error is not necessary if one uses bootstrapping. We do it here
    # anyway, but note that this is an efficient way of computing it.
    # Also note that testing has to happen on
    # different data than kernel selection, but the linear time mmd does this
    # implicitly and we used a fixed kernel here.
    mmd.set_null_approximation_method(BOOTSTRAP)
    mmd.set_bootstrap_iterations(5)
    num_trials = 5
    type_I_errors = RealVector(num_trials)
    type_II_errors = RealVector(num_trials)
    inds = int32(array([x for x in range(2 * m)]))  # numpy
    p_and_q = mmd.get_p_and_q()

    # use a precomputed kernel to be faster
    kernel.init(p_and_q, p_and_q)
    precomputed = CustomKernel(kernel)
    mmd.set_kernel(precomputed)
    for i in range(num_trials):
        # this effectively means that p=q - rejecting is type I error
        inds = random.permutation(inds)  # numpy permutation
        precomputed.add_row_subset(inds)
        precomputed.add_col_subset(inds)
        type_I_errors[i] = mmd.perform_test() > alpha
        precomputed.remove_row_subset()
        precomputed.remove_col_subset()

        # on normal data, this gives type II error
        type_II_errors[i] = mmd.perform_test() > alpha

    return (type_I_errors.get(), type_II_errors.get(), p_value_boot,
            p_value_spectrum, p_value_gamma)
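A driver sketch; the arguments are illustrative, and m is kept small because the quadratic-time statistic operates on a full 2m x 2m kernel matrix:

errs_I, errs_II, p_boot, p_spectrum, p_gamma = statistics_quadratic_time_mmd(30, 2, 0.5)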
Example #19
def statistics_quadratic_time_mmd (m,dim,difference):
	from modshogun import RealFeatures
	from modshogun import MeanShiftDataGenerator
	from modshogun import GaussianKernel, CustomKernel
	from modshogun import QuadraticTimeMMD
	from modshogun import PERMUTATION, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, BIASED_DEPRECATED
	from modshogun import Statistics, IntVector, RealVector, Math
	from numpy import random, array, int32

	# init seed for reproducibility
	Math.init_random(1)
	random.seed(17)

	# number of examples kept low in order to make things fast

	# streaming data generator for mean shift distributions
	gen_p=MeanShiftDataGenerator(0, dim)
	#gen_p.parallel.set_num_threads(1)
	gen_q=MeanShiftDataGenerator(difference, dim)

	# stream some data from generator
	feat_p=gen_p.get_streamed_features(m)
	feat_q=gen_q.get_streamed_features(m)

	# set kernel a-priori. usually one would do some kernel selection. See
	# other examples for this.
	width=10
	kernel=GaussianKernel(10, width)

	# create quadratic time mmd instance. Note that this constructor
	# copies p and q and does not reference them
	mmd=QuadraticTimeMMD(kernel, feat_p, feat_q)

	# perform test: compute p-value and test if null-hypothesis is rejected for
	# a test level of 0.05
	alpha=0.05

	# using permutation (slow and not the most reliable way; consider pre-
	# computing the kernel when reusing it, see below).
	# Also, in practice, use at least 250 null samples
	mmd.set_null_approximation_method(PERMUTATION)
	mmd.set_num_null_samples(3)
	p_value_null=mmd.perform_test()
	# reject if p-value is smaller than test level
	#print "permutation: p!=q: ", p_value_null<alpha

	# using spectrum method. Use at least 250 samples from null.
	# This is consistent but sometimes breaks, always monitor type I error.
	# See tutorial for the number of eigenvalues to use.
	mmd.set_statistic_type(BIASED)
	mmd.set_null_approximation_method(MMD2_SPECTRUM)
	mmd.set_num_eigenvalues_spectrum(3)
	mmd.set_num_samples_spectrum(250)
	p_value_spectrum=mmd.perform_test()
	# reject if p-value is smaller than test level
	#print "spectrum: p!=q: ", p_value_spectrum<alpha

	# using gamma method. This is a quick hack, which works most of the time
	# but is NOT guaranteed to. See tutorial for details.
	# Only works with BIASED_DEPRECATED statistic
	mmd.set_statistic_type(BIASED_DEPRECATED)
	mmd.set_null_approximation_method(MMD2_GAMMA)
	p_value_gamma=mmd.perform_test()
	# reject if p-value is smaller than test level
	#print "gamma: p!=q: ", p_value_gamma<alpha

	# compute type I and II error (use many more trials in practice).
	# Type I error is not necessary if one uses permutation. We do it here
	# anyway, but note that this is an efficient way of computing it.
	# Also note that testing has to happen on
	# different data than kernel selection, but the linear time mmd does this
	# implicitly and we used a fixed kernel here.
	mmd.set_statistic_type(BIASED)
	mmd.set_null_approximation_method(PERMUTATION)
	mmd.set_num_null_samples(5)
	num_trials=5
	type_I_errors=RealVector(num_trials)
	type_II_errors=RealVector(num_trials)
	inds=int32(array([x for x in range(2*m)])) # numpy
	p_and_q=mmd.get_p_and_q()

	# use a precomputed kernel to be faster
	kernel.init(p_and_q, p_and_q)
	precomputed=CustomKernel(kernel)
	mmd.set_kernel(precomputed)
	for i in range(num_trials):
		# this effectively means that p=q - rejecting is type I error
		inds=random.permutation(inds) # numpy permutation
		precomputed.add_row_subset(inds)
		precomputed.add_col_subset(inds)
		type_I_errors[i]=mmd.perform_test()>alpha
		precomputed.remove_row_subset()
		precomputed.remove_col_subset()

		# on normal data, this gives type II error
		type_II_errors[i]=mmd.perform_test()>alpha

	return type_I_errors.get(),type_II_errors.get(),p_value_null,p_value_spectrum,p_value_gamma
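The two MMD listings target different Shogun releases: the first uses BOOTSTRAP with set_bootstrap_iterations, the second the renamed PERMUTATION with set_num_null_samples. A hedged compatibility shim built only from the names the two examples themselves use:

def set_null_sampling(mmd, num_samples):
    # hypothetical helper: pick whichever permutation-based API this build exposes
    try:
        from modshogun import PERMUTATION
        mmd.set_null_approximation_method(PERMUTATION)
        mmd.set_num_null_samples(num_samples)
    except ImportError:
        from modshogun import BOOTSTRAP
        mmd.set_null_approximation_method(BOOTSTRAP)
        mmd.set_bootstrap_iterations(num_samples)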