def get_kernel(request, features):
    """Build the Shogun kernel selected by the POSTed form fields.

    The kernel type is read from ``request.POST["kernel"]``. A Gaussian
    kernel additionally reads ``"sigma"`` and a polynomial kernel reads
    ``"degree"``. The kernel is constructed on ``features`` against itself.

    Args:
        request: Object exposing a mapping-like ``POST`` attribute.
        features: Features passed as both left- and right-hand side of the
            kernel.

    Returns:
        The constructed ``sg`` kernel object.

    Raises:
        ValueError: If the kernel name is missing or not recognised, or if
            the kernel-specific parameter is missing or cannot be parsed.
    """
    # Only lookup/parse failures are translated into ValueError; anything
    # else (e.g. KeyboardInterrupt) must propagate untouched.
    try:
        kernel_name = request.POST["kernel"]
    except (KeyError, TypeError) as err:
        raise ValueError("Unknown kernel") from err

    if kernel_name == "GaussianKernel":
        try:
            sigma = float(request.POST["sigma"])
        except (KeyError, TypeError, ValueError) as err:
            raise ValueError("Sigma is not correct") from err
        kernel = sg.GaussianKernel(features, features, sigma)
    elif kernel_name == "LinearKernel":
        kernel = sg.LinearKernel(features, features)
        kernel.set_normalizer(sg.IdentityKernelNormalizer())
    elif kernel_name == "PolynomialKernel":
        try:
            degree = int(request.POST["degree"])
        except (KeyError, TypeError, ValueError) as err:
            raise ValueError("degree is not correct") from err
        kernel = sg.PolyKernel(features, features, degree, True)
        kernel.set_normalizer(sg.IdentityKernelNormalizer())
    else:
        raise ValueError("Unknown kernel")
    return kernel
def get_shogun_statistics(self):
    """Compute and print the biased and unbiased quadratic-time MMD statistics.

    The configured test object is kept on ``self._mmd`` and the unbiased
    statistic on ``self._statistic``.

    Returns:
        ``self``, so calls can be chained.
    """
    # Shogun representation: each sample becomes a single row whose columns
    # are the individual observations.
    feat_p = sg.RealFeatures(self._x.reshape(1, len(self._x)))
    feat_q = sg.RealFeatures(self._y.reshape(1, len(self._y)))

    # Gaussian kernel used for the test, with width 1.
    gaussian = sg.GaussianKernel(10, 1)

    # Keep the test object on the instance; `mmd` is just a local alias.
    self._mmd = mmd = sg.QuadraticTimeMMD()
    mmd.set_kernel(gaussian)
    mmd.set_p(feat_p)
    mmd.set_q(feat_q)

    # Biased estimate first, then the unbiased one that is stored.
    mmd.set_statistic_type(sg.ST_BIASED_FULL)
    biased_statistic = mmd.compute_statistic()
    mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
    unbiased_statistic = mmd.compute_statistic()
    self._statistic = unbiased_statistic

    report_lines = (
        "\nShogun tests statistics:",
        f"biased test statistic {len(self._x)} x MMD_b[X,Y]^2={biased_statistic:.2f}",
        f"unbiased test statistic {len(self._x)} x MMD_u[X,Y]^2={unbiased_statistic:.2f}",
    )
    for line in report_lines:
        print(line)
    return self
def mix_rbf_kernel(X, Y):
    """Return the quadratic-time MMD statistic between ``X`` and ``Y``.

    A single Gaussian kernel is registered, kernel selection is run with the
    ``KSM_MAXIMIZE_MMD`` strategy in train/test mode (ratio 1), and the
    statistic is computed afterwards.
    """
    import shogun as sg

    two_sample_test = sg.QuadraticTimeMMD()

    # The two samples under comparison.
    two_sample_test.set_p(sg.Features(X))
    two_sample_test.set_q(sg.Features(Y))

    # Candidate kernel and the selection configuration.
    two_sample_test.add_kernel(sg.GaussianKernel(10, 1.0))
    two_sample_test.set_kernel_selection_strategy(sg.KSM_MAXIMIZE_MMD)
    two_sample_test.set_train_test_mode(True)
    two_sample_test.set_train_test_ratio(1)
    two_sample_test.select_kernel()

    return two_sample_test.compute_statistic()
def visualise_distribution_test_statistic(self, alpha=0.05):
    """Plot the MMD statistic under the null and the alternative hypothesis.

    Null samples come from ``self._mmd.sample_null()``. Alternative samples
    are produced by repeatedly drawing fresh normal and Laplace data and
    recomputing the quadratic-time MMD statistic. Three histograms are
    drawn (null, alternative, both overlaid), and the overlay panel marks
    the ``(1 - alpha)`` empirical quantile of the null samples in red.

    Args:
        alpha: Test level used to pick the ``(1 - alpha)`` quantile of the
            null samples.

    Returns:
        ``self``, so calls can be chained.
    """
    num_samples = 500
    kernel_width = 1

    # Sample the null distribution from the already configured test.
    null_samples = self._mmd.sample_null()

    # Sample the alternative distribution, generating new data each time.
    alt_samples = np.zeros(num_samples)
    for i in range(num_samples):
        # NOTE(review): scipy's `scale` is a standard deviation, while the
        # attribute name suggests a variance -- confirm which is intended.
        x = norm.rvs(size=self._n, loc=self._mu, scale=self._sigma_squared)
        y = laplace.rvs(size=self._n, loc=self._mu, scale=self._b)

        feat_p = sg.RealFeatures(np.reshape(x, (1, len(x))))
        feat_q = sg.RealFeatures(np.reshape(y, (1, len(y))))

        mmd = sg.QuadraticTimeMMD()
        mmd.set_kernel(sg.GaussianKernel(10, kernel_width))
        mmd.set_p(feat_p)
        mmd.set_q(feat_q)
        alt_samples[i] = mmd.compute_statistic()

    plt.figure(figsize=(18, 5))

    plt.subplot(131)
    plt.hist(null_samples, 50, color='blue')
    plt.title('Null distribution')

    plt.subplot(132)
    plt.title('Alternative distribution')
    plt.hist(alt_samples, 50, color='green')

    plt.subplot(133)
    plt.hist(null_samples, 50, color='blue')
    plt.hist(alt_samples, 50, color='green', alpha=0.5)
    plt.title('Null and alternative distriution')

    # Find the (1 - alpha) element of the sorted null samples. The index is
    # clamped so that alpha == 0 selects the maximum instead of running one
    # past the end of the array.
    null_samples_sorted = np.sort(null_samples)
    quantile_idx = min(
        int(len(null_samples) * (1 - alpha)), len(null_samples) - 1
    )
    quantile = null_samples_sorted[quantile_idx]
    plt.axvline(
        x=quantile,
        ymin=0,
        ymax=100,
        color='red',
        label=f"{int(round((1 - alpha) * 100))}% quantile of null",
    )
    # Without a legend the label attached to the quantile line is never shown.
    plt.legend()
    plt.show()
    return self
def kernel_prepare(self):
    """Assemble a combined kernel from ``self.kernel_dict``.

    For every entry stored under a recognised kernel type
    (``'GaussianKernel'``, ``'PolyKernel'``, ``'LinearKernel'``) one
    sub-kernel of that type is appended. Other kernel types are ignored.

    Returns:
        The populated ``shogun.CombinedKernel``.
    """
    combined = shogun.CombinedKernel()

    # Factories are lazy so the width/degree attributes are only read when a
    # kernel of that type is actually requested.
    factories = {
        'GaussianKernel': lambda: shogun.GaussianKernel(self.gaussian_width),
        'PolyKernel': lambda: shogun.PolyKernel(10, self.poly_degree),
        'LinearKernel': lambda: shogun.LinearKernel(),
    }

    for kernel_type, per_feature in self.kernel_dict.items():
        make_kernel = factories.get(kernel_type)
        if make_kernel is None:
            continue
        # One sub-kernel per configured feature; the feature value itself is
        # not consulted here.
        for _ in per_feature.values():
            combined.append_kernel(make_kernel())
    return combined
def get_kernel_function(self, kernel, name):
    """Map a kernel name to its Shogun counterpart.

    Args:
        kernel: Mapping from kernel name to that kernel's parameter mapping.
        name: Kernel name to translate.

    Returns:
        A Shogun kernel for ``'RBF'`` and ``'Const'``, an explanatory string
        for ``'Matern'``, ``'White'`` and ``'RatQd'``, and ``None`` for any
        other name.
    """
    if name in ('Matern', 'White', 'RatQd'):
        return (
            'Not sure whether this library supports the specified kernel type'
        )
    if name == 'RBF':
        params = kernel[name]
        return shogun.GaussianKernel(params['lengthscale'])
    if name == 'Const':
        params = kernel[name]
        return shogun.ConstKernel(params['constant'])
    return None
def shogun_mmd(X, Y, kernel_width, null_samples=1000, median_samples=1000,
               cache_size=32):
    '''
    Perform a quadratic-time MMD two-sample test with a Gaussian kernel.

    Parameters
    ----------
    X : row-instance feature array
    Y : row-instance feature array
    kernel_width : float
        The bandwidth of the RBF kernel (sigma).
    null_samples : int
        How many times to sample from the null distribution.
    median_samples : int
        Accepted but not used by this function.
    cache_size : int
        Passed as the first argument of the Gaussian kernel constructor.

    Returns
    -------
    p_val : float
        The obtained p value of the test.
    stat : float
        The test statistic.
    null_samples : array of length null_samples
        The samples from the null distribution.
    '''
    import shogun as sg

    def to_features(rows):
        # Inputs are row-instance; they are transposed and cast to float64
        # before being handed to Shogun.
        return sg.RealFeatures(rows.T.astype(np.float64))

    test = sg.QuadraticTimeMMD()
    test.set_p(to_features(X))
    test.set_q(to_features(Y))
    test.set_kernel(sg.GaussianKernel(cache_size, float(kernel_width)))
    test.set_num_null_samples(null_samples)

    null_draws = test.sample_null()
    statistic = test.compute_statistic()

    # Fraction of null draws that are at least as large as the statistic.
    p_value = np.mean(statistic <= null_draws)
    return p_value, statistic, null_draws
def _process(x1_set, x2_set, kernel_width, kernel_name, degree): num = len(x1_set) if num == 0: raise Http404 examples = np.zeros((2, num)) for i in xrange(num): examples[0, i] = x1_set[i] examples[1, i] = x2_set[i] feat_train = sg.RealFeatures(examples) # construct covariance function if kernel_name == "LinearKernel": kernel = sg.LinearKernel(feat_train, feat_train) elif kernel_name == "PolynomialKernel": kernel = sg.PolyKernel(feat_train, feat_train, degree, True) elif kernel_name == "GaussianKernel": kernel = sg.GaussianKernel(feat_train, feat_train, kernel_width) kernel_matrix = kernel.get_kernel_matrix() return kernel_matrix.tolist()
def mmd_test(Sample1, Sample2):
    """Compute the unbiased quadratic-time MMD statistic column by column.

    For every column index of ``Sample1``, the matching columns of both
    inputs are compared with a Gaussian-kernel quadratic-time MMD.

    NOTE(review): the original formatting was collapsed onto one line; this
    layout assumes the ``return`` follows the loop, in which case only the
    statistic of the last column is returned -- confirm the intended
    behaviour.
    """
    for i in range(Sample1.shape[1]):
        x = Sample1[:, i]
        y = Sample2[:, i]
        # each column becomes a single row of observations for Shogun
        feat_p = sg.RealFeatures(x.reshape(1, len(x)))
        feat_q = sg.RealFeatures(y.reshape(1, len(y)))
        # choose kernel for testing. Here: Gaussian
        kernel_width = 1
        kernel = sg.GaussianKernel(10, kernel_width)
        # create mmd instance of test-statistic
        mmd = sg.QuadraticTimeMMD()
        mmd.set_kernel(kernel)
        mmd.set_p(feat_p)
        mmd.set_q(feat_q)
        # compute the unbiased test statistic
        mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
        statistic = mmd.compute_statistic()
    return statistic
def _read_data(request): labels = [] features = [] data = json.loads(request.POST['point_set']) tau = float(request.POST['Tau']) for pt in data: labels.append(float(pt["y"])) features.append(float(pt["x"])) labels = np.array(labels, dtype=np.float64) num = len(features) if num == 0: raise TypeError examples = np.zeros((1, num)) for i in xrange(num): examples[0, i] = features[i] lab = sg.RegressionLabels(labels) train = sg.RealFeatures(examples) sigma = float(request.POST["sigma"]) kernel = sg.GaussianKernel(train, train, sigma) return (tau, lab, kernel, train)
# Pairwise two-sample MMD tests within each of three groups of five samples.
# The f_*/g_*/w_* arrays are defined outside this section.
f = [f_gauss, f_sif, f_pca, f_mwv, f_wmd]
g = [g_gauss, g_sif, g_pca, g_mwv, g_wmd]
w = [w_gauss, w_sif, w_pca, w_mwv, w_wmd]

# One 5 x 5 grid per group, initialised to zero.
# NOTE(review): nothing in the visible code writes into `rejected`;
# presumably it is filled by code that follows this section -- confirm.
rejected = np.zeros((3, 5, 5))

for k, emb in enumerate([f, g, w]):
    for i, X in enumerate(emb):
        for j, Y in enumerate(emb):
            # re-seed before every pair so each test starts from the same
            # random state
            sg.Math.init_random(0)

            # turn data into Shogun representation (columns vectors)
            feat_p = sg.RealFeatures(X.reshape(1, len(X)))
            feat_q = sg.RealFeatures(Y.reshape(1, len(Y)))

            # choose kernel for testing. Here: Gaussian
            kernel_width = 1
            kernel = sg.GaussianKernel(10, kernel_width)

            # create mmd instance of test-statistic
            mmd = sg.QuadraticTimeMMD()
            mmd.set_kernel(kernel)
            mmd.set_p(feat_p)
            mmd.set_q(feat_q)

            # compute unbiased test statistic
            mmd.set_statistic_type(sg.ST_UNBIASED_FULL)
            statistic = unbiased_statistic = mmd.compute_statistic()

            # approximate the null distribution by permutation, 1000 samples
            mmd.set_null_approximation_method(sg.NAM_PERMUTATION)
            mmd.set_num_null_samples(1000)

            # compute p-value for computed test statistic
            p_value = mmd.compute_p_value(statistic)

            # compute threshold for rejecting H_0 for a given test power
            # NOTE(review): the threshold computation itself is not present
            # in this section.
import numpy as np
import shogun as sg

# Minimal quadratic-time MMD example on two random 100 x 3 samples.
X = np.random.randn(100, 3)
# Y is drawn the same way and shifted by 0.5.
Y = np.random.randn(100, 3) + .5

mmd = sg.QuadraticTimeMMD()
# The samples are transposed before being wrapped as Shogun features.
mmd.set_p(sg.RealFeatures(X.T))
mmd.set_q(sg.RealFeatures(Y.T))
# Gaussian kernel; presumably (cache size, width) -- confirm against the
# Shogun version in use.
mmd.set_kernel(sg.GaussianKernel(32, 1))
mmd.set_num_null_samples(200)

# Draw the null samples, then compute the test statistic.
samps = mmd.sample_null()
stat = mmd.compute_statistic()