def job_lin_mmd(sample_source, tr, te, r):
    """Run a linear-time MMD test with a grid-searched Gaussian width.

    The median pairwise distance is computed on at most the first 1000
    rows of each sample in ``tr``. It is scaled by 40 factors ``2**s``,
    with ``s`` evenly spaced over [-1, 4], to form the candidate widths.
    ``tst.LinearMMDTest.grid_search_kernel`` picks one candidate kernel
    using ``tr``; that kernel is then used to test ``te``.

    ``sample_source`` and ``r`` are accepted but not used in this body.
    ``alpha`` is not a parameter: it is looked up in the enclosing scope.

    Returns a dict with keys ``'test_method'`` (the ``LinearMMDTest``
    instance), ``'test_result'`` (what ``perform_test(te)`` returned) and
    ``'time_secs'`` (the ``secs`` attribute of the ``util.ContextTimer``).
    """
    # Note kept from the original: this should be completely deterministic.
    row_cap = 1000
    with util.ContextTimer() as t:
        X, Y = tr.xy()
        # If n is too large, the pairwise median computation can cause a
        # memory error, so only a leading subsample is used for it.
        subsample = np.vstack((X[:row_cap, :], Y[:row_cap, :]))
        median_dist = util.meddistance(subsample)

        scales = 2.0 ** np.linspace(-1, 4, 40)
        candidates = [kernel.KGauss((median_dist * s) ** 2) for s in scales]

        # Grid search over the candidate kernels; only the index is needed.
        best_index, _ = tst.LinearMMDTest.grid_search_kernel(
            tr, candidates, alpha)

        # Run the test on the held-out data with the selected kernel.
        lin_mmd_test = tst.LinearMMDTest(candidates[best_index], alpha)
        test_result = lin_mmd_test.perform_test(te)

    return {
        'test_method': lin_mmd_test,
        'test_result': test_result,
        'time_secs': t.secs,
    }
def mmd(p, q, alpha=0.05):
    """Linear-time MMD two-sample test on ``p`` and ``q``.

    One-dimensional inputs are given a trailing axis before use. The data
    are wrapped in ``data.TSTData`` and split with ``tr_proportion=0.5``.
    On the training part, the median distance of the stacked samples is
    scaled by 20 factors ``2**s``, with ``s`` evenly spaced over [-1, 4],
    to form candidate Gaussian widths.
    ``tst.LinearMMDTest.grid_search_kernel`` picks one of the candidate
    kernels, and the test is performed on the other part with it.

    Parameters
    ----------
    p, q : array-like objects exposing ``ndim`` and NumPy-style indexing.
    alpha : level passed to the grid search and to ``LinearMMDTest``.

    Returns
    -------
    tuple
        ``(result['test_stat'], result['pvalue'])`` taken from the dict
        returned by ``perform_test``.
    """
    # Promote flat vectors to single-column arrays.
    p = p[:, np.newaxis] if p.ndim == 1 else p
    q = q[:, np.newaxis] if q.ndim == 1 else q

    dataset = data.TSTData(p, q)
    train_part, test_part = dataset.split_tr_te(tr_proportion=0.5)

    median_dist = util.meddistance(train_part.stack_xy())
    scales = 2.0 ** np.linspace(-1, 4, 20)
    candidates = [kernel.KGauss((median_dist * s) ** 2) for s in scales]

    # Only the index of the chosen kernel is needed from the grid search.
    best_index, _ = tst.LinearMMDTest.grid_search_kernel(
        train_part, candidates, alpha)

    chosen_test = tst.LinearMMDTest(candidates[best_index], alpha)
    outcome = chosen_test.perform_test(test_part)
    return outcome['test_stat'], outcome['pvalue']