示例#1
0
def job_lin_mmd(sample_source, tr, te, r):
    """Run a linear MMD test whose Gaussian width is picked by grid search.

    The kernel is selected on ``tr`` and the test itself is performed on
    ``te``.  ``sample_source`` and ``r`` are accepted but not used here.
    The significance level ``alpha`` is not a parameter: it is looked up
    from the enclosing scope.

    Returns a dict with keys ``'test_method'`` (the ``LinearMMDTest``
    instance), ``'test_result'`` (whatever ``perform_test`` returned) and
    ``'time_secs'`` (the ``secs`` attribute of the timer that wrapped the
    kernel selection and the test).
    """
    # Intended to be fully deterministic.
    with util.ContextTimer() as timer:
        X, Y = tr.xy()

        # Cap each sample at 1000 rows before computing the median distance:
        # with a large n the pairwise computation can run out of memory.
        n_x = min(X.shape[0], 1000)
        n_y = min(Y.shape[0], 1000)
        stacked = np.vstack((X[:n_x, :], Y[:n_y, :]))
        med = util.meddistance(stacked)

        # Candidate widths: med scaled by 40 factors 2**-1 ... 2**4;
        # each kernel receives the squared width.
        scale_factors = 2.0**np.linspace(-1, 4, 40)
        candidates = [kernel.KGauss((med * f)**2) for f in scale_factors]

        # Grid search over the candidates on the training data.
        best_index, _ = tst.LinearMMDTest.grid_search_kernel(
            tr, candidates, alpha)

        # Run the actual test on the held-out data with the chosen kernel.
        lin_mmd_test = tst.LinearMMDTest(candidates[best_index], alpha)
        test_result = lin_mmd_test.perform_test(te)

    return {
        'test_method': lin_mmd_test,
        'test_result': test_result,
        'time_secs': timer.secs,
    }
def mmd(p, q, alpha=0.05):
    """Linear MMD two-sample test between ``p`` and ``q``.

    1-D inputs are turned into single-column 2-D arrays.  The data are
    wrapped in a ``TSTData`` and split in half (``tr_proportion=0.5``); one
    half is used to choose a Gaussian kernel width by grid search, the other
    half to perform the test at level ``alpha``.

    Returns the tuple ``(test_stat, pvalue)`` taken from the test result.
    """
    # Promote vectors to column matrices.
    if p.ndim == 1:
        p = p[:, np.newaxis]
    if q.ndim == 1:
        q = q[:, np.newaxis]

    train, test = data.TSTData(p, q).split_tr_te(tr_proportion=0.5)

    # Candidate widths: the median distance of the stacked training data
    # scaled by 20 factors 2**-1 ... 2**4; kernels take the squared width.
    base_width = util.meddistance(train.stack_xy())
    scale_factors = 2.0**np.linspace(-1, 4, 20)
    candidates = [kernel.KGauss((base_width * f)**2) for f in scale_factors]

    best_index, _ = tst.LinearMMDTest.grid_search_kernel(
        train, candidates, alpha)

    chosen_test = tst.LinearMMDTest(candidates[best_index], alpha)
    outcome = chosen_test.perform_test(test)
    return outcome['test_stat'], outcome['pvalue']