Пример #1
0
def perform_mmd_test(train_miss_impute,
                     test_miss_impute,
                     train_full,
                     test_full,
                     alpha,
                     mmd_miss_impute=None,
                     mmd_full=None):
    """Run a QuadMMDTest on the imputed pair and on the full pair.

    Each (train, test) pair is wrapped in a ``TSTData``. When the matching
    tester argument is ``None``, a new ``tst.QuadMMDTest`` is built around a
    ``kernel.KGauss`` whose parameter is the standard deviation of
    ``metrics.pairwise_distances`` between the two samples of that pair.
    A tester that is passed in is used as-is.

    Args:
        train_miss_impute: first sample of the imputed pair.
        test_miss_impute: second sample of the imputed pair.
        train_full: first sample of the full pair.
        test_full: second sample of the full pair.
        alpha: forwarded as ``alpha`` to any newly created ``QuadMMDTest``.
        mmd_miss_impute: optional existing tester for the imputed pair.
        mmd_full: optional existing tester for the full pair.

    Returns:
        ``(mmd_result, mmd_miss_impute, mmd_full)``. ``mmd_result`` is a
        length-2 float array; entry 0 is 1 when the imputed-pair test result
        has a truthy ``'h0_rejected'`` and entry 1 likewise for the full
        pair (0 otherwise). The two testers (new or supplied) are returned
        alongside it.
    """
    rejections = np.zeros(2)

    # --- imputed pair -----------------------------------------------------
    imputed_pair = TSTData(train_miss_impute, test_miss_impute)
    if mmd_miss_impute is None:
        print('ini')
        sample_a, sample_b = imputed_pair.xy()
        spread = metrics.pairwise_distances(sample_a, sample_b).std()
        mmd_miss_impute = tst.QuadMMDTest(kernel.KGauss(spread), alpha=alpha)
    if mmd_miss_impute.perform_test(imputed_pair)['h0_rejected']:
        rejections[0] = 1

    # --- full pair --------------------------------------------------------
    full_pair = TSTData(train_full, test_full)
    if mmd_full is None:
        sample_a, sample_b = full_pair.xy()
        spread = metrics.pairwise_distances(sample_a, sample_b).std()
        mmd_full = tst.QuadMMDTest(kernel.KGauss(spread), alpha=alpha)
    if mmd_full.perform_test(full_pair)['h0_rejected']:
        rejections[1] = 1

    return rejections, mmd_miss_impute, mmd_full
Пример #2
0
def perform_mmd_test(train_miss_impute,
                     test_miss_impute,
                     train_full,
                     test_full,
                     alpha,
                     mmd_miss_impute=None,
                     mmd_full=None):
    """Run a QuadMMDTest on the imputed pair and on the full pair.

    Each (train, test) pair is wrapped in a ``TSTData``. When the matching
    tester argument is ``None``, a new ``tst.QuadMMDTest`` is built around a
    ``kernel.KGauss`` parameterised by that pair's ``mean_std()``.
    A tester that is passed in is used as-is.

    Args:
        train_miss_impute: first sample of the imputed pair.
        test_miss_impute: second sample of the imputed pair.
        train_full: first sample of the full pair.
        test_full: second sample of the full pair.
        alpha: forwarded as ``alpha`` to any newly created ``QuadMMDTest``.
        mmd_miss_impute: optional existing tester for the imputed pair.
        mmd_full: optional existing tester for the full pair.

    Returns:
        ``(mmd_result, mmd_miss_impute, mmd_full)``. ``mmd_result`` is a
        length-2 float array; entry 0 is 1 when the imputed-pair test result
        has a truthy ``'h0_rejected'`` and entry 1 likewise for the full
        pair (0 otherwise). The two testers (new or supplied) are returned
        alongside it.
    """
    rejections = np.zeros(2)

    # --- imputed pair -----------------------------------------------------
    imputed_pair = TSTData(train_miss_impute, test_miss_impute)
    if mmd_miss_impute is None:
        print('ini')
        imputed_kernel = kernel.KGauss(imputed_pair.mean_std())
        mmd_miss_impute = tst.QuadMMDTest(imputed_kernel, alpha=alpha)
    if mmd_miss_impute.perform_test(imputed_pair)['h0_rejected']:
        rejections[0] = 1

    # --- full pair --------------------------------------------------------
    full_pair = TSTData(train_full, test_full)
    if mmd_full is None:
        full_kernel = kernel.KGauss(full_pair.mean_std())
        mmd_full = tst.QuadMMDTest(full_kernel, alpha=alpha)
    if mmd_full.perform_test(full_pair)['h0_rejected']:
        rejections[1] = 1

    return rejections, mmd_miss_impute, mmd_full
Пример #3
0
def job_lin_mmd(sample_source, tr, te, r):
    """Linear-time MMD test with the Gaussian width chosen by grid search.

    The kernel is selected on ``tr`` and the test is performed on ``te``.
    ``sample_source`` and ``r`` are accepted but not used in this function.
    ``alpha`` is not a parameter: it is read from the enclosing scope.

    Returns:
        dict with keys ``'test_method'`` (the ``LinearMMDTest`` used),
        ``'test_result'`` (the value returned by ``perform_test``) and
        ``'time_secs'`` (``secs`` of the ``util.ContextTimer`` wrapping the
        whole procedure).
    """
    # The original author notes this should be completely deterministic.
    with util.ContextTimer() as timer:
        x_all, y_all = tr.xy()
        # Keep at most the first 1000 rows of each sample: with a large n
        # the pairwise median computation can run out of memory.
        x_rows = min(x_all.shape[0], 1000)
        y_rows = min(y_all.shape[0], 1000)
        pooled = np.vstack((x_all[:x_rows, :], y_all[:y_rows, :]))

        median_dist = util.meddistance(pooled)
        # 40 candidate widths: the median distance scaled by 2**-1 .. 2**4;
        # each kernel receives the squared width.
        scale_factors = 2.0**np.linspace(-1, 4, 40)
        candidates = [
            kernel.KGauss((median_dist * factor)**2)
            for factor in scale_factors
        ]

        # Grid search on the training data, then test on the held-out data.
        best_index, _powers = tst.LinearMMDTest.grid_search_kernel(
            tr, candidates, alpha)
        lin_mmd_test = tst.LinearMMDTest(candidates[best_index], alpha)
        test_result = lin_mmd_test.perform_test(te)

    return {
        'test_method': lin_mmd_test,
        'test_result': test_result,
        'time_secs': timer.secs,
    }
Пример #4
0
def job_quad_mmd_2U(sample_source, tr, te, r):
    """Quadratic-time MMD test with the Gaussian width chosen by grid search.

    The final test is built with ``use_1sample_U=False`` (the original
    author describes this as using two-sample U statistics for k(X,Y)).
    The kernel is selected on ``tr`` and the test is performed on ``te``.
    ``sample_source`` and ``r`` are accepted but not used in this function.
    ``alpha`` is not a parameter: it is read from the enclosing scope.

    Returns:
        dict with keys ``'test_method'`` (the ``QuadMMDTest`` used),
        ``'test_result'`` (the value returned by ``perform_test``) and
        ``'time_secs'`` (``secs`` of the ``util.ContextTimer`` wrapping the
        whole procedure).
    """
    with util.ContextTimer() as timer:
        # 1000 is passed to meddistance because, for large n, the pairwise
        # median computation can cause a memory error.
        median_dist = util.meddistance(tr.stack_xy(), 1000)

        # 40 candidate squared widths: median**2 scaled by 2**-4 .. 2**4,
        # in ascending order.
        squared_widths = np.sort(
            (median_dist**2) * (2.0**np.linspace(-4, 4, 40)))
        candidates = [kernel.KGauss(width2) for width2 in squared_widths]

        # Grid search on the training data, then test on the held-out data.
        best_index, _powers = tst.QuadMMDTest.grid_search_kernel(
            tr, candidates, alpha)
        mmd_test = tst.QuadMMDTest(candidates[best_index],
                                   n_permute=1000,
                                   alpha=alpha,
                                   use_1sample_U=False)
        test_result = mmd_test.perform_test(te)

    return {
        'test_method': mmd_test,
        'test_result': test_result,
        'time_secs': timer.secs,
    }
Пример #5
0
    def test_basic_H1(self):
        """
        Basic check of UMETest on data where H1 is true.

        For dimensions 1 and 4, draws a sample from ``SSGaussMeanDiff`` with
        ``my=2.0``, builds a Gaussian kernel from the squared median
        distance, and runs the test with 1 and 6 test locations. The p-value
        must lie in [0, 0.1].
        """
        seed = 12
        sample_size = 271
        level = 0.01
        for dim in (1, 4):
            # H1 is true: the source is built with my=2.0.
            source = data.SSGaussMeanDiff(d=dim, my=2.0)
            sample = source.sample(sample_size, seed=seed)
            stacked = sample.stack_xy()

            median_dist = util.meddistance(stacked, subsample=1000)
            gauss_kernel = kernel.KGauss(median_dist**2)

            for num_locs in (1, 6):
                # Random test locations, drawn with a different seed from
                # the one used for the data.
                locations = util.fit_gaussian_draw(
                    stacked, num_locs, seed=seed + 1)
                ume_test = tst.UMETest(
                    locations, gauss_kernel, n_simulate=2000, alpha=level)
                pvalue = ume_test.perform_test(sample)['pvalue']

                self.assertGreaterEqual(pvalue, 0.0)
                # H1 is true, so the test should reject with a small p-value.
                self.assertLessEqual(pvalue, 0.1)
Пример #6
0
    def test(self, X, Y):
        """Return the p-value of a mean-embedding two-sample test on X and Y.

        ``self.preprocess(X, Y)`` produces the paired data object,
        ``self.J`` test locations are initialised from it with
        ``init_locs_subset``, and the test is run at level ``self.alpha``
        with the median distance as its width argument.
        """
        paired = self.preprocess(X, Y)

        test_locs = fot_tst.MeanEmbeddingTest.init_locs_subset(paired, self.J)
        median_dist = fot_util.meddistance(paired.stack_xy(), 1000)
        # NOTE(review): this kernel object is constructed but never used;
        # MeanEmbeddingTest below receives `median_dist` directly. Confirm
        # whether the construction can be dropped.
        _unused_kernel = fot_kernel.KGauss(median_dist)
        me_test = fot_tst.MeanEmbeddingTest(
            test_locs, median_dist, alpha=self.alpha)

        return me_test.perform_test(paired)['pvalue']
Пример #7
0
    def test(self, X, Y):
        """Return the p-value of a quadratic-time MMD test on X and Y.

        ``self.preprocess(X, Y)`` produces the paired data object. A
        ``KGauss`` kernel is parameterised by the median distance of the
        stacked samples, and the test is run with ``self.n_permute``
        permutations at level ``self.alpha``.
        """
        paired = self.preprocess(X, Y)

        median_dist = fot_util.meddistance(paired.stack_xy(), 1000)
        mmd_test = fot_tst.QuadMMDTest(
            fot_kernel.KGauss(median_dist),
            n_permute=self.n_permute,
            alpha=self.alpha,
        )

        return mmd_test.perform_test(paired)['pvalue']
Пример #8
0
def mmd(p, q, alpha=0.05):
    """Linear-time MMD two-sample test between samples ``p`` and ``q``.

    One-dimensional inputs are turned into single-column 2-D arrays. The
    data is split in half: one half is used to pick the Gaussian width by
    grid search, the other half to perform the test.

    Args:
        p: first sample (array with an ``ndim`` attribute).
        q: second sample (array with an ``ndim`` attribute).
        alpha: level passed to the grid search and to the test.

    Returns:
        ``(test_stat, pvalue)`` taken from the test result.
    """
    if p.ndim == 1:
        p = p[:, np.newaxis]
    if q.ndim == 1:
        q = q[:, np.newaxis]

    paired = data.TSTData(p, q)
    train_part, test_part = paired.split_tr_te(tr_proportion=0.5)

    median_dist = util.meddistance(train_part.stack_xy())
    # 20 candidate widths: the median distance scaled by 2**-1 .. 2**4;
    # each kernel receives the squared width.
    scale_factors = 2.0**np.linspace(-1, 4, 20)
    candidates = [
        kernel.KGauss((median_dist * factor)**2) for factor in scale_factors
    ]

    best_index, _powers = tst.LinearMMDTest.grid_search_kernel(
        train_part, candidates, alpha)
    outcome = tst.LinearMMDTest(
        candidates[best_index], alpha).perform_test(test_part)
    return outcome['test_stat'], outcome['pvalue']
Пример #9
0
def job_quad_mmd(sample_source, tr, te, r):
    """Quadratic-time MMD test with the Gaussian width chosen by grid search.

    The kernel is selected on ``tr`` and the test (400 permutations) is
    performed on ``te``. ``sample_source`` and ``r`` are accepted but not
    used in this function. ``alpha`` is not a parameter: it is read from
    the enclosing scope.

    Returns:
        The value returned by ``QuadMMDTest.perform_test(te)``.
    """
    # 1000 is passed to meddistance because, for large n, the pairwise
    # median computation can cause a memory error.
    median_dist = util.meddistance(tr.stack_xy(), 1000)

    # 30 candidate squared widths: median**2 scaled by 2**-4 .. 2**4,
    # in ascending order.
    squared_widths = np.sort((median_dist**2) * (2.0**np.linspace(-4, 4, 30)))
    candidates = [kernel.KGauss(width2) for width2 in squared_widths]

    # Grid search on the training data, then test on the held-out data.
    best_index, _powers = tst.QuadMMDTest.grid_search_kernel(
        tr, candidates, alpha)
    mmd_test = tst.QuadMMDTest(
        candidates[best_index], n_permute=400, alpha=alpha)
    return mmd_test.perform_test(te)
Пример #10
0
    def test(self, X, Y):
        """Return the p-value of a quadratic-time MMD test with a tuned kernel.

        The preprocessed data is split according to ``self.split_ratio``.
        The training part is used to grid-search the Gaussian kernel; the
        held-out part is tested with ``self.n_permute`` permutations at
        level ``self.alpha``.
        """
        paired = self.preprocess(X, Y)

        train_part, test_part = paired.split_tr_te(
            tr_proportion=self.split_ratio)
        median_dist = fot_util.meddistance(train_part.stack_xy(), 1000)

        # 20 candidate squared widths: median**2 scaled by 2**-4 .. 2**4.
        squared_widths = (median_dist**2) * (2.**np.linspace(-4, 4, 20))
        candidates = [fot_kernel.KGauss(width2) for width2 in squared_widths]

        # stdout is redirected to None while the grid search runs.
        with contextlib.redirect_stdout(None):
            best_index, _powers = fot_tst.QuadMMDTest.grid_search_kernel(
                train_part, candidates, alpha=self.alpha)

        mmd_test = fot_tst.QuadMMDTest(
            candidates[best_index],
            n_permute=self.n_permute,
            alpha=self.alpha,
        )
        return mmd_test.perform_test(test_part)['pvalue']
Пример #11
0
    def test_perform_test(self):
        """Check that METest yields a non-negative p-value and statistic.

        For each of three seeds, a sample of size 200 is drawn from a
        3-dimensional ``SSGaussMeanDiff`` source with ``my=0.1``. The test
        is run once with a single test location and once with two.
        """
        sample_size = 200  # full sample size
        mean_shift = 0.1
        dim = 3
        source = data.SSGaussMeanDiff(dim, my=mean_shift)

        for seed in (2, 8, 9):
            with util.NumpySeedContext(seed=seed):
                sample = source.sample(sample_size, seed=seed)
                all_locs = np.random.randn(2, dim)
                unit_kernel = kernel.KGauss(1)

                # First only the first location (kept 2-D), then both.
                for test_locs in (all_locs[[0], :], all_locs):
                    me_test = tst.METest(test_locs, unit_kernel, alpha=0.01)
                    outcome = me_test.perform_test(sample)
                    self.assertGreaterEqual(outcome['pvalue'], 0)
                    self.assertGreaterEqual(outcome['test_stat'], 0)