Пример #1
0
    def test_basic_H1(self):
        """
        Sanity check of UMETest on a Gaussian mean-difference problem.

        The two samples come from data.SSGaussMeanDiff with my=2.0, so the
        alternative hypothesis holds and the test is expected to produce a
        small (but non-negative) p-value for every dimension / number of
        test locations tried here.
        """
        rng_seed = 12
        sample_size = 271
        level = 0.01

        for dim in (1, 4):
            # draw a sample for which H1 is true
            source = data.SSGaussMeanDiff(d=dim, my=2.0)
            sample = source.sample(sample_size, seed=rng_seed)
            pooled = sample.stack_xy()

            # Gaussian kernel parameterised by the squared value returned
            # by util.meddistance on the pooled sample
            sq_width = util.meddistance(pooled, subsample=1000)**2
            gauss_kernel = kernel.KGauss(sq_width)

            for n_locs in (1, 6):
                # test locations drawn at random via util.fit_gaussian_draw
                test_locs = util.fit_gaussian_draw(
                    pooled, n_locs, seed=rng_seed + 1)
                tester = tst.UMETest(
                    test_locs, gauss_kernel, n_simulate=2000, alpha=level)
                outcome = tester.perform_test(sample)
                pvalue = outcome['pvalue']

                # a p-value can never be negative
                self.assertGreaterEqual(pvalue, 0.0)
                # H1 is true, so the p-value should be small
                self.assertLessEqual(pvalue, 0.1)
Пример #2
0
def get_sample_source(prob_label):
    """Return a SampleSource corresponding to the problem label.

    prob_label: a string key identifying the problem; must be one of the
        keys of the prob2ss table below.

    Return the sample source object registered under prob_label.
    Raise ValueError if prob_label is not a known label.
    """
    # map: prob_label -> sample_source
    prob2ss = {
        'SSBlobs': data.SSBlobs(),
        'gmd_d100': data.SSGaussMeanDiff(d=100, my=1.0),
        'gmd_d2': data.SSGaussMeanDiff(d=2, my=1.0),
        'gvd_d50': data.SSGaussVarDiff(d=50),
        'gvd_d100': data.SSGaussVarDiff(d=100),
        # The null is true
        'sg_d50': data.SSSameGauss(d=50),
    }
    if prob_label not in prob2ss:
        # Wrap the keys in list(..) so the message lists the plain labels;
        # on Python 3 str(dict.keys()) would print "dict_keys([...])".
        raise ValueError('Unknown problem label. Need to be one of %s' %
                         str(list(prob2ss.keys())))
    return prob2ss[prob_label]
Пример #3
0
    def test_optimize_locs_width(self):
        """
        Smoke test for GaussUMETest.optimize_locs_width(..).

        Optimize the test locations and the Gaussian width on the training
        half of a mean-difference sample, then check that the optimized
        parameters are well-formed (real, positive width; no NaN/inf in the
        locations) and that the test built from them rejects H0 on the
        held-out half.
        """
        n_total = 600
        seed = 17
        # number of test locations
        n_locs = 3
        # significance level of the final test
        alpha = 0.01

        # sample source
        dim = 2
        source = data.SSGaussMeanDiff(dim, my=1.0)
        # the source's reported dimension is queried but not used below
        dim = source.dim()

        full_sample = source.sample(n_total, seed=seed)
        train, held_out = full_sample.split_tr_te(tr_proportion=0.5, seed=10)
        train_xy = train.stack_xy()

        # initialize the test locations by drawing from a Gaussian fitted to
        # the training data
        init_locs = util.fit_gaussian_draw(train_xy, n_locs, seed=seed + 1)
        init_width2 = util.meddistance(train_xy, subsample=1000)**2
        assert init_width2 > 0

        # optimize
        optimizer_options = dict(
            reg=1e-2,
            max_iter=100,
            tol_fun=1e-5,
            disp=False,
            locs_bounds_frac=100,
            gwidth_lb=None,
            gwidth_ub=None,
        )
        V_opt, gw2_opt, opt_info = tst.GaussUMETest.optimize_locs_width(
            train, init_locs, init_width2, **optimizer_options)

        # perform the test using the optimized parameters on the test set
        optimized_test = tst.GaussUMETest(
            V_opt, gw2_opt, n_simulate=2000, alpha=alpha)
        outcome = optimized_test.perform_test(held_out)

        assert outcome['h0_rejected']
        assert util.is_real_num(gw2_opt)
        assert gw2_opt > 0
        assert not np.any(np.isnan(V_opt))
        assert not np.any(np.isinf(V_opt))
Пример #4
0
def get_sample_source_list(prob_label):
    """Return a list of SampleSource's representing the problems, each
    corresponding to one dimension in the list.

    prob_label: a string key identifying the problem family; must be one of
        the keys of the prob2ss table below.

    Return the list of sample sources registered under prob_label, one per
    dimension tried for that family.
    Raise ValueError if prob_label is not a known label.
    """
    # map: prob_label -> [sample_source]
    # dimensions to try
    dimensions = [5] + [100*i for i in range(1, 5+1)]  # 5, 100, ..., 500
    high_dims = [5] + [300*i for i in range(1, 5+1)]   # 5, 300, ..., 1500
    low_dims = [2*d for d in range(1, 7+1)]            # 2, 4, ..., 14
    prob2ss = {
        'gmd': [data.SSGaussMeanDiff(d=d, my=1.0) for d in high_dims],
        'gvd': [data.SSGaussVarDiff(d=d) for d in dimensions],
        # The null is true
        'sg': [data.SSSameGauss(d=d) for d in high_dims],
        'sg_low': [data.SSSameGauss(d=d) for d in low_dims],
    }
    if prob_label not in prob2ss:
        # Wrap the keys in list(..) so the message lists the plain labels;
        # on Python 3 str(dict.keys()) would print "dict_keys([...])".
        raise ValueError('Unknown problem label. Need to be one of %s' %
                         str(list(prob2ss.keys())))
    return prob2ss[prob_label]
Пример #5
0
def get_sample_source(prob_label):
    """Look up a problem by its label.

    Return a 2-tuple (sample_source, sample_sizes): the SampleSource
    registered under prob_label and the list of sample sizes to try with it.
    Raise ValueError when prob_label is not a registered label.
    """
    # sample sizes shared by every problem: 2000, 4000, ..., 10000
    sizes = list(range(2000, 5 * 2000 + 1, 2000))

    # (label, sample_source) pairs, in registration order
    labelled_sources = [
        ('SSBlobs', data.SSBlobs()),
        ('gmd_d100', data.SSGaussMeanDiff(d=100, my=1.0)),
        ('gvd_d50', data.SSGaussVarDiff(d=50)),
        # The null is true
        ('sg_d50', data.SSSameGauss(d=50)),
        ('sg_d5', data.SSSameGauss(d=5)),
    ]
    # map: prob_label -> (sample_source, sample_sizes)
    prob2ss = {label: (source, sizes) for label, source in labelled_sources}

    if prob_label in prob2ss:
        return prob2ss[prob_label]

    known_labels = list(prob2ss)
    raise ValueError(
        'Unknown problem label. Need to be one of %s' % str(known_labels))
Пример #6
0
    def test_perform_test(self):
        """
        Run METest on a small mean-shift problem for a few seeds and check
        that both the p-value and the test statistic are non-negative, using
        first a single test location and then two test locations.
        """
        # Full sample size
        sample_size = 200
        # mean shift
        mean_shift = 0.1
        dim = 3
        source = data.SSGaussMeanDiff(dim, my=mean_shift)

        for seed in (2, 8, 9):
            with util.NumpySeedContext(seed=seed):
                sample = source.sample(sample_size, seed=seed)
                # two random test locations in `dim` dimensions
                all_locs = np.random.randn(2, dim)
                gauss_kernel = kernel.KGauss(1)

                # first only location 0 (kept 2-D), then both locations
                for locs in (all_locs[[0], :], all_locs):
                    me_test = tst.METest(locs, gauss_kernel, alpha=0.01)
                    outcome = me_test.perform_test(sample)
                    self.assertGreaterEqual(outcome['pvalue'], 0)
                    self.assertGreaterEqual(outcome['test_stat'], 0)