示例#1
0
    def test_sample(self):
        # Draw samples of two different sizes from a PS2DSinFreq source and
        # check the shapes of what comes back.
        source = data.PS2DSinFreq(1)
        for size in (5, 613):
            X, Y = source.sample(n=size).xy()
            stacked = np.hstack((X, Y))

            # X and Y each have exactly one column ...
            self.assertEqual(X.shape[1], 1)
            self.assertEqual(Y.shape[1], 1)
            # ... and the side-by-side stack has one row per requested sample.
            self.assertEqual(stacked.shape[0], size)
示例#2
0
def get_paired_source_list(prob_label):
    """Return (prob_params, ps_list) for the problem named by prob_label.

    - ps_list: a list of PairedSource's representing the problems, each
      corresponding to one parameter setting.
    - prob_params: the list of problem parameters. Each parameter has to be a
      scalar (so that we can plot them later). Parameters are preferably
      positive integers.

    The label is validated before anything is built, and only the sources of
    the requested problem are constructed.

    Raises ValueError if prob_label is not one of the known problem labels.
    """
    degrees = [float(deg) for deg in range(0, 10 + 1, 2)]
    noise_dims = list(range(0, 8, 2))
    sg_dims = list(range(10, 100, 20))
    # Medium-sized Gaussian problem
    msg_dims = list(range(50, 250 + 1, 50))
    # Big Gaussian problem
    bsg_dims = list(range(100, 400 + 1, 100))
    sin_freqs = list(range(1, 6 + 1))
    multi_sin_d = list(range(1, 4 + 1, 1))
    pwsign_d = list(range(10, 50 + 1, 10))
    gauss_sign_d = list(range(1, 6 + 1, 1))

    def same_gauss(d):
        # Shared by 'sg', 'msg' and 'bsg': same dimension on both sides.
        return data.PSIndSameGauss(dx=d, dy=d)

    # map: prob_label -> (prob_params, factory turning one param into a
    # paired source). Factories are only called for the requested label.
    prob2ps = {
        # Degrees are converted to radians before being passed on.
        'u2drot': (degrees, lambda deg: data.PS2DUnifRotate(
            2.0 * np.pi * deg / 360, xlb=-1, xub=1, ylb=-1, yub=1)),
        'urot_noise': (noise_dims, lambda d: data.PSUnifRotateNoise(
            angle=np.pi / 4, xlb=-1, xub=1, ylb=-1, yub=1, noise_dim=d)),
        'sg': (sg_dims, same_gauss),
        'msg': (msg_dims, same_gauss),
        'bsg': (bsg_dims, same_gauss),
        'sin': (sin_freqs, lambda f: data.PS2DSinFreq(freq=f)),
        'msin': (multi_sin_d, lambda d: data.PSSinFreq(freq=1, d=d)),
        'pwsign': (pwsign_d, lambda d: data.PSPairwiseSign(dx=d)),
        'gsign': (gauss_sign_d, lambda d: data.PSGaussSign(dx=d)),
    }
    if prob_label not in prob2ps:
        raise ValueError('Unknown problem label. Need to be one of %s' %
                         str(list(prob2ps.keys())))
    prob_params, make_ps = prob2ps[prob_label]
    return prob_params, [make_ps(param) for param in prob_params]
示例#3
0
def get_paired_source(prob_label):
    """
    Return (sample_sizes, ps) for the problem named by prob_label, where
    - ps: one PairedSource representing a problem
    - sample_sizes: list of sample sizes for that particular PairedSource.

    The label is validated first, and only the requested PairedSource is
    constructed.

    Raises ValueError if prob_label is not one of the known problem labels.
    """
    #exp_n = [1000, 3000, 10000, 30000, 100000, 300000, 1000000]
    exp_n = [1000, 3000, 6000, 10000, 30000, 100000]
    #exp_n = [1000, 10000, 100000 ]

    # Shorter grid used by the 'sin_w3' and 'gsign_d3' problems.
    small_n = list(range(1000, 4000 + 1, 1000))

    # map: prob_label -> (sample_sizes, zero-argument factory for the source).
    # The factory is only called for the requested label.
    prob2ps = {
        'sg_d50': (exp_n, lambda: data.PSIndSameGauss(dx=50, dy=50)),
        'sg_d250': (exp_n, lambda: data.PSIndSameGauss(dx=250, dy=250)),
        'sg_d500': (exp_n, lambda: data.PSIndSameGauss(dx=500, dy=500)),
        'sg_d1000': (exp_n, lambda: data.PSIndSameGauss(dx=1000, dy=1000)),
        'sin_w3': (small_n, lambda: data.PS2DSinFreq(freq=3)),
        'sin_w4': (exp_n, lambda: data.PS2DSinFreq(freq=4)),
        'sin_w5': (exp_n, lambda: data.PS2DSinFreq(freq=5)),
        'gsign_d3': (small_n, lambda: data.PSGaussSign(dx=3)),
        'gsign_d4': (exp_n, lambda: data.PSGaussSign(dx=4)),
        'gsign_d5': (exp_n, lambda: data.PSGaussSign(dx=5)),
    }
    if prob_label not in prob2ps:
        raise ValueError('Unknown problem label. Need to be one of %s' %
                         str(list(prob2ps.keys())))
    sample_sizes, make_ps = prob2ps[prob_label]
    return sample_sizes, make_ps()
示例#4
0
    def test_rdc(self):
        # For several seeds: the RDC statistic must be non-negative and the
        # returned eigenvalues must have magnitude at most 1.
        n_pairs = 10
        sample_size = 30
        for trial in range(1, 7):
            source = data.PS2DSinFreq(freq=1)
            paired = source.sample(sample_size, seed=trial + 4)
            # The two feature maps deliberately differ in their first
            # argument, their feature count and their seed.
            feat_x = fea.RFFKGauss(1, n_pairs, seed=trial + 10)
            feat_y = fea.RFFKGauss(2.0, n_pairs + 1, seed=trial + 9)
            rdc_test = it.RDC(feat_x, feat_y, alpha=0.01)
            stat, evals = rdc_test.compute_stat(paired, return_eigvals=True)

            self.assertGreaterEqual(stat, 0)
            magnitudes = np.abs(evals)
            self.assertTrue(np.all(magnitudes >= 0))
            self.assertTrue(np.all(magnitudes <= 1))
示例#5
0
    def test_list_permute(self):
        # NFSIC.list_permute and NFSIC._list_permute_naive are run on the same
        # data (with different seeds); their outputs should have similar
        # histograms in terms of relative frequency.
        def relative_freq(values):
            # Histogram counts normalised to sum to one (bin edges unused).
            counts, _ = np.histogram(values)
            return counts / float(np.sum(counts))

        source = data.PS2DSinFreq(freq=2)
        n_permute = 1000
        n_locs = 4
        for seed in (284, 77):
            with util.NumpySeedContext(seed=seed):
                paired = source.sample(n=200, seed=seed + 1)
                dim_x = paired.dx()
                dim_y = paired.dy()
                X, Y = paired.xy()

                kern_x = kernel.KGauss(2)
                kern_y = kernel.KGauss(3)
                # Draw V first, then W, from the seeded global generator.
                V = np.random.randn(n_locs, dim_x)
                W = np.random.randn(n_locs, dim_y)

                # No NFSIC test object is constructed here; only the two
                # permutation routines are called directly.
                perm_stats = it.NFSIC.list_permute(
                    X, Y, kern_x, kern_y, V, W,
                    n_permute=n_permute, seed=seed + 34, reg=0)
                naive_stats = it.NFSIC._list_permute_naive(
                    X, Y, kern_x, kern_y, V, W,
                    n_permute=n_permute, seed=seed + 389, reg=0)

                # Bin-by-bin, the relative frequencies may differ by at most
                # 0.2.
                gap = np.abs(
                    relative_freq(perm_stats) - relative_freq(naive_stats))
                self.assertTrue(np.all(gap <= 0.2))
示例#6
0
    def test_list_permute_spectral(self):
        # Simulating with FiniteFeatureHSIC.list_permute_spectral should give
        # roughly the same histogram as FiniteFeatureHSIC.list_permute.
        def relative_freq(values):
            # Histogram counts normalised to sum to one (bin edges unused).
            counts, _ = np.histogram(values)
            return counts / np.sum(counts)

        source = data.PS2DSinFreq(freq=2)
        n_features = 5
        n_simulate = 3000
        n_permute = 3000
        for seed in (283, 2):
            with util.NumpySeedContext(seed=seed):
                paired = source.sample(n=200, seed=seed + 1)
                X, Y = paired.xy()

                width_x2 = 1
                width_y2 = 0.8
                feat_x = feature.RFFKGauss(
                    width_x2, n_features=n_features, seed=seed + 3)
                feat_y = feature.RFFKGauss(
                    width_y2, n_features=n_features, seed=seed + 23)

                # The spectral routine works on the generated features, the
                # permutation routine on the raw data plus the feature maps.
                Zx = feat_x.gen_features(X)
                Zy = feat_y.gen_features(Y)
                perm_stats = indtest.FiniteFeatureHSIC.list_permute(
                    X, Y, feat_x, feat_y, n_permute=n_permute, seed=seed + 82)
                # Only the first of the three returned values is compared.
                spectral_stats, _, _ = (
                    indtest.FiniteFeatureHSIC.list_permute_spectral(
                        Zx, Zy, n_simulate=n_simulate, seed=seed + 119))

                # Bin-by-bin, the relative frequencies may differ by at most
                # 0.2.
                gap = np.abs(
                    relative_freq(perm_stats) - relative_freq(spectral_stats))
                self.assertTrue(np.all(gap <= 0.2))