def test_sample(self):
    """Check the shapes of samples drawn from ``data.PS2DSinFreq(1)``."""
    source = data.PS2DSinFreq(1)
    for sample_size in (5, 613):
        paired = source.sample(n=sample_size)
        X, Y = paired.xy()
        stacked = np.hstack((X, Y))

        # Each side of the pair is expected to be a single column.
        self.assertEqual(X.shape[1], 1)
        self.assertEqual(Y.shape[1], 1)
        # The stacked sample has one row per requested observation.
        self.assertEqual(stacked.shape[0], sample_size)
def get_paired_source_list(prob_label):
    """Return (prob_params, ps_list) for the problem named ``prob_label``.

    - ps_list: a list of PairedSource's representing the problems, each
      corresponding to one parameter setting.
    - prob_params: the list of problem parameters. Each parameter has to be a
      scalar (so that we can plot them later). Parameters are preferably
      positive integers.

    The label is validated first, and only the sources belonging to the
    requested problem are constructed.

    Raises ValueError if ``prob_label`` is not a known problem label.
    """
    degrees = [float(deg) for deg in range(0, 10 + 1, 2)]
    noise_dims = list(range(0, 8, 2))
    sg_dims = list(range(10, 100, 20))
    # Medium-sized Gaussian problem
    msg_dims = list(range(50, 250 + 1, 50))
    # Big Gaussian problem
    bsg_dims = list(range(100, 400 + 1, 100))
    sin_freqs = list(range(1, 6 + 1))
    multi_sin_d = list(range(1, 4 + 1, 1))
    pwsign_d = list(range(10, 50 + 1, 10))
    gauss_sign_d = list(range(1, 6 + 1, 1))

    # map: prob_label -> (prob_params, factory building one PairedSource from
    # one parameter). Factories keep construction lazy so that asking for one
    # problem does not instantiate the sources of all the others.
    prob2spec = {
        'u2drot': (
            degrees,
            lambda deg: data.PS2DUnifRotate(
                2.0 * np.pi * deg / 360, xlb=-1, xub=1, ylb=-1, yub=1
            ),
        ),
        'urot_noise': (
            noise_dims,
            lambda d: data.PSUnifRotateNoise(
                angle=np.pi / 4, xlb=-1, xub=1, ylb=-1, yub=1, noise_dim=d
            ),
        ),
        'sg': (sg_dims, lambda d: data.PSIndSameGauss(dx=d, dy=d)),
        'msg': (msg_dims, lambda d: data.PSIndSameGauss(dx=d, dy=d)),
        'bsg': (bsg_dims, lambda d: data.PSIndSameGauss(dx=d, dy=d)),
        'sin': (sin_freqs, lambda f: data.PS2DSinFreq(freq=f)),
        'msin': (multi_sin_d, lambda d: data.PSSinFreq(freq=1, d=d)),
        'pwsign': (pwsign_d, lambda d: data.PSPairwiseSign(dx=d)),
        'gsign': (gauss_sign_d, lambda d: data.PSGaussSign(dx=d)),
    }
    if prob_label not in prob2spec:
        raise ValueError('Unknown problem label. Need to be one of %s'
                         % str(list(prob2spec)))

    prob_params, make_ps = prob2spec[prob_label]
    return prob_params, [make_ps(param) for param in prob_params]
def get_paired_source(prob_label):
    """Return (sample_sizes, ps) for the problem named ``prob_label``.

    - ps: one PairedSource representing a problem
    - sample_sizes: list of sample sizes for that particular PairedSource.

    The label is validated first, and only the requested PairedSource is
    constructed.

    Raises ValueError if ``prob_label`` is not a known problem label.
    """
    # Default grid of sample sizes shared by most problems.
    exp_n = [1000, 3000, 6000, 10000, 30000, 100000]
    # Smaller grid used by the sin_w3 and gsign_d3 problems.
    sin_w3_n = list(range(1000, 4000 + 1, 1000))
    gsign_d3_n = list(range(1000, 4000 + 1, 1000))

    # map: prob_label -> (sample_sizes, zero-argument factory for the source).
    # Factories keep construction lazy so that asking for one problem does not
    # instantiate the (large) sources of all the others.
    prob2spec = {
        'sg_d50': (exp_n, lambda: data.PSIndSameGauss(dx=50, dy=50)),
        'sg_d250': (exp_n, lambda: data.PSIndSameGauss(dx=250, dy=250)),
        'sg_d500': (exp_n, lambda: data.PSIndSameGauss(dx=500, dy=500)),
        'sg_d1000': (exp_n, lambda: data.PSIndSameGauss(dx=1000, dy=1000)),
        'sin_w3': (sin_w3_n, lambda: data.PS2DSinFreq(freq=3)),
        'sin_w4': (exp_n, lambda: data.PS2DSinFreq(freq=4)),
        'sin_w5': (exp_n, lambda: data.PS2DSinFreq(freq=5)),
        'gsign_d3': (gsign_d3_n, lambda: data.PSGaussSign(dx=3)),
        'gsign_d4': (exp_n, lambda: data.PSGaussSign(dx=4)),
        'gsign_d5': (exp_n, lambda: data.PSGaussSign(dx=5)),
    }
    if prob_label not in prob2spec:
        raise ValueError('Unknown problem label. Need to be one of %s'
                         % str(list(prob2spec)))

    sample_sizes, make_ps = prob2spec[prob_label]
    return sample_sizes, make_ps()
def test_rdc(self):
    """Sanity-check ``it.RDC.compute_stat`` over several seeds.

    The statistic must be non-negative and the values returned through
    ``return_eigvals=True`` must have magnitude at most 1.
    """
    n_pairs = 10
    sample_size = 30
    for rep in range(1, 7):
        source = data.PS2DSinFreq(freq=1)
        pdata = source.sample(sample_size, seed=rep + 4)

        # Random Fourier feature maps for the two sides; the y-side uses a
        # different width and one more feature pair than the x-side.
        feat_x = fea.RFFKGauss(1, n_pairs, seed=rep + 10)
        feat_y = fea.RFFKGauss(2.0, n_pairs + 1, seed=rep + 9)

        tester = it.RDC(feat_x, feat_y, alpha=0.01)
        stat, evals = tester.compute_stat(pdata, return_eigvals=True)

        self.assertGreaterEqual(stat, 0)
        magnitudes = np.abs(evals)
        self.assertTrue(np.all(magnitudes >= 0))
        self.assertTrue(np.all(magnitudes <= 1))
def test_list_permute(self):
    """Compare ``NFSIC.list_permute`` with ``NFSIC._list_permute_naive``.

    The two routines are run on the same data with different seeds, and the
    relative frequencies of their histograms must differ by at most 0.2 in
    every bin.
    """
    def relative_freq(values):
        # np.histogram defaults to 10 equal-width bins over the range of
        # `values`, so bins of the two arrays are matched by position.
        counts, _ = np.histogram(values)
        return counts / float(np.sum(counts))

    source = data.PS2DSinFreq(freq=2)
    n_permute = 1000
    n_locs = 4
    for seed in (284, 77):
        with util.NumpySeedContext(seed=seed):
            pdata = source.sample(n=200, seed=seed + 1)
            dim_x = pdata.dx()
            dim_y = pdata.dy()
            X, Y = pdata.xy()

            kern_x = kernel.KGauss(2)
            kern_y = kernel.KGauss(3)
            # Random test locations, drawn under the seeded context.
            V = np.random.randn(n_locs, dim_x)
            W = np.random.randn(n_locs, dim_y)

            stats_fast = it.NFSIC.list_permute(
                X, Y, kern_x, kern_y, V, W,
                n_permute=n_permute, seed=seed + 34, reg=0,
            )
            stats_naive = it.NFSIC._list_permute_naive(
                X, Y, kern_x, kern_y, V, W,
                n_permute=n_permute, seed=seed + 389, reg=0,
            )

            # Make sure that the relative frequency of the histogram does
            # not differ much.
            gap = np.abs(relative_freq(stats_fast) - relative_freq(stats_naive))
            self.assertTrue(np.all(gap <= 0.2))
def test_list_permute_spectral(self):
    """Compare spectral simulation with permutation for FiniteFeatureHSIC.

    Statistics simulated by ``list_permute_spectral`` should have roughly
    the same histogram as those obtained by ``list_permute``: relative
    frequencies must differ by at most 0.2 in every bin.
    """
    def relative_freq(values):
        # np.histogram defaults to 10 equal-width bins over the range of
        # `values`, so bins of the two arrays are matched by position.
        counts, _ = np.histogram(values)
        return counts / np.sum(counts)

    source = data.PS2DSinFreq(freq=2)
    n_features = 5
    n_simulate = 3000
    n_permute = 3000
    for seed in (283, 2):
        with util.NumpySeedContext(seed=seed):
            pdata = source.sample(n=200, seed=seed + 1)
            X, Y = pdata.xy()

            width_x2 = 1
            width_y2 = 0.8
            feat_x = feature.RFFKGauss(
                width_x2, n_features=n_features, seed=seed + 3
            )
            feat_y = feature.RFFKGauss(
                width_y2, n_features=n_features, seed=seed + 23
            )
            # The spectral routine works on the explicit feature matrices.
            Zx = feat_x.gen_features(X)
            Zy = feat_y.gen_features(Y)

            stats_perm = indtest.FiniteFeatureHSIC.list_permute(
                X, Y, feat_x, feat_y, n_permute=n_permute, seed=seed + 82
            )
            # Only the first of the three returned values is needed here.
            stats_spectral, _, _ = indtest.FiniteFeatureHSIC.list_permute_spectral(
                Zx, Zy, n_simulate=n_simulate, seed=seed + 119
            )

            # Make sure that the relative frequency of the histogram does
            # not differ much.
            gap = np.abs(relative_freq(stats_perm) - relative_freq(stats_spectral))
            self.assertTrue(np.all(gap <= 0.2))