def job_rdcperm_med(paired_source, tr, te, r, n_features=10):
    """
    Randomized Dependence Coefficient (RDC) test with permutations.

    - Training and test sets are merged (tr + te) and the merged sample is
      the one being tested.
    - X and Y are copula-transformed (marginal CDF map) before the median
      distances are computed.
    - Gaussian width = median heuristic on the copula-transformed data
      (the squared median distance is passed to each random feature map).
    - 500 permutations.

    paired_source: not used by this job.
    tr, te: samples that can be combined with `+`; the combined object must
        provide xy().
    r: trial index. Only used to offset the random seeds.
    n_features: number of random features for each of X and Y. The default
        of 10 is from the Lopez-Paz et al., 2013 paper.

    The significance level `alpha` is not an argument of this function; it is
    looked up in the enclosing scope.

    Return a dictionary with keys 'indtest', 'test_result' and 'time_secs'.
    """
    full_sample = tr + te
    num_permutations = 500

    with util.ContextTimer() as timer:
        x, y = full_sample.xy()

        # Copula transform of both X and Y (one shared map instance).
        copula = fea.MarginalCDFMap()
        x_cdf = copula.gen_features(x)
        y_cdf = copula.gen_features(y)

        # Median distances on the transformed data, then squared.
        med_x = util.meddistance(x_cdf, subsample=1000)
        med_y = util.meddistance(y_cdf, subsample=1000)
        width_x, width_y = med_x ** 2, med_y ** 2

        # Every seed is a fixed offset of the trial index r.
        feature_map_x = fea.RFFKGauss(
            width_x, n_features=n_features, seed=r + 19)
        feature_map_y = fea.RFFKGauss(
            width_y, n_features=n_features, seed=r + 220)
        test = it.RDCPerm(
            feature_map_x,
            feature_map_y,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
        )
        outcome = test.perform_test(full_sample)

    # The timer is read only after its context has been left.
    return {
        'indtest': test,
        'test_result': outcome,
        'time_secs': timer.secs,
    }
def job_rdc_med(paired_source, tr, te, r, n_features=10):
    """
    Randomized Dependence Coefficient (RDC) test.

    - Gaussian width = median heuristic on the copula-transformed data
      (the squared median distance is passed to each random feature map).
    - n_features random features for each of X and Y (10 by default, from
      the Lopez-Paz et al., 2013 paper).
    - The full dataset (tr + te) is used for testing.

    paired_source: not used by this job.
    tr, te: samples that can be combined with `+`; the combined object must
        provide xy().
    r: trial index. Only used to offset the random seeds of the feature maps.
    n_features: number of random features for each of X and Y.

    The significance level `alpha` is not an argument of this function; it is
    looked up in the enclosing scope.

    Return a dictionary with keys 'indtest', 'test_result' and 'time_secs'.
    """
    full_sample = tr + te

    with util.ContextTimer() as timer:
        x, y = full_sample.xy()

        # Copula transform of both X and Y (one shared map instance).
        copula = fea.MarginalCDFMap()
        x_cdf = copula.gen_features(x)
        y_cdf = copula.gen_features(y)

        # Median distances on the transformed data, then squared.
        med_x = util.meddistance(x_cdf, subsample=1000)
        med_y = util.meddistance(y_cdf, subsample=1000)
        width_x, width_y = med_x ** 2, med_y ** 2

        feature_map_x = fea.RFFKGauss(
            width_x, n_features=n_features, seed=r + 19)
        feature_map_y = fea.RFFKGauss(
            width_y, n_features=n_features, seed=r + 220)
        test = it.RDC(feature_map_x, feature_map_y, alpha=alpha)
        outcome = test.perform_test(full_sample)

    # The timer is read only after its context has been left.
    return {
        'indtest': test,
        'test_result': outcome,
        'time_secs': timer.secs,
    }
def test_general(self):
    """
    MarginalCDFMap: the generated features have the same shape as the input,
    num_features() reports the input dimension, and all values are in [0, 1].
    """
    num_rows, num_cols = 30, 4
    samples = np.random.randn(num_rows, num_cols) * 3 + 4

    cdf_map = feature.MarginalCDFMap()
    features = cdf_map.gen_features(samples)

    # Shape: one output column per input column, one row per sample.
    self.assertEqual(features.shape[1], num_cols)
    self.assertEqual(features.shape[0], num_rows)
    self.assertEqual(cdf_map.num_features(samples), num_cols)

    # Range: every entry lies within [0, 1].
    self.assertTrue(np.all(features >= 0))
    self.assertTrue(np.all(features <= 1))
def job_nfsicJ10_cperm_stoopt(paired_source, tr, te, r):
    """
    Copula-transformed variant of job_nfsicJ10_stoopt.

    - Copula transform the data: X and Y of both the training and the test
      set go through a marginal CDF map.
    - Use permutations to simulate from the null distribution (500 is handed
      to job_nfsicJ10_stoopt as its fifth positional argument).

    paired_source: forwarded unchanged to job_nfsicJ10_stoopt.
    tr, te: training and test samples; each must provide xy().
    r: trial index, forwarded unchanged to job_nfsicJ10_stoopt.

    Return whatever job_nfsicJ10_stoopt returns, with its 'time_secs' entry
    replaced by the reading of this function's own timer, which also spans
    the copula transform.
    """
    num_permutations = 500

    with util.ContextTimer() as timer:
        # One map instance is shared by all four transforms.
        copula = fea.MarginalCDFMap()
        x_train, y_train = tr.xy()
        x_test, y_test = te.xy()

        x_train_cdf = copula.gen_features(x_train)
        y_train_cdf = copula.gen_features(y_train)
        x_test_cdf = copula.gen_features(x_test)
        y_test_cdf = copula.gen_features(y_test)

        train_cdf = data.PairedData(x_train_cdf, y_train_cdf)
        test_cdf = data.PairedData(x_test_cdf, y_test_cdf)

        result = job_nfsicJ10_stoopt(
            paired_source, train_cdf, test_cdf, r, num_permutations)

    # The timer is read only after its context has been left.
    result['time_secs'] = timer.secs
    return result