def test_pooled(self):
    """Smoke test: fitting with ``pooled=True`` on a 20- and a 100-vertex graph."""
    np.random.seed(123)
    small_graph, large_graph = er_np(20, 0.3), er_np(100, 0.3)
    # No assertion on the result: the test passes as long as fit() does not raise.
    LatentDistributionTest(pooled=True).fit(small_graph, large_graph)
def test_different_sizes_null(self):
    """Compare corrected and uncorrected tests on graphs of unequal size.

    Both graphs are drawn with the same edge probability (0.8) but with 100
    and 1000 vertices. The test expects the uncorrected p-value to be at
    most 0.05 and the size-corrected p-value to exceed 0.05 whichever graph
    is passed first.
    """
    np.random.seed(314)
    small, large = er_np(100, 0.8), er_np(1000, 0.8)

    def make_test(size_correction):
        # All three tests share every setting except ``size_correction``.
        return LatentDistributionTest(
            "hsic",
            "gaussian",
            n_components=2,
            n_bootstraps=100,
            size_correction=size_correction,
        )

    uncorrected = make_test(False)
    corrected_small_first = make_test(True)
    corrected_large_first = make_test(True)

    # Keep the original call order so the seeded random stream is consumed
    # in the same sequence.
    pval_uncorrected = uncorrected.fit_predict(small, large)
    pval_small_first = corrected_small_first.fit_predict(small, large)
    pval_large_first = corrected_large_first.fit_predict(large, small)

    self.assertTrue(pval_uncorrected <= 0.05)
    self.assertTrue(pval_small_first > 0.05)
    self.assertTrue(pval_large_first > 0.05)
def test_different_sizes(self):
    """``NonparametricTest.fit`` must raise ValueError for 50- vs 100-vertex graphs."""
    np.random.seed(3)
    graph_50, graph_100 = er_np(50, 0.3), er_np(100, 0.3)
    tester = NonparametricTest()
    self.assertRaises(ValueError, tester.fit, graph_50, graph_100)
def test_directed_inputs(self):
    """Two directed ER graphs with identical parameters should give ``fit`` > 0.05."""
    np.random.seed(2)
    # Two independent draws, generated in the same order as before.
    directed_graphs = [er_np(100, 0.3, directed=True) for _ in range(2)]
    result = LatentDistributionTest().fit(*directed_graphs)
    # presumably ``result`` is a p-value -- it is compared against 0.05
    self.assertTrue(result > 0.05)
def test_different_sizes(self):
    """``LatentDistributionTest.fit`` must raise ValueError for 50- vs 100-vertex graphs."""
    np.random.seed(3)
    graph_50, graph_100 = er_np(50, 0.3), er_np(100, 0.3)
    tester = LatentDistributionTest()
    self.assertRaises(ValueError, tester.fit, graph_50, graph_100)
def test_directed_inputs(self):
    """``NonparametricTest.fit`` must raise NotImplementedError on directed graphs."""
    np.random.seed(2)
    # Two independent draws, generated in the same order as before.
    directed_graphs = [er_np(100, 0.3, directed=True) for _ in range(2)]
    tester = NonparametricTest()
    self.assertRaises(NotImplementedError, tester.fit, *directed_graphs)
def setUpClass(cls):
    """Seed NumPy and store the shared fixtures on the class.

    Sets ``cls.tests`` (a mapping of three string keys to string values;
    presumably test name -> metric/kernel name, confirm against the tests
    that read it) and two 20-vertex ER graphs ``cls.A1`` and ``cls.A2``
    drawn with edge probability 0.3.
    """
    np.random.seed(123456)
    cls.tests = dict(dcorr="euclidean", hsic="gaussian", mgc="euclidean")
    cls.A1, cls.A2 = er_np(20, 0.3), er_np(20, 0.3)
def test_directed_inputs(self):
    """Directed/directed inputs fit; mixing directed and undirected raises ValueError."""
    np.random.seed(2)
    directed_a = er_np(100, 0.3, directed=True)
    directed_b = er_np(100, 0.3, directed=True)
    undirected = er_np(100, 0.3, directed=False)

    ldt = LatentDistributionTest("dcorr")

    # Two directed graphs are accepted.
    ldt.fit(directed_a, directed_b)

    # One directed and one undirected graph is rejected, in either position.
    mixed_pairs = ((directed_a, undirected), (undirected, directed_b))
    for first, second in mixed_pairs:
        with self.assertRaises(ValueError):
            ldt.fit(first, second)
def test_er_corr(n, p, rho=0.2):
    """Resample an ER graph entry by entry and print how well it matches.

    Every entry of an ``er_np(n, p)`` graph is redrawn from a Bernoulli
    distribution whose success probability depends on the entry's current
    value: ``p + rho * (1 - p)`` if it is 1, ``p * (1 - rho)`` otherwise.
    The empirical versions of those two conditional probabilities are then
    printed next to the expected ones.

    Parameters
    ----------
    n : int
        Passed to ``er_np`` as the number of vertices; also the loop bound
        for both matrix axes.
    p : float
        Passed to ``er_np`` as the edge probability.
    rho : float, default 0.2
        Parameter that shifts the two conditional probabilities.

    Returns
    -------
    tuple
        ``(origin_G1, G1)``: a deep copy of the graph before resampling and
        the resampled graph (modified in place).

    Notes
    -----
    NOTE(review): the name starts with ``test_`` but the function takes
    required arguments; if this module is collected by pytest it would be
    treated as a test -- confirm that is intended.
    """
    # assumes er_np returns an (n, n) array of 0/1 values -- the indexing and
    # comparisons below rely on it
    G1 = er_np(n, p)
    origin_G1 = copy.deepcopy(G1)
    print(origin_G1.mean())

    exp_prob1 = p + rho * (1 - p)  # P(new entry = 1 | old entry = 1)
    exp_prob2 = p * (1 - rho)  # P(new entry = 1 | old entry = 0)

    # Entry-by-entry draws are kept (not vectorised) so the sequence of
    # random draws is the same as before.
    for i in range(n):
        for j in range(n):
            if G1[i][j] == 1:
                G1[i][j] = np.random.binomial(1, exp_prob1)
            else:
                G1[i][j] = np.random.binomial(1, exp_prob2)
    print(G1.mean())

    was_edge = origin_G1 == 1
    was_non_edge = origin_G1 == 0
    is_edge = G1 == 1

    prob1 = np.count_nonzero(was_edge & is_edge)
    prob2 = np.count_nonzero(was_non_edge & is_edge)
    n_edges = np.count_nonzero(was_edge)
    n_non_edges = np.count_nonzero(was_non_edge)

    # Each conditional frequency is normalised by the size of the set it is
    # conditioned on. Previously prob2 was divided by the number of edges in
    # the resampled graph, which is not the number of original non-edges.
    real_prob1 = prob1 / n_edges if n_edges else float("nan")
    real_prob2 = prob2 / n_non_edges if n_non_edges else float("nan")

    # Euclidean distance between expected and observed (prob1, prob2).
    # Previously the prob1 error was added twice and prob2 was ignored.
    var = np.sqrt((exp_prob1 - real_prob1) ** 2 + (exp_prob2 - real_prob2) ** 2)

    print('expected prob1 = ', exp_prob1)
    print('real prob1 = ', real_prob1)
    print('expected prob2 = ', exp_prob2)
    print('real prob2 = ', real_prob2)
    print('the variance between estimation and real values =', var)
    return origin_G1, G1
def setup_class(cls):
    """Create the shared fixtures: a seeded ER graph and a fitted EREstimator.

    Stores on the class: the 1000-vertex graph drawn with ``p = 0.5``, the
    scalar ``p``, a constant 1000 x 1000 matrix filled with ``p``, the
    fitted estimator (``directed=True, loops=False``) and its ``p_``
    attribute as ``p_hat``.
    """
    np.random.seed(8888)
    n_verts, edge_prob = 1000, 0.5

    cls.graph = er_np(n_verts, edge_prob)
    cls.p = edge_prob
    cls.p_mat = np.full((n_verts, n_verts), edge_prob)

    cls.estimator = EREstimator(directed=True, loops=False)
    cls.estimator.fit(cls.graph)
    cls.p_hat = cls.estimator.p_
def mc_iter(n, m, p, q, tilde, i=1):
    """Run one Monte Carlo iteration and return the result of the latent test.

    Two ER graphs are drawn with ``n`` and ``m`` vertices and edge
    probabilities ``p * p`` and ``q * q``. Each is embedded into one
    dimension with ``AdjacencySpectralEmbed``. If ``tilde`` is truthy, the
    embeddings are passed through ``sample_noisy_points`` first.

    Parameters
    ----------
    n, m : int
        Numbers of vertices of the first and second graph.
    p, q : float
        Squared to obtain the edge probabilities of the two graphs.
    tilde : bool
        Whether to replace the embeddings with the output of
        ``sample_noisy_points``.
    i : int, default 1
        Unused in the body; presumably an iteration index supplied by the
        caller.

    Returns
    -------
    The value returned by ``LatentDistributionTest().fit(...)`` when called
    with ``pass_graph=False``.
    """

    def embed_er(n_verts, latent):
        # Draw the graph first, then embed it, once per sample.
        sampled = er_np(n_verts, latent * latent)
        return AdjacencySpectralEmbed(n_components=1).fit_transform(sampled)

    X = embed_er(n, p)
    Y = embed_er(m, q)

    X_new, Y_new = sample_noisy_points(X, Y) if tilde else (X, Y)

    return LatentDistributionTest().fit(X_new, Y_new, pass_graph=False)
def test_ER_score(self):
    """Run the shared score check, then require ValueError on a 500-vertex graph."""
    estimator = EREstimator(directed=False)
    _test_score(estimator, self.p_mat, self.graph)

    # presumably rejected because its size differs from the graph used above
    other_graph_kwargs = dict(graph=er_np(500, 0.5))
    with pytest.raises(ValueError):
        estimator.score_samples(**other_graph_kwargs)
def test_passing_embeddings(self):
    """Exercise ``input_graph=False`` with malformed and well-formed embeddings.

    Each failing case builds a fresh ``LatentDistributionTest`` and expects
    ``fit_predict`` to raise the listed exception. The final call with two
    well-formed 2-component embeddings must succeed.
    """
    np.random.seed(123)
    A1 = er_np(20, 0.8)
    A2 = er_np(20, 0.8)

    # Same fit order as before: X1, X2 (2 components), then X3 (1 component).
    X1 = AdjacencySpectralEmbed(n_components=2).fit_transform(A1)
    X2 = AdjacencySpectralEmbed(n_components=2).fit_transform(A2)
    X3 = AdjacencySpectralEmbed(n_components=1).fit_transform(A2)

    X1_w_inf = X1.copy()
    X1_w_inf[1, 1] = np.inf

    failing_cases = [
        # embeddings with an unexpected number of dimensions
        (ValueError, X1, X2.reshape(-1, 1, 1)),
        (ValueError, X1.reshape(-1, 1, 1), X2),
        # embeddings with mismatching numbers of components
        (ValueError, X1, X3),
        (ValueError, X3, X1),
        # non-array inputs (caught by us)
        (TypeError, "hello there", X1),
        (TypeError, X1, "hello there"),
        (TypeError, {"hello": "there"}, X1),
        (TypeError, X1, {"hello": "there"}),
        # infinite value in the input (caught by check_array)
        (ValueError, X1_w_inf, X2),
    ]
    for expected_error, first, second in failing_cases:
        with self.assertRaises(expected_error):
            LatentDistributionTest(input_graph=False).fit_predict(first, second)

    # appropriate input works
    LatentDistributionTest(input_graph=False).fit_predict(X1, X2)
def test_passing_networkx(self):
    """NetworkX graphs are rejected as embeddings but accepted as graphs.

    With ``input_graph=False``, ``fit_predict`` must raise TypeError as
    soon as either argument is a NetworkX graph. With ``input_graph=True``,
    two NetworkX graphs must be accepted.
    """
    np.random.seed(123)
    A1 = er_np(20, 0.8)
    A2 = er_np(20, 0.8)
    # ``nx.from_numpy_matrix`` was removed in NetworkX 3.0;
    # ``from_numpy_array`` is the replacement for ndarray adjacency input.
    A1_nx = nx.from_numpy_array(A1)
    A2_nx = nx.from_numpy_array(A2)

    # passing NetworkX graphs when embeddings are expected
    rejected_pairs = ((A1_nx, A2), (A1, A2_nx), (A1_nx, A2_nx))
    for first, second in rejected_pairs:
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(first, second)

    # the appropriate input works
    ldt = LatentDistributionTest(input_graph=True)
    ldt.fit_predict(A1_nx, A2_nx)
def test_ER_sample(self):
    """Check ``sample`` argument validation, then run the shared sampling check."""
    invalid_n_samples = ((-1, ValueError), ("nope", TypeError))
    for bad_value, expected_error in invalid_n_samples:
        with pytest.raises(expected_error):
            self.estimator.sample(n_samples=bad_value)

    n_verts = 100
    g = er_np(n_verts, 0.5)
    estimator = EREstimator(directed=True, loops=False)
    estimator.fit(g)

    # Expected probability matrix: 0.5 everywhere with a zeroed diagonal.
    p_mat = np.full((n_verts, n_verts), 0.5)
    np.fill_diagonal(p_mat, 0.0)
    _test_sample(estimator, p_mat)
def test_DCER_inputs(self):
    """Check that DCEREstimator rejects bad constructor arguments and bad graphs."""
    for bad_kwargs in ({"directed": "hey"}, {"loops": 6}):
        with pytest.raises(TypeError):
            DCEREstimator(**bad_kwargs)

    graph = er_np(100, 0.5)
    dcere = DCEREstimator()

    # A non-square (100 x 99) slice and a 3-D array must both be rejected.
    malformed_graphs = (graph[:, :99], graph[..., np.newaxis])
    for bad_graph in malformed_graphs:
        with pytest.raises(ValueError):
            dcere.fit(bad_graph)
def test_DCSBM_inputs(self):
    """Check that DCSBMEstimator rejects bad constructor arguments and bad fit inputs."""
    bad_constructor_args = [
        (TypeError, {"directed": "hey"}),
        (TypeError, {"loops": 6}),
        (TypeError, {"n_components": "XD"}),
        (ValueError, {"n_components": -1}),
        (TypeError, {"min_comm": "1"}),
        (ValueError, {"min_comm": -1}),
        (TypeError, {"max_comm": "ay"}),
        (ValueError, {"max_comm": -1}),
        (ValueError, {"min_comm": 4, "max_comm": 2}),
    ]
    for expected_error, kwargs in bad_constructor_args:
        with pytest.raises(expected_error):
            DCSBMEstimator(**kwargs)

    graph = er_np(100, 0.5)
    bad_y = np.zeros(99)  # one label fewer than the 100 vertices
    dcsbe = DCSBMEstimator()

    with pytest.raises(ValueError):
        dcsbe.fit(graph, y=bad_y)

    # A non-square (100 x 99) slice and a 3-D array must both be rejected.
    for bad_graph in (graph[:, :99], graph[..., np.newaxis]):
        with pytest.raises(ValueError):
            dcsbe.fit(bad_graph)

    for kws_name in ("cluster_kws", "embed_kws"):
        with pytest.raises(TypeError):
            DCSBMEstimator(**{kws_name: 1})
#%% import numpy as np from graspy.simulations import sample_edges, er_np from graspy.plot import heatmap g = er_np(10, 0.5) heatmap(g) P = 0.5 * np.ones((10, 10)) g = sample_edges(P) heatmap(g) #%% g == 1 P[g == 1] = 100 P[g == 0] = -100 P heatmap(g) heatmap(P) # %% directed = True if directed: sample_edges(P, directed=True) else: sample_edges(P, directed=False) sample_edges(P, directed=directed) # %% def sample(P, directed=False): print(directed) print(P)
import seaborn as sns
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import er_np

# NOTE(review): ``np`` is used below but not imported in this span --
# presumably imported earlier in the file.

# Experiment parameters
n_verts = 200
p = 0.5
n_components = 1
n_sims = 1000

# Run experiment
# Column 0 holds the mean embedding with diag_aug=True, column 1 with
# diag_aug=False; one row per simulation.
estimated_latents = np.zeros((n_sims, 2))
for i in range(n_sims):
    # A fresh undirected, loopless ER graph for every simulation.
    graph = er_np(n_verts, p, directed=False, loops=False)
    # Both embedders are fitted on the same graph so the two settings are
    # compared on identical data.
    ase_diag = AdjacencySpectralEmbed(n_components=n_components, diag_aug=True)
    ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)
    diag_latent = ase_diag.fit_transform(graph)
    ase_latent = ase.fit_transform(graph)
    # Average over all entries of each embedding.
    mean_diag_latent = np.mean(diag_latent)
    mean_latent = np.mean(ase_latent)
    estimated_latents[i, 0] = mean_diag_latent
    estimated_latents[i, 1] = mean_latent

# Signed error of each estimate relative to sqrt(p).
diffs = estimated_latents - np.sqrt(p)  # the true latent position is sqrt(p)
# Build sub-block labels: within each block, vertices are split into
# sub-blocks according to the proportions in the matching row of ``prop``.
n_blocks = len(prop)
subblock_labels = block_labels.copy()
for i, (n_in_block, block_prop) in enumerate(zip(n, prop)):
    # Sub-block sizes, truncated to integers.
    block_n = []
    for p in block_prop:
        num = int(p * n_in_block)
        block_n.append(num)
    # Offset the labels by n_blocks + i * 3.
    # NOTE(review): presumably this keeps sub-block labels distinct from
    # block labels and assumes at most 3 sub-blocks per block -- confirm.
    temp_labels = n_to_labels(block_n) + n_blocks + i * 3
    subblock_labels[block_labels == i] = temp_labels

# One matrix per block, indexed by block number below.
B_list = [B1, B2, B3, B1, B3, B3, B2, B1]
# B_list = [B1, B2, B1, B1, B3, B3, B1, B2]
# Start from a global ER graph, then overwrite each block's diagonal
# submatrix with an SBM sample.
graph = er_np(n_verts, global_p)
for i, n_sub_verts in enumerate(n):
    p = prop[i, :]
    # Sub-block sizes for this block, truncated to integers.
    n_vec = n_sub_verts * p
    n_vec = n_vec.astype(int)
    B = B_list[i]
    subgraph = sbm(n_vec, B)
    # Boolean mask of the vertices that belong to block i.
    inds = block_labels == i
    graph[np.ix_(inds, inds)] = subgraph

heatmap(
    graph,
    figsize=(15, 15),
    cbar=False,
    inner_hier_labels=subblock_labels,
    outer_hier_labels=block_labels,
def setUpClass(cls):
    """Seed NumPy and store two 20-vertex ER graphs (p = 0.3) as ``A1`` and ``A2``."""
    np.random.seed(1234556)
    cls.A1, cls.A2 = er_np(20, 0.3), er_np(20, 0.3)
stashfig("ffwSBM-adj")
plt.show()

labels = n_to_labels(community_sizes).astype(str)

# %% [markdown]
# # Demonstrate that FAQ works
# Shuffle the true adjacency matrix and then show that it can be recovered

n_verts = 100
p = 0.1
n_init = 10  # passed to FastApproximateQAP as n_init
n_iter = 30  # passed to FastApproximateQAP as max_iter
tol = 100  # NOTE(review): not used anywhere in this span -- confirm it is needed
eps = 0.0001  # passed to FastApproximateQAP as eps
A = er_np(n_verts, p=p)
# B is A with rows and columns reordered by the same random permutation.
shuffle_inds = np.random.permutation(n_verts)
B = A[np.ix_(shuffle_inds, shuffle_inds)]
faq = FastApproximateQAP(
    max_iter=n_iter,
    eps=eps,
    init_method="rand",
    n_init=n_init,
    shuffle_input=False,
    maximize=True,
)

start = timer()
A_found, B_found = faq.fit_predict(A, B)
# Elapsed time divided by 60 -- presumably minutes, if timer() returns seconds.
diff = (timer() - start) / 60