def test_independence(self, graph_gen, seed, num_nodes):
    """
    Check that sampled binary data reflects the (in)dependence between nodes.

    Every column other than ``aa`` is paired with ``aa`` and the two values
    returned by ``calculate_proba`` (joint and factored probability) are
    compared with an absolute tolerance. They must differ for ``ab`` and agree
    for every other node, which implicitly tests the sequence of nodes too.
    """
    tolerance = 0.05

    sm = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
    nodes = sm.nodes()
    df = generate_binary_dataframe(
        sm,
        n_samples=100000,
        distribution="normal",
        seed=seed,
        noise_scale=0.5,
        intercept=False,
    )

    for name in nodes:
        if name == "aa":
            continue

        joint, factored = calculate_proba(df, "aa", name)
        looks_independent = np.isclose(joint, factored, atol=tolerance, rtol=0)

        if name != "ab":
            assert looks_independent
        else:
            # "ab" is the only node expected to be linked to "aa"; the column
            # means are reported on failure to help diagnose the sample.
            assert not looks_independent, df.mean()
def test_independence(self, graph_gen, seed, num_nodes, n_categories, distribution):
    """
    Check that sampled categorical data reflects the (in)dependence between nodes.

    For every node other than ``aa``, the ``<node>_0`` column is paired with
    ``aa_0`` and the two values returned by ``calculate_proba`` (joint and
    factored probability) are compared with a relative tolerance. They must
    differ for ``ab`` and agree for every other node, which implicitly tests
    the sequence of nodes too.
    """
    tolerance = 0.05

    sm = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
    nodes = sm.nodes()
    df = generate_categorical_dataframe(
        sm,
        n_samples=100000,
        distribution=distribution,
        n_categories=n_categories,
        seed=seed,
        noise_scale=1,
        intercept=False,
    )

    for name in nodes:
        if name == "aa":
            continue

        # Only the first category column of each node is inspected.
        joint, factored = calculate_proba(df, "aa_0", name + "_0")
        looks_independent = np.isclose(joint, factored, rtol=tolerance, atol=0)

        if name != "ab":
            # independent links
            assert looks_independent
        else:
            assert not looks_independent
def test_order_is_correct(self, graph_gen, num_nodes, seed):
    """
    Check that the columns of the generated data follow the order of `sm.nodes()`.

    Columns are addressed by the position of each node in `sm.nodes()`. If
    that mapping is right, the column for ``aa`` must be dependent on the
    column for ``ab`` and independent (within a relative tolerance) of every
    other column.

    NOTE(review): this relies on the graph fixture linking ``aa`` only to
    ``ab``; the fixture is not visible here, so confirm against `graph_gen`.
    """
    tolerance = 0.15

    sm = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
    nodes = sm.nodes()
    node_seq = {name: position for position, name in enumerate(sm.nodes())}
    data = generate_binary_data(
        sm,
        n_samples=10000,
        distribution="normal",
        seed=seed,
        noise_scale=0.1,
        intercept=False,
    )

    # Without an intercept the parent's mean probability is presumably 0.5,
    # where the variance p(1-p) of a binary feature (and hence its std) is at
    # its maximum, so the child is expected to show a lower std.
    parent_std = data[:, node_seq["aa"]].std()
    child_std = data[:, node_seq["ab"]].std()
    assert parent_std > child_std

    for name in nodes:
        if name == "aa":
            continue

        joint, factored = calculate_proba(data, node_seq["aa"], node_seq[name])
        looks_independent = np.isclose(joint, factored, rtol=tolerance, atol=0)

        if name != "ab":
            assert looks_independent
        else:
            # "ab" is the only node expected to be linked to "aa".
            assert not looks_independent