def test_independence(self, graph_gen, seed, num_nodes):
    """
    Test whether the relation is accurate; implicitly tests the sequence of nodes.
    """
    sm = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
    nodes = sm.nodes()
    df = generate_binary_dataframe(
        sm,
        n_samples=100000,
        distribution="normal",
        seed=seed,
        noise_scale=0.5,
        intercept=False,
    )
    tol = 0.05

    for node in nodes:
        if node == "aa":
            continue
        joint_proba, factored_proba = calculate_proba(df, "aa", node)
        if node == "ab":
            # this is the only link
            assert not np.isclose(
                joint_proba, factored_proba, atol=tol, rtol=0
            ), df.mean()
        else:
            assert np.isclose(joint_proba, factored_proba, atol=tol, rtol=0)
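# The calculate_proba helper used above is not defined in this section; a minimal
# sketch, assuming it compares the empirical joint probability P(col_0=1, col_1=1)
# against the product of marginals P(col_0=1) * P(col_1=1) for binary columns:
def calculate_proba(df, col_0, col_1):
    # empirical marginal probabilities of each binary column being 1
    proba_0 = (df[col_0] == 1).mean()
    proba_1 = (df[col_1] == 1).mean()
    # empirical joint probability of both columns being 1 simultaneously
    joint_proba = ((df[col_0] == 1) & (df[col_1] == 1)).mean()
    # under independence, the joint probability factorises into the product of marginals
    factored_proba = proba_0 * proba_1
    return joint_proba, factored_proba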
def test_f1score_generated_binary(self):
    """ Binary structure learned should have a good f1 score """
    np.random.seed(10)
    sm = generate_structure(5, 2.0)
    df = generate_binary_dataframe(
        sm, 1000, intercept=False, noise_scale=0.1, seed=10
    )
    dist_type_schema = {i: "bin" for i in range(df.shape[1])}
    sm_fitted = from_pandas(
        df,
        dist_type_schema=dist_type_schema,
        lasso_beta=0.1,
        ridge_beta=0.0,
        w_threshold=0.1,
        use_bias=False,
    )
    right_edges = sm.edges

    n_predictions_made = len(sm_fitted.edges)
    n_correct_predictions = len(set(sm_fitted.edges).intersection(set(right_edges)))
    n_relevant_predictions = len(right_edges)

    precision = n_correct_predictions / n_predictions_made
    recall = n_correct_predictions / n_relevant_predictions
    f1_score = 2 * (precision * recall) / (precision + recall)

    assert f1_score > 0.8
def test_dataframe(self, graph, distribution, noise_std, intercept, seed, kernel):
    """ Tests equivalence of dataframe wrapper """
    data = generate_binary_data(
        graph,
        100,
        distribution,
        noise_scale=noise_std,
        seed=seed,
        intercept=intercept,
        kernel=kernel,
    )
    df = generate_binary_dataframe(
        graph,
        100,
        distribution,
        noise_scale=noise_std,
        seed=seed,
        intercept=intercept,
        kernel=kernel,
    )
    assert np.array_equal(data, df[list(graph.nodes())].values)