Exemplo n.º 1
0
    def test_intercept(self, distribution, noise_scale):
        """
        Compare data generated with and without an intercept.

        Both datasets come from the same single-node graph, sample count,
        distribution, noise scale and seed; only the ``intercept`` flag
        differs. The test asserts that the column means are not close while
        the standard deviations agree within a 1% relative tolerance.
        """
        single_node = StructureModel()
        single_node.add_node("123")

        # Everything except ``intercept`` is shared between the two calls.
        shared_kwargs = {
            "n_samples": 100000,
            "distribution": distribution,
            "noise_scale": noise_scale,
            "seed": 10,
        }
        plain = generate_continuous_data(single_node,
                                         intercept=False,
                                         **shared_kwargs)
        shifted = generate_continuous_data(single_node,
                                           intercept=True,
                                           **shared_kwargs)

        plain_col = plain[:, 0]
        shifted_col = shifted[:, 0]

        means_match = np.isclose(plain_col.mean(), shifted_col.mean())
        assert not means_match

        spreads_match = np.isclose(plain_col.std(),
                                   shifted_col.std(),
                                   rtol=0.01)
        assert spreads_match
Exemplo n.º 2
0
 def test_bad_distribution_type(self):
     """
     A distribution name outside "gaussian", "normal", "student-t",
     "exponential", "gumbel" must be rejected with a ``ValueError``.
     """
     node_count = 10
     degree = 4
     sm = generate_structure(node_count, degree, "erdos-renyi")

     expect_failure = pytest.raises(
         ValueError, match="Unknown continuous distribution")
     with expect_failure:
         generate_continuous_data(sm,
                                  n_samples=10,
                                  seed=10,
                                  distribution="invalid")
Exemplo n.º 3
0
    def test_dataframe(self, graph, distribution, noise_std, intercept, seed,
                       kernel):
        """
        The dataframe wrapper must yield the same values as the array API.

        Both generators are called with identical arguments; the dataframe
        columns are then selected in ``graph.nodes()`` order and compared
        element-wise with the raw array.
        """
        leading_args = (graph, 1000, distribution)
        options = {
            "noise_scale": noise_std,
            "seed": seed,
            "intercept": intercept,
            "kernel": kernel,
        }

        as_array = generate_continuous_data(*leading_args, **options)
        as_frame = generate_continuous_dataframe(*leading_args, **options)

        node_columns = list(graph.nodes())
        frame_values = as_frame[node_columns].values
        assert np.array_equal(as_array, frame_values)
Exemplo n.º 4
0
 def test_returns_ndarray(self, distribution):
     """The generator returns a numpy array for the given distribution."""
     node_count = 10
     degree = 4
     sm = generate_structure(node_count, degree, "erdos-renyi")

     generated = generate_continuous_data(sm,
                                          n_samples=10,
                                          distribution=distribution)
     assert isinstance(generated, np.ndarray)
Exemplo n.º 5
0
 def test_number_of_samples(self, num_samples, graph):
     """The generated data has exactly ``num_samples`` rows."""
     generated = generate_continuous_data(
         graph, num_samples, "gaussian", 1, seed=10)

     row_count = len(generated)
     assert row_count == num_samples
Exemplo n.º 6
0
    def test_number_of_nodes(self, num_nodes):
        """Every generated row has one entry per node of the graph."""
        # Build a chain 0 -> 1 -> ... -> num_nodes - 1 with unit weights.
        chain_edges = [
            (src, dst, 1)
            for src, dst in zip(range(num_nodes - 1), range(1, num_nodes))
        ]
        chain = StructureModel()
        chain.add_weighted_edges_from(chain_edges)

        samples = generate_continuous_data(chain, 100, seed=10)

        row_lengths = (len(row) for row in samples)
        assert all(length == num_nodes for length in row_lengths)
Exemplo n.º 7
0
    def test_linear_gumbel_parent_dist(self, graph):
        """
        Anderson-Darling check that the first column looks Gumbel-distributed.

        The test statistic must fall below the critical value reported for
        the 5% significance level.
        """
        samples = generate_continuous_data(graph,
                                           n_samples=100000,
                                           distribution="gumbel",
                                           noise_scale=1,
                                           seed=10)
        first_column = samples[:, 0]

        statistic, critical_values, levels = anderson(first_column,
                                                      "gumbel_r")

        # Locate the critical value that corresponds to the 5% level.
        five_percent_pos = list(levels).index(5)
        threshold = critical_values[five_percent_pos]
        assert statistic < threshold
Exemplo n.º 8
0
    def test_linear_studentt_parent_dist(self, graph):
        """
        Kolmogorov-Smirnov test of the first column against a student-t
        distribution with 3 degrees of freedom.

        NOTE(review): the assertion passes when the p-value is below 0.01,
        i.e. when the KS test rejects the t(3) hypothesis. Confirm that this
        direction is intended.
        """
        # Global numpy seed is set in addition to the explicit ``seed`` below.
        np.random.seed(10)

        samples = generate_continuous_data(graph,
                                           n_samples=100000,
                                           distribution="student-t",
                                           noise_scale=1,
                                           seed=10)
        first_column = samples[:, 0]

        ks_result = stats.kstest(first_column, "t", args=[3])
        _, p_val = ks_result
        assert p_val < 0.01
Exemplo n.º 9
0
    def test_order_is_correct(self, graph_gen, num_nodes, seed):
        """
        Check that data columns follow the order of `sm.nodes`, which in turn
        is the order of the adjacency matrix.

        The graphs used here are described as having degree in {0,1}, built
        from permutations of the identity (the `graph_gen` fixture is not
        visible here). Edge weights are 100 and the noise scale is 1, so the
        "aa" column is expected to have a smaller spread than the "ab" column,
        to be clearly correlated with it, and to be uncorrelated with every
        other node.
        """
        sm = graph_gen(num_nodes=num_nodes, seed=seed, weight=100)
        node_view = sm.nodes()
        column_of = {
            name: position for position, name in enumerate(sm.nodes())
        }

        data = generate_continuous_data(
            sm,
            n_samples=10000,
            distribution="normal",
            seed=seed,
            noise_scale=1.0,
            intercept=False,
        )

        source_spread = data[:, column_of["aa"]].std()
        target_spread = data[:, column_of["ab"]].std()
        assert source_spread < target_spread

        tolerance = 0.15
        # For jointly gaussian variables, zero correlation is equivalent to
        # independence, so the correlation with "aa" is used as the probe.
        for name in node_view:
            if name == "aa":
                continue

            pair = data[:, [column_of["aa"], column_of[name]]]
            correlation = np.corrcoef(pair.T)[0, 1]
            near_zero = np.isclose(correlation, 0, atol=tolerance)

            if name == "ab":
                assert not near_zero
            else:
                assert near_zero
Exemplo n.º 10
0
    def test_linear_gauss_parent_dist(self, graph):
        """
        Anderson-Darling check that the first column looks normally
        distributed.

        The test statistic must fall below the critical value reported for
        the 5% significance level.
        """
        samples = generate_continuous_data(graph,
                                           1000000,
                                           "gaussian",
                                           1,
                                           seed=10)
        first_column = samples[:, 0]

        statistic, critical_values, levels = anderson(first_column, "norm")

        # Locate the critical value that corresponds to the 5% level.
        five_percent_pos = list(levels).index(5)
        threshold = critical_values[five_percent_pos]
        assert statistic < threshold