示例#1
0
    def test_serialization_fit_model(self):
        # Setup
        instance = Tree(TreeTypes.REGULAR)
        X = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        index = 0
        n_nodes = X.shape[1]
        tau_matrix = X.corr(method='kendall').values

        univariates_matrix = np.empty(X.shape)
        for i, column in enumerate(X):
            distribution = GaussianKDE()
            distribution.fit(X[column])
            univariates_matrix[:, i] = distribution.cumulative_distribution(
                X[column])

        instance.fit(index, n_nodes, tau_matrix, univariates_matrix)

        # Run
        result = Tree.from_dict(instance.to_dict())

        # Check
        assert result.to_dict() == instance.to_dict()
示例#2
0
    def test_prepare_next_tree_first_level(self, bivariate_mock):
        """prepare_next_tree computes the conditional U matrices on its edges."""
        # Setup
        instance = Tree(TreeTypes.REGULAR)
        instance.level = 1
        instance.u_matrix = np.array([[0.1, 0.2], [0.3, 0.4]])

        edge = MagicMock(spec=Edge)
        edge.L = 0
        edge.R = 1
        edge.name = 'copula_type'
        edge.theta = 'copula_theta'
        instance.edges = [edge]

        copula_mock = bivariate_mock.return_value
        copula_mock.partial_derivative.return_value = np.array(
            [0.0, 0.25, 0.5, 0.75, 1.0])

        expected_univariate = np.array(
            [[EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON],
             [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON]])

        expected_partial_derivative_call_args = [
            ((instance.u_matrix, ), {}),
            ((instance.u_matrix[:, np.argsort([1, 0])], ), {})
        ]

        # Run
        instance.prepare_next_tree()

        # Check
        compare_nested_iterables(instance.edges[0].U, expected_univariate)

        bivariate_mock.assert_called_once_with('copula_type')

        assert copula_mock.theta == 'copula_theta'
        compare_nested_iterables(copula_mock.partial_derivative.call_args_list,
                                 expected_partial_derivative_call_args)
示例#3
0
class TestDirectTree(TestCase):
    def setUp(self):
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)
        count = 0
        for col in self.data:
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:, count] = uni.cumulative_distribution(self.data[col])
            count += 1
        self.tree = Tree(TreeTypes.DIRECT)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """ Assert 0 is the center node"""
        assert self.tree.edges[0].L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """ Assert first tree likehood is correct"""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, new_u = self.tree.get_likelihood(uni_matrix)

        expected = -0.1207611551427385
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """ Assert get constraint gets correct neighbor nodes"""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix_no_edges_empty(self):
        """get_tau_matrix returns an empty array if there are no edges."""
        # Setup
        tree = Tree(TreeTypes.DIRECT)
        tree.edges = []

        # Run
        result = tree.get_tau_matrix()

        # Check
        assert result.shape == (0, 0)

    def test_get_tau_matrix(self):
        """Assert none of get tau matrix is NaN."""
        self.tau = self.tree.get_tau_matrix()

        test = np.isnan(self.tau)

        self.assertFalse(test.all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        tau = self.tree.get_tau_matrix()

        second_tree = Tree(TreeTypes.DIRECT)
        second_tree.fit(1, 3, tau, self.tree)

        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        first_value, new_u = self.tree.get_likelihood(uni_matrix)
        second_value, out_u = second_tree.get_likelihood(new_u)

        expected = 0.24428294700258632
        assert abs(second_value - expected) < 10E-3
示例#4
0
class TestRegularTree(TestCase):
    def setUp(self):
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)
        count = 0
        for col in self.data:
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:, count] = uni.cumulative_distribution(self.data[col])
            count += 1
        self.tree = Tree(TreeTypes.REGULAR)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """ Assert the construction of first tree is correct
        The first tree should be:
                   1
                0--2--3
        """
        sorted_edges = Edge.sort_edge(self.tree.edges)

        assert sorted_edges[0].L == 0
        assert sorted_edges[0].R == 2
        assert sorted_edges[1].L == 1
        assert sorted_edges[1].R == 2
        assert sorted_edges[2].L == 2
        assert sorted_edges[2].R == 3

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """ Assert first tree likehood is correct"""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, new_u = self.tree.get_likelihood(uni_matrix)

        expected = 0.9545348664739628
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """ Assert get constraint gets correct neighbor nodes"""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1, 2]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """ Assert second tree likelihood is correct """
        self.tau = self.tree.get_tau_matrix()

        test = np.isnan(self.tau)

        self.assertFalse(test.all())

    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        tau = self.tree.get_tau_matrix()
        second_tree = Tree(TreeTypes.REGULAR)
        second_tree.fit(1, 3, tau, self.tree)
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        first_value, new_u = self.tree.get_likelihood(uni_matrix)
        second_value, out_u = second_tree.get_likelihood(new_u)
示例#5
0
    def test_to_dict_fit_model(self):
        # Setup
        instance = Tree(TreeTypes.REGULAR)
        X = pd.DataFrame(data=[
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1]
        ])
        index = 0
        n_nodes = X.shape[1]
        tau_matrix = X.corr(method='kendall').values

        univariates_matrix = np.empty(X.shape)
        for i, column in enumerate(X):
            distribution = GaussianKDE()
            distribution.fit(X[column])
            univariates_matrix[:, i] = distribution.cumulative_distribution(X[column])

        instance.fit(index, n_nodes, tau_matrix, univariates_matrix)
        expected_result = {
            'type': 'copulas.multivariate.tree.RegularTree',
            'fitted': True,
            'level': 1,
            'n_nodes': 3,
            'previous_tree': [
                [0.8230112726144534, 0.3384880496294825, 0.3384880496294825],
                [0.3384880496294825, 0.8230112726144534, 0.3384880496294825],
                [0.3384880496294825, 0.3384880496294825, 0.8230112726144534]
            ],
            'tau_matrix': [
                [1.0, -0.49999999999999994, -0.49999999999999994],
                [-0.49999999999999994, 1.0, -0.49999999999999994],
                [-0.49999999999999994, -0.49999999999999994, 1.0]
            ],
            'tree_type': TreeTypes.REGULAR,
            'edges': [
                {
                    'index': 0,
                    'D': set(),
                    'L': 0,
                    'R': 1,
                    'U': [
                        [0.7969535322648066, 0.6887525261721343, 0.12077958383821545],
                        [0.6887525261721343, 0.7969535322648066, 0.12077958383821545]
                    ],
                    'likelihood': None,
                    'name': CopulaTypes.FRANK,
                    'neighbors': [],
                    'parents': None,
                    'tau': -0.49999999999999994,
                    'theta': -5.736282443655552
                },
                {
                    'index': 1,
                    'D': set(),
                    'L': 1,
                    'R': 2,
                    'U': [
                        [0.12077958383821545, 0.7969535322648066, 0.6887525261721343],
                        [0.12077958383821545, 0.6887525261721343, 0.7969535322648066]
                    ],
                    'likelihood': None,
                    'name': CopulaTypes.FRANK,
                    'neighbors': [],
                    'parents': None,
                    'tau': -0.49999999999999994,
                    'theta': -5.736282443655552
                }
            ],
        }

        # Run
        result = instance.to_dict()

        # Check
        compare_nested_dicts(result, expected_result)
示例#6
0
    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        # Setup
        # Build first tree
        data = pd.read_csv('data/iris.data.csv')
        tau_mat = data.corr(method='kendall').values
        u_matrix = np.empty(data.shape)

        for index, col in enumerate(data):
            uni = GaussianKDE()
            uni.fit(data[col])
            u_matrix[:, index] = uni.cumulative_distribution(data[col])

        first_tree = Tree(TreeTypes.CENTER)
        first_tree.fit(0, 4, tau_mat, u_matrix)
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        likelihood_first_tree, conditional_uni_first = first_tree.get_likelihood(uni_matrix)
        tau = first_tree.get_tau_matrix()

        # Build second tree
        second_tree = Tree(TreeTypes.CENTER)
        second_tree.fit(1, 3, tau, first_tree)
        expected_likelihood_second_tree = 0.4888802429313932

        # Run
        likelihood_second_tree, out_u = second_tree.get_likelihood(conditional_uni_first)

        # Check
        assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree)
示例#7
0
class TestCenterTree(TestCase):
    def setUp(self):
        self.data = pd.read_csv('data/iris.data.csv')
        self.tau_mat = self.data.corr(method='kendall').values
        self.u_matrix = np.empty(self.data.shape)
        count = 0
        for col in self.data:
            uni = GaussianKDE()
            uni.fit(self.data[col])
            self.u_matrix[:, count] = uni.cumulative_distribution(self.data[col])
            count += 1
        self.tree = Tree(TreeTypes.CENTER)
        self.tree.fit(0, 4, self.tau_mat, self.u_matrix)

    def test_first_tree(self):
        """Assert 0 is the center node on the first tree."""
        assert self.tree.edges[0].L == 0

    @pytest.mark.xfail
    def test_first_tree_likelihood(self):
        """Assert first tree likehood is correct."""
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])

        value, new_u = self.tree.get_likelihood(uni_matrix)

        expected = -0.19988720707143634
        assert abs(value - expected) < 10E-3

    def test_get_constraints(self):
        """Assert get constraint gets correct neighbor nodes."""
        self.tree._get_constraints()

        assert self.tree.edges[0].neighbors == [1, 2]
        assert self.tree.edges[1].neighbors == [0, 2]

    def test_get_tau_matrix(self):
        """Assert none of get tau matrix is NaN."""
        self.tau = self.tree.get_tau_matrix()

        test = np.isnan(self.tau)

        self.assertFalse(test.all())

    @pytest.mark.xfail
    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        # Setup
        # Build first tree
        data = pd.read_csv('data/iris.data.csv')
        tau_mat = data.corr(method='kendall').values
        u_matrix = np.empty(data.shape)

        for index, col in enumerate(data):
            uni = GaussianKDE()
            uni.fit(data[col])
            u_matrix[:, index] = uni.cumulative_distribution(data[col])

        first_tree = Tree(TreeTypes.CENTER)
        first_tree.fit(0, 4, tau_mat, u_matrix)
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        likelihood_first_tree, conditional_uni_first = first_tree.get_likelihood(uni_matrix)
        tau = first_tree.get_tau_matrix()

        # Build second tree
        second_tree = Tree(TreeTypes.CENTER)
        second_tree.fit(1, 3, tau, first_tree)
        expected_likelihood_second_tree = 0.4888802429313932

        # Run
        likelihood_second_tree, out_u = second_tree.get_likelihood(conditional_uni_first)

        # Check
        assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree)
示例#8
0
    def test_to_dict(self):
        """ """
        # Setup
        instance = VineCopula('regular')
        instance.fitted = True
        instance.n_sample = 100
        instance.n_var = 10
        instance.depth = 3
        instance.truncated = 3
        tree = Tree('regular')
        instance.trees = [tree]
        uni = KDEUnivariate()
        instance.unis = [uni]

        tau_mat = np.array([
            [0, 1],
            [1, 0]
        ])
        instance.tau_mat = tau_mat

        u_matrix = np.array([
            [0, 1],
            [1, 0]
        ])
        instance.u_matrix = u_matrix

        expected_result = {
            'type': 'copulas.multivariate.vine.VineCopula',
            'fitted': True,
            'vine_type': 'regular',
            'n_sample': 100,
            'n_var': 10,
            'depth': 3,
            'truncated': 3,
            'trees': [
                {
                    'type': 'copulas.multivariate.tree.RegularTree',
                    'tree_type': 'regular',
                    'fitted': False
                }
            ],
            'tau_mat': [
                [0, 1],
                [1, 0]
            ],
            'u_matrix': [
                [0, 1],
                [1, 0]
            ],
            'unis': [
                {
                    'type': 'copulas.univariate.kde.KDEUnivariate',
                    'fitted': False
                }
            ]
        }

        # Run
        result = instance.to_dict()

        # Check
        assert result == expected_result
示例#9
0
    def test_to_dict_fit_model(self):
        # Setup
        instance = Tree(TreeTypes.REGULAR)
        X = pd.DataFrame(data=[
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1]
        ])
        index = 0
        n_nodes = X.shape[1]
        tau_matrix = X.corr(method='kendall').values

        univariates_matrix = np.empty(X.shape)
        for i, column in enumerate(X):
            distribution = KDEUnivariate()
            distribution.fit(X[column])
            univariates_matrix[:, i] = [distribution.cumulative_distribution(x) for x in X[column]]

        instance.fit(index, n_nodes, tau_matrix, univariates_matrix)
        expected_result = {
            'type': 'copulas.multivariate.tree.RegularTree',
            'fitted': True,
            'level': 1,
            'n_nodes': 3,
            'previous_tree': [
                [0.8230112726144534, 0.3384880496294825, 0.3384880496294825],
                [0.3384880496294825, 0.8230112726144534, 0.3384880496294825],
                [0.3384880496294825, 0.3384880496294825, 0.8230112726144534]
            ],
            'tau_matrix': [
                [1.0, -0.49999999999999994, -0.49999999999999994],
                [-0.49999999999999994, 1.0, -0.49999999999999994],
                [-0.49999999999999994, -0.49999999999999994, 1.0]
            ],
            'tree_type': TreeTypes.REGULAR,
            'edges': [
                {
                    'D': set(),
                    'L': 0,
                    'R': 1,
                    'U': [
                        [6.533235975920359, 6.425034969827687, 5.857062027493768],
                        [6.425034969827687, 6.533235975920359, 5.857062027493768]
                    ],
                    'likelihood': None,
                    'name': CopulaTypes.FRANK,
                    'neighbors': [],
                    'parents': None,
                    'tau': -0.49999999999999994,
                    'theta': -5.736282443655552
                },
                {
                    'D': set(),
                    'L': 1,
                    'R': 2,
                    'U': [
                        [5.857062027493768, 6.533235975920359, 6.425034969827687],
                        [5.857062027493768, 6.425034969827687, 6.533235975920359]
                    ],
                    'likelihood': None,
                    'name': CopulaTypes.FRANK,
                    'neighbors': [],
                    'parents': None,
                    'tau': -0.49999999999999994,
                    'theta': -5.736282443655552
                }
            ],
        }

        # Run
        result = instance.to_dict()

        # Check
        compare_nested_dicts(result, expected_result)