def test_to_dict(self):
        """Check ``to_dict`` of a copula fitted on the iris data against fixed values."""
        # Setup
        iris = pd.read_csv('data/iris.data.csv')
        model = GaussianMultivariate()
        model.fit(iris)

        univariate_type = 'copulas.univariate.gaussian.GaussianUnivariate'

        # Expected covariance matrix, one row per feature.
        covariance = [
            [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
            [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
            [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
            [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
        ]

        # (column name, expected mean, expected std) for each marginal.
        marginals = [
            ('feature_01', 5.843333333333334, 0.8253012917851409),
            ('feature_02', 3.0540000000000003, 0.4321465800705435),
            ('feature_03', 3.758666666666666, 1.7585291834055212),
            ('feature_04', 1.1986666666666668, 0.7606126185881716),
        ]

        expected = {
            'covariance': covariance,
            'fitted': True,
            'type': 'copulas.multivariate.gaussian.GaussianMultivariate',
            'distribution': univariate_type,
            'distribs': {
                name: {
                    'type': univariate_type,
                    'mean': mean,
                    'std': std,
                    'fitted': True,
                }
                for name, mean, std in marginals
            },
        }

        # Run
        actual = model.to_dict()

        # Check
        compare_nested_dicts(actual, expected)
Exemplo n.º 2
0
    def test_to_dict(self, kde_mock):
        """Check ``to_dict`` of a fitted ``GaussianKDE`` against the expected dict.

        ``kde_mock`` is a mock supplied from outside this method (presumably by a
        patch decorator -- confirm at the call site); the object it returns is
        configured with the sample data before the distribution is fitted.
        """
        # Setup
        samples = [
            0.4967141530112327,
            -0.13826430117118466,
            0.6476885381006925,
            1.5230298564080254,
            -0.23415337472333597,
            -0.23413695694918055,
            1.5792128155073915,
            0.7674347291529088,
            -0.4694743859349521,
            0.5425600435859647,
        ]
        column = np.array([samples])

        mocked_kde = kde_mock.return_value
        mocked_kde.dataset = column
        mocked_kde.resample.return_value = column

        distribution = GaussianKDE()
        distribution.fit(column)

        expected = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'fitted': True,
            'dataset': [list(samples)],
        }

        # Run
        actual = distribution.to_dict()

        # Check
        compare_nested_dicts(actual, expected)
Exemplo n.º 3
0
    def test_serialization_fit_model(self):
        """Round-trip a fitted vine through ``to_dict``/``from_dict`` and compare the dicts."""
        # Setup
        samples = pd.DataFrame(data=[
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
        ])
        vine = VineCopula('regular')
        vine.fit(samples)

        # Run
        serialized = vine.to_dict()
        restored = VineCopula.from_dict(serialized)

        # Check
        compare_nested_dicts(restored.to_dict(), vine.to_dict())
Exemplo n.º 4
0
    def test_save(self, json_mock):
        """Check the content passed to the mocked JSON writer when a fitted copula is saved.

        ``json_mock`` is a mock supplied from outside this method; the first
        positional argument of its last call is compared with the expected
        parameters.
        """
        # Setup
        iris = pd.read_csv('data/iris.data.csv')
        model = GaussianMultivariate()
        model.fit(iris)

        # (column name, expected mean, expected std) for each marginal.
        marginals = [
            ('feature_01', 5.843333333333334, 0.8253012917851409),
            ('feature_02', 3.0540000000000003, 0.4321465800705435),
            ('feature_03', 3.758666666666666, 1.7585291834055212),
            ('feature_04', 1.1986666666666668, 0.7606126185881716),
        ]

        expected_content = {
            'covariance': [
                [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
                [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
                [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
                [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
            ],
            'distribs': {
                name: {'mean': mean, 'std': std}
                for name, mean, std in marginals
            },
        }

        # Run
        model.save('test.json')

        # Check
        dumped_content = json_mock.call_args[0][0]
        compare_nested_dicts(dumped_content, expected_content)
Exemplo n.º 5
0
    def test_serialization_fit_model(self):
        """Serialize a fitted vine, rebuild it with ``from_dict`` and compare both dicts."""
        # Setup
        rows = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
        vine = VineCopula('regular')
        vine.fit(pd.DataFrame(data=rows))

        # Run
        rebuilt = VineCopula.from_dict(vine.to_dict())

        # Check
        rebuilt_dict = rebuilt.to_dict()
        compare_nested_dicts(rebuilt_dict, vine.to_dict())
Exemplo n.º 6
0
    def test_to_dict(self):
        """Check ``to_dict`` of a fitted ``KDEUnivariate`` against fixed expected values."""
        # Setup
        samples = [
            0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
            1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
            1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
            0.5425600435859647,
        ]
        kde = KDEUnivariate()
        kde.fit(np.array([samples]))

        expected = {
            'type': 'copulas.univariate.kde.KDEUnivariate',
            'fitted': True,
            'd': 1,
            'n': 10,
            'dataset': [list(samples)],
            'covariance': [[0.20810696044195218]],
            'factor': 0.6309573444801932,
            'inv_cov': [[4.805221304834407]],
        }

        # Run
        actual = kde.to_dict()

        # Check
        compare_nested_dicts(actual, expected)
Exemplo n.º 7
0
    def test_save(self, json_mock):
        """Check that saving a fitted Frank copula hands the expected dict to the JSON mock.

        ``json_mock`` is a mock supplied from outside this method; the first
        positional argument of its last call is what gets compared.
        """
        # Setup
        expected_content = {
            'copula_type': 'FRANK',
            'tau': 0.014492753623188406,
            'theta': 0.13070829945417198,
        }
        copula = Bivariate('frank')
        copula.fit(self.X)

        # Run
        copula.save('test.json')

        # Check
        assert json_mock.called
        dumped_content = json_mock.call_args[0][0]
        compare_nested_dicts(dumped_content, expected_content)
Exemplo n.º 8
0
    def test_save(self, json_mock, open_mock):
        """Save stores the internal dictionary as a json in a file.

        ``json_mock`` and ``open_mock`` are mocks supplied from outside this
        method (presumably by patch decorators -- confirm at the definition
        site). The test checks that the file is opened for writing exactly
        once and that the expected content is handed to the JSON mock.
        """
        # Setup
        instance = Bivariate(copula_type='frank')
        instance.fit(self.X)

        expected_content = {
            "copula_type": "FRANK",
            "tau": 0.9128709291752769,
            "theta": 44.2003852484162
        }

        # Run
        instance.save('test.json')

        # Check
        # Use the real mock assertion: ``called_once_with`` is just an
        # auto-created mock attribute, so ``assert mock.called_once_with(...)``
        # is always truthy and can never fail.
        open_mock.assert_called_once_with('test.json', 'w')
        assert json_mock.called
        compare_nested_dicts(json_mock.call_args[0][0], expected_content)
Exemplo n.º 9
0
    def test_to_dict_fit_model(self):
        """Check ``to_dict`` of a regular tree fitted on a 3x3 sample against fixed values."""
        # Setup
        data = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        tree = get_tree(TreeTypes.REGULAR)

        # Fill one column per variable with the KDE cumulative distribution values.
        cdf_values = np.empty(data.shape)
        for position, label in enumerate(data.columns):
            series = data[label]
            kde = GaussianKDE()
            kde.fit(series)
            cdf_values[:, position] = kde.cumulative_distribution(series)

        kendall_tau = data.corr(method='kendall').values
        tree.fit(0, data.shape[1], kendall_tau, cdf_values)

        tau = -0.49999999999999994
        diagonal = 0.8230112726144534
        off_diagonal = 0.3384880496294825

        def expected_edge(index, left, right, U):
            # Both expected edges differ only in index, endpoints and U.
            return {
                'index': index,
                'D': set(),
                'L': left,
                'R': right,
                'U': U,
                'likelihood': None,
                'name': CopulaTypes.FRANK,
                'neighbors': [],
                'parents': None,
                'tau': tau,
                'theta': -5.736282443655552,
            }

        expected = {
            'type': 'copulas.multivariate.tree.RegularTree',
            'fitted': True,
            'level': 1,
            'n_nodes': 3,
            'previous_tree': [
                [diagonal, off_diagonal, off_diagonal],
                [off_diagonal, diagonal, off_diagonal],
                [off_diagonal, off_diagonal, diagonal],
            ],
            'tau_matrix': [
                [1.0, tau, tau],
                [tau, 1.0, tau],
                [tau, tau, 1.0],
            ],
            'tree_type': TreeTypes.REGULAR,
            'edges': [
                expected_edge(0, 0, 1, [
                    [0.7969636014074211, 0.6887638642325501, 0.12078520049364487],
                    [0.6887638642325501, 0.7969636014074211, 0.12078520049364487],
                ]),
                expected_edge(1, 1, 2, [
                    [0.12078520049364491, 0.7969636014074213, 0.6887638642325501],
                    [0.12078520049364491, 0.6887638642325503, 0.7969636014074211],
                ]),
            ],
        }

        # Run
        actual = tree.to_dict()

        # Check
        compare_nested_dicts(actual, expected)
Exemplo n.º 10
0
    def test_save(self, json_mock, open_mock):
        """Save stores the internal dictionary as a json in a file.

        ``json_mock`` and ``open_mock`` are mocks supplied from outside this
        method (presumably by patch decorators -- confirm at the definition
        site). The test checks that the file is opened for writing exactly
        once and that the expected content is handed to the JSON mock.
        """
        # Setup
        univariate_type = 'copulas.univariate.gaussian.GaussianUnivariate'
        instance = GaussianMultivariate(distribution=univariate_type)
        data = pd.read_csv('data/iris.data.csv')
        instance.fit(data)

        covariance = [
            [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
            [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
            [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
            [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
        ]

        # (expected mean, expected std) of each univariate, in column order.
        moments = [
            (5.843333333333334, 0.8253012917851409),
            (3.0540000000000003, 0.4321465800705435),
            (3.758666666666666, 1.7585291834055212),
            (1.1986666666666668, 0.7606126185881716),
        ]

        expected_content = {
            'covariance': covariance,
            'fitted': True,
            'type': 'copulas.multivariate.gaussian.GaussianMultivariate',
            'distribution': univariate_type,
            'columns': ['feature_01', 'feature_02', 'feature_03', 'feature_04'],
            'univariates': [
                {
                    'type': univariate_type,
                    'mean': mean,
                    'std': std,
                    'fitted': True,
                }
                for mean, std in moments
            ],
        }

        # Run
        instance.save('test.json')

        # Check
        # Use the real mock assertion: ``called_once_with`` is just an
        # auto-created mock attribute, so ``assert mock.called_once_with(...)``
        # is always truthy and can never fail.
        open_mock.assert_called_once_with('test.json', 'w')
        # Fail with a clear message before indexing ``call_args`` (None if never called).
        assert json_mock.called
        compare_nested_dicts(json_mock.call_args[0][0], expected_content)
Exemplo n.º 11
0
    def test_to_dict_fit_model(self):
        """Check ``to_dict`` of a regular ``Tree`` fitted on a 3x3 sample against fixed values."""
        # Setup
        data = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        tree = Tree(TreeTypes.REGULAR)

        # Fill one column per variable, evaluating the KDE value by value.
        cdf_values = np.empty(data.shape)
        for position, label in enumerate(data.columns):
            kde = KDEUnivariate()
            kde.fit(data[label])
            cdf_values[:, position] = list(map(kde.cumulative_distribution, data[label]))

        kendall_tau = data.corr(method='kendall').values
        tree.fit(0, data.shape[1], kendall_tau, cdf_values)

        tau = -0.49999999999999994
        diagonal = 0.8230112726144534
        off_diagonal = 0.3384880496294825

        def expected_edge(left, right, U):
            # Both expected edges differ only in their endpoints and U.
            return {
                'D': set(),
                'L': left,
                'R': right,
                'U': U,
                'likelihood': None,
                'name': CopulaTypes.FRANK,
                'neighbors': [],
                'parents': None,
                'tau': tau,
                'theta': -5.736282443655552,
            }

        expected = {
            'type': 'copulas.multivariate.tree.RegularTree',
            'fitted': True,
            'level': 1,
            'n_nodes': 3,
            'previous_tree': [
                [diagonal, off_diagonal, off_diagonal],
                [off_diagonal, diagonal, off_diagonal],
                [off_diagonal, off_diagonal, diagonal],
            ],
            'tau_matrix': [
                [1.0, tau, tau],
                [tau, 1.0, tau],
                [tau, tau, 1.0],
            ],
            'tree_type': TreeTypes.REGULAR,
            'edges': [
                expected_edge(0, 1, [
                    [6.533235975920359, 6.425034969827687, 5.857062027493768],
                    [6.425034969827687, 6.533235975920359, 5.857062027493768],
                ]),
                expected_edge(1, 2, [
                    [5.857062027493768, 6.533235975920359, 6.425034969827687],
                    [5.857062027493768, 6.425034969827687, 6.533235975920359],
                ]),
            ],
        }

        # Run
        actual = tree.to_dict()

        # Check
        compare_nested_dicts(actual, expected)