Пример #1
0
    def test_fit_sample(self):
        """Fit a GaussianKDE on ``self.data`` and check the type and shape of a 50-draw sample."""
        n_draws = 50
        kde = GaussianKDE()
        kde.fit(self.data)

        drawn = kde.sample(n_draws)

        # The sample is expected to be a one-dimensional numpy array.
        assert isinstance(drawn, np.ndarray)
        assert drawn.shape == (n_draws, )
Пример #2
0
    def test_to_dict_sample_size(self):
        """Check ``to_dict`` for a model built with ``sample_size=10`` and fitted on ``self.constant``.

        The serialized form must name the GaussianKDE class and carry a
        ``dataset`` entry of length 10.
        """
        kde = GaussianKDE(sample_size=10)
        kde.fit(self.constant)

        serialized = kde.to_dict()

        assert serialized['type'] == 'copulas.univariate.gaussian_kde.GaussianKDE'
        assert len(serialized['dataset']) == 10
Пример #3
0
    def test_to_dict_constant(self):
        """Check the exact ``to_dict`` output of a model fitted on ``self.constant``."""
        kde = GaussianKDE()
        kde.fit(self.constant)

        serialized = kde.to_dict()

        expected = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'dataset': [5] * 100,
        }
        assert serialized == expected
Пример #4
0
    def test_fit_sample_distribution_dict(self):
        """Fit a GaussianMultivariate configured through a per-column distribution dict.

        Only column ``'x'`` is given an explicit distribution (a GaussianKDE);
        a 10-row sample must come back with shape ``(10, 3)``.
        """
        frame = sample_trivariate_xyz()
        per_column = {'x': GaussianKDE()}
        copula = GaussianMultivariate(distribution=per_column)
        copula.fit(frame)

        drawn = copula.sample(10)
        assert drawn.shape == (10, 3)
Пример #5
0
    def test_save_load(self):
        """Round-trip a fitted GaussianKDE through ``save`` / ``load``.

        The reloaded model must reproduce the original's PDF and CDF values,
        within an absolute tolerance of 0.01, on 50 points sampled from the
        original model.
        """
        original = GaussianKDE()
        original.fit(self.data)

        points = original.sample(50)

        model_path = os.path.join(self.test_dir.name, "model.pkl")
        original.save(model_path)
        restored = GaussianKDE.load(model_path)

        # Densities must agree between the original and the reloaded model.
        density = original.probability_density(points)
        restored_density = restored.probability_density(points)
        assert np.allclose(density, restored_density, atol=0.01)

        # So must the cumulative distribution values.
        cumulative = original.cumulative_distribution(points)
        restored_cumulative = restored.cumulative_distribution(points)
        assert np.allclose(cumulative, restored_cumulative, atol=0.01)
Пример #6
0
 def test_bimodal(self):
     """
     On bimodal data the GaussianKDE model must obtain a smaller KS statistic than the
     GaussianUnivariate model, since a single Gaussian can't capture 2 modes.
     """
     bimodal = self.bimodal_data
     kde_ks = ks_statistic(GaussianKDE(), bimodal)
     gaussian_ks = ks_statistic(GaussianUnivariate(), bimodal)
     assert kde_ks < gaussian_ks
Пример #7
0
 def test_binary(self):
     """
     Given Bernoulli-distributed data, ``select_univariate`` must choose the GaussianKDE
     candidate over the TruncatedGaussian one. The stated rationale is that a Bernoulli
     distribution resembles a bimodal distribution, which a KDE can somewhat capture.
     """
     candidates = [GaussianKDE(), TruncatedGaussian()]
     chosen = select_univariate(self.binary_data, candidates)
     assert isinstance(chosen, GaussianKDE)
Пример #8
0
 def test_binary(self):
     """
     On Bernoulli-distributed data the GaussianKDE model must obtain a smaller KS statistic
     than the TruncatedGaussian model. The stated rationale is that a Bernoulli distribution
     resembles a bimodal distribution, which a KDE can somewhat capture.
     """
     binary = self.binary_data
     kde_ks = ks_statistic(GaussianKDE(), binary)
     truncated_ks = ks_statistic(TruncatedGaussian(), binary)
     assert kde_ks < truncated_ks
Пример #9
0
    def test_to_dict_from_dict(self):
        """Round-trip a fitted GaussianKDE through ``to_dict`` / ``from_dict``.

        The rebuilt model must reproduce the original's PDF and CDF values,
        within an absolute tolerance of 0.01, on 50 points sampled from the
        original model.
        """
        original = GaussianKDE()
        original.fit(self.data)

        points = original.sample(50)

        serialized = original.to_dict()
        rebuilt = GaussianKDE.from_dict(serialized)

        # Densities must agree between the original and the rebuilt model.
        density = original.probability_density(points)
        rebuilt_density = rebuilt.probability_density(points)
        assert np.allclose(density, rebuilt_density, atol=0.01)

        # So must the cumulative distribution values.
        cumulative = original.cumulative_distribution(points)
        rebuilt_cumulative = rebuilt.cumulative_distribution(points)
        assert np.allclose(cumulative, rebuilt_cumulative, atol=0.01)
Пример #10
0
    def test_fit_sample_distribution_dict_multiple(self):
        """Fit a GaussianMultivariate where every column has its own distribution.

        Columns ``'x'``, ``'y'`` and ``'z'`` are mapped to a parametric
        ``Univariate``, a ``BetaUnivariate`` and a ``GaussianKDE`` respectively;
        a 10-row sample must come back with shape ``(10, 3)``.
        """
        frame = sample_trivariate_xyz()
        per_column = {
            'x': Univariate(parametric=ParametricType.PARAMETRIC),
            'y': BetaUnivariate(),
            'z': GaussianKDE(),
        }
        copula = GaussianMultivariate(distribution=per_column)
        copula.fit(frame)

        drawn = copula.sample(10)
        assert drawn.shape == (10, 3)
Пример #11
0
    def test_to_dict(self):
        """to_dict returns the internal parameters to replicate one instance."""
        # Setup: populate the attributes by hand instead of fitting the vine.
        vine = VineCopula('regular')
        vine.fitted = True
        vine.n_sample = 100
        vine.n_var = 10
        vine.depth = 3
        vine.truncated = 3
        vine.trees = [Tree('regular')]
        vine.unis = [GaussianKDE()]
        vine.tau_mat = np.array([[0, 1], [1, 0]])
        vine.u_matrix = np.array([[0, 1], [1, 0]])

        # Array attributes are expected as plain nested lists, and the
        # nested tree / univariate objects as their own dictionaries.
        expected = {
            'type': 'copulas.multivariate.vine.VineCopula',
            'fitted': True,
            'vine_type': 'regular',
            'n_sample': 100,
            'n_var': 10,
            'depth': 3,
            'truncated': 3,
            'trees': [
                {
                    'type': 'copulas.multivariate.tree.RegularTree',
                    'tree_type': 'regular',
                    'fitted': False,
                },
            ],
            'tau_mat': [[0, 1], [1, 0]],
            'u_matrix': [[0, 1], [1, 0]],
            'unis': [
                {
                    'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
                    'fitted': False,
                },
            ],
        }

        # Run
        actual = vine.to_dict()

        # Check
        assert actual == expected
Пример #12
0
    def test_pdf(self):
        """The density of a fitted GaussianKDE must be strictly positive on its own samples."""
        kde = GaussianKDE()
        kde.fit(self.data)

        points = kde.sample(50)

        # Test PDF
        density = kde.probability_density(points)
        assert (density > 0).all()
Пример #13
0
    def test_cdf(self):
        """Check the CDF of a fitted GaussianKDE on 50 of its own samples.

        Every value must lie strictly between 0 and 1, and the values
        computed on the sorted samples must be non-decreasing.
        """
        kde = GaussianKDE()
        kde.fit(self.data)

        points = kde.sample(50)

        # Test the CDF
        cumulative = kde.cumulative_distribution(points)
        assert (cumulative > 0).all()
        assert (cumulative < 1).all()

        # Test CDF increasing function
        ordered_points = sorted(points)
        ordered_cumulative = kde.cumulative_distribution(ordered_points)
        steps = np.diff(ordered_cumulative)
        assert (steps >= 0).all()
Пример #14
0
    def test_fit_sample_constant(self):
        """Fit a GaussianKDE on ``self.constant`` and check that sampling returns the constant.

        The sample must be a numpy array of shape ``(50,)``, the model must
        record 5 as ``_constant_value``, and a fresh sample must equal an
        array filled with 5.
        """
        n_draws = 50
        kde = GaussianKDE()
        kde.fit(self.constant)

        drawn = kde.sample(n_draws)

        assert isinstance(drawn, np.ndarray)
        assert drawn.shape == (n_draws, )

        assert kde._constant_value == 5
        expected = np.full(n_draws, 5)
        np.testing.assert_equal(expected, kde.sample(n_draws))
Пример #15
0
    def test_get_parameters_non_parametric(self):
        """Test the ``get_parameters`` method when the model is not parametric.

        If at least one distribution in the model is not parametric,
        a NonParametricError should be raised.

        Setup:
        - ``_model`` of a mocked instance is set to a fitted
          ``GaussianMultivariate`` that uses ``GaussianKDE`` as its
          ``distribution``.

        Side Effects:
        - A NonParametricError is raised.
        """
        # Setup
        copula = GaussianMultivariate(distribution=GaussianKDE())
        copula.fit(pd.DataFrame([1, 1, 1]))
        instance = Mock()
        instance._model = copula

        # Run, Assert
        with pytest.raises(NonParametricError):
            GaussianCopula.get_parameters(instance)
Пример #16
0
    def _gaussian(self, dataset):
        """
        Smoke-test every officially supported GaussianMultivariate feature on ``dataset``.

        The model uses a GaussianKDE for the first column. It is fitted, sampled,
        and evaluated (PDF and CDF); it is then rebuilt via ``to_dict`` /
        ``from_dict`` and via ``save`` / ``load``, and each rebuilt model must
        sample correctly and reproduce the PDF and CDF values within an absolute
        tolerance of 0.01.
        """
        sample_sizes = (10, 100, 50)
        first_column = dataset.columns[0]

        # Use a KDE for the first column
        copula = GaussianMultivariate({first_column: GaussianKDE()})
        copula.fit(dataset)
        for size in sample_sizes:
            assert len(copula.sample(size)) == size
        points = copula.sample(10)
        density = copula.probability_density(points)
        cumulative = copula.cumulative_distribution(points)

        # Test Save/Load from Dictionary
        as_dict = copula.to_dict()
        rebuilt = GaussianMultivariate.from_dict(as_dict)

        for size in sample_sizes:
            assert len(rebuilt.sample(size)) == size
        rebuilt_density = rebuilt.probability_density(points)
        rebuilt_cumulative = rebuilt.cumulative_distribution(points)
        assert np.allclose(density, rebuilt_density, atol=0.01)
        assert np.allclose(cumulative, rebuilt_cumulative, atol=0.01)

        # Test Save/Load from file
        model_path = os.path.join(self.test_dir.name, "model.pkl")
        copula.save(model_path)
        reloaded = GaussianMultivariate.load(model_path)
        for size in sample_sizes:
            assert len(reloaded.sample(size)) == size
        reloaded_density = reloaded.probability_density(points)
        reloaded_cumulative = reloaded.cumulative_distribution(points)
        assert np.allclose(density, reloaded_density, atol=0.01)
        assert np.allclose(cumulative, reloaded_cumulative, atol=0.01)