Example No. 1
    def test_to_dict(self, kde_mock):
        """To_dict returns the defining parameters of a distribution in a dict."""
        # Setup
        column = np.array([[
            0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
            1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
            1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
            0.5425600435859647
        ]])

        kde_instance_mock = kde_mock.return_value
        kde_instance_mock.dataset = column
        kde_instance_mock.resample.return_value = column
        distribution = GaussianKDE()
        distribution.fit(column)

        expected_result = {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'fitted': True,
            'dataset': [[
                0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
                1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
                1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
                0.5425600435859647
            ]],
        }

        # Run
        result = distribution.to_dict()

        # Check
        compare_nested_dicts(result, expected_result)
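
For reference, here is a minimal, non-mocked sketch of the same round trip (assuming GaussianKDE is importable from copulas.univariate.gaussian_kde, as the 'type' field above suggests; comparing via probability_density mirrors the to_dict/from_dict tests further below):

import numpy as np
from copulas.univariate.gaussian_kde import GaussianKDE

data = np.random.normal(0, 1, 100)

model = GaussianKDE()
model.fit(data)

params = model.to_dict()                    # 'type' plus the defining 'dataset'
restored = GaussianKDE.from_dict(params)    # rebuild an equivalent fitted model

# The restored model should describe the same distribution.
assert np.allclose(model.probability_density(data), restored.probability_density(data))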
Example No. 2
 def setup_norm(self):
     """set up the model to fit standard norm data."""
     self.kde = GaussianKDE()
     # use 42 as a fixed random seed
     np.random.seed(42)
     column = np.random.normal(0, 1, 1000)
     self.kde.fit(column)
Example No. 3
    def test_percent_point(self, kde_mock, brentq_mock, cdf_mock):
        """percent_point evaluates with the model."""
        # Setup
        model_mock = kde_mock.return_value
        brentq_mock.return_value = -250.0
        cdf_mock.return_value = 'a nice scalar bounded method'

        fit_data = np.array([1, 2, 3, 4, 5])
        instance = GaussianKDE()
        instance.fit(fit_data)

        expected_result = np.array([-250.0])

        # Run
        result = instance.percent_point([0.5])

        # Check
        assert result == expected_result

        kde_mock.assert_called_once_with(fit_data)
        model_mock.assert_not_called()
        assert len(model_mock.method_calls) == 0

        brentq_mock.assert_called_once_with('a nice scalar bounded method',
                                            -1000, 1000)
Example No. 4
    def test__fit_sample_size(self):
        distribution = GaussianKDE(sample_size=3)

        distribution._fit(np.array([1, 2, 3, 4]))

        assert len(distribution._params['dataset']) == 1
        assert len(distribution._params['dataset'][0]) == 3
Example No. 5
    def test_cumulative_distribution(self, kde_mock):
        """cumulative_distribution evaluates with the model."""
        # Setup
        model_mock = kde_mock.return_value
        model_mock.integrate_box_1d.side_effect = [0.0, 0.5, 1.0]

        model_mock.dataset = MagicMock()
        model_mock.dataset.mean.return_value = 1
        model_mock.dataset.std.return_value = 0.1

        fit_data = np.array([1, 2, 3, 4, 5])
        instance = GaussianKDE()
        instance.fit(fit_data)

        call_data = np.array([-10, 0, 10])
        expected_result = np.array([0.0, 0.5, 1.0])

        expected_integrate_1d_box_call_args_list = [
            ((0.5, -10), {}),  # The first argument is the lower_bound (1 - 0.1 * 5)
            ((0.5, 0), {}),
            ((0.5, 10), {}),
        ]

        # Run
        result = instance.cumulative_distribution(call_data)

        # Check
        compare_nested_iterables(result, expected_result)

        kde_mock.assert_called_once_with(fit_data)
        assert (model_mock.integrate_box_1d.call_args_list ==
                expected_integrate_1d_box_call_args_list)
Example No. 6
    def test_serialization_fit_model(self):
        # Setup
        instance = get_tree(TreeTypes.REGULAR)
        X = pd.DataFrame(data=[
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1]
        ])
        index = 0
        n_nodes = X.shape[1]
        tau_matrix = X.corr(method='kendall').values

        univariates_matrix = np.empty(X.shape)
        for i, column in enumerate(X):
            distribution = GaussianKDE()
            distribution.fit(X[column])
            univariates_matrix[:, i] = distribution.cumulative_distribution(X[column])

        instance.fit(index, n_nodes, tau_matrix, univariates_matrix)

        # Run
        result = Tree.from_dict(instance.to_dict())

        # Check
        assert result.to_dict() == instance.to_dict()
Example No. 7
    def test__brentq_cdf(self, partial_mock, scalarize_mock):
        """_brentq_cdf returns a function that computes the cdf of a scalar minus its argument."""
        # Setup
        instance = GaussianKDE()

        def mock_partial_return_value(x):
            return x

        scalarize_mock.return_value = 'scalar_function'
        partial_mock.return_value = mock_partial_return_value

        # Run
        result = instance._brentq_cdf(0.5)

        # Check
        assert callable(result)

        # result uses the return_value of partial_mock, so every value returned
        # is (x - 0.5)
        assert result(1.0) == 0.5
        assert result(0.5) == 0
        assert result(0.0) == -0.5

        scalarize_mock.assert_called_once_with(
            GaussianKDE.cumulative_distribution)
        partial_mock.assert_called_once_with('scalar_function', instance)
Example No. 8
    def test_second_tree_likelihood(self):
        """Assert second tree likelihood is correct."""
        # Setup
        # Build first tree
        data = pd.read_csv('data/iris.data.csv')
        tau_mat = data.corr(method='kendall').values
        u_matrix = np.empty(data.shape)

        for index, col in enumerate(data):
            uni = GaussianKDE()
            uni.fit(data[col])
            u_matrix[:, index] = uni.cumulative_distribution(data[col])

        first_tree = get_tree(TreeTypes.CENTER)
        first_tree.fit(0, 4, tau_mat, u_matrix)
        uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
        likelihood_first_tree, conditional_uni_first = first_tree.get_likelihood(uni_matrix)
        tau = first_tree.get_tau_matrix()

        # Build second tree
        second_tree = get_tree(TreeTypes.CENTER)
        second_tree.fit(1, 3, tau, first_tree)
        expected_likelihood_second_tree = 0.4888802429313932

        # Run
        likelihood_second_tree, out_u = second_tree.get_likelihood(conditional_uni_first)

        # Check
        assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree)
Example No. 9
    def test__fit_constant_sample_size(self):
        distribution = GaussianKDE(sample_size=3)

        distribution._fit_constant(np.array([1, 1, 1, 1]))

        assert distribution._params == {
            'dataset': [1, 1, 1],
        }
Example No. 10
    def test__fit(self):
        distribution = GaussianKDE()

        distribution._fit(np.array([1, 2, 3, 4]))

        assert distribution._params == {
            'dataset': [1, 2, 3, 4],
        }
Example No. 11
    def test_to_dict_sample_size(self):
        model = GaussianKDE(sample_size=10)
        model.fit(self.constant)

        params = model.to_dict()

        assert params['type'] == 'copulas.univariate.gaussian_kde.GaussianKDE'
        assert len(params['dataset']) == 10
Example No. 12
    def test_fit_sample(self):
        model = GaussianKDE()
        model.fit(self.data)

        sampled_data = model.sample(50)

        assert isinstance(sampled_data, np.ndarray)
        assert sampled_data.shape == (50, )
Example No. 13
    def test_fit_empty_data(self):
        """If fitting kde model with empty data it will raise ValueError."""
        # Setup
        instance = GaussianKDE()
        data = np.array([])

        # Run / Check
        with self.assertRaises(ValueError):
            instance.fit(data)
Example No. 14
    def test__extract_constant(self):
        distribution = GaussianKDE()
        distribution._params = {
            'dataset': [1, 1, 1, 1],
        }

        constant = distribution._extract_constant()

        assert 1 == constant
Example No. 15
    def test_to_dict_constant(self):
        model = GaussianKDE()
        model.fit(self.constant)

        params = model.to_dict()

        assert params == {
            'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
            'dataset': [5] * 100
        }
Example No. 16
    def test_percent_point_bisect(self):
        """percent_point evaluates with the model."""
        instance = GaussianKDE()
        instance.fit(np.array([0.5, 1.0, 1.5]))

        ppf = instance.percent_point(np.array([0.001, 0.5, 0.999]), method='bisect')

        assert ppf[0] < 0.0, "The 0.001 quantile should be small."
        assert abs(ppf[1] - 1.0) < 0.1, "The 0.5 quantile should be close to the median (1.0)."
        assert ppf[2] > 2.0, "The 0.999 quantile should be large."
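
As a quick sanity check on the inversion above, percent_point and cumulative_distribution should behave as approximate inverses of each other; a minimal sketch (the tolerance is illustrative):

import numpy as np
from copulas.univariate.gaussian_kde import GaussianKDE

instance = GaussianKDE()
instance.fit(np.random.normal(0, 1, 500))

probs = np.array([0.1, 0.5, 0.9])
values = instance.percent_point(probs)              # quantiles (inverse CDF)
recovered = instance.cumulative_distribution(values)

# Round-tripping through percent_point and cumulative_distribution
# should approximately recover the original probabilities.
assert np.all(np.isclose(recovered, probs, atol=1e-2))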
Example No. 17
    def test_valid_serialization_unfit_model(self):
        """For a unfitted model to_dict and from_dict are opposites."""
        # Setup
        instance = GaussianKDE()

        # Run
        result = GaussianKDE.from_dict(instance.to_dict())

        # Check
        assert instance.to_dict() == result.to_dict()
Example No. 18
    def test_sample(self, kde_mock):
        """Sample calls the gaussian_kde.resample method."""
        instance = GaussianKDE()
        instance.fit(np.array([1, 2, 3, 4]))

        model = kde_mock.return_value
        model.resample.return_value = np.array([[1, 2, 3]])

        samples = instance.sample(3)

        instance._model.resample.assert_called_once_with(3)
        np.testing.assert_equal(samples, np.array([1, 2, 3]))
Example No. 19
 def setUp(self):
     self.data = pd.read_csv('data/iris.data.csv')
     self.tau_mat = self.data.corr(method='kendall').values
     self.u_matrix = np.empty(self.data.shape)
     for index, col in enumerate(self.data):
         uni = GaussianKDE()
         uni.fit(self.data[col])
         self.u_matrix[:, index] = uni.cumulative_distribution(self.data[col])
     self.tree = Tree(TreeTypes.DIRECT)
     self.tree.fit(0, 4, self.tau_mat, self.u_matrix)
Example No. 20
    def test_fit_constant(self):
        """If fit data is constant, no gaussian_kde model is created."""
        # Setup
        instance = GaussianKDE()
        X = np.array([1, 1, 1, 1, 1])

        # Run
        instance.fit(X)

        # Check
        assert instance.model is None
        assert instance.constant_value == 1
        assert instance.fitted is True
Example No. 21
    def test_cumulative_distribution(self):
        """cumulative_distribution evaluates with the model."""
        instance = GaussianKDE()
        instance.fit(np.array([0.9, 1.0, 1.1]))

        cdf = instance.cumulative_distribution(np.array([
            0.0,  # There is no data below this (cdf = 0.0).
            1.0,  # Half the data is below this (cdf = 0.5).
            2.0,  # All the data is below this (cdf = 1.0).
            -1.0  # There is no data below this (cdf = 0).
        ]))

        assert np.all(np.isclose(cdf, np.array([0.0, 0.5, 1.0, 0.0]), atol=1e-3))
Example No. 22
    def test_probability_density(self, kde_mock):
        """Sample calls the gaussian_kde.resample method."""
        instance = GaussianKDE()
        instance.fit(np.array([1, 2, 3, 4]))

        model = kde_mock.return_value
        model.evaluate.return_value = np.array([0.1, 0.2, 0.3])

        pdf = instance.probability_density(np.array([1, 2, 3]))

        assert instance._model.evaluate.call_count == 1
        input_array = instance._model.evaluate.call_args[0][0]
        np.testing.assert_equal(input_array, np.array([1, 2, 3]))
        np.testing.assert_equal(pdf, np.array([0.1, 0.2, 0.3]))
Example No. 23
    def from_dict(cls, vine_dict):
        """Create a new instance from a parameters dictionary.

        Args:
            vine_dict (dict):
                Parameters of the Vine, in the same format as the one
                returned by the ``to_dict`` method.

        Returns:
            Vine:
                Instance of the Vine defined on the parameters.
        """
        instance = cls(vine_dict['vine_type'])
        fitted = vine_dict['fitted']
        if fitted:
            instance.fitted = fitted
            instance.n_sample = vine_dict['n_sample']
            instance.n_var = vine_dict['n_var']
            instance.truncated = vine_dict['truncated']
            instance.depth = vine_dict['depth']
            instance.trees = cls._deserialize_trees(vine_dict['trees'])
            instance.unis = [
                GaussianKDE.from_dict(uni) for uni in vine_dict['unis']
            ]
            instance.ppfs = [uni.percent_point for uni in instance.unis]
            instance.columns = vine_dict['columns']
            instance.tau_mat = np.array(vine_dict['tau_mat'])
            instance.u_matrix = np.array(vine_dict['u_matrix'])

        return instance
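
A minimal usage sketch of this round trip at the vine level, assuming the Vine shown above is exposed as copulas.multivariate.VineCopula and accepts a vine type such as 'regular' (column names and sample size are illustrative):

import numpy as np
import pandas as pd
from copulas.multivariate import VineCopula  # assumed import path for the Vine above

data = pd.DataFrame({
    'x': np.random.normal(0, 1, 100),
    'y': np.random.normal(0, 1, 100),
    'z': np.random.normal(0, 1, 100),
})

vine = VineCopula('regular')   # 'center' and 'direct' are the other tree types
vine.fit(data)

params = vine.to_dict()        # serializes the trees and the GaussianKDE marginals
restored = VineCopula.from_dict(params)

assert restored.to_dict() == params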
Example No. 24
    def test_from_dict(self):
        """From_dict sets the values of a dictionary as attributes of the instance."""
        # Setup
        parameters = {
            'fitted': True,
            'dataset': [[
                0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
                1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
                1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
                0.5425600435859647
            ]],
        }

        # Run
        distribution = GaussianKDE.from_dict(parameters)

        # Check
        assert distribution.model.d == 1
        assert distribution.model.n == 10
        assert distribution.model.covariance == np.array([[0.20810696044195226]])
        assert distribution.model.factor == 0.6309573444801932
        assert distribution.model.inv_cov == np.array([[4.805221304834406]])
        assert (distribution.model.dataset == np.array([[
            0.4967141530112327, -0.13826430117118466, 0.6476885381006925,
            1.5230298564080254, -0.23415337472333597, -0.23413695694918055,
            1.5792128155073915, 0.7674347291529088, -0.4694743859349521,
            0.5425600435859647
        ]])).all()
Example No. 25
    def test_save_load(self):
        model = GaussianKDE()
        model.fit(self.data)

        sampled_data = model.sample(50)

        path_to_model = os.path.join(self.test_dir.name, "model.pkl")
        model.save(path_to_model)
        model2 = GaussianKDE.load(path_to_model)

        pdf = model.probability_density(sampled_data)
        pdf2 = model2.probability_density(sampled_data)
        assert np.all(np.isclose(pdf, pdf2, atol=0.01))

        cdf = model.cumulative_distribution(sampled_data)
        cdf2 = model2.cumulative_distribution(sampled_data)
        assert np.all(np.isclose(cdf, cdf2, atol=0.01))
Example No. 26
    def test_fit(self, kde_mock):
        """On fit, a new instance of gaussian_kde is fitted."""
        # Setup
        instance = GaussianKDE()
        X = np.array([1, 2, 3, 4, 5])

        kde_instance = MagicMock(evaluate='pdf')
        kde_mock.return_value = kde_instance

        # Run
        instance.fit(X)

        # Check
        assert instance.model == kde_instance
        assert instance.fitted is True
        assert instance.constant_value is None
        assert instance.probability_density == 'pdf'
        kde_mock.assert_called_once_with(X)
Example No. 27
    def test__get_bounds(self):
        self = MagicMock()
        self._params = {'dataset': np.array([1, 2, 3, 4, 5])}

        lower, upper = GaussianKDE._get_bounds(self)

        k = 5 * np.std([1, 2, 3, 4, 5])
        assert lower == 1 - k
        assert upper == 5 + k
Example No. 28
    def test___init__(self):
        """On init, model are set to None."""
        # Setup / Run
        instance = GaussianKDE()

        # Check
        assert instance.model is None
        assert instance.fitted is False
        assert instance.constant_value is None
Example No. 29
 def test_gaussiankde_arguments(self):
     size = 1000
     low = 0
     high = 9
     data = randint.rvs(low, high, size=size) + norm.rvs(0, 0.1, size=size)
     dist = GaussianMultivariate(distribution=GaussianKDE(bw_method=0.01))
     dist.fit(data)
     samples = dist.sample(size).to_numpy()[0]
     d, p = ks_2samp(data, samples)
     assert p >= 0.05
Example No. 30
    def test_to_dict_from_dict(self):
        model = GaussianKDE()
        model.fit(self.data)

        sampled_data = model.sample(50)

        params = model.to_dict()
        model2 = GaussianKDE.from_dict(params)

        pdf = model.probability_density(sampled_data)
        pdf2 = model2.probability_density(sampled_data)
        assert np.all(np.isclose(pdf, pdf2, atol=0.01))

        cdf = model.cumulative_distribution(sampled_data)
        cdf2 = model2.cumulative_distribution(sampled_data)
        assert np.all(np.isclose(cdf, cdf2, atol=0.01))