Пример #1
0
    def test__extract_constant(self):
        """With params ``loc=1`` and ``scale=0`` the extracted constant is 1."""
        instance = GaussianUnivariate()
        instance._params = {'loc': 1, 'scale': 0}

        extracted = instance._extract_constant()

        assert extracted == 1
Пример #2
0
    def test__fit_constant(self):
        """Fitting constant data stores its value as ``loc`` with a ``scale`` of 0."""
        instance = GaussianUnivariate()
        constant_data = np.array([1, 1, 1, 1])

        instance._fit_constant(constant_data)

        expected_params = {'loc': 1, 'scale': 0}
        assert instance._params == expected_params
Пример #3
0
    def test_fit_empty_data(self):
        """Fitting an empty column raises a ``ValueError``."""

        # Setup
        copula = GaussianUnivariate()
        # An explicit dtype keeps the empty column numeric on every pandas
        # version: the default dtype of an empty Series changed from float64
        # to object, and relying on the default emits a warning on pandas 1.x.
        column = pd.Series([], dtype='float64')

        # Run / Check
        with self.assertRaises(ValueError):
            copula.fit(column)
Пример #4
0
    def test_valid_serialization_unfit_model(self):
        """An unfitted model survives a ``to_dict`` / ``from_dict`` round trip."""
        # Setup
        original = GaussianUnivariate()

        # Run
        serialized = original.to_dict()
        restored = GaussianUnivariate.from_dict(serialized)

        # Check
        assert original.to_dict() == restored.to_dict()
Пример #5
0
    def test__fit(self):
        """``_fit`` stores the sample mean and std of the data as ``loc`` and ``scale``."""
        instance = GaussianUnivariate()
        samples = norm.rvs(size=1000, loc=1, scale=1)

        instance._fit(samples)

        expected_params = {'loc': np.mean(samples), 'scale': np.std(samples)}
        assert instance._params == expected_params
Пример #6
0
    def test_test_fit_equal_values(self):
        """Fitting a column of identical values never yields a std of 0."""

        # Setup
        instance = GaussianUnivariate()
        constant_column = [1] * 6

        # Run
        instance.fit(constant_column)

        # Check: the mean is the repeated value and the std is a small
        # positive number instead of 0.
        assert instance.mean == 1
        assert instance.std == 0.001
Пример #7
0
    def test_to_dict(self):
        """``to_dict`` exposes the fitted mean and std as a plain dict."""
        # Setup
        instance = GaussianUnivariate()
        instance.fit([0, 1, 2, 3, 4, 5])
        expected = {'mean': 2.5, 'std': 1.707825127659933}

        # Run
        serialized = instance.to_dict()

        # Check
        assert serialized == expected
Пример #8
0
    def test_sample(self):
        """A fitted model samples data whose mean and std match the fitted ones."""
        # Setup
        num_samples = 1000000
        tolerance = 10E-3  # i.e. 0.01
        instance = GaussianUnivariate()
        instance.fit([-1, 0, 1])

        # Run
        samples = instance.sample(num_samples)

        # Check
        assert len(samples) == num_samples

        mean_error = abs(np.mean(samples) - instance.mean)
        assert mean_error < tolerance

        std_error = abs(np.std(samples) - instance.std)
        assert std_error < tolerance
Пример #9
0
    def test_test_fit_equal_values(self):
        """Fitting constant data sets ``constant_value``; mean stays 0 and std stays 1."""

        # Setup
        constant_column = np.array([5] * 6)
        instance = GaussianUnivariate()

        # Run
        instance.fit(constant_column)

        # Check
        assert instance.mean == 0
        assert instance.std == 1
        assert instance.constant_value == 5
Пример #10
0
    def test_get_probability_density(self):
        """``probability_density`` returns the expected density at the given point."""

        # Setup
        copula = GaussianUnivariate()
        column = np.array([-1, 0, 1])
        copula.fit(column)
        expected_result = 0.48860251190292

        # Run
        result = copula.probability_density(0)

        # Check
        # Compare with a tight relative tolerance rather than ``==``: the
        # expected literal is rounded, and the last bits of a computed float
        # can differ between platforms and library versions.
        assert np.isclose(result, expected_result, rtol=1e-12, atol=0)
Пример #11
0
    def test_sample_random_state(self):
        """Sampling with ``random_seed=0`` yields a known, fixed set of values."""
        # Setup
        instance = GaussianUnivariate(random_seed=0)
        instance.fit(np.array([1, 2, 3, 4, 5]))

        expected = np.array([5.494746752403546, 3.565907751154284, 4.384144531132039])

        # Run
        samples = instance.sample(3)

        # Check
        matches = samples == expected
        assert matches.all()
Пример #12
0
    def test_percent_point(self):
        """``percent_point`` maps a cumulative probability of 0.5 back to the point 0."""

        # Setup
        instance = GaussianUnivariate()
        instance.fit(np.array([-1, 0, 1]))
        probability = 0.5
        expected = 0

        # Run
        point = instance.percent_point(probability)

        # Check
        matches = point == expected
        assert matches.all()
Пример #13
0
    def test_cumulative_distribution(self):
        """``cumulative_distribution`` returns 0.5 for the point 0 on data centered at 0."""

        # Setup
        instance = GaussianUnivariate()
        instance.fit(np.array([-1, 0, 1]))
        points = pd.Series([0])
        expected = [0.5]

        # Run
        probabilities = instance.cumulative_distribution(points)

        # Check
        matches = probabilities == expected
        assert matches.all()
Пример #14
0
    def test___str__(self):
        """``__str__`` describes the model: type, variable name, mean and std."""

        # Setup
        instance = GaussianUnivariate()
        expected_lines = [
            'Distribution Type: Gaussian',
            'Variable name: None',
            'Mean: 0',
            'Standard deviation: 1',
        ]
        expected = '\n'.join(expected_lines)

        # Run
        description = instance.__str__()

        # Check
        assert description == expected
Пример #15
0
    def test_fit(self):
        """Fitting stores the data's mean, std and name, and marks the model as fitted."""

        # Setup
        named_column = pd.Series([0, 1, 2, 3, 4, 5], name='column')
        instance = GaussianUnivariate()

        # Run
        instance.fit(named_column)

        # Check
        assert instance.mean == 2.5
        assert instance.std == 1.707825127659933
        assert instance.name == 'column'
        assert instance.fitted is True
Пример #16
0
    def fit(self, X, distrib_map=None):
        """Fit one univariate distribution per column, then the covariance matrix.

        Args:
            X: `numpy.ndarray` or `pandas.DataFrame`. Data to model.
            distrib_map: `dict` mapping column keys to callables. When given,
                each callable is invoked with ``X[key]`` and its result is
                stored as that column's distribution.

        Returns:
            None
        """
        LOGGER.debug('Fitting Gaussian Copula')
        column_names = self.get_column_names(X)

        if not distrib_map:
            # Default: model every column with a fitted GaussianUnivariate.
            for name in column_names:
                univariate = GaussianUnivariate()
                self.distribs[name] = univariate
                univariate.fit(self.get_column(X, name))
        else:
            # User-supplied distributions.
            # NOTE(review): flagged in the original code as not fully working
            # yet; only the keys present in distrib_map get a distribution and
            # no explicit ``fit`` call is made here.
            for key, factory in distrib_map.items():
                self.distribs[key] = factory(X[key])

        self.covariance = self._get_covariance(X)
Пример #17
0
    def test_to_dict(self):
        """``to_dict`` returns the type, fitted parameters and fitted flag of the model."""
        # Setup
        instance = GaussianUnivariate()
        instance.fit(np.array([0, 1, 2, 3, 4, 5]))

        expected = {
            'type': 'copulas.univariate.gaussian.GaussianUnivariate',
            'mean': 2.5,
            'std': 1.707825127659933,
            'fitted': True,
        }

        # Run
        serialized = instance.to_dict()

        # Check
        assert serialized == expected
Пример #18
0
    def from_dict(cls, copula_dict):
        """Build an instance from its dict representation.

        Args:
            copula_dict: `dict` with a ``distribs`` mapping of name to
                univariate parameters and a ``covariance`` entry.

        Returns:
            A new instance of ``cls`` with ``distribs`` and ``covariance`` set.
        """
        instance = cls()

        # Rebuild every univariate distribution from its stored parameters.
        instance.distribs = {
            name: GaussianUnivariate.from_dict(parameters)
            for name, parameters in copula_dict['distribs'].items()
        }

        instance.covariance = np.array(copula_dict['covariance'])
        return instance
Пример #19
0
    def test_sample(self, random_mock):
        """Sampling returns the value of ``random_mock``, called once with mean, std and size."""
        # Setup
        instance = GaussianUnivariate()
        instance.fit(np.array([-1, 0, 1]))

        mocked_samples = np.array([1, 2, 3, 4, 5])
        random_mock.return_value = mocked_samples

        # Run
        samples = instance.sample(5)

        # Check
        compare_nested_iterables(samples, mocked_samples)

        fitted_mean = 0.0
        fitted_std = 0.816496580927726
        assert instance.mean == fitted_mean
        assert instance.std == fitted_std
        random_mock.assert_called_once_with(fitted_mean, fitted_std, 5)
Пример #20
0
    def test___init__(self):
        """A new instance has no name, a mean of 0 and a std of 1."""

        # Setup / Run
        instance = GaussianUnivariate()

        # Check
        assert not instance.name
        assert instance.mean == 0
        assert instance.std == 1
Пример #21
0
    def test_from_dict(self):
        """``from_dict`` restores mean and std, and the restored instance can sample."""
        # Setup
        expected_mean = 2.5
        expected_std = 1.707825127659933
        parameters = {'mean': expected_mean, 'std': expected_std}

        # Run
        instance = GaussianUnivariate.from_dict(parameters)

        # Check
        assert instance.mean == expected_mean
        assert instance.std == expected_std

        # Smoke check: sampling from the restored instance must not raise.
        instance.sample(10)
Пример #22
0
    def test_percent_point_reverse_cumulative_distribution(self):
        """Composing ``cumulative_distribution`` and ``percent_point`` gives the identity."""

        # Setup
        copula = GaussianUnivariate()
        column = np.array([-1, 0, 1])
        copula.fit(column)
        initial_value = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5])
        tolerance = 10E-5  # i.e. 1e-4

        # Run
        result_a = copula.percent_point(copula.cumulative_distribution(initial_value))
        result_b = copula.cumulative_distribution(copula.percent_point(initial_value))

        # Check
        # Compare absolute differences: a signed difference would also accept
        # results that overshoot the initial value by any amount.
        assert (np.abs(initial_value - result_a) < tolerance).all()
        assert (np.abs(initial_value - result_b) < tolerance).all()
Пример #23
0
    def test_percent_point_reverse_cumulative_distribution(self):
        """Composing ``cumulative_distribution`` and ``percent_point`` returns the input."""

        # Setup
        instance = GaussianUnivariate()
        instance.fit([-1, 0, 1])
        initial_value = pd.Series([0])

        # Run
        probabilities = instance.cumulative_distribution(initial_value)
        result_a = instance.percent_point(probabilities)

        points = instance.percent_point(initial_value)
        result_b = instance.cumulative_distribution(points)

        # Check
        matches_a = initial_value == result_a
        assert matches_a.all()
        matches_b = initial_value == result_b
        assert matches_b.all()
Пример #24
0
    def test_sample_constant(self):
        """A constant distribution samples only its constant value."""
        # Setup: configure a constant distribution by hand instead of fitting.
        constant = GaussianUnivariate()
        constant.constant_value = 3
        constant._replace_constant_methods()
        constant.fitted = True

        expected = np.array([3] * 5)

        # Run
        samples = constant.sample(5)

        # Check
        compare_nested_iterables(samples, expected)
Пример #25
0
    def test_cumulative_distribution_constant(self):
        """For a constant of 3 the cumulative distribution is 0 below it and 1 from it on."""
        # Setup: configure a constant distribution by hand instead of fitting.
        constant = GaussianUnivariate()
        constant.constant_value = 3
        constant._replace_constant_methods()
        constant.fitted = True

        points = np.array([1, 2, 3, 4, 5])
        expected = np.array([0, 0, 1, 1, 1])

        # Run
        probabilities = constant.cumulative_distribution(points)

        # Check
        compare_nested_iterables(probabilities, expected)
Пример #26
0
    def test__is_constant_false(self):
        """``_is_constant`` is falsy after fitting data with distinct values."""
        instance = GaussianUnivariate()
        varied_data = np.array([1, 2, 3, 4])

        instance.fit(varied_data)

        is_constant = instance._is_constant()
        assert not is_constant
Пример #27
0
    def test__is_constant_true(self):
        """``_is_constant`` is truthy after fitting data with a single repeated value."""
        instance = GaussianUnivariate()
        constant_data = np.array([1, 1, 1, 1])

        instance.fit(constant_data)

        is_constant = instance._is_constant()
        assert is_constant