Exemplo n.º 1
0
    def test_fit_sample_distribution_dict(self):
        """Fit with a per-column distribution mapping and check the sample shape."""
        trivariate = sample_trivariate_xyz()

        # Only column 'x' is given an explicit distribution instance.
        per_column = {'x': GaussianKDE()}
        copula = GaussianMultivariate(distribution=per_column)
        copula.fit(trivariate)

        synthetic = copula.sample(10)
        expected_shape = (10, 3)
        assert synthetic.shape == expected_shape
Exemplo n.º 2
0
    def test_fit_sample_center(self):
        """Fit a 'center' VineCopula and check the shape of a 10-row sample."""
        trivariate = sample_trivariate_xyz()

        vine = VineCopula('center')
        vine.fit(trivariate)

        synthetic = vine.sample(10)
        expected_shape = (10, 3)
        assert synthetic.shape == expected_shape
Exemplo n.º 3
0
    def test_cdf(self):
        """Check the fitted model's CDF is bounded and non-decreasing per column.

        The first check asserts every CDF value of a 10-row sample lies in
        [0, 1]. The second check, for each column in turn, sorts the sample
        by that column, overwrites every other column with the values of one
        randomly chosen row, and asserts that consecutive CDF values do not
        decrease (within a small tolerance).
        """
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Test CDF
        cdf = model.cumulative_distribution(sampled_data)
        assert (0 <= cdf).all() and (cdf <= 1).all()

        # Test CDF increasing function
        for column in sampled_data.columns:
            sorted_data = sampled_data.sort_values(column)
            other_columns = data.columns.to_list()
            other_columns.remove(column)
            row = sorted_data.sample(1).iloc[0]
            # Hold the remaining columns constant so only `column` varies.
            # A distinct loop name avoids rebinding the outer `column`.
            for other_column in other_columns:
                sorted_data[other_column] = row[other_column]

            cdf = model.cumulative_distribution(sorted_data)
            # Add tolerance to avoid floating precision issues.
            diffs = np.diff(cdf) + 0.001
            assert (diffs >= 0).all()
Exemplo n.º 4
0
    def test_fit_sample_distribution_name(self):
        """Fit with the distribution given as a dotted-path string and check the sample shape."""
        trivariate = sample_trivariate_xyz()

        distribution_name = 'copulas.univariate.gaussian_kde.GaussianKDE'
        copula = GaussianMultivariate(distribution_name)
        copula.fit(trivariate)

        synthetic = copula.sample(10)
        expected_shape = (10, 3)
        assert synthetic.shape == expected_shape
Exemplo n.º 5
0
    def test_pdf(self):
        """Check that the fitted model's density is strictly positive on its own samples."""
        trivariate = sample_trivariate_xyz()
        copula = GaussianMultivariate()
        copula.fit(trivariate)

        synthetic = copula.sample(10)

        # Every sampled row must receive a density greater than zero.
        densities = copula.probability_density(synthetic)
        assert (0 < densities).all()
Exemplo n.º 6
0
    def test_to_dict_from_dict(self):
        """Round-trip a fitted 'direct' VineCopula through to_dict/from_dict and sample the copy."""
        trivariate = sample_trivariate_xyz()
        original = VineCopula('direct')
        original.fit(trivariate)

        # The original model is sampled once before serialising; the result
        # itself is not inspected.
        original.sample(10)

        serialized = original.to_dict()
        restored = VineCopula.from_dict(serialized)

        resampled = restored.sample(10)
        expected_shape = (10, 3)
        assert resampled.shape == expected_shape
Exemplo n.º 7
0
    def test_fit_sample_distribution_dict_multiple(self):
        """Fit with a different distribution instance per column and check the sample shape."""
        trivariate = sample_trivariate_xyz()

        # One explicit univariate per column of the dataset.
        per_column = {
            'x': Univariate(parametric=ParametricType.PARAMETRIC),
            'y': BetaUnivariate(),
            'z': GaussianKDE(),
        }
        copula = GaussianMultivariate(distribution=per_column)
        copula.fit(trivariate)

        synthetic = copula.sample(10)
        expected_shape = (10, 3)
        assert synthetic.shape == expected_shape
Exemplo n.º 8
0
    def test_save_load(self):
        """Save a fitted 'direct' VineCopula to disk, load it back and sample the copy."""
        trivariate = sample_trivariate_xyz()
        original = VineCopula('direct')
        original.fit(trivariate)

        # The original model is sampled once before saving; the result itself
        # is not inspected.
        original.sample(10)

        pickle_path = os.path.join(self.test_dir.name, "model.pkl")
        original.save(pickle_path)
        restored = VineCopula.load(pickle_path)

        resampled = restored.sample(10)
        expected_shape = (10, 3)
        assert resampled.shape == expected_shape
Exemplo n.º 9
0
    def test_fit_sample_direct(self):
        """Fit a 'direct' VineCopula and check sample sizes, shape and column names."""
        trivariate = sample_trivariate_xyz()
        vine = VineCopula('direct')
        vine.fit(trivariate)

        # The number of returned rows must match the number requested.
        for requested_rows in (10, 50, 100):
            assert len(vine.sample(requested_rows)) == requested_rows

        synthetic = vine.sample(10)
        expected_shape = (10, 3)
        assert synthetic.shape == expected_shape

        # Every input column must be present in the sampled output.
        assert all(name in synthetic for name in trivariate.columns)
Exemplo n.º 10
0
    def test_fit_sample(self):
        """Fit a default GaussianMultivariate and check sample sizes, shape and column names."""
        trivariate = sample_trivariate_xyz()
        copula = GaussianMultivariate()
        copula.fit(trivariate)

        # The number of returned rows must match the number requested.
        for requested_rows in (10, 50, 100):
            assert len(copula.sample(requested_rows)) == requested_rows

        synthetic = copula.sample(10)
        expected_shape = (10, 3)
        assert synthetic.shape == expected_shape

        # Every input column must be present in the sampled output.
        assert all(name in synthetic for name in trivariate.columns)
Exemplo n.º 11
0
    def test_to_dict_from_dict(self):
        """Round-trip a fitted GaussianMultivariate through to_dict/from_dict.

        The restored model must give the same PDF and CDF values as the
        original on a 10-row sample, within an absolute tolerance of 0.01.
        """
        trivariate = sample_trivariate_xyz()
        original = GaussianMultivariate()
        original.fit(trivariate)

        synthetic = original.sample(10)

        serialized = original.to_dict()
        restored = GaussianMultivariate.from_dict(serialized)

        # Compare density first, then cumulative distribution.
        method_pairs = (
            (original.probability_density, restored.probability_density),
            (original.cumulative_distribution, restored.cumulative_distribution),
        )
        for evaluate_original, evaluate_restored in method_pairs:
            expected = evaluate_original(synthetic)
            actual = evaluate_restored(synthetic)
            assert np.isclose(expected, actual, atol=0.01).all()
Exemplo n.º 12
0
    def test_save_load(self):
        """Save a fitted GaussianMultivariate to disk and load it back.

        The loaded model must give the same PDF and CDF values as the
        original on a 10-row sample, within an absolute tolerance of 0.01.
        """
        trivariate = sample_trivariate_xyz()
        original = GaussianMultivariate()
        original.fit(trivariate)

        synthetic = original.sample(10)

        pickle_path = os.path.join(self.test_dir.name, "model.pkl")
        original.save(pickle_path)
        restored = GaussianMultivariate.load(pickle_path)

        # Densities must agree between the original and the loaded model.
        original_pdf = original.probability_density(synthetic)
        restored_pdf = restored.probability_density(synthetic)
        assert np.isclose(original_pdf, restored_pdf, atol=0.01).all()

        # Cumulative probabilities must agree as well.
        original_cdf = original.cumulative_distribution(synthetic)
        restored_cdf = restored.cumulative_distribution(synthetic)
        assert np.isclose(original_cdf, restored_cdf, atol=0.01).all()
Exemplo n.º 13
0
def testMITCopulas():
    """Fit a Gaussian copula to the trivariate demo dataset and plot real vs. synthetic data.

    Warnings raised while importing and running the copulas code are
    suppressed, but only for the duration of this call: the previous warning
    filters are restored on exit so the caller's process-wide warning
    configuration is not altered.

    Returns:
        bool: always ``True`` once the comparison plot call has returned.
    """
    import warnings

    # catch_warnings() snapshots the filter state and restores it afterwards,
    # so the blanket 'ignore' below does not leak out of this function.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')

        from copulas.datasets import sample_trivariate_xyz
        from copulas.multivariate import GaussianMultivariate
        from copulas.visualization import compare_3d

        # Load a dataset with 3 columns that are not independent
        real_data = sample_trivariate_xyz()

        # Fit a gaussian copula to the data
        copula = GaussianMultivariate()
        copula.fit(real_data)

        # Sample synthetic data
        synthetic_data = copula.sample(len(real_data))

        # Plot the real and the synthetic data to compare
        compare_3d(real_data, synthetic_data)

    return True
Exemplo n.º 14
0
    def test_cdf(self):
        """Check the fitted model's CDF is bounded and non-decreasing per column.

        The first check asserts every CDF value of a 10-row sample lies
        strictly between 0 and 1. The second check, for each column in turn,
        sorts the sample by that column, overwrites every other column with
        the values of one randomly chosen row, and asserts that consecutive
        CDF values do not decrease (within a small tolerance).
        """
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Test CDF
        cdf = model.cumulative_distribution(sampled_data)
        assert (0 < cdf).all() and (cdf < 1).all()

        # Test CDF increasing function
        for column in sampled_data.columns:
            sorted_data = sampled_data.sort_values(column)
            other_columns = data.columns.to_list()
            other_columns.remove(column)
            row = sorted_data.sample(1).iloc[0]
            # Hold the remaining columns constant so only `column` varies.
            # A distinct loop name avoids rebinding the outer `column`.
            for other_column in other_columns:
                sorted_data[other_column] = row[other_column]

            cdf = model.cumulative_distribution(sorted_data)
            # Add tolerance to avoid floating precision issues: tiny negative
            # differences between neighbouring values should not fail the test.
            diffs = np.diff(cdf) + 0.001
            assert (diffs >= 0).all()
Exemplo n.º 15
0
def test_compare_3d():
    """Call compare_3d with the trivariate demo dataset as both arguments."""
    trivariate = sample_trivariate_xyz()

    # The same frame is passed as both arguments.
    compare_3d(trivariate, trivariate)