def test_fit_default_distribution(self):
    """Fitting with GaussianUnivariate yields one Gaussian univariate per column.

    For every column of ``self.data`` the fitted copula must keep the column
    name at the same position, hold a ``GaussianUnivariate`` there, and store
    the column mean and standard deviation under ``loc`` and ``scale``.
    The stored covariance must equal what ``_get_covariance`` returns.
    """
    copula = GaussianMultivariate(GaussianUnivariate)
    copula.fit(self.data)

    for position, column_name in enumerate(self.data.columns):
        # Column order of the input must be preserved by the copula.
        assert copula.columns[position] == column_name

        univariate = copula.univariates[position]
        assert univariate.__class__ == GaussianUnivariate

        # Fitted parameters are compared exactly against the sample statistics.
        assert univariate._params['loc'] == self.data[column_name].mean()
        assert univariate._params['scale'] == np.std(self.data[column_name])

    reference_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == reference_covariance
    assert covariance_matches.all().all()
def test__get_covariance_numpy_array(self):
    """_get_covariance on a numpy array reproduces the reference matrix.

    The copula is fitted on ``self.data.values`` and ``_get_covariance`` is
    called on the same values; the result is compared element-wise against
    hard-coded numbers using the default ``np.isclose`` tolerances.
    """
    # Arrange: fit on the raw array and spell out the reference matrix.
    copula = GaussianMultivariate()
    copula.fit(self.data.values)

    diagonal = 1.04347826
    expected_covariance = np.array([
        [diagonal, -0.01316681, -0.20683455],
        [-0.01316681, diagonal, -0.176307],
        [-0.20683455, -0.176307, diagonal],
    ])

    # Act
    covariance = copula._get_covariance(self.data.values)

    # Assert: every entry must be close to its reference value.
    entries_close = np.isclose(covariance, expected_covariance)
    assert entries_close.all().all()
def test__get_covariance(self):
    """_get_covariance on ``self.data`` reproduces the reference matrix.

    The copula is built with ``GaussianUnivariate``, fitted on ``self.data``,
    and the matrix returned by ``_get_covariance`` is compared element-wise
    against hard-coded numbers using the default ``np.isclose`` tolerances.
    """
    # Arrange: fit the copula and spell out the reference matrix.
    copula = GaussianMultivariate(GaussianUnivariate)
    copula.fit(self.data)

    diagonal = 1.
    expected_covariance = np.array([
        [diagonal, -0.01261819, -0.19821644],
        [-0.01261819, diagonal, -0.16896087],
        [-0.19821644, -0.16896087, diagonal],
    ])

    # Act
    covariance = copula._get_covariance(self.data)

    # Assert: every entry must be close to its reference value.
    entries_close = np.isclose(covariance, expected_covariance)
    assert entries_close.all().all()
def test_fit_numpy_array(self):
    """Fit should work indistinctly with numpy arrays and pandas dataframes.

    The copula is fitted on ``self.data.values`` and each fitted univariate is
    compared with the mean and standard deviation of the matching column of
    ``self.data``. The stored covariance must equal what ``_get_covariance``
    returns for a DataFrame built from the same values.
    """
    # Setup
    copula = GaussianMultivariate(
        distribution='copulas.univariate.gaussian.GaussianUnivariate')

    # Run
    copula.fit(self.data.values)

    # Check
    # zip() stops at the shorter iterable, so without this guard the loop
    # below would pass vacuously if fit produced too few univariates.
    assert len(copula.univariates) == len(self.data.columns)

    for column, univariate in zip(self.data.columns, copula.univariates):
        assert univariate._params['loc'] == np.mean(self.data[column])
        assert univariate._params['scale'] == np.std(self.data[column])

    expected_covariance = copula._get_covariance(pd.DataFrame(self.data.values))
    assert (copula.covariance == expected_covariance).all().all()
def test_fit(self):
    """Fitting stores one distribution per column plus the covariance.

    Each entry of ``copula.distribs`` is looked up by column name and must be
    truthy, with ``mean`` and ``std`` equal to the column's sample statistics.
    The stored covariance must equal what ``_get_covariance`` returns.
    """
    # Arrange and act: a default copula fitted on the fixture data.
    copula = GaussianMultivariate()
    copula.fit(self.data)

    # Assert: per-column distributions carry the sample statistics.
    for column_name in self.data.columns:
        fitted = copula.distribs[column_name]
        assert fitted
        assert fitted.mean == self.data[column_name].mean()
        assert fitted.std == np.std(self.data[column_name])

    reference_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == reference_covariance
    assert covariance_matches.all().all()
def test_fit_numpy_array(self):
    """Fitting on a numpy array gives the same statistics as the DataFrame.

    The copula is fitted on ``self.data.values``; its distributions are looked
    up by positional index and compared with the mean and standard deviation
    of the column at the same position in ``self.data``. The stored covariance
    must equal ``_get_covariance`` of a DataFrame built from the same values.
    """
    # Arrange and act: a default copula fitted on the raw array.
    copula = GaussianMultivariate()
    copula.fit(self.data.values)

    # Assert: distributions are keyed by column position, not by name.
    for position, column_name in enumerate(self.data.columns):
        fitted = copula.distribs[position]
        assert fitted
        assert fitted.mean == np.mean(self.data[column_name])
        assert fitted.std == np.std(self.data[column_name])

    values_frame = pd.DataFrame(self.data.values)
    reference_covariance = copula._get_covariance(values_frame)
    covariance_matches = copula.covariance == reference_covariance
    assert covariance_matches.all().all()
def test_fit_distribution_arg(self):
    """Fitting honours the ``distribution`` argument for every column.

    The copula is created with the qualified name of ``GaussianKDE``; after
    fitting, ``copula.distribution`` must still hold that name, column order
    must be preserved, and the class of each univariate must resolve to the
    same qualified name. The stored covariance must equal ``_get_covariance``.
    """
    # Arrange
    kde_name = 'copulas.univariate.gaussian_kde.GaussianKDE'
    copula = GaussianMultivariate(distribution=kde_name)

    # Act
    copula.fit(self.data)

    # Assert
    assert copula.distribution == kde_name

    for position, column_name in enumerate(self.data.columns):
        assert copula.columns[position] == column_name

        univariate_class = copula.univariates[position].__class__
        assert get_qualified_name(univariate_class) == copula.distribution

    reference_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == reference_covariance
    assert covariance_matches.all().all()
def test_to_dict(self):
    """to_dict exposes type, columns, covariance and univariate parameters.

    After fitting a default copula on ``self.data``, the returned dict must
    name the ``GaussianMultivariate`` type, list the three expected columns,
    carry the covariance as nested lists, and contain one entry per
    univariate equal to that univariate's own ``to_dict`` output.
    """
    # Arrange
    copula = GaussianMultivariate()
    copula.fit(self.data)

    # Act
    serialized = copula.to_dict()

    # Assert: top-level metadata.
    assert serialized['type'] == 'copulas.multivariate.gaussian.GaussianMultivariate'
    assert serialized['columns'] == ['column1', 'column2', 'column3']
    assert len(serialized['univariates']) == 3

    # Assert: covariance is serialized as plain nested lists.
    covariance_frame = copula._get_covariance(self.data)
    covariance_as_lists = covariance_frame.to_numpy().tolist()
    np.testing.assert_equal(serialized['covariance'], covariance_as_lists)

    # Assert: each serialized univariate matches its source object.
    pairs = zip(copula.univariates, serialized['univariates'])
    for fitted, fitted_as_dict in pairs:
        assert fitted.to_dict() == fitted_as_dict
def test_fit_distribution_arg(self):
    """Fitting honours the ``distribution`` argument for every column.

    The copula is created with the qualified name of ``KDEUnivariate``; after
    fitting, ``copula.distribution`` must still hold that name, every column
    name must be a key of ``copula.distribs``, and the class of each stored
    distribution must resolve to the same qualified name. The stored
    covariance must equal what ``_get_covariance`` returns.
    """
    # Arrange
    kde_name = 'copulas.univariate.kde.KDEUnivariate'
    copula = GaussianMultivariate(distribution=kde_name)

    # Act
    copula.fit(self.data)

    # Assert
    assert copula.distribution == kde_name

    for column_name in self.data.columns:
        assert column_name in copula.distribs

        fitted_class = copula.distribs[column_name].__class__
        assert get_qualified_name(fitted_class) == copula.distribution

    reference_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == reference_covariance
    assert covariance_matches.all().all()
def test_fit_default_distribution(self):
    """A default copula fits a GaussianUnivariate for every column.

    Without arguments, ``copula.distribution`` must be the qualified name of
    ``GaussianUnivariate``. Every column name must be a key of
    ``copula.distribs``, the stored class must resolve to that qualified name,
    and ``mean`` and ``std`` must equal the column's sample statistics. The
    stored covariance must equal what ``_get_covariance`` returns.
    """
    # Arrange
    copula = GaussianMultivariate()

    # Act
    copula.fit(self.data)

    # Assert
    assert copula.distribution == 'copulas.univariate.gaussian.GaussianUnivariate'

    for column_name in self.data.columns:
        assert column_name in copula.distribs

        fitted_class = copula.distribs[column_name].__class__
        assert get_qualified_name(fitted_class) == copula.distribution

        # Fitted parameters are compared exactly against the sample statistics.
        assert copula.distribs[column_name].mean == self.data[column_name].mean()
        assert copula.distribs[column_name].std == np.std(self.data[column_name])

    reference_covariance = copula._get_covariance(self.data)
    covariance_matches = copula.covariance == reference_covariance
    assert covariance_matches.all().all()