def test_to_dict(self, kde_mock):
    """Check that ``to_dict`` exports the parameters of a fitted distribution.

    ``kde_mock`` is an injected mock whose ``return_value`` stands in for the
    underlying KDE model (presumably a patch of scipy's ``gaussian_kde`` --
    confirm against the decorator).
    """
    # Setup
    values = [
        0.4967141530112327,
        -0.13826430117118466,
        0.6476885381006925,
        1.5230298564080254,
        -0.23415337472333597,
        -0.23413695694918055,
        1.5792128155073915,
        0.7674347291529088,
        -0.4694743859349521,
        0.5425600435859647,
    ]
    column = np.array([values])

    mocked_model = kde_mock.return_value
    mocked_model.dataset = column
    mocked_model.resample.return_value = column

    distribution = GaussianKDE()
    distribution.fit(column)

    expected_result = {
        'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
        'fitted': True,
        'dataset': [values],
    }

    # Run
    result = distribution.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)
def setup_norm(self):
    """Fit ``self.kde`` on 1000 draws from a standard normal distribution."""
    self.kde = GaussianKDE()

    # Seed NumPy's global RNG with 42 so the generated column is reproducible.
    np.random.seed(42)
    self.kde.fit(np.random.normal(loc=0, scale=1, size=1000))
def test_percent_point(self, kde_mock, brentq_mock, cdf_mock):
    """Check that ``percent_point`` delegates root finding to ``brentq``.

    The three arguments are injected mocks; which targets they patch is
    decided by decorators outside this block.
    """
    # Setup
    cdf_mock.return_value = 'a nice scalar bounded method'
    brentq_mock.return_value = -250.0
    mocked_model = kde_mock.return_value

    training = np.array([1, 2, 3, 4, 5])
    distribution = GaussianKDE()
    distribution.fit(training)

    # Run
    result = distribution.percent_point([0.5])

    # Check
    assert result == np.array([-250.0])

    # The model is built exactly once and never touched afterwards.
    kde_mock.assert_called_once_with(training)
    mocked_model.assert_not_called()
    assert len(mocked_model.method_calls) == 0

    brentq_mock.assert_called_once_with('a nice scalar bounded method', -1000, 1000)
def test__fit_sample_size(self):
    """``_fit`` with ``sample_size=3`` stores a single row of three values."""
    model = GaussianKDE(sample_size=3)
    model._fit(np.array([1, 2, 3, 4]))

    stored = model._params['dataset']
    assert len(stored) == 1
    assert len(stored[0]) == 3
def test_cumulative_distribution(self, kde_mock):
    """Check ``cumulative_distribution`` integrates the model once per point.

    ``kde_mock`` is an injected mock whose ``return_value`` plays the role of
    the fitted model.
    """
    # Setup
    mocked_model = kde_mock.return_value
    mocked_model.integrate_box_1d.side_effect = [0.0, 0.5, 1.0]
    mocked_model.dataset = MagicMock()
    mocked_model.dataset.mean.return_value = 1
    mocked_model.dataset.std.return_value = 0.1

    training = np.array([1, 2, 3, 4, 5])
    distribution = GaussianKDE()
    distribution.fit(training)

    points = np.array([-10, 0, 10])

    # Every call shares the same first argument, the lower bound (1 - 0.1*5).
    expected_calls = [((0.5, upper), {}) for upper in (-10, 0, 10)]

    # Run
    result = distribution.cumulative_distribution(points)

    # Check
    compare_nested_iterables(result, np.array([0.0, 0.5, 1.0]))

    kde_mock.assert_called_once_with(training)
    assert mocked_model.integrate_box_1d.call_args_list == expected_calls
def test_serialization_fit_model(self):
    """A fitted tree survives a ``to_dict`` / ``from_dict`` round trip."""
    # Setup
    tree = get_tree(TreeTypes.REGULAR)
    frame = pd.DataFrame(data=[
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ])
    root_index = 0
    node_count = frame.shape[1]
    kendall_tau = frame.corr(method='kendall').values

    # Fill one column per variable with its fitted CDF values.
    cdf_values = np.empty(frame.shape)
    for position, name in enumerate(frame):
        univariate = GaussianKDE()
        univariate.fit(frame[name])
        cdf_values[:, position] = univariate.cumulative_distribution(frame[name])

    tree.fit(root_index, node_count, kendall_tau, cdf_values)

    # Run
    restored = Tree.from_dict(tree.to_dict())

    # Check
    assert restored.to_dict() == tree.to_dict()
def test__brentq_cdf(self, partial_mock, scalarize_mock):
    """Check ``_brentq_cdf`` returns a callable computing cdf(x) minus its argument.

    Both arguments are injected mocks; their patch targets are set by
    decorators outside this block.
    """
    # Setup
    distribution = GaussianKDE()
    scalarize_mock.return_value = 'scalar_function'
    # Identity stand-in for the scalar cdf, so the result reduces to (x - 0.5).
    partial_mock.return_value = lambda x: x

    # Run
    shifted_cdf = distribution._brentq_cdf(0.5)

    # Check
    assert callable(shifted_cdf)
    assert shifted_cdf(1.0) == 0.5
    assert shifted_cdf(0.5) == 0
    assert shifted_cdf(0.0) == -0.5

    scalarize_mock.assert_called_once_with(GaussianKDE.cumulative_distribution)
    partial_mock.assert_called_once_with('scalar_function', distribution)
def test_second_tree_likelihood(self):
    """Check the likelihood of a second tree built on top of a first tree."""
    # Setup
    # Build first tree from the iris data set.
    iris = pd.read_csv('data/iris.data.csv')
    kendall_tau = iris.corr(method='kendall').values

    cdf_values = np.empty(iris.shape)
    for position, name in enumerate(iris):
        univariate = GaussianKDE()
        univariate.fit(iris[name])
        cdf_values[:, position] = univariate.cumulative_distribution(iris[name])

    first_tree = get_tree(TreeTypes.CENTER)
    first_tree.fit(0, 4, kendall_tau, cdf_values)

    uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]])
    likelihood_first_tree, conditional_uni_first = first_tree.get_likelihood(uni_matrix)
    first_tau = first_tree.get_tau_matrix()

    # Build second tree on the first tree's output.
    second_tree = get_tree(TreeTypes.CENTER)
    second_tree.fit(1, 3, first_tau, first_tree)

    expected = 0.4888802429313932

    # Run
    likelihood_second_tree, out_u = second_tree.get_likelihood(conditional_uni_first)

    # Check
    assert compare_values_epsilon(likelihood_second_tree, expected)
def test__fit_constant_sample_size(self):
    """``_fit_constant`` with ``sample_size=3`` keeps three copies of the constant."""
    model = GaussianKDE(sample_size=3)
    model._fit_constant(np.array([1, 1, 1, 1]))

    expected_params = {'dataset': [1, 1, 1]}
    assert model._params == expected_params
def test__fit(self):
    """``_fit`` without a sample size stores the input values as the dataset."""
    model = GaussianKDE()
    model._fit(np.array([1, 2, 3, 4]))

    expected_params = {'dataset': [1, 2, 3, 4]}
    assert model._params == expected_params
def test_to_dict_sample_size(self):
    """``to_dict`` on a ``sample_size=10`` model exports a ten-item dataset."""
    distribution = GaussianKDE(sample_size=10)
    distribution.fit(self.constant)

    exported = distribution.to_dict()

    assert exported['type'] == 'copulas.univariate.gaussian_kde.GaussianKDE'
    assert len(exported['dataset']) == 10
def test_fit_sample(self):
    """Sampling 50 values from a fitted model yields a 1-D array of length 50."""
    distribution = GaussianKDE()
    distribution.fit(self.data)

    drawn = distribution.sample(50)

    assert isinstance(drawn, np.ndarray)
    assert drawn.shape == (50, )
def test_fit_empty_data(self):
    """Fitting the KDE model on an empty array raises ``ValueError``."""
    # Setup
    distribution = GaussianKDE()
    empty = np.array([])

    # Run / Check
    self.assertRaises(ValueError, distribution.fit, empty)
def test__extract_constant(self):
    """``_extract_constant`` returns the repeated value held in the dataset."""
    model = GaussianKDE()
    model._params = {'dataset': [1, 1, 1, 1]}

    extracted = model._extract_constant()

    assert extracted == 1
def test_to_dict_constant(self):
    """``to_dict`` on a model fitted to ``self.constant`` exports 100 fives."""
    distribution = GaussianKDE()
    distribution.fit(self.constant)

    expected = {
        'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
        'dataset': [5] * 100,
    }

    assert distribution.to_dict() == expected
def test_percent_point_bisect(self):
    """``percent_point`` with ``method='bisect'`` returns plausible quantiles."""
    distribution = GaussianKDE()
    distribution.fit(np.array([0.5, 1.0, 1.5]))

    probabilities = np.array([0.001, 0.5, 0.999])
    quantiles = distribution.percent_point(probabilities, method='bisect')

    assert quantiles[0] < 0.0, "The 0.001th percentile should be small."
    assert abs(quantiles[1] - 1.0) < 0.1, "The 50% percentile should be the median."
    assert quantiles[2] > 2.0, "The 0.999th percentile should be large."
def test_valid_serialization_unfit_model(self):
    """For an unfitted model, ``from_dict`` undoes ``to_dict``."""
    # Setup
    original = GaussianKDE()

    # Run
    restored = GaussianKDE.from_dict(original.to_dict())

    # Check
    assert original.to_dict() == restored.to_dict()
def test_sample(self, kde_mock):
    """``sample`` calls the model's ``resample`` and returns its first row.

    ``kde_mock`` is an injected mock whose ``return_value`` acts as the model.
    """
    distribution = GaussianKDE()
    distribution.fit(np.array([1, 2, 3, 4]))

    mocked_model = kde_mock.return_value
    mocked_model.resample.return_value = np.array([[1, 2, 3]])

    drawn = distribution.sample(3)

    distribution._model.resample.assert_called_once_with(3)
    np.testing.assert_equal(drawn, np.array([1, 2, 3]))
def setUp(self):
    """Load the iris data and fit a DIRECT tree on its univariate CDF values."""
    self.data = pd.read_csv('data/iris.data.csv')
    self.tau_mat = self.data.corr(method='kendall').values
    self.u_matrix = np.empty(self.data.shape)

    # One column of CDF values per data column, in column order.
    for position, name in enumerate(self.data):
        univariate = GaussianKDE()
        univariate.fit(self.data[name])
        self.u_matrix[:, position] = univariate.cumulative_distribution(self.data[name])

    self.tree = Tree(TreeTypes.DIRECT)
    self.tree.fit(0, 4, self.tau_mat, self.u_matrix)
def test_fit_constant(self):
    """Fitting constant data records the constant and builds no model."""
    # Setup
    distribution = GaussianKDE()
    constant_data = np.array([1, 1, 1, 1, 1])

    # Run
    distribution.fit(constant_data)

    # Check
    assert distribution.fitted is True
    assert distribution.constant_value == 1
    assert distribution.model is None
def test_cumulative_distribution(self):
    """Check ``cumulative_distribution`` on a model fitted to 0.9, 1.0, 1.1."""
    distribution = GaussianKDE()
    distribution.fit(np.array([0.9, 1.0, 1.1]))

    points = np.array([
        0.0,   # There is no data below this (cdf = 0.0).
        1.0,   # Half the data is below this (cdf = 0.5).
        2.0,   # All the data is below this (cdf = 1.0).
        -1.0,  # There is no data below this (cdf = 0).
    ])
    expected = np.array([0.0, 0.5, 1.0, 0.0])

    cdf = distribution.cumulative_distribution(points)

    assert np.all(np.isclose(cdf, expected, atol=1e-3))
def test_probability_density(self, kde_mock):
    """``probability_density`` forwards its input to the model's ``evaluate``.

    ``kde_mock`` is an injected mock whose ``return_value`` acts as the model.
    """
    distribution = GaussianKDE()
    distribution.fit(np.array([1, 2, 3, 4]))

    mocked_model = kde_mock.return_value
    mocked_model.evaluate.return_value = np.array([0.1, 0.2, 0.3])

    density = distribution.probability_density(np.array([1, 2, 3]))

    evaluate = distribution._model.evaluate
    assert evaluate.call_count == 1

    # First positional argument of the single ``evaluate`` call.
    forwarded = evaluate.call_args[0][0]
    np.testing.assert_equal(forwarded, np.array([1, 2, 3]))
    np.testing.assert_equal(density, np.array([0.1, 0.2, 0.3]))
def from_dict(cls, vine_dict):
    """Create a new instance from a parameters dictionary.

    Args:
        vine_dict (dict):
            Parameters of the Vine, in the same format as the one
            returned by the ``to_dict`` method.

    Returns:
        Vine:
            Instance of the Vine defined on the parameters.
    """
    instance = cls(vine_dict['vine_type'])

    fitted = vine_dict['fitted']
    if not fitted:
        # Unfitted vines carry nothing beyond their type.
        return instance

    instance.fitted = fitted
    for name in ('n_sample', 'n_var', 'truncated', 'depth'):
        setattr(instance, name, vine_dict[name])

    instance.trees = cls._deserialize_trees(vine_dict['trees'])

    univariates = []
    for uni_params in vine_dict['unis']:
        univariates.append(GaussianKDE.from_dict(uni_params))

    instance.unis = univariates
    instance.ppfs = [univariate.percent_point for univariate in univariates]
    instance.columns = vine_dict['columns']
    instance.tau_mat = np.array(vine_dict['tau_mat'])
    instance.u_matrix = np.array(vine_dict['u_matrix'])

    return instance
def test_from_dict(self):
    """From_dict sets the values of a dictionary as attributes of the instance.

    The covariance, bandwidth factor and inverse covariance are computed
    floats, so they are compared with a tolerance instead of exact ``==``;
    bit-exact equality can break across platforms or library versions.
    """
    # Setup
    dataset = [[
        0.4967141530112327,
        -0.13826430117118466,
        0.6476885381006925,
        1.5230298564080254,
        -0.23415337472333597,
        -0.23413695694918055,
        1.5792128155073915,
        0.7674347291529088,
        -0.4694743859349521,
        0.5425600435859647,
    ]]
    parameters = {
        'fitted': True,
        'dataset': dataset,
    }

    # Run
    distribution = GaussianKDE.from_dict(parameters)

    # Check
    model = distribution.model
    assert model.d == 1
    assert model.n == 10

    # Derived quantities: tolerance-based comparison.
    np.testing.assert_allclose(model.covariance, np.array([[0.20810696044195226]]))
    np.testing.assert_allclose(model.factor, 0.6309573444801932)
    np.testing.assert_allclose(model.inv_cov, np.array([[4.805221304834406]]))

    # The dataset is passed through unchanged, so exact equality is kept.
    assert (model.dataset == np.array(dataset)).all()
def test_save_load(self):
    """A saved and re-loaded model gives matching pdf and cdf (atol 0.01)."""
    original = GaussianKDE()
    original.fit(self.data)

    points = original.sample(50)

    path_to_model = os.path.join(self.test_dir.name, "model.pkl")
    original.save(path_to_model)
    reloaded = GaussianKDE.load(path_to_model)

    pdf_before = original.probability_density(points)
    pdf_after = reloaded.probability_density(points)
    assert np.all(np.isclose(pdf_before, pdf_after, atol=0.01))

    cdf_before = original.cumulative_distribution(points)
    cdf_after = reloaded.cumulative_distribution(points)
    assert np.all(np.isclose(cdf_before, cdf_after, atol=0.01))
def test_fit(self, kde_mock):
    """On fit, a new model is created from the data and stored on the instance.

    ``kde_mock`` is an injected mock for the model constructor.
    """
    # Setup
    distribution = GaussianKDE()
    training = np.array([1, 2, 3, 4, 5])

    # The model's ``evaluate`` attribute is a sentinel string, so the check
    # below can tell that ``probability_density`` was bound to it.
    fake_model = MagicMock(evaluate='pdf')
    kde_mock.return_value = fake_model

    # Run
    distribution.fit(training)

    # Check
    kde_mock.assert_called_once_with(training)
    assert distribution.model == fake_model
    assert distribution.probability_density == 'pdf'
    assert distribution.fitted is True
    assert distribution.constant_value is None
def test__get_bounds(self):
    """``_get_bounds`` pads the data range by five standard deviations."""
    values = [1, 2, 3, 4, 5]

    # A bare mock stands in for the instance; only ``_params`` is configured.
    stand_in = MagicMock()
    stand_in._params = {'dataset': np.array(values)}

    lower, upper = GaussianKDE._get_bounds(stand_in)

    margin = 5 * np.std(values)
    assert lower == 1 - margin
    assert upper == 5 + margin
def test___init__(self):
    """On init, the model and constant value are None and fitted is False."""
    # Setup / Run
    instance = GaussianKDE()

    # Check
    # These were bare expressions before: they were evaluated and discarded,
    # so the test could never fail. Each is now a real assertion.
    assert instance.model is None
    assert instance.fitted is False
    assert instance.constant_value is None
def test_gaussiankde_arguments(self):
    """Samples from a narrow-bandwidth KDE should follow the training data.

    The data is a mix of integers in ``[low, high)`` plus small Gaussian
    noise. A ``GaussianMultivariate`` using ``GaussianKDE(bw_method=0.01)``
    is fitted to it, and a two-sample Kolmogorov-Smirnov test compares the
    training data with the same number of samples.
    """
    size = 1000
    low = 0
    high = 9
    data = randint.rvs(low, high, size=size) + norm.rvs(0, 0.1, size=size)

    dist = GaussianMultivariate(distribution=GaussianKDE(bw_method=0.01))
    dist.fit(data)

    # Take the whole first column. ``[0]`` selected the first *row*, a single
    # observation, which left the KS test with almost nothing to compare.
    samples = dist.sample(size).to_numpy()[:, 0]

    # Only the p-value is needed; the KS statistic is ignored.
    _, p = ks_2samp(data, samples)
    assert p >= 0.05
def test_to_dict_from_dict(self):
    """A model rebuilt via ``from_dict`` gives matching pdf and cdf (atol 0.01)."""
    original = GaussianKDE()
    original.fit(self.data)

    points = original.sample(50)

    rebuilt = GaussianKDE.from_dict(original.to_dict())

    pdf_before = original.probability_density(points)
    pdf_after = rebuilt.probability_density(points)
    assert np.all(np.isclose(pdf_before, pdf_after, atol=0.01))

    cdf_before = original.cumulative_distribution(points)
    cdf_after = rebuilt.cumulative_distribution(points)
    assert np.all(np.isclose(cdf_before, cdf_after, atol=0.01))