def test_to_dict(self):
    """Fitting on the iris data and calling ``to_dict`` yields the expected parameters."""
    # Setup
    iris = pd.read_csv('data/iris.data.csv')
    copula = GaussianMultivariate()
    copula.fit(iris)

    univariate_type = 'copulas.univariate.gaussian.GaussianUnivariate'

    # Expected (mean, std) of the univariate fitted on each column.
    marginals = {
        'feature_01': (5.843333333333334, 0.8253012917851409),
        'feature_02': (3.0540000000000003, 0.4321465800705435),
        'feature_03': (3.758666666666666, 1.7585291834055212),
        'feature_04': (1.1986666666666668, 0.7606126185881716),
    }

    expected_result = {
        'covariance': [
            [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
            [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
            [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
            [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
        ],
        'fitted': True,
        'type': 'copulas.multivariate.gaussian.GaussianMultivariate',
        'distribution': univariate_type,
        'distribs': {
            name: {
                'type': univariate_type,
                'mean': mean,
                'std': std,
                'fitted': True,
            }
            for name, (mean, std) in marginals.items()
        },
    }

    # Run
    result = copula.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)
def test_to_dict(self, kde_mock):
    """``to_dict`` on a fitted GaussianKDE reports its type, fitted flag and dataset."""
    # Setup
    samples = [
        0.4967141530112327,
        -0.13826430117118466,
        0.6476885381006925,
        1.5230298564080254,
        -0.23415337472333597,
        -0.23413695694918055,
        1.5792128155073915,
        0.7674347291529088,
        -0.4694743859349521,
        0.5425600435859647,
    ]
    column = np.array([samples])

    # The mocked KDE instance exposes the same data both as dataset and as resample output.
    mocked_kde = kde_mock.return_value
    mocked_kde.dataset = column
    mocked_kde.resample.return_value = column

    distribution = GaussianKDE()
    distribution.fit(column)

    expected_result = {
        'type': 'copulas.univariate.gaussian_kde.GaussianKDE',
        'fitted': True,
        'dataset': [samples],
    }

    # Run
    result = distribution.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)
def test_serialization_fit_model(self):
    """Round-tripping a fitted vine through ``to_dict``/``from_dict`` keeps its parameters."""
    # Setup
    X = pd.DataFrame(data=[
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ])
    instance = VineCopula('regular')
    instance.fit(X)

    # Run
    serialized = instance.to_dict()
    result = VineCopula.from_dict(serialized)

    # Check
    restored_params = result.to_dict()
    # Export the original again rather than reusing ``serialized``, as the original test did.
    original_params = instance.to_dict()
    compare_nested_dicts(restored_params, original_params)
def test_save(self, json_mock):
    """``save`` hands the expected parameter dict to the mocked json dump."""
    # Setup
    iris = pd.read_csv('data/iris.data.csv')
    instance = GaussianMultivariate()
    instance.fit(iris)

    # Expected (mean, std) stored for each column.
    marginals = {
        'feature_01': (5.843333333333334, 0.8253012917851409),
        'feature_02': (3.0540000000000003, 0.4321465800705435),
        'feature_03': (3.758666666666666, 1.7585291834055212),
        'feature_04': (1.1986666666666668, 0.7606126185881716),
    }

    expected_content = {
        'covariance': [
            [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
            [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
            [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
            [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
        ],
        'distribs': {
            name: {'mean': mean, 'std': std}
            for name, (mean, std) in marginals.items()
        },
    }

    # Run
    instance.save('test.json')

    # Check
    dumped = json_mock.call_args[0][0]
    compare_nested_dicts(dumped, expected_content)
def test_serialization_fit_model(self):
    """A fitted vine rebuilt with ``from_dict`` exports the same dict as the original."""
    # Setup
    rows = [
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ]
    X = pd.DataFrame(data=rows)
    instance = VineCopula('regular')
    instance.fit(X)

    # Run
    exported = instance.to_dict()
    result = VineCopula.from_dict(exported)

    # Check
    rebuilt_params = result.to_dict()
    # Export the original a second time rather than reusing ``exported``, as the original did.
    reference_params = instance.to_dict()
    compare_nested_dicts(rebuilt_params, reference_params)
def test_to_dict(self):
    """``to_dict`` on a fitted KDEUnivariate returns the expected parameter dict."""
    # Setup
    samples = [
        0.4967141530112327,
        -0.13826430117118466,
        0.6476885381006925,
        1.5230298564080254,
        -0.23415337472333597,
        -0.23413695694918055,
        1.5792128155073915,
        0.7674347291529088,
        -0.4694743859349521,
        0.5425600435859647,
    ]
    column = np.array([samples])

    distribution = KDEUnivariate()
    distribution.fit(column)

    expected_result = {
        'type': 'copulas.univariate.kde.KDEUnivariate',
        'fitted': True,
        'd': 1,
        'n': 10,
        'dataset': [samples],
        'covariance': [[0.20810696044195218]],
        'factor': 0.6309573444801932,
        'inv_cov': [[4.805221304834407]],
    }

    # Run
    result = distribution.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)
def test_save(self, json_mock):
    """``save`` passes the fitted Frank copula parameters to the mocked json dump."""
    # Setup
    copula = Bivariate('frank')
    copula.fit(self.X)

    expected_content = {
        "copula_type": "FRANK",
        "tau": 0.014492753623188406,
        "theta": 0.13070829945417198,
    }

    # Run
    copula.save('test.json')

    # Check
    assert json_mock.called

    # First positional argument of the most recent call to the mock.
    dumped = json_mock.call_args[0][0]
    compare_nested_dicts(dumped, expected_content)
def test_save(self, json_mock, open_mock):
    """Save opens the target path for writing and dumps the copula parameters.

    Args:
        json_mock: Mock whose most recent call receives the dict to serialize
            as its first positional argument.
        open_mock: Mock standing in for ``open``; expected to be called once
            with ``('test.json', 'w')``.
    """
    # Setup
    instance = Bivariate(copula_type='frank')
    instance.fit(self.X)

    expected_content = {
        "copula_type": "FRANK",
        "tau": 0.9128709291752769,
        "theta": 44.2003852484162,
    }

    # Run
    instance.save('test.json')

    # Check
    # ``called_once_with`` is not a Mock assertion: accessing it creates a
    # truthy child mock (or raises AttributeError on Python 3.12+), so the
    # former ``assert open_mock.called_once_with(...)`` verified nothing.
    open_mock.assert_called_once_with('test.json', 'w')
    assert json_mock.called
    compare_nested_dicts(json_mock.call_args[0][0], expected_content)
def test_to_dict_fit_model(self):
    """``to_dict`` on a fitted regular tree returns the expected parameter dict."""
    # Setup
    X = pd.DataFrame(data=[
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ])
    tree = get_tree(TreeTypes.REGULAR)

    kendall_tau = X.corr(method='kendall').values

    # One column of CDF values per input column, from a KDE fitted on that same column.
    cdf_values = np.empty(X.shape)
    for position, name in enumerate(X):
        kde = GaussianKDE()
        kde.fit(X[name])
        cdf_values[:, position] = kde.cumulative_distribution(X[name])

    tree.fit(0, X.shape[1], kendall_tau, cdf_values)

    # Values repeated throughout the expected output.
    tau = -0.49999999999999994
    theta = -5.736282443655552
    diagonal = 0.8230112726144534
    off_diagonal = 0.3384880496294825

    first_edge = {
        'index': 0,
        'D': set(),
        'L': 0,
        'R': 1,
        'U': [
            [0.7969636014074211, 0.6887638642325501, 0.12078520049364487],
            [0.6887638642325501, 0.7969636014074211, 0.12078520049364487],
        ],
        'likelihood': None,
        'name': CopulaTypes.FRANK,
        'neighbors': [],
        'parents': None,
        'tau': tau,
        'theta': theta,
    }
    second_edge = {
        'index': 1,
        'D': set(),
        'L': 1,
        'R': 2,
        'U': [
            [0.12078520049364491, 0.7969636014074213, 0.6887638642325501],
            [0.12078520049364491, 0.6887638642325503, 0.7969636014074211],
        ],
        'likelihood': None,
        'name': CopulaTypes.FRANK,
        'neighbors': [],
        'parents': None,
        'tau': tau,
        'theta': theta,
    }

    expected_result = {
        'type': 'copulas.multivariate.tree.RegularTree',
        'fitted': True,
        'level': 1,
        'n_nodes': 3,
        'previous_tree': [
            [diagonal, off_diagonal, off_diagonal],
            [off_diagonal, diagonal, off_diagonal],
            [off_diagonal, off_diagonal, diagonal],
        ],
        'tau_matrix': [
            [1.0, tau, tau],
            [tau, 1.0, tau],
            [tau, tau, 1.0],
        ],
        'tree_type': TreeTypes.REGULAR,
        'edges': [first_edge, second_edge],
    }

    # Run
    result = tree.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)
def test_save(self, json_mock, open_mock):
    """Save opens the target path for writing and dumps the copula parameters.

    Args:
        json_mock: Mock whose most recent call receives the dict to serialize
            as its first positional argument.
        open_mock: Mock standing in for ``open``; expected to be called once
            with ``('test.json', 'w')``.
    """
    # Setup
    univariate_type = 'copulas.univariate.gaussian.GaussianUnivariate'
    instance = GaussianMultivariate(distribution=univariate_type)
    data = pd.read_csv('data/iris.data.csv')
    instance.fit(data)

    covariance = [
        [1.006711409395973, -0.11010327176239865, 0.8776048563471857, 0.823443255069628],
        [-0.11010327176239865, 1.006711409395972, -0.4233383520816991, -0.3589370029669186],
        [0.8776048563471857, -0.4233383520816991, 1.006711409395973, 0.9692185540781536],
        [0.823443255069628, -0.3589370029669186, 0.9692185540781536, 1.0067114093959735],
    ]

    # Expected (mean, std) of each univariate, in the same order as 'columns'.
    marginals = [
        (5.843333333333334, 0.8253012917851409),
        (3.0540000000000003, 0.4321465800705435),
        (3.758666666666666, 1.7585291834055212),
        (1.1986666666666668, 0.7606126185881716),
    ]

    expected_content = {
        'covariance': covariance,
        'fitted': True,
        'type': 'copulas.multivariate.gaussian.GaussianMultivariate',
        'distribution': univariate_type,
        'columns': ['feature_01', 'feature_02', 'feature_03', 'feature_04'],
        'univariates': [
            {
                'type': univariate_type,
                'mean': mean,
                'std': std,
                'fitted': True,
            }
            for mean, std in marginals
        ],
    }

    # Run
    instance.save('test.json')

    # Check
    # ``called_once_with`` is not a Mock assertion: accessing it creates a
    # truthy child mock (or raises AttributeError on Python 3.12+), so the
    # former ``assert open_mock.called_once_with(...)`` verified nothing.
    open_mock.assert_called_once_with('test.json', 'w')
    compare_nested_dicts(json_mock.call_args[0][0], expected_content)
def test_to_dict_fit_model(self):
    """``to_dict`` on a fitted regular ``Tree`` returns the expected parameter dict."""
    # Setup
    X = pd.DataFrame(data=[
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ])
    tree = Tree(TreeTypes.REGULAR)

    kendall_tau = X.corr(method='kendall').values

    # One column per input column, evaluated value by value with a KDE fitted on that column.
    cdf_values = np.empty(X.shape)
    for position, name in enumerate(X):
        kde = KDEUnivariate()
        kde.fit(X[name])
        cdf_values[:, position] = [kde.cumulative_distribution(x) for x in X[name]]

    tree.fit(0, X.shape[1], kendall_tau, cdf_values)

    # Values repeated throughout the expected output.
    tau = -0.49999999999999994
    theta = -5.736282443655552
    diagonal = 0.8230112726144534
    off_diagonal = 0.3384880496294825

    first_edge = {
        'D': set(),
        'L': 0,
        'R': 1,
        'U': [
            [6.533235975920359, 6.425034969827687, 5.857062027493768],
            [6.425034969827687, 6.533235975920359, 5.857062027493768],
        ],
        'likelihood': None,
        'name': CopulaTypes.FRANK,
        'neighbors': [],
        'parents': None,
        'tau': tau,
        'theta': theta,
    }
    second_edge = {
        'D': set(),
        'L': 1,
        'R': 2,
        'U': [
            [5.857062027493768, 6.533235975920359, 6.425034969827687],
            [5.857062027493768, 6.425034969827687, 6.533235975920359],
        ],
        'likelihood': None,
        'name': CopulaTypes.FRANK,
        'neighbors': [],
        'parents': None,
        'tau': tau,
        'theta': theta,
    }

    expected_result = {
        'type': 'copulas.multivariate.tree.RegularTree',
        'fitted': True,
        'level': 1,
        'n_nodes': 3,
        'previous_tree': [
            [diagonal, off_diagonal, off_diagonal],
            [off_diagonal, diagonal, off_diagonal],
            [off_diagonal, off_diagonal, diagonal],
        ],
        'tau_matrix': [
            [1.0, tau, tau],
            [tau, 1.0, tau],
            [tau, tau, 1.0],
        ],
        'tree_type': TreeTypes.REGULAR,
        'edges': [first_edge, second_edge],
    }

    # Run
    result = tree.to_dict()

    # Check
    compare_nested_dicts(result, expected_result)