def test_to_dict_from_dict(self): data = sample_trivariate_xyz() model = GaussianMultivariate() model.fit(data) sampled_data = model.sample(10) params = model.to_dict() model2 = GaussianMultivariate.from_dict(params) pdf = model.probability_density(sampled_data) pdf2 = model2.probability_density(sampled_data) assert np.all(np.isclose(pdf, pdf2, atol=0.01)) cdf = model.cumulative_distribution(sampled_data) cdf2 = model2.cumulative_distribution(sampled_data) assert np.all(np.isclose(cdf, cdf2, atol=0.01))
def _gaussian(self, dataset): """ For the given dataset, this runs "everything but the kitchen sink" (i.e. every feature of GaussianMultivariate that is officially supported) and makes sure it doesn't crash. """ model = GaussianMultivariate({ dataset.columns[0]: GaussianKDE() # Use a KDE for the first column }) model.fit(dataset) for N in [10, 100, 50]: assert len(model.sample(N)) == N sampled_data = model.sample(10) pdf = model.probability_density(sampled_data) cdf = model.cumulative_distribution(sampled_data) # Test Save/Load from Dictionary config = model.to_dict() model2 = GaussianMultivariate.from_dict(config) for N in [10, 100, 50]: assert len(model2.sample(N)) == N pdf2 = model2.probability_density(sampled_data) cdf2 = model2.cumulative_distribution(sampled_data) assert np.all(np.isclose(pdf, pdf2, atol=0.01)) assert np.all(np.isclose(cdf, cdf2, atol=0.01)) path_to_model = os.path.join(self.test_dir.name, "model.pkl") model.save(path_to_model) model2 = GaussianMultivariate.load(path_to_model) for N in [10, 100, 50]: assert len(model2.sample(N)) == N pdf2 = model2.probability_density(sampled_data) cdf2 = model2.cumulative_distribution(sampled_data) assert np.all(np.isclose(pdf, pdf2, atol=0.01)) assert np.all(np.isclose(cdf, cdf2, atol=0.01))
########################################copula######################################### conv = [] scales = [] As = [] for i in range(len(XL)): sbl = XL[i].flatten() sbr = XR[i].flatten() conc = np.empty((sbl.shape[0], 2)) conc[:, 0] = sbr[:] conc[:, 1] = sbl[:] #print(conc.shape) copula = GaussianMultivariate(distribution=GammaUnivariate) copula.fit(conc) XX = np.array(copula.to_dict()['covariance']) xx = XX.flatten() conv.append(xx) UNI = copula.to_dict()['univariates'][0] #avec les sbr de droite scales.append(UNI["scale"]) As.append(UNI["a"]) A = np.array(As) B = np.array(scales) C = np.array(conv) distribution = np.empty((A.shape[0], 6)) distribution[:, 0] = A distribution[:, 1] = B distribution[:, 2: 6] = C # distribution[shape scale c[0] c[1] c[2] c[3] ] pour chaque sb #dis=np.reshape(distribution, -1) #distribution.shape
class GaussianCopula(SDVModel): """Model wrapping ``copulas.multivariate.GaussianMultivariate`` copula. Args: distribution (copulas.univariate.Univariate or str): Copulas univariate distribution to use. Example: The example below shows simple usage case where a ``GaussianMultivariate`` is being created and its ``fit`` and ``sample`` methods are being called. >>> model = GaussianMultivariate() >>> model.fit(pd.DataFrame({'a_field': list(range(10))})) >>> model.sample(5) a_field 0 4.796559 1 7.395329 2 7.400417 3 2.794212 4 1.925887 """ DISTRIBUTION = GaussianUnivariate distribution = None model = None def __init__(self, distribution=None): self.distribution = distribution or self.DISTRIBUTION def fit(self, table_data): """Fit the model to the table. Impute the table data before fit the model. Args: table_data (pandas.DataFrame): Data to be fitted. """ table_data = impute(table_data) self.model = GaussianMultivariate(distribution=self.distribution) self.model.fit(table_data) def sample(self, num_samples): """Sample ``num_samples`` rows from the model. Args: num_samples (int): Amount of rows to sample. Returns: pandas.DataFrame: Sampled data with the number of rows specified in ``num_samples``. """ return self.model.sample(num_samples) def get_parameters(self): """Get copula model parameters. Compute model ``covariance`` and ``distribution.std`` before it returns the flatten dict. Returns: dict: Copula flatten parameters. """ values = list() triangle = np.tril(self.model.covariance) for index, row in enumerate(triangle.tolist()): values.append(row[:index + 1]) self.model.covariance = np.array(values) params = self.model.to_dict() univariates = dict() for name, univariate in zip(params.pop('columns'), params['univariates']): univariates[name] = univariate if 'scale' in univariate: scale = univariate['scale'] if scale == 0: scale = EPSILON univariate['scale'] = np.log(scale) params['univariates'] = univariates return flatten_dict(params) def _prepare_sampled_covariance(self, covariance): """Prepare a covariance matrix. Args: covariance (list): covariance after unflattening model parameters. Result: list[list]: symmetric Positive semi-definite matrix. """ covariance = np.array(square_matrix(covariance)) covariance = (covariance + covariance.T - (np.identity(covariance.shape[0]) * covariance)) if not check_matrix_symmetric_positive_definite(covariance): covariance = make_positive_definite(covariance) return covariance.tolist() def _unflatten_gaussian_copula(self, model_parameters): """Prepare unflattened model params to recreate Gaussian Multivariate instance. The preparations consist basically in: - Transform sampled negative standard deviations from distributions into positive numbers - Ensure the covariance matrix is a valid symmetric positive-semidefinite matrix. - Add string parameters kept inside the class (as they can't be modelled), like ``distribution_type``. Args: model_parameters (dict): Sampled and reestructured model parameters. Returns: dict: Model parameters ready to recreate the model. """ univariate_kwargs = {'type': model_parameters['distribution']} columns = list() univariates = list() for column, univariate in model_parameters['univariates'].items(): columns.append(column) univariate.update(univariate_kwargs) univariate['scale'] = np.exp(univariate['scale']) univariates.append(univariate) model_parameters['univariates'] = univariates model_parameters['columns'] = columns covariance = model_parameters.get('covariance') model_parameters['covariance'] = self._prepare_sampled_covariance( covariance) return model_parameters def set_parameters(self, parameters): """Set copula model parameters. Add additional keys after unflatte the parameters in order to set expected parameters for the copula. Args: dict: Copula flatten parameters. """ parameters = unflatten_dict(parameters) parameters.setdefault('fitted', True) parameters.setdefault('distribution', self.distribution) parameters = self._unflatten_gaussian_copula(parameters) self.model = GaussianMultivariate.from_dict(parameters)