def test_weighted_gaussian_mixture_multicomponents_multidimensions(self):
    """Compare a fitted WeightedGaussianMixture with volume-weighted statistics.

    The mixture is fitted on ``self.samples`` starting from the fixture's
    means, covariances and proportions. Its means, covariances and weights
    are then checked against the statistics of ``self.s0`` and ``self.s1``,
    each weighted by its share of ``self.mesh.cell_volumes``.
    """
    gmm = WeightedGaussianMixture(
        mesh=self.mesh,
        n_components=self.n_components,
        covariance_type="full",
        max_iter=1000,
        n_init=20,
        tol=1e-8,
        means_init=self.means,
        warm_start=True,
        precisions_init=np.linalg.inv(self.sigma),
        weights_init=self.proportions,
    )
    gmm.fit(self.samples)

    # The first ``split`` cell volumes weight ``s0``; the rest weight ``s1``.
    split = self.s0.shape[0]
    volumes_s0 = self.mesh.cell_volumes[:split]
    volumes_s1 = self.mesh.cell_volumes[split:]

    expected_means = np.c_[
        np.average(self.s0, axis=0, weights=volumes_s0),
        np.average(self.s1, axis=0, weights=volumes_s1),
    ].T

    expected_covariances = np.r_[
        np.cov(self.s0.T, ddof=0, aweights=volumes_s0),
        np.cov(self.s1.T, ddof=0, aweights=volumes_s1),
    ].reshape(gmm.covariances_.shape)

    expected_weights = np.r_[volumes_s0.sum(), volumes_s1.sum()]
    expected_weights /= expected_weights.sum()

    self.assertTrue(np.allclose(gmm.means_, expected_means))
    self.assertTrue(np.allclose(gmm.covariances_, expected_covariances))
    self.assertTrue(np.allclose(gmm.weights_, expected_weights))
    print(
        "WeightedGaussianMixture is estimating correctly in 2D with 2 components."
    )
def test_weighted_gaussian_mixture_one_component_1d(self):
    """Fit a single-component mixture on the ``s0`` part of the model.

    The fitted mean and covariance are compared with the mean and
    (``ddof=0``) covariance of the same values weighted by the cell volumes.
    """
    values = self.wires.s0 * self.model

    # Reference statistics, weighted by cell volume.
    volumes = self.mesh.cell_volumes
    expected_mean = np.average(values, weights=volumes)
    expected_covariance = np.cov(values, ddof=0, aweights=volumes)

    gmm = WeightedGaussianMixture(
        mesh=self.mesh,
        n_components=1,
        covariance_type="full",
        max_iter=1000,
        n_init=10,
        tol=1e-8,
        warm_start=True,
    )
    # The estimator is fed a single-column 2D array.
    gmm.fit(values.reshape(-1, 1))

    self.assertTrue(np.isclose(gmm.means_[0], expected_mean))
    self.assertTrue(np.isclose(gmm.covariances_[0], expected_covariance))
    print("WeightedGaussianMixture is estimating correctly in 1D with 1 component.")
def test_MAP_estimate_multi_component_multidimensions(self):
    """Check the "semi" MAP estimate against rough blended statistics.

    A reference mixture is fitted on a synthetic prior model, then a
    GaussianMixtureWithPrior using that reference is fitted on
    ``self.samples``. Means and covariances are compared, with loose
    relative tolerances, to weight-blended combinations of the sample
    statistics and the reference mixture parameters.
    """
    # Prior model: standard-normal noise around 0.9 times one of the two
    # fixture means; the first mean is picked with probability 0.9.
    # Noise is drawn before the picks, keeping the random stream order.
    noise = np.random.randn(*self.samples.shape)
    picks = np.random.choice(2, size=self.nsample, p=[0.9, 0.1])
    model_prior = noise + 0.9 * self.means[picks]

    clfref = WeightedGaussianMixture(
        mesh=self.mesh,
        n_components=self.n_components,
        covariance_type="full",
        max_iter=1000,
        n_init=10,
        tol=1e-8,
        warm_start=True,
    )
    clfref.fit(model_prior)
    clfref.order_clusters_GM_weight()

    clf = GaussianMixtureWithPrior(
        gmmref=clfref,
        max_iter=1000,
        n_init=100,
        tol=1e-10,
        nu=1,
        kappa=1,
        zeta=1,
        prior_type="semi",
        update_covariances=True,
    )
    clf.fit(self.samples)

    # Each sample group is paired with its slice of the cell volumes.
    split = self.s0.shape[0]
    volumes = self.mesh.cell_volumes
    groups = (
        (self.s0, volumes[:split]),
        (self.s1, volumes[split:]),
    )

    # This is a rough estimate of the multidimensional, multi-components means
    blended_means = []
    for k, (points, vols) in enumerate(groups):
        w_fit = clf.weights_[k]
        w_ref = clfref.weights_[k]
        sample_mean = np.average(points, axis=0, weights=vols)
        blended_means.append(
            (w_fit * sample_mean + w_ref * clfref.means_[k]) / (w_fit + w_ref)
        )
    checking_means = np.c_[blended_means[0], blended_means[1]].T
    self.assertTrue(np.allclose(checking_means, clf.means_, rtol=1e-2))

    # This is a rough estimate of the multidimensional, multi-components covariances_
    blended_covariances = []
    for k, (points, vols) in enumerate(groups):
        w_fit = clf.weights_[k]
        w_ref = clfref.weights_[k]
        sample_cov = np.cov(points.T, ddof=0, aweights=vols)
        blended_covariances.append(
            (w_fit * sample_cov + w_ref * clfref.covariances_[k]) / (w_fit + w_ref)
        )
    checking_covariances = np.r_[
        blended_covariances[0], blended_covariances[1]
    ].reshape(clf.covariances_.shape)
    self.assertTrue(np.allclose(checking_covariances, clf.covariances_, rtol=0.15))

    # Expected proportions: group volume plus the reference weight scaled by
    # the total volume, normalised to sum to one.
    total_volume = volumes.sum()
    checking_proportions = np.r_[
        volumes[:split].sum() + clfref.weights_[0] * total_volume,
        volumes[split:].sum() + clfref.weights_[1] * total_volume,
    ]
    checking_proportions /= checking_proportions.sum()
    self.assertTrue(np.allclose(checking_proportions, clf.weights_))
    print(
        "GaussianMixtureWithPrior is semi-MAP-estimating correctly in 2D with 2 components."
    )
def test_MAP_estimate_one_component_1d(self):
    """Check "full" and "semi" MAP estimates for one component in 1D.

    The ``s0`` part of the model is split into a leading chunk (fitted by
    the MAP estimator) and a trailing chunk of the same length (used to fit
    the reference mixture). With ``nu = kappa = zeta = 1`` the expected
    values are pooled or averaged statistics of the two chunks.
    """
    # subsample mesh and model between mle and prior
    n_samples = int(self.nsample * self.proportions.min())
    model_map = self.wires.s0 * self.model
    model_mle = model_map[:n_samples]
    model_prior = model_map[-n_samples:]

    # Only the first n_samples cells are flagged active.
    actv = np.zeros(self.mesh.nC, dtype="bool")
    actv[:n_samples] = np.ones(n_samples, dtype="bool")

    clfref = WeightedGaussianMixture(
        mesh=self.mesh,
        actv=actv,
        n_components=1,
        covariance_type="full",
        max_iter=1000,
        n_init=10,
        tol=1e-8,
        warm_start=True,
    )
    clfref.fit(model_prior.reshape(-1, 1))

    # Options shared by both MAP estimators; only ``prior_type`` differs.
    map_options = dict(
        gmmref=clfref,
        max_iter=1000,
        n_init=10,
        tol=1e-8,
        warm_start=True,
        nu=1,
        kappa=1,
        zeta=1,
        update_covariances=True,
    )

    # Both chunks are weighted by the active-cell volumes.
    active_volumes = self.mesh.cell_volumes[actv]
    pooled_values = np.r_[model_mle, model_prior]
    pooled_weights = np.r_[active_volumes, active_volumes]

    # --- full prior ---
    clf = GaussianMixtureWithPrior(prior_type="full", **map_options)
    clf.fit(model_mle.reshape(-1, 1))

    checking_means = np.average(pooled_values, weights=pooled_weights)
    checking_covariance = np.cov(pooled_values, ddof=0, aweights=pooled_weights)

    self.assertTrue(np.isclose(checking_covariance, clf.covariances_))
    self.assertTrue(np.isclose(checking_means, clf.means_))
    print(
        "GaussianMixtureWithPrior is fully-MAP-estimating correctly in 1D with 1 component."
    )

    # --- semi prior ---
    clfsemi = GaussianMixtureWithPrior(prior_type="semi", **map_options)
    clfsemi.fit(model_mle.reshape(-1, 1))

    checking_means_semi = np.average(pooled_values, weights=pooled_weights)
    # The expected covariance is the average of the two per-chunk covariances.
    cov_mle = np.cov(model_mle, ddof=0, aweights=active_volumes)
    cov_prior = np.cov(model_prior, ddof=0, aweights=active_volumes)
    checking_covariance_semi = 0.5 * cov_mle + 0.5 * cov_prior

    self.assertTrue(np.isclose(checking_covariance_semi, clfsemi.covariances_))
    self.assertTrue(np.isclose(checking_means_semi, clfsemi.means_))
    print(
        "GaussianMixtureWithPrior is semi-MAP-estimating correctly in 1D with 1 component."
    )
def test_full_covariances(self):
    """Check PGI regularization scores and derivatives with full covariances.

    The approximated evaluation, gradient and Hessian are compared with
    their non-approximated counterparts. A step solved from the Hessian and
    gradient is then checked to bring the samples onto the reference model.
    When ``self.PlotIt`` is truthy the result is also plotted.
    """
    print("Test Full covariances: ")
    print("=======================")

    # Fit a Gaussian Mixture
    gmm = WeightedGaussianMixture(
        mesh=self.mesh,
        n_components=self.n_components,
        covariance_type="full",
        max_iter=1000,
        n_init=10,
        tol=1e-8,
        means_init=self.means,
        warm_start=True,
        precisions_init=np.linalg.inv(self.sigma),
        weights_init=self.proportions,
    )
    gmm.fit(self.samples)

    # Define reg
    reg = make_PGI_regularization(
        mesh=self.mesh,
        gmmref=gmm,
        approx_gradient=True,
        alpha_x=0.0,
        wiresmap=self.wires,
        cell_weights_list=self.cell_weights_list,
    )
    # First objective term; its approximation flags are toggled below.
    pgi_term = reg.objfcts[0]

    # Reference model: the mean of the component predicted for each sample.
    mref = mkvc(gmm.means_[gmm.predict(self.samples)])

    # check score value
    dm = self.model - mref
    score_approx0 = reg(self.model)
    score_approx1 = 0.5 * dm.dot(reg.deriv2(self.model, dm))
    self.assertTrue(np.allclose(score_approx0, score_approx1))

    # Turn off the approximated evaluation before computing the score.
    pgi_term.approx_eval = False
    score = reg(self.model) - reg(mref)
    self.assertTrue(np.allclose(score_approx0, score, rtol=1e-4))
    print("scores for PGI & Full Cov. are ok.")

    # check derivatives as an optimization on locally quadratic function
    # The approximated gradient must be taken BEFORE the flags are switched.
    deriv = reg.deriv(self.model)
    pgi_term.approx_gradient = False
    pgi_term.approx_hessian = False
    deriv_full = reg.deriv(self.model)
    self.assertTrue(np.allclose(deriv, deriv_full, rtol=1e-4))
    print("1st derivatives for PGI & Full Cov. are ok.")

    Hinv = SolverLU(reg.deriv2(self.model))
    p = Hinv * deriv
    direction2 = np.c_[self.wires * p]
    stepped = self.samples - direction2
    self.assertTrue(np.allclose(mkvc(stepped), mkvc(mref), rtol=1e-4))
    print("2nd derivatives for PGI & Full Cov. are ok.")

    if not self.PlotIt:
        return

    print("Plotting", self.PlotIt)
    import matplotlib.pyplot as plt

    # Same bounds on both axes, taken from the overall sample range.
    lower = self.samples.min()
    upper = self.samples.max()
    x, y = np.mgrid[lower:upper:0.5, lower:upper:0.5]
    grid_points = np.dstack((x, y)).reshape(-1, 2)
    rv = gmm.score_samples(grid_points)
    rvm = gmm.predict(grid_points)

    figfull, axfull = plt.subplots(1, 1, figsize=(16, 8))
    figfull.suptitle("Full Covariances Tests")

    axfull.contourf(x, y, rvm.reshape(x.shape), alpha=0.25, cmap="brg")
    axfull.contour(x, y, rv.reshape(x.shape), 20)
    axfull.scatter(
        self.samples[:, 0], self.samples[:, 1], color="blue", s=5.0, alpha=0.25
    )
    # Negative approximated gradient at each sample.
    axfull.quiver(
        self.samples[:, 0],
        self.samples[:, 1],
        -(self.wires.s0 * deriv),
        -(self.wires.s1 * deriv),
        color="red",
        alpha=0.25,
    )
    # Negative Hessian-solved step and where it lands.
    axfull.quiver(
        self.samples[:, 0],
        self.samples[:, 1],
        -direction2[:, 0],
        -direction2[:, 1],
        color="k",
    )
    axfull.scatter(stepped[:, 0], stepped[:, 1], color="k", s=50.0)

    axfull.set_xlabel("Property 1")
    axfull.set_ylabel("Property 2")
    axfull.set_title("PGI with W")
    plt.show()
def test_pgi_regularization_approxDeriv(self):
    """
    Check the approximated PGI derivatives with full covariances.

    Samples are drawn from two 2D Gaussians and a reference mixture is
    fitted on them. A step is solved from the Hessian and the approximated
    gradient in two ways: matrix-free with ``bicgstab`` on
    ``reg.deriv2(model, x)`` and directly with ``spsolve`` on
    ``reg.deriv2(model)``. After subtracting either step from the model,
    every value of each property must lie within ``tol`` of the matching
    coordinate of one of the reference means.

    This test might be redundant with the development of the tests above.
    """
    print("Testing the PGI approximated derivatives for full Cov.")
    print("======================================================")

    mean0 = np.r_[2.0, 0.0]
    sigma0 = np.r_[[[1.0, -1.0], [-1.0, 2.0]]]
    rv0 = multivariate_normal(mean0, sigma0)

    mean1 = mean0 - 2.0
    sigma1 = np.r_[[[0.5, 0.3], [0.3, 0.5]]]
    rv1 = multivariate_normal(mean1, sigma1)

    s0 = rv0.rvs(700)
    s1 = rv1.rvs(300)
    s = np.r_[s0, s1]
    model = mkvc(s)

    # One mesh cell per sample; each wire covers one property.
    mesh = discretize.TensorMesh([s.shape[0]])
    wires = Wires(("s0", mesh.nC), ("s1", mesh.nC))

    n = 2
    clfref = WeightedGaussianMixture(
        mesh=mesh,
        n_components=n,
        covariance_type="full",
        max_iter=1000,
        n_init=20,
    )
    clfref.fit(s)

    reg = regularization.SimplePGI(
        mesh=mesh,
        gmmref=clfref,
        wiresmap=wires,
        approx_eval=False,
        approx_gradient=True,
        alpha_x=0.0,
    )

    deriv = reg.deriv(model)

    def hessian_matvec(x):
        # Matrix-free Hessian-vector product at ``model``.
        return reg.deriv2(model, x)

    # Size the operator from the model instead of hard-coding 2000, so the
    # test stays valid if the sample counts above change.
    n_params = model.size
    HH = LinearOperator(
        (n_params, n_params), matvec=hessian_matvec, rmatvec=hessian_matvec
    )
    deriv2 = bicgstab(HH, deriv, atol=1e-8)[0]

    Hfull = reg.deriv2(model)
    deriv2bis = spsolve(Hfull, deriv)

    def worst_distances_to_means(step):
        # For each property, the largest distance (over cells) between the
        # updated value and the nearest reference mean coordinate.
        updated = wires * (model - step)
        worst = []
        for dim, values in enumerate(updated):
            nearest = np.min(
                [np.abs(values - mean[dim]) for mean in clfref.means_], axis=0
            )
            worst.append(np.max(nearest))
        return worst

    tol = 1e-10
    errors = worst_distances_to_means(deriv2) + worst_distances_to_means(deriv2bis)

    # assertLess reports the offending value when the check fails.
    self.assertLess(np.max(errors), tol)
    print("PGI approximated derivatives for full Cov. Tested and Happy")