Example #1
    def test_weighted_gaussian_mixture_multicomponents_multidimensions(self):
        clf = WeightedGaussianMixture(
            mesh=self.mesh,
            n_components=self.n_components,
            covariance_type="full",
            max_iter=1000,
            n_init=20,
            tol=1e-8,
            means_init=self.means,
            warm_start=True,
            precisions_init=np.linalg.inv(self.sigma),
            weights_init=self.proportions,
        )
        clf.fit(self.samples)

        checking_means = np.c_[
            np.average(
                self.s0, axis=0, weights=self.mesh.cell_volumes[: self.s0.shape[0]]
            ),
            np.average(
                self.s1, axis=0, weights=self.mesh.cell_volumes[self.s0.shape[0] :]
            ),
        ].T

        checking_covariances = np.r_[
            np.cov(
                self.s0.T, ddof=0, aweights=self.mesh.cell_volumes[: self.s0.shape[0]]
            ),
            np.cov(
                self.s1.T, ddof=0, aweights=self.mesh.cell_volumes[self.s0.shape[0] :]
            ),
        ].reshape(clf.covariances_.shape)

        checking_proportions = np.r_[
            self.mesh.cell_volumes[: self.s0.shape[0]].sum(),
            self.mesh.cell_volumes[self.s0.shape[0] :].sum(),
        ]
        checking_proportions /= checking_proportions.sum()

        self.assertTrue(np.all(np.isclose(clf.means_, checking_means)))
        self.assertTrue(np.all(np.isclose(clf.covariances_, checking_covariances)))
        self.assertTrue(np.all(np.isclose(clf.weights_, checking_proportions)))
        print(
            "WeightedGaussianMixture is estimating correctly in 2D with 2 components."
        )
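Note: these methods reference fixtures (self.mesh, self.samples, self.s0, self.s1, self.means, self.sigma, self.proportions, self.n_components, and later self.wires and self.model) that are built in a setUp not shown on this page. The sketch below is a hypothetical reconstruction of such a fixture, assuming two 2-D Gaussian clusters on a discretize.TensorMesh with one cell per sample (all numeric values are assumptions); Example #6 constructs its data in the same spirit, including the Wires map.

# Hypothetical setUp sketch -- not taken from the original test file; it only
# illustrates the kind of data the `self.*` attributes above refer to.
import numpy as np
import discretize
from scipy.stats import multivariate_normal

np.random.seed(0)                            # arbitrary seed for reproducibility
nsample = 1000
n_components = 2
proportions = np.r_[0.6, 0.4]                # assumed mixture proportions
means = np.array([[0.0, 0.0], [5.0, 5.0]])   # one 2-D mean per component
sigma = np.array([np.eye(2), np.eye(2)])     # one (2, 2) covariance per component

n0 = int(nsample * proportions[0])
s0 = multivariate_normal(means[0], sigma[0]).rvs(n0)
s1 = multivariate_normal(means[1], sigma[1]).rvs(nsample - n0)
samples = np.r_[s0, s1]                      # (nsample, 2) physical-property samples

# one mesh cell per sample, so mesh.cell_volumes supplies the per-sample weights
mesh = discretize.TensorMesh([nsample])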
Example #2
    def test_weighted_gaussian_mixture_one_component_1d(self):
        model1d = self.wires.s0 * self.model
        clf = WeightedGaussianMixture(
            mesh=self.mesh,
            n_components=1,
            covariance_type="full",
            max_iter=1000,
            n_init=10,
            tol=1e-8,
            warm_start=True,
        )
        clf.fit(model1d.reshape(-1, 1))

        checking_mean = np.average(model1d, weights=self.mesh.cell_volumes)
        checking_covariance = np.cov(model1d, ddof=0, aweights=self.mesh.cell_volumes)

        self.assertTrue(np.isclose(clf.means_[0], checking_mean))
        self.assertTrue(np.isclose(clf.covariances_[0], checking_covariance))
        print("WeightedGaussianMixture is estimating correctly in 1D with 1 component.")
Example #3
    def test_MAP_estimate_multi_component_multidimensions(self):
        # prior model: identity-covariance samples centered at 0.9 times the means
        model_prior = (
            np.random.randn(*self.samples.shape)
            + 0.9 * self.means[np.random.choice(2, size=self.nsample, p=[0.9, 0.1])]
        )

        clfref = WeightedGaussianMixture(
            mesh=self.mesh,
            n_components=self.n_components,
            covariance_type="full",
            max_iter=1000,
            n_init=10,
            tol=1e-8,
            warm_start=True,
        )
        clfref.fit(model_prior)
        clfref.order_clusters_GM_weight()

        clf = GaussianMixtureWithPrior(
            gmmref=clfref,
            max_iter=1000,
            n_init=100,
            tol=1e-10,
            nu=1,
            kappa=1,
            zeta=1,
            prior_type="semi",
            update_covariances=True,
        )
        clf.fit(self.samples)

        # This is a rough estimate of the multidimensional, multi-components means
        checking_means = np.c_[
            (
                clf.weights_[0]
                * np.average(
                    self.s0, axis=0, weights=self.mesh.cell_volumes[: self.s0.shape[0]]
                )
                + clfref.weights_[0] * clfref.means_[0]
            )
            / (clf.weights_[0] + clfref.weights_[0]),
            (
                clf.weights_[1]
                * np.average(
                    self.s1, axis=0, weights=self.mesh.cell_volumes[self.s0.shape[0] :]
                )
                + clfref.weights_[1] * clfref.means_[1]
            )
            / (clf.weights_[1] + clfref.weights_[1]),
        ].T
        self.assertTrue(np.all(np.isclose(checking_means, clf.means_, rtol=1e-2)))

        # This is a rough estimate of the multidimensional, multi-components covariances_
        checking_covariances = np.r_[
            (
                clf.weights_[0]
                * np.cov(
                    self.s0.T,
                    ddof=0,
                    aweights=self.mesh.cell_volumes[: self.s0.shape[0]],
                )
                + clfref.weights_[0] * clfref.covariances_[0]
            )
            / (clf.weights_[0] + clfref.weights_[0]),
            (
                clf.weights_[1]
                * np.cov(
                    self.s1.T,
                    ddof=0,
                    aweights=self.mesh.cell_volumes[self.s0.shape[0] :],
                )
                + clfref.weights_[1] * clfref.covariances_[1]
            )
            / (clf.weights_[1] + clfref.weights_[1]),
        ].reshape(clf.covariances_.shape)
        self.assertTrue(
            np.all(np.isclose(checking_covariances, clf.covariances_, rtol=0.15))
        )

        checking_proportions = np.r_[
            self.mesh.cell_volumes[: self.s0.shape[0]].sum()
            + clfref.weights_[0] * self.mesh.cell_volumes.sum(),
            self.mesh.cell_volumes[self.s0.shape[0] :].sum()
            + clfref.weights_[1] * self.mesh.cell_volumes.sum(),
        ]
        checking_proportions /= checking_proportions.sum()
        self.assertTrue(np.all(np.isclose(checking_proportions, clf.weights_)))
        print(
            "GaussianMixtureWithPrior is semi-MAP-estimating correctly in 2D with 2 components."
        )
Example #4
    def test_MAP_estimate_one_component_1d(self):
        # subsample mesh and model between mle and prior
        n_samples = int(self.nsample * self.proportions.min())
        model_map = self.wires.s0 * self.model
        model_mle = model_map[:n_samples]
        model_prior = model_map[-n_samples:]
        actv = np.zeros(self.mesh.nC, dtype="bool")
        actv[:n_samples] = np.ones(n_samples, dtype="bool")

        clfref = WeightedGaussianMixture(
            mesh=self.mesh,
            actv=actv,
            n_components=1,
            covariance_type="full",
            max_iter=1000,
            n_init=10,
            tol=1e-8,
            warm_start=True,
        )
        clfref.fit(model_prior.reshape(-1, 1))

        clf = GaussianMixtureWithPrior(
            gmmref=clfref,
            max_iter=1000,
            n_init=10,
            tol=1e-8,
            warm_start=True,
            nu=1,
            kappa=1,
            zeta=1,
            prior_type="full",
            update_covariances=True,
        )
        clf.fit(model_mle.reshape(-1, 1))

        checking_means = np.average(
            np.r_[model_mle, model_prior],
            weights=np.r_[self.mesh.cell_volumes[actv], self.mesh.cell_volumes[actv]],
        )
        checking_covariance = np.cov(
            np.r_[model_mle, model_prior],
            ddof=0,
            aweights=np.r_[self.mesh.cell_volumes[actv], self.mesh.cell_volumes[actv]],
        )

        self.assertTrue(np.isclose(checking_covariance, clf.covariances_))
        self.assertTrue(np.isclose(checking_means, clf.means_))
        print(
            "GaussianMixtureWithPrior is fully-MAP-estimating correctly in 1D with 1 component."
        )

        clfsemi = GaussianMixtureWithPrior(
            gmmref=clfref,
            max_iter=1000,
            n_init=10,
            tol=1e-8,
            warm_start=True,
            nu=1,
            kappa=1,
            zeta=1,
            prior_type="semi",
            update_covariances=True,
        )
        clfsemi.fit(model_mle.reshape(-1, 1))

        checking_means_semi = np.average(
            np.r_[model_mle, model_prior],
            weights=np.r_[self.mesh.cell_volumes[actv], self.mesh.cell_volumes[actv]],
        )
        checking_covariance_semi = 0.5 * np.cov(
            model_mle, ddof=0, aweights=self.mesh.cell_volumes[actv]
        ) + 0.5 * np.cov(model_prior, ddof=0, aweights=self.mesh.cell_volumes[actv])
        self.assertTrue(np.isclose(checking_covariance_semi, clfsemi.covariances_))
        self.assertTrue(np.isclose(checking_means_semi, clfsemi.means_))
        print(
            "GaussianMixtureWithPrior is semi-MAP-estimating correctly in 1D with 1 component."
        )
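The only difference between the "full" and "semi" reference values above is the covariance: "full" pools the MLE and prior samples before calling np.cov, while "semi" averages the two individual covariances with equal weight (here nu = kappa = 1). A small standalone numpy sketch of that distinction, with made-up data:

import numpy as np

rng = np.random.default_rng(0)
mle = rng.normal(0.0, 1.0, 500)       # stand-in for model_mle
prior = rng.normal(3.0, 1.0, 500)     # stand-in for model_prior

# "full" prior: covariance of the pooled samples (includes the spread between the two centers)
cov_full = np.cov(np.r_[mle, prior], ddof=0)

# "semi" prior: equal-weight average of the two individual covariances
cov_semi = 0.5 * np.cov(mle, ddof=0) + 0.5 * np.cov(prior, ddof=0)

print(cov_full, cov_semi)  # cov_full > cov_semi because it also counts the mean separation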
Example #5
    def test_full_covariances(self):
        print("Test Full covariances: ")
        print("=======================")
        # Fit a Gaussian Mixture
        clf = WeightedGaussianMixture(
            mesh=self.mesh,
            n_components=self.n_components,
            covariance_type="full",
            max_iter=1000,
            n_init=10,
            tol=1e-8,
            means_init=self.means,
            warm_start=True,
            precisions_init=np.linalg.inv(self.sigma),
            weights_init=self.proportions,
        )
        clf.fit(self.samples)

        # Define reg
        reg = make_PGI_regularization(
            mesh=self.mesh,
            gmmref=clf,
            approx_gradient=True,
            alpha_x=0.0,
            wiresmap=self.wires,
            cell_weights_list=self.cell_weights_list,
        )

        mref = mkvc(clf.means_[clf.predict(self.samples)])

        # check score value
        dm = self.model - mref
        score_approx0 = reg(self.model)
        score_approx1 = 0.5 * dm.dot(reg.deriv2(self.model, dm))
        passed_score_approx = np.allclose(score_approx0, score_approx1)
        self.assertTrue(passed_score_approx)

        reg.objfcts[0].approx_eval = False
        score = reg(self.model) - reg(mref)
        passed_score = np.allclose(score_approx0, score, rtol=1e-4)
        self.assertTrue(passed_score)

        print("scores for PGI  & Full Cov. are ok.")

        # check derivatives as an optimization on locally quadratic function
        deriv = reg.deriv(self.model)
        reg.objfcts[0].approx_gradient = False
        reg.objfcts[0].approx_hessian = False
        deriv_full = reg.deriv(self.model)
        passed_deriv1 = np.allclose(deriv, deriv_full, rtol=1e-4)
        self.assertTrue(passed_deriv1)
        print("1st derivatives for PGI & Full Cov. are ok.")

        # Newton direction from a direct solve; a full step should recover mref
        Hinv = SolverLU(reg.deriv2(self.model))
        p = Hinv * deriv
        direction2 = np.c_[self.wires * p]
        passed_derivative = np.allclose(
            mkvc(self.samples - direction2), mkvc(mref), rtol=1e-4
        )
        self.assertTrue(passed_derivative)
        print("2nd derivatives for PGI & Full Cov. are ok.")

        if self.PlotIt:
            print("Plotting", self.PlotIt)
            import matplotlib.pyplot as plt

            xmin, xmax = ymin, ymax = self.samples.min(), self.samples.max()
            x, y = np.mgrid[xmin:xmax:0.5, ymin:ymax:0.5]
            pos = np.empty(x.shape + (2,))
            pos[:, :, 0] = x
            pos[:, :, 1] = y
            rv = clf.score_samples(pos.reshape(-1, 2))
            rvm = clf.predict(pos.reshape(-1, 2))
            figfull, axfull = plt.subplots(1, 1, figsize=(16, 8))
            figfull.suptitle("Full Covariances Tests")

            axfull.contourf(x, y, rvm.reshape(x.shape), alpha=0.25, cmap="brg")
            axfull.contour(x, y, rv.reshape(x.shape), 20)
            axfull.scatter(
                self.samples[:, 0], self.samples[:, 1], color="blue", s=5.0, alpha=0.25
            )
            axfull.quiver(
                self.samples[:, 0],
                self.samples[:, 1],
                -(self.wires.s0 * deriv),
                -(self.wires.s1 * deriv),
                color="red",
                alpha=0.25,
            )
            axfull.quiver(
                self.samples[:, 0],
                self.samples[:, 1],
                -direction2[:, 0],
                -direction2[:, 1],
                color="k",
            )
            axfull.scatter(
                (self.samples - direction2)[:, 0],
                (self.samples - direction2)[:, 1],
                color="k",
                s=50.0,
            )
            axfull.set_xlabel("Property 1")
            axfull.set_ylabel("Property 2")
            axfull.set_title("PGI with W")

            plt.show()
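The passed_derivative check above relies on the fact that, for a locally quadratic objective, a full Newton step from the current model lands exactly on the reference model. A tiny self-contained numpy illustration of that identity (the Hessian and models are made-up stand-ins, not PGI quantities):

import numpy as np

rng = np.random.default_rng(42)
A = rng.normal(size=(5, 5))
H = A @ A.T + 5.0 * np.eye(5)     # symmetric positive-definite stand-in Hessian
mref = rng.normal(size=5)         # stand-in for the reference (cluster-mean) model
m = rng.normal(size=5)            # stand-in for the current model

grad = H @ (m - mref)             # gradient of 0.5 * (m - mref)^T H (m - mref)
newton_step = np.linalg.solve(H, grad)

# for an exactly quadratic objective the full Newton step recovers mref,
# which is what the PGI checks verify up to a relative tolerance
assert np.allclose(m - newton_step, mref)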
Example #6
    def test_pgi_regularization_approxDeriv(self):
        """
        This test might be redundant with the development
        of the tests above.
        """
        print("Testing the PGI approximated derivatives for full Cov.")
        print("======================================================")
        mean0 = np.r_[2.0, 0.0]
        sigma0 = np.r_[[[1.0, -1.0], [-1.0, 2.0]]]
        rv0 = multivariate_normal(mean0, sigma0)

        mean1 = mean0 - 2.0
        sigma1 = np.r_[[[0.5, 0.3], [0.3, 0.5]]]
        rv1 = multivariate_normal(mean1, sigma1)
        s0 = rv0.rvs(700)
        s1 = rv1.rvs(300)
        s = np.r_[s0, s1]
        model = mkvc(s)

        mesh = discretize.TensorMesh([s.shape[0]])
        wires = Wires(("s0", mesh.nC), ("s1", mesh.nC))

        n = 2
        clfref = WeightedGaussianMixture(
            mesh=mesh,
            n_components=n,
            covariance_type="full",
            max_iter=1000,
            n_init=20,
        )
        clfref.fit(s)

        reg = regularization.SimplePGI(
            mesh=mesh,
            gmmref=clfref,
            wiresmap=wires,
            approx_eval=False,
            approx_gradient=True,
            alpha_x=0.0,
        )

        deriv = reg.deriv(model)
        # matrix-free Newton step: apply the Hessian through reg.deriv2(model, x)
        H = lambda x: reg.deriv2(model, x)
        HH = LinearOperator([2000, 2000], matvec=H, rmatvec=H)
        deriv2 = bicgstab(HH, deriv, atol=1e-8)[0]

        # the same Newton step with the explicit sparse Hessian and a direct solve
        Hfull = reg.deriv2(model)
        deriv2bis = spsolve(Hfull, deriv)

        # a full Newton step (model - deriv2) should land each property on one of the cluster means
        tol = 1e-10
        error00 = np.max(
            np.minimum(
                np.abs((wires * (model - deriv2))[0] - clfref.means_[0][0]),
                np.abs((wires * (model - deriv2))[0] - clfref.means_[1][0]),
            ))
        error01 = np.max(
            np.minimum(
                np.abs((wires * (model - deriv2))[1] - clfref.means_[0][1]),
                np.abs((wires * (model - deriv2))[1] - clfref.means_[1][1]),
            ))
        error10 = np.max(
            np.minimum(
                np.abs((wires * (model - deriv2bis))[0] - clfref.means_[0][0]),
                np.abs((wires * (model - deriv2bis))[0] - clfref.means_[1][0]),
            ))
        error11 = np.max(
            np.minimum(
                np.abs((wires * (model - deriv2bis))[1] - clfref.means_[0][1]),
                np.abs((wires * (model - deriv2bis))[1] - clfref.means_[1][1]),
            ))

        self.assertTrue(np.max([error00, error01, error10, error11]) < tol)
        print("PGI approximated derivatives for full Cov. Tested and Happy")