Exemplo n.º 1
0
        def check(D, N, K, mu=None, Lambda=None, rho=None):
            """
            Check the rotation gradient for a varying-dynamics Markov chain.

            Builds a GaussianMarkovChain (``n=N + 1``) whose dynamics are the
            SumMultiply combination of two GaussianARD nodes with ``K``
            components, observes random data, and verifies the gradient
            returned by ``RotateVaryingMarkovChain.bound`` with
            ``optimize.check_gradient``.

            ``mu``, ``Lambda`` and ``rho`` default to ``np.zeros(D)``,
            ``np.identity(D)`` and ``np.ones(D)``.  Any of them that provides
            an ``update()`` method is updated once before the check.

            Uses ``self`` from the enclosing scope for the assertion.
            """
            if mu is None:
                mu = np.zeros(D)
            if Lambda is None:
                Lambda = np.identity(D)
            if rho is None:
                rho = np.ones(D)

            V = np.identity(D) + np.ones((D, D))

            # Construct model
            B = GaussianARD(3, 5, shape=(D, K), plates=(1, D))
            S = GaussianARD(2, 4, shape=(K, ), plates=(N, 1))
            A = SumMultiply('dk,k->d', B, S)
            X = GaussianMarkovChain(mu,
                                    Lambda,
                                    A,
                                    rho,
                                    n=N + 1,
                                    initialize=False)
            Y = Gaussian(X, V, initialize=False)

            # Posterior estimation
            Y.observe(np.random.randn(N + 1, D))
            X.update()
            B.update()
            S.update()
            # The chain parameters may be plain arrays, which have no
            # update() method; only that case is ignored so that genuine
            # update failures are not hidden.
            for parent in (mu, Lambda, rho):
                try:
                    parent.update()
                except AttributeError:
                    pass

            # Construct rotator
            rotB = RotateGaussianARD(B, axis=-2)
            rotX = RotateVaryingMarkovChain(X, B, S, rotB)
            rotX.setup()

            # Check gradient with respect to R
            R = np.random.randn(D, D)

            def cost(r):
                # The gradient checker works on flat vectors
                (b, dr) = rotX.bound(np.reshape(r, np.shape(R)))
                return (b, np.ravel(dr))

            # NOTE(review): other call sites in this file take element [1] of
            # the check_gradient result; confirm what this version returns.
            err = optimize.check_gradient(cost, np.ravel(R), verbose=False)
            self.assertAllClose(err, 0, atol=1e-6, msg="Gradient incorrect")
Exemplo n.º 2
0
        def check(D, N, mu=None, Lambda=None, rho=None, A=None):
            """
            Check the rotation gradient for a Gaussian Markov chain.

            Builds a GaussianMarkovChain (``n=N + 1``) observed through a
            Gaussian node with random data, then verifies the gradient
            returned by ``RotateGaussianMarkovChain.bound`` with
            ``optimize.check_gradient``.

            ``mu``, ``Lambda`` and ``rho`` default to ``np.zeros(D)``,
            ``np.identity(D)`` and ``np.ones(D)``; ``A`` defaults to a
            GaussianARD node with shape ``(D,)`` and plates ``(D,)``.  Any of
            them that provides an ``update()`` method is updated once.

            Uses ``self`` from the enclosing scope for the assertion.
            """
            if mu is None:
                mu = np.zeros(D)
            if Lambda is None:
                Lambda = np.identity(D)
            if rho is None:
                rho = np.ones(D)
            if A is None:
                A = GaussianARD(3, 5, shape=(D, ), plates=(D, ))

            V = np.identity(D) + np.ones((D, D))

            # Construct model
            X = GaussianMarkovChain(mu,
                                    Lambda,
                                    A,
                                    rho,
                                    n=N + 1,
                                    initialize=False)
            Y = Gaussian(X, V, initialize=False)

            # Posterior estimation
            Y.observe(np.random.randn(*(Y.get_shape(0))))
            X.update()
            # Parents given as plain arrays have no update() method; only
            # that case is ignored so that genuine update failures surface.
            for parent in (A, mu, Lambda, rho):
                try:
                    parent.update()
                except AttributeError:
                    pass

            # Construct rotator
            rotA = RotateGaussianARD(A, axis=-1)
            rotX = RotateGaussianMarkovChain(X, rotA)
            rotX.setup()

            # Check gradient with respect to R
            R = np.random.randn(D, D)

            def cost(r):
                # The gradient checker works on flat vectors
                (b, dr) = rotX.bound(np.reshape(r, np.shape(R)))
                return (b, np.ravel(dr))

            # NOTE(review): other call sites in this file take element [1] of
            # the check_gradient result; confirm what this version returns.
            err = optimize.check_gradient(cost, np.ravel(R), verbose=False)
            self.assertAllClose(err, 0, atol=1e-5, msg="Gradient incorrect")
Exemplo n.º 3
0
        def test(shape,
                 plates,
                 axis=-1,
                 alpha_plates=None,
                 plate_axis=None,
                 mu=3):
            """
            Check the gradients returned by ``RotateGaussianARD.bound``.

            A GaussianARD node ``X`` with the given ``shape`` and ``plates``
            is created with mean ``mu`` and precision ``alpha`` (a Gamma node
            with plates ``alpha_plates``, or the constant 2 when
            ``alpha_plates`` is None) and fitted to random observations.

            The gradient with respect to the rotation ``R`` of axis ``axis``
            is always checked.  When ``plate_axis`` is given, the gradient
            with respect to the plate rotation ``Q`` is checked as well, and
            the whole check is run both without and with ``precompute``.

            Uses ``self`` from the enclosing scope for the assertions.
            """
            if plate_axis is not None:
                precomputes = [False, True]
            else:
                precomputes = [False]

            for precompute in precomputes:
                # Construct the model
                D = shape[axis]
                if alpha_plates is not None:
                    alpha = Gamma(3, 5, plates=alpha_plates)
                    alpha.initialize_from_random()
                else:
                    alpha = 2
                X = GaussianARD(mu, alpha, shape=shape, plates=plates)

                # Some initial learning and rotator constructing
                X.initialize_from_random()
                Y = GaussianARD(X, 1)
                Y.observe(np.random.randn(*(Y.get_shape(0))))
                X.update()
                if alpha_plates is not None:
                    alpha.update()
                    rotX = RotateGaussianARD(X,
                                             alpha,
                                             axis=axis,
                                             precompute=precompute)
                else:
                    rotX = RotateGaussianARD(X,
                                             axis=axis,
                                             precompute=precompute)
                # mu may be a plain number without an update() method; only
                # that case is ignored so genuine update failures surface.
                try:
                    mu.update()
                except AttributeError:
                    pass

                # Rotation matrices
                R = np.random.randn(D, D)
                if plate_axis is not None:
                    C = plates[plate_axis]
                    Q = np.random.randn(C, C)
                else:
                    Q = None

                # Compute bound terms
                rotX.setup(plate_axis=plate_axis)

                if plate_axis is None:

                    def f_r(r):
                        # bound() returns (bound, dR) without plate rotation
                        (b, dr) = rotX.bound(np.reshape(r, np.shape(R)))
                        return (b, np.ravel(dr))
                else:

                    def f_r(r):
                        # Vary R while keeping Q fixed
                        (b, dr, _) = rotX.bound(np.reshape(r, np.shape(R)),
                                                Q=Q)
                        return (b, np.ravel(dr))

                    def f_q(q):
                        # Vary Q while keeping R fixed
                        (b, _, dq) = rotX.bound(R,
                                                Q=np.reshape(q, np.shape(Q)))
                        return (b, np.ravel(dq))

                # Check gradient with respect to R
                # NOTE(review): other call sites in this file take element [1]
                # of the check_gradient result; confirm what this version
                # returns.
                err = optimize.check_gradient(f_r, np.ravel(R), verbose=False)
                self.assertAllClose(err,
                                    0,
                                    atol=1e-4,
                                    msg="Gradient incorrect for R")

                # Check gradient with respect to Q
                if plate_axis is not None:
                    err = optimize.check_gradient(f_q,
                                                  np.ravel(Q),
                                                  verbose=False)
                    self.assertAllClose(err,
                                        0,
                                        atol=1e-4,
                                        msg="Gradient incorrect for Q")
Exemplo n.º 4
0
    def rotate(self, maxiter=10, check_gradient=False, verbose=False, check_bound=False):
        r"""
        Optimize the rotation of two separate model blocks jointly.

        If some variable is the dot product of two Gaussians, rotating the two
        Gaussians optimally can make the inference algorithm orders of magnitude
        faster.

        First block is rotated with :math:`\mathbf{R}` and the second with
        :math:`\mathbf{R}^{-T}`.

        Blocks must have methods `setup()`, `bound(R, logdet=..., inv=...)`
        and `rotate(R, inv=..., logdet=...)`.  With `check_bound=True` they
        must also provide `get_bound_terms(R, logdet=..., inv=...)` and
        `nodes()`.

        Parameters
        ----------
        maxiter : int
            Passed to `optimize.minimize` as the iteration limit.
        check_gradient : bool
            If True, check the gradient of the cost function at a random
            rotation and warn if the reported error exceeds 1e-5.
        verbose : bool
            Passed to `optimize.check_gradient` and `optimize.minimize`.
        check_bound : bool
            If True, compare for each node the change of its bound term in
            the cost function with the change of its
            `lower_bound_contribution()` and warn on a mismatch.

        Raises
        ------
        Exception
            With `check_bound=True`, if a node reported by the blocks'
            `get_bound_terms` is not among the nodes returned by the blocks'
            `nodes()`.
        """

        def unpack(r):
            """
            Return (R, inv(R), log|det R|) for the flattened rotation r.
            """
            R = np.reshape(r, (self.D, self.D))
            invR = np.linalg.inv(R)
            logdetR = np.linalg.slogdet(R)[1]
            return (R, invR, logdetR)

        def cost(r):
            """
            Return the negative bound and its flattened gradient at r.
            """
            (R, invR, logdetR) = unpack(r)

            # Compute lower bound terms
            (b1, db1) = self.block1.bound(R, logdet=logdetR, inv=invR)
            (b2, db2) = self.block2.bound(invR.T, logdet=-logdetR, inv=R.T)

            # Apply chain rule for the second gradient:
            # d b(invR.T)
            # = tr(db.T * d(invR.T))
            # = tr(db * d(invR))
            # = -tr(db * invR * (dR) * invR)
            # = -tr(invR * db * invR * dR)
            db2 = -dot(invR.T, db2.T, invR.T)

            # The optimizer minimizes, so negate the bound and its gradient
            c = -(b1 + b2)
            dc = -(db1 + db2)

            return (c, np.ravel(dc))

        def get_bound_terms(r, gradient=False):
            """
            Returns a dictionary of bound terms for the nodes.
            """
            # Gradient not yet implemented..
            if gradient:
                raise NotImplementedError()

            (R, invR, logdetR) = unpack(r)

            # Merge the terms of the second block into those of the first
            terms = self.block1.get_bound_terms(R, logdet=logdetR, inv=invR)
            terms.update(
                self.block2.get_bound_terms(invR.T, logdet=-logdetR, inv=R.T)
            )
            return terms

        def get_true_bound_terms():
            """
            Return the current lower bound contribution of each block node.
            """
            nodes = set(self.block1.nodes()) | set(self.block2.nodes())
            # TODO/FIXME: Also compute bound for child nodes as they could be
            # affected in practice although they shouldn't. Just checking that.
            return {node: node.lower_bound_contribution() for node in nodes}

        self.block1.setup()
        self.block2.setup()

        if check_gradient:
            R = np.random.randn(self.D, self.D)
            err = optimize.check_gradient(cost, np.ravel(R), verbose=verbose)[1]
            if err > 1e-5:
                warnings.warn("Rotation gradient has relative error %g" % err)

        # Initial rotation is identity matrix
        r0 = np.ravel(np.identity(self.D))

        (cost_begin, _) = cost(r0)
        if check_bound:
            bound_terms_begin = get_bound_terms(r0)
            true_bound_terms_begin = get_true_bound_terms()

        # Run optimization
        r = optimize.minimize(cost, r0, maxiter=maxiter, verbose=verbose)

        (cost_end, _) = cost(r)
        if check_bound:
            bound_terms_end = get_bound_terms(r)

        # Apply the optimal rotation
        (R, invR, logdetR) = unpack(r)
        self.block1.rotate(R, inv=invR, logdet=logdetR)
        self.block2.rotate(invR.T, inv=R.T, logdet=-logdetR)

        # Check that the optimization did not increase the cost
        cost_change = cost_end - cost_begin
        if cost_change > 0:
            warnings.warn(
                "Rotation optimization made the cost function worse "
                "by %g. Probably a bug in the gradient of the "
                "rotation functions." % (cost_change,)
            )

        if check_bound:
            # Check that the cost function and the true lower bound changed
            # equally for every node
            true_bound_terms_end = get_true_bound_terms()
            bound_change = 0
            for node in bound_terms_begin:
                node_bound_change = bound_terms_end[node] - bound_terms_begin[node]
                bound_change += node_bound_change
                try:
                    true_node_bound_change = (
                        true_bound_terms_end[node] - true_bound_terms_begin[node]
                    )
                except KeyError as err:
                    raise Exception(
                        "The node %s is part of the "
                        "transformation but not part of the "
                        "model. Check your VB construction." % node.name
                    ) from err
                if not np.allclose(node_bound_change, true_node_bound_change):
                    warnings.warn(
                        "Rotation cost function is not consistent "
                        "with the true lower bound for node %s. "
                        "Bound changed %g but optimized function "
                        "changed %g." % (node.name, true_node_bound_change, node_bound_change)
                    )

            # Check that we really have improved the bound.
            # TODO/FIXME: Also compute bound for child nodes as they could be
            # affected in practice although they shouldn't. Just checking that.
            if bound_change < 0:
                warnings.warn(
                    "Rotation made the true lower bound worse by %g. "
                    "Probably a bug in the rotation functions." % (bound_change,)
                )
Exemplo n.º 5
0
        def check(D, N, mu=None, Lambda=None, rho=None, A=None):
            """
            Verify the gradient of ``RotateGaussianMarkovChain.bound``.

            A GaussianMarkovChain (``n=N+1``) is built from ``mu``,
            ``Lambda``, ``A`` and ``rho``, observed through a Gaussian node
            with random data and updated once.  The gradient of the rotation
            bound is then checked with ``optimize.check_gradient`` at a
            random ``D`` x ``D`` matrix.

            Missing arguments default to ``np.zeros(D)`` (``mu``),
            ``np.identity(D)`` (``Lambda``), ``np.ones(D)`` (``rho``) and a
            GaussianARD node with shape ``(D,)`` and plates ``(D,)`` (``A``).

            Uses ``self`` from the enclosing scope for the assertion.
            """
            if mu is None:
                mu = np.zeros(D)
            if Lambda is None:
                Lambda = np.identity(D)
            if rho is None:
                rho = np.ones(D)
            if A is None:
                A = GaussianARD(3, 5,
                                shape=(D,),
                                plates=(D,))

            V = np.identity(D) + np.ones((D,D))

            # Construct model
            X = GaussianMarkovChain(mu,
                                    Lambda,
                                    A,
                                    rho,
                                    n=N+1,
                                    initialize=False)
            Y = Gaussian(X,
                         V,
                         initialize=False)

            # Posterior estimation
            Y.observe(np.random.randn(*(Y.get_shape(0))))
            X.update()
            # Update the parents that can be updated.  Plain arrays have no
            # update() method; only that case is ignored, so real failures
            # inside an update are no longer silently swallowed.
            for parent in (A, mu, Lambda, rho):
                try:
                    parent.update()
                except AttributeError:
                    pass

            # Construct rotator
            rotA = RotateGaussianARD(A, axis=-1)
            rotX = RotateGaussianMarkovChain(X, rotA)
            rotX.setup()

            # Check gradient with respect to R
            R = np.random.randn(D, D)
            def cost(r):
                # The gradient checker works on flat vectors
                (b, dr) = rotX.bound(np.reshape(r, np.shape(R)))
                return (b, np.ravel(dr))

            # Element [1] of the check_gradient result is compared to zero
            err = optimize.check_gradient(cost,
                                          np.ravel(R),
                                          verbose=False)[1]
            self.assertAllClose(err, 0,
                                atol=1e-5,
                                msg="Gradient incorrect")
Exemplo n.º 6
0
        def test(shape, plates, 
                 axis=-1, 
                 alpha_plates=None, 
                 plate_axis=None,
                 mu=3):
            """
            Verify the gradients of ``RotateGaussianARD.bound``.

            Creates a GaussianARD node ``X`` (``shape``, ``plates``) with mean
            ``mu`` and precision ``alpha``, where ``alpha`` is a Gamma node
            with plates ``alpha_plates`` or the constant 2 when
            ``alpha_plates`` is None, and fits it to random observations.

            The gradient with respect to the rotation ``R`` of axis ``axis``
            is always checked.  If ``plate_axis`` is given, the gradient with
            respect to the plate rotation ``Q`` is checked too, and
            everything is repeated with ``precompute`` False and True.

            Uses ``self`` from the enclosing scope for the assertions.
            """
            if plate_axis is not None:
                precomputes = [False, True]
            else:
                precomputes = [False]

            for precompute in precomputes:
                # Construct the model
                D = shape[axis]
                if alpha_plates is not None:
                    alpha = Gamma(3, 5,
                                  plates=alpha_plates)
                    alpha.initialize_from_random()
                else:
                    alpha = 2
                X = GaussianARD(mu, alpha,
                                shape=shape,
                                plates=plates)

                # Some initial learning and rotator constructing
                X.initialize_from_random()
                Y = GaussianARD(X, 1)
                Y.observe(np.random.randn(*(Y.get_shape(0))))
                X.update()
                if alpha_plates is not None:
                    alpha.update()
                    rotX = RotateGaussianARD(X, alpha,
                                             axis=axis,
                                             precompute=precompute)
                else:
                    rotX = RotateGaussianARD(X,
                                             axis=axis,
                                             precompute=precompute)
                # A numeric mu has no update() method; only that case is
                # ignored so that real update failures are not hidden.
                try:
                    mu.update()
                except AttributeError:
                    pass

                # Rotation matrices
                R = np.random.randn(D, D)
                if plate_axis is not None:
                    C = plates[plate_axis]
                    Q = np.random.randn(C, C)
                else:
                    Q = None

                # Compute bound terms
                rotX.setup(plate_axis=plate_axis)

                if plate_axis is None:
                    def f_r(r):
                        # bound() returns (bound, dR) without plate rotation
                        (b, dr) = rotX.bound(np.reshape(r, np.shape(R)))
                        return (b, np.ravel(dr))
                else:
                    def f_r(r):
                        # Vary R while keeping Q fixed
                        (b, dr, _) = rotX.bound(np.reshape(r, np.shape(R)),
                                                Q=Q)
                        return (b, np.ravel(dr))

                    def f_q(q):
                        # Vary Q while keeping R fixed
                        (b, _, dq) = rotX.bound(R,
                                                Q=np.reshape(q, np.shape(Q)))
                        return (b, np.ravel(dq))

                # Check gradient with respect to R
                err = optimize.check_gradient(f_r,
                                              np.ravel(R),
                                              verbose=False)[1]
                self.assertAllClose(err, 0,
                                    atol=1e-4,
                                    msg="Gradient incorrect for R")

                # Check gradient with respect to Q
                if plate_axis is not None:
                    err = optimize.check_gradient(f_q,
                                                  np.ravel(Q),
                                                  verbose=False)[1]
                    self.assertAllClose(err, 0,
                                        atol=1e-4,
                                        msg="Gradient incorrect for Q")
Exemplo n.º 7
0
        def check(D, N, K,
                  mu=None,
                  Lambda=None,
                  rho=None):
            """
            Verify the gradient of ``RotateVaryingMarkovChain.bound``.

            The dynamics of a GaussianMarkovChain (``n=N+1``) are formed with
            ``SumMultiply('dk,k->d', B, S)`` from two GaussianARD nodes with
            ``K`` components.  The chain is observed through a Gaussian node
            with random data, the nodes are updated once, and the gradient of
            the rotation bound is checked with ``optimize.check_gradient`` at
            a random ``D`` x ``D`` matrix.

            ``mu``, ``Lambda`` and ``rho`` default to ``np.zeros(D)``,
            ``np.identity(D)`` and ``np.ones(D)``.

            Uses ``self`` from the enclosing scope for the assertion.
            """
            if mu is None:
                mu = np.zeros(D)
            if Lambda is None:
                Lambda = np.identity(D)
            if rho is None:
                rho = np.ones(D)

            V = np.identity(D) + np.ones((D,D))

            # Construct model
            B = GaussianARD(3, 5,
                            shape=(D,K),
                            plates=(1,D))
            S = GaussianARD(2, 4,
                            shape=(K,),
                            plates=(N,1))
            A = SumMultiply('dk,k->d', B, S)
            X = GaussianMarkovChain(mu,
                                    Lambda,
                                    A,
                                    rho,
                                    n=N+1,
                                    initialize=False)
            Y = Gaussian(X,
                         V,
                         initialize=False)

            # Posterior estimation
            Y.observe(np.random.randn(N+1,D))
            X.update()
            B.update()
            S.update()
            # Update the chain parameters that can be updated.  Plain arrays
            # have no update() method; only that case is ignored, so real
            # failures inside an update are no longer silently swallowed.
            for parent in (mu, Lambda, rho):
                try:
                    parent.update()
                except AttributeError:
                    pass

            # Construct rotator
            rotB = RotateGaussianARD(B, axis=-2)
            rotX = RotateVaryingMarkovChain(X, B, S, rotB)
            rotX.setup()

            # Check gradient with respect to R
            R = np.random.randn(D, D)
            def cost(r):
                # The gradient checker works on flat vectors
                (b, dr) = rotX.bound(np.reshape(r, np.shape(R)))
                return (b, np.ravel(dr))

            # Element [1] of the check_gradient result is compared to zero
            err = optimize.check_gradient(cost,
                                          np.ravel(R),
                                          verbose=False)[1]
            self.assertAllClose(err, 0,
                                atol=1e-6,
                                msg="Gradient incorrect")