Пример #1
0
    def _computations_for_A_and_X(self, XpXn, XpXp):
        """
        Compute the expectations that involve both A and X.

        Parameters
        ----------
        XpXn : array
            Contracted with <A_n> as the cross term <x_{n-1} x_n^T>.
        XpXp : array
            Contracted with <A_n> and Cov(A_n) as <x_{n-1} x_{n-1}^T>.

        Returns
        -------
        tuple
            ``(A_XpXn, A_XpXp_A, CovA_XpXp)``, i.e.
            sum_n <A_n> <x_{n-1} x_n^T>,
            sum_n <A_n> <x_{n-1} x_{n-1}^T> <A_n>^T and
            sum_n tr(Cov(A_n) <x_{n-1} x_{n-1}^T>) (one value per index of
            the remaining ``d`` axis), each passed through ``sum_to_plates``
            with an empty target plate tuple.
        """
        # First and second moments of the state dynamics matrix
        (mean_A, second_A) = self.A_node.get_moments()

        # Drop the trailing entries that correspond to input signals: only the
        # first D are kept, D being the length of the second-to-last axis
        n_states = np.shape(mean_A)[-2]
        state = slice(None, n_states)
        # ... and make sure that the time axis is present in both arrays
        mean_A = misc.atleast_nd(mean_A[..., state], 3)
        second_A = misc.atleast_nd(second_A[..., state, state], 4)

        # Covariance: second moment minus the outer product of the mean
        outer_A = mean_A[..., :, np.newaxis] * mean_A[..., np.newaxis, :]
        cov_A = second_A - outer_A

        #
        # Expectations with respect to A and X
        #

        # TODO: In case A does not depend on time, use a bit more efficient
        # formulas

        # sum_n <A_n> <x_{n-1} x_n^T>
        cross_term = sum_to_plates(
            np.einsum("...nik,...nkj->...ij", mean_A, XpXn),
            (),
            ndim=2,
            plates_from=self.X_node.plates)

        # sum_n <A_n> <x_{n-1} x_{n-1}^T> <A_n>^T
        left_product = np.einsum("...nik,...nkj->...nij", mean_A, XpXp)
        quadratic_term = sum_to_plates(
            np.einsum("...nik,...njk->...ij", left_product, mean_A),
            (),
            ndim=2,
            plates_from=self.X_node.plates)

        # sum_n tr(CovA_n <x_{n-1} x_{n-1}^T>)
        covariance_term = sum_to_plates(
            np.einsum("...ndij,...nij->...d", cov_A, XpXp),
            (),
            ndim=1,
            plates_from=self.X_node.plates)

        return (cross_term, quadratic_term, covariance_term)
Пример #2
0
def _hinton(W, error=None, vmax=None, square=True):
    """
    Draws a Hinton diagram for visualizing a weight matrix.

    Temporarily disables matplotlib interactive mode if it is on,
    otherwise this takes forever.  The previous mode is restored when the
    function exits, also when drawing fails with an exception.

    Originally copied from
    http://wiki.scipy.org/Cookbook/Matplotlib/HintonDiagrams

    Parameters
    ----------
    W : array_like
        Weights.  Promoted with ``misc.atleast_nd(W, 2)``; the result must
        be two-dimensional (rows, columns).
    error : array_like, optional
        Non-negative values indexed like ``W``.  When given, an unfilled
        rectangle with side ``min(1, sqrt((|w| + error) / vmax))`` is drawn
        for each element in addition to the blob.
    vmax : float, optional
        Normalizing magnitude: blob sizes are ``min(1, |w| / vmax)``.  If
        falsy (``None`` or 0), ``max(|W| + error)`` (or ``max(|W|)`` when no
        errors are given) is used.
    square : bool, optional
        If true, ``plt.axis('equal')`` is applied.

    Raises
    ------
    Exception
        If an error value is negative or ``|w| + error`` exceeds ``vmax``.
    """
    # Remember whether interactive mode must be switched back on afterwards
    reenable = plt.isinteractive()
    if reenable:
        plt.ioff()

    try:
        W = misc.atleast_nd(W, 2)
        (height, width) = W.shape
        if not vmax:
            if error is not None:
                vmax = np.max(np.abs(W) + error)
            else:
                vmax = np.max(np.abs(W))

        # Gray background covering the whole matrix
        plt.fill(0.5+np.array([0, width, width, 0]),
                 0.5+np.array([0, 0, height, height]),
                 'gray')
        plt.axis('off')
        if square:
            plt.axis('equal')
        # Put the first row at the top
        plt.gca().invert_yaxis()

        for x in range(width):
            for y in range(height):
                # Elements are centered on 1-based integer coordinates
                _x = x+1
                _y = y+1
                w = W[y, x]
                _w = np.abs(w)
                # Positive weights are white, all others black
                if w > 0:
                    _c = 'white'
                else:
                    _c = 'black'
                if error is not None:
                    e = error[y, x]
                    if e < 0:
                        print(e, _w, vmax)
                        raise Exception("BUG? Negative error")
                    if _w + e > vmax:
                        print(e, _w, vmax)
                        raise Exception("BUG? Value+error greater than max")
                    side = min(1, np.sqrt((_w+e)/vmax))
                    _rectangle(_x,
                               _y,
                               side,
                               side,
                               edgecolor=_c,
                               fill=False)
                _blob(_x, _y, min(1, _w/vmax), _c)
    finally:
        # Restore interactive mode even if drawing raised, so that a failed
        # plot does not silently leave matplotlib non-interactive
        if reenable:
            plt.ion()
Пример #3
0
def _hinton(W, error=None, vmax=None, square=False, axes=None):
    """
    Draw a Hinton diagram of a weight matrix on the given axes.

    Originally copied from
    http://wiki.scipy.org/Cookbook/Matplotlib/HintonDiagrams

    Parameters
    ----------
    W : array_like
        Weights.  Promoted with ``misc.atleast_nd(W, 2)``; the result must
        be two-dimensional (rows, columns).
    error : array_like, optional
        Non-negative values indexed like ``W``.  When given, an unfilled
        rectangle with side ``min(1, sqrt((|w| + error) / vmax))`` is drawn
        for each element in addition to the blob.
    vmax : float, optional
        Normalizing magnitude: blob sizes are ``min(1, |w| / vmax)``.  If
        falsy, ``max(|W| + error)`` (or ``max(|W|)`` without errors) is used.
    square : bool, optional
        If true, the aspect ratio of the axes is set to ``'equal'``.
    axes : optional
        Axes to draw on; ``plt.gca()`` is used when omitted.

    Raises
    ------
    Exception
        If an error value is negative or ``|w| + error`` exceeds ``vmax``.
    """
    ax = plt.gca() if axes is None else axes

    W = misc.atleast_nd(W, 2)
    (n_rows, n_cols) = W.shape

    if not vmax:
        abs_W = np.abs(W)
        vmax = np.max(abs_W if error is None else abs_W + error)

    # Gray background covering the whole matrix
    background_x = 0.5 + np.array([0, n_cols, n_cols, 0])
    background_y = 0.5 + np.array([0, 0, n_rows, n_rows])
    ax.fill(background_x, background_y, 'gray')

    if square:
        ax.set_aspect('equal')
    ax.set_ylim(0.5, n_rows + 0.5)
    ax.set_xlim(0.5, n_cols + 0.5)
    ax.set_xticks([])
    ax.set_yticks([])
    # Put the first row at the top
    ax.invert_yaxis()

    for col in range(n_cols):
        for row in range(n_rows):
            # Elements are centered on 1-based integer coordinates
            (center_x, center_y) = (col + 1, row + 1)
            weight = W[row, col]
            magnitude = np.abs(weight)
            # Positive weights are white, all others black
            color = 'white' if weight > 0 else 'black'

            if error is not None:
                err = error[row, col]
                if err < 0:
                    print(err, magnitude, vmax)
                    raise Exception("BUG? Negative error")
                if magnitude + err > vmax:
                    print(err, magnitude, vmax)
                    raise Exception("BUG? Value+error greater than max")
                side = min(1, np.sqrt((magnitude + err) / vmax))
                _rectangle(ax,
                           center_x,
                           center_y,
                           side,
                           side,
                           edgecolor=color,
                           fill=False)

            _blob(ax, center_x, center_y, min(1, magnitude / vmax), color)
Пример #4
0
def timeseries_categorical_mc(Z):
    """
    Plot the first moment of a node as a grid of Hinton diagrams.

    The node is converted to categorical moments, its first moment is
    broadcast to the full shape ``Z.get_shape(0)`` and promoted to four
    axes.  The two leading axes define the subplot grid; each remaining
    two-dimensional slice is transposed and drawn with ``hinton``.

    Raises
    ------
    ValueError
        If the moment array has more than four axes.
    """
    # Make sure that the node is categorical
    Z = Z._convert(CategoricalMoments)

    # Get expectations (and broadcast explicitly)
    probs = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Compute the subplot layout
    probs = misc.atleast_nd(probs, 4)
    if np.ndim(probs) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    (n_rows, n_cols) = np.shape(probs)[:2]

    # Plot Hintons, walking the grid row by row
    for cell in range(n_rows * n_cols):
        (row, col) = divmod(cell, n_cols)
        plt.subplot(n_rows, n_cols, cell + 1)
        hinton(probs[row, col].T, vmax=1.0, square=False)
Пример #5
0
def timeseries_categorical_mc(Z, fig=None):
    """
    Plot the first moment of a node as a grid of Hinton diagrams.

    The node is converted to categorical moments, its first moment is
    broadcast to the full shape ``Z.get_shape(0)`` and promoted to four
    axes.  The two leading axes define the subplot grid; each remaining
    two-dimensional slice is transposed and drawn with ``_hinton`` on its
    own subplot.

    Parameters
    ----------
    Z : node
        Node to plot.
    fig : optional
        Figure to add the subplots to; ``plt.gcf()`` is used when omitted.

    Raises
    ------
    ValueError
        If the moment array has more than four axes.
    """
    figure = plt.gcf() if fig is None else fig

    # Make sure that the node is categorical
    Z = Z._convert(CategoricalMoments)

    # Get expectations (and broadcast explicitly)
    first_moment = Z._message_to_child()[0]
    probs = misc.atleast_nd(first_moment * np.ones(Z.get_shape(0)), 4)
    if np.ndim(probs) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")

    # The two leading axes give the subplot layout
    grid_shape = np.shape(probs)
    n_rows = grid_shape[0]
    n_cols = grid_shape[1]

    # Plot Hintons; subplot positions are 1-based and run row by row
    position = 0
    for row in range(n_rows):
        for col in range(n_cols):
            position += 1
            subplot_axes = figure.add_subplot(n_rows, n_cols, position)
            _hinton(probs[row, col].T,
                    vmax=1.0,
                    square=False,
                    axes=subplot_axes)
Пример #6
0
def timeseries_categorical_mc(Z, fig=None):
    """
    Plot the first moment of a node as a grid of Hinton diagrams.

    The node is passed through ``Z._ensure_moments`` with
    ``CategoricalMoments``, its first moment is broadcast to the full shape
    ``Z.get_shape(0)`` and promoted to four axes.  The two leading axes
    define the subplot grid; each remaining two-dimensional slice is
    transposed and drawn with ``_hinton`` on its own subplot.

    Parameters
    ----------
    Z : node
        Node to plot.
    fig : optional
        Figure to add the subplots to; ``plt.gcf()`` is used when omitted.

    Raises
    ------
    ValueError
        If the moment array has more than four axes.
    """
    if fig is None:
        fig = plt.gcf()

    # Make sure that the node is categorical
    Z = Z._ensure_moments(Z, CategoricalMoments, categories=None)

    # Get expectations (and broadcast explicitly)
    expectations = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Compute the subplot layout
    expectations = misc.atleast_nd(expectations, 4)
    if np.ndim(expectations) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    (n_rows, n_cols) = np.shape(expectations)[:2]

    # Grid cells in row-major order; subplot numbers are 1-based
    cells = [(row, col) for row in range(n_rows) for col in range(n_cols)]

    # Plot Hintons
    for (number, (row, col)) in enumerate(cells, start=1):
        cell_axes = fig.add_subplot(n_rows, n_cols, number)
        _hinton(expectations[row, col].T,
                vmax=1.0,
                square=False,
                axes=cell_axes)
Пример #7
0
    def compute_message_to_parent(self, parent, index, u, *u_parents):
        """
        Compute the message to a parent node.

        For ``index == 0`` the message is a one-element list holding the
        log-pdf returned by the mixed distribution, with the cluster axis
        moved to the last plate position.  For ``index >= 1`` the message of
        the mixed distribution for parent ``index - 1`` is computed and each
        element is weighted by the first moment of the first parent.  For
        any other index nothing is returned.

        Raises
        ------
        RuntimeError, ValueError
            If ``self.cluster_plate`` is not negative.
        """
        # Moments of the parents of the mixed distribution, i.e., all parents
        # except the first one
        u_mixed = u_parents[1:]

        if index == 0:

            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(L)      = [Nn,..,K,..,N0]
            # Shape(u)      = [Nn,..,N0,Dd,..,D0]
            # Shape(result) = [Nn,..,N0,K]

            # Compute g and bring it to the form:
            # Shape(g)      = [Nn,..,N0,K]
            g = self.distribution.compute_cgf_from_parents(*u_mixed)
            if np.ndim(g) >= abs(self.cluster_plate):
                # Move the cluster plate axis
                g = misc.moveaxis(g, self.cluster_plate, -1)
            else:
                # Not enough axes, just add the cluster plate axis
                g = np.expand_dims(g, -1)

            # Compute phi and bring each element to the form:
            # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
            phi = self.distribution.compute_phi_from_parents(*u_mixed)
            for (ind, phi_i) in enumerate(phi):
                if not (self.cluster_plate < 0):
                    raise RuntimeError("Cluster plate axis must be negative")
                ndim = self.ndims[ind]
                axis_from = self.cluster_plate - ndim
                axis_to = -1 - ndim
                rank = np.ndim(phi_i)
                if rank >= abs(axis_from):
                    # Cluster plate axis exists, move it to the correct position
                    phi[ind] = misc.moveaxis(phi_i, axis_from, axis_to)
                elif rank >= abs(axis_to):
                    # No cluster plate axis, just add a new axis to the correct
                    # position, if phi has something on that axis
                    phi[ind] = np.expand_dims(phi_i, axis=axis_to)

            # Add a unit cluster axis to u:
            # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
            u_self = [
                np.expand_dims(u_i, axis=(-1 - self.ndims[ind]))
                for (ind, u_i) in enumerate(u)
            ]

            # Compute logpdf:
            # Shape(L)      = [Nn,..,N0,K]
            #
            # No summing over other than the cluster dimensions here; the
            # original author expected the message passing to take care of it.
            L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)

            return [L]

        elif index >= 1:

            # Parent index for the distribution used for the mixture
            index_for_parent = index - 1

            # Add a unit cluster axis to u:
            # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
            u_self = []
            for (ind, u_i) in enumerate(u):
                if not (self.cluster_plate < 0):
                    raise ValueError("Cluster plate axis must be negative")
                cluster_axis = self.cluster_plate - self.ndims[ind]
                u_self.append(np.expand_dims(u_i, axis=cluster_axis))

            # Message from the mixed distribution
            m = self.distribution.compute_message_to_parent(
                parent, index_for_parent, u_self, *u_mixed)

            # Note: The cluster assignment probabilities can be considered as
            # weights to plate elements. These weights need to mapped properly
            # via the plate mapping of self.distribution. Otherwise, nested
            # mixtures won't work, or possibly not any distribution that does
            # something to the plates. Thus, use compute_weights_to_parent to
            # compute the transformations to the weight array properly.
            #
            # See issue #39 for more details.

            # Compute weights (i.e., cluster assignment probabilities) and map
            # the plates properly.
            p = misc.moveaxis(
                misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate)),
                -1,
                self.cluster_plate)
            p = self.distribution.compute_weights_to_parent(index_for_parent, p)

            # Weigh the elements in the message array
            weighted = []
            for (mi, ndim) in zip(m, self.ndims_parents[index_for_parent]):
                weighted.append(mi * misc.add_trailing_axes(p, ndim))

            return weighted
Пример #8
0
    def compute_message_to_parent(self, parent, index, u, *u_parents):
        """
        Compute the message to a parent node.

        ``index == 0`` yields a one-element list with the log-pdf computed by
        the mixed distribution (cluster axis last among the plates).
        ``index >= 1`` yields the message of the mixed distribution for its
        parent ``index - 1``, each element multiplied by the weights derived
        from the first moment of the first parent.  Any other index yields
        ``None``.

        Raises
        ------
        RuntimeError, ValueError
            If ``self.cluster_plate`` is not negative.
        """
        if index == 0:

            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(L)      = [Nn,..,K,..,N0]
            # Shape(u)      = [Nn,..,N0,Dd,..,D0]
            # Shape(result) = [Nn,..,N0,K]

            # Compute g:
            # Shape(g)      = [Nn,..,K,..,N0]
            g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
            # Reshape(g):
            # Shape(g)      = [Nn,..,N0,K]
            has_cluster_axis = np.ndim(g) >= abs(self.cluster_plate)
            if has_cluster_axis:
                # Move the cluster plate axis
                g = misc.moveaxis(g, self.cluster_plate, -1)
            else:
                # Not enough axes, just add the cluster plate axis
                g = np.expand_dims(g, -1)

            # Compute phi:
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
            # Move phi axis:
            # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
            for k in range(len(phi)):
                if not (self.cluster_plate < 0):
                    raise RuntimeError("Cluster plate axis must be negative")
                source_axis = self.cluster_plate - self.ndims[k]
                target_axis = -1 - self.ndims[k]
                if np.ndim(phi[k]) >= abs(source_axis):
                    # Cluster plate axis exists, move it to the correct position
                    phi[k] = misc.moveaxis(phi[k], source_axis, target_axis)
                    continue
                # No cluster plate axis, just add a new axis to the correct
                # position, if phi has something on that axis
                if np.ndim(phi[k]) >= abs(target_axis):
                    phi[k] = np.expand_dims(phi[k], axis=target_axis)

            # Reshape u:
            # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
            u_self = []
            for (k, moment) in enumerate(u):
                u_self.append(
                    np.expand_dims(moment, axis=(-1 - self.ndims[k])))

            # Compute logpdf:
            # Shape(L)      = [Nn,..,N0,K]
            logpdf = self.distribution.compute_logpdf(
                u_self, phi, g, 0, self.ndims)

            # Summing over other than the cluster dimensions is not done here;
            # the original author expected the message passing to handle it.
            return [logpdf]

        if index >= 1:

            # Parent index for the distribution used for the
            # mixture.
            index_for_parent = index - 1

            # Reshape u:
            # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
            u_self = []
            for (k, moment) in enumerate(u):
                if not (self.cluster_plate < 0):
                    raise ValueError("Cluster plate axis must be negative")
                u_self.append(
                    np.expand_dims(moment,
                                   axis=self.cluster_plate - self.ndims[k]))

            # Message from the mixed distribution
            m = self.distribution.compute_message_to_parent(
                parent, index_for_parent, u_self, *(u_parents[1:]))

            # Note: The cluster assignment probabilities can be considered as
            # weights to plate elements. These weights need to mapped properly
            # via the plate mapping of self.distribution. Otherwise, nested
            # mixtures won't work, or possibly not any distribution that does
            # something to the plates. Thus, use compute_weights_to_parent to
            # compute the transformations to the weight array properly.
            #
            # See issue #39 for more details.

            # Compute weights (i.e., cluster assignment probabilities) and map
            # the plates properly.
            weights = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
            weights = misc.moveaxis(weights, -1, self.cluster_plate)
            weights = self.distribution.compute_weights_to_parent(
                index_for_parent,
                weights,
            )

            # Weigh the elements in the message array
            ndims_parent = self.ndims_parents[index_for_parent]
            return [
                mi * misc.add_trailing_axes(weights, ndim)
                for (mi, ndim) in zip(m, ndims_parent)
            ]
Пример #9
0
    def compute_message_to_parent(self, parent, index, u, *u_parents):
        """
        Compute the message to a parent node.

        For ``index == 0`` the message is a one-element list holding the
        log-pdf returned by the mixed distribution, with the cluster axis
        moved to the last plate position.  For ``index >= 1`` the message of
        the mixed distribution for its parent ``index - 1`` is computed and
        each element is multiplied by the responsibilities, i.e., the first
        moment of the first parent.  For any other index nothing is returned.

        Raises
        ------
        RuntimeError
            If ``index == 0`` and ``self.cluster_plate`` is not negative.
        """

        if index == 0:

            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(L)      = [Nn,..,K,..,N0]
            # Shape(u)      = [Nn,..,N0,Dd,..,D0]
            # Shape(result) = [Nn,..,N0,K]

            # Compute g:
            # Shape(g)      = [Nn,..,K,..,N0]
            g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
            # Reshape(g):
            # Shape(g)      = [Nn,..,N0,K]
            if np.ndim(g) < abs(self.cluster_plate):
                # Not enough axes, just add the cluster plate axis
                g = np.expand_dims(g, -1)
            else:
                # Move the cluster plate axis
                g = misc.moveaxis(g, self.cluster_plate, -1)

            # Compute phi:
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
            # Move phi axis:
            # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
            for ind in range(len(phi)):
                if self.cluster_plate < 0:
                    axis_from = self.cluster_plate - self.ndims[ind]
                else:
                    raise RuntimeError("Cluster plate axis must be negative")
                axis_to = -1 - self.ndims[ind]
                if np.ndim(phi[ind]) >= abs(axis_from):
                    # Cluster plate axis exists, move it to the correct position
                    phi[ind] = misc.moveaxis(phi[ind], axis_from, axis_to)
                else:
                    # No cluster plate axis, just add a new axis to the correct
                    # position, if phi has something on that axis
                    if np.ndim(phi[ind]) >= abs(axis_to):
                        phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

            # Reshape u:
            # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
            u_self = [
                np.expand_dims(u_i, axis=(-1 - self.ndims[ind]))
                for (ind, u_i) in enumerate(u)
            ]

            # Compute logpdf:
            # Shape(L)      = [Nn,..,N0,K]
            L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)

            # No summing over other than the cluster dimensions here; the
            # original author expected the message passing to handle it.
            return [L]

        elif index >= 1:

            # Parent index for the distribution used for the mixture.  Kept in
            # a separate name so that the ``index`` argument is not rebound.
            index_for_parent = index - 1

            # Reshape u:
            # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
            #
            # NOTE(review): unlike the index == 0 branch, a non-negative
            # cluster plate is accepted here and used as the axis directly --
            # confirm whether that is intended.
            u_self = list()
            for (ind, u_i) in enumerate(u):
                if self.cluster_plate < 0:
                    cluster_axis = self.cluster_plate - self.ndims[ind]
                else:
                    cluster_axis = self.cluster_plate
                u_self.append(np.expand_dims(u_i, axis=cluster_axis))

            # Message from the mixed distribution
            m = self.distribution.compute_message_to_parent(
                parent, index_for_parent, u_self, *(u_parents[1:]))

            # Responsibilities for clusters are the first parent's first
            # moment.  They do not depend on the message element, so the
            # cluster axis is moved to its proper place only once:
            # Shape(p)      = [Nn,..,N0,K]  ->  [Nn,..,K,..,N0]
            p = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
            p = misc.moveaxis(p, -1, self.cluster_plate)

            # Weigh the messages with the responsibilities
            #
            # TODO: You could do summing here already so that
            # you wouldn't compute huge matrices as
            # intermediate result. Use einsum.
            for i in range(len(m)):

                # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
                # Shape(p)      = [Nn,..,K,..,N0]
                # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]

                # Number of axes for the variable dimensions for
                # the parent message.
                D = self.ndims_parents[index_for_parent][i]

                # Add axes for the variable dimensions to the
                # responsibilities and compute the message contributions for
                # each cluster:
                # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
                m[i] = m[i] * misc.add_trailing_axes(p, D)

            return m
Пример #10
0
def infer(y, D, K,
          mask=True,
          maxiter=100,
          rotate=False,
          debug=False,
          precompute=False,
          update_hyper=0,
          start_rotating=0,
          start_rotating_weights=0,
          plot_C=True,
          monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.

    Parameters
    ----------
    y : array_like
        Data; promoted to at least two axes, giving the shape ``(M, N)``
        that is passed to ``model``.
    D, K : int
        Passed to ``model(M, N, D, K)``; also given to the rotation
        optimizers of the state space (``D``) and of the latent dynamics
        space (``K``).
    mask : optional
        Forwarded to ``Q['Y'].observe``.
    maxiter : int
        Number of update iterations.
    rotate : bool
        Whether to set up and apply the rotation speed-up.
    debug : bool
        If true, rotations are run with ``check_bound`` and
        ``check_gradient`` enabled.
    precompute : bool
        Forwarded to the ``RotateGaussianARD`` transformations.
    update_hyper : int
        Iterations below this number update only 'X', 'C', 'A' and 'tau'.
    start_rotating : int
        First iteration at which rotations are applied.
    start_rotating_weights : int
        First iteration at which the latent dynamics space is rotated too
        (only checked once the full updates have started).
    plot_C : bool
        If false, the plotter of ``Q['C']`` is removed.
    monitor : bool
        Whether to plot the initial distributions and pass ``plot=True`` to
        the updates.
    autosave : optional
        If not None, passed to ``Q.set_autosave`` with ``iterations=10``.

    Returns
    -------
    The posterior approximation ``Q``.
    """

    y = misc.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)
    if not plot_C:
        Q['C'].set_plotter(None)
    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:

        # Initial rotation of the D-dimensional state space (X, A, C).
        # Does not update hyperparameters
        init_rot_A = transformations.RotateGaussianARD(
            Q['A'], axis=0, precompute=precompute)
        init_rot_X = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[...,1:,None],
            init_rot_A)
        init_rot_C = transformations.RotateGaussianARD(
            Q['C'], axis=0, precompute=precompute)
        R_X_init = transformations.RotationOptimizer(init_rot_X, init_rot_C, D)

        # Rotate the D-dimensional state space (X, A, C)
        state_rot_A = transformations.RotateGaussianARD(
            Q['A'], Q['alpha'], axis=0, precompute=precompute)
        state_rot_X = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[...,1:,None],
            state_rot_A)
        state_rot_C = transformations.RotateGaussianARD(
            Q['C'], Q['gamma'], axis=0, precompute=precompute)
        R_X = transformations.RotationOptimizer(state_rot_X, state_rot_C, D)

        # Rotate the K-dimensional latent dynamics space (S, A, C)
        dynamics_rot_B = transformations.RotateGaussianARD(
            Q['B'], Q['beta'], precompute=precompute)
        dynamics_rot_S = transformations.RotateGaussianMarkovChain(
            Q['S'], dynamics_rot_B)
        dynamics_rot_A = transformations.RotateGaussianARD(
            Q['A'], Q['alpha'], axis=-1, precompute=precompute)
        R_S = transformations.RotationOptimizer(
            dynamics_rot_S, dynamics_rot_A, K)

        rotate_kwargs = {'maxiter': 10}
        if debug:
            rotate_kwargs['check_bound'] = True
            rotate_kwargs['check_gradient'] = True

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for ind in range(maxiter):

        # It might be a good idea to learn the lower level nodes a bit
        # before starting to learn the upper level nodes.
        warming_up = ind < update_hyper
        if warming_up:
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
        else:
            Q.update(plot=monitor)

        # It might be a good idea to not rotate immediately because it
        # might lead to pruning out components too efficiently before
        # even estimating them roughly
        if rotate and ind >= start_rotating:
            if warming_up:
                # Use the rotation which does not update alpha nor beta
                R_X_init.rotate(**rotate_kwargs)
            else:
                R_X.rotate(**rotate_kwargs)
                if ind >= start_rotating_weights:
                    R_S.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
Пример #11
0
def infer(y,
          D,
          K,
          mask=True,
          maxiter=100,
          rotate=False,
          debug=False,
          precompute=False,
          update_hyper=0,
          start_rotating=0,
          start_rotating_weights=0,
          plot_C=True,
          monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.

    The data ``y`` is promoted to at least two axes, giving the shape
    ``(M, N)``; the model is built with ``model(M, N, D, K)`` and ``y`` is
    observed on ``Q['Y']`` with the given ``mask``.  Then ``maxiter`` update
    iterations are run:

    * iterations below ``update_hyper`` update only 'X', 'C', 'A' and 'tau';
      later iterations update everything;
    * if ``rotate`` is true, rotations are applied from iteration
      ``start_rotating`` on: the initial state-space rotation during the
      restricted updates, afterwards the full state-space rotation and, from
      iteration ``start_rotating_weights`` on, also the rotation of the
      latent dynamics space.

    ``debug`` enables ``check_bound`` and ``check_gradient`` in the
    rotations, ``precompute`` is forwarded to the ``RotateGaussianARD``
    transformations, ``plot_C=False`` removes the plotter of ``Q['C']``,
    ``monitor`` controls plotting and ``autosave`` (if not None) is passed
    to ``Q.set_autosave`` with ``iterations=10``.

    Returns the posterior approximation ``Q``.
    """

    y = misc.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)

    if not plot_C:
        Q['C'].set_plotter(None)

    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:

        # Initial rotate the D-dimensional state space (X, A, C)
        # Does not update hyperparameters
        A_init = transformations.RotateGaussianARD(Q['A'],
                                                   axis=0,
                                                   precompute=precompute)
        X_init = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[..., 1:, None],
            A_init)
        C_init = transformations.RotateGaussianARD(Q['C'],
                                                   axis=0,
                                                   precompute=precompute)
        R_X_init = transformations.RotationOptimizer(X_init, C_init, D)

        # Rotate the D-dimensional state space (X, A, C)
        A_state = transformations.RotateGaussianARD(Q['A'],
                                                    Q['alpha'],
                                                    axis=0,
                                                    precompute=precompute)
        X_state = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[..., 1:, None],
            A_state)
        C_state = transformations.RotateGaussianARD(Q['C'],
                                                    Q['gamma'],
                                                    axis=0,
                                                    precompute=precompute)
        R_X = transformations.RotationOptimizer(X_state, C_state, D)

        # Rotate the K-dimensional latent dynamics space (S, A, C)
        B_dynamics = transformations.RotateGaussianARD(Q['B'],
                                                       Q['beta'],
                                                       precompute=precompute)
        S_dynamics = transformations.RotateGaussianMarkovChain(Q['S'],
                                                               B_dynamics)
        A_dynamics = transformations.RotateGaussianARD(Q['A'],
                                                       Q['alpha'],
                                                       axis=-1,
                                                       precompute=precompute)
        R_S = transformations.RotationOptimizer(S_dynamics, A_dynamics, K)

        rotate_kwargs = dict(maxiter=10)
        if debug:
            rotate_kwargs.update(check_bound=True, check_gradient=True)

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for ind in range(maxiter):

        # It might be a good idea to learn the lower level nodes a bit
        # before starting to learn the upper level nodes.
        restricted = ind < update_hyper
        if restricted:
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
        else:
            Q.update(plot=monitor)

        # It might be a good idea to not rotate immediately because it
        # might lead to pruning out components too efficiently before
        # even estimating them roughly
        if not rotate or ind < start_rotating:
            continue

        if restricted:
            # Use the rotation which does not update alpha nor beta
            optimizers = (R_X_init,)
        elif ind >= start_rotating_weights:
            optimizers = (R_X, R_S)
        else:
            optimizers = (R_X,)

        for optimizer in optimizers:
            optimizer.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
Пример #12
0
def gaussian_hinton(X, rows=None, cols=None, scale=1):
    """
    Plot the Hinton diagram of a Gaussian node.

    The mean of the node is reshaped to four axes.  The two leading axes
    span a grid of subplots and the two trailing axes (rows, columns) form
    the matrix drawn in each subplot.  The standard deviation, multiplied
    by `scale`, is passed to the diagram as the error.

    Parameters
    ----------
    X : node
        Node which is converted to Gaussian moments.
    rows : int, optional
        Axis (of the shape returned by ``X.get_shape(0)``) used as the rows
        of each diagram.  Negative values count from the end.  If not
        given, one of the two last axes is used.
    cols : int, optional
        Axis used as the columns of each diagram, same conventions as
        `rows`.
    scale : scalar, optional
        Multiplier of the standard deviation.  If zero, no error is passed
        to the diagram.

    Raises
    ------
    ValueError
        If `rows` or `cols` is out of range, if they refer to the same
        axis, or if more than four axes remain after removing the
        unit-length axes.
    """

    # Get mean and second moment
    X = X._convert(GaussianMoments)
    (x, xx) = X.get_moments()
    ndim = len(X.dims[0])
    shape = X.get_shape(0)
    size = len(shape)

    # Compute standard deviation
    xx = misc.get_diag(xx, ndim=ndim)
    std = np.sqrt(xx - x**2)

    # Force explicit elements when broadcasting
    x = x * np.ones(shape)
    std = std * np.ones(shape)

    # NaN marks an axis that is chosen automatically later on.  All the
    # comparisons below are False for NaN and the arithmetic keeps it NaN,
    # so an unspecified axis passes through the preprocessing untouched.
    if rows is None:
        rows = np.nan
    if cols is None:
        cols = np.nan

    # Preprocess the axes to 0,...,ndim
    if rows < 0:
        rows += size
    if cols < 0:
        cols += size
    if rows < 0 or rows >= size:
        raise ValueError("Row axis invalid")
    if cols < 0 or cols >= size:
        raise ValueError("Column axis invalid")
    if rows == cols:
        # Otherwise the transpose below fails with an unrelated message
        raise ValueError("Row and column axes must be different")

    # Remove non-row and non-column axes that have length 1
    squeezed_shape = list(shape)
    for i in reversed(range(len(shape))):
        if shape[i] == 1 and i != rows and i != cols:
            squeezed_shape.pop(i)
            # Axes after the removed one shift to the left
            if i < cols:
                cols -= 1
            if i < rows:
                rows -= 1
    x = np.reshape(x, squeezed_shape)
    std = np.reshape(std, squeezed_shape)

    # Check the number of axes before shifting the axis indices so that
    # too large arrays are reported with the proper message
    if np.ndim(x) > 4:
        raise ValueError("Can not plot arrays with over 4 axes")

    # Make explicit four axes (new axes are added to the front, thus shift
    # the row and column axes accordingly)
    cols = cols + (4 - np.ndim(x))
    rows = rows + (4 - np.ndim(x))
    x = misc.atleast_nd(x, 4)
    std = misc.atleast_nd(std, 4)

    # Choose the unspecified axes from the two last axes
    size = np.ndim(x)
    if np.isnan(cols):
        if rows != size - 1:
            cols = size - 1
        else:
            cols = size - 2
    if np.isnan(rows):
        if cols != size - 1:
            rows = size - 1
        else:
            rows = size - 2

    # Put the row and column axes to the end
    axes = [i for i in range(size) if i not in (rows, cols)] + [rows, cols]
    x = np.transpose(x, axes=axes)
    std = np.transpose(std, axes=axes)

    # Use a common scale in all subplots
    M = np.shape(x)[0]
    N = np.shape(x)[1]
    vmax = np.max(np.abs(x) + scale*std)

    for i in range(M):
        for j in range(N):
            plt.subplot(M, N, i*N+j+1)
            if scale == 0:
                _hinton(x[i,j], vmax=vmax)
            else:
                _hinton(x[i,j], vmax=vmax, error=scale*std[i,j])
Пример #13
0
    def compute_message_to_parent(self, parent, index, u, *u_parents):
        """
        Compute the message to a parent node.

        Parameters
        ----------
        parent : node
            The parent node.  It is only passed on to the mixed distribution
            when ``index >= 1``.
        index : int
            Index of the parent.  Index 0 is the parent whose first moment
            gives the cluster responsibilities, indices 1 and above are the
            parents of the mixed distribution.
        u : list of arrays
            Moments of this node.
        u_parents : lists of arrays
            Moments of the parents, ``u_parents[0]`` belonging to parent 0.

        Returns
        -------
        list of arrays
            The message.  For index 0 it contains a single array which has
            the cluster axis as the last axis.  Nothing is returned for a
            negative index.

        Raises
        ------
        RuntimeError
            If ``index == 0`` and the cluster plate axis is not negative.
        """

        if index == 0:

            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(L)      = [Nn,..,K,..,N0]
            # Shape(u)      = [Nn,..,N0,Dd,..,D0]
            # Shape(result) = [Nn,..,N0,K]

            # Compute g:
            # Shape(g)      = [Nn,..,K,..,N0]
            g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
            # Reshape(g):
            # Shape(g)      = [Nn,..,N0,K]
            if np.ndim(g) < abs(self.cluster_plate):
                # Not enough axes, just add the cluster plate axis
                g = np.expand_dims(g, -1)
            else:
                # Move the cluster plate axis
                g = misc.moveaxis(g, self.cluster_plate, -1)

            # Compute phi:
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))

            if self.cluster_plate >= 0:
                raise RuntimeError("Cluster plate axis must be negative")

            # Move phi axis:
            # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
            #
            # Collect the results into a new list instead of assigning into
            # the list returned by the distribution, so that the object owned
            # by the distribution is left untouched.
            phi_moved = []
            for (ind, phi_i) in enumerate(phi):
                axis_from = self.cluster_plate - self.ndims[ind]
                axis_to = -1 - self.ndims[ind]
                if np.ndim(phi_i) >= abs(axis_from):
                    # Cluster plate axis exists, move it to the correct position
                    phi_i = misc.moveaxis(phi_i, axis_from, axis_to)
                elif np.ndim(phi_i) >= abs(axis_to):
                    # No cluster plate axis, just add a new axis to the correct
                    # position, if phi has something on that axis
                    phi_i = np.expand_dims(phi_i, axis=axis_to)
                phi_moved.append(phi_i)

            # Reshape u:
            # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
            u_self = [
                np.expand_dims(u_i, axis=(-1 - self.ndims[ind]))
                for (ind, u_i) in enumerate(u)
            ]

            # Compute logpdf:
            # Shape(L)      = [Nn,..,N0,K]
            L = self.distribution.compute_logpdf(u_self, phi_moved, g, 0,
                                                 self.ndims)

            # No summing over the other plates here; presumably the message
            # passing machinery that calls this method takes care of that.

            return [L]

        elif index >= 1:

            # Parent index for the distribution used for the
            # mixture.
            index_for_parent = index - 1

            # Reshape u:
            # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
            u_self = list()
            for (ind, u_i) in enumerate(u):
                if self.cluster_plate < 0:
                    cluster_axis = self.cluster_plate - self.ndims[ind]
                else:
                    cluster_axis = self.cluster_plate
                u_self.append(np.expand_dims(u_i, axis=cluster_axis))

            # Message from the mixed distribution
            # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
            m = self.distribution.compute_message_to_parent(
                parent, index_for_parent, u_self, *(u_parents[1:]))

            # Responsibilities for clusters are the first
            # parent's first moment:
            # Shape(p)      = [Nn,..,N0,K]
            p = u_parents[0][0]
            # Move the cluster axis to the proper place (this is the same for
            # every element of the message, thus done only once):
            # Shape(p)      = [Nn,..,K,..,N0]
            p = misc.atleast_nd(p, abs(self.cluster_plate))
            p = misc.moveaxis(p, -1, self.cluster_plate)

            # Number of axes for the variable dimensions for each element of
            # the parent message.
            ndims_parent = self.ndims_parents[index_for_parent]

            # Weigh the messages with the responsibilities.  Trailing unit
            # axes are added to p for the variable dimensions:
            # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            #
            # TODO: You could do summing here already so that
            # you wouldn't compute huge matrices as
            # intermediate result. Use einsum.
            return [
                m_i * misc.add_trailing_axes(p, ndims_parent[i])
                for (i, m_i) in enumerate(m)
            ]
Пример #14
0
    def compute_message_to_parent(self, parent, index, u, *u_parents):
        """
        Compute the message to a parent node.

        Parameters
        ----------
        parent : node
            The parent node.  It is only passed on to the mixed distribution
            when ``index >= 1``.
        index : int
            Index of the parent.  Index 0 is the parent whose first moment
            gives the cluster assignment probabilities, indices 1 and above
            are the parents of the mixed distribution.
        u : list of arrays
            Moments of this node.
        u_parents : lists of arrays
            Moments of the parents, ``u_parents[0]`` belonging to parent 0.

        Returns
        -------
        list of arrays
            The message.  For index 0 it contains a single array which has
            the cluster axis as the last axis.  Nothing is returned for a
            negative index.

        Raises
        ------
        ValueError
            If ``index >= 1`` and the cluster plate axis is not negative.
        """

        if index == 0:

            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            # Shape(L)      = [Nn,..,K,..,N0]
            # Shape(u)      = [Nn,..,N0,Dd,..,D0]
            # Shape(result) = [Nn,..,N0,K]

            # Compute g:
            # Shape(g)      = [Nn,..,K,..,N0]
            g = self.raw_distribution.compute_cgf_from_parents(
                *(u_parents[1:]))
            # Keep g in the same plate layout as phi and u because they are
            # summed together in compute_logpdf; the cluster axis is moved to
            # the end only for the final result.  If g does not reach the
            # cluster plate axis, add leading unit axes so that the result is
            # guaranteed to have the cluster axis.
            num_missing_axes = abs(self.cluster_plate) - np.ndim(g)
            if num_missing_axes > 0:
                g = np.reshape(g, (1,) * num_missing_axes + np.shape(g))

            # Compute phi:
            # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
            phi = self.raw_distribution.compute_phi_from_parents(
                *(u_parents[1:]))

            # Reshape u:
            # Shape(u)      = [Nn,..,1,..,N0,Dd,..,D0]
            u_reshaped = [
                np.expand_dims(ui, self.cluster_plate - ndimi)
                if np.ndim(ui) >= abs(self.cluster_plate - ndimi) else ui
                for (ui, ndimi) in zip(u, self.ndims)
            ]

            # Compute logpdf:
            # Shape(L)      = [Nn,..,K,..,N0]
            L = self.raw_distribution.compute_logpdf(
                u_reshaped,
                phi,
                g,
                0,
                self.ndims,
            )

            # Move axis:
            # Shape(L)      = [Nn,..,N0,K]
            L = np.moveaxis(L, self.cluster_plate, -1)

            return [L]

        elif index >= 1:

            # Parent index for the distribution used for the
            # mixture.
            index_for_parent = index - 1

            if self.cluster_plate >= 0:
                raise ValueError("Cluster plate axis must be negative")

            # Reshape u:
            # Shape(u_self)  = [Nn,..1,..,N0,Dd,..,D0]
            u_self = [
                np.expand_dims(ui, axis=self.cluster_plate - ndimi)
                for (ui, ndimi) in zip(u, self.ndims)
            ]

            # Message from the mixed distribution
            # Shape(m)       = [Nn,..,K,..,N0,Dd,..,D0]
            m = self.raw_distribution.compute_message_to_parent(
                parent, index_for_parent, u_self, *(u_parents[1:]))

            # Note: The cluster assignment probabilities can be considered as
            # weights to plate elements. These weights need to be mapped
            # properly via the plate mapping of self.distribution. Otherwise,
            # nested mixtures won't work, or possibly not any distribution
            # that does something to the plates. Thus, use
            # compute_weights_to_parent to compute the transformations to the
            # weight array properly.
            #
            # See issue #39 for more details.

            # Compute weights (i.e., cluster assignment probabilities) and map
            # the plates properly.
            # Shape(p)       = [Nn,..,K,..,N0]
            p = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
            p = misc.moveaxis(p, -1, self.cluster_plate)
            p = self.raw_distribution.compute_weights_to_parent(
                index_for_parent,
                p,
            )

            # Weigh the elements in the message array. Trailing unit axes are
            # added to the weights for the variable dimensions of each
            # message element.
            #
            # TODO/FIXME: This may result in huge intermediate arrays. Need to
            # use einsum!
            return [
                mi * misc.add_trailing_axes(p, ndim)
                for (mi, ndim) in zip(m, self.ndims_parents[index_for_parent])
            ]