def _computations_for_A_and_X(self, XpXn, XpXp):
    """
    Compute the expectations that involve both the dynamics matrix and X.

    Parameters
    ----------
    XpXn : array
        Used as <x_{n-1} x_n^T>; contracted with the mean of A.
    XpXp : array
        Used as <x_{n-1} x_{n-1}^T>; contracted with the mean and the
        covariance of A.

    Returns
    -------
    tuple
        ``(A_XpXn, A_XpXp_A, CovA_XpXp)``.  Each term is summed over the
        time axis ``n`` and then reduced with ``sum_to_plates`` to the
        empty plate tuple, starting from the plates of ``self.X_node``.
    """
    # First and second moments of the state dynamics matrix
    (mean_A, second_A) = self.A_node.get_moments()

    # Keep only the leading D columns; the remaining axes correspond to
    # input signals.  The time axis is made explicit at the same time.
    n_states = np.shape(mean_A)[-2]
    mean_A = misc.atleast_nd(mean_A[..., :n_states], 3)
    second_A = misc.atleast_nd(second_A[..., :n_states, :n_states], 4)

    # Covariance of each row of A: <a a^T> - <a> <a>^T
    cov_A = second_A - mean_A[..., :, np.newaxis] * mean_A[..., np.newaxis, :]

    x_plates = self.X_node.plates

    # TODO: In case A does not depend on time, use a bit more efficient
    # formulas

    # \sum_n <A_n> <x_{n-1} x_n^T>
    A_XpXn = sum_to_plates(
        np.einsum("...nik,...nkj->...ij", mean_A, XpXn),
        (),
        ndim=2,
        plates_from=x_plates,
    )

    # \sum_n <A_n> <x_{n-1} x_{n-1}^T> <A_n>^T
    A_XpXp_A = sum_to_plates(
        np.einsum(
            "...nik,...njk->...ij",
            np.einsum("...nik,...nkj->...nij", mean_A, XpXp),
            mean_A,
        ),
        (),
        ndim=2,
        plates_from=x_plates,
    )

    # \sum_n tr(CovA_n <x_{n-1} x_{n-1}^T>)
    CovA_XpXp = sum_to_plates(
        np.einsum("...ndij,...nij->...d", cov_A, XpXp),
        (),
        ndim=1,
        plates_from=x_plates,
    )

    return (A_XpXn, A_XpXp_A, CovA_XpXp)
def _hinton(W, error=None, vmax=None, square=True):
    """
    Draws a Hinton diagram for visualizing a weight matrix.

    Temporarily disables matplotlib interactive mode if it is on,
    otherwise this takes forever.  The previous interactive state is
    restored even if drawing fails.

    Originally copied from
    http://wiki.scipy.org/Cookbook/Matplotlib/HintonDiagrams

    Parameters
    ----------
    W : array_like
        Weights; promoted to at least two axes and unpacked as
        ``(height, width)``.
    error : array_like, optional
        Non-negative error for each element of ``W``.  When given, an
        unfilled rectangle sized by ``|w| + error`` is drawn per element.
    vmax : float, optional
        Value that maps to a full-size cell.  If falsy, the maximum of
        ``|W|`` (plus ``error`` when given) is used.
    square : bool, optional
        If true, the axes are set to equal scaling.

    Raises
    ------
    Exception
        If an error value is negative or ``|w| + error`` exceeds ``vmax``.
    """
    reenable = plt.isinteractive()
    if reenable:
        plt.ioff()

    try:
        W = misc.atleast_nd(W, 2)
        (height, width) = W.shape
        if not vmax:
            if error is not None:
                vmax = np.max(np.abs(W) + error)
            else:
                vmax = np.max(np.abs(W))

        # Gray background covering the whole matrix
        plt.fill(0.5+np.array([0,width,width,0]),
                 0.5+np.array([0,0,height,height]),
                 'gray')
        plt.axis('off')
        if square:
            plt.axis('equal')
        plt.gca().invert_yaxis()

        for x in range(width):
            for y in range(height):
                _x = x+1
                _y = y+1
                w = W[y,x]
                _w = np.abs(w)
                # Positive weights are white, all others black
                if w > 0:
                    _c = 'white'
                else:
                    _c = 'black'
                if error is not None:
                    e = error[y,x]
                    if e < 0:
                        print(e, _w, vmax)
                        raise Exception("BUG? Negative error")
                    if _w + e > vmax:
                        print(e, _w, vmax)
                        raise Exception("BUG? Value+error greater than max")
                    side = min(1, np.sqrt((_w+e)/vmax))
                    _rectangle(_x,
                               _y,
                               side,
                               side,
                               edgecolor=_c,
                               fill=False)
                _blob(_x, _y, min(1, _w/vmax), _c)
    finally:
        # Restore interactive mode also when drawing raised
        if reenable:
            plt.ion()
def _hinton(W, error=None, vmax=None, square=False, axes=None):
    """
    Draw a Hinton diagram of a weight matrix on a matplotlib axes.

    Originally copied from
    http://wiki.scipy.org/Cookbook/Matplotlib/HintonDiagrams

    Parameters
    ----------
    W : array_like
        Weights; promoted to at least two axes and unpacked as
        ``(height, width)``.
    error : array_like, optional
        Non-negative error for each element of ``W``.  When given, an
        unfilled rectangle sized by ``|w| + error`` is drawn per element.
    vmax : float, optional
        Value that maps to a full-size cell.  If falsy, the maximum of
        ``|W|`` (plus ``error`` when given) is used.
    square : bool, optional
        If true, the aspect ratio of the axes is set to ``'equal'``.
    axes : matplotlib axes, optional
        Target axes; defaults to ``plt.gca()``.

    Raises
    ------
    Exception
        If an error value is negative or ``|w| + error`` exceeds ``vmax``.
    """
    if axes is None:
        axes = plt.gca()

    W = misc.atleast_nd(W, 2)
    (n_rows, n_cols) = W.shape

    if not vmax:
        magnitudes = np.abs(W)
        vmax = np.max(magnitudes if error is None else magnitudes + error)

    # Gray background covering the whole matrix
    corner_x = 0.5 + np.array([0, n_cols, n_cols, 0])
    corner_y = 0.5 + np.array([0, 0, n_rows, n_rows])
    axes.fill(corner_x, corner_y, 'gray')

    if square:
        axes.set_aspect('equal')
    axes.set_ylim(0.5, n_rows + 0.5)
    axes.set_xlim(0.5, n_cols + 0.5)
    axes.set_xticks([])
    axes.set_yticks([])
    axes.invert_yaxis()

    for col in range(n_cols):
        for row in range(n_rows):
            value = W[row, col]
            magnitude = np.abs(value)
            # Positive weights are white, all others black
            colour = 'white' if value > 0 else 'black'

            if error is not None:
                err = error[row, col]
                if err < 0:
                    print(err, magnitude, vmax)
                    raise Exception("BUG? Negative error")
                if magnitude + err > vmax:
                    print(err, magnitude, vmax)
                    raise Exception("BUG? Value+error greater than max")
                side = min(1, np.sqrt((magnitude + err) / vmax))
                _rectangle(axes, col + 1, row + 1, side, side,
                           edgecolor=colour, fill=False)

            _blob(axes, col + 1, row + 1, min(1, magnitude / vmax), colour)
def timeseries_categorical_mc(Z):
    """
    Plot the expectations of a categorical node as a grid of Hinton diagrams.

    The first moment of ``Z`` is broadcast explicitly and promoted to four
    axes.  The two leading axes index the subplot grid and the transposed
    two trailing axes form each diagram.

    Raises
    ------
    ValueError
        If the expectation array has more than four axes.
    """
    # Make sure that the node is categorical
    Z = Z._convert(CategoricalMoments)

    # Expectations, with broadcasting made explicit
    probs = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Subplot layout comes from the two leading axes
    probs = misc.atleast_nd(probs, 4)
    if np.ndim(probs) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    (n_rows, n_cols) = np.shape(probs)[:2]

    # One Hinton diagram per grid cell, in row-major order
    for cell in range(n_rows * n_cols):
        (i, j) = divmod(cell, n_cols)
        plt.subplot(n_rows, n_cols, cell + 1)
        hinton(probs[i, j].T, vmax=1.0, square=False)
def timeseries_categorical_mc(Z, fig=None):
    """
    Plot the expectations of a categorical node as a grid of Hinton diagrams.

    Parameters
    ----------
    Z : node
        Node that is converted to categorical moments.
    fig : matplotlib figure, optional
        Figure to draw the subplots into; defaults to ``plt.gcf()``.

    Raises
    ------
    ValueError
        If the expectation array has more than four axes.
    """
    if fig is None:
        fig = plt.gcf()

    # Make sure that the node is categorical
    Z = Z._convert(CategoricalMoments)

    # Expectations, with broadcasting made explicit
    probs = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Subplot layout comes from the two leading axes
    probs = misc.atleast_nd(probs, 4)
    if np.ndim(probs) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    (n_rows, n_cols) = np.shape(probs)[:2]

    # One Hinton diagram per grid cell, in row-major order
    for cell in range(n_rows * n_cols):
        (i, j) = divmod(cell, n_cols)
        cell_axes = fig.add_subplot(n_rows, n_cols, cell + 1)
        _hinton(probs[i, j].T, vmax=1.0, square=False, axes=cell_axes)
def timeseries_categorical_mc(Z, fig=None):
    """
    Plot the expectations of a categorical node as a grid of Hinton diagrams.

    Parameters
    ----------
    Z : node
        Node passed through ``_ensure_moments`` with ``CategoricalMoments``.
    fig : matplotlib figure, optional
        Figure to draw the subplots into; defaults to ``plt.gcf()``.

    Raises
    ------
    ValueError
        If the expectation array has more than four axes.
    """
    if fig is None:
        fig = plt.gcf()

    # Make sure that the node is categorical
    Z = Z._ensure_moments(Z, CategoricalMoments, categories=None)

    # Expectations, with broadcasting made explicit
    expectations = Z._message_to_child()[0] * np.ones(Z.get_shape(0))

    # Subplot layout comes from the two leading axes
    expectations = misc.atleast_nd(expectations, 4)
    if np.ndim(expectations) != 4:
        raise ValueError("Can not plot arrays with over 4 axes")
    grid_rows = np.shape(expectations)[0]
    grid_cols = np.shape(expectations)[1]

    # np.ndindex walks the grid in row-major order, matching the
    # subplot numbering
    for (position, (i, j)) in enumerate(np.ndindex(grid_rows, grid_cols)):
        cell_axes = fig.add_subplot(grid_rows, grid_cols, position + 1)
        _hinton(expectations[i, j].T, vmax=1.0, square=False, axes=cell_axes)
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    For ``index == 0`` the result is ``[L]``, where ``L`` is the log-pdf
    from ``self.distribution.compute_logpdf`` with the cluster axis last.
    For ``index >= 1`` the result is the message of the mixed distribution
    to its parent ``index - 1``, weighted by the cluster assignment
    probabilities taken from ``u_parents[0][0]``.

    Raises
    ------
    RuntimeError, ValueError
        If ``self.cluster_plate`` is not negative (``RuntimeError`` in the
        ``index == 0`` branch, ``ValueError`` otherwise).
    """
    # Moments of the parents of the mixed distribution
    mixed_parents = u_parents[1:]

    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]
        dist = self.distribution

        # g arrives as [Nn,..,K,..,N0] and is reshaped to [Nn,..,N0,K]
        g = dist.compute_cgf_from_parents(*mixed_parents)
        if np.ndim(g) >= abs(self.cluster_plate):
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)
        else:
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)

        # phi: [Nn,..,K,..,N0,Dd,..,D0] -> [Nn,..,N0,K,Dd,..,D0]
        phi = dist.compute_phi_from_parents(*mixed_parents)
        for (ind, phi_i) in enumerate(phi):
            if self.cluster_plate >= 0:
                raise RuntimeError("Cluster plate axis must be negative")
            n_var_axes = self.ndims[ind]
            source_axis = self.cluster_plate - n_var_axes
            target_axis = -1 - n_var_axes
            rank = np.ndim(phi_i)
            if rank >= abs(source_axis):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = misc.moveaxis(phi_i, source_axis, target_axis)
            elif rank >= abs(target_axis):
                # No cluster plate axis: add one, but only if phi has
                # something on that axis
                phi[ind] = np.expand_dims(phi_i, axis=target_axis)

        # u: [Nn,..,N0,Dd,..,D0] -> [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u_i, axis=(-1 - self.ndims[ind]))
            for (ind, u_i) in enumerate(u)
        ]

        # Shape(L) = [Nn,..,N0,K].  Nothing is summed here; the original
        # author's note leaves that to the message passing.
        L = dist.compute_logpdf(u_self, phi, g, 0, self.ndims)
        return [L]

    if index >= 1:
        # Parent index for the distribution used for the mixture
        index_for_parent = index - 1

        # u: add a unit cluster axis, [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for (ind, u_i) in enumerate(u):
            if self.cluster_plate >= 0:
                raise ValueError("Cluster plate axis must be negative")
            u_self.append(
                np.expand_dims(u_i, axis=self.cluster_plate - self.ndims[ind])
            )

        # Message from the mixed distribution
        m = self.distribution.compute_message_to_parent(
            parent, index_for_parent, u_self, *mixed_parents
        )

        # The cluster assignment probabilities act as weights on plate
        # elements.  They are mapped through compute_weights_to_parent of
        # self.distribution so that nested mixtures, or any distribution
        # that does something to the plates, work.  See issue #39.
        weights = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
        weights = misc.moveaxis(weights, -1, self.cluster_plate)
        weights = self.distribution.compute_weights_to_parent(
            index_for_parent,
            weights,
        )

        # Weigh the elements in the message array
        parent_ndims = self.ndims_parents[index_for_parent]
        return [
            m_i * misc.add_trailing_axes(weights, n_axes)
            for (m_i, n_axes) in zip(m, parent_ndims)
        ]
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    ``index == 0`` yields a one-element list holding the log-pdf with the
    cluster axis last.  ``index >= 1`` yields the message of the mixed
    distribution to its parent ``index - 1``, multiplied elementwise by
    the cluster assignment probabilities from ``u_parents[0][0]``.

    Raises
    ------
    RuntimeError, ValueError
        If ``self.cluster_plate`` is not negative (``RuntimeError`` in the
        ``index == 0`` branch, ``ValueError`` otherwise).
    """
    if index == 0:
        # Shapes on entry:
        #   phi = [Nn,..,K,..,N0,Dd,..,D0]
        #   g   = [Nn,..,K,..,N0]
        #   u   = [Nn,..,N0,Dd,..,D0]
        # Shape of the result: [Nn,..,N0,K]
        rest = u_parents[1:]

        g = self.distribution.compute_cgf_from_parents(*rest)
        has_cluster_axis = np.ndim(g) >= abs(self.cluster_plate)
        if has_cluster_axis:
            # Move the cluster plate axis to the end
            g = misc.moveaxis(g, self.cluster_plate, -1)
        else:
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)

        phi = self.distribution.compute_phi_from_parents(*rest)
        # Bring phi to [Nn,..,N0,K,Dd,..,D0], element by element
        for k in range(len(phi)):
            if not self.cluster_plate < 0:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_from = self.cluster_plate - self.ndims[k]
            axis_to = -1 - self.ndims[k]
            if np.ndim(phi[k]) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[k] = misc.moveaxis(phi[k], axis_from, axis_to)
                continue
            # No cluster plate axis: add a new axis to the correct
            # position, if phi has something on that axis
            if np.ndim(phi[k]) >= abs(axis_to):
                phi[k] = np.expand_dims(phi[k], axis=axis_to)

        # Insert a unit cluster axis into u: [Nn,..,N0,1,Dd,..,D0]
        u_self = []
        for (k, u_k) in enumerate(u):
            u_self.append(np.expand_dims(u_k, axis=(-1 - self.ndims[k])))

        # Shape(L) = [Nn,..,N0,K]; no summation over plates is done here
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)
        return [L]

    elif index >= 1:
        # Parent index for the distribution used for the mixture
        index_for_parent = index - 1

        # Insert a unit cluster axis into u: [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for k in range(len(u)):
            if not self.cluster_plate < 0:
                raise ValueError("Cluster plate axis must be negative")
            cluster_axis = self.cluster_plate - self.ndims[k]
            u_self.append(np.expand_dims(u[k], axis=cluster_axis))

        # Message from the mixed distribution
        m = self.distribution.compute_message_to_parent(
            parent,
            index_for_parent,
            u_self,
            *(u_parents[1:])
        )

        # The cluster assignment probabilities can be considered as
        # weights to plate elements.  They must be mapped via the plate
        # mapping of self.distribution, otherwise nested mixtures (or any
        # distribution that does something to the plates) won't work.
        # See issue #39 for more details.
        p = misc.moveaxis(
            misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate)),
            -1,
            self.cluster_plate,
        )
        p = self.distribution.compute_weights_to_parent(index_for_parent, p)

        # Weigh the elements in the message array
        weighted = []
        for (mi, ndim) in zip(m, self.ndims_parents[index_for_parent]):
            weighted.append(mi * misc.add_trailing_axes(p, ndim))
        return weighted
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    For ``index == 0`` the result is ``[L]``, where ``L`` is the log-pdf
    from ``self.distribution.compute_logpdf`` with the cluster axis last.
    For ``index >= 1`` the result is the message list of the mixed
    distribution to its parent ``index - 1``, with every element
    multiplied by the responsibilities from ``u_parents[0][0]``.  That
    list is updated in place and returned.

    Raises
    ------
    RuntimeError
        In the ``index == 0`` branch, if ``self.cluster_plate`` is not
        negative.
    """
    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]

        # Compute g:
        # Shape(g)      = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        # Reshape(g):
        # Shape(g)      = [Nn,..,N0,K]
        if np.ndim(g) < abs(self.cluster_plate):
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)
        else:
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Move phi axis:
        # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
        for ind in range(len(phi)):
            if self.cluster_plate < 0:
                axis_from = self.cluster_plate - self.ndims[ind]
            else:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_to = -1 - self.ndims[ind]
            if np.ndim(phi[ind]) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = misc.moveaxis(phi[ind], axis_from, axis_to)
            elif np.ndim(phi[ind]) >= abs(axis_to):
                # No cluster plate axis, just add a new axis to the correct
                # position, if phi has something on that axis
                phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

        # Reshape u:
        # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
        u_self = [
            np.expand_dims(u[ind], axis=(-1 - self.ndims[ind]))
            for ind in range(len(u))
        ]

        # Compute logpdf:
        # Shape(L)      = [Nn,..,N0,K]
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)

        # Nothing is summed over the other plates here; the original
        # author's note leaves that to the message passing.
        return [L]

    elif index >= 1:
        # Parent index for the distribution used for the mixture.
        index_for_parent = index - 1

        # Reshape u:
        # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for ind in range(len(u)):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u[ind], axis=cluster_axis))

        # Message from the mixed distribution
        # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
        m = self.distribution.compute_message_to_parent(
            parent, index_for_parent, u_self, *(u_parents[1:]))

        # Responsibilities for clusters are the first parent's first
        # moment.  Moving the cluster axis to its proper place does not
        # depend on the message element, so it is done once:
        # Shape(p)      = [Nn,..,N0,K] -> [Nn,..,K,..,N0]
        p = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
        p = misc.moveaxis(p, -1, self.cluster_plate)

        # Weigh the messages with the responsibilities
        #
        # TODO: You could do summing here already so that you wouldn't
        # compute huge matrices as intermediate result. Use einsum.
        for i in range(len(m)):
            # Number of axes for the variable dimensions for the parent
            # message.
            D = self.ndims_parents[index_for_parent][i]
            # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * misc.add_trailing_axes(p, D)

        return m
def infer(y, D, K, mask=True, maxiter=100, rotate=False, debug=False,
          precompute=False, update_hyper=0, start_rotating=0,
          start_rotating_weights=0, plot_C=True, monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.

    Parameters
    ----------
    y : array_like
        Observations; promoted to at least two axes, shape ``(M, N)``.
    D : int
        Dimensionality of the state space (X, A, C).
    K : int
        Dimensionality of the latent dynamics space (S, A, C).
    mask : optional
        Passed to ``observe`` of the ``'Y'`` node.
    maxiter : int, optional
        Number of update iterations.
    rotate : bool, optional
        Enable the rotation speed-up.
    debug : bool, optional
        Rotations additionally check the bound and the gradient.
    precompute : bool, optional
        Passed to the ``RotateGaussianARD`` transformations.
    update_hyper : int, optional
        During iterations below this index only ``'X'``, ``'C'``, ``'A'``
        and ``'tau'`` are updated.
    start_rotating : int, optional
        First iteration at which the state-space rotation is applied.
    start_rotating_weights : int, optional
        First iteration at which the dynamics-space rotation is applied
        (only in iterations that also rotate the state space).
    plot_C : bool, optional
        If false, the plotter of ``'C'`` is removed.
    monitor : bool, optional
        Plot the initial distributions and plot during updates.
    autosave : optional
        If not None, passed to ``Q.set_autosave`` with ``iterations=10``.

    Returns
    -------
    Q
        The posterior approximation.
    """
    y = misc.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)
    if not plot_C:
        Q['C'].set_plotter(None)
    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:
        # Initial rotation of the D-dimensional state space (X, A, C);
        # it does not update hyperparameters
        init_A = transformations.RotateGaussianARD(
            Q['A'], axis=0, precompute=precompute)
        init_X = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[...,1:,None],
            init_A)
        init_C = transformations.RotateGaussianARD(
            Q['C'], axis=0, precompute=precompute)
        R_X_init = transformations.RotationOptimizer(init_X, init_C, D)

        # Rotation of the D-dimensional state space (X, A, C)
        states_A = transformations.RotateGaussianARD(
            Q['A'], Q['alpha'], axis=0, precompute=precompute)
        states_X = transformations.RotateVaryingMarkovChain(
            Q['X'],
            Q['A'],
            Q['S']._convert(GaussianMoments)[...,1:,None],
            states_A)
        states_C = transformations.RotateGaussianARD(
            Q['C'], Q['gamma'], axis=0, precompute=precompute)
        R_X = transformations.RotationOptimizer(states_X, states_C, D)

        # Rotation of the K-dimensional latent dynamics space (S, A, C)
        dynamics_B = transformations.RotateGaussianARD(
            Q['B'], Q['beta'], precompute=precompute)
        dynamics_S = transformations.RotateGaussianMarkovChain(
            Q['S'], dynamics_B)
        dynamics_A = transformations.RotateGaussianARD(
            Q['A'], Q['alpha'], axis=-1, precompute=precompute)
        R_S = transformations.RotationOptimizer(dynamics_S, dynamics_A, K)

        rotate_kwargs = {'maxiter': 10}
        if debug:
            rotate_kwargs['check_bound'] = True
            rotate_kwargs['check_gradient'] = True

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for ind in range(maxiter):
        rotating = rotate and ind >= start_rotating

        if ind < update_hyper:
            # It might be a good idea to learn the lower level nodes a bit
            # before starting to learn the upper level nodes.
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
            if rotating:
                # Use the rotation which does not update alpha nor beta
                R_X_init.rotate(**rotate_kwargs)
            continue

        Q.update(plot=monitor)
        if not rotating:
            # It might be a good idea to not rotate immediately because it
            # might lead to pruning out components too efficiently before
            # even estimating them roughly
            continue
        R_X.rotate(**rotate_kwargs)
        if ind >= start_rotating_weights:
            R_S.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
def infer(y,
          D,
          K,
          mask=True,
          maxiter=100,
          rotate=False,
          debug=False,
          precompute=False,
          update_hyper=0,
          start_rotating=0,
          start_rotating_weights=0,
          plot_C=True,
          monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.

    The data ``y`` is promoted to two axes ``(M, N)``, a model is built
    with ``model(M, N, D, K)``, the data is observed through the ``'Y'``
    node with ``mask``, and ``maxiter`` update iterations are run.  With
    ``rotate`` enabled, rotations of the D-dimensional state space and
    the K-dimensional latent dynamics space are applied after the updates
    from iterations ``start_rotating`` and ``start_rotating_weights``
    onward, respectively.  During iterations below ``update_hyper`` only
    ``'X'``, ``'C'``, ``'A'`` and ``'tau'`` are updated.

    ``debug`` makes the rotations check the bound and the gradient,
    ``precompute`` is forwarded to the ``RotateGaussianARD``
    transformations, ``plot_C=False`` removes the plotter of ``'C'``,
    ``monitor`` controls plotting, and ``autosave`` (if not None) is
    passed to ``Q.set_autosave`` with ``iterations=10``.

    Returns the posterior approximation ``Q``.
    """
    y = misc.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)
    if not plot_C:
        Q['C'].set_plotter(None)
    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:
        # Initial rotation of the D-dimensional state space (X, A, C).
        # Does not update hyperparameters.
        rotA_init = transformations.RotateGaussianARD(
            Q['A'],
            axis=0,
            precompute=precompute)
        R_X_init = transformations.RotationOptimizer(
            transformations.RotateVaryingMarkovChain(
                Q['X'],
                Q['A'],
                Q['S']._convert(GaussianMoments)[..., 1:, None],
                rotA_init),
            transformations.RotateGaussianARD(
                Q['C'],
                axis=0,
                precompute=precompute),
            D)

        # Rotation of the D-dimensional state space (X, A, C)
        rotA_states = transformations.RotateGaussianARD(
            Q['A'],
            Q['alpha'],
            axis=0,
            precompute=precompute)
        R_X = transformations.RotationOptimizer(
            transformations.RotateVaryingMarkovChain(
                Q['X'],
                Q['A'],
                Q['S']._convert(GaussianMoments)[..., 1:, None],
                rotA_states),
            transformations.RotateGaussianARD(
                Q['C'],
                Q['gamma'],
                axis=0,
                precompute=precompute),
            D)

        # Rotation of the K-dimensional latent dynamics space (S, A, C)
        rotB = transformations.RotateGaussianARD(
            Q['B'],
            Q['beta'],
            precompute=precompute)
        R_S = transformations.RotationOptimizer(
            transformations.RotateGaussianMarkovChain(Q['S'], rotB),
            transformations.RotateGaussianARD(
                Q['A'],
                Q['alpha'],
                axis=-1,
                precompute=precompute),
            K)

        rotate_kwargs = (
            {'maxiter': 10, 'check_bound': True, 'check_gradient': True}
            if debug else
            {'maxiter': 10}
        )

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for iteration in range(maxiter):
        learn_lower_only = iteration < update_hyper
        do_rotate = rotate and iteration >= start_rotating

        if learn_lower_only:
            # It might be a good idea to learn the lower level nodes a bit
            # before starting to learn the upper level nodes.
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
        else:
            Q.update(plot=monitor)

        if not do_rotate:
            # It might be a good idea to not rotate immediately because it
            # might lead to pruning out components too efficiently before
            # even estimating them roughly
            continue

        if learn_lower_only:
            # Use the rotation which does not update alpha nor beta
            R_X_init.rotate(**rotate_kwargs)
        else:
            R_X.rotate(**rotate_kwargs)
            if iteration >= start_rotating_weights:
                R_S.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
def gaussian_hinton(X, rows=None, cols=None, scale=1):
    """
    Plot the Hinton diagram of a Gaussian node

    The mean is drawn as the weight and ``scale`` times the standard
    deviation as the error.  Axes of length one other than the row and
    column axes are dropped, the result is promoted to four axes, and the
    two leading axes index a grid of subplots.

    Parameters
    ----------
    X : node
        Node that is converted to Gaussian moments.
    rows, cols : int, optional
        Axes of the node to use as the rows and the columns of each
        diagram; negative values count from the end.  Axes not given are
        picked from the last two axes.
    scale : float, optional
        Multiplier of the standard deviation; ``0`` plots no errors.

    Raises
    ------
    ValueError
        If the row or column axis is out of range, if they are the same
        axis, or if more than four axes remain after squeezing.
    """
    # Get mean and second moment
    X = X._convert(GaussianMoments)
    (x, xx) = X.get_moments()
    ndim = len(X.dims[0])
    shape = X.get_shape(0)
    size = len(shape)

    # Compute standard deviation
    xx = misc.get_diag(xx, ndim=ndim)
    std = np.sqrt(xx - x**2)

    # Force explicit elements when broadcasting
    x = x * np.ones(shape)
    std = std * np.ones(shape)

    # NaN marks an axis to be chosen automatically: every comparison
    # below is false for NaN, so the validation and the index shifts
    # leave it untouched until it is resolved.
    if rows is None:
        rows = np.nan
    if cols is None:
        cols = np.nan

    # Preprocess the axes to 0,...,ndim
    if rows < 0:
        rows += size
    if cols < 0:
        cols += size
    if rows < 0 or rows >= size:
        raise ValueError("Row axis invalid")
    if cols < 0 or cols >= size:
        raise ValueError("Column axis invalid")
    if rows == cols:
        # Previously this only failed later inside np.transpose
        raise ValueError("Row and column axes must be different")

    # Remove non-row and non-column axes that have length 1
    squeezed_shape = list(shape)
    for i in reversed(range(len(shape))):
        if shape[i] == 1 and i != rows and i != cols:
            squeezed_shape.pop(i)
            # Axes after the removed one shift down by one
            if i < cols:
                cols -= 1
            if i < rows:
                rows -= 1
    x = np.reshape(x, squeezed_shape)
    std = np.reshape(std, squeezed_shape)

    # Make explicit four axes
    cols = cols + (4 - np.ndim(x))
    rows = rows + (4 - np.ndim(x))
    x = misc.atleast_nd(x, 4)
    std = misc.atleast_nd(std, 4)
    size = np.ndim(x)

    # Checked before transposing so that too many axes are reported with
    # this message instead of an unrelated transpose error
    if size != 4:
        raise ValueError("Can not plot arrays with over 4 axes")

    # Resolve automatically chosen axes from the last two axes
    if np.isnan(cols):
        if rows != size - 1:
            cols = size - 1
        else:
            cols = size - 2
    if np.isnan(rows):
        if cols != size - 1:
            rows = size - 1
        else:
            rows = size - 2

    # Put the row and column axes to the end
    axes = [i for i in range(size) if i not in (rows, cols)] + [rows, cols]
    x = np.transpose(x, axes=axes)
    std = np.transpose(std, axes=axes)

    M = np.shape(x)[0]
    N = np.shape(x)[1]

    # Common scale for all subplots
    vmax = np.max(np.abs(x) + scale*std)

    for i in range(M):
        for j in range(N):
            plt.subplot(M, N, i*N+j+1)
            if scale == 0:
                _hinton(x[i,j], vmax=vmax)
            else:
                _hinton(x[i,j], vmax=vmax, error=scale*std[i,j])
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    ``index == 0``: returns ``[L]`` where ``L`` is the log-pdf computed by
    ``self.distribution.compute_logpdf`` with the cluster axis last.

    ``index >= 1``: asks the mixed distribution for its message to parent
    ``index - 1`` and multiplies every element of that list, in place,
    by the responsibilities taken from ``u_parents[0][0]``.

    Raises
    ------
    RuntimeError
        In the ``index == 0`` branch, if ``self.cluster_plate`` is not
        negative.
    """
    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]

        # Compute g:
        # Shape(g)      = [Nn,..,K,..,N0]
        g = self.distribution.compute_cgf_from_parents(*(u_parents[1:]))
        # Reshape(g):
        # Shape(g)      = [Nn,..,N0,K]
        if np.ndim(g) < abs(self.cluster_plate):
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)
        else:
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)

        # Compute phi:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        phi = self.distribution.compute_phi_from_parents(*(u_parents[1:]))
        # Move phi axis:
        # Shape(phi)    = [Nn,..,N0,K,Dd,..,D0]
        for ind in range(len(phi)):
            if self.cluster_plate < 0:
                axis_from = self.cluster_plate - self.ndims[ind]
            else:
                raise RuntimeError("Cluster plate axis must be negative")
            axis_to = -1 - self.ndims[ind]
            if np.ndim(phi[ind]) >= abs(axis_from):
                # Cluster plate axis exists, move it to the correct position
                phi[ind] = misc.moveaxis(phi[ind], axis_from, axis_to)
            else:
                # No cluster plate axis, just add a new axis to the correct
                # position, if phi has something on that axis
                if np.ndim(phi[ind]) >= abs(axis_to):
                    phi[ind] = np.expand_dims(phi[ind], axis=axis_to)

        # Reshape u:
        # Shape(u)      = [Nn,..,N0,1,Dd,..,D0]
        u_self = list()
        for ind in range(len(u)):
            u_self.append(np.expand_dims(u[ind],
                                         axis=(-1 - self.ndims[ind])))

        # Compute logpdf:
        # Shape(L)      = [Nn,..,N0,K]
        L = self.distribution.compute_logpdf(u_self, phi, g, 0, self.ndims)

        # No summation over the non-cluster plates is done here; the
        # original author's note leaves that to the message passing.
        m = [L]

        return m

    elif index >= 1:
        # Parent index for the distribution used for the mixture.  Kept
        # in its own name so the caller's ``index`` is not overwritten.
        parent_index = index - 1

        # Reshape u:
        # Shape(u)      = [Nn,..1,..,N0,Dd,..,D0]
        u_self = list()
        for ind in range(len(u)):
            if self.cluster_plate < 0:
                cluster_axis = self.cluster_plate - self.ndims[ind]
            else:
                cluster_axis = self.cluster_plate
            u_self.append(np.expand_dims(u[ind], axis=cluster_axis))

        # Message from the mixed distribution
        # Shape(m)      = [Nn,..,K,..,N0,Dd,..,D0]
        m = self.distribution.compute_message_to_parent(parent,
                                                        parent_index,
                                                        u_self,
                                                        *(u_parents[1:]))

        # Responsibilities for clusters are the first parent's first
        # moment:
        # Shape(p)      = [Nn,..,N0,K]
        # Move the cluster axis to the proper place once, since this is
        # the same for every message element:
        # Shape(p)      = [Nn,..,K,..,N0]
        responsibilities = misc.moveaxis(
            misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate)),
            -1,
            self.cluster_plate)

        # Number of axes for the variable dimensions of each element of
        # the parent message
        message_ndims = self.ndims_parents[parent_index]

        # Weigh the messages with the responsibilities
        #
        # TODO: You could do summing here already so that you wouldn't
        # compute huge matrices as intermediate result. Use einsum.
        for i in range(len(m)):
            # Add axes for variable dimensions to the contributions:
            # Shape(p)      = [Nn,..,K,..,N0,1,..,1]
            # Shape(result) = [Nn,..,K,..,N0,Dd,..,D0]
            m[i] = m[i] * misc.add_trailing_axes(responsibilities,
                                                 message_ndims[i])

        return m
def compute_message_to_parent(self, parent, index, u, *u_parents):
    """
    Compute the message to a parent node.

    For ``index == 0`` the result is ``[L]``: the log-pdf from
    ``self.raw_distribution.compute_logpdf`` with its cluster axis moved
    to the end.  For ``index >= 1`` the result is the message of the raw
    distribution to its parent ``index - 1``, weighted by the cluster
    assignment probabilities from ``u_parents[0][0]``.

    Raises
    ------
    ValueError
        In the ``index >= 1`` branch, if ``self.cluster_plate`` is not
        negative.
    """
    # Moments of the parents of the mixed distribution
    mixed_parents = u_parents[1:]

    if index == 0:
        # Shape(phi)    = [Nn,..,K,..,N0,Dd,..,D0]
        # Shape(L)      = [Nn,..,K,..,N0]
        # Shape(u)      = [Nn,..,N0,Dd,..,D0]
        # Shape(result) = [Nn,..,N0,K]
        raw = self.raw_distribution

        # g arrives as [Nn,..,K,..,N0] and is reshaped to [Nn,..,N0,K]
        # NOTE(review): g gets K as its last axis while L below is
        # computed with K at cluster_plate -- confirm compute_logpdf
        # handles this when cluster_plate != -1.
        g = raw.compute_cgf_from_parents(*mixed_parents)
        if np.ndim(g) >= abs(self.cluster_plate):
            # Move the cluster plate axis
            g = misc.moveaxis(g, self.cluster_plate, -1)
        else:
            # Not enough axes, just add the cluster plate axis
            g = np.expand_dims(g, -1)

        # Shape(phi) = [Nn,..,K,..,N0,Dd,..,D0]
        phi = raw.compute_phi_from_parents(*mixed_parents)

        # Insert a unit cluster axis into each moment that has enough
        # axes for it: [Nn,..,1,..,N0,Dd,..,D0]
        u_reshaped = []
        for (ui, ndimi) in zip(u, self.ndims):
            cluster_axis = self.cluster_plate - ndimi
            if np.ndim(ui) >= abs(cluster_axis):
                ui = np.expand_dims(ui, cluster_axis)
            u_reshaped.append(ui)

        # Shape(L) = [Nn,..,K,..,N0]
        L = raw.compute_logpdf(
            u_reshaped,
            phi,
            g,
            0,
            self.ndims,
        )

        # Shape(L) = [Nn,..,N0,K]
        return [np.moveaxis(L, self.cluster_plate, -1)]

    if index >= 1:
        # Parent index for the distribution used for the mixture
        index_for_parent = index - 1

        # Shape(u_self) = [Nn,..1,..,N0,Dd,..,D0]
        u_self = []
        for (ind, u_i) in enumerate(u):
            if self.cluster_plate >= 0:
                raise ValueError("Cluster plate axis must be negative")
            u_self.append(
                np.expand_dims(u_i, axis=self.cluster_plate - self.ndims[ind])
            )

        # Message from the mixed distribution
        # Shape(m) = [Nn,..,K,..,N0,Dd,..,D0]
        m = self.raw_distribution.compute_message_to_parent(
            parent, index_for_parent, u_self, *mixed_parents
        )

        # Note: The cluster assignment probabilities can be considered as
        # weights to plate elements.  These weights need to be mapped
        # properly via the plate mapping of the distribution.  Otherwise,
        # nested mixtures won't work, or possibly not any distribution
        # that does something to the plates.  Thus,
        # compute_weights_to_parent is used to transform the weight array.
        #
        # See issue #39 for more details.

        # Shape(p) = [Nn,..,K,..,N0]
        weights = misc.atleast_nd(u_parents[0][0], abs(self.cluster_plate))
        weights = misc.moveaxis(weights, -1, self.cluster_plate)
        weights = self.raw_distribution.compute_weights_to_parent(
            index_for_parent,
            weights,
        )

        # Weigh the elements in the message array
        #
        # TODO/FIXME: This may result in huge intermediate arrays. Need to
        # use einsum!
        parent_ndims = self.ndims_parents[index_for_parent]
        return [
            mi * misc.add_trailing_axes(weights, ndim)
            for (mi, ndim) in zip(m, parent_ndims)
        ]