def update(self, Tij, Pi=None):
    r""" Updates the transition matrix and recomputes all derived quantities

    Parameters
    ----------
    Tij : array_like (n, n)
        New transition matrix. A copy is stored in ``self._Tij``. It must pass
        ``msmana.is_transition_matrix`` and its number of rows must equal
        ``self._nstates``.
    Pi : array_like (n,), optional, default=None
        Initial distribution. It is copied, converted to float and normalized
        to sum to one. If None, the stationary distribution of ``Tij`` is used.
        If the model is stationary (``self._stationary``), a given ``Pi`` must
        agree with the stationary distribution of ``Tij``.

    Raises
    ------
    AssertionError
        If ``Pi`` has negative elements, if ``Tij`` is not a transition matrix
        or has the wrong number of states, if a stationary model is given an
        inconsistent ``Pi``, or if a reversible model is given a
        non-reversible ``Tij``.

    """
    # initial / stationary distribution.
    # Convert to a float array BEFORE validating: comparing a plain list with 0
    # is a TypeError on Python 3 (and always True on Python 2), and integer
    # input would otherwise be normalized with integer division on Python 2.
    # This check needs nothing from the model, so it runs before self is touched.
    if Pi is not None:
        Pi = np.array(Pi, dtype=float)  # always a private float copy
        assert np.all(Pi >= 0), "Given initial distribution contains negative elements."
        Pi = Pi / np.sum(Pi)  # ensure normalization

    # EMMA imports
    from pyemma.msm import analysis as msmana

    # save a copy of the transition matrix
    self._Tij = np.array(Tij)
    assert msmana.is_transition_matrix(self._Tij), "Given transition matrix is not a stochastic matrix"
    assert self._Tij.shape[0] == self._nstates, "Given transition matrix has unexpected number of states "

    if Pi is None:
        # nothing fixed by the caller: use the stationary distribution of the
        # transition matrix, whether or not the model is stationary
        self._Pi = msmana.stationary_distribution(self._Tij)
    else:
        if self._stationary:
            # stationary but stationary distribution is fixed, so the
            # transition matrix must be consistent with it
            pT = msmana.stationary_distribution(self._Tij)
            assert np.allclose(Pi, pT), (
                "Stationary HMM requested, but given distribution is not the "
                "stationary distribution of the given transition matrix."
            )
        self._Pi = Pi

    # reversible: check the stored array copy, not the raw (possibly list) argument
    if self._reversible:
        assert msmana.is_reversible(self._Tij), "Reversible HMM requested, but given transition matrix is not reversible."

    # try to do eigendecomposition by default, because it's very cheap for hidden transition matrices
    from scipy.linalg import LinAlgError
    try:
        if self._reversible:
            self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm="reversible")
            # everything must be real-valued
            self._R = self._R.real
            self._D = self._D.real
            self._L = self._L.real
        else:
            self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm="standard")
        self._eigenvalues = np.diag(self._D)
        self._spectral_decomp_available = True
    except LinAlgError:
        # best effort: the model stays usable, only spectral quantities are flagged as missing
        logger().warn(
            "Eigendecomposition failed for transition matrix\n"
            + str(self._Tij)
            + "\nspectral properties will not be available"
        )
        self._spectral_decomp_available = False
def plot_markov_model(P, pos=None, state_sizes=None, state_scale=1.0, state_colors='#ff5500', state_labels='auto',
                      minflux=1e-6, arrow_scale=1.0, arrow_curvature=1.0, arrow_labels='weights',
                      arrow_label_format='%2.e', max_width=12, max_height=12, figpadding=0.2, show_frame=False,
                      **textkwargs):
    r"""Network representation of MSM transition matrix

    This visualization is not optimized for large matrices. It is meant to be used for the visualization of small
    models with up to 10-20 states, e.g. obtained by a HMM coarse-graining. If used with large network, the automatic
    node positioning will be very slow and may still look ugly.

    Parameters
    ----------
    P : ndarray(n,n) or MSM object with attribute 'transition matrix'
        Transition matrix or MSM object
    pos : ndarray(n,2), optional, default=None
        User-defined positions to draw the states on. If not given, will try to place them automatically.
    state_sizes : ndarray(n), optional, default=None
        User-defined areas of the discs drawn for each state. If not given, the stationary probability of P will be
        used.
    state_scale : float, optional, default=1.0
        Passed through unchanged to ``NetworkPlot.plot_network``.
    state_colors : string, ndarray(n), or list, optional, default='#ff5500' (orange)
        string :
            a Hex code for a single color used for all states
        array :
            n values in [0,1] which will result in a grayscale plot
        list :
            of len = nstates, with a color for each state. The list can mix strings, RGB values and
            hex codes, e.g. :py:obj:`state_colors` = ['g', 'red', [.23, .34, .35], '#ff5500'] is
            possible.
    state_labels : list of strings, optional, default is 'auto'
        A list with a label for each state, to be displayed at the center of each node/state. If left to 'auto',
        the labels are automatically set to the state indices.
    minflux : float, optional, default=1e-6
        The minimal flux (p_i * p_ij) for a transition to be drawn
    arrow_scale : float, optional, default=1.0
        Relative arrow scale. Set to a value different from 1 to increase or decrease the arrow width.
    arrow_curvature : float, optional, default=1.0
        Relative arrow curvature. Set to a value different from 1 to make arrows more or less curved.
    arrow_labels : 'weights', None or a ndarray(n,n) with label strings. Optional, default='weights'
        Strings to be placed upon arrows. If None, no labels will be used. If 'weights', the elements of P will be
        used. If a matrix of strings is given by the user these will be used.
    arrow_label_format : str, optional, default='%2.e'
        The numeric format to print the arrow labels
    max_width = 12
        The maximum figure width
    max_height = 12
        The maximum figure height
    figpadding = 0.2
        The relative figure size used for the padding
    show_frame: boolean (default=False)
        Draw a frame around the network.
    textkwargs : optional argument for the text of the state labels.
        See http://matplotlib.org/api/text_api.html#matplotlib.text.Text for more info

    Returns
    -------
    fig, pos : matplotlib.Figure, ndarray(n,2)
        The first element is whatever ``NetworkPlot.plot_network`` returns (documented here as a Figure object
        containing the plot); the second is ``plot.pos``, the positions of states. Can be used later to plot a
        different network representation (e.g. the flux)

    Examples
    --------
    >>> P = np.array([[0.8,  0.15, 0.05,  0.0,  0.0],
    ...              [0.1,  0.75, 0.05, 0.05, 0.05],
    ...              [0.05,  0.1,  0.8,  0.0,  0.05],
    ...              [0.0,  0.2, 0.0,  0.8,  0.0],
    ...              [0.0,  0.02, 0.02, 0.0,  0.96]])
    >>> plot_markov_model(P) # doctest:+ELLIPSIS
    (<matplotlib.figure.Figure..., array...)

    """
    from pyemma.msm import analysis as msmana
    # always work on a private copy, because small-flux entries are zeroed below
    if isinstance(P, np.ndarray):
        P = P.copy()
    else:
        # MSM object? then get transition matrix first
        P = P.transition_matrix.copy()

    # The stationary distribution serves both as default disc size and as the
    # weight of the flux filter; compute it once, and only when it is needed.
    need_default_sizes = state_sizes is None
    need_flux_filter = minflux > 0
    if need_default_sizes or need_flux_filter:
        mu = msmana.stationary_distribution(P)
    if need_default_sizes:
        state_sizes = mu
    if need_flux_filter:
        # flux F_ij = mu_i * P_ij; transitions below the threshold are not drawn
        F = np.dot(np.diag(mu), P)
        rows, cols = np.where(F < minflux)
        P[rows, cols] = 0.0

    plot = NetworkPlot(P, pos=pos)
    ax = plot.plot_network(state_sizes=state_sizes, state_scale=state_scale, state_colors=state_colors,
                           state_labels=state_labels, arrow_scale=arrow_scale, arrow_curvature=arrow_curvature,
                           arrow_labels=arrow_labels, arrow_label_format=arrow_label_format,
                           max_width=max_width, max_height=max_height, figpadding=figpadding,
                           xticks=False, yticks=False, show_frame=show_frame, **textkwargs)
    return ax, plot.pos
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction
    rate_matrix = False : boolean
        By default (False), T is a transition matrix.
        If set to True, T is a rate matrix.

    Returns
    -------
    tpt: pyemma.msm.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.

    Raises
    ------
    ValueError
        If A or B is empty, if A or B refers to states outside of T, or if T
        is not a transition matrix.
    NotImplementedError
        If rate_matrix is set.

    Notes
    -----
    The central object used in transition path theory is
    the forward and backward committor function.

    TPT (originally introduced in [1]) for continuous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    pyemma.msm.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # cheap argument checks first, before any heavy import or computation
    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    # State labels index the rows of T, so the largest admissible label is n - 1
    # (a label equal to n is already out of range).
    if len(A) > n or len(B) > n or max(A) >= n or max(B) >= n:
        raise ValueError('set A or B defines more states, than given transition matrix.')
    # truthiness instead of `is True`, so that e.g. numpy booleans are honoured as well
    if rate_matrix:
        raise NotImplementedError('TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.')

    import pyemma.msm.analysis as msmana
    if not msmana.is_transition_matrix(T):
        raise ValueError('given matrix T is not a transition matrix')

    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            # for a reversible T the backward committor is the complement of the forward one
            qminus = 1.0 - qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux=False)
    # net flux
    netflux = to_netflux(grossflux)

    # construct flux object
    # NOTE(review): implicit sibling import kept as in the original - confirm it resolves on the targeted Python version
    from reactive_flux import ReactiveFlux
    F = ReactiveFlux(A, B, netflux, mu=mu, qminus=qminus, qplus=qplus, gross_flux=grossflux)
    # done
    return F
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction
    rate_matrix = False : boolean
        By default (False), T is a transition matrix.
        If set to True, T is a rate matrix.

    Returns
    -------
    tpt: pyemma.msm.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.

    Raises
    ------
    ValueError
        If A or B is empty, if A or B refers to states outside of T, or if T
        is not a transition matrix.
    NotImplementedError
        If rate_matrix is set.

    Notes
    -----
    The central object used in transition path theory is
    the forward and backward committor function.

    TPT (originally introduced in [1]) for continuous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    pyemma.msm.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # cheap argument checks first, before any heavy import or computation
    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    # State labels index the rows of T, so the largest admissible label is n - 1
    # (a label equal to n is already out of range).
    if len(A) > n or len(B) > n or max(A) >= n or max(B) >= n:
        raise ValueError('set A or B defines more states, than given transition matrix.')
    # truthiness instead of `is True`, so that e.g. numpy booleans are honoured as well
    if rate_matrix:
        raise NotImplementedError(
            'TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.')

    import pyemma.msm.analysis as msmana
    if not msmana.is_transition_matrix(T):
        raise ValueError('given matrix T is not a transition matrix')

    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            # for a reversible T the backward committor is the complement of the forward one
            qminus = 1.0 - qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux=False)
    # net flux
    netflux = to_netflux(grossflux)

    # construct flux object
    # NOTE(review): implicit sibling import kept as in the original - confirm it resolves on the targeted Python version
    from reactive_flux import ReactiveFlux
    F = ReactiveFlux(A, B, netflux, mu=mu, qminus=qminus, qplus=qplus, gross_flux=grossflux)
    # done
    return F
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first n_cluster eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    n : int
        Number of clusters to group to.
    return_rot : bool, optional, default=False
        If True, the rotation matrix is returned together with the memberships.

    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix)

    Raises
    ------
    ValueError
        If the transition matrix is disconnected or does not fulfill detailed balance.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).

    """
    import warnings

    # test connectivity
    from pyemma.msm.estimation import connected_sets

    labels = connected_sets(P)
    n_components = len(labels)  # (n_components, labels) = connected_components(P, connection='strong')
    if n_components > 1:
        raise ValueError("Transition matrix is disconnected. Cannot use pcca_connected.")

    from pyemma.msm.analysis import stationary_distribution

    pi = stationary_distribution(P)

    from pyemma.msm.analysis import is_reversible

    if not is_reversible(P, mu=pi):
        raise ValueError("Transition matrix does not fulfill detailed balance. "
                         "Make sure to call pcca with a reversible transition matrix estimate")
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    # pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    # orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    # foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    from pyemma.msm.analysis import eigenvectors

    evecs = eigenvectors(P, n)

    # Is there a significant complex component? Warn (instead of raising, which
    # would contradict the message) and continue with the real parts. This is
    # done before the normalization so that math.sqrt only ever sees real values.
    if not np.all(np.isreal(evecs)):
        warnings.warn(
            "The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed balance "
            + "forcing eigenvectors to be real and continuing. Be aware that this is not theoretically solid.")
    evecs = np.real(evecs)

    # normalize each eigenvector with respect to the pi-weighted scalar product
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # create initial solution using PCCA+. This could have negative memberships;
    # only the rotation matrix of the initial solution is needed further on
    _, rot_matrix = _pcca_connected_isa(evecs, n)

    # optimize the rotation matrix with PCCA++.
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    memberships = np.clip(memberships, 0.0, 1.0)

    # renormalize every row (a view into memberships) so that it sums to one
    for row in memberships:
        row /= np.sum(row)

    if return_rot:
        return memberships, rot_matrix
    return memberships
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first n_cluster eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    n : int
        Number of clusters to group to.
    return_rot : bool, optional, default=False
        If True, the rotation matrix is returned together with the memberships.

    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix)

    Raises
    ------
    ValueError
        If the transition matrix is disconnected or does not fulfill detailed balance.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).

    """
    import warnings

    # test connectivity
    from pyemma.msm.estimation import connected_sets

    labels = connected_sets(P)
    n_components = len(labels)  # (n_components, labels) = connected_components(P, connection='strong')
    if n_components > 1:
        raise ValueError("Transition matrix is disconnected. Cannot use pcca_connected.")

    from pyemma.msm.analysis import stationary_distribution

    pi = stationary_distribution(P)

    from pyemma.msm.analysis import is_reversible

    if not is_reversible(P, mu=pi):
        raise ValueError("Transition matrix does not fulfill detailed balance. "
                         "Make sure to call pcca with a reversible transition matrix estimate")
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    # pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    # orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    # foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    from pyemma.msm.analysis import eigenvectors

    evecs = eigenvectors(P, n)

    # Is there a significant complex component? Warn (instead of raising, which
    # would contradict the message) and continue with the real parts. This is
    # done before the normalization so that math.sqrt only ever sees real values.
    if not np.all(np.isreal(evecs)):
        warnings.warn(
            "The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed balance "
            + "forcing eigenvectors to be real and continuing. Be aware that this is not theoretically solid.")
    evecs = np.real(evecs)

    # normalize each eigenvector with respect to the pi-weighted scalar product
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # create initial solution using PCCA+. This could have negative memberships;
    # only the rotation matrix of the initial solution is needed further on
    _, rot_matrix = _pcca_connected_isa(evecs, n)

    # optimize the rotation matrix with PCCA++.
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    memberships = np.clip(memberships, 0.0, 1.0)

    # renormalize every row (a view into memberships) so that it sums to one
    for row in memberships:
        row /= np.sum(row)

    if return_rot:
        return memberships, rot_matrix
    return memberships
def test_statdist(self):
    """stationary_distribution() of the fixture's transition matrix must match the fixture's own result."""
    model = self.bdc
    trans_mat = model.transition_matrix()
    reference = model.stationary_distribution()
    # recompute from the matrix and compare against the fixture's reference values
    assert_allclose(reference, stationary_distribution(trans_mat))
def test_statdist(self):
    """stationary_distribution() of the fixture's sparse transition matrix must match the fixture's own result."""
    model = self.bdc
    sparse_mat = model.transition_matrix_sparse()
    reference = model.stationary_distribution()
    # recompute from the sparse matrix and compare against the fixture's reference values
    computed = stationary_distribution(sparse_mat)
    matches = np.allclose(reference, computed)
    self.assertTrue(matches)