def setUp(self):
    """Build a reproducible MSM fixture from a metastable birth-death chain.

    A trajectory of length 10000 is sampled from a 7-state birth-death
    chain under a fixed seed; count matrix, largest connected set,
    reversible transition matrix, stationary distribution and the leading
    ``self.k`` timescales are stored on the test case.
    """
    # Remember the global RNG state before reseeding
    # (presumably restored in tearDown, which is not visible here).
    self.state = np.random.mtrand.get_state()
    # Fixed seed so that the sampled trajectory is the same on every run.
    np.random.mtrand.seed(42)

    # Birth-death chain: q holds the backward, p the forward probabilities.
    # The pairs (p[2], q[4]) are the rare transitions across the barrier.
    b = 2
    rare = 10 ** (-b)
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2], q[4] = 1.0 - rare, rare
    p[2], p[4] = rare, 1.0 - rare

    P = BirthDeathChain(q, p).transition_matrix()
    self.dtraj = generate_traj(P, 10000, start=0)
    self.tau = 1

    # Estimate the MSM at lag time tau.
    self.C_MSM = cmatrix(self.dtraj, self.tau, sliding=True)
    self.lcc_MSM = largest_connected_set(self.C_MSM)
    self.Ccc_MSM = connected_cmatrix(self.C_MSM, lcc=self.lcc_MSM)
    self.P_MSM = tmatrix(self.Ccc_MSM, reversible=True)
    self.mu_MSM = statdist(self.P_MSM)

    # Reference timescales.
    self.k = 3
    self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
def setUp(self):
    """Build a reproducible MSM fixture from a metastable birth-death chain.

    A trajectory of length 10000 is sampled from a 7-state birth-death
    chain under a fixed seed; count matrix, largest connected set,
    reversible transition matrix, stationary distribution and the leading
    ``self.k`` timescales are stored on the test case.
    """
    # Remember the global RNG state before reseeding
    # (presumably restored in tearDown, which is not visible here).
    self.state = np.random.mtrand.get_state()
    # Fixed seed so that the sampled trajectory is the same on every run.
    np.random.mtrand.seed(42)

    # Birth-death chain: q holds the backward, p the forward probabilities.
    # The pairs (p[2], q[4]) are the rare transitions across the barrier.
    b = 2
    rare = 10 ** (-b)
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2], q[4] = 1.0 - rare, rare
    p[2], p[4] = rare, 1.0 - rare

    P = BirthDeathChain(q, p).transition_matrix()
    self.dtraj = generate_traj(P, 10000, start=0)
    self.tau = 1

    # Estimate the MSM at lag time tau.
    self.C_MSM = cmatrix(self.dtraj, self.tau, sliding=True)
    self.lcc_MSM = largest_connected_set(self.C_MSM)
    self.Ccc_MSM = connected_cmatrix(self.C_MSM, lcc=self.lcc_MSM)
    self.P_MSM = tmatrix(self.Ccc_MSM, reversible=True)
    self.mu_MSM = statdist(self.P_MSM)

    # Reference timescales.
    self.k = 3
    self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
def stationary_distribution(C, P):
    """Stationary distribution of ``P`` assembled per connected set of ``C``.

    Each connected set of the count matrix is treated separately: the
    stationary vector of the corresponding sub-block of ``P`` is scaled by
    the fraction of all counts that originate in that set. The assembled
    vector is renormalized before it is returned.

    Parameters
    ----------
    C : (n, n) array-like
        Count matrix; its connected sets define the sub-blocks.
    P : (n, n) array-like
        Transition matrix, indexed like ``C``.

    Returns
    -------
    pi : (n,) ndarray
        Weighted, normalized stationary distribution.
    """
    import pyemma.msm.analysis as msmana
    import pyemma.msm.estimation as msmest

    n_states = np.shape(C)[0]
    total_counts = np.sum(C)
    pi = np.zeros(n_states)

    for members in msmest.connected_sets(C):
        # Share of all counts that start inside this connected set.
        weight = np.sum(C[members, :]) / total_counts
        block = P[members, :][:, members]
        pi[members] = weight * msmana.statdist(block)

    # Re-enforce normalization.
    pi /= np.sum(pi)
    return pi
def trajectory(self, N, start=None, stop=None):
    """
    Generates a trajectory realization of length N, starting from state s

    Parameters
    ----------
    N : int
        trajectory length
    start : int, optional, default = None
        starting state. If not given, will sample from the stationary
        distribution of P
    stop : int or int-array-like, optional, default = None
        stopping set. If given, the trajectory will be stopped before N
        steps once a state of the stop set is reached

    Returns
    -------
    traj : ndarray of int
        The sampled states. Has length N unless a state of the stop set is
        reached earlier, in which case it ends with (and includes) that
        state. If the start state is already in the stop set, only the
        start state is returned.
    """
    if start is None:
        if self.mudist is None:
            # compute mu, the stationary distribution of P, and cache a
            # sampler for it on the instance
            import pyemma.msm.analysis as msmana

            mu = msmana.statdist(self.P)
            self.mudist = scipy.stats.rv_discrete(values=(range(self.n), mu))
        # sample starting point from mu
        start = self.mudist.rvs()

    # evaluate stopping set as a boolean mask over all states
    stopat = np.zeros(self.n, dtype=bool)
    if stop is not None:
        # atleast_1d makes a scalar ``stop`` iterable; iterating a 0-d
        # array would raise TypeError.
        for s in np.atleast_1d(stop):
            stopat[s] = True

    traj = np.zeros(N, dtype=int)
    traj[0] = start
    # already at a stopping state?
    if stopat[traj[0]]:
        return traj[:1]
    for t in range(1, N):
        traj[t] = self.rgs[traj[t - 1]].rvs()
        if stopat[traj[t]]:
            # truncate: do not return the unfilled (zero) tail
            return traj[:t + 1]
    return traj
def trajectory(self, N, start=None, stop=None):
    """
    Generates a trajectory realization of length N, starting from state s

    Parameters
    ----------
    N : int
        trajectory length
    start : int, optional, default = None
        starting state. If not given, will sample from the stationary
        distribution of P
    stop : int or int-array-like, optional, default = None
        stopping set. If given, the trajectory will be stopped before N
        steps once a state of the stop set is reached

    Returns
    -------
    traj : ndarray of int
        The sampled states. Has length N unless a state of the stop set is
        reached earlier, in which case it ends with (and includes) that
        state. If the start state is already in the stop set, only the
        start state is returned.
    """
    if start is None:
        if self.mudist is None:
            # compute mu, the stationary distribution of P, and cache a
            # sampler for it on the instance
            import pyemma.msm.analysis as msmana

            mu = msmana.statdist(self.P)
            self.mudist = scipy.stats.rv_discrete(values=(range(self.n), mu))
        # sample starting point from mu
        start = self.mudist.rvs()

    # evaluate stopping set as a boolean mask over all states
    stopat = np.zeros(self.n, dtype=bool)
    if stop is not None:
        # atleast_1d makes a scalar ``stop`` iterable; iterating a 0-d
        # array would raise TypeError.
        for s in np.atleast_1d(stop):
            stopat[s] = True

    traj = np.zeros(N, dtype=int)
    traj[0] = start
    # already at a stopping state?
    if stopat[traj[0]]:
        return traj[:1]
    for t in range(1, N):
        traj[t] = self.rgs[traj[t - 1]].rvs()
        if stopat[traj[t]]:
            # truncate: do not return the unfilled (zero) tail
            return traj[:t + 1]
    return traj
def setUp(self):
    """Prepare inputs and reference values for the Chapman-Kolmogorov test.

    A trajectory is sampled from a 7-state birth-death chain under a fixed
    seed. An MSM is estimated at lag ``tau`` and, for the two sets
    ``A = [0, 1, 2]`` and ``B = [4, 5, 6]``, the set-to-set probabilities
    are computed for k = 1..K-1 both by propagating with the MSM and by
    re-estimating a model at lag ``k * tau``. The results are stored as
    expected values on the test case.
    """
    # Remember the global RNG state before reseeding
    # (presumably restored in tearDown, which is not visible here).
    self.state = np.random.mtrand.get_state()
    # Fixed seed so that the sampled trajectory is the same on every run.
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain.
    b = 2
    rare = 10 ** (-b)
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2], q[4] = 1.0 - rare, rare
    p[2], p[4] = rare, 1.0 - rare

    P = BirthDeathChain(q, p).transition_matrix()
    dtraj = generate_traj(P, 10000, start=0)
    tau = 1

    # Estimate the MSM at lag tau.
    C_MSM = cmatrix(dtraj, tau)
    lcc_MSM = largest_connected_set(C_MSM)
    Ccc_MSM = connected_cmatrix(C_MSM, lcc=lcc_MSM)
    P_MSM = tmatrix(Ccc_MSM)
    mu_MSM = statdist(P_MSM)

    # Meta-stable sets and the stationary distribution restricted to them.
    A = [0, 1, 2]
    B = [4, 5, 6]
    groups = (A, B)
    w_MSM = np.zeros((2, mu_MSM.shape[0]))
    for row, members in enumerate(groups):
        w_MSM[row, members] = mu_MSM[members] / mu_MSM[members].sum()

    K = 10
    P_MSM_dense = P_MSM.toarray()
    p_MSM = np.zeros((K, 2))
    p_MD = np.zeros((K, 2))
    eps_MD = np.zeros((K, 2))
    # At k = 0 the probability of still being in the set is one.
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0

    # MSM prediction: propagate the restricted distributions step by step.
    w_MSM_k = 1.0 * w_MSM
    for k in range(1, K):
        w_MSM_k = np.dot(w_MSM_k, P_MSM_dense)
        for row, members in enumerate(groups):
            p_MSM[k, row] = w_MSM_k[row, members].sum()

    # Assume that sets are equal, A(tau) = A(k tau) for all k.
    w_MD = 1.0 * w_MSM
    for k in range(1, K):
        # Build the model at lag time k*tau.
        C_MD = cmatrix(dtraj, k * tau, sliding=True) / (k * tau)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
        c_MD = Ccc_MD.sum(axis=1)
        P_MD = tmatrix(Ccc_MD).toarray()
        w_MD_k = np.dot(w_MD, P_MD)

        for row, members in enumerate(groups):
            prob_MD = w_MD_k[row, members].sum()
            c = c_MD[members].sum()
            p_MD[k, row] = prob_MD
            eps_MD[k, row] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

    # Input
    self.P_MSM = P_MSM
    self.lcc_MSM = lcc_MSM
    self.dtraj = dtraj
    self.tau = tau
    self.K = K
    self.A = A
    self.B = B

    # Expected results
    self.p_MSM = p_MSM
    self.p_MD = p_MD
    self.eps_MD = eps_MD
def stationary_distribution(self):
    """Return the stationary distribution of ``self.T``, computing it once.

    The value is cached in ``self.mu``; ``statdist`` is only called while
    ``self.mu`` is still None. ``self._assert_computed()`` is invoked first
    on every call.
    """
    self._assert_computed()
    cached = self.mu
    if cached is not None:
        return cached
    self.mu = statdist(self.T)
    return self.mu
def cktest(T_MSM, lcc_MSM, dtrajs, lag, K, nsets=2, sets=None, full_output=False):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    T_MSM : (M, M) ndarray or scipy.sparse matrix
        Transition matrix of estimated MSM
    lcc_MSM : array-like
        Largest connected set of the estimated MSM
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test. The list itself is not
        modified.
    full_output : bool, optional
        Return additional information about set_factors

    Returns
    -------
    p_MSM : (K, nsets) ndarray
        p_MSM[k, l] is the probability of making a transition from
        set l to set l after k*lag steps for the MSM computed at 1*lag
    p_MD : (K, nsets) ndarray
        p_MD[k, l] is the probability of making a transition from
        set l to set l after k*lag steps as estimated from the given data
    eps_MD : (K, nsets) ndarray
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]
    set_factors : (K, nsets) ndarray, optional
        set_factor[k, i] is the quotient of the MD and the MSM set probabilities

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation.
        J Chem Phys 134: 174105

    """
    if sets is None:
        # Compute PCCA-sets from MSM at lagtime 1*tau
        if issparse(T_MSM):
            msg = ("Converting sparse transition matrix to dense\n"
                   "since PCCA is currently only implemented for dense matrices.\n"
                   "You can avoid automatic conversion to dense arrays by\n"
                   "giving sets for the Chapman-Kolmogorov test explicitly")
            warnings.warn(msg, UserWarning)
            sets = pcca_sets(T_MSM.toarray(), nsets)
        else:
            sets = pcca_sets(T_MSM, nsets)
    nsets = len(sets)

    # Allocate the results only now that the final number of sets is known;
    # sizing them from the caller's `nsets` breaks for user-defined sets of
    # a different count.
    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))
    set_factors = np.zeros((K, nsets))

    # translate sets from connected-set indexes to full indexes, where the
    # comparison is made. A new list is built so that a list passed in by
    # the caller is left untouched.
    # NOTE(review): the translation is applied to user-defined sets as well,
    # following the statement order of the original code - confirm intended.
    sets = [lcc_MSM[s] for s in sets]

    # Stationary distribution at 1*tau
    mu_MSM = statdist(T_MSM)

    # Mapping to lcc at lagtime 1*tau
    lccmap_MSM = MapToConnectedStateLabels(lcc_MSM)

    # Compute stationary distribution on sets
    w_MSM_1 = np.zeros((nsets, mu_MSM.shape[0]))
    for l in range(nsets):
        A = sets[l]
        A_MSM = lccmap_MSM.map(A)
        w_MSM_1[l, A_MSM] = mu_MSM[A_MSM] / mu_MSM[A_MSM].sum()

    w_MSM_k = 1.0 * w_MSM_1

    # k = 0: no propagation, the probability of staying is one
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0
    set_factors[0, :] = 1.0

    for k in range(1, K):
        # Propagate probability vectors for MSM
        w_MSM_k = propagate(w_MSM_k, T_MSM)

        # Estimate model at k*tau and normalize to make 'uncorrelated'
        C_MD = cmatrix(dtrajs, k * lag, sliding=True) / (k * lag)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)

        # State counts for MD
        c_MD = Ccc_MD.sum(axis=1)

        # Transition matrix at k*tau
        T_MD = tmatrix(Ccc_MD)

        # Mapping to lcc at lagtime k*tau
        lccmap_MD = MapToConnectedStateLabels(lcc_MD)

        # Intersection of lcc_1 and lcc_k.
        # lcc_k is not necessarily contained within lcc_1
        lcc = np.intersect1d(lcc_MSM, lcc_MD)

        # Stationary distribution restricted to lcc at lagtime k*tau
        mu_MD = np.zeros(T_MD.shape[0])

        # Extract stationary values in 'joint' lcc and assign them to
        # their respective position
        mu_MD[lccmap_MD.map(lcc)] = mu_MSM[lccmap_MSM.map(lcc)]

        # Obtain sets and distribution at k*tau
        w_MD_1 = np.zeros((nsets, mu_MD.shape[0]))
        for l in range(nsets):
            A = sets[l]
            # Intersect the set with the lcc at lagtime k*tau
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                w_MD_1[l, A_MD] = mu_MD[A_MD] / mu_MD[A_MD].sum()

        # Propagate vector by the MD model
        w_MD_k = propagate(w_MD_1, T_MD)

        # Compute values
        for l in range(nsets):
            A = sets[l]
            # MSM values
            A_MSM = lccmap_MSM.map(A)
            p_MSM[k, l] = w_MSM_k[l, A_MSM].sum()

            # MD values; sets with no state in the joint lcc keep zeros
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                prob_MD = w_MD_k[l, A_MD].sum()
                p_MD[k, l] = prob_MD
                # Statistical errors
                c = c_MD[A_MD].sum()
                eps_MD[k, l] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)
                set_factors[k, l] = mu_MSM[lccmap_MSM.map(A_new)].sum() / mu_MSM[A_MSM].sum()

    if full_output:
        return p_MSM, p_MD, eps_MD, set_factors
    else:
        return p_MSM, p_MD, eps_MD
def setUp(self):
    """Create two toy systems with reference TPT results.

    The first is a 5-state chain with A = [0] and B = [4]; the second is a
    16-state chain that is symmetrized to be reversible, with A = [0, 4]
    and B = [11, 15] plus a coarse-graining into four sets. The
    ``self.ref*`` attributes hold the reference numbers; ``self.tpt1`` and
    ``self.tpt2`` are the objects returned by ``msmapi.tpt``.
    """
    # ---- 5-state toy system ------------------------------------------
    self.P = np.array([
        [0.8, 0.15, 0.05, 0.0, 0.0],
        [0.1, 0.75, 0.05, 0.05, 0.05],
        [0.05, 0.1, 0.8, 0.0, 0.05],
        [0.0, 0.2, 0.0, 0.8, 0.0],
        [0.0, 0.02, 0.02, 0.0, 0.96],
    ])
    self.A = [0]
    self.B = [4]
    self.I = [1, 2, 3]

    # Reference solution for the path decomposition.
    self.ref_committor = np.array(
        [0.0, 0.35714286, 0.42857143, 0.35714286, 1.0])
    self.ref_backwardcommittor = np.array(
        [1.0, 0.65384615, 0.53125, 0.65384615, 0.0])
    self.ref_grossflux = np.array([
        [0.0, 0.00771792, 0.00308717, 0.0, 0.0],
        [0.0, 0.0, 0.00308717, 0.00257264, 0.00720339],
        [0.0, 0.00257264, 0.0, 0.0, 0.00360169],
        [0.0, 0.00257264, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
    ])
    self.ref_netflux = np.array([
        [0.0, 7.71791768e-03, 3.08716707e-03, 0.0, 0.0],
        [0.0, 0.0, 5.14527845e-04, 0.0, 7.20338983e-03],
        [0.0, 0.0, 0.0, 0.0, 3.60169492e-03],
        [0.0, 4.33680869e-19, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
    ])
    self.ref_totalflux = 0.0108050847458
    self.ref_kAB = 0.0272727272727
    self.ref_mfptAB = 36.6666666667

    self.ref_paths = [[0, 1, 4], [0, 2, 4], [0, 1, 2, 4]]
    self.ref_pathfluxes = np.array(
        [0.00720338983051, 0.00308716707022, 0.000514527845036])

    self.ref_paths_99percent = [[0, 1, 4], [0, 2, 4]]
    self.ref_pathfluxes_99percent = np.array(
        [0.00720338983051, 0.00308716707022])
    self.ref_majorflux_99percent = np.array([
        [0.0, 0.00720339, 0.00308717, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.00720339],
        [0.0, 0.0, 0.0, 0.0, 0.00308717],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
    ])

    # Object under test for the 5-state system.
    self.tpt1 = msmapi.tpt(self.P, self.A, self.B)

    # ---- 16-state toy system -----------------------------------------
    P2_nonrev = np.array([
        [0.5, 0.2, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.2, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.1, 0.5, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.3, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.0, 0.0, 0.0, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.5, 0.2, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.1, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.1, 0.5, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2, 0.5],
    ])
    pstat2_nonrev = msmana.statdist(P2_nonrev)

    # Make reversible: symmetrize the flux matrix diag(pi) P and
    # row-normalize the result.
    C = np.dot(np.diag(pstat2_nonrev), P2_nonrev)
    Csym = C + C.T
    row_sums = np.sum(Csym, axis=1)
    self.P2 = Csym / row_sums[:, np.newaxis]
    # Computed as in the original fixture; not used further in this method.
    pstat2 = msmana.statdist(self.P2)

    self.A2 = [0, 4]
    self.B2 = [11, 15]
    self.coarsesets2 = [
        [2, 3, 6, 7],
        [10, 11, 14, 15],
        [0, 1, 4, 5],
        [8, 9, 12, 13],
    ]

    # Reference solution for the coarse-grained system.
    self.ref2_tpt_sets = [
        {0, 4},
        {2, 3, 6, 7},
        {10, 14},
        {1, 5},
        {8, 9, 12, 13},
        {11, 15},
    ]
    self.ref2_cgA = [0]
    self.ref2_cgI = [1, 2, 3, 4]
    self.ref2_cgB = [5]
    self.ref2_cgpstat = np.array(
        [0.15995388, 0.18360442, 0.12990937, 0.11002342, 0.31928127, 0.09722765])
    self.ref2_cgcommittor = np.array(
        [0.0, 0.56060272, 0.73052426, 0.19770537, 0.36514272, 1.0])
    self.ref2_cgbackwardcommittor = np.array(
        [1.0, 0.43939728, 0.26947574, 0.80229463, 0.63485728, 0.0])
    self.ref2_cggrossflux = np.array([
        [0.0, 0.0, 0.0, 0.00427986, 0.00282259, 0.0],
        [0.0, 0, 0.00234578, 0.00104307, 0.0, 0.00201899],
        [0.0, 0.00113892, 0, 0.0, 0.00142583, 0.00508346],
        [0.0, 0.00426892, 0.0, 0, 0.00190226, 0.0],
        [0.0, 0.0, 0.00530243, 0.00084825, 0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    ])
    self.ref2_cgnetflux = np.array([
        [0.0, 0.0, 0.0, 0.00427986, 0.00282259, 0.0],
        [0.0, 0.0, 0.00120686, 0.0, 0.0, 0.00201899],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.00508346],
        [0.0, 0.00322585, 0.0, 0.0, 0.00105401, 0.0],
        [0.0, 0.0, 0.0038766, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    ])

    # Object under test for the 16-state system.
    self.tpt2 = msmapi.tpt(self.P2, self.A2, self.B2)
def cktest(T_MSM, lcc_MSM, dtrajs, lag, K, nsets=2, sets=None, full_output=False):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    T_MSM : (M, M) ndarray or scipy.sparse matrix
        Transition matrix of estimated MSM
    lcc_MSM : array-like
        Largest connected set of the estimated MSM
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test. The list itself is not
        modified.
    full_output : bool, optional
        Return additional information about set_factors

    Returns
    -------
    p_MSM : (K, nsets) ndarray
        p_MSM[k, l] is the probability of making a transition from
        set l to set l after k*lag steps for the MSM computed at 1*lag
    p_MD : (K, nsets) ndarray
        p_MD[k, l] is the probability of making a transition from
        set l to set l after k*lag steps as estimated from the given data
    eps_MD : (K, nsets) ndarray
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]
    set_factors : (K, nsets) ndarray, optional
        set_factor[k, i] is the quotient of the MD and the MSM set probabilities

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation.
        J Chem Phys 134: 174105

    """
    if sets is None:
        # Compute PCCA-sets from MSM at lagtime 1*tau
        if issparse(T_MSM):
            msg = ("Converting sparse transition matrix to dense\n"
                   "since PCCA is currently only implemented for dense matrices.\n"
                   "You can avoid automatic conversion to dense arrays by\n"
                   "giving sets for the Chapman-Kolmogorov test explicitly")
            warnings.warn(msg, UserWarning)
            sets = pcca_sets(T_MSM.toarray(), nsets)
        else:
            sets = pcca_sets(T_MSM, nsets)
    nsets = len(sets)

    # Allocate the results only now that the final number of sets is known;
    # sizing them from the caller's `nsets` breaks for user-defined sets of
    # a different count.
    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))
    set_factors = np.zeros((K, nsets))

    # translate sets from connected-set indexes to full indexes, where the
    # comparison is made. A new list is built so that a list passed in by
    # the caller is left untouched.
    # NOTE(review): the translation is applied to user-defined sets as well,
    # following the statement order of the original code - confirm intended.
    sets = [lcc_MSM[s] for s in sets]

    # Stationary distribution at 1*tau
    mu_MSM = statdist(T_MSM)

    # Mapping to lcc at lagtime 1*tau
    lccmap_MSM = MapToConnectedStateLabels(lcc_MSM)

    # Compute stationary distribution on sets
    w_MSM_1 = np.zeros((nsets, mu_MSM.shape[0]))
    for l in range(nsets):
        A = sets[l]
        A_MSM = lccmap_MSM.map(A)
        w_MSM_1[l, A_MSM] = mu_MSM[A_MSM] / mu_MSM[A_MSM].sum()

    w_MSM_k = 1.0 * w_MSM_1

    # k = 0: no propagation, the probability of staying is one
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0
    set_factors[0, :] = 1.0

    for k in range(1, K):
        # Propagate probability vectors for MSM
        w_MSM_k = propagate(w_MSM_k, T_MSM)

        # Estimate model at k*tau and normalize to make 'uncorrelated'
        C_MD = cmatrix(dtrajs, k * lag, sliding=True) / (k * lag)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)

        # State counts for MD
        c_MD = Ccc_MD.sum(axis=1)

        # Transition matrix at k*tau
        T_MD = tmatrix(Ccc_MD)

        # Mapping to lcc at lagtime k*tau
        lccmap_MD = MapToConnectedStateLabels(lcc_MD)

        # Intersection of lcc_1 and lcc_k.
        # lcc_k is not necessarily contained within lcc_1
        lcc = np.intersect1d(lcc_MSM, lcc_MD)

        # Stationary distribution restricted to lcc at lagtime k*tau
        mu_MD = np.zeros(T_MD.shape[0])

        # Extract stationary values in 'joint' lcc and assign them to
        # their respective position
        mu_MD[lccmap_MD.map(lcc)] = mu_MSM[lccmap_MSM.map(lcc)]

        # Obtain sets and distribution at k*tau
        w_MD_1 = np.zeros((nsets, mu_MD.shape[0]))
        for l in range(nsets):
            A = sets[l]
            # Intersect the set with the lcc at lagtime k*tau
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                w_MD_1[l, A_MD] = mu_MD[A_MD] / mu_MD[A_MD].sum()

        # Propagate vector by the MD model
        w_MD_k = propagate(w_MD_1, T_MD)

        # Compute values
        for l in range(nsets):
            A = sets[l]
            # MSM values
            A_MSM = lccmap_MSM.map(A)
            p_MSM[k, l] = w_MSM_k[l, A_MSM].sum()

            # MD values; sets with no state in the joint lcc keep zeros
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                prob_MD = w_MD_k[l, A_MD].sum()
                p_MD[k, l] = prob_MD
                # Statistical errors
                c = c_MD[A_MD].sum()
                eps_MD[k, l] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)
                set_factors[k, l] = mu_MSM[lccmap_MSM.map(A_new)].sum() / mu_MSM[A_MSM].sum()

    if full_output:
        return p_MSM, p_MD, eps_MD, set_factors
    else:
        return p_MSM, p_MD, eps_MD
def chapman_kolmogorov(dtrajs, lag, K, nsets=2, sets=None):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test

    Returns
    -------
    p_MSM : (K, n_sets) ndarray
        For k >= 1, p_MSM[k, l] is the probability of making a transition
        from set l to set l after (k+1)*lag steps for the MSM computed at
        1*lag. Row 0 is left at zero.
    p_MD : (K, n_sets) ndarray
        For k >= 1, p_MD[k, l] is the probability of making a transition
        from set l to set l after (k+1)*lag steps as estimated from the
        given data. Row 0 is left at zero.
    eps_MD : (K, n_sets) ndarray
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]

    Notes
    -----
    NOTE(review): the loop runs over k = 1..K-1 but uses the lag
    (k+1)*lag, so row 0 is never filled and no row corresponds to 1*lag.
    This looks like an off-by-one; the indexing is kept unchanged here
    because the intended convention cannot be determined from this
    function alone.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation.
        J Chem Phys 134: 174105

    """
    C_1 = cmatrix(dtrajs, lag, sliding=True)
    lcc_1 = largest_connected_set(C_1)

    # Compute PCCA-sets from MSM at lagtime 1*tau
    if sets is None:
        Ccc_1 = connected_cmatrix(C_1, lcc=lcc_1)
        T_1 = tmatrix(Ccc_1)
        sets = pcca_sets(T_1.toarray(), nsets, lcc_1)

    # Size the results by the sets actually used; the `nsets` argument may
    # differ from len(sets) when the sets are given by the caller.
    nsets = len(sets)
    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))

    for k in range(1, K):
        steps = k + 1
        C_k = cmatrix(dtrajs, steps * lag, sliding=True)
        lcc_k = largest_connected_set(C_k)

        # Both models are restricted to the states connected at both lags.
        lcc = np.intersect1d(lcc_1, lcc_k)
        Ccc_1 = connected_cmatrix(C_1, lcc=lcc)
        Ccc_k = connected_cmatrix(C_k, lcc=lcc)

        T_1 = tmatrix(Ccc_1).toarray()
        T_k = tmatrix(Ccc_k).toarray()

        mu = statdist(T_1)

        # MSM prediction for `steps` lag times.
        T_1_k = np.linalg.matrix_power(T_1, steps)

        lccmap = MapToConnectedStateLabels(lcc)
        C = Ccc_k.toarray()
        for l, members in enumerate(sets):
            # Stationary distribution restricted to the set, normalized.
            w = np.zeros(len(lcc))
            inds = lccmap.map(members)
            nu = mu[inds]
            nu /= nu.sum()
            w[inds] = nu

            w_1_k = np.dot(w, T_1_k)
            w_k = np.dot(w, T_k)

            prob_MD = np.sum(w_k[inds])
            prob_MSM = np.sum(w_1_k[inds])

            p_MD[k, l] = prob_MD
            p_MSM[k, l] = prob_MSM

            # Statistical errors
            c = C[inds, :].sum()
            eps_MD[k, l] = np.sqrt(steps * (prob_MD - prob_MD ** 2) / c)

    return p_MSM, p_MD, eps_MD
def stationary_distribution(self):
    """Return the stationary distribution of ``self.T``, computing it once.

    The value is cached in ``self.mu``; ``statdist`` is only called while
    ``self.mu`` is still None. ``self._assert_computed()`` is invoked first
    on every call.
    """
    self._assert_computed()
    cached = self.mu
    if cached is not None:
        return cached
    self.mu = statdist(self.T)
    return self.mu
def setUp(self):
    """Create two toy systems with reference TPT results.

    The first is a 5-state chain with A = [0] and B = [4]; the second is a
    16-state chain that is symmetrized to be reversible, with A = [0, 4]
    and B = [11, 15] plus a coarse-graining into four sets. The
    ``self.ref*`` attributes hold the reference numbers; ``self.tpt1`` and
    ``self.tpt2`` are the objects returned by ``msmapi.tpt``.
    """
    # ---- 5-state toy system ------------------------------------------
    self.P = np.array([
        [0.8, 0.15, 0.05, 0.0, 0.0],
        [0.1, 0.75, 0.05, 0.05, 0.05],
        [0.05, 0.1, 0.8, 0.0, 0.05],
        [0.0, 0.2, 0.0, 0.8, 0.0],
        [0.0, 0.02, 0.02, 0.0, 0.96],
    ])
    self.A = [0]
    self.B = [4]
    self.I = [1, 2, 3]

    # Reference solution for the path decomposition.
    self.ref_committor = np.array(
        [0.0, 0.35714286, 0.42857143, 0.35714286, 1.0])
    self.ref_backwardcommittor = np.array(
        [1.0, 0.65384615, 0.53125, 0.65384615, 0.0])
    self.ref_grossflux = np.array([
        [0.0, 0.00771792, 0.00308717, 0.0, 0.0],
        [0.0, 0.0, 0.00308717, 0.00257264, 0.00720339],
        [0.0, 0.00257264, 0.0, 0.0, 0.00360169],
        [0.0, 0.00257264, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
    ])
    self.ref_netflux = np.array([
        [0.0, 7.71791768e-03, 3.08716707e-03, 0.0, 0.0],
        [0.0, 0.0, 5.14527845e-04, 0.0, 7.20338983e-03],
        [0.0, 0.0, 0.0, 0.0, 3.60169492e-03],
        [0.0, 4.33680869e-19, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
    ])
    self.ref_totalflux = 0.0108050847458
    self.ref_kAB = 0.0272727272727
    self.ref_mfptAB = 36.6666666667

    self.ref_paths = [[0, 1, 4], [0, 2, 4], [0, 1, 2, 4]]
    self.ref_pathfluxes = np.array(
        [0.00720338983051, 0.00308716707022, 0.000514527845036])

    self.ref_paths_99percent = [[0, 1, 4], [0, 2, 4]]
    self.ref_pathfluxes_99percent = np.array(
        [0.00720338983051, 0.00308716707022])
    self.ref_majorflux_99percent = np.array([
        [0.0, 0.00720339, 0.00308717, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.00720339],
        [0.0, 0.0, 0.0, 0.0, 0.00308717],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
    ])

    # Object under test for the 5-state system.
    self.tpt1 = msmapi.tpt(self.P, self.A, self.B)

    # ---- 16-state toy system -----------------------------------------
    P2_nonrev = np.array([
        [0.5, 0.2, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.2, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.1, 0.5, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.3, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.0, 0.0, 0.0, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.5, 0.2, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.1, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.1, 0.5, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2, 0.5],
    ])
    pstat2_nonrev = msmana.statdist(P2_nonrev)

    # Make reversible: symmetrize the flux matrix diag(pi) P and
    # row-normalize the result.
    C = np.dot(np.diag(pstat2_nonrev), P2_nonrev)
    Csym = C + C.T
    row_sums = np.sum(Csym, axis=1)
    self.P2 = Csym / row_sums[:, np.newaxis]
    # Computed as in the original fixture; not used further in this method.
    pstat2 = msmana.statdist(self.P2)

    self.A2 = [0, 4]
    self.B2 = [11, 15]
    self.coarsesets2 = [
        [2, 3, 6, 7],
        [10, 11, 14, 15],
        [0, 1, 4, 5],
        [8, 9, 12, 13],
    ]

    # Reference solution for the coarse-grained system.
    self.ref2_tpt_sets = [
        {0, 4},
        {2, 3, 6, 7},
        {10, 14},
        {1, 5},
        {8, 9, 12, 13},
        {11, 15},
    ]
    self.ref2_cgA = [0]
    self.ref2_cgI = [1, 2, 3, 4]
    self.ref2_cgB = [5]
    self.ref2_cgpstat = np.array(
        [0.15995388, 0.18360442, 0.12990937, 0.11002342, 0.31928127, 0.09722765])
    self.ref2_cgcommittor = np.array(
        [0.0, 0.56060272, 0.73052426, 0.19770537, 0.36514272, 1.0])
    self.ref2_cgbackwardcommittor = np.array(
        [1.0, 0.43939728, 0.26947574, 0.80229463, 0.63485728, 0.0])
    self.ref2_cggrossflux = np.array([
        [0.0, 0.0, 0.0, 0.00427986, 0.00282259, 0.0],
        [0.0, 0, 0.00234578, 0.00104307, 0.0, 0.00201899],
        [0.0, 0.00113892, 0, 0.0, 0.00142583, 0.00508346],
        [0.0, 0.00426892, 0.0, 0, 0.00190226, 0.0],
        [0.0, 0.0, 0.00530243, 0.00084825, 0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    ])
    self.ref2_cgnetflux = np.array([
        [0.0, 0.0, 0.0, 0.00427986, 0.00282259, 0.0],
        [0.0, 0.0, 0.00120686, 0.0, 0.0, 0.00201899],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.00508346],
        [0.0, 0.00322585, 0.0, 0.0, 0.00105401, 0.0],
        [0.0, 0.0, 0.0038766, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    ])

    # Object under test for the 16-state system.
    self.tpt2 = msmapi.tpt(self.P2, self.A2, self.B2)