Example #1
def build_msm_from_counts(counts, lag_time, symmetrize, return_rev_counts=False, trim=True):
    """
    Estimates the transition probability matrix from the counts matrix.
    
    Parameters
    ----------
    counts : matrix
        the MSM counts matrix
    lag_time : int
        the lag time used to build the MSM, in frames
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme used to obtain reversible counts
    return_rev_counts : bool
        whether or not to return the reversible counts
    trim : bool
        whether to apply ergodic trimming to the counts matrix

    Returns
    -------
    counts : matrix
        the (possibly trimmed) counts matrix
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray, float
        the equilibrium populations of each state
    mapping : ndarray, int
        a mapping from the original state indices to the trimmed ones
    """
    
    symmetrize = str(symmetrize).lower()
    symmetrization_error = ValueError("Invalid symmetrization scheme requested: %s. Exiting." % symmetrize)
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise symmetrization_error

    if trim:
        counts, mapping = ergodic_trim(counts)
    else:
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme
    if symmetrize == 'mle':
        rev_counts = mle_reversible_count_matrix(counts, prior=0.0)

    elif symmetrize == 'transpose':
        rev_counts = 0.5*(counts + counts.transpose())

    elif symmetrize == 'none':
        rev_counts = counts

    else:
        raise symmetrization_error

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize in ['mle', 'transpose']:
        populations = np.array(rev_counts.sum(0)).flatten()
    elif symmetrize == 'none':
        vectors = msm_analysis.get_eigenvectors(t_matrix, 5)[1]
        populations = vectors[:, 0]
    else:
        populations = None

    if populations is not None:
        populations /= populations.sum()

    return counts, rev_counts, t_matrix, populations, mapping
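
A minimal usage sketch for the function above, assuming NumPy, SciPy, and the MSMLib-style helpers it calls (ergodic_trim, mle_reversible_count_matrix, estimate_transition_matrix) are importable; the random counts matrix is purely illustrative.

# Hypothetical usage sketch for build_msm_from_counts (illustrative data only).
import numpy as np
import scipy.sparse

raw_counts = scipy.sparse.csr_matrix(np.random.randint(1, 10, size=(50, 50)))
counts, rev_counts, t_matrix, populations, mapping = build_msm_from_counts(
    raw_counts, lag_time=1, symmetrize='MLE')
assert abs(populations.sum() - 1.0) < 1e-8  # populations come back normalized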
Example #2
    def fit(self, sequences, y=None):
        """Estimate model parameters.

        Parameters
        ----------
        sequences : list
            List of integer sequences, each of which is one-dimensional
        y : unused parameter

        Returns
        -------
        self
        """
        if self.n_states is None:
            self.n_states = np.max([np.max(x) for x in sequences]) + 1

        from msmbuilder import MSMLib

        MSMLib.logger.info = lambda *args: None
        from msmbuilder.msm_analysis import get_eigenvectors
        from msmbuilder.MSMLib import mle_reversible_count_matrix, estimate_transition_matrix, ergodic_trim

        self.rawcounts_ = self._count_transitions(sequences)
        if self.prior_counts > 0:
            self.rawcounts_ = scipy.sparse.csr_matrix(self.rawcounts_.todense() + self.prior_counts)

        # STEP (1): Ergodic trimming
        if self.ergodic_trim:
            self.rawcounts_, mapping = ergodic_trim(scipy.sparse.csr_matrix(self.rawcounts_))
            self.mapping_ = {}
            for i, j in enumerate(mapping):
                if j != -1:
                    self.mapping_[i] = j
        else:
            self.mapping_ = dict((zip(np.arange(self.n_states), np.arange(self.n_states))))

        # STEP (2): Reversible counts matrix
        if self.reversible_type in ["mle", "MLE"]:
            self.countsmat_ = mle_reversible_count_matrix(self.rawcounts_)
        elif self.reversible_type in ["transpose", "Transpose"]:
            self.countsmat_ = 0.5 * (self.rawcounts_ + self.rawcounts_.T)
        elif self.reversible_type is None:
            self.countsmat_ = self.rawcounts_
        else:
            raise RuntimeError()

        # STEP (3): transition matrix
        self.transmat_ = estimate_transition_matrix(self.countsmat_)

        # STEP (3.5): Stationary eigenvector
        if self.reversible_type in ["mle", "MLE", "transpose", "Transpose"]:
            self.populations_ = np.array(self.countsmat_.sum(0)).flatten()
        elif self.reversible_type is None:
            vectors = get_eigenvectors(self.transmat_, 5)[1]
            self.populations_ = vectors[:, 0]
        else:
            raise RuntimeError()
        self.populations_ /= self.populations_.sum()  # ensure normalization

        return self
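
A hedged usage sketch for the estimator above; the class name `SimpleMSM` is a placeholder for whatever class defines this `fit` method, and the integer sequences are illustrative.

# Usage sketch; `SimpleMSM` is a placeholder for the class defining fit() above.
import numpy as np

sequences = [np.array([0, 0, 1, 1, 2, 2, 1, 0]),
             np.array([2, 2, 1, 1, 0, 0, 1, 2])]
model = SimpleMSM().fit(sequences)
print(model.populations_.sum())   # ~1.0 after the normalization in fit()
print(model.transmat_.shape)      # (n_states, n_states) after ergodic trimming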
Example #3
def plot_distribution(mixture_model, grid, t_matrix=None, eigen=1, n_contours=80):
    """Plot the mixture distribution."""

    xx, yy = grid

    mixture_samples = np.c_[xx.ravel(), yy.ravel()]
    contour_data = mixture_model.score(mixture_samples)
    contour_data = -contour_data.reshape(xx.shape)

    if t_matrix is not None:
        _, vecs = msma.get_eigenvectors(t_matrix, n_eigs=eigen)
        sizes = vecs[:, -1] * 300
        print(vecs[:, -1])
        colors = ['r' if s > 0 else 'b' for s in sizes]
        sizes = np.abs(sizes)
    else:
        sizes = 300
        colors = 'y'


    # Plot means
    means = mixture_model.means_
    pp.scatter(means[:, 0], means[:, 1], c=colors, s=sizes)

    pp.contour(xx, yy, contour_data, n_contours)
Example #4
    def __init__(self, T, num_macrostates, flux_cutoff=None):
        """Base class for PCCA and PCCA+.

        Parameters
        ----------
        T : csr sparse matrix
            Transition matrix
        num_macrostates : int
            Desired number of macrostates
        flux_cutoff : float, optional
            Can be set to discard low-flux eigenvectors.
        """

        self.T = T
        self.num_macrostates = num_macrostates

        self.eigenvalues, self.left_eigenvectors = msm_analysis.get_eigenvectors(
            T, self.num_macrostates)
        utils.normalize_left_eigenvectors(self.left_eigenvectors)

        if flux_cutoff is not None:
            self.eigenvalues, self.left_eigenvectors = utils.trim_eigenvectors_by_flux(
                self.eigenvalues, self.left_eigenvectors, flux_cutoff)
            self.num_macrostates = len(self.eigenvalues)

        self.populations = self.left_eigenvectors[:, 0]
        self.num_microstates = len(self.populations)

        # Construct properly normalized right eigenvectors
        self.right_eigenvectors = utils.construct_right_eigenvectors(
            self.left_eigenvectors, self.populations, self.num_macrostates)
Example #5
def run(tProb, observable, init_pops=None, num_vecs=10, output='evec_amps.h5'):

    if init_pops is None:
        init_pops = np.ones(tProb.shape[0]).astype(float) / float(tProb.shape[0])

    else:
        init_pops = init_pops.astype(float) 
        init_pops /= init_pops.sum()

    assert (observable.shape[0] == init_pops.shape[0])
    assert (observable.shape[0] == tProb.shape[0])
    
    try:
        f = io.loadh('eigs%d.h5' % num_vecs)
        vals = f['vals']
        vecsL = f['vecs']
    except Exception:
        vals, vecsL = msm_analysis.get_eigenvectors(tProb, num_vecs + 1, right=False)
        io.saveh('eigs%d.h5' % num_vecs, vals=vals, vecs=vecsL)

    equil = vecsL[:,0] / vecsL[:,0].sum()

    dyn_vecsL = vecsL[:, 1:]
    # normalize the left and right eigenvectors

    dyn_vecsL /= np.sqrt(np.sum(dyn_vecsL * dyn_vecsL / np.reshape(equil, (-1, 1)), axis=0))

    dyn_vecsR = dyn_vecsL / np.reshape(equil, (-1, 1))

    amps = dyn_vecsL.T.dot(observable) * dyn_vecsR.T.dot(init_pops)

    io.saveh(output, evals=vals[1:], amplitudes=amps)
    logger.info("saved output to %s" % output)
Example #6
def plot_distribution(mixture_model,
                      grid,
                      t_matrix=None,
                      eigen=1,
                      n_contours=80):
    """Plot the mixture distribution."""

    xx, yy = grid

    mixture_samples = np.c_[xx.ravel(), yy.ravel()]
    contour_data = mixture_model.score(mixture_samples)
    contour_data = -contour_data.reshape(xx.shape)

    if t_matrix is not None:
        _, vecs = msma.get_eigenvectors(t_matrix, n_eigs=eigen)
        sizes = vecs[:, -1] * 300
        print(vecs[:, -1])
        colors = ['r' if s > 0 else 'b' for s in sizes]
        sizes = np.abs(sizes)
    else:
        sizes = 300
        colors = 'y'

    # Plot means
    means = mixture_model.means_
    pp.scatter(means[:, 0], means[:, 1], c=colors, s=sizes)

    pp.contour(xx, yy, contour_data, n_contours)
Example #7
def test_get_eigenvectors_left():    
    # just some random counts
    N = 100
    counts = np.random.randint(1, 10, size=(N,N))
    transmat, pi = build_msm(scipy.sparse.csr_matrix(counts), 'MLE')[1:3]

    values0, vectors0 = get_eigenvectors(transmat, 10)
    values1, vectors1 = get_reversible_eigenvectors(transmat, 10)
    values2, vectors2 = get_reversible_eigenvectors(transmat, 10, populations=pi)

    # check that the eigenvalues are the same using the two methods
    np.testing.assert_array_almost_equal(values0, values1)
    
    # check that the eigenvectors returned by both methods are _actually_
    # left eigenvectors of the transmat
    def test_eigenpairs(values, vectors):
        for value, vector in zip(values, vectors.T):
            np.testing.assert_array_almost_equal(
                (transmat.T.dot(vector) / vector).flatten(), np.ones(N)*value)

    np.testing.assert_array_almost_equal(pi, vectors0[:, 0])
    np.testing.assert_array_almost_equal(pi, vectors1[:, 0])
    np.testing.assert_array_almost_equal(pi, vectors2[:, 0])
    test_eigenpairs(values0, vectors0)
    test_eigenpairs(values1, vectors1)
    test_eigenpairs(values2, vectors2)
Example #8
    def __init__(self, T, num_macrostates, flux_cutoff=None):
        """Base class for PCCA and PCCA+.

        Parameters
        ----------
        T : csr sparse matrix
            Transition matrix
        num_macrostates : int
            Desired number of macrostates
        flux_cutoff : float, optional
            Can be set to discard low-flux eigenvectors.
        """

        self.T = T
        self.num_macrostates = num_macrostates

        self.eigenvalues, self.left_eigenvectors = msm_analysis.get_eigenvectors(T, self.num_macrostates)
        utils.normalize_left_eigenvectors(self.left_eigenvectors)

        if flux_cutoff is not None:
            self.eigenvalues, self.left_eigenvectors = utils.trim_eigenvectors_by_flux(
                self.eigenvalues, self.left_eigenvectors, flux_cutoff)
            self.num_macrostates = len(self.eigenvalues)

        self.populations = self.left_eigenvectors[:, 0]
        self.num_microstates = len(self.populations)

        # Construct properly normalized right eigenvectors
        self.right_eigenvectors = utils.construct_right_eigenvectors(
            self.left_eigenvectors, self.populations, self.num_macrostates)  
Example #9
def build_msm(counts, symmetrize='MLE', ergodic_trimming=True):
    """
    Estimates the transition probability matrix from the counts matrix.

    Parameters
    ----------
    counts : matrix
        the MSM counts matrix
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme so that we have reversible counts
    ergodic_trimming : bool, optional
        whether or not to trim states to achieve an ergodic model

    Returns
    -------
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray, float
        the equilibrium populations of each state
    mapping : ndarray, int
        a mapping from the passed counts matrix to the new counts and transition
        matrices
    """

    symmetrize = str(symmetrize).lower()
    symmetrization_error = ValueError("Invalid symmetrization scheme requested: %s. Exiting." % symmetrize)
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise symmetrization_error

    if ergodic_trimming:
        counts, mapping = ergodic_trim(counts)
    else:
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme
    if symmetrize == 'mle':
        rev_counts = mle_reversible_count_matrix(counts)
    elif symmetrize == 'transpose':
        rev_counts = 0.5 * (counts + counts.transpose())
    elif symmetrize == 'none':
        rev_counts = counts
    else:
        raise symmetrization_error

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize in ['mle', 'transpose']:
        populations = np.array(rev_counts.sum(0)).flatten()
    elif symmetrize == 'none':
        vectors = msm_analysis.get_eigenvectors(t_matrix, 5)[1]
        populations = vectors[:, 0]
    else:
        raise symmetrization_error

    populations /= populations.sum()  # ensure normalization

    return rev_counts, t_matrix, populations, mapping
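
A minimal usage sketch for build_msm, mirroring the way the tests later in this collection call it; the random counts matrix is illustrative only.

# Illustrative call to build_msm on a small random counts matrix.
import numpy as np
import scipy.sparse

counts = scipy.sparse.csr_matrix(np.random.randint(1, 10, size=(50, 50)))
rev_counts, t_matrix, populations, mapping = build_msm(counts, symmetrize='Transpose')
assert abs(populations.sum() - 1.0) < 1e-8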
Example #10
def calculate_all_to_all_mfpt(tprob, populations=None):
    """
    Calculate the all-states by all-state matrix of mean first passage
    times.

    This uses the fundamental matrix formalism, and should be much faster
    than GetMFPT for calculating many MFPTs.

    Parameters
    ----------
    tprob : matrix
        transition probability matrix
    populations : array_like, float
        optional argument, the populations of each state. If not supplied,
        it will be computed from scratch

    Returns
    -------
    MFPT : array, float
        MFPT in time units of LagTime, square array for MFPT from i -> j

    See Also
    --------
    GetMFPT : function
        for calculating a subset of the MFPTs, with functionality for including
        a set of sinks
    """

    msm_analysis.check_transition(tprob)
    
    if scipy.sparse.issparse(tprob):
        tprob = tprob.toarray()
        logger.warning('calculate_all_to_all_mfpt does not support sparse linear algebra')

    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 5)
        if np.count_nonzero(np.imag(eigens[1][:,0])) != 0:
            raise ValueError('First eigenvector has imaginary parts')
        populations = np.real(eigens[1][:,0])

    # ensure that tprob is a transition matrix
    msm_analysis.check_transition(tprob)
    num_states = len(populations)
    if tprob.shape[0] != num_states:
        raise ValueError("Shape of tprob and populations vector don't match")

    eye = np.transpose( np.matrix(np.ones(num_states)) )
    limiting_matrix = eye * populations
    #z = scipy.linalg.inv(scipy.sparse.eye(num_states, num_states) - (tprob - limiting_matrix))
    z = scipy.linalg.inv(np.eye(num_states) - (tprob - limiting_matrix))

    # mfpt[i,j] = z[j,j] - z[i,j] / pi[j]
    mfpt = -z
    for j in range(num_states):
        mfpt[:, j] += z[j, j]
        mfpt[:, j] /= populations[j]

    return mfpt
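
A self-contained sketch of the fundamental-matrix relation used above, Z = inv(I - (T - 1*pi^T)) with mfpt[i, j] = (Z[j, j] - Z[i, j]) / pi[j]; the three-state transition matrix is a toy example, not output from any model.

# Standalone illustration of the fundamental-matrix MFPT formula (toy numbers).
import numpy as np

T = np.array([[0.90, 0.08, 0.02],
              [0.10, 0.80, 0.10],
              [0.02, 0.08, 0.90]])
vals, vecs = np.linalg.eig(T.T)                 # left eigenvectors of T
pi = np.real(vecs[:, np.argmax(np.real(vals))])
pi /= pi.sum()                                  # stationary distribution

Z = np.linalg.inv(np.eye(3) - (T - np.outer(np.ones(3), pi)))
mfpt = (np.diag(Z)[None, :] - Z) / pi[None, :]  # mfpt[i, j] = (Z_jj - Z_ij) / pi_j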
Example #11
def GetRateMatrix(T,EigAns=None,FixNegativity=True):
    NumStates=T.shape[0]
    if EigAns is None:
        EigAns = get_eigenvectors(T,NumStates)
    Pi=EigAns[1][:,0]
    print("Done Getting Eigenvectors")
    """
    K=np.zeros((NumStates,NumStates),dtype=T.dtype)
    for i in range(1,NumStates):
	phi=EigAns[1][:,i]
	psi=phi/Pi
	alpha=np.dot(phi,psi)**.5
	psi/=alpha
	phi/=alpha
	K-=np.log(EigAns[0][i])*np.outer(psi,phi)
    """
    #To Check, compare the following transition matrix with the input:
    #T2=scipy.linalg.matfuncs.expm(-K)
    #T2-T

    ev=EigAns[1]
    p=ev[:,0]
    for i in range(NumStates):
        ev[:,i] /= np.dot(ev[:,i]/p, ev[:,i])**.5

    #Ld=np.diag(-np.log(EigAns[0]))
    #K=np.dot(np.dot(np.dot(np.diag(1./Pi),ev),Ld),ev.transpose())
    #return(K)
    print("Getting evT and deleting old ev.")
    ev=np.real(ev).copy()

    lam=EigAns[0]
    lam=np.abs(lam)
    #lam[where(lam<0)]=1/np.e #Anything with negative eigenvalues is set to have timescale 1 lagtime.


    del EigAns


    D=scipy.sparse.dia_matrix((1/p,0),(NumStates,NumStates))
    K=D.dot(ev)
    print("Done 1st mm")

    L=scipy.sparse.dia_matrix((-np.log(lam),0),(NumStates,NumStates))
    K=K.transpose()
    K=L.dot(K)
    print("Done 2nd mm")
    K=K.transpose()
    K=np.array(K,dtype=lam.dtype,order="C")

    ev=ev.transpose()
    ev=np.array(ev,dtype=lam.dtype,order="C")
    K=np.dot(K,ev)
    print("Done 3rd mm")

    if FixNegativity==True:#This enforces "reasonable" constraints on the rate matrix, e.g. negative off diagonals and positive diagonals
        RemoveRateDiagonal(K)
    return(K)
Example #12
def estimate_mle_populations(matrix):
    if msmb_version == '2.8.2':
        t_matrix = estimate_transition_matrix(matrix)
        populations = get_eigenvectors(t_matrix, 1, **kwargs)[1][:, 0]
        return populations
    elif msmb_version == '3.2.0':
        obj = MarkovStateModel()
        populations = obj._fit_mle(matrix)[1]
        return populations
Example #13
def GetRateMatrix(T, EigAns=None, FixNegativity=True):
    NumStates = T.shape[0]
    if EigAns is None:
        EigAns = get_eigenvectors(T, NumStates)
    Pi = EigAns[1][:, 0]
    print("Done Getting Eigenvectors")
    """
    K=np.zeros((NumStates,NumStates),dtype=T.dtype)
    for i in range(1,NumStates):
	phi=EigAns[1][:,i]
	psi=phi/Pi
	alpha=np.dot(phi,psi)**.5
	psi/=alpha
	phi/=alpha
	K-=np.log(EigAns[0][i])*np.outer(psi,phi)
    """
    #To Check, compare the following transition matrix with the input:
    #T2=scipy.linalg.matfuncs.expm(-K)
    #T2-T

    ev = EigAns[1]
    p = ev[:, 0]
    for i in range(NumStates):
        ev[:, i] /= np.dot(ev[:, i] / p, ev[:, i])**.5

    #Ld=np.diag(-np.log(EigAns[0]))
    #K=np.dot(np.dot(np.dot(np.diag(1./Pi),ev),Ld),ev.transpose())
    #return(K)
    print("Getting evT and deleting old ev.")
    ev = np.real(ev).copy()

    lam = EigAns[0]
    lam = np.abs(lam)
    #lam[where(lam<0)]=1/np.e #Anything with negative eigenvalues is set to have timescale 1 lagtime.

    del EigAns

    D = scipy.sparse.dia_matrix((1 / p, 0), (NumStates, NumStates))
    K = D.dot(ev)
    print("Done 1st mm")

    L = scipy.sparse.dia_matrix((-np.log(lam), 0), (NumStates, NumStates))
    K = K.transpose()
    K = L.dot(K)
    print("Done 2nd mm")
    K = K.transpose()
    K = np.array(K, dtype=lam.dtype, order="C")

    ev = ev.transpose()
    ev = np.array(ev, dtype=lam.dtype, order="C")
    K = np.dot(K, ev)
    print("Done 3rd mm")

    if FixNegativity == True:  #This enforces "reasonable" constraints on the rate matrix, e.g. negative off diagonals and positive diagonals
        RemoveRateDiagonal(K)
    return (K)
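
The chain of matrix products above amounts to the spectral reconstruction K = sum_i (-ln lambda_i) * psi_i * phi_i^T, where the phi_i are left eigenvectors, psi_i = phi_i / pi are the matching right eigenvectors, and expm(-K) should approximately recover T (the commented-out check in the snippet says the same). A small dense sketch of that identity, with toy numbers only:

# Dense sketch of the spectral rate-matrix reconstruction (illustrative chain only).
import numpy as np
import scipy.linalg

T = np.array([[0.90, 0.08, 0.02],
              [0.10, 0.80, 0.10],
              [0.02, 0.08, 0.90]])
vals, left = np.linalg.eig(T.T)                   # columns are left eigenvectors
order = np.argsort(-np.real(vals))
vals, left = np.real(vals[order]), np.real(left[:, order])
pi = left[:, 0] / left[:, 0].sum()                # stationary distribution
left /= np.sqrt(np.sum(left * left / pi[:, None], axis=0))  # phi_i^T (phi_i / pi) = 1
right = left / pi[:, None]                        # psi_i = phi_i / pi
K = right.dot(np.diag(-np.log(vals))).dot(left.T)
print(np.abs(scipy.linalg.expm(-K) - T).max())    # ~1e-14: expm(-K) reproduces T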
Example #14
def kl_equilib(gold_eq, comp_tmatrix):
    """Return the KL divergence of comp_tmatrix from gold_tmatrix."""
    comp_vals, comp_vecs = msma.get_eigenvectors(comp_tmatrix, n_eigs=1)
    # Sanity check
    if np.abs(comp_vals[0] - 1.0) > EPS:
        print "Warning, comp eigenvalue is {}".format(comp_vals[0])
    # Do KL
    comp_eq = comp_vecs[0]
    kl = np.sum(np.log(gold_eq / comp_eq) * gold_eq)
    return kl
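
A standalone sketch of the discrete KL divergence computed above, D(gold || comp) = sum_i gold_i * log(gold_i / comp_i); the two distributions are toy values, not model output.

# Toy illustration of the KL divergence between two discrete distributions.
import numpy as np

gold = np.array([0.5, 0.3, 0.2])
comp = np.array([0.4, 0.4, 0.2])
kl = np.sum(np.log(gold / comp) * gold)
print(kl)  # small and positive; exactly zero only when the distributions match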
Example #15
def kl_equilib(gold_eq, comp_tmatrix):
    """Return the KL divergence of comp_tmatrix from gold_tmatrix."""
    comp_vals, comp_vecs = msma.get_eigenvectors(comp_tmatrix, n_eigs=1)
    # Sanity check
    if np.abs(comp_vals[0] - 1.0) > EPS:
        print "Warning, comp eigenvalue is {}".format(comp_vals[0])
    # Do KL
    comp_eq = comp_vecs[0]
    kl = np.sum(np.log(gold_eq / comp_eq) * gold_eq)
    return kl
Example #16
def get_implied_timescales(t_matrix, n_timescales=4, lag_time=1):
    """Get implied timescales from a transition matrix."""
    try:
        vals, vecs = msma.get_eigenvectors(t_matrix, n_eigs=n_timescales + 1)

        implied_timescales = -lag_time / np.log(vals[1:])
        implied_timescales_pad = np.pad(
            implied_timescales, (0, n_timescales - len(implied_timescales)),
            mode='constant')
        return implied_timescales_pad
    except Exception:
        print "+++ Error getting implied timescales +++"
        return np.zeros(n_timescales)
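
A quick standalone check of the implied-timescale relation used above, t_i = -lag_time / ln(lambda_i) for each non-stationary eigenvalue; the lag time and eigenvalues below are illustrative.

# Implied timescales from a toy spectrum.
import numpy as np

lag_time = 5                               # frames (illustrative)
eigenvalues = np.array([1.0, 0.95, 0.80])  # leading eigenvalue is always 1
timescales = -lag_time / np.log(eigenvalues[1:])
print(timescales)                          # ~[97.5, 22.4] frames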
Example #17
def get_implied_timescales(t_matrix, n_timescales=4, lag_time=1):
    """Get implied timescales from a transition matrix."""
    try:
        vals, vecs = msma.get_eigenvectors(t_matrix, n_eigs=n_timescales + 1)

        implied_timescales = -lag_time / np.log(vals[1:])
        implied_timescales_pad = np.pad(implied_timescales,
                                        (0, n_timescales - len(implied_timescales)),
                                        mode='constant')
        return implied_timescales_pad
    except Exception:
        print "+++ Error getting implied timescales +++"
        return np.zeros(n_timescales)
Example #18
def main(dir, coarse , lag, type):
    data=dict()
    rmsd=numpy.loadtxt('%s/Coarsed_r10_gen/Coarsed%s_r10_Gens.rmsd.dat' % (dir, coarse), usecols=(2,))
    #data['rmsd']=numpy.loadtxt('%s/Coarsed_r10_gen/Coarsed%s_r10_Gens.selfrmsd.dat' % (dir, coarse))
    data['rmsd']=numpy.loadtxt('%s/Coarsed_r10_gen/Coarsed%s_r10_Gens.rmsd.dat' % (dir, coarse), usecols=(2,))
    com=numpy.loadtxt('%s/Coarsed_r10_gen/Coarsed%s_r10_Gens.vmd_com.dat' % (dir, coarse), usecols=(1,))
    com=[i/com[0] for i in com]
    data['com']=com[1:]
    modeldir='%s/msml%s_coarse_r10_d%s/' % (dir, lag, coarse)
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)

    map_rmsd=[]
    map_com=[]
    for x in range(0, len(data['rmsd'])):
        if map[x]!=-1:
            map_com.append(data['com'][x])
            map_rmsd.append(data['rmsd'][x])
    
    map_com=numpy.array(map_com)
    map_rmsd=numpy.array(map_rmsd)
    T=mmread('%s/tProb.mtx' % modeldir)
    eigs_m=msm_analysis.get_eigenvectors(T, 10)

    order=numpy.argsort(map_rmsd)
    ordercom=numpy.argsort(map_com)

    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive

    print(numpy.shape(eigs_m[1][:,1]))
    for i in range(1,4):
        if i==0:
            print(numpy.where(eigs_m[1][:,i]==max(eigs_m[1][:,i])))
        else:
            print(numpy.where(eigs_m[1][:,i]==min(eigs_m[1][:,i])))
        pylab.scatter(map_com[ordercom], map_rmsd[ordercom], c=eigs_m[1][ordercom,i], cmap=cm, s=1000*abs(eigs_m[1][ordercom,i]), alpha=0.5)
        print(map_com[ordercom][numpy.argmax(eigs_m[1][ordercom,i])])
        print(eigs_m[1][ordercom,i][1])
#       pylab.scatter(map_rmsd[order], statehelix[order]*100., c=eigs_m[1][:,i], cmap=cm, s=50, alpha=0.7)
        pylab.subplots_adjust(left = 0.1, right = 1.02, bottom = 0.10, top = 0.85, wspace = 0, hspace = 0)
        CB=pylab.colorbar()
        l,b,w,h=pylab.gca().get_position().bounds
        ll, bb, ww, hh=CB.ax.get_position().bounds
        CB.ax.set_position([ll, b+0.1*h, ww, h*0.8])
        ylabel=pylab.ylabel('p53 RMSD to Bound Conformation ($\AA$)')
        xlabel=pylab.xlabel(r'p53 to S100B($\beta$$\beta$) CoM Separation ($\AA$)')
        pylab.ylim(0, max(map_rmsd))
        #pylab.title('Folding and Binding \n Colored by Magnitudes of Slowest Eigenvector Components')
        pylab.savefig('%s/2deigs%i_com_prmsd.pdf' %(modeldir, i),dpi=300)
        pylab.show()
Example #19
def test_eigenvector_norm():
    N = 100
    counts = np.random.randint(1, 10, size=(N,N))
    transmat, pi = build_msm(scipy.sparse.csr_matrix(counts), 'MLE')[1:3]

    left_values0, left_vectors0 = get_eigenvectors(transmat, 10, right=False, normalized=True)
    right_values0, right_vectors0 = get_eigenvectors(transmat, 10, right=True, normalized=True)

    left_values1, left_vectors1 = get_reversible_eigenvectors(transmat, 10, right=False, normalized=True)
    right_values1, right_vectors1 = get_reversible_eigenvectors(transmat, 10, right=True, normalized=True)

    np.testing.assert_array_almost_equal(left_values0, right_values0)
    np.testing.assert_array_almost_equal(left_values1, right_values1)

    test_left_vectors1 = left_vectors1 * np.sign(left_vectors0[0].reshape((1,-1))) * np.sign(left_vectors1[0].reshape((1,-1)))
    test_right_vectors1 = right_vectors1 * np.sign(right_vectors0[0].reshape((1,-1))) * np.sign(right_vectors1[0].reshape((1,-1)))

    np.testing.assert_array_almost_equal(left_vectors0, test_left_vectors1)
    np.testing.assert_array_almost_equal(right_vectors0, test_right_vectors1)

    Id = np.eye(10)

    np.testing.assert_array_almost_equal(np.abs(left_vectors0.T.dot(right_vectors0)), Id)
    np.testing.assert_array_almost_equal(np.abs(left_vectors1.T.dot(right_vectors1)), Id)
Example #20
def analyze_msm(t_matrix, centroids, desc, neigen=4, show=False):
    """Analyze a particular msm.

    Right now, it does this by printing eigenvalues and optionally plotting
    eigenvectors.
    """
    val, vec = msma.get_eigenvectors(t_matrix, neigen)
    oolambda = -1.0 / np.log(val[1:])

    print("\n%s" % desc)
    print("Eigenvalues:\t%s" % val.__str__())
    print("1/lambda:\t%s" % oolambda.__str__())

    if show: plot_eigens(centroids, vec, val, desc)
    return oolambda
Example #21
def analyze_msm(t_matrix, centroids, desc, neigen=4, show=False):
    """Analyze a particular msm.

    Right now, it does this by printing eigenvalues and optionally plotting
    eigenvectors.
    """
    val, vec = msma.get_eigenvectors(t_matrix, neigen)
    oolambda = -1.0 / np.log(val[1:])

    print("\n%s" % desc)
    print("Eigenvalues:\t%s" % val.__str__())
    print("1/lambda:\t%s" % oolambda.__str__())

    if show: plot_eigens(centroids, vec, val, desc)
    return oolambda
Example #22
    def step(self):
        mapping, tprob = self.tprob()

        # if we can guarantee that the counts matrix is reversible, we can
        # do this faster without the eigensolver, but I don't want to do that yet.
        vectors = msm_analysis.get_eigenvectors(tprob, min(5, tprob.shape[0]))[1]
        populations = vectors[:, 0]

        # this chooses the point from the multinomial, but it's now indexed
        # in the mapped (integer) space
        chosen = np.where(np.random.multinomial(1, populations) == 1)[0][0]

        # back out the chosen item as a tuple, such that mapping[k] == chosen
        k = next(k for k, v in mapping.items() if v == chosen)

        self.walker.set_point(k)
Example #23
    def step(self):
        mapping, tprob = self.tprob()

        # if we can guarantee that the counts matrix is reversible, we can
        # do this faster without the eigensolver, but I don't want to do that yet.
        vectors = msm_analysis.get_eigenvectors(tprob, min(5,
                                                           tprob.shape[0]))[1]
        populations = vectors[:, 0]

        # this chooses the point from the multinomial, but it's now indexed
        # in the mapped (integer) space
        chosen = np.where(np.random.multinomial(1, populations) == 1)[0][0]

        # back out the chosen item as a tuple, such that mapping[k] == chosen
        k = next(k for k, v in mapping.items() if v == chosen)

        self.walker.set_point(k)
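
A standalone sketch of the sampling step above: a single multinomial trial draws one state index with probability given by the stationary populations. The populations below are toy values.

# Draw a state from a toy stationary distribution with one multinomial trial.
import numpy as np

populations = np.array([0.6, 0.3, 0.1])
chosen = np.where(np.random.multinomial(1, populations) == 1)[0][0]
print(chosen)  # 0, 1, or 2, drawn with probabilities 0.6 / 0.3 / 0.1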
Example #24
def get_eigenvalues( count_matrix ):


    bad_states = np.array(np.where( count_matrix.sum(axis=1) == 0 )[0]).flatten()

    i_ary = count_matrix.nonzero()[0]
    j_ary = count_matrix.nonzero()[1]

    i_ary = np.concatenate( (i_ary, bad_states) )
    j_ary = np.concatenate( (j_ary, bad_states) )
    new_data = np.concatenate( (count_matrix.data, np.ones(len(bad_states))) )

    print(i_ary.shape, count_matrix.data.shape, new_data.shape, len(bad_states))

    count_matrix = scipy.sparse.csr_matrix( (new_data, (i_ary, j_ary)) )

    #count_matrix = count_matrix.tolil()
    #count_matrix[(bad_states, bad_states)] = 1
    #count_matrix = count_matrix.tocsr()

    print(count_matrix.data.shape, count_matrix.nonzero()[0].shape)
    #NZ = np.array(count_matrix.nonzero()).T

    #keep_ind = []
    #for i in xrange(len(NZ)):
    #    if NZ[i][0] in bad_states or NZ[i][1] in bad_states:
    #        pass
    #    else:
    #        keep_ind.append(i)
    #keep_ind = np.array(keep_ind)

    #N = NZ.max()+1

    #count_matrix = scipy.sparse.csr_matrix( (np.array(count_matrix.data)[keep_ind], NZ[keep_ind].T), shape=(N,N), copy=True )

    try:
        t_matrix = MSMLib.build_msm(count_matrix, symmetrize=args.symmetrize)[1]
    except Exception:
        return None
    
    vals = msm_analysis.get_eigenvectors(t_matrix, args.num_vals, epsilon=1)[0]
    vals.sort()

    return vals[::-1]
Example #25
    def set_coordinate_as_eigvector2(self, lag_time=1, symmetrize='transpose'):
        """
        Set the reaction coordinate to be the second eigenvector of the MSM generated
        by counts, the provided lag_time, and the provided symmetrization method.

        Parameters
        ----------
        lag_time : int
            The MSM lag time to use (in units of frames) in the estimation
            of the MSM transition probability matrix from the `counts` matrix.

        symmetrize : str {'mle', 'transpose', 'none'}
            Which symmetrization method to employ in the estimation of the
            MSM transition probability matrix from the `counts` matrix.
        """

        t_matrix = MSMLib.build_msm_from_counts(self.counts, lag_time, symmetrize)
        v, w = get_eigenvectors(t_matrix, 5)
        self.reaction_coordinate_values = w[:, 1].flatten()

        return
Example #26
    def set_coordinate_as_eigvector2(self, lag_time=1, symmetrize='transpose'):
        """
        Set the reaction coordinate to be the second eigenvector of the MSM generated
        by counts, the provided lag_time, and the provided symmetrization method.

        Parameters
        ----------
        lag_time : int
            The MSM lag time to use (in units of frames) in the estimation
            of the MSM transition probability matrix from the `counts` matrix.

        symmetrize : str {'mle', 'transpose', 'none'}
            Which symmetrization method to employ in the estimation of the
            MSM transition probability matrix from the `counts` matrix.
        """

        t_matrix = MSMLib.build_msm_from_counts(self.counts, lag_time, symmetrize)
        v, w = get_eigenvectors(t_matrix, 5)
        self.reaction_coordinate_values = w[:,1].flatten()

        return
Example #27
    def get_implied_timescales(self, i, num_vals=2):
        # Get the model
        model_sql = self.all_models[i]
        model = get_model_from_sql(model_sql)

        transition_mat = model.transition_matrix
        lag_time = model.lag_time

        print('\n\n')
        print(i)

        if not msma.is_transition_matrix(transition_mat):
            print ("%d is not a transition matrix!" % i)

        eigenvals, _ = msma.get_eigenvectors(transition_mat, num_vals + 1, epsilon=1.0)
        eigenvals = eigenvals[1:]

        oo_lambda = [-1.0 / np.log(ev) for ev in eigenvals]

        print(oo_lambda)

        return oo_lambda
Example #28
#!/usr/bin/env python

from msmbuilder import io
from msmbuilder import msm_analysis
from scipy.io import mmread
from argparse import ArgumentParser
import os

parser = ArgumentParser()
parser.add_argument('-t', dest='tProb', help='transition matrix', default='./tProb.mtx')
parser.add_argument('-o', dest='output', help='output filename', default='./eigs.h5')
parser.add_argument('-n', dest='num_vecs', help='number of eigenvectors to find.', default=500, type=int)

args = parser.parse_args()

if os.path.exists(args.output):
    raise Exception("path (%s) exists!" % args.output)

tProb = mmread(args.tProb)

eigs = msm_analysis.get_eigenvectors(tProb, args.num_vecs)

io.saveh(args.output, vals=eigs[0], vecs=eigs[1])
Example #29
def kl_equilib_setup(gold_tmatrix):
    """Save gold equilibrium population."""
    gold_vals, gold_vecs = msma.get_eigenvectors(gold_tmatrix, n_eigs=1)
    assert np.abs(gold_vals[0] - 1.0) < EPS, 'Gold eigenval is {}'.format(gold_vals[0])
    return gold_vecs[0]
Example #30
def GetEigenvectors_Right(*args, **kwargs):
    warnings.warn(
        'GetEigenvectors_Right is deprecated; use get_eigenvectors() with the keyword right=True'
    )
    kwargs['right'] = True
    return msm_analysis.get_eigenvectors(*args, **kwargs)
Example #31
def build_msm(counts, symmetrize='MLE', ergodic_trimming=True):
    """
    Estimates the transition probability matrix from the counts matrix.

    Parameters
    ----------
    counts : scipy.sparse.csr_matrix
        the MSM counts matrix
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme so that we have reversible counts
    ergodic_trimming : bool, optional
        whether or not to trim states to achieve an ergodic model

    Returns
    -------
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray, float
        the equilibrium populations of each state
    mapping : ndarray, int
        a mapping from the passed counts matrix to the new counts and transition
        matrices
    """

    symmetrize = str(symmetrize).lower()
    symmetrization_error = ValueError("Invalid symmetrization scheme "
                                      "requested: %s. Exiting." % symmetrize)
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise symmetrization_error

    if ergodic_trimming:
        counts, mapping = ergodic_trim(counts)
    else:
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme
    if symmetrize == 'mle':
        if not ergodic_trimming:
            raise ValueError("MLE symmetrization requires ergodic trimming.")
        rev_counts = mle_reversible_count_matrix(counts)
    elif symmetrize == 'transpose':
        rev_counts = 0.5 * (counts + counts.transpose())
    elif symmetrize == 'none':
        rev_counts = counts
    else:
        raise symmetrization_error

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize in ['mle', 'transpose']:
        populations = np.array(rev_counts.sum(0)).flatten()
    elif symmetrize == 'none':
        vectors = msm_analysis.get_eigenvectors(t_matrix, 5)[1]
        populations = vectors[:, 0]
    else:
        raise symmetrization_error

    populations /= populations.sum()  # ensure normalization

    return rev_counts, t_matrix, populations, mapping
Example #32
def kl_equilib_setup(gold_tmatrix):
    """Save gold equilibrium population."""
    gold_vals, gold_vecs = msma.get_eigenvectors(gold_tmatrix, n_eigs=1)
    assert np.abs(gold_vals[0] - 1.0) < EPS, 'Gold eigenval is {}'.format(
        gold_vals[0])
    return gold_vecs[0]
Example #33
def build_msm_from_counts(counts,
                          lag_time,
                          symmetrize,
                          return_rev_counts=False,
                          trim=True):
    """
    Estimates the transition probability matrix from the counts matrix.
    
    Parameters
    ----------
    counts : matrix
        the MSM counts matrix
    lag_time : int
        the lag time used to build the MSM, in frames
    symmetrize : {'MLE', 'Transpose', None}
        symmetrization scheme used to obtain reversible counts
    return_rev_counts : bool
        whether or not to return the reversible counts
    trim : bool
        whether to apply ergodic trimming to the counts matrix

    Returns
    -------
    counts : matrix
        the (possibly trimmed) counts matrix
    rev_counts : matrix
        the estimate of the reversible counts
    t_matrix : matrix
        the transition probability matrix
    populations : ndarray, float
        the equilibrium populations of each state
    mapping : ndarray, int
        a mapping from the original state indices to the trimmed ones
    """

    symmetrize = str(symmetrize).lower()
    symmetrization_error = ValueError(
        "Invalid symmetrization scheme requested: %s. Exiting." % symmetrize)
    if symmetrize not in ['mle', 'transpose', 'none']:
        raise symmetrization_error

    if trim:
        counts, mapping = ergodic_trim(counts)
    else:
        mapping = np.arange(counts.shape[0])

    # Apply a symmetrization scheme
    if symmetrize == 'mle':
        rev_counts = mle_reversible_count_matrix(counts, prior=0.0)

    elif symmetrize == 'transpose':
        rev_counts = 0.5 * (counts + counts.transpose())

    elif symmetrize == 'none':
        rev_counts = counts

    else:
        raise symmetrization_error

    t_matrix = estimate_transition_matrix(rev_counts)

    if symmetrize in ['mle', 'transpose']:
        populations = np.array(rev_counts.sum(0)).flatten()
    elif symmetrize == 'none':
        vectors = msm_analysis.get_eigenvectors(t_matrix, 5)[1]
        populations = vectors[:, 0]
    else:
        populations = None

    if populations is not None:
        populations /= populations.sum()

    return counts, rev_counts, t_matrix, populations, mapping
Example #34
# try to find eigenvectors in a file

file_list = glob.glob("eigs*.h5")

try:
    fn = file_list[0]
    f = io.loadh(fn)
    vals = f['vals']
    vecs = f['vecs']

    vals = vals[:num_vecs + 1]
    vecs = vecs[:, :num_vecs + 1]

except Exception:
    vals, vecs = msm_analysis.get_eigenvectors( T, num_vecs+1 )
    #vals, vecs = eigs( T, k = num_vecs + 1 )

vecs=vecs.real

pi = vecs[:,0] / vecs[:,0].sum()

vecs /= np.sum( np.square( vecs ) / pi.reshape( (-1,1) ), axis=0 )

ord_param_ind = ord_param.argsort()
state_lines = np.array([ np.where( ord_param[ ord_param_ind ] == i )[0][0] for i in np.unique( ord_param ) ])[1:]


for i in range( num_vecs ):
   figure()
   eval = vals[i+1]
Example #35
def GetEigenvectors_Right(*args, **kwargs):
    warnings.warn('GetEigenvectors_Right is deprecated; use get_eigenvectors() with the keyword right=True')
    kwargs['right'] = True
    return msm_analysis.get_eigenvectors(*args, **kwargs)
Example #36
def calculate_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Compute the transition path theory flux matrix.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    fluxes : mm_matrix
        The flux matrix

    Optional Parameters
    -------------------
    populations : nd_array, float
        The equilibrium populations, if not provided is re-calculated
    committors : nd_array, float
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, is calculated from scratch. If provided, `sources`
        and `sinks` are ignored.

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    if scipy.sparse.issparse(tprob):
        dense = False
    else:
        dense = True

    # check if we got the populations
    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 1)
        if np.count_nonzero(np.imag(eigens[1][:, 0])) != 0:
            raise ValueError('First eigenvector has imaginary components')
        populations = np.real(eigens[1][:, 0])

    # check if we got the committors
    if committors is None:
        committors = calculate_committors(sources, sinks, tprob)

    # perform the flux computation
    Indx, Indy = tprob.nonzero()

    n = tprob.shape[0]

    if dense:
        X = np.zeros((n, n))
        Y = np.zeros((n, n))
        X[(np.arange(n), np.arange(n))] = populations * (1.0 - committors)
        Y[(np.arange(n), np.arange(n))] = committors
    else:
        X = scipy.sparse.lil_matrix((n, n))
        Y = scipy.sparse.lil_matrix((n, n))
        X.setdiag(populations * (1.0 - committors))
        Y.setdiag(committors)

    if dense:
        fluxes = np.dot(np.dot(X, tprob), Y)
        fluxes[(np.arange(n), np.arange(n))] = np.zeros(n)
    else:
        fluxes = (X.tocsr().dot(tprob.tocsr())).dot(Y.tocsr())
        # This should be the same as below, but it's a bit messy...
        #fluxes = np.dot(np.dot(X.tocsr(), tprob.tocsr()), Y.tocsr())
        fluxes = fluxes.tolil()
        fluxes.setdiag(np.zeros(n))

    return fluxes
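
Elementwise, the matrix products above compute the TPT flux f_ij = pi_i * (1 - q_i) * T_ij * q_j with the diagonal zeroed, where q is the committor. A dense sketch on a toy three-state chain (source = state 0, sink = state 2; the committor of the middle state works out to 0.5 for these illustrative numbers):

# Dense illustration of the TPT flux formula (toy transition matrix and committors).
import numpy as np

T = np.array([[0.9, 0.1, 0.0],
              [0.1, 0.8, 0.1],
              [0.0, 0.1, 0.9]])
pi = np.array([1.0, 1.0, 1.0]) / 3.0   # symmetric T, so the stationary dist is uniform
q = np.array([0.0, 0.5, 1.0])          # committors for source {0} and sink {2}
fluxes = pi[:, None] * (1.0 - q)[:, None] * T * q[None, :]
np.fill_diagonal(fluxes, 0.0)
print(fluxes)                          # only the 0->1 and 1->2 entries are nonzero (both 1/60)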
Example #37
def pcca_plus(T, N, flux_cutoff=None, do_minimization=True, objective_function="crisp_metastability"):
    """Perform PCCA+ lumping

    Parameters
    ----------
    T : csr sparse matrix
        Transition matrix
    N : int
        desired (maximum) number of macrostates
    flux_cutoff : float, optional
        If desired, discard eigenvectors with flux below this value.
    do_minimization : bool, optional
        If False, skip the optimization of the transformation matrix.
        In general, minimization is recommended.
    objective_function : {'crisp_metastability', 'metastability'}
        Possible objective functions.  See objective for details.
    
    Returns
    -------
    A : ndarray
        The transformation matrix.
    chi : ndarray
        The membership matrix
    vr : ndarray
        The right eigenvectors.
    microstate_mapping : ndarray
        Mapping from microstates to macrostates.
    
    
    Notes
    -----
    PCCA+ is used to construct a "lumped" state decomposition.  First,
    The eigenvalues and eigenvectors are computed for a transition matrix.
    An optimization problem is then used to estimate a mapping from
    microstates to macrostates.
    
    For each microstate i, microstate_mapping[i] is chosen as the
    macrostate with the largest membership (chi) value.
    
    The membership matrix chi is given by chi = dot(vr,A).
    
    Finally, the transformation matrix A is the output of a constrained
    optimization problem.
    

    References
    ----------
    .. [1]  Deuflhard P, et al.  "Identification of almost invariant
    aggregates in reversible nearly uncoupled markov chains,"
    Linear Algebra Appl., vol 315 pp 39-59, 2000.

    .. [2]  Deuflhard P, Weber, M.,  "Robust perron cluster analysis in
     conformation dynamics,"
    Linear Algebra Appl., vol 398 pp 161-184 2005.
    
    .. [3]  Kube S, Weber M.  "A coarse graining method for the
    identification of transition rates between molecular conformations,"
    J. Chem. Phys., vol 126 pp 24103-024113, 2007.
    
    
    See Also
    --------
    PCCA
    """
    lam, vl = msm_analysis.get_eigenvectors(T, N)
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        N = len(lam)
    
    pi = vl[:, 0]

    vr = vl.copy()
    for i in range(N):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    A, chi, microstate_mapping = opt_soft(vr, N, pi, lam, T, do_minimization=do_minimization, objective_function=objective_function)

    return A, chi, vr, microstate_mapping
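
The loop above converts left eigenvectors phi_i into right eigenvectors psi_i = phi_i / pi of a reversible transition matrix, rescaled so that sum_k pi_k * psi_i[k]**2 = 1. A small sketch of just that normalization, with illustrative numbers:

# Normalizing a right eigenvector against the stationary distribution (toy values).
import numpy as np

pi = np.array([0.5, 0.3, 0.2])
phi = np.array([0.25, -0.05, -0.20])      # an illustrative (non-stationary) left eigenvector
psi = phi / pi
psi /= np.sqrt(np.dot(psi * pi, psi))     # now np.sum(pi * psi**2) == 1
print(np.sum(pi * psi**2))                # 1.0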
Example #38
def PCCA(T, num_macro, tolerance=1E-5, flux_cutoff=None):
    """Create a lumped model using the PCCA algorithm.
    
    1.  Iterate over the eigenvectors, starting with the slowest.
    2.  Calculate the spread of that eigenvector within each existing macrostate.
    3.  Pick the macrostate with the largest eigenvector spread.
    4.  Split the macrostate based on the sign of the eigenvector.

    Parameters
    ----------
    T : csr sparse matrix
        A transition matrix
    num_macro : int
        The desired number of states.
    tolerance : float, optional
        Specifies the numerical cutoff to use when splitting states based on sign.
    flux_cutoff : float, optional
        If enabled, discard eigenvectors with flux below this value.

    Returns
    -------
    microstate_mapping : ndarray
        mapping from the Microstate indices to the Macrostate indices

    Notes
    -----
    To construct a Macrostate MSM, you then need to map your Assignment data to
    the new states (e.g. MSMLib.apply_mapping_to_assignments).

    References
    ----------
    .. [1]  Deuflhard P, et al.  "Identification of almost invariant
    aggregates in reversible nearly uncoupled markov chains,"
    Linear Algebra Appl., vol 315 pp 39-59, 2000.

    """

    n = T.shape[0]
    lam, vl = msm_analysis.get_eigenvectors(T, num_macro)
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        num_macro = len(lam)

    pi = vl[:, 0]

    vr = vl.copy()
    for i in range(num_macro):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    #Remove the stationary eigenvalue and eigenvector.
    lam = lam[1:]
    vr = vr[:, 1:]

    microstate_mapping = np.zeros(n, 'int')

    #Function to calculate the spread of a single eigenvector.
    spread = lambda x: x.max() - x.min()
    """
    1.  Iterate over the eigenvectors, starting with the slowest.
    2.  Calculate the spread of that eigenvector within each existing macrostate.
    3.  Pick the macrostate with the largest eigenvector spread.
    4.  Split the macrostate based on the sign of the eigenvector.
    """

    for i in range(num_macro - 1):  # Thus, if we want 2 states, we split once.
        v = vr[:, i]
        AllSpreads = np.array(
            [spread(v[microstate_mapping == k]) for k in range(i + 1)])
        StateToBeSplit = np.argmax(AllSpreads)
        microstate_mapping[(microstate_mapping == StateToBeSplit)
                           & (v >= tolerance)] = i + 1

    return (microstate_mapping)
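
The core of the splitting rule above, shown standalone: within the macrostate being split, microstates whose eigenvector component is at least `tolerance` move into a new macrostate. The eigenvector values below are illustrative.

# Sign-based split used by PCCA, on a toy second eigenvector (first iteration, i = 0).
import numpy as np

tolerance = 1e-5
v = np.array([0.8, 0.6, -0.5, -0.7])           # illustrative eigenvector components
microstate_mapping = np.zeros(4, dtype=int)    # everything starts in macrostate 0
microstate_mapping[(microstate_mapping == 0) & (v >= tolerance)] = 1
print(microstate_mapping)                      # [1 1 0 0]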
Example #39
def PCCA(T, num_macro, tolerance=1E-5, flux_cutoff=None):
    """Create a lumped model using the PCCA algorithm.
    
    1.  Iterate over the eigenvectors, starting with the slowest.
    2.  Calculate the spread of that eigenvector within each existing macrostate.
    3.  Pick the macrostate with the largest eigenvector spread.
    4.  Split the macrostate based on the sign of the eigenvector.

    Parameters
    ----------
    T : csr sparse matrix
        A transition matrix
    num_macro : int
        The desired number of states.
    tolerance : float, optional
        Specifies the numerical cutoff to use when splitting states based on sign.
    flux_cutoff : float, optional
        If enabled, discard eigenvectors with flux below this value.

    Returns
    -------
    microstate_mapping : ndarray
        mapping from the Microstate indices to the Macrostate indices

    Notes
    -----
    To construct a Macrostate MSM, you then need to map your Assignment data to
    the new states (e.g. MSMLib.apply_mapping_to_assignments).

    References
    ----------
    .. [1]  Deuflhard P, et al.  "Identification of almost invariant
    aggregates in reversible nearly uncoupled markov chains,"
    Linear Algebra Appl., vol 315 pp 39-59, 2000.

    """

    n = T.shape[0]
    lam, vl = msm_analysis.get_eigenvectors(T, num_macro)
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        num_macro = len(lam)
    
    pi = vl[:, 0]

    vr = vl.copy()
    for i in range(num_macro):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    #Remove the stationary eigenvalue and eigenvector.
    lam = lam[1:]
    vr = vr[:, 1:]

    microstate_mapping = np.zeros(n, 'int')

    #Function to calculate the spread of a single eigenvector.
    spread = lambda x: x.max() - x.min()
    """
    1.  Iterate over the eigenvectors, starting with the slowest.
    2.  Calculate the spread of that eigenvector within each existing macrostate.
    3.  Pick the macrostate with the largest eigenvector spread.
    4.  Split the macrostate based on the sign of the eigenvector.
    """
    
    for i in range(num_macro - 1):  # Thus, if we want 2 states, we split once.
        v = vr[:, i]
        AllSpreads = np.array([spread(v[microstate_mapping == k]) for k in range(i + 1)])
        StateToBeSplit = np.argmax(AllSpreads)
        microstate_mapping[(microstate_mapping == StateToBeSplit) & (v >= tolerance)] = i + 1

    return(microstate_mapping)
Example #40
from matplotlib.pyplot import *
from scipy.io import mmread
import os, sys, re

matplotlib.rcParams['font.size']=22

#import warnings
#warnings.filterwarnings("ignore",category=DeprecationWarning)
if os.path.exists( options.writeFN +'%d.npy'%options.num_vals ):
   print "Found %s, and using these values rather than recalculating them." % ( options.writeFN+'%d.npy'%options.num_vals )
 
   Vals = np.load( options.writeFN+'%d.npy'%options.num_vals )
else:
   T = mmread( options.T_FN )

   Vals,Vecs = msm_analysis.get_eigenvectors( T, options.num_vals+1 )

   Vals = Vals.real[1:]

   np.save( options.writeFN + '%d.npy' % options.num_vals, Vals )
   print "Saved values"

Vals = Vals[np.where(Vals > 0)]
figure()
subplot(132)

Taus = - options.lag / options.divisor / np.log( Vals )
hlines( Taus, 0, 1, color=options.color)
if options.y_lim is None:
   ylim([10**int(np.log10(Taus.min())),10**int(np.log10(Taus.max())+1)])
else:
Example #41
def main(modeldir, gensfile, write=False):
    if not os.path.exists('%s/eig-states/' % modeldir):
        os.mkdir('%s/eig-states/' % modeldir)
    ohandle=open('%s/eig-states/eiginfo.txt' % modeldir, 'w')
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    gens=Trajectory.load_from_lhdf(gensfile)
    T=mmread('%s/tProb.mtx' % modeldir)
    data=dict()
    data['rmsd']=numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    com=numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    data['com']=com[1:]
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)

    map_rmsd=[]
    map_com=[]
    for x in range(0, len(data['rmsd'])):
        if map[x]!=-1:
            map_com.append(data['com'][x])
            map_rmsd.append(data['rmsd'][x])
    
    map_com=numpy.array(map_com)
    map_rmsd=numpy.array(map_rmsd)
    T=mmread('%s/tProb.mtx' % modeldir)
    eigs_m=msm_analysis.get_eigenvectors(T, 10)

    cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive

    print(numpy.shape(eigs_m[1][:,1]))
    for i in range(0,1):
        order=numpy.argsort(eigs_m[1][:,i])
        if i==0:
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            ohandle.write('state\tgenstate\tmagnitude\trmsd\tcom\n')
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ",  maxes, values
            maxes=numpy.array(maxes)
            if write==True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
        else:
            maxes=[]
            gen_maxes=[]
            values=[]
            ohandle.write('eig%s maxes\n' % i)
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map==n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "maxes at ",  maxes, values
            order=numpy.argsort(eigs_m[1][:,i])
            mins=[]
            gen_mins=[]
            values=[]
            ohandle.write('eig%s mins\n' % i)
            for n in order[:5]:
                gen_mins.append(numpy.where(map==n)[0])
                mins.append(n)
                values.append(eigs_m[1][n,i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map==n)[0], eigs_m[1][n,i], map_rmsd[n], map_com[n]))
            print "mins at ",  mins, values
            if write==True:
                get_structure(modeldir, i, gen_maxes,  maxes, gens, project, ass, type='max')
                get_structure(modeldir, i, gen_mins,  mins, gens, project, ass, type='min')
        pylab.scatter(map_com[order], map_rmsd[order], c=eigs_m[1][order,i], cmap=cm, s=1000*abs(eigs_m[1][order,i]), alpha=0.5)
        print(map_com[order][numpy.argmax(eigs_m[1][order,i])])
        print(eigs_m[1][order,i][1])
        CB=pylab.colorbar()
        l,b,w,h=pylab.gca().get_position().bounds
        ll, bb, ww, hh=CB.ax.get_position().bounds
        CB.ax.set_position([ll, b+0.1*h, ww, h*0.8])
        CB.set_label('Eig%s Magnitudes' % i)
        ylabel=pylab.ylabel('Ligand RMSD to Xtal ($\AA$)')
        xlabel=pylab.xlabel(r'P Active Site - L COM Distance ($\AA$)')
        pylab.legend(loc=8, frameon=False)
        pylab.savefig('%s/2deigs%i_com_prmsd.png' %(modeldir, i),dpi=300)
Example #42
def calculate_all_to_all_mfpt(tprob, populations=None):
    """
    Calculate the all-states by all-state matrix of mean first passage
    times.

    This uses the fundamental matrix formalism, and should be much faster
    than GetMFPT for calculating many MFPTs.

    Parameters
    ----------
    tprob : matrix
        transition probability matrix
    populations : array_like, float
        optional argument, the populations of each state. If not supplied,
        it will be computed from scratch

    Returns
    -------
    MFPT : array, float
        MFPT in time units of LagTime, square array for MFPT from i -> j

    See Also
    --------
    GetMFPT : function
        for calculating a subset of the MFPTs, with functionality for including
        a set of sinks

    References
    ----------
    .. [1] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 
           for Markov jump processes. Multiscale Model. Simul. 7, 1192–1219 
           (2009).
    .. [2] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 
           pathways in network models of coarse-grained protein dynamics. J. 
           Chem. Phys. 130, 205102 (2009).
    """

    msm_analysis.check_transition(tprob)

    if scipy.sparse.issparse(tprob):
        tprob = tprob.toarray()
        logger.warning('calculate_all_to_all_mfpt does not support sparse linear algebra')

    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 1)
        if np.count_nonzero(np.imag(eigens[1][:, 0])) != 0:
            raise ValueError('First eigenvector has imaginary parts')
        populations = np.real(eigens[1][:, 0])

    # ensure that tprob is a transition matrix
    msm_analysis.check_transition(tprob)
    num_states = len(populations)
    if tprob.shape[0] != num_states:
        raise ValueError("Shape of tprob and populations vector don't match")

    eye = np.transpose(np.matrix(np.ones(num_states)))
    limiting_matrix = eye * populations
    #z = scipy.linalg.inv(scipy.sparse.eye(num_states, num_states) - (tprob - limiting_matrix))
    z = scipy.linalg.inv(np.eye(num_states) - (tprob - limiting_matrix))

    # mfpt[i,j] = z[j,j] - z[i,j] / pi[j]
    mfpt = -z
    for j in range(num_states):
        mfpt[:, j] += z[j, j]
        mfpt[:, j] /= populations[j]

    return mfpt
Example #43
from scipy.io import *
from msmbuilder import Trajectory
import numpy
import pylab
from msmbuilder import msm_analysis

T=mmread('./l6/tProb.mtx')
map=numpy.loadtxt('./l6/Mapping.dat')
com_dist=numpy.loadtxt('Gens_com_dist.dat', usecols=(1,))
prmsd=numpy.loadtxt('Gens_p53_rmsd.dat', usecols=(1,))

frames=numpy.where(map!=-1)[0]
stateprmsd=prmsd[frames]
statecom=com_dist[frames]

eigs_m=msm_analysis.get_eigenvectors(T, 10)

#import pdb
#pdb.set_trace()

order=numpy.argsort(stateprmsd)
ordercom=numpy.argsort(statecom)

cm=pylab.cm.get_cmap('RdYlBu_r') #blue will be negative components, red positive

print(numpy.shape(eigs_m[1][:,1]))
print(len(stateprmsd))
print(len(frames))
for i in range(0,4):
    #pylab.scatter(statermsd[order], statehelix[order]*100., c=eigs_m[1][:,i], cmap=cm, s=1000*abs(eigs_m[1][:,i]), alpha=0.7)
    pylab.scatter(statecom[ordercom], stateprmsd[ordercom]*10., c=eigs_m[1][ordercom,i], cmap=cm, s=1000*abs(eigs_m[1][ordercom,i]), alpha=0.5)
Example #44
def pcca_plus(T,
              N,
              flux_cutoff=None,
              do_minimization=True,
              objective_function="crisp_metastability"):
    """Perform PCCA+ lumping

    Parameters
    ----------
    T : csr sparse matrix
        Transition matrix
    N : int
        desired (maximum) number of macrostates
    flux_cutoff : float, optional
        If desired, discard eigenvectors with flux below this value.
    do_minimization : bool, optional
        If False, skip the optimization of the transformation matrix.
        In general, minimization is recommended.
    objective_function : {'crisp_metastability', 'metastability'}
        Possible objective functions.  See objective for details.
    
    Returns
    -------
    A : ndarray
        The transformation matrix.
    chi : ndarray
        The membership matrix
    vr : ndarray
        The right eigenvectors.
    microstate_mapping : ndarray
        Mapping from microstates to macrostates.
    
    
    Notes
    -----
    PCCA+ is used to construct a "lumped" state decomposition.  First,
    The eigenvalues and eigenvectors are computed for a transition matrix.
    An optimization problem is then used to estimate a mapping from
    microstates to macrostates.
    
    For each microstate i, microstate_mapping[i] is chosen as the
    macrostate with the largest membership (chi) value.
    
    The membership matrix chi is given by chi = dot(vr,A).
    
    Finally, the transformation matrix A is the output of a constrained
    optimization problem.
    

    References
    ----------
    .. [1]  Deuflhard P, et al.  "Identification of almost invariant
    aggregates in reversible nearly uncoupled markov chains,"
    Linear Algebra Appl., vol 315 pp 39-59, 2000.

    .. [2]  Deuflhard P, Weber, M.,  "Robust perron cluster analysis in
     conformation dynamics,"
    Linear Algebra Appl., vol 398 pp 161-184 2005.
    
    .. [3]  Kube S, Weber M.  "A coarse graining method for the
    identification of transition rates between molecular conformations,"
    J. Chem. Phys., vol 126 pp 24103-024113, 2007.
    
    
    See Also
    --------
    PCCA
    """
    lam, vl = msm_analysis.get_eigenvectors(T, N)
    normalize_left_eigenvectors(vl)

    if flux_cutoff is not None:
        lam, vl = trim_eigenvectors_by_flux(lam, vl, flux_cutoff)
        N = len(lam)

    pi = vl[:, 0]

    vr = vl.copy()
    for i in range(N):
        vr[:, i] /= pi
        vr[:, i] *= np.sign(vr[0, i])
        vr[:, i] /= np.sqrt(dot(vr[:, i] * pi, vr[:, i]))

    A, chi, microstate_mapping = opt_soft(
        vr,
        N,
        pi,
        lam,
        T,
        do_minimization=do_minimization,
        objective_function=objective_function)

    return A, chi, vr, microstate_mapping
Example #45
def calculate_fluxes(sources, sinks, tprob, populations=None, committors=None):
    """
    Compute the transition path theory flux matrix.

    Parameters
    ----------
    sources : array_like, int
        The set of unfolded/reactant states.
    sinks : array_like, int
        The set of folded/product states.
    tprob : mm_matrix
        The transition matrix.

    Returns
    -------
    fluxes : mm_matrix
        The flux matrix

    Optional Parameters
    -------------------
    populations : nd_array, float
        The equilibrium populations, if not provided is re-calculated
    committors : nd_array, float
        The committors associated with `sources`, `sinks`, and `tprob`.
        If not provided, is calculated from scratch. If provided, `sources`
        and `sinks` are ignored.
    """

    sources, sinks = _check_sources_sinks(sources, sinks)
    msm_analysis.check_transition(tprob)

    if scipy.sparse.issparse(tprob):
        dense = False
    else:
        dense = True

    # check if we got the populations
    if populations is None:
        eigens = msm_analysis.get_eigenvectors(tprob, 5)
        if np.count_nonzero(np.imag(eigens[1][:,0])) != 0:
            raise ValueError('First eigenvector has imaginary components')
        populations = np.real(eigens[1][:,0])

    # check if we got the committors
    if committors is None:
        committors = calculate_committors(sources, sinks, tprob)

    # perform the flux computation
    Indx, Indy = tprob.nonzero()

    n = tprob.shape[0]

    if dense:
        X = np.zeros((n, n))
        Y = np.zeros((n, n))
        X[(np.arange(n), np.arange(n))] = populations * (1.0 - committors)
        Y[(np.arange(n), np.arange(n))] = committors
    else:
        X = scipy.sparse.lil_matrix((n,n))
        Y = scipy.sparse.lil_matrix((n,n))
        X.setdiag( populations * (1.0 - committors))
        Y.setdiag(committors)

    if dense:
        fluxes = np.dot(np.dot(X, tprob), Y)
        fluxes[(np.arange(n), np.arange(n))] = np.zeros(n)
    else:
        fluxes = np.dot(np.dot(X.tocsr(), tprob.tocsr()), Y.tocsr())
        fluxes = fluxes.tolil()
        fluxes.setdiag(np.zeros(n))

    return fluxes