def setup_parser():
    """Build the parser for the parallel assignment script and parse argv.

    Despite its name, this function both declares the command-line
    arguments and parses them.

    Returns
    -------
    args
        First element returned by ``parser.parse_args()``.
    metric
        Second element returned by ``parser.parse_args()`` for a parser
        created with ``get_metric=True`` (presumably the distance metric
        chosen on the command line -- confirm against arglib).
    """
    # The help text is passed as ``description=``, like most other parsers in
    # this file.  NOTE(review): if arglib forwards positional arguments to
    # argparse.ArgumentParser, a positional string would be taken as ``prog``
    # instead of the description -- confirm against arglib.
    parser = arglib.ArgumentParser(description="""
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use
AssignHierarchical.py)
    
This code uses IPython.parallel to get parallelism accross many nodes. Consult
the documentation for details on how to run it""",
                                   get_metric=True)
    parser.add_argument('project')
    parser.add_argument(dest='generators',
                        help='''Trajectory file containing
        the structures of each of the cluster centers.''')
    parser.add_argument('output_dir')
    parser.add_argument('chunk_size',
                        help='''Number of frames to processes per worker.
        Each chunk requires some communication overhead, so you should use relativly large chunks''',
                        default=1000,
                        type=int)
    parser.add_argument('profile',
                        help='IPython.parallel profile to use.',
                        default='default')
    parser.add_argument('cluster_id',
                        help='IPython.parallel cluster_id to use',
                        default='')

    args, metric = parser.parse_args()
    return args, metric
Пример #2
0
def main():
    """Assign data using a hierarchical clustering and save the assignments.

    Parses the command line, requires at least one of ``num_states`` or
    ``cutoff_distance`` to be something other than the string ``'none'``,
    refuses to overwrite an existing output path, then computes and saves
    the assignments.

    Exits with status 1 when neither a number of states nor a cutoff
    distance is supplied.
    """
    parser = arglib.ArgumentParser(
        description='Assign data using a hierarchical clustering')
    parser.add_argument('hierarchical_clustering_zmatrix',
                        default='./Data/Zmatrix.h5',
                        help='Path to hierarchical clustering zmatrix')
    parser.add_argument('num_states', help='Number of States', default='none')
    parser.add_argument('cutoff_distance',
                        help='Maximum cophenetic distance',
                        default='none')
    parser.add_argument('assignments', type=str)
    args = parser.parse_args()

    # The literal string 'none' is the "not given" marker for both options.
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance'
        )
        sys.exit(1)

    arglib.die_if_path_exists(args.assignments)

    # The clustering must come from the parsed arguments: the bare name
    # ``hierarchical_clustering_zmatrix`` is never bound in this function.
    # NOTE(review): assumes arglib resolves this argument to a loaded
    # clustering object that exposes get_assignments() -- confirm; if it is
    # only a path, it has to be loaded first.
    assignments = args.hierarchical_clustering_zmatrix.get_assignments(
        k=k, cutoff_distance=d)

    msmbuilder.io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Пример #3
0
def main():
    """Parse the command line and run checkpointed assignment.

    Creates the output directory when it does not exist, loads the project
    and the generators, and hands them to ``assign_with_checkpoint`` together
    with the metric and the two output paths inside the output directory.
    """
    help_text = """
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use 
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a 
microstate in a rectangular array of ints. Assignments.h5.distances is an 
array of real numbers of the same dimension containing the distance (according 
to whichever metric you choose) from each frame to to the medoid of the 
microstate it is assigned to."""
    parser = arglib.ArgumentParser(description=help_text, get_metric=True)

    parser.add_argument('project')
    parser.add_argument(
        dest='generators',
        default='Data/Gens.lh5',
        help='''Output trajectory file containing
        the structures of each of the cluster centers. Note that for hierarchical clustering
        methods, this file will not be produced.''')
    parser.add_argument('output_dir')

    args, metric = parser.parse_args()

    out_dir = args.output_dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Both output files live side by side in the output directory.
    assignments_path = os.path.join(out_dir, "Assignments.h5")
    distances_path = os.path.join(out_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # Runs the assignment; results go to the two paths above.
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path)

    logger.info('All Done!')
#!/usr/bin/env python
"""Interactively estimate a rate matrix usign SCRE.
"""

import os, sys
import scipy.io
from mdtraj import io
from msmbuilder import MSMLib, SCRE, arglib
import numpy as np
import string
import logging
logger = logging.getLogger('msmbuilder.scripts.Interactive-SCRE')

# Command-line interface; the module docstring doubles as the help text.
parser = arglib.ArgumentParser(description=__doc__)
parser.add_argument('output_dir')
parser.add_argument('assignments')


def interactive_scre(assignments):
    Counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=1)
    CountsAfterTrimming,Mapping = MSMLib.ErgodicTrim(Counts)
    MSMLib.ApplyMappingToAssignments(assignments,Mapping)
    ReversibleCounts = MSMLib.EstimateReversibleCountMatrix(CountsAfterTrimming)
    T = MSMLib.EstimateTransitionMatrix(ReversibleCounts).toarray()
    populations = np.array(ReversibleCounts.sum(0)).flatten()
    populations /= populations.sum()

    K0=SCRE.ConvertTIntoK(T)
    M,X=SCRE.get_parameter_mapping(K0)

    while len(X) > 0:
from mdtraj.utils.six.moves import xrange

import os
import numpy as np
from mdtraj import io
from msmbuilder import arglib
from msmbuilder import Project
from msmbuilder import MSMLib
from msmbuilder.clustering import concatenate_trajectories
import logging
logger = logging.getLogger('msmbuilder.scripts.SaveStructures')
DEBUG = True

# Command-line interface of the script.
parser = arglib.ArgumentParser(description="""
Yank a number of randomly selected conformations from each state in a model.

The conformations can either be saved in separate files (i.e. one PDB file per
conformations), or in the same file.
""")
parser.add_argument('project')
parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
parser.add_argument('conformations_per_state',
                    default=5,
                    type=int,
                    help='Number of conformations to sample from each state')
# One or more integer state ids; the default list [-1] stands for "all states"
# according to the help text below.
parser.add_argument(
    'states',
    nargs='+',
    type=int,
    help='''Which states to sample from. Pass a list of integers, separated
    by whitespace. To specify ALL of the states, include the integer -1.''',
    default=[-1])
Пример #6
0
import os
import logging
import scipy.io
import numpy as np
from mdtraj import io
from msmbuilder import MSMLib
from msmbuilder import lumping
from msmbuilder import arglib
logger = logging.getLogger('msmbuilder.scripts.PCCA')

def float_or_none(s):
    """Convert *s* to a float, mapping the string 'none' (any case) to None.

    Written as a named function rather than a lambda so that it carries a
    meaningful ``__name__`` in tracebacks and parser error messages.

    Raises
    ------
    ValueError
        If *s* is neither 'none' nor a valid float literal.
    """
    if s.lower() == 'none':
        return None
    return float(s)
# Command-line interface of the lumping script.
parser = arglib.ArgumentParser(description="""
Applies the (f)PCCA(+) algorithm to lump your microstates into macrostates. You may
specify a transition matrix if you wish - this matrix is used to determine the
dynamics of the microstate model for lumping into kinetically relevant
macrostates.

Output: MacroAssignments.h5, a new assignments HDF file, for the Macro MSM.""")
parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
parser.add_argument('num_macrostates', type=int)
parser.add_argument('tProb')
parser.add_argument('output_dir')
parser.add_argument('algorithm',
                    help='Which algorithm to use',
                    choices=['PCCA', 'PCCA+'],
                    default='PCCA')
# NOTE(review): the returned group object is discarded and later arguments
# are added through ``parser`` itself; unless arglib tracks the group
# internally they will not be attached to it -- confirm this is intended.
parser.add_argument_group('Extra PCCA+ Options')
parser.add_argument('flux_cutoff',
                    help='''Discard eigenvectors below
    this flux''',
import os
import glob
import logging
import numpy as np
from msmbuilder import Project, utils, arglib
import mdtraj as md
logger = logging.getLogger('msmbuilder.scripts.RebuildProject')

# Command-line interface; every argument declares a default value.
parser = arglib.ArgumentParser(description="""
Rebuild the project file (ProjectInfo.yaml). This is useful when
trajectory files have been  deleted, or when you have lost your ProjectInfo 
file. \nOutput: ProjectInfo.yaml""")
parser.add_argument('traj_dir', default="./Trajectories/")
parser.add_argument('conf_filename', default="native.pdb")
parser.add_argument('project_filename', default="./ProjectInfo.yaml")
# Trajectory file extension.
parser.add_argument('iext', default=".h5")


def run(traj_dir, conf_filename, project_filename, iext):
    logger.info("Rebuilding project.")
    file_list = glob.glob(traj_dir + "/trj*%s" % iext)
    num_traj = len(file_list)

    traj_lengths = np.zeros(num_traj, 'int')
    traj_paths = []

    if not os.path.exists(conf_filename):
        raise(IOError("Cannot find conformation file %s" % conf_filename))

    file_list = sorted(file_list, key=utils.keynat)
    for i, filename in enumerate(file_list):
Пример #8
0
import numpy as np
from mdtraj import io
from msmbuilder import arglib
from msmbuilder import msm_analysis
from msmbuilder import Project
import scipy.io
import logging
logger = logging.getLogger('msmbuilder.scripts.SampleTrajectory')
DEBUG = True

# Command-line interface of the script.
parser = arglib.ArgumentParser(description="""
Create an MSM movie by sampling a sequence of states and sampling a 
random conformation from each state in the sequence.  
""")
parser.add_argument('project')
parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
parser.add_argument('tprob', default='Data/tProb.mtx')
# NOTE(review): no type= is given for num_steps, unlike starting_state --
# confirm whether the value is converted elsewhere.
parser.add_argument('num_steps')
parser.add_argument('starting_state',
                    type=int,
                    help='''Which state to start trajectory from.''')
parser.add_argument(
    'output',
    default='sample_traj.pdb',
    help=
    """The filename of your output trajectory.  The filetype suffix will be used to select the output file format."""
)


def entry_point():
    """Parse command line inputs, load up files, and build a movie."""
        ptraj = rmsd.prepare_trajectory(project.load_traj(i))
        d = rmsd.one_to_all(ppdb, ptraj, 0)
        distances[i, 0:len(d)] = d

    return distances


if __name__ == '__main__':
    # Banner that is appended to the help text and also emitted as a
    # DeprecationWarning below.
    deprecationmessage = """
===============================================================================
This script is deprecated and will be removed in v2.7 
Please use CalculateProjectDistance.py
===============================================================================
"""
    parser = arglib.ArgumentParser(description="""
Calculate the RMSD between an input PDB and all conformations in your project.
Output as a HDF5 file (load using msmbuilder.io.loadh())
""" + deprecationmessage)
    warnings.warn(deprecationmessage, DeprecationWarning)

    parser.add_argument('pdb')
    parser.add_argument('atom_indices',
                        help='Indices of atoms to compare',
                        default='AtomIndices.dat')
    parser.add_argument('output',
                        help='''Output file name. Output is an
        .h5 file with RMSD entries corresponding to the Assignments.h5 file.''',
                        default='Data/RMSD.h5')
    parser.add_argument('project')
    args = parser.parse_args()

    # Refuse to overwrite an existing output file.
    arglib.die_if_path_exists(args.output)
Пример #10
0
            kwargs['help'] += ' {d}'.format(d=d)
        else:
            kwargs['help'] = d
    group.add_argument(*args, **kwargs)


################################################################################

# Command-line interface; the description is the plain text plus a
# highlighted (green, bold) reminder produced by highlight().
parser = arglib.ArgumentParser(description='''
    Cluster.py: Cluster MD trajectories into microstates
    
    Output: Assignments.h5, and other files depending on your choice of distance
    metric and/or clustering algorithm.
    
    Note that there are many distance metrics and clustering algorithms available
    Many of which have multiple options and parameters. 

    ''' + highlight(
    '''MAKE LIBERAL USE OF THE -h OPTION. The help text changes significantly 
    depending on which level in the options tree you are currently in''',
    color='green',
    bold=True),
                               get_metric=True)
parser.add_argument('project')
parser.add_argument(dest='stride',
                    help='Subsample by striding',
                    default=1,
                    type=int)
parser.add_argument(dest='assignments',
                    help='''Output assignments file
    (will be used if stride is 1 and you're not using hierarchical)''',
Пример #11
0
import scipy.io

from msmbuilder import arglib
import logging
from msmbuilder import Serializer, MSMLib
logger = logging.getLogger(__name__)

def pjoin(a, b):
    """Join exactly two path components with ``os.path.join``.

    Written as a named function rather than a lambda so that it carries a
    meaningful ``__name__`` in tracebacks.
    """
    return os.path.join(a, b)

if __name__ == "__main__":
    # NOTE(review): the help text is passed positionally here but as
    # description= in most other parsers of this file -- confirm arglib
    # treats both the same way.
    parser = arglib.ArgumentParser("""Build a rate matrix MSM.

Note: this uses a lag_time of 1 to get the transition counts, and uses
rate estimators that use the *dwell_times*.

The *correct* likelihood function to use for estimating the rate matrix when
the data is sampled at a discrete frequency is open for debate. This
likelihood function doesn't take into account the error in the lifetime estimates
from the discrete sampling. Other methods are currently under development.
    
Output: tCounts.mtx, K.mtx, Populations.dat,  Mapping.dat,
Assignments.Fixed.h5, tCounts.UnSym.mtx""")

    parser.add_argument('assignments')
    parser.add_argument('symmetrize', choices=['none', 'transpose', 'mle'])
    parser.add_argument('outdir')
    args = parser.parse_args()
    assignments = Serializer.LoadData(args.assignments)

    # Output file names, all placed inside the requested output directory.
    ratemtx_fn = pjoin(args.outdir, 'K.mtx')
    tcounts_fn = pjoin(args.outdir, 'tCounts.mtx')
    unsym_fn = pjoin(args.outdir, 'tCounts.UnSym.mtx')
Пример #12
0
def run(assignments, distances, cutoff):
    """Flag every assignment whose distance exceeds *cutoff* with -1.

    The number of flagged entries is logged. *assignments* is modified in
    place and the same object is returned.
    """
    too_far = distances > cutoff
    logger.info('Discarding %d assignments', np.count_nonzero(too_far))

    # np.nonzero(mask) yields the same index tuple as np.where(mask).
    assignments[np.nonzero(too_far)] = -1
    return assignments


if __name__ == "__main__":
    # NOTE(review): the help text is passed positionally here but as
    # description= in most other parsers of this file -- confirm arglib
    # treats both the same way.
    parser = arglib.ArgumentParser("""
Trims assignments based on the distance to their generator. Useful for
eliminating bad assignments from a coase clustering. Note that this
discards (expensive!) data, so should only be used if an optimal
clustering is not available.

Note: Check your cluster sized with CalculateClusterRadii.py to get
a handle on how big they are before you trim. Recall the radius is the
*average* distance to the generator, here you are enforcing the
*maximum* distance.

Output: A trimmed assignments file (Assignments.Trimmed.h5).""")
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    parser.add_argument('distances', default='Data/Assignments.h5.distances')
    parser.add_argument('rmsd_cutoff', help="""distance value at which to trim,
        in. Data further than this value to its generator will be
        discarded. Note: this is measured with whatever distance metric you used to cluster""", type=float)
    parser.add_argument('output', default='Data/Assignments.Trimmed.h5')
    args = parser.parse_args()

    # Refuse to overwrite an existing output file.
    arglib.die_if_path_exists(args.output)
    
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import os
import logging
import numpy as np
import scipy.io
from msmbuilder import tpt
from msmbuilder import arglib
logger = logging.getLogger('msmbuilder.scripts.CalculateMFPTs')


# Command-line interface of the script.
parser = arglib.ArgumentParser(description="""
Calculates the mean first passage times (MFPTs) to one or all states.
Returns: MFPTs_X.dat or PairwiseMFPTs.dat, where X is the state ID.

Note: PairwiseMFPTs.dat is written if state=-1.  Otherwise, MFPTs_X.dat
is written.
""")
parser.add_argument('tProb')
# NOTE(review): the default is the string "-1" while type=int is requested;
# presumably the parser converts string defaults -- confirm.
parser.add_argument(
    'state', help='''ID of state to which we calculate MFPT.  If state=-1, then calculate all pairwise MFPTs.''', default="-1", type=int)
parser.add_argument('output_dir', default='.')


def run(T, state):
    if state != -1:
        logger.info("Calculating MFPTs to state %d" % state)
        m = tpt.calculate_mfpt([state], T)
        logger.info("Finished calculating MFPTs to state %d" % state)
    else:
Пример #14
0
import os
import sys
import logging
from mdtraj import io
from msmbuilder import Project
from msmbuilder.clustering import Hierarchical
from msmbuilder import arglib
import numpy as np

##############################################################################
# Globals
##############################################################################

logger = logging.getLogger('msmbuilder.scripts.AssignHierarchical')
# Command-line interface of the script.
parser = arglib.ArgumentParser(
    description='Assign data using a hierarchical clustering.')
parser.add_argument('hierarchical_clustering_zmatrix',
                    default='./Data/ZMatrix.h5',
                    help='Path to hierarchical clustering zmatrix')
parser.add_argument('stride',
                    type=int,
                    help='stride used when generating ZMatrix.h5')
parser.add_argument('project')
# Both of the following default to the literal string 'none'.
parser.add_argument('num_states', help='Number of States', default='none')
parser.add_argument('cutoff_distance',
                    help='Maximum cophenetic distance',
                    default='none')
parser.add_argument('assignments', type=str)

##############################################################################
# Code
Пример #15
0
import re
import numpy as np
import scipy
import mdtraj as md
from mdtraj import io
from mdtraj.utils.six.moves import xrange
from msmbuilder import arglib
from msmbuilder import Project
from msmbuilder.reduce.tICA import tICA
logger = logging.getLogger('msmbuilder.scripts.tICA_train')

# Command-line interface; created with get_metric=True.
parser = arglib.ArgumentParser(get_metric=True,
                               description="""
Calculate the time-lag correlation and covariance matrices for use in the tICA
metric. This method attempts to find projection vectors such that they have a
maximal autocorrelation function.

For more details see:
Schwantes, CR and Pande, VS. J. Chem. Theory Comput., 2013, 9 (4),
pp 2000-2009. DOI: 10.1021/ct300878a""")
parser.add_argument('project')
parser.add_argument('stride',
                    type=int,
                    default=1,
                    help='stride to subsample input trajectories')
# The default is the literal string 'all'.
parser.add_argument('atom_indices',
                    default='all',
                    help='atom indices to restrict trajectories to')
parser.add_argument('output',
                    default='tICAData.h5',
                    help='output filename to save results to')
Пример #16
0
# TJL 2011, PANDE GROUP

import numpy as np
import logging
import scipy.io
from mdtraj import io
from msmbuilder import tpt
from msmbuilder import arglib
logger = logging.getLogger('msmbuilder.scripts.FindPaths')


# Command-line interface of the script.
parser = arglib.ArgumentParser(description=
"""Finds the highest flux paths through an MSM.
Returns: an HDF5 file (default: Paths.h5), which contains three items:
(1) The highest flux pathways (a list of ints)
(2) The bottlenecks in these pathways (a list of 2-tuples)
(3) The flux of each pathway

Paths.h5 can be read by RenderPaths.py which generates a .dot file capturing these paths.""")

parser.add_argument('number', help='''Number of pathways you want
    to retreive''', type=int)
parser.add_argument('tprob', help='Transition probability matrix',
                    default='tProb.mtx')
# Files naming the start and end state ensembles.
parser.add_argument('starting', help='''Vector of states in the
    starting/reactants/unfolded ensemble.''', default='U_states.dat')
parser.add_argument('ending', help='''Vector of states in the
    ending/products/folded ensemble.''', default='F_states.dat')
parser.add_argument('output', default='Paths.h5')

Пример #17
0
    np.savetxt(FnMap, mapping,"%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    msmbuilder.io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return

if __name__ == "__main__":
    # Command-line interface of the script.
    parser = arglib.ArgumentParser(description=
"""Estimates the counts and transition matrices from an
Assignments.h5 file. Reversible models can be calculated either from naive
symmetrization or estimation of the most likely reversible matrices (MLE,
recommended). Also calculates the equilibrium populations for the model
produced. Outputs will be saved in the directory of your input Assignments.h5
file.
\nOutput: tCounts.mtx, tProb.mtx, Populations.dat,  Mapping.dat,
Assignments.Fixed.h5, tCounts.UnSym.mtx""")
    parser.add_argument('assignments')
    parser.add_argument('symmetrize', help="""Method by which to estimate a
        symmetric counts matrix. Symmetrization ensures reversibility, but may skew
        dynamics. We recommend maximum likelihood estimation (MLE) when tractable,
        else try Transpose. It is strongly recommended you read the documentation
        surrounding this choice.""", default='MLE',
        choices=['MLE', 'Transpose', 'None'])
    parser.add_argument('lagtime', help='''Lag time to use in model (in
        number of snapshots. EG, if you have snapshots every 200ps, and set the
        lagtime=50, you'll get a model with a lagtime of 10ns)''', type=int)
    # The default is the literal string "None", not the None object.
    parser.add_argument('mapping', help='''Mapping, EG from microstates to macrostates. If given, this mapping will be applied to the specified assignments before creating an MSM.''', default="None")
Пример #18
0
            ptraj = metric.prepare_trajectory(project.load_traj(i))
            d = metric.one_to_all(ppdb, ptraj, 0)
            distances[i, 0:len(d)] = d
    else:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)

        distances = metric.one_to_all(ppdb, ptraj, 0)

    return distances


if __name__ == '__main__':
    # Command-line interface; created with get_metric=True, so parse_args()
    # below returns a pair.
    parser = arglib.ArgumentParser(description="""
Calculate the distance between an input PDB and all conformations in your project.
Alternatively, you can limit the distance calculate to a single trajectory by
passing a trajectory filename.
Output as a HDF5 file (load using msmbuilder.io.loadh())""",
                                   get_metric=True)
    parser.add_argument('pdb')
    parser.add_argument('output',
                        help='''Output file name. Output is an
        .h5 file with RMSD entries corresponding to the Assignments.h5 file.''',
                        default='Data/RMSD.h5')
    parser.add_argument('project')
    # The default is the literal string 'all'.
    parser.add_argument('traj_fn',
                        help='''Pass a trajectory file, to return
        just the distance for a particular trajectory. Pass 'all' to get all
        distances in the project.''',
                        default='all')

    args, metric = parser.parse_args()
Пример #19
0
    # xyzlist is now a list of (n_atoms, 3) arrays, and we're going
    # to stack it along the third dimension
    xyzlist = np.dstack(xyzlist)
    # load up the conf to get the topology, put then pop in the new coordinates
    output = project.load_conf()
    output['XYZList'] = xyzlist

    return output


if __name__ == "__main__":
    # Command-line interface of the script.
    parser = arglib.ArgumentParser(description="""
Pulls a certain number of random conformations from each cluster. Returns these
as an HDF5/PDB/XTC file that contains one long chain of these conformations that looks
like a Trajectory. If you selected to sample N conformations from each cluster,
the first N conformations are from cluster 0, the next N from cluster 1, etc.

Output default: XRandomConfs.lh5, where X=Number of Conformations.""")
    parser.add_argument('project')
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    parser.add_argument('output',
                        help="""The name of the RandomConfs
        trajectory (.lh5) to write. XRandomConfs.lh5, where X=Number of
        Conformations.""",
                        default='XRandomConfs')
    # Declared without a default.
    parser.add_argument('conformations_per_state',
                        help='''Number of
        conformations to randomly sample from your data per state''',
                        type=int)
    parser.add_argument('format',
Пример #20
0
import mdtraj as md

##############################################################################
# Globals
##############################################################################

logger = logging.getLogger('msmbuilder.scripts.Assign')
# Command-line interface; created with get_metric=True.
parser = arglib.ArgumentParser(
    description="""
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use 
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a 
microstate in a rectangular array of ints. Assignments.h5.distances is an 
array of real numbers of the same dimension containing the distance (according 
to whichever metric you choose) from each frame to to the medoid of the 
microstate it is assigned to.""",
    get_metric=True)  # , formatter_class=argparse.RawDescriptionHelpFormatter)

parser.add_argument('project')
# Declared with dest= only (no positional name).
parser.add_argument(dest='generators',
                    help='''Output trajectory file containing
    the structures of each of the cluster centers. Note that for hierarchical clustering
    methods, this file will not be produced.''',
                    default='Data/Gens.h5')
        states = states.remove(-1)
    n_states = len(states)

    radii = np.nan * np.ones(n_states)
    for s in states:
        trj, frame = inverse_mapping[s]
        radii[s] = distances[trj, frame].mean()

    return radii


if __name__ == "__main__":
    # Command-line interface of the script.
    parser = arglib.ArgumentParser(description="""
Calculates the cluster radius for all clusters in the model. Here, we define
radius is simply the average distance of all conformations in a cluster to its
generator. Does this by taking averaging the distance of each assigned state to
its generator.

Output: A flat txt file, 'ClusterRadii.dat', the average RMSD distance to the
generator, measured by what ever distance metric was used in assigning.""")

    parser.add_argument('assignments',
                        type=str,
                        default='Data/Assignments.Fixed.h5')
    parser.add_argument('distances',
                        help='''Path to assignment
        distances file.''',
                        default='Data/Assignments.h5.distances')
    parser.add_argument('output', default='ClusterRadii.dat')
    args = parser.parse_args()
    # Refuse to overwrite an existing output file.
    arglib.die_if_path_exists(args.output)
Пример #22
0
        print ", saving XTC to %s" % os.path.join(output_dir, outxtc),
        p_cluster_traj.save_to_xtc(os.path.join(output_dir, outxtc))
        print ", saved"
        NowMem = float(resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss) / 1048576
        if NowMem > MaxMem:
            MaxMem = NowMem
    #print "This script used at least % .3f GB of memory" % MaxMem


if __name__ == '__main__':
    # NOTE(review): the help text is passed positionally here but as
    # description= in most other parsers of this file -- confirm arglib
    # treats both the same way.
    parser = arglib.ArgumentParser("""
Pulls the specified number of random structures (or optionally all
structures) from each state in an assignments file, aligned to the
generators. Specify which states to pull from with space-seperated
ints

Output: A bunch of PDB files named: State<StateIndex>-<Conformation>, inside
the directory 'PDBs'
Note: If you want to get structures for all states, it is more efficient
to use GetRandomConfs.py""")
    parser.add_argument('project')
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    # Integer count, default 5; per the help text -1 means "all".
    parser.add_argument(
        'conformations_per_state',
        default=5,
        type=int,
        help=
        'Number of conformations to sample from each state: to specify ALL of the conformations, pass the integer -1.'
    )
    parser.add_argument(
        'states',
Пример #23
0
    # Get committors and flux
    logger.info("Getting committors and flux...")

    Fc = calculate_committors(Uv, Fv, TC)
    logger.info("Calculated forward committors.")

    NFlux = calculate_net_fluxes(Uv, Fv, TC)
    logger.info("Calculated net flux.")

    return Fc, NFlux


if __name__ == "__main__":
    # Command-line interface of the script.
    # NOTE(review): the numbered list in the description jumps from (1) to
    # (3) -- confirm whether an item is missing or the number is a typo.
    parser = arglib.ArgumentParser(
        description=
        """Calculates a number of kinetic transition properties of a given MSM. Returns:
(1) committors.dat - the forward committors of the MSM (numpy savetxt)
(3) net_flux.mtx - the net flux matrix (scipy sparse fmt)""")

    parser.add_argument('tProb')
    parser.add_argument('starting',
                        help='''Vector of states in the
        starting/reactants/unfolded ensemble.''',
                        default='U_states.dat')
    parser.add_argument('ending',
                        help='''Vector of states in the
        ending/products/folded ensemble.''',
                        default='F_states.dat')
    parser.add_argument('output_dir', default='.')
    args = parser.parse_args()
Пример #24
0
    else:
        traj_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                traj_fn, AtomIndices=atom_indices):
            traj_asa.extend(asa.calculate_asa(traj_chunk))

        SASA = np.array(traj_asa)

    return SASA


if __name__ == '__main__':
    # NOTE(review): the help text is passed positionally here but as
    # description= in most other parsers of this file -- confirm arglib
    # treats both the same way.
    parser = arglib.ArgumentParser(
        """Calculates the Solvent Accessible Surface Area
    of all atoms in a given trajectory, or for all trajectories in the project. The
    output is a hdf5 file which contains the SASA for each atom in each frame
    in each trajectory (or the single trajectory you passed in.""")
    parser.add_argument('project')
    # Both 'atom_indices' and 'traj_fn' default to the literal string 'all'.
    parser.add_argument('atom_indices',
                        help='Indices of atoms to calculate SASA',
                        default='all')
    parser.add_argument('output',
                        help='''hdf5 file for output. Note this will
        be THREE dimensional: ( trajectory, frame, atom ), unless you just ask for
        one trajectory, in which case it will be shape (frame, atom).''',
                        default='SASA.h5')
    parser.add_argument('traj_fn',
                        help='''Pass a trajectory file if you only
        want to calclate the SASA for a single trajectory''',
                        default='all')
Пример #25
0
def GrabSpecificAtoms(C1, toKeepDict):
    """Select atom indices residue by residue according to a name table.

    Parameters
    ----------
    C1 : mapping
        Must provide ``"IndexList"`` (one non-empty sequence of atom indices
        per residue), ``"ResidueNames"`` (indexable by atom index) and
        ``"AtomNames"`` (indexable by a sequence of atom indices).
    toKeepDict : dict
        Maps a residue name to the collection of atom names to keep for
        residues of that name.

    Returns
    -------
    numpy.ndarray
        Integer array with the kept atom indices, in residue order.

    Raises
    ------
    KeyError
        If the name of a residue is not a key of ``toKeepDict``.
    """
    # np.in1d is deprecated since NumPy 2.0 in favour of np.isin; fall back
    # to in1d only on NumPy versions that predate isin.
    is_member = getattr(np, "isin", None) or np.in1d

    kept = []
    for residue_indices in C1["IndexList"]:
        # The residue name is read from the residue's first atom.
        residue = C1["ResidueNames"][residue_indices[0]]
        wanted_atoms = toKeepDict[residue]
        # Boolean mask over this residue's atoms; indexing with it selects
        # the same entries as np.where(mask)[0] would.
        mask = is_member(C1["AtomNames"][residue_indices], wanted_atoms)
        kept.extend(np.array(residue_indices)[mask])
    return np.array(kept, 'int')


if __name__ == "__main__":
    # Command-line interface of the script.
    parser = arglib.ArgumentParser(
        description="Creates an atom indices file from a PDB.")
    parser.add_argument('pdb')
    parser.add_argument('output', default='AtomIndices.dat')
    # Restricted to four named selections; 'minimal' is the default.
    parser.add_argument('atom_type',
                        help='''Atoms to include in index file.
    One of four options: (1) minimal (CA, CB, C, N, O, recommended), (2) heavy,
    (3) alpha (carbons), or (4) all.  Use "all" in cases where protein
    nomenclature may be inapproprate, although you may want to define your own
    indices in such situations.''',
                        choices=['minimal', 'heavy', 'alpha', 'all'],
                        default='minimal')
    args = parser.parse_args()
    # Refuse to overwrite an existing output file.
    arglib.die_if_path_exists(args.output)

    indices = run(args.pdb, args.atom_type)
Пример #26
0
                                 EqPops,
                                 Directed=Directed,
                                 EdgeScale=EdgeScale,
                                 PopCutoff=PopCutoff,
                                 EdgeCutoff=EdgeCutoff,
                                 ImageList=pngs)

    plot_graph.PlotNetwork(G, OutputFile=OutputFile)


if __name__ == "__main__":
    # Command-line interface of the script.
    parser = arglib.ArgumentParser(description="""
Draws a representation of your MSM and draws a graph corresponding to it. This graph
is written as a .dot file, which can be read by many common graph utilities. Read in MSM info
as a counts, transition, or net flux matrix.

Note: You need networkx and either Graphviz & PyGraphviz or pydot to get this utility working.
To get the graph the way you want it to look, you might want to open up this script and play
with some default parameters (EdgeScale=1, PopCutoff=0.01, EdgeCutoff=0.1) in the run() function.\n\n"""
                                   )
    # NOTE(review): the per-argument text is passed as description= here,
    # whereas the other parsers in this file use help= -- confirm that this
    # arglib version's add_argument accepts description=.
    parser.add_argument(
        'tmat',
        description=
        'Name of the matric to represent as a graph. Can be counts, transition, or net flux matrix. Should be in .mtx format'
    )
    parser.add_argument('populations',
                        description='Populations file',
                        default='Populations.dat')
    parser.add_argument(
        'directed',
        description='Make the graph directed (if, e.g., a net flux matrix)',
Пример #27
0
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import sys
import numpy as np
from msmbuilder import arglib

# Command-line interface of the plotting script, declared through the arglib
# wrapper imported above.
parser = arglib.ArgumentParser(description="""
\nPlots data generated from CalculateImpliedTimescales.py. You may want to use
this as a template for a pylab session

We recommend modifying this script for your own purposes""")
# Data file to plot; the help text names the same file as the default value.
parser.add_argument('input',
                    help='Path to ImpliedTimescales.dat',
                    default='ImpliedTimescales.dat')
# Time between snapshots, parsed as a float (default 1).
parser.add_argument('dt',
                    help='Time between snapshots in your data',
                    default=1,
                    type=float)
# Output file for the plot; the empty default means "render to screen"
# according to the help text.
parser.add_argument(
    'filename',
    help='Filename to save plot to. Leave blank to render plot to screen',
    default='')
parser.add_argument('title',
                    help='Title for plot',
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import logging
import numpy as np
from msmbuilder import arglib
from msmbuilder import msm_analysis
logger = logging.getLogger('msmbuilder.scripts.CalculateImpliedTimescales')

# Command-line interface of the implied-timescales script.
parser = arglib.ArgumentParser(
    description="""
\nCalculates the implied timescales of a set of assigned data, up to
the argument 'lagtime'. Returns: ImpliedTimescales.dat, a flat file that
contains all the lag times.\n""")

# Assignments input, taken as a plain string.
parser.add_argument('assignments', type=str)

# Lag time range, supplied on the command line in the form "X,Y".
parser.add_argument(
    'lagtime',
    help="""The lagtime range to calculate.
    Pass two ints as X,Y with NO WHITESPACE, where X is the lowest
    timescale you want and Y is the biggest. EG: '-l 5,50'.""")

# Name of the output data file.
parser.add_argument(
    'output',
    default='ImpliedTimescales.dat',
    help="""The name of the  implied
    timescales data file (use .dat extension)""")

# Number of concurrent processes, parsed as an int (default 1).
parser.add_argument(
    'procs',
    type=int,
    default=1,
    help='''Number of concurrent processes
    (cores) to use''')
Пример #29
0
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# TJL 2011, PANDE GROUP

import numpy as np
import logging
import scipy.io
from mdtraj import io
from msmbuilder import tpt
from msmbuilder import arglib
logger = logging.getLogger('msmbuilder.scripts.FindPaths')

# Command-line interface of the path-finding script.
parser = arglib.ArgumentParser(description="""Finds the highest flux paths through an MSM.
Returns: an HDF5 file (default: Paths.h5), which contains three items:
(1) The highest flux pathways (a list of ints)
(2) The bottlenecks in these pathways (a list of 2-tuples)
(3) The flux of each pathway""")

# How many pathways to report, parsed as an int.
parser.add_argument(
    'number',
    type=int,
    help='''Number of pathways you want
    to retreive''')

# Transition probability matrix file.
parser.add_argument(
    'tprob',
    default='tProb.mtx',
    help='Transition probability matrix')

# File listing the states of the starting ensemble.
parser.add_argument(
    'starting',
    default='U_states.dat',
    help='''Vector of states in the
    starting/reactants/unfolded ensemble.''')
parser.add_argument('ending',
Пример #30
0
    ppdb = metric.prepare_trajectory(pdb)
    ptraj = metric.prepare_trajectory(traj)

    print ppdb['XYZList'].shape
    print ptraj['XYZList'].shape

    distances, xout = metric.one_to_all_aligned(ppdb, ptraj, 0)
    print distances
    return distances


if __name__ == '__main__':
    parser = arglib.ArgumentParser("""Takes a trajectory (the input data,
'INPUT') and a PDB, and calculates the RMSD between every frame of the trajectory
and PDB for the atoms specified in the atom indicies file. Note that trajectory
can be any trajectory-like format, including generators and random conformation 
files. Output: a flat file vector of RMSDs, in nm. Note that MSMBuilder's RMSD
calculator is highly optimized, so this calculation should be rapid. Output: 
RMSD.dat, a flat text file of the RMSDs.""")
    parser.add_argument('pdb')
    parser.add_argument('input', help='Path to a trajectory-like file')
    parser.add_argument('project')
    parser.add_argument('atom_indices',
                        help='Indices of atoms to compare',
                        default='AtomIndices.dat')

    parser.add_argument('lprmsd_alt_indices',
                        help='''Optional
    alternate atom indices for RMSD. If you want to align the
    trajectories using one set of atom indices but then compute the
    distance using a different set of indices, use this option. If