Пример #1
def read_dtrajs_from_pattern(patterns, logger=getLogger()):
    patterns : single pattern or list of patterns
        eg. '*.txt' or ['/foo/*/bar/*.txt', '*.txt']

    list of discrete trajectories : list of numpy arrays, dtype=int

    dtrajs = []
    filenames = paths_from_patterns(patterns)
    if filenames == []:
        raise ValueError('no match to given pattern')
    for dt in filenames:
        # skip directories
        if os.path.isdir(dt):
        logger.info('reading discrete trajectory: %s' % dt)
        except Exception as e:
                'Exception occurred during reading of %s:\n%s' % (dt, e))
    return dtrajs
Пример #2
    def __init__(self, chain, chunksize=100, param_stride=1):
        r"""Data processing pipeline.

        chain : list of transformers like objects
            the order in the list defines the direction of data flow.
        chunksize : int, optional
            how many frames shall be processed at once.
        param_stride : int, optional
            omit every n'th data point

        self._chain = []
        self.chunksize = chunksize
        self.param_stride = param_stride
        self.chunksize = chunksize

        # add given elements in chain
        for e in chain:

        self._parametrized = False

        name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
        self._logger = getLogger(name)
Пример #3
 def __init__(self, topfile):
     """Initialise the instance from a topology file.

     Parameters
     ----------
     topfile :
         handed to ``mdtraj.load``; the ``topology`` attribute of the loaded
         object is kept as ``self.topology``.
     """
     self.topologyfile = topfile
     loaded = mdtraj.load(topfile)
     self.topology = loaded.topology

     # no features selected yet
     self.active_features = []
     self._dim = 0

     # per-instance logger named "<ClassName>[<hex id of self>]"
     logger_name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
     self._logger = getLogger(logger_name)
Пример #4
 def __init__(self, topfile):
     """Initialise the instance from a topology file.

     Parameters
     ----------
     topfile :
         handed to ``mdtraj.load``; the ``topology`` attribute of the loaded
         object is kept as ``self.topology``.
     """
     self.topologyfile = topfile
     loaded = mdtraj.load(topfile)
     self.topology = loaded.topology

     # no features selected yet
     self.active_features = []
     self._dim = 0

     # per-instance logger named "<ClassName>[<hex id of self>]"
     logger_name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
     self._logger = getLogger(logger_name)
Пример #5
    @classmethod
    def setUpClass(cls):
        """Create the NumPy fixture files shared by the tests of this class.

        A fresh temporary directory receives two 2-d ``.npy`` files, one 1-d
        ``.npy`` file and one ``.npz`` archive.  File paths and reference
        arrays are stored as class attributes (``f1``..``f4``, ``files2d``,
        ``files1d``, ``npy_files``, ``npz``, ``d``, ``d_1d``).

        Must be a classmethod: unittest invokes it as ``cls.setUpClass()``.

        Returns
        -------
        cls : the class itself, as in the original implementation.
        """
        # cls already is the class object; cls.__class__ would be its metaclass
        # and therefore always produce the logger name "type".
        cls.logger = getLogger(cls.__name__)

        d = np.arange(3 * 100).reshape((100, 3))
        d2 = np.arange(300, 900).reshape((200, 3))
        d_1d = np.random.random(100)

        cls.dir = tempfile.mkdtemp(prefix='pyemma_npyreader')

        # mktemp only hands out names inside the private directory created
        # above; the files themselves are written by np.save/np.savez below.
        cls.f1 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f2 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f3 = tempfile.mktemp(suffix='.npz', dir=cls.dir)
        cls.f4 = tempfile.mktemp(suffix='.npy', dir=cls.dir)

        # 2d
        np.save(cls.f1, d)
        np.save(cls.f4, d2)

        # 1d
        np.save(cls.f2, d_1d)

        # archive containing the 2-d array twice
        np.savez(cls.f3, d, d)

        # the .npz archive is not part of files2d; it is exposed as cls.npz
        cls.files2d = [cls.f1, cls.f4]
        cls.files1d = [cls.f2]
        cls.d = d
        cls.d_1d = d_1d

        cls.npy_files = [f for f in cls.files2d if f.endswith('.npy')]
        cls.npz = cls.f3

        return cls
Пример #6
    @classmethod
    def setUpClass(cls):
        """Create the NumPy fixture files shared by the tests of this class.

        A fresh temporary directory receives one 2-d ``.npy`` file, one 1-d
        ``.npy`` file and one ``.npz`` archive.  File paths and reference
        arrays are stored as class attributes (``f1``..``f3``, ``files2d``,
        ``files1d``, ``npy_files``, ``npz``, ``d``, ``d_1d``).

        Must be a classmethod: unittest invokes it as ``cls.setUpClass()``.

        Returns
        -------
        cls : the class itself, as in the original implementation.
        """
        # cls already is the class object; cls.__class__ would be its metaclass
        # and therefore always produce the logger name "type".
        cls.logger = getLogger(cls.__name__)

        d = np.arange(3 * 100).reshape((100, 3))
        d_1d = np.random.random(100)

        cls.dir = tempfile.mkdtemp(prefix='pyemma_npyreader')

        # mktemp only hands out names inside the private directory created
        # above; the files themselves are written by np.save/np.savez below.
        cls.f1 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f2 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f3 = tempfile.mktemp(suffix='.npz', dir=cls.dir)

        # 2d
        np.save(cls.f1, d)

        # 1d
        np.save(cls.f2, d_1d)

        # archive containing the 2-d array twice
        np.savez(cls.f3, d, d)

        cls.files2d = [cls.f1, cls.f3]
        cls.files1d = [cls.f2]
        cls.d = d
        cls.d_1d = d_1d

        # subset of files2d that are plain .npy files (drops the .npz archive)
        cls.npy_files = [f for f in cls.files2d if f.endswith('.npy')]
        cls.npz = cls.f3

        return cls
Пример #7
    def __init__(self, chain, chunksize=100, param_stride=1):

        TODO:chunksize should be estimated from memory requirements (max memory usage)
        self._chain = []
        self.chunksize = chunksize
        self.param_stride = param_stride

        # add given elements in chain
        for e in chain:

        self._parametrized = False

        name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
        self._logger = getLogger(name)
Пример #8
    def __init__(self, chain, chunksize=100, param_stride=1):

        TODO:chunksize should be estimated from memory requirements (max memory usage)
        self._chain = []
        self.chunksize = chunksize
        self.param_stride = param_stride

        # add given elements in chain
        for e in chain:

        self._parametrized = False

        name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
        self._logger = getLogger(name)
Пример #9
# GNU General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import

import mdtraj as md
import numpy as np
from pyemma.util.log import getLogger
from pyemma.coordinates.data.util.reader_utils import copy_traj_attributes as _copy_traj_attributes, \
    preallocate_empty_trajectory as _preallocate_empty_trajectory, enforce_top as _enforce_top
from mdtraj.core.trajectory import Trajectory
__all__ = ['frames_from_file']

log = getLogger(__name__)

def frames_from_files(files, top, frames, chunksize=1000, stride=1, verbose=False, copy_not_join=None):
    """Set up a reader for ``files`` and rescale the requested frame indices by ``stride``.

    NOTE(review): only the set-up part of this function is visible here; what
    is done with ``reader`` afterwards and what is returned cannot be told
    from this excerpt.

    Parameters
    ----------
    files :
        passed unchanged to ``pyemma.coordinates.source``.
    top :
        topology specification, normalised through ``_enforce_top``.
    frames :
        array supporting ``frames[:, 1]``; its second column is multiplied by
        ``stride`` IN PLACE, i.e. the caller's array is modified.
        Presumably an (N, 2) integer array of (file index, frame index)
        pairs -- TODO confirm.
    chunksize, copy_not_join :
        not used in the visible part of the function.
    stride :
        converted with ``int()``; any value other than 1 rescales ``frames[:, 1]``.
    verbose : bool
        if true, an info message is logged when a stride != 1 is applied.
    """
    # function-scope import (presumably to avoid a circular import -- TODO confirm)
    from pyemma.coordinates import source
    # Enforce topology to be a md.Topology object
    top = _enforce_top(top)
    reader = source(files, top=top)
    stride = int(stride)

    if stride != 1:
        # in-place update: the array passed in by the caller is changed as well
        frames[:, 1] *= int(stride)
        if verbose:
            log.info('A stride value of = %u was parsed, '
                     'interpreting "indexes" accordingly.' % stride)
Пример #10
import unittest
import os
import numpy as np
import tempfile

from pyemma.util.log import getLogger
import pyemma.coordinates as coor
import pyemma.util.types as types

logger = getLogger('TestCluster')

class TestCluster(unittest.TestCase):

    def setUpClass(cls):
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture
        means = [np.array([-3,0]),
        widths = [np.array([0.1,0.1]),
Пример #11
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import
import unittest
import os
import numpy as np

from pyemma.coordinates.data import MDFeaturizer
from pyemma.util.log import getLogger
import pyemma.coordinates.api as api
import pyemma.util.types as types
import pkg_resources

logger = getLogger('pyemma.' + 'TestReaderUtils')

class TestSource(unittest.TestCase):
    def setUp(self):
        path = pkg_resources.resource_filename('pyemma.coordinates.tests',
                                               'data') + os.path.sep
        self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
        self.traj_files = [
            os.path.join(path, 'bpti_001-033.xtc'),
            os.path.join(path, 'bpti_067-100.xtc')

    def tearDown(self):
Пример #12

from __future__ import print_function

from __future__ import absolute_import
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger
from six.moves import range

log = getLogger('pyemma.' + 'TestFeatureReaderAndTICA')

class TestFeatureReaderAndTICA(unittest.TestCase):
    def setUpClass(cls):
        cls.dim = 9  # dimension (must be divisible by 3)
        N = 50000  # length of single trajectory # 500000
        N_trajs = 10  # number of trajectories

        cls.w = 2.0 * np.pi * 1000.0 / N  # have 1000 cycles in each trajectory

        # get random amplitudes and phases
        cls.A = np.random.randn(cls.dim)
        cls.phi = np.random.random_sample((cls.dim, )) * np.pi * 2.0
        mean = np.random.randn(cls.dim)
Пример #13
__author__ = 'noe'

from pyemma.util.log import getLogger

import numpy as np

log = getLogger('Transformer')
__all__ = ['Transformer']

class Transformer(object):


    chunksize : int (optional)
        the chunksize used to batch process underlying data
    lag : int (optional)
        if you want to process time lagged data, set this to a value > 0.

    def __init__(self, chunksize=100, lag=0):
        """Store the processing parameters and reset the internal state.

        chunksize : int (optional)
            stored as ``self.chunksize``.
        lag : int (optional)
            stored as ``self._lag``.
        """
        self.chunksize = chunksize
        self._lag = lag
        # initial state: in-memory flag off, no data producer attached
        self._in_memory = False
        self._dataproducer = None

    def data_producer(self):
Пример #14
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import
import unittest
import os
import numpy as np
import tempfile

from pyemma.util.log import getLogger
import pyemma.coordinates as coor
import pyemma.util.types as types
from six.moves import range

logger = getLogger('TestCluster')

class TestCluster(unittest.TestCase):
    def setUpClass(cls):
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture
        means = [
            np.array([-3, 0]),
            np.array([-1, 1]),
            np.array([0, 0]),
            np.array([1, -1]),
            np.array([4, 2])
Test feature reader and Tica by checking the properties of the ICs.
cov(ic_i,ic_j) = delta_ij and cov(ic_i,ic_j,tau) = lambda_i delta_ij
@author: Fabian Paul
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates.api import tica, _TICA as TICA
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger

log = getLogger('TestFeatureReaderAndTICAProjection')

def random_invertible(n, eps=0.01):
    """Generate a real random invertible ``n`` x ``n`` matrix.

    A standard-normal random matrix is decomposed by SVD, its singular values
    are raised to at least ``eps`` and the factors are multiplied back together.

    Parameters
    ----------
    n : int
        number of rows and columns.
    eps : float, optional
        lower bound applied to every singular value.

    Returns
    -------
    ndarray of shape (n, n)
    """
    gaussian = np.random.randn(n, n)
    left, sigma, right = np.linalg.svd(gaussian)
    # clip from below so that no singular value is (close to) zero
    bounded = np.maximum(sigma, eps)
    scaled_left = left.dot(np.diag(bounded))
    return scaled_left.dot(right)

from nose.plugins.attrib import attr

class TestFeatureReaderAndTICAProjection(unittest.TestCase):
    def setUpClass(cls):
        c = super(TestFeatureReaderAndTICAProjection, cls).setUpClass()
Пример #16
Created on 18.10.2013

@author: marscher
import numpy as np
from decimal import Decimal # for wrapping java.math.BigDecimal

from pyemma.util.log import getLogger
from pyemma.util.pystallone import ndarray_to_stallone_array, stallone, JArray, JInt, JDouble

__all__ = ['PathwayDecomposition']

log = getLogger()

# TODO: test
class PathwayDecomposition(object):

    def __init__(self, F, Q, A, B):
        F : The net fluxes matrix
          ndarray(dtype=float, shape=(n,n))
        Q : The committor vector
          ndarray(dtype=float, shape=(n))
        A : set of representatives (indices defining set A in F)
        B : set of representatives
Пример #17
@author: marscher
import mdtraj
import os
import tempfile
import unittest
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger
import pkg_resources

import numpy as np
from pyemma.coordinates.api import feature_reader, discretizer, tica

log = getLogger('TestFeatureReader')

class TestFeatureReader(unittest.TestCase):

    def setUpClass(cls):
        c = super(TestFeatureReader, cls).setUpClass()
        # create a fake trajectory which has 3 atoms and coordinates are just a range
        # over all frames.
        cls.trajfile = tempfile.mktemp('.xtc')
        cls.n_frames = 1000
        cls.xyz = np.random.random(cls.n_frames * 3 * 3).reshape((cls.n_frames, 3, 3))
        log.debug("shape traj: %s" % str(cls.xyz.shape))
        cls.topfile = pkg_resources.resource_filename(
            'pyemma.coordinates.tests.test_featurereader', 'data/test.pdb')
Пример #18
Created on 04.02.2015

@author: marscher
import unittest

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.util.log import getLogger
import numpy as np

import tempfile
import os

logger = getLogger('TestDataInMemory')

class TestDataInMemory(unittest.TestCase):

    def setUpClass(cls):
        d = np.random.random((100, 3))
        d_1d = np.random.random(100)

        f1 = tempfile.mktemp()
        f2 = tempfile.mktemp(suffix='.npy')
        f3 = tempfile.mktemp()
        f4 = tempfile.mktemp(suffix='.npy')

        npz = tempfile.mktemp(suffix='.npz')
Пример #19
Created on 02.02.2015

@author: marscher
import unittest

import numpy as np

from pyemma.coordinates import pca
from pyemma.util.log import getLogger
import pyemma.util.types as types

logger = getLogger('TestTICA')

class TestPCAExtensive(unittest.TestCase):
    def setUpClass(cls):
        import pyemma.msm.generation as msmgen

        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
        cls.T = 10000
        means = [np.array([-1, 1]), np.array([1, -1])]
        widths = [np.array([0.3, 2]), np.array([0.3, 2])]
        # continuous trajectory
        cls.X = np.zeros((cls.T, 2))
        # hidden trajectory
        dtraj = msmgen.generate_traj(cls.P, cls.T)
        for t in range(cls.T):
Пример #20
from pyemma.coordinates.data.featurizer import MDFeaturizer as _MDFeaturizer
from pyemma.coordinates.data.feature_reader import FeatureReader as _FeatureReader
from pyemma.coordinates.data.data_in_memory import DataInMemory as _DataInMemory
from pyemma.coordinates.data.util.reader_utils import create_file_reader as _create_file_reader
from pyemma.coordinates.data.frames_from_file import frames_from_file as _frames_from_file
# transforms
from pyemma.coordinates.transform.transformer import Transformer as _Transformer
from pyemma.coordinates.transform.pca import PCA as _PCA
from pyemma.coordinates.transform.tica import TICA as _TICA
# clustering
from pyemma.coordinates.clustering.kmeans import KmeansClustering as _KmeansClustering
from pyemma.coordinates.clustering.uniform_time import UniformTimeClustering as _UniformTimeClustering
from pyemma.coordinates.clustering.regspace import RegularSpaceClustering as _RegularSpaceClustering
from pyemma.coordinates.clustering.assign import AssignCenters as _AssignCenters

logger = getLogger('coordinates.api')

__author__ = "Frank Noe, Martin Scherer"
__copyright__ = "Copyright 2015, Computational Molecular Biology Group, FU-Berlin"
__credits__ = ["Benjamin Trendelkamp-Schroer", "Martin Scherer", "Frank Noe"]
__license__ = "FreeBSD"
__version__ = "2.0.0"
__maintainer__ = "Martin Scherer"
__email__ = "m.scherer AT fu-berlin DOT de"

__all__ = ['featurizer',  # IO
Пример #21
Created on 18.02.2015

@author: marscher
from pyemma.coordinates.transform.transformer import Transformer
from pyemma.util.log import getLogger
import numpy as np

log = getLogger('Clustering')

class AbstractClustering(Transformer):
    provides a common interface for cluster algorithms.
    def __init__(self):
        """Initialise with no cluster centers and an empty list of discrete trajectories."""
        super(AbstractClustering, self).__init__()
        # set to None until cluster centers are assigned elsewhere
        self.clustercenters = None
        self.dtrajs = []

    def map(self, x):
        """Get the index of the point in :attr:`clustercenters` closest to x.

        The distances are obtained from ``self.data_producer.distances`` and
        the position of the smallest one is returned.  ``np.argmin`` is called
        without an axis, so the index refers to the flattened distance array.
        """
        d = self.data_producer.distances(x, self.clustercenters)
        return np.argmin(d)

    def save_dtrajs(self,
Пример #22
Created on 22.01.2015

@author: marscher
from pyemma.util.log import getLogger

import numpy as np
from pyemma.coordinates.transform.transformer import Transformer

log = getLogger('WriterCSV')
__all__ = ['WriterCSV']

class WriterCSV(Transformer):

    shall write to csv files

    def __init__(self, filename):
        """Remember the output file name and reset the last-frame flag.

        filename :
            stored unchanged as ``self.filename``.
        """
        super(WriterCSV, self).__init__()

        # filename should be obtained from source trajectory filename,
        # eg suffix it to given filename
        self.filename = filename
        self.last_frame = False
Пример #23
Created on 02.02.2015

@author: marscher
import unittest

import numpy as np

from pyemma.coordinates import pca
from pyemma.util.log import getLogger
import pyemma.util.types as types

logger = getLogger('TestTICA')

class TestPCAExtensive(unittest.TestCase):

    def setUpClass(cls):
        import pyemma.msm.generation as msmgen

        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01],
                      [0.01, 0.99]])
        cls.T = 10000
        means = [np.array([-1,1]), np.array([1,-1])]
        widths = [np.array([0.3,2]),np.array([0.3,2])]
        # continuous trajectory
        cls.X = np.zeros((cls.T, 2))
Пример #24
Created on 19.01.2015

@author: marscher
from .transformer import Transformer
from pyemma.util.linalg import eig_corr
from pyemma.util.log import getLogger
from pyemma.util.annotators import doc_inherit

import numpy as np

log = getLogger('TICA')
__all__ = ['TICA']

class TICA(Transformer):
    Time-lagged independent component analysis (TICA)

    Given a sequence of multivariate data :math:`X_t`, computes the mean-free
    covariance and time-lagged covariance matrix:

    .. math::

        C_0 &=   (X_t - \mu)^T (X_t - \mu) \\
        C_{\tau} &= (X_t - \mu)^T (X_t+\tau - \mu)
    and solves the eigenvalue problem

    .. math:: C_{\tau} r_i = C_0 \lambda_i r_i
Пример #25

from __future__ import absolute_import
import mdtraj
import tempfile
import unittest
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger
import pkg_resources

import numpy as np
from pyemma.coordinates.api import discretizer, tica, source
from six.moves import range

log = getLogger('TestFeatureReader')

def create_traj(top, format='.xtc', dir=None):
    """Build a synthetic trajectory with a random number of frames.

    Parameters
    ----------
    top :
        passed to ``mdtraj.load`` to obtain the trajectory object.
    format : str
        suffix of the generated temporary file name.
    dir : str or None
        directory for the temporary file name (``tempfile.mktemp`` default if None).

    Returns
    -------
    (trajfile, xyz, n_frames) : temporary file name, the coordinate array of
        shape (n_frames, 3, 3) and the number of frames (500 <= n_frames < 1500).

    NOTE(review): in the code shown here the trajectory object is never
    written to ``trajfile`` -- confirm whether a save call is missing.
    """
    path = tempfile.mktemp(suffix=format, dir=dir)
    length = np.random.randint(500, 1500)
    log.debug("create traj with %i frames" % length)
    coords = np.arange(length * 3 * 3).reshape((length, 3, 3))

    traj = mdtraj.load(top)
    traj.xyz = coords
    # the same three box vectors for every frame
    box_rows = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    traj.unitcell_vectors = np.array(length * box_rows).reshape(length, 3, 3)
    traj.time = np.arange(length)

    return path, coords, length
Пример #26
#!/usr/bin/env python
# encoding: utf-8
import argparse
import sys
import os
from pyemma.util.log import getLogger
from pyemma.msm.generation import generate_traj
import pyemma

log = getLogger('mm_generate')

def handleArgs():
    parser = argparse.ArgumentParser()
    parser.add_argument('-T', type=str, help='path to transition matrix.')
                        help='output filename of trajectory.')
    parser.add_argument('-dt', type=int, default=1)
    parser.add_argument('-steps', type=int)
    parser.add_argument('-start_state', type=int)

    args = parser.parse_args()

    return args

Пример #27
from __future__ import absolute_import
import pyemma
from six.moves import range
from six.moves import zip
Created on 04.02.2015

@author: marscher
import unittest
import numpy as np

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.util.log import getLogger

logger = getLogger('pyemma.' + 'TestDataInMemory')

class TestDataInMemory(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Generate the random reference arrays shared by the tests.

        Stores a (100, 3) array as ``cls.d`` and a 1-d array of length 100 as
        ``cls.d_1d``.  Must be a classmethod because unittest invokes it as
        ``cls.setUpClass()``.

        Returns
        -------
        cls : the class itself, as in the original implementation.
        """
        cls.d = np.random.random((100, 3))
        cls.d_1d = np.random.random(100)
        return cls

    def test_skip(self):
        for skip in [0, 3, 13]:
            r1 = DataInMemory(self.d)
Пример #28
from __future__ import absolute_import
import unittest
import os
import tempfile

from pyemma.coordinates.data import MDFeaturizer
from pyemma.util.log import getLogger
import pyemma.coordinates.api as api
import numpy as np
from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
from pyemma.coordinates.data.py_csv_reader import PyCSVReader as CSVReader
import shutil

logger = getLogger('TestReaderUtils')

class TestApiSourceFileReader(unittest.TestCase):

    def setUpClass(cls):

        data_np = np.random.random((100, 3))
        data_raw = np.arange(300 * 4).reshape(300, 4)

        cls.dir = tempfile.mkdtemp("test-api-src")

        cls.npy = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.npz = tempfile.mktemp(suffix='.npz', dir=cls.dir)
        cls.dat = tempfile.mktemp(suffix='.dat', dir=cls.dir)
Пример #29
def create_logger(self):
    """Create the logger named after ``self.name`` and register a weak reference to ``self``.

    The logger is stored as ``self._logger_instance``.  A ``weakref.ref`` to
    ``self`` is put into ``_refs`` under ``self.name``; its callback is the
    object returned by ``_cleanup_logger(self)``.

    Returns
    -------
    the logger stored in ``self._logger_instance``.
    """
    # creates a logger based on the attribute "name" of self
    self._logger_instance = getLogger(self.name)
    # _cleanup_logger(self) is called right here; its result is the weakref callback
    _refs[self.name] = weakref.ref(self, _cleanup_logger(self))
    return self._logger_instance
Пример #30
    def __init__(self, counts, mu=None, reversible=False, Tinit=None):
        Sets the count matrix used for sampling. Assumes that the prior 
        (if desired) is included.
        counts : ndarray (n, n)
            the posterior count matrix
        mu : ndarray (n)
           optional stationary distribution; if given, the sampled transition matrix
           will have this stationary distribution.
        reversible : boolean
           should sample a reversible transition matrix.
        Tinit : ndarray(n, n)
           optional start point for sampling algorithm.
        >>> C = np.array([[5, 2], [1,10]]) 
        >>> sampler = ITransitionMatrixSampler(C)
        >>> T = sampler.sample(10**6)
        >>> print T
        if issparse(counts):
            counts = counts.toarray()
        # the interface in stallone takes counts as doubles
        counts = counts.astype(np.float64)

            C = ndarray_to_stallone_array(counts)
            jpackage = stallone.mc.sampling
            # convert types to java
            if Tinit is not None:
                Tinit = ndarray_to_stallone_array(Tinit)
            if mu is not None:
                mu = ndarray_to_stallone_array(mu)

            if reversible:
                if mu:  # fixed pi
                    if Tinit:
                        self.sampler = jpackage.TransitionMatrixSamplerRevFixPi(
                            C, Tinit, mu)
                        self.sampler = jpackage.TransitionMatrixSamplerRevFixPi(
                            C, mu)
                else:  # sample reversible matrix, with arbitrary pi
                    if Tinit:
                        self.sampler = jpackage.TransitionMatrixSamplerRev(
                            C, Tinit)
                        self.sampler = jpackage.TransitionMatrixSamplerRev(C)
            else:  # sample non rev
                if Tinit:
                    self.sampler = jpackage.TransitionMatrixSamplerNonrev(
                        C, Tinit)
                    self.sampler = jpackage.TransitionMatrixSamplerNonrev(C)

        except JavaException as je:
            log = getLogger()
            log.exception("Error during creation of tmatrix sampling wrapper:"
                          " stack\n%s" % je.stacktrace())
Пример #31
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import

import os
import unittest

from pyemma.util.files import TemporaryDirectory
from pyemma.util.log import getLogger
from six.moves import range
import numpy as np
import pyemma.coordinates as coor
import pyemma.util.types as types

logger = getLogger('pyemma.' + 'TestCluster')

class TestClusterAssign(unittest.TestCase):
    def setUpClass(cls):
        super(TestClusterAssign, cls).setUpClass()

        # generate Gaussian mixture
        means = [
            np.array([-3, 0]),
            np.array([-1, 1]),
            np.array([0, 0]),
            np.array([1, -1]),
            np.array([4, 2])
Пример #32
from __future__ import absolute_import
import unittest
import os
import pkg_resources
import numpy as np

from pyemma.coordinates import api

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.coordinates import source, tica
from pyemma.util.contexts import numpy_random_seed
from pyemma.util.log import getLogger
import pyemma.util.types as types
from six.moves import range

logger = getLogger('pyemma.' + 'TestTICA')

def mycorrcoef(X, Y, lag):
    X = X.astype(np.float64)
    Y = Y.astype(np.float64)
    mean_X = 0.5 * (np.mean(X[lag:], axis=0) + np.mean(X[0:-lag], axis=0))
    mean_Y = 0.5 * (np.mean(Y[lag:], axis=0) + np.mean(Y[0:-lag], axis=0))
    cov = (
        (X[0:-lag] - mean_X).T.dot(Y[0:-lag] - mean_Y) +
        (X[lag:] - mean_X).T.dot(Y[lag:] - mean_Y)) / (2 *
                                                       (X.shape[0] - lag) - 1)

    autocov_X = (
        (X[0:-lag] - mean_X).T.dot(X[0:-lag] - mean_X) +
        (X[lag:] - mean_X).T.dot(X[lag:] - mean_X)) / (2 *
Пример #33
import unittest
import os
import tempfile

import numpy as np

from pyemma.util.log import getLogger
import pyemma.coordinates as coor
import pyemma.util.types as types

logger = getLogger('TestReaderUtils')

class TestCluster(unittest.TestCase):

    def setUpClass(cls):
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture
        means = [np.array([-3,0]),
        widths = [np.array([0.3,2]),
Пример #34
#!/usr/bin/env python
Created on 17.02.2014

@author: marscher
import argparse
import sys

from pyemma.msm.estimation.api import count_matrix, largest_connected_set
from pyemma.msm.io.api import write_matrix
from pyemma.util.files import read_dtrajs_from_pattern
from pyemma.util.log import getLogger

log = getLogger()

def handleArgs():
    parser = argparse.ArgumentParser()
                        help='list of discrete trajectories')
                        help='output filename of largest connected set')
        help='lag time for which connectivity should be calculated for',
Пример #35
 def __create_logger(self):
     """Attach a logger named "<ClassName>[<hex id of self>]" as ``self._logger``."""
     class_name = self.__class__.__name__
     instance_tag = hex(id(self))
     self._logger = getLogger("%s[%s]" % (class_name, instance_tag))
Пример #36
__author__ = 'noe'

import numpy as np
from .transformer import Transformer
from pyemma.util.log import getLogger
from pyemma.util.annotators import doc_inherit

log = getLogger('PCA')
__all__ = ['PCA']

class PCA(Transformer):

    r"""Principal component analysis.

    Given a sequence of multivariate data :math:`X_t`,
    computes the mean-free covariance matrix.

    .. math:: C = (X - \mu)^T (X - \mu)

    and solves the eigenvalue problem

    .. math:: C r_i = \sigma_i r_i,

    where :math:`r_i` are the principal components and :math:`\sigma_i` are
    their respective variances.

    When used as a dimension reduction method, the input data is projected onto
    the dominant principal components.

Пример #37
@author: marscher
import numpy as np
import warnings

from mdtraj.utils.validation import cast_indices
from mdtraj.core.trajectory import load, Trajectory, _parse_topology
from mdtraj.formats.hdf5 import HDF5TrajectoryFile
from mdtraj.utils.unit import in_units_of
from mdtraj.formats.lh5 import LH5TrajectoryFile
from mdtraj.formats import DCDTrajectoryFile
from mdtraj.formats import XTCTrajectoryFile

from pyemma.util.log import getLogger

log = getLogger('patches')

def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at

    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.  If 0, load all.
Пример #38
__author__ = 'noe'

from pyemma.util.log import getLogger
from pyemma.coordinates.clustering.interface import AbstractClustering

import numpy as np

log = getLogger('UniformTimeClustering')
__all__ = ['UniformTimeClustering']

class UniformTimeClustering(AbstractClustering):
    Uniform time clustering

    k : int
    def __init__(self, k=2):
        """Initialise the base class and store ``k`` as ``self.k``.

        k : int
            stored unchanged; formatted with ``%i`` by ``describe``.
        """
        super(UniformTimeClustering, self).__init__()
        self.k = k

    def describe(self):
        """Return a one-line description that includes ``self.k`` formatted as an integer."""
        return "[Uniform time clustering, k = %i]" % self.k

    def dimension(self):
        """Return the constant 1.

        NOTE(review): presumably the output dimension of this transformer
        (one cluster index per frame) -- confirm against the base class.
        """
        return 1

    def get_memory_per_frame(self):
Пример #39
import pyemma
from six.moves import range
from six.moves import zip
Created on 04.02.2015

@author: marscher
import unittest
import numpy as np

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.coordinates.transform.transformer import TransformerIteratorContext
from pyemma.util.log import getLogger

logger = getLogger('TestDataInMemory')

class TestDataInMemory(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Generate the random reference arrays shared by the tests.

        Stores a (100, 3) array as ``cls.d`` and a 1-d array of length 100 as
        ``cls.d_1d``.  Must be a classmethod because unittest invokes it as
        ``cls.setUpClass()``.

        Returns
        -------
        cls : the class itself, as in the original implementation.
        """
        cls.d = np.random.random((100, 3))
        cls.d_1d = np.random.random(100)
        return cls

    def testWrongArguments(self):
        """Constructing ``DataInMemory`` from the string "foo" must raise ``ValueError``."""
        with self.assertRaises(ValueError):
            # the instance itself is not used; only the raised exception matters
            reader = DataInMemory("foo")
Test feature reader and Tica by checking the properties of the ICs.
cov(ic_i,ic_j) = delta_ij and cov(ic_i,ic_j,tau) = lambda_i delta_ij
@author: Fabian Paul
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates.api import tica, _TICA as TICA
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger

log = getLogger('TestFeatureReaderAndTICAProjection')

def random_invertible(n, eps=0.01):
    """Generate a real random invertible ``n`` x ``n`` matrix.

    A standard-normal random matrix is decomposed by SVD, its singular values
    are raised to at least ``eps`` and the factors are multiplied back together.

    Parameters
    ----------
    n : int
        number of rows and columns.
    eps : float, optional
        lower bound applied to every singular value.

    Returns
    -------
    ndarray of shape (n, n)
    """
    gaussian = np.random.randn(n, n)
    left, sigma, right = np.linalg.svd(gaussian)
    # clip from below so that no singular value is (close to) zero
    bounded = np.maximum(sigma, eps)
    scaled_left = left.dot(np.diag(bounded))
    return scaled_left.dot(right)

from nose.plugins.attrib import attr

class TestFeatureReaderAndTICAProjection(unittest.TestCase):
Пример #41
Created on 22.01.2015

@author: marscher
import numpy as np
from sklearn.cluster import MiniBatchKMeans

from pyemma.util.log import getLogger
from pyemma.util.annotators import doc_inherit
from pyemma.coordinates.clustering.interface import AbstractClustering

log = getLogger('KmeansClustering')

__all__ = ['KmeansClustering']

class KmeansClustering(AbstractClustering):
    Kmeans clustering

    n_clusters : int
        amount of cluster centers
    max_iter : int 
        how many iterations per chunk?

    def __init__(self, n_clusters, max_iter=1000):
        super(KmeansClustering, self).__init__()
Пример #42
def tmatrix_sampler(C, reversible=False, mu=None, T0=None):
    r"""Generate transition matrix sampler object.
    C : (M, M) ndarray or scipy.sparse matrix
        Count matrix
    reversible : bool
        If true sample from the ensemble of transition matrices
        restricted to those obeying a detailed balance condition,
        else draw from the whole ensemble of stochastic matrices.
    mu : array_like
        The stationary distribution of the transition matrix samples.
    T0 : ndarray, shape=(n, n) or scipy.sparse matrix
        Starting point of the MC chain of the sampling algorithm.
        Has to obey the required constraints.
    sampler : A :py:class:dense.ITransitionMatrixSampler object.

    The transition matrix sampler generates transition matrices from
    the posterior distribution. The posterior distribution is given as
    a product of Dirichlet distributions

    .. math:: \mathbb{P}(T|C) \propto \prod_{i=1}^{M} \left( \prod_{j=1}^{M} p_{ij}^{c_{ij}} \right)

    The method can generate samples from the posterior under the following two constraints:
    **Reversible sampling**

    Using a MCMC sampler outlined in .. [1] it is ensured that samples
    from the posterior are reversible, i.e. there is a probability
    vector :math:`(\mu_i)` such that :math:`\mu_i t_{ij} = \mu_j
    t_{ji}` holds for all :math:`i,j`.

    **Reversible sampling with fixed stationary vector**

    Using a MCMC sampler outlined in .. [2] it is ensured that samples
    from the posterior fulfill detailed balance with respect to a given 
    probability vector :math:`(\mu_i)`.

    .. [1] Noe, F. 2008. Probability distributions of molecular observables
        computed from Markov state models. J Chem Phys 128: 244103.
    .. [2] Trendelkamp-Schroer, B and F Noe. 2013. Efficient Bayesian estimation
        of Markov model transition matrices with given stationary distribution.
        J Chem Phys 138: 164113.
    if issparse(C):
    from pyemma.util.pystallone import JavaException
        return ITransitionMatrixSampler(C, mu, reversible, Tinit=T0)
    except JavaException as je:
        log = getLogger()
        log.exception("Error during tmatrix sampling")
Test feature reader and Tica with a set of cosine time series.
@author: Fabian Paul
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger

log = getLogger('TestFeatureReaderAndTICA')

from nose.plugins.attrib import attr

class TestFeatureReaderAndTICA(unittest.TestCase):
    def setUpClass(cls):
        c = super(TestFeatureReaderAndTICA, cls).setUpClass()

        cls.dim = 99  # dimension (must be divisible by 3)
        N = 50000  # length of single trajectory # 500000
        N_trajs = 10  # number of trajectories
        cls.w = 2.0*np.pi*1000.0/N  # have 1000 cycles in each trajectory

        # get random amplitudes and phases