Example #1
    def __init__(self, filenames, selection='/*', chunk_size=5000, **kw):
        super(H5Reader, self).__init__(chunksize=chunk_size)

        self._is_reader = True
        self._is_random_accessible = True

        from pyemma.coordinates.data.data_in_memory import (DataInMemoryCuboidRandomAccessStrategy,
                                                            DataInMemoryJaggedRandomAccessStrategy,
                                                            DataInMemoryLinearRandomAccessStrategy,
                                                            DataInMemoryLinearItrajRandomAccessStrategy)
        self._ra_cuboid = DataInMemoryCuboidRandomAccessStrategy(self, 3)
        self._ra_jagged = DataInMemoryJaggedRandomAccessStrategy(self, 3)
        self._ra_linear_strategy = DataInMemoryLinearRandomAccessStrategy(self, 2)
        self._ra_linear_itraj_strategy = DataInMemoryLinearItrajRandomAccessStrategy(self, 3)

        # set selection first, so we can use it in the filename setter.
        self.selection = selection
        # we count data sets as itrajs, because an HDF5 file can contain multiple data sets.
        from collections import defaultdict
        self._itraj_dataset_mapping = defaultdict(int)

        # we explicitly do not want to cache anything for H5, because the user can provide different selections
        # and the interface of the cache does not allow for such a mapping (1:1 relation filename:(dimension, len)).
        from pyemma.util.contexts import settings
        with settings(use_trajectory_lengths_cache=False):
            self.filenames = filenames

        # we need to override the ntraj attribute to be equal to the itraj counter, so that all data sets are respected.
        self._ntraj = self._itraj_counter

        # sanity
        if self._itraj_counter == 0:
            raise ValueError('Your provided selection did not match anything in your provided files. '
                             'Check the log output')
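Every example on this page uses the same mechanism: settings(...) temporarily overrides options on pyemma's global config object and restores the previous values when the with-block exits, even if the body raises. Below is a minimal sketch of such a context manager, assuming only the pyemma.config attribute access visible in Example #6; settings_sketch is a hypothetical name, not pyemma's actual implementation.

from contextlib import contextmanager

from pyemma import config  # pyemma's global configuration object


@contextmanager
def settings_sketch(**overrides):
    # remember the current value of every option we are about to change
    previous = {name: getattr(config, name) for name in overrides}
    for name, value in overrides.items():
        setattr(config, name, value)
    try:
        yield
    finally:
        # restore the old values even if the with-body raised
        for name, value in previous.items():
            setattr(config, name, value)

Under this shape, the with-block in Example #1 disables use_trajectory_lengths_cache only while the filenames are being assigned, and the overrides in the following examples likewise last only for the duration of their with-blocks.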
Example #2
 def test_chunksize_max_memory(self):
     from pyemma.util.contexts import settings
     data = np.random.random((10000, 10))
     max_size = 1024
     with settings(default_chunksize=str(max_size)):
         r = DataInMemory(data)
         for itraj, x in r.iterator():
             self.assertLessEqual(x.nbytes, max_size)
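The assertion above bounds each chunk by size in bytes, not by number of rows. The arithmetic, assuming float64 data as produced by np.random.random:

bytes_per_row = 10 * 8            # 10 float64 columns, 8 bytes each
max_rows = 1024 // bytes_per_row  # at most 12 rows per chunk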
Example #3
 def test_invalid_data_in_input_inf(self):
     self.d[1][-1] = np.inf
     r = DataInMemory(self.d, chunksize=5)
     it = r.iterator()
      from pyemma.coordinates.data._base.datasource import InvalidDataInStreamException
      from pyemma.util.contexts import settings
     with settings(coordinates_check_output=True):
         with self.assertRaises(InvalidDataInStreamException) as cm:
             for itraj, X in it:
                 pass
Example #4
 def test_exception_getoutput_invalid_data(self):
     """ensure we get a proper exception if invalid data is contained in the stream"""
     from pyemma.util.contexts import settings
     data = np.ones(10)
     data[-1] = np.nan
     reader = pyemma.coordinates.source(data)
     from pyemma.coordinates.data._base.datasource import InvalidDataInStreamException
     with settings(coordinates_check_output=True), self.assertRaises(InvalidDataInStreamException):
         reader.get_output()
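Examples #3 and #4 both switch on coordinates_check_output, which makes pyemma validate every chunk the iterator yields and raise InvalidDataInStreamException on NaN or infinite values. A hedged sketch of the kind of check this implies; validate_chunk is a hypothetical helper, not pyemma's internal code:

import numpy as np


def validate_chunk(X, itraj):
    # reject chunks containing NaN or +/-inf values, mirroring what
    # coordinates_check_output=True enables inside pyemma's iterators
    if not np.all(np.isfinite(X)):
        raise ValueError('invalid data (NaN/inf) in trajectory %d' % itraj)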
Example #5
    def test_with_pg_data_not_in_memory(self):
        import pkg_resources
        import pyemma
        from pyemma.util.contexts import settings

        path = pkg_resources.resource_filename('pyemma.coordinates.tests', 'data') + os.path.sep
        pdb_file = os.path.join(path, 'bpti_ca.pdb')
        traj_files = [
            os.path.join(path, 'bpti_001-033.xtc'),
            os.path.join(path, 'bpti_034-066.xtc'),
            os.path.join(path, 'bpti_067-100.xtc')
        ]
        reader = pyemma.coordinates.source(traj_files, top=pdb_file)

        with settings(show_progress_bars=True), Capturing(which='stderr') as out:
            cluster_kmeans(reader)
        self.assertIn('creating data array', '\n'.join(out))
Example #6
    def test_max_size(self):
        from pyemma.util.contexts import settings
        data = [np.random.random((150, 10)) for _ in range(150)]
        max_size = 1

        files = []
        config.show_progress_bars = False
        with TemporaryDirectory() as td, settings(traj_info_max_size=max_size):
            for i, arr in enumerate(data):
                f = os.path.join(td, "%s.txt" % i)
                # save as txt to enforce creation of offsets
                np.savetxt(f, arr)
                files.append(f)
            pyemma.coordinates.source(files)

        self.assertLessEqual(os.stat(self.db.database_filename).st_size / 1024, config.traj_info_max_size)
        self.assertGreater(self.db.num_entries, 0)
Example #7
    def test_featurereader_xtc(self):
        # cause cache failures
        with settings(use_trajectory_lengths_cache=False):
            reader = FeatureReader(xtcfiles, pdbfile)

        results = {}
        for f in xtcfiles:
            traj_info = self.db[f, reader]
            results[f] = traj_info.ndim, traj_info.length, traj_info.offsets

        expected = {}
        for f in xtcfiles:
            with mdtraj.open(f) as fh:
                length = len(fh)
                ndim = fh.read(1)[0].shape[1]
                offsets = fh.offsets if hasattr(fh, 'offsets') else []
                expected[f] = ndim, length, offsets

        np.testing.assert_equal(results, expected)
Example #8
 def test_with_pg(self):
     from pyemma.util.contexts import settings
     with settings(show_progress_bars=True), Capturing(
             which='stderr') as output:
         cluster_kmeans(np.random.rand(100, 3))
     self.assertNotIn('creating data array', '\n'.join(output))
Example #9
 def test_with_pg(self):
     from pyemma.util.contexts import settings
     with settings(show_progress_bars=True):
         cluster_kmeans(np.random.rand(100, 3))
Example #10
 def test_config_2(self):
     from pyemma.util.contexts import settings
     self.pg.show_progress = False
     with settings(show_progress_bars=True):
         assert not self.pg.show_progress
Example #11
 def test_config_override(self):
     from pyemma.util.contexts import settings
     self.pg.show_progress = True
     with settings(show_progress_bars=False):
         assert not self.pg.show_progress
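Taken together, Examples #10 and #11 pin down the precedence of the two flags: a progress bar is shown only when both the per-object flag and the global show_progress_bars setting allow it, so a global False overrides an instance-level True (Example #11), while an instance-level False survives a global True (Example #10). A sketch of a property with that AND-semantics; the class and attribute names are assumptions for illustration, not pyemma's implementation:

from pyemma import config


class ProgressReporterSketch:
    # hypothetical stand-in for the object behind self.pg in the tests
    _show_progress = True

    @property
    def show_progress(self):
        # effective flag: the instance flag AND the global config option
        # must both be true (matches Examples #10 and #11)
        return self._show_progress and config.show_progress_bars

    @show_progress.setter
    def show_progress(self, value):
        self._show_progress = bool(value)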