Exemplo n.º 1
0
    def testDataArray(self):
        """Check the trajectory lengths reported for a single 2d array.

        The reader is built from one random ``(n_frames, n_features)`` array
        and is expected to report exactly one length, equal to ``n_frames``.
        """
        n_frames, n_features = 100, 3
        reader = DataInMemory(np.random.random((n_frames, n_features)))

        expected_lengths = np.array([n_frames])
        np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
Exemplo n.º 2
0
    def test1dData(self):
        """Check the reader's bookkeeping for a single 1d array.

        A 1d array of ``n`` values is expected to be reported as one
        trajectory with ``n`` frames and ``ndim == 1``.
        """
        n = 3
        data = np.arange(n)
        reader = DataInMemory(data)

        # Compare arrays with numpy's helper: unittest's assertEqual relies on
        # the truth value of an element-wise comparison, which only happens to
        # work for one-element arrays and fails with an unrelated error when
        # the lengths differ.
        np.testing.assert_equal(reader.trajectory_lengths(), np.array([n]))
        self.assertEqual(reader.ndim, 1)
        self.assertEqual(reader.number_of_trajectories(), 1)
        self.assertEqual(reader.n_frames_total(), n)
Exemplo n.º 3
0
    def test1dDataList(self):
        """Check the reader's bookkeeping for a list of two equally long 1d arrays.

        Expected: two trajectories of ``length`` frames each, ``ndim == 1``
        and ``2 * length`` frames in total.
        """
        length = 10
        n_trajs = 2
        reader = DataInMemory([np.arange(length) for _ in range(n_trajs)])

        expected_lengths = np.array([length] * n_trajs)
        np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
        self.assertEqual(reader.ndim, 1)
        self.assertEqual(reader.number_of_trajectories(), n_trajs)
        self.assertEqual(reader.n_frames_total(), n_trajs * length)
Exemplo n.º 4
0
    def test1dDataList(self):
        """Check the reader's bookkeeping for a list of two equally long 1d arrays.

        Expected: trajectory lengths ``[length, length]``, a dimension of 1,
        two trajectories and ``2 * length`` frames in total.
        """
        length = 10
        n_trajs = 2
        reader = DataInMemory([np.arange(length) for _ in range(n_trajs)])

        self.assertEqual(reader.trajectory_lengths(), [length] * n_trajs)
        self.assertEqual(reader.dimension(), 1)
        self.assertEqual(reader.number_of_trajectories(), n_trajs)
        self.assertEqual(reader.n_frames_total(), n_trajs * length)
Exemplo n.º 5
0
    def test1dData(self):
        """Check the reader's bookkeeping for a single 1d array.

        Expected: trajectory lengths ``[length]``, a dimension of 1, one
        trajectory and ``length`` frames in total.
        """
        length = 3
        reader = DataInMemory(np.arange(length))

        expected_lengths = [length]
        self.assertEqual(reader.trajectory_lengths(), expected_lengths)
        self.assertEqual(reader.dimension(), 1)
        self.assertEqual(reader.number_of_trajectories(), 1)
        self.assertEqual(reader.n_frames_total(), length)
Exemplo n.º 6
0
    def testDataArray(self):
        """Check the trajectory lengths reported for a single 2d array.

        The reader is built from one random ``(frames_per_traj, dim)`` array
        and is expected to report the length list ``[frames_per_traj]``.
        """
        frames_per_traj = 100
        dim = 3

        data = np.random.random((frames_per_traj, dim))
        d = DataInMemory(data)

        # A plain one-element list replaces the former xrange(1) comprehension;
        # xrange does not exist on Python 3.
        self.assertEqual(d.trajectory_lengths(), [frames_per_traj])
Exemplo n.º 7
0
    def test_ndim_input(self):
        """Check how a 4d input array of shape (4, 2, 2, 2) is reported.

        Expected: ``ndim`` equals the product of the trailing axes (8), one
        trajectory, and 4 frames in total, with the trajectory lengths
        matching the total frame count.
        """
        n_frames = 4
        trailing_shape = (2, 2, 2)
        reader = DataInMemory(np.empty((n_frames,) + trailing_shape))

        flattened_dim = 2 * 2 * 2
        self.assertEqual(reader.ndim, flattened_dim)
        self.assertEqual(reader.number_of_trajectories(), 1)
        self.assertEqual(reader.n_frames_total(), n_frames)

        reported_lengths = reader.trajectory_lengths()
        expected_lengths = np.array([reader.n_frames_total()])
        np.testing.assert_equal(reported_lengths, expected_lengths)
Exemplo n.º 8
0
    def test_ndim_input(self):
        """Check how a 4d input array of shape (4, 2, 2, 2) is reported.

        Expected: ``dimension()`` equals the product of the trailing axes (8),
        one trajectory, and 4 frames in total, with the trajectory length list
        holding just the total frame count.
        """
        n_frames = 4
        trailing_shape = (2, 2, 2)
        reader = DataInMemory(np.empty((n_frames,) + trailing_shape))

        flattened_dim = 2 * 2 * 2
        self.assertEqual(reader.dimension(), flattened_dim)
        self.assertEqual(reader.number_of_trajectories(), 1)
        self.assertEqual(reader.n_frames_total(), n_frames)

        reported_lengths = reader.trajectory_lengths()
        self.assertEqual(reported_lengths, [reader.n_frames_total()])
Exemplo n.º 9
0
    def testListOfArrays(self):
        """Check dimension and trajectory lengths for a list of three 2d arrays.

        Every array has shape ``(n_frames, n_features)``; the reader is
        expected to report ``n_features`` as its dimension and ``n_frames``
        as the length of each of the three trajectories.
        """
        n_trajs = 3
        n_frames, n_features = 100, 3
        trajs = [np.random.random((n_frames, n_features))
                 for _ in range(n_trajs)]

        reader = DataInMemory(trajs)

        self.assertEqual(reader.dimension(), n_features)

        expected_lengths = np.array([n_frames] * n_trajs)
        np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
Exemplo n.º 10
0
    def testListOfArrays(self):
        """Check dimension and trajectory lengths for a list of three 2d arrays.

        Every array has shape ``(frames_per_traj, dim)``; the reader is
        expected to report ``dim`` as its dimension and the length list
        ``[frames_per_traj] * 3``.
        """
        n_trajs = 3
        frames_per_traj = 100
        dim = 3
        # range replaces xrange, which does not exist on Python 3.
        data = [np.random.random((frames_per_traj, dim))
                for _ in range(n_trajs)]

        d = DataInMemory(data)

        self.assertEqual(d.dimension(), dim)

        self.assertEqual(d.trajectory_lengths(), [frames_per_traj] * n_trajs)
Exemplo n.º 11
0
    def get_output(self,
                   dimensions=slice(0, None),
                   stride=1,
                   skip=0,
                   chunk=None):
        """Maps all input data of this transformer and returns it as a list of arrays

        Parameters
        ----------
        dimensions : int, list-like of indexes or slice, default=all
           indices of dimensions you like to keep. A single integer (which may
           be negative, counting from the last dimension) keeps exactly that
           one dimension.
        stride : int, default=1
           only take every n'th frame.
        skip : int, default=0
            initially skip n frames of each file.
        chunk: int, default=None
            How many frames to process at once. If not given obtain the chunk size
            from ``self.chunksize``.

        Returns
        -------
        output : list of ndarray(T_i, d) or None
           the mapped data, where T is the number of time steps of the input data, or if stride > 1,
           floor(T_in / stride). d is the number of selected dimensions.
           One array is returned per trajectory length reported by the iterator.
           If the output arrays can not be allocated (MemoryError), the error is
           logged and None is returned.

        Raises
        ------
        ValueError
            if ``dimensions`` is of an unsupported type or is an index array
            with more than one dimension.
        RuntimeError
            if ``config.coordinates_check_output`` is enabled and an output
            trajectory does not pass ``self._chunk_finite``.

        """
        if isinstance(dimensions, (int, np.integer)):
            ndim = 1
            stop = dimensions + 1
            # For dimensions == -1 the naive stop would be 0, i.e. the empty
            # slice(-1, 0); a stop of None selects through the last column.
            dimensions = slice(dimensions, stop if stop != 0 else None)
        elif isinstance(dimensions, (list, np.ndarray, tuple, slice)):
            if hasattr(dimensions, 'ndim') and dimensions.ndim > 1:
                raise ValueError(
                    'dimension indices can\'t have more than one dimension')
            # count the selected dimensions by indexing a dummy vector
            ndim = len(np.zeros(self.ndim)[dimensions])
        else:
            raise ValueError('unsupported type (%s) of "dimensions"' %
                             type(dimensions))

        assert ndim > 0, "ndim was zero in %s" % self.__class__.__name__

        if chunk is None:
            chunk = self.chunksize

        # create iterator
        if self.in_memory and not self._mapping_to_mem_active:
            # output is already held in self._Y, so iterate over that instead
            from pyemma.coordinates.data.data_in_memory import DataInMemory
            assert self._Y is not None
            it = DataInMemory(self._Y)._create_iterator(skip=skip,
                                                        chunk=chunk,
                                                        stride=stride,
                                                        return_trajindex=True)
        else:
            it = self._create_iterator(skip=skip,
                                       chunk=chunk,
                                       stride=stride,
                                       return_trajindex=True)

        with it:
            # allocate memory
            try:
                from pyemma import config
                if config.coordinates_check_output:
                    # pre-fill with NaN so unassigned sections can be
                    # detected by the check after the loop
                    trajs = [
                        np.full((l, ndim), np.nan, dtype=self.output_type())
                        for l in it.trajectory_lengths()
                    ]
                else:
                    # TODO: avoid having a copy here, if Y is already filled
                    trajs = [
                        np.empty((l, ndim), dtype=self.output_type())
                        for l in it.trajectory_lengths()
                    ]
            except MemoryError:
                self.logger.exception(
                    "Could not allocate enough memory to map all data."
                    " Consider using a larger stride.")
                return

            if self._logger_is_active(self._loglevel_DEBUG):
                self.logger.debug("get_output(): dimensions=%s" %
                                  str(dimensions))
                self.logger.debug(
                    "get_output(): created output trajs with shapes: %s" %
                    [x.shape for x in trajs])
                self.logger.debug("nchunks :%s, chunksize=%s" %
                                  (it.n_chunks, it.chunksize))
            # fetch data
            from pyemma._base.progress import ProgressReporter
            pg = ProgressReporter()
            pg.register(it.n_chunks,
                        description='getting output of %s' %
                        self.__class__.__name__)
            with pg.context(), it:
                # X is named so that it does not shadow the "chunk" parameter
                for itraj, X in it:
                    i = slice(it.pos, it.pos + len(X))
                    assert i.stop - i.start > 0
                    trajs[itraj][i, :] = X[:, dimensions]
                    pg.update(1)

        if config.coordinates_check_output:
            for i, t in enumerate(trajs):
                finite = self._chunk_finite(t)
                if not np.all(finite):
                    # determine position
                    frames = np.where(np.logical_not(finite))
                    # NOTE(review): np.where with one argument returns a tuple
                    # of index arrays, so len(frames) looks like the number of
                    # axes of "finite" rather than a count of offending
                    # entries - confirm whether this branch can be reached.
                    if not len(frames):
                        raise RuntimeError(
                            'nothing got assigned for traj {}'.format(i))
                    raise RuntimeError(
                        'unassigned sections in traj {i} in range [{frames}]'.
                        format(frames=frames, i=i))

        return trajs