def testDataArray(self):
    """A single (T, d) array is read as one trajectory of length T."""
    n_frames, n_dims = 100, 3
    traj = np.random.random((n_frames, n_dims))
    reader = DataInMemory(traj)
    expected_lengths = np.array([n_frames])
    np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
def test1dData(self):
    """A 1d array of length n is one trajectory of n frames with dim 1."""
    n = 3
    data = np.arange(n)
    reader = DataInMemory(data)
    # trajectory_lengths() returns an ndarray; assertEqual only works here
    # by accident (size-1 truthiness). Use the numpy helper for an
    # element-wise comparison, consistent with the other tests in this file.
    np.testing.assert_equal(reader.trajectory_lengths(), np.array([n]))
    self.assertEqual(reader.ndim, 1)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), n)
def test1dDataList(self):
    """A list of two 1d arrays yields two trajectories of dimension 1."""
    length = 10
    trajs = [np.arange(length) for _ in range(2)]
    reader = DataInMemory(trajs)
    np.testing.assert_equal(reader.trajectory_lengths(),
                            np.array([length, length]))
    self.assertEqual(reader.ndim, 1)
    self.assertEqual(reader.number_of_trajectories(), 2)
    self.assertEqual(reader.n_frames_total(), 2 * length)
def test1dDataList(self):
    """A list of two 1d arrays yields two trajectories of dimension 1."""
    n = 10
    data = [np.arange(n), np.arange(n)]
    reader = DataInMemory(data)
    # trajectory_lengths() returns an ndarray; assertEqual(ndarray, [n, n])
    # raises "the truth value of an array with more than one element is
    # ambiguous". Use the numpy assertion for an element-wise comparison.
    np.testing.assert_equal(reader.trajectory_lengths(), np.array([n, n]))
    self.assertEqual(reader.dimension(), 1)
    self.assertEqual(reader.number_of_trajectories(), 2)
    self.assertEqual(reader.n_frames_total(), 2 * n)
def test1dData(self):
    """A 1d array of length n is one trajectory of n frames with dim 1."""
    n = 3
    data = np.arange(n)
    reader = DataInMemory(data)
    # Comparing an ndarray with assertEqual relies on the truthiness of the
    # element-wise result; use the numpy helper for a robust comparison.
    np.testing.assert_equal(reader.trajectory_lengths(), np.array([n]))
    self.assertEqual(reader.dimension(), 1)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), n)
def testDataArray(self):
    """A single (T, d) array is read as one trajectory of length T."""
    frames_per_traj = 100
    dim = 3
    data = np.random.random((frames_per_traj, dim))
    d = DataInMemory(data)
    # xrange is Python 2 only (NameError on Python 3) -> use range.
    # assertEqual on an ndarray vs. list is fragile; compare element-wise
    # with the numpy assertion helper instead.
    np.testing.assert_equal(d.trajectory_lengths(),
                            np.array([frames_per_traj for _ in range(1)]))
def test_ndim_input(self):
    """Trailing axes of an (N, 2, 2, 2) input are flattened into one dim."""
    cube = np.empty((4, 2, 2, 2))
    reader = DataInMemory(cube)
    # 2 * 2 * 2 trailing elements collapse to a single feature dimension.
    self.assertEqual(reader.ndim, 8)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), 4)
    np.testing.assert_equal(reader.trajectory_lengths(),
                            np.array([reader.n_frames_total()]))
def test_ndim_input(self):
    """Trailing axes of an (N, 2, 2, 2) input are flattened into one dim."""
    data = np.empty((4, 2, 2, 2))
    reader = DataInMemory(data)
    self.assertEqual(reader.dimension(), 2 * 2 * 2)
    self.assertEqual(reader.number_of_trajectories(), 1)
    self.assertEqual(reader.n_frames_total(), 4)
    # trajectory_lengths() returns an ndarray; assertEqual against a list
    # relies on array truthiness. Compare element-wise instead.
    np.testing.assert_equal(reader.trajectory_lengths(),
                            np.array([reader.n_frames_total()]))
def testListOfArrays(self):
    """Three (T, d) arrays become three trajectories of dimension d."""
    n_frames, n_dims, n_trajs = 100, 3, 3
    arrays = [np.random.random((n_frames, n_dims)) for _ in range(n_trajs)]
    reader = DataInMemory(arrays)
    self.assertEqual(reader.dimension(), n_dims)
    expected_lengths = np.full(n_trajs, n_frames, dtype=int)
    np.testing.assert_equal(reader.trajectory_lengths(), expected_lengths)
def testListOfArrays(self):
    """Three (T, d) arrays become three trajectories of dimension d."""
    frames_per_traj = 100
    dim = 3
    # xrange is Python 2 only (NameError on Python 3) -> use range.
    data = [np.random.random((frames_per_traj, dim)) for _ in range(3)]
    d = DataInMemory(data)
    self.assertEqual(d.dimension(), dim)
    # assertEqual(ndarray, list) raises "truth value of an array with more
    # than one element is ambiguous"; compare element-wise instead.
    np.testing.assert_equal(d.trajectory_lengths(),
                            np.array([frames_per_traj for _ in range(3)]))
def get_output(self, dimensions=slice(0, None), stride=1, skip=0, chunk=None):
    """Maps all input data of this transformer and returns it as an array or list of arrays

    Parameters
    ----------
    dimensions : list-like of indexes or slice, default=all
        indices of dimensions you like to keep.
    stride : int, default=1
        only take every n'th frame.
    skip : int, default=0
        initially skip n frames of each file.
    chunk: int, default=None
        How many frames to process at once. If not given obtain the chunk size
        from the source.

    Returns
    -------
    output : list of ndarray(T_i, d)
        the mapped data, where T is the number of time steps of the input data,
        or if stride > 1, floor(T_in / stride). d is the output dimension of
        this transformer. If the input consists of a list of trajectories, Y
        will also be a corresponding list of trajectories
    """
    # Normalize 'dimensions' to a slice/index object and determine the
    # number of output columns (ndim).
    if isinstance(dimensions, int):
        ndim = 1
        dimensions = slice(dimensions, dimensions + 1)
    elif isinstance(dimensions, (list, np.ndarray, tuple, slice)):
        if hasattr(dimensions, 'ndim') and dimensions.ndim > 1:
            raise ValueError(
                'dimension indices can\'t have more than one dimension')
        # Index a throwaway zero vector of length self.ndim to count how many
        # columns the selector keeps (works uniformly for slices, lists,
        # tuples and boolean/integer index arrays).
        ndim = len(np.zeros(self.ndim)[dimensions])
    else:
        raise ValueError('unsupported type (%s) of "dimensions"'
                         % type(dimensions))

    assert ndim > 0, "ndim was zero in %s" % self.__class__.__name__

    if chunk is None:
        chunk = self.chunksize

    # create iterator
    if self.in_memory and not self._mapping_to_mem_active:
        # Data already mapped to memory: iterate the cached result (self._Y)
        # instead of recomputing the transformation.
        from pyemma.coordinates.data.data_in_memory import DataInMemory
        assert self._Y is not None
        it = DataInMemory(self._Y)._create_iterator(
            skip=skip, chunk=chunk, stride=stride, return_trajindex=True)
    else:
        it = self._create_iterator(skip=skip, chunk=chunk, stride=stride,
                                   return_trajindex=True)

    with it:
        # allocate memory
        try:
            from pyemma import config
            if config.coordinates_check_output:
                # Pre-fill with NaN so unassigned frames can be detected
                # after the fill loop (see check below).
                trajs = [
                    np.full((l, ndim), np.nan, dtype=self.output_type())
                    for l in it.trajectory_lengths()
                ]
            else:
                # TODO: avoid having a copy here, if Y is already filled
                trajs = [
                    np.empty((l, ndim), dtype=self.output_type())
                    for l in it.trajectory_lengths()
                ]
        except MemoryError:
            # Best effort: report and bail out (returns None).
            self.logger.exception(
                "Could not allocate enough memory to map all data."
                " Consider using a larger stride.")
            return

        if self._logger_is_active(self._loglevel_DEBUG):
            self.logger.debug("get_output(): dimensions=%s" % str(dimensions))
            self.logger.debug(
                "get_output(): created output trajs with shapes: %s"
                % [x.shape for x in trajs])
            self.logger.debug("nchunks :%s, chunksize=%s"
                              % (it.n_chunks, it.chunksize))
        # fetch data
        from pyemma._base.progress import ProgressReporter
        pg = ProgressReporter()
        pg.register(it.n_chunks,
                    description='getting output of %s' % self.__class__.__name__)
        # NOTE(review): 'it' is entered a second time here, inside the outer
        # 'with it:' — presumably the iterator context manager supports
        # re-entry; confirm against the iterator implementation.
        with pg.context(), it:
            for itraj, chunk in it:
                # it.pos is the write offset within the current trajectory.
                i = slice(it.pos, it.pos + len(chunk))
                assert i.stop - i.start > 0
                trajs[itraj][i, :] = chunk[:, dimensions]
                pg.update(1)

    # Optional post-hoc sanity check: every frame of every output trajectory
    # must have been overwritten (no NaNs left from the np.full allocation).
    # 'config' was bound by the import inside the try block above.
    if config.coordinates_check_output:
        for i, t in enumerate(trajs):
            finite = self._chunk_finite(t)
            if not np.all(finite):
                # determine position
                frames = np.where(np.logical_not(finite))
                if not len(frames):
                    raise RuntimeError(
                        'nothing got assigned for traj {}'.format(i))
                raise RuntimeError(
                    'unassigned sections in traj {i} in range [{frames}]'.
                    format(frames=frames, i=i))

    return trajs