def sample_indexes_by_cluster(self, clusters, nsample, replace=True):
    """Samples trajectory/time indexes for each of the given clusters.

    Parameters
    ----------
    clusters : iterable of integers
        Indexes of the clusters to draw samples from.
    nsample : int
        Number of samples per cluster. If replace = False, fewer than
        nsample samples may be returned for a cluster that has less than
        nsample indexes available.
    replace : boolean, optional
        Whether the sample is with or without replacement.

    Returns
    -------
    indexes : list of ndarray( (N, 2) )
        Sampled indexes, one array per requested cluster. N is the number
        of samples drawn for that cluster. Each row is a tuple (i, t),
        where i is the index of the trajectory and t is the time index
        within the trajectory.

    """
    # The state-index catalogue is cached on the instance; (re)build it from
    # the discrete trajectories whenever the cached catalogue is empty.
    catalogue_is_empty = len(self._index_states) == 0
    if catalogue_is_empty:
        self._index_states = index_states(self.dtrajs)

    # Restrict the catalogue to the requested clusters before sampling.
    selected_states = self._index_states[clusters]
    return sample_indexes_by_state(selected_states, nsample, replace=replace)
def test_sample_by_state_replace(self):
    """Each of the four states gets exactly 5 samples, all pointing at that state."""
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    nsample = 5
    state_index = dt.index_states(dtraj)
    samples = dt.sample_indexes_by_state(state_index, nsample)
    for state in range(4):
        drawn = samples[state]
        # one row per requested sample
        assert drawn.shape[0] == nsample
        for row in range(drawn.shape[0]):
            # column 1 holds the time index; it must map back to this state
            time_index = drawn[row, 1]
            assert dtraj[time_index] == state
def test_sample_by_state_replace(self):
    """Check sample count and state membership of the frames drawn per state."""
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    n_states = 4
    expected_rows = 5
    sampled = dt.sample_indexes_by_state(dt.index_states(dtraj), expected_rows)
    for state in range(n_states):
        frames = sampled[state]
        n_rows = frames.shape[0]
        assert n_rows == expected_rows
        for k in range(n_rows):
            # second column is the time index into dtraj
            assert dtraj[frames[k, 1]] == state
def test_sample_by_state_replace_subset(self):
    """With a subset, the k-th result holds 5 samples of the k-th subset state."""
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    subset = [1, 2]
    nsample = 5
    state_index = dt.index_states(dtraj)
    samples = dt.sample_indexes_by_state(state_index, nsample, subset=subset)
    for position, state in enumerate(subset):
        drawn = samples[position]
        # one row per requested sample
        assert drawn.shape[0] == nsample
        for row in range(drawn.shape[0]):
            # column 1 holds the time index; it must map back to the subset state
            time_index = drawn[row, 1]
            assert dtraj[time_index] == state
def test_sample_by_state_replace_subset(self):
    """Check sample count and state membership when sampling only a subset of states."""
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    subset = [1, 2]
    expected_rows = 5
    sampled = dt.sample_indexes_by_state(
        dt.index_states(dtraj), expected_rows, subset=subset
    )
    for k, wanted_state in enumerate(subset):
        frames = sampled[k]
        n_rows = frames.shape[0]
        assert n_rows == expected_rows
        for r in range(n_rows):
            # second column is the time index into dtraj
            assert dtraj[frames[r, 1]] == wanted_state
def sample_by_state(self, nsample, subset=None, replace=True):
    """Generates samples of the connected states.

    For each state in the active set of states, generates nsample samples
    with trajectory/time indexes. This information can be used in order to
    generate a trajectory of length nsample * nconnected using
    :func:`pyemma.coordinates.save_traj` or nconnected trajectories of
    length nsample each using :func:`pyemma.coordinates.save_traj`

    Parameters
    ----------
    nsample : int
        Number of samples per state. If replace = False, the number of
        returned samples per state could be smaller if less than nsample
        indexes are available for a state.
    subset : ndarray((n)), optional, default = None
        array of states to be indexed. By default all states in the
        connected set will be used
    replace : boolean, optional
        Whether the sample is with or without replacement

    Returns
    -------
    indexes : list of ndarray( (N, 2) )
        list of trajectory/time index arrays with an array for each state.
        Within each index array, each row consist of a tuple (i, t), where
        i is the index of the trajectory and t is the time index within the
        trajectory.

    See also
    --------
    pyemma.coordinates.save_traj
        in order to save the sampled frames sequentially in a trajectory
        file with molecular structures
    pyemma.coordinates.save_trajs
        in order to save the sampled frames in nconnected trajectory files
        with molecular structures

    """
    # Sampling requires an estimated model.
    self._check_is_estimated()

    # Imported at call time, as in the original implementation.
    import pyemma.util.discrete_trajectories as dt

    sampler = dt.sample_indexes_by_state
    # Sample from the indexes of the active (connected) states.
    active_indexes = self.active_state_indexes
    return sampler(active_indexes, nsample, subset=subset, replace=replace)