def sample_indexes_by_cluster(self, clusters, nsample, replace=True):
    """Samples trajectory/time indexes for each of the given clusters.

    Parameters
    ----------
    clusters : iterable of integers
        Indexes of the clusters to draw samples from.
    nsample : int
        Number of samples requested per cluster. With replace = False a
        cluster may yield fewer samples if it has fewer than nsample
        indexes available.
    replace : boolean, optional
        Whether to sample with (True) or without (False) replacement.

    Returns
    -------
    indexes : list of ndarray( (N, 2) )
        Sampled indexes, one array per requested cluster. Every row is a
        tuple (i, t), where i is the index of the trajectory and t is the
        time index within the trajectory.

    """
    # Build the state index catalogue lazily, the first time it is needed.
    # len() is used rather than truthiness so the check does not depend on
    # the container type of the catalogue.
    catalogue_is_empty = len(self._index_states) == 0
    if catalogue_is_empty:
        self._index_states = index_states(self.dtrajs)

    # Restrict the catalogue to the requested clusters before sampling.
    selected = self._index_states[clusters]
    return sample_indexes_by_state(selected, nsample, replace=replace)
def test_sample_by_state_replace(self):
    # Sample 5 frames for every state with the default sampling settings
    # and check that each sampled frame really belongs to its state.
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    nsample = 5
    catalogue = dt.index_states(dtraj)
    sampled = dt.sample_indexes_by_state(catalogue, nsample)
    for state in range(4):
        frames = sampled[state]
        assert (frames.shape[0] == nsample)
        # Column 1 of each row is the time index inside the trajectory.
        for time_index in frames[:, 1]:
            assert (dtraj[time_index] == state)
def test_sample_by_state_replace_subset(self):
    # Sample 5 frames for each state of a subset; the k-th result must
    # only contain frames that are in the k-th state of the subset.
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    nsample = 5
    subset = [1, 2]
    catalogue = dt.index_states(dtraj)
    sampled = dt.sample_indexes_by_state(catalogue, nsample, subset=subset)
    for position, state in enumerate(subset):
        frames = sampled[position]
        assert (frames.shape[0] == nsample)
        # Column 1 of each row is the time index inside the trajectory.
        for time_index in frames[:, 1]:
            assert (dtraj[time_index] == state)
def test_onetraj_sub(self):
    # Index a single trajectory restricted to the states [2, 3].
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    # [2, 3] is a valid subset of the states in dtraj, so a result (not an
    # error) is expected: one (trajectory, time) index array per state.
    res = dt.index_states(dtraj, subset=[2, 3])
    expected = [np.array([[0, 2], [0, 4]]), np.array([[0, 3]])]
    assert len(res) == len(expected)
    for actual, wanted in zip(res, expected):
        # Compare shapes first so the element-wise comparison below is
        # never evaluated on arrays of different shapes.
        assert actual.shape == wanted.shape
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(actual == wanted)
def test_sample_by_sequence(self):
    # Sample one (trajectory, time) index per element of a state sequence
    # and verify that every sampled frame is in the requested state.
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    idx = dt.index_states(dtraj)
    seq = [0, 1, 1, 1, 0, 0, 0, 0, 1, 1]
    sidx = dt.sample_indexes_by_sequence(idx, seq)
    # The shape is a plain tuple, so it can be compared directly; the
    # former np.alltrue wrapper was removed in NumPy 2.0.
    assert sidx.shape == (len(seq), 2)
    for (traj_index, time_index), state in zip(sidx, seq):
        assert traj_index == 0  # did we pick the right traj?
        assert dtraj[time_index] == state  # did we pick the right states?
def active_state_indexes(self):
    """
    Returns the index of the active discrete trajectories, building and
    caching it on first use. Requires the model to be estimated.
    """
    self._check_is_estimated()
    # Fast path: the index was already computed and cached on the instance.
    if hasattr(self, '_active_state_indexes'):
        return self._active_state_indexes

    # Imported lazily, only when the index actually has to be built.
    from pyerna.util.discrete_trajectories import index_states
    self._active_state_indexes = index_states(
        self.discrete_trajectories_active)
    return self._active_state_indexes
def observable_state_indexes(self):
    """
    Returns the index of the observable discrete trajectories, building
    and caching it on the instance the first time it is requested.
    """
    if not hasattr(self, '_observable_state_indexes'):
        # Not cached yet: build the index from the observable trajectories.
        import pyerna.util.discrete_trajectories as dt
        self._observable_state_indexes = dt.index_states(
            self.discrete_trajectories_obs)
    return self._observable_state_indexes
def test_twotraj(self):
    # Index two trajectories at once; every row of the result is a
    # (trajectory index, time index) pair.
    dtrajs = [[0, 1, 2, 3, 2, 1, 0], [3, 4, 5]]
    # No subset is given, so one index array per state 0..5 is expected.
    res = dt.index_states(dtrajs)
    expected = [
        np.array([[0, 0], [0, 6]]),
        np.array([[0, 1], [0, 5]]),
        np.array([[0, 2], [0, 4]]),
        np.array([[0, 3], [1, 0]]),
        np.array([[1, 1]]),
        np.array([[1, 2]]),
    ]
    assert len(res) == len(expected)
    for actual, wanted in zip(res, expected):
        # Compare shapes first so the element-wise comparison below is
        # never evaluated on arrays of different shapes.
        assert actual.shape == wanted.shape
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(actual == wanted)
def index_clusters(self):
    """Returns trajectory/time indexes for all the clusters

    The discrete trajectories are assigned first if that has not happened
    yet, and the index is computed once and then cached on the instance.

    Returns
    -------
    indexes : list of ndarray( (N_i, 2) )
        For each state, all trajectory and time indexes where this cluster
        occurs. Each matrix has a number of rows equal to the number of
        occurrences of the corresponding state, with rows consisting of a
        tuple (i, t), where i is the index of the trajectory and t is the
        time index within the trajectory.

    """
    # len() is used rather than truthiness so the checks do not depend on
    # the container type of the cached attributes.
    if len(self._dtrajs) == 0:
        # Nothing assigned yet, doing that now.
        self._dtrajs = self.assign()

    if len(self._index_states) > 0:
        # The index was computed before; reuse it.
        return self._index_states

    self._index_states = index_states(self._dtrajs)
    return self._index_states
def test_big(self):
    # Smoke test on a bundled dataset: indexing only has to finish
    # without raising, the result itself is not inspected.
    import pyerna.datasets
    dataset = pyerna.datasets.load_2well_discrete()
    long_dtraj = dataset.dtraj_T100K_dt10
    dt.index_states(long_dtraj)
def test_subset_error(self):
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    # States 4 and 5 never occur in dtraj, so [3, 4, 5] is not a subset
    # of its states and indexing must fail with a ValueError.
    invalid_subset = [3, 4, 5]
    self.assertRaises(ValueError, dt.index_states, dtraj,
                      subset=invalid_subset)