def test_sample_by_state_replace(self):
    """Sample five frames for each of the four states of a short trajectory.

    No state occurs more than twice in ``dtraj``, so getting five rows per
    state is only possible if frames are repeated in the sample.
    """
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    n_draws = 5
    per_state = sample.indices_by_state(sample.compute_index_states(dtraj), n_draws)
    for state in range(4):
        drawn = per_state[state]
        assert drawn.shape[0] == n_draws
        # Column 1 holds the frame index; every drawn frame must be in `state`.
        for row in range(drawn.shape[0]):
            assert dtraj[drawn[row, 1]] == state
def test_sample_by_state_replace_subset(self):
    """Sample five frames per state, restricted to the states in ``subset``.

    The result is indexed by position within ``subset``, so entry ``k`` must
    only contain frames in which the trajectory visits ``subset[k]``.
    """
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    subset = [1, 2]
    n_draws = 5
    state_index = sample.compute_index_states(dtraj)
    per_state = sample.indices_by_state(state_index, n_draws, subset=subset)
    for position, state in enumerate(subset):
        drawn = per_state[position]
        assert drawn.shape[0] == n_draws
        # Column 1 holds the frame index within the trajectory.
        for row in range(drawn.shape[0]):
            assert dtraj[drawn[row, 1]] == state
def test_onetraj_sub(self):
    """Index a single trajectory restricted to the states 2 and 3.

    The expected result has one index array per requested state, each row
    being a ``(trajectory index, frame index)`` pair.
    """
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    # Both requested states occur in dtraj, so a regular result (not an
    # error) is expected: state 2 at frames 2 and 4, state 3 at frame 3.
    res = sample.compute_index_states(dtraj, subset=[2, 3])
    expected = [np.array([[0, 2], [0, 4]]), np.array([[0, 3]])]
    assert len(res) == len(expected)
    for actual, wanted in zip(res, expected):
        assert actual.shape == wanted.shape
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(actual == wanted)
def test_sample_by_sequence(self):
    """Pick one frame per element of a state sequence and verify the picks.

    The result must have one ``(trajectory index, frame index)`` row per
    sequence element, and each picked frame must be in the requested state.
    """
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    idx = sample.compute_index_states(dtraj)
    seq = [0, 1, 1, 1, 0, 0, 0, 0, 1, 1]
    sidx = sample.indices_by_sequence(idx, seq)
    # Shape is a plain tuple, so compare it directly; the former
    # np.alltrue wrapper was removed in NumPy 2.0 and was never needed here.
    assert sidx.shape == (len(seq), 2)
    for t in range(sidx.shape[0]):
        assert sidx[t, 0] == 0  # did we pick the right traj?
        assert dtraj[sidx[t, 1]] == seq[t]  # did we pick the right states?
def _active_state_indices(self, msm):
    """Check the per-state frame index of ``self.dtraj`` against a histogram.

    The index is computed for the state symbols of ``msm.count_model``; for
    every such symbol the number of indexed frames must equal its count in
    the state histogram, and every index row must have two columns.
    """
    from sktime.markov.sample import compute_index_states

    symbols = msm.count_model.state_symbols
    index_per_state = compute_index_states(self.dtraj, subset=symbols)
    assert len(index_per_state) == msm.n_states

    # Reference: how often each state occurs in the discrete trajectory.
    histogram = count_states(self.dtraj)
    for position, symbol in enumerate(symbols):
        frames = index_per_state[position]
        assert frames.shape[0] == histogram[symbol]
        assert frames.shape[1] == 2
def test_observable_state_indices(self):
    """Check the frame index of the observation symbols against a histogram.

    The index over ``self.dtrajs`` is restricted to the observation symbols
    of the ``hmm_lag10_largest`` model; for each symbol the number of indexed
    frames must match its histogram count, with two columns per index row.
    """
    from sktime.markov.sample import compute_index_states

    model = self.hmm_lag10_largest
    symbols = model.observation_symbols
    index_per_symbol = compute_index_states(self.dtrajs, subset=symbols)
    np.testing.assert_equal(len(index_per_symbol), model.n_observation_states)

    # Reference: how often each state occurs across the discrete trajectories.
    histogram = count_states(self.dtrajs)
    for position, symbol in enumerate(symbols):
        frames = index_per_symbol[position]
        np.testing.assert_equal(frames.shape[0], histogram[symbol])
        np.testing.assert_equal(frames.shape[1], 2)
def test_twotraj(self):
    """Index two trajectories without a subset and compare to known rows.

    Each expected array lists the ``(trajectory index, frame index)`` pairs
    of one state, for the states 0 through 5 in order.
    """
    dtrajs = [[0, 1, 2, 3, 2, 1, 0], [3, 4, 5]]
    # No subset is passed, so a regular result (not an error) is expected.
    # State 3 is the only one that occurs in both trajectories.
    res = sample.compute_index_states(dtrajs)
    expected = [
        np.array([[0, 0], [0, 6]]),
        np.array([[0, 1], [0, 5]]),
        np.array([[0, 2], [0, 4]]),
        np.array([[0, 3], [1, 0]]),
        np.array([[1, 1]]),
        np.array([[1, 2]]),
    ]
    assert len(res) == len(expected)
    for actual, wanted in zip(res, expected):
        assert actual.shape == wanted.shape
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(actual == wanted)
def test_performance(self):
    """Time ``compute_index_states`` against pyemma's ``index_states``.

    Both implementations index the same random trajectories for the same
    state selection; their results must have equal length and matching
    entries. The ``timing`` context manager is used to label each run.
    """
    import pyemma.util.discrete_trajectories as dt

    state = np.random.RandomState(42)
    n_states = 10000
    dtrajs = [state.randint(0, n_states, size=100000) for _ in range(500)]
    # Draw the selection from the seeded generator as well; using the global
    # np.random here made the test input differ from run to run.
    selection = state.choice(np.arange(n_states), size=(500,), replace=False)
    with timing('pyemma'):
        out2 = dt.index_states(dtrajs, selection)
    with timing('cpp'):
        out = sample.compute_index_states(dtrajs, selection)
    assert len(out) == len(out2)
    for o1, o2 in zip(out, out2):
        np.testing.assert_array_almost_equal(o1, o2)
def sample_by_observation_probabilities(self, dtrajs, nsample):
    r"""Draw frame indices according to the current observation probabilities.

    The input trajectories are first mapped to observed symbols, the frames
    of each symbol are indexed, and ``nsample`` frames are then drawn per
    distribution in ``self.output_probabilities``.

    Parameters
    ----------
    dtrajs : discrete trajectory
        Input observation trajectory or list of trajectories.
    nsample : int
        Number of samples to draw per distribution.

    Returns
    -------
    indexes : length m list of ndarray( (nsample, 2) )
        Sampled indices, one array per distribution. Each array has
        ``nsample`` rows of the form (i, t), where i is the index of the
        trajectory and t is the time index within that trajectory.
    """
    from sktime.markov.sample import compute_index_states

    observed = self.transform_discrete_trajectories_to_observed_symbols(dtrajs)
    frames_per_symbol = compute_index_states(observed)
    sampled = indices_by_distribution(
        frames_per_symbol, self.output_probabilities, nsample
    )
    return sampled
def test_subset_error(self):
    """A subset containing states absent from the trajectory must raise."""
    dtraj = [0, 1, 2, 3, 2, 1, 0]
    # States 4 and 5 never occur in dtraj, so this is not a valid subset
    # and a ValueError is expected.
    not_a_subset = [3, 4, 5]
    with self.assertRaises(ValueError):
        sample.compute_index_states(dtraj, subset=not_a_subset)