def test_hmm_predict_aima_umbrella_example():
    """
    This example was taken from AI: A Modern Approach.

    The example comes from page 572, the filtering and prediction section.
    The correct values were computed manually by propagating the filtered
    values one step into the future from the filtering example.
    """
    # states = {0: 'No Rain', 1: 'Rain'}
    # obs = {0: 'No Umbrella', 1: 'Umbrella'}
    T0 = torch.tensor([0.5, 0.5])
    T = torch.tensor([[0.7, 0.3],
                      [0.3, 0.7]])
    s1_orig = torch.tensor([0.8, 0.2])
    s2_orig = torch.tensor([0.1, 0.9])
    s1 = CategoricalModel(probs=s1_orig)
    s2 = CategoricalModel(probs=s2_orig)
    model = HiddenMarkovModel([s1, s2], T0=T0, T=T)

    obs_seq = pack_list([torch.tensor([1])])
    posterior = model.predict(obs_seq)
    normalized = posterior.softmax(1)
    correct = torch.tensor([[0.37272727, 0.62727273]])
    assert torch.allclose(normalized, correct)

    obs_seq = pack_list([torch.tensor([1, 1])])
    posterior = model.predict(obs_seq)
    normalized = posterior.softmax(1)
    correct = torch.tensor([[0.34665718, 0.65334282]])
    assert torch.allclose(normalized, correct)
def test_hmm_filter_aima_umbrella_example():
    """
    This example was taken from AI: A Modern Approach.

    The example comes from page 572, the filtering and prediction section.
    The correct values were manually compared to the normalized values from
    this example.
    """
    # states = {0: 'No Rain', 1: 'Rain'}
    # obs = {0: 'No Umbrella', 1: 'Umbrella'}
    T0 = torch.tensor([0.5, 0.5])
    T = torch.tensor([[0.7, 0.3],
                      [0.3, 0.7]])
    s1 = CategoricalModel(probs=torch.tensor([0.8, 0.2]))
    s2 = CategoricalModel(probs=torch.tensor([0.1, 0.9]))
    model = HiddenMarkovModel([s1, s2], T0=T0, T=T)

    obs_seq = pack_list([torch.tensor([1])])
    posterior = model.filter(obs_seq)
    normalized = posterior.softmax(1)
    correct = torch.tensor([[0.18181818, 0.81818182]])
    assert torch.allclose(normalized, correct)

    obs_seq = pack_list([torch.tensor([1, 1])])
    posterior = model.filter(obs_seq)
    normalized = posterior.softmax(1)
    correct = torch.tensor([[0.11664296, 0.88335704]])
    assert torch.allclose(normalized, correct)
def decode(
    self, X: Union[PackedSequence, Tensor, List[Tensor]]
) -> Tuple[List[Tensor], List[Tensor]]:
    """
    Find the most likely state sequence corresponding to each observation
    sequence in X.

    Note that state assignments within a sequence are not independent: this
    returns the best joint set of state assignments for each sequence (the
    Viterbi path), not the best state for each observation considered in
    isolation.

    :param X: sequence/observation data (packed or unpacked)
    :returns: Two lists of tensors. The first contains the state labels,
        the second contains the corresponding per-state path log-likelihoods
        at each step.
    """
    if isinstance(X, PackedSequence):
        return super().decode(X)
    if isinstance(X, torch.Tensor):
        X = [torch.tensor(x) for x in X.tolist()]
    if isinstance(X, list):
        X = pack_list(X)
    state_seq_packed, path_ll_packed = super().decode(X)
    return unpack_list(state_seq_packed), unpack_list(path_ll_packed)
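# A minimal usage sketch, not part of the library. It assumes the decode()
# above is a method of the same HiddenMarkovModel class exercised by the
# tests in this file and relies on the same imports (torch, CategoricalModel,
# HiddenMarkovModel); the model parameters and sequences are illustrative only.
def example_decode_unpacked():
    s1 = CategoricalModel(probs=torch.tensor([0.8, 0.2]))
    s2 = CategoricalModel(probs=torch.tensor([0.1, 0.9]))
    model = HiddenMarkovModel([s1, s2],
                              T0=torch.tensor([0.5, 0.5]),
                              T=torch.tensor([[0.7, 0.3], [0.3, 0.7]]))

    # A plain list of variable-length tensors is packed internally, and the
    # results come back as plain lists with one entry per input sequence.
    state_seqs, path_lls = model.decode([torch.tensor([1, 1, 0]),
                                         torch.tensor([1, 1])])
    assert len(state_seqs) == 2 and len(path_lls) == 2

    # A PackedSequence input returns packed results instead; see the umbrella
    # decode test below, which unpacks them with pad_packed_sequence.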
def test_hmm_decode_aima_umbrella_example():
    """
    This example was taken from AI: A Modern Approach.

    The state sequence comes from figure 15.5(b) on page 577 of the third
    edition. The correct values were manually compared to the normalized
    values from figure 15.5(b).
    """
    states = {0: 'No Rain', 1: 'Rain'}
    # obs = {0: 'No Umbrella', 1: 'Umbrella'}
    T0 = torch.tensor([0.5, 0.5])
    T = torch.tensor([[0.7, 0.3],
                      [0.3, 0.7]])
    s1 = CategoricalModel(probs=torch.tensor([0.8, 0.2]))
    s2 = CategoricalModel(probs=torch.tensor([0.1, 0.9]))
    model = HiddenMarkovModel([s1, s2], T0=T0, T=T)

    obs_seq = pack_list([torch.tensor([1, 1, 0, 1, 1])])
    states_seq, path_ll = model.decode(obs_seq)
    ss_unpacked, _ = pad_packed_sequence(states_seq, batch_first=True)
    path_unpacked, _ = pad_packed_sequence(path_ll, batch_first=True)

    most_likely_states = [states[s.item()] for s in ss_unpacked[0]]
    assert most_likely_states == ['Rain', 'Rain', 'No Rain', 'Rain', 'Rain']

    normalized = path_unpacked[0].softmax(1)
    correct = torch.tensor([[0.18181818, 0.81818182],
                            [0.08695652, 0.91304348],
                            [0.77419355, 0.22580645],
                            [0.34146341, 0.65853659],
                            [0.10332103, 0.89667897]])
    assert torch.allclose(normalized, correct)
def fit(self,
        X: Union[PackedSequence, Tensor, List[Tensor]],
        max_steps: int = 500,
        epsilon: float = 1e-3,
        randomize_first: bool = False,
        restarts: int = 10,
        rand_fun: Callable = None,
        **kwargs) -> bool:
    """
    .. todo:: Can this docstring be inherited from hmm_packed?

    Learn new model parameters from X using hard expectation maximization
    (Viterbi training).

    This has a number of model-fitting parameters. max_steps determines the
    maximum number of expectation-maximization steps per fitting iteration.
    epsilon determines the convergence threshold for the
    expectation-maximization (the model converges when successive iterations
    do not improve by more than this threshold).

    Expectation-maximization can often get stuck in local maxima, so this
    method supports random restarts (reinitialize and rerun the EM). The
    restarts parameter specifies how many random restarts to perform. On
    each restart the model parameters are randomized using the provided
    rand_fun(hmm, data) function. If no rand_fun is provided, then the
    parameters are sampled using init_params_random(). If randomize_first
    is True, then the model randomizes the parameters on the first
    iteration; otherwise it uses the current parameter values as the first
    EM start point (the default).

    When doing random restarts, the model finishes with the parameters that
    had the best log-likelihood across all the restarts. The returned flag
    specifies whether the best-fitting model (across the restarts) converged.

    :param X: Sequences/observations
    :param max_steps: Maximum number of iterations to allow viterbi to run
        if it does not converge before then
    :param epsilon: Convergence criteria (log-likelihood delta)
    :param randomize_first: Randomize on the first iteration (restart 0)
    :param restarts: Number of random restarts.
    :param rand_fun: Callable F(self, data) for custom randomization
    :param \\**kwargs: arguments for self._viterbi_training
    :returns: Boolean indicating whether or not any of the restarts
        converged
    """
    if isinstance(X, PackedSequence):
        return super().fit(X, max_steps, epsilon, randomize_first,
                           restarts, rand_fun, **kwargs)
    if isinstance(X, torch.Tensor):
        X = [torch.tensor(x) for x in X.tolist()]
    if isinstance(X, list):
        X = pack_list(X)
    return super().fit(X, max_steps, epsilon, randomize_first, restarts,
                       rand_fun, **kwargs)
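# A minimal usage sketch, not part of the library. It assumes the fit() above
# is a method of the same HiddenMarkovModel class used in the tests and relies
# on the same imports; the sequences and fitting settings are illustrative only.
def example_fit_with_restarts():
    s1 = CategoricalModel(probs=torch.tensor([0.6, 0.4]))
    s2 = CategoricalModel(probs=torch.tensor([0.3, 0.7]))
    model = HiddenMarkovModel([s1, s2],
                              T0=torch.tensor([0.5, 0.5]),
                              T=torch.tensor([[0.7, 0.3], [0.3, 0.7]]))

    # Variable-length sequences may be passed as a plain list; fit() packs
    # them before running hard EM (Viterbi training) with random restarts.
    data = [torch.tensor([1, 1, 0, 1]), torch.tensor([0, 0, 1])]
    converged = model.fit(data, max_steps=100, epsilon=1e-3,
                          randomize_first=True, restarts=3)

    # fit() keeps the parameters of the best-scoring restart and reports
    # whether the fitting converged.
    assert converged in (True, False)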
def test_pack_and_unpack_lists():
    seqs = [[0, 0, 0, 1, 1],
            [1, 0, 1],
            [1, 1, 1, 1]]
    seqs = [torch.tensor(s) for s in seqs]

    X = pack_list(seqs)
    Y = unpack_list(X)

    for i in range(len(Y)):
        assert Y[i].shape == seqs[i].shape
        assert torch.allclose(Y[i], seqs[i])
def test_log_prob_aima_umbrella_example():
    """
    This example was taken from AI: A Modern Approach.

    The example comes from page 572, the filtering and prediction section.
    The correct values were manually computed by summing the filtered
    posterior.
    """
    T0 = torch.tensor([0.5, 0.5])
    T = torch.tensor([[0.7, 0.3],
                      [0.3, 0.7]])
    s1 = CategoricalModel(probs=torch.tensor([0.8, 0.2]))
    s2 = CategoricalModel(probs=torch.tensor([0.1, 0.9]))
    model = HiddenMarkovModel([s1, s2], T0=T0, T=T)

    obs_seq = pack_list([torch.tensor([1])])
    ll_score = model.log_prob(obs_seq).item()
    assert abs(ll_score - -0.5978) < 0.001

    obs_seq = pack_list([torch.tensor([1, 1])])
    ll_score = model.log_prob(obs_seq).item()
    assert abs(ll_score - -1.045545) < 0.001

    obs_seq = pack_list([torch.tensor([1]), torch.tensor([1])])
    ll_score = model.log_prob(obs_seq)
    assert abs(ll_score - (2 * -0.5978)) < 0.001
def test_hmm_decode():
    states = {0: 'Healthy', 1: 'Fever'}
    # obs = {0: 'normal', 1: 'cold', 2: 'dizzy'}
    T0 = torch.tensor([0.6, 0.4])
    T = torch.tensor([[0.7, 0.3],
                      [0.4, 0.6]])
    s1 = CategoricalModel(probs=torch.tensor([0.5, 0.4, 0.1]))
    s2 = CategoricalModel(probs=torch.tensor([0.1, 0.3, 0.6]))
    model = HiddenMarkovModel([s1, s2], T0=T0, T=T)

    obs_seq = pack_list([torch.tensor([1, 0, 1, 2, 2])])
    states_seq, _ = model.decode(obs_seq)
    ss_unpacked, _ = pad_packed_sequence(states_seq, batch_first=True)

    most_likely_states = [states[s.item()] for s in ss_unpacked[0]]
    assert most_likely_states == [
        'Healthy', 'Healthy', 'Healthy', 'Fever', 'Fever'
    ]
def test_hmm_smooth():
    T0 = torch.tensor([0.5, 0.5])
    T = torch.tensor([[0.7, 0.3],
                      [0.3, 0.7]])
    s1_orig = torch.tensor([0.8, 0.2])
    s2_orig = torch.tensor([0.1, 0.9])
    s1 = CategoricalModel(probs=s1_orig)
    s2 = CategoricalModel(probs=s2_orig)
    model = HiddenMarkovModel([s1, s2], T0=T0, T=T)

    obs_seq = pack_list([torch.tensor([1, 1])])

    # New approach
    posterior_ll = model.smooth(obs_seq)
    post_unpacked, post_lengths = pad_packed_sequence(posterior_ll,
                                                      batch_first=True)
    posterior_prob = post_unpacked[0].softmax(1)

    first_correct = torch.tensor([[0.11664296, 0.88335704]])
    assert torch.allclose(posterior_prob[0], first_correct)
def filter(self, X: Union[PackedSequence, Tensor, List[Tensor]]) -> Tensor:
    """
    Compute the log posterior distribution over the last state in each
    sequence, given all the data in that sequence.

    Filtering is also sometimes referred to as state estimation.

    :param X: packed sequence or list of tensors containing observation data
    :returns: tensor with shape N x S, where N is the number of sequences
        and S is the number of states.
    """
    if isinstance(X, PackedSequence):
        return super().filter(X)
    if isinstance(X, torch.Tensor):
        X = [torch.tensor(x) for x in X.tolist()]
    if isinstance(X, list):
        X = pack_list(X)
    return super().filter(X)
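# A minimal usage sketch, not part of the library. It assumes the filter()
# above is a method of the same HiddenMarkovModel class used in the tests and
# relies on the same imports; the parameters reuse the umbrella example values.
def example_filter_list_vs_packed():
    s1 = CategoricalModel(probs=torch.tensor([0.8, 0.2]))
    s2 = CategoricalModel(probs=torch.tensor([0.1, 0.9]))
    model = HiddenMarkovModel([s1, s2],
                              T0=torch.tensor([0.5, 0.5]),
                              T=torch.tensor([[0.7, 0.3], [0.3, 0.7]]))

    seqs = [torch.tensor([1, 1]), torch.tensor([1])]

    # Either form yields an N x S tensor of log posteriors over the last
    # state of each sequence; a list input is simply packed internally.
    from_list = model.filter(seqs)
    from_packed = model.filter(pack_list(seqs))
    assert torch.allclose(from_list, from_packed)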
def smooth(
        self, X: Union[PackedSequence, Tensor, List[Tensor]]) -> PackedSequence:
    """
    Compute the smoothed posterior probability over each state for the
    sequences in X. Unlike decode, this computes the posterior for each
    observation independently of the other assignments.

    Note: using this to compute the joint state labels for the observations
    in a sequence is incorrect! Use decode instead.

    :param X: packed observation data
    :returns: a packed sequence of tensors.
    """
    if isinstance(X, PackedSequence):
        return super().smooth(X)
    if isinstance(X, torch.Tensor):
        X = [torch.tensor(x) for x in X.tolist()]
    if isinstance(X, list):
        X = pack_list(X)
    return super().smooth(X)
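# A minimal usage sketch, not part of the library. It assumes smooth() and
# decode() above are methods of the same HiddenMarkovModel class used in the
# tests and relies on the same imports (including pad_packed_sequence);
# the parameters reuse the umbrella example values.
def example_smooth_vs_decode():
    s1 = CategoricalModel(probs=torch.tensor([0.8, 0.2]))
    s2 = CategoricalModel(probs=torch.tensor([0.1, 0.9]))
    model = HiddenMarkovModel([s1, s2],
                              T0=torch.tensor([0.5, 0.5]),
                              T=torch.tensor([[0.7, 0.3], [0.3, 0.7]]))

    obs_seq = pack_list([torch.tensor([1, 1, 0])])

    # Smoothing: per-observation posteriors, marginalized over the rest of
    # the sequence (returned packed because the input was packed).
    posterior_ll = model.smooth(obs_seq)
    post, _ = pad_packed_sequence(posterior_ll, batch_first=True)
    per_step_probs = post[0].softmax(1)

    # Decoding: the single best joint state sequence, which may disagree
    # with the per-step argmax of the smoothed posteriors.
    states_seq, _ = model.decode(obs_seq)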