Пример #1
0
    def m_step(self, expectations, datas, inputs, masks, tags, samples, **kwargs):
        """Re-estimate the super-state transition matrix and duration parameters.

        The transition matrix is rebuilt from the middle entry of every
        3-tuple in ``expectations`` (each summed over its first axis), with
        the diagonal zeroed and rows normalised to sum to one. The per-state
        duration parameters ``rs[k]`` / ``ps[k]`` are then refit from the run
        lengths found in ``samples``.

        ``datas``, ``inputs``, ``masks``, ``tags`` and ``kwargs`` are accepted
        but not used here.
        """
        # Accumulate the pairwise terms over all sequences; the tiny constant
        # keeps every row sum strictly positive before normalising.
        pair_totals = sum(
            np.sum(Ezzp1, axis=0) for _, Ezzp1, _ in expectations
        ) + 1e-16
        # Self-transitions are excluded from the super-state matrix.
        np.fill_diagonal(pair_totals, 0)
        pair_totals /= pair_totals.sum(axis=-1, keepdims=True)
        self.Ps = pair_totals

        # Pool the (state, duration) runs of every sampled state sequence.
        # NOTE(review): assumes `rle` returns a (states, durations) pair per sequence.
        encoded_runs = [rle(z_smpl) for z_smpl in samples]
        states, durations = map(np.concatenate, zip(*encoded_runs))

        # Fit one negative binomial (integer r) per state from its durations.
        for k in range(self.K):
            durations_k = durations[states == k]
            r_k, p_k = fit_negative_binomial_integer_r(
                durations_k, self.r_min, self.r_max)
            self.rs[k] = r_k
            self.ps[k] = p_k

        # Invalidate the cached transition matrix so it is not reused stale.
        self._transition_matrix = None
Пример #2
0
# NOTE: this is an excerpt of a longer plotting script; `plt`, `np`, `nbinom`,
# `rle`, `K`, `z`, `hsmm_z` and `true_hsmm` are defined before this point.

# Hide the y ticks of the axes that was set up before this excerpt.
plt.yticks([])

# Lower panel: first 1000 entries of the inferred state sequence, drawn as a
# one-row image with the colour range fixed to 0 .. K - 1.
plt.subplot(212)
plt.imshow(hsmm_z[None, :1000],
           aspect="auto",
           cmap="cubehelix",
           vmin=0,
           vmax=K - 1)
plt.xlim(0, 1000)
plt.ylabel("$z_{\\mathrm{inferred}}$")
plt.yticks([])
plt.xlabel("time")
plt.tight_layout()

# Plot the true and inferred duration distributions
# NOTE(review): `rle` presumably run-length encodes a sequence into
# (state per run, length of run) arrays -- confirm against ssm.util.
states, durations = rle(z)
inf_states, inf_durations = rle(hsmm_z)
# Integer bin edges 0 .. max_duration - 1, shared by every histogram and used
# as the support on which the pmf is evaluated.
max_duration = max(np.max(durations), np.max(inf_durations))
dd = np.arange(max_duration, step=1)

# Two rows of K panels; this excerpt only fills the first row.
plt.figure(figsize=(3 * K, 6))
for k in range(K):
    # Plot the durations of the true states
    plt.subplot(2, K, k + 1)
    # Durations are shifted by one so the histogram starts at zero, matching
    # the support of the negative binomial pmf drawn on top.
    plt.hist(durations[states == k] - 1, dd, density=True)
    plt.plot(dd,
             nbinom.pmf(dd, true_hsmm.transitions.rs[k],
                        1 - true_hsmm.transitions.ps[k]),
             '-k',
             lw=2,
             label='true')
Пример #3
0
    def test_hsmm_example(self):
        """Fit an HSMM and an HMM to data sampled from a known Gaussian HSMM.

        The example samples train/test sequences from a reference HSMM, fits
        both model types with EM, prints test log likelihoods and shows three
        figures: EM log-likelihood traces, state rasters (true / HSMM / HMM)
        and per-state duration histograms with the model duration pmf drawn
        on top. It blocks on ``plt.show()`` and asserts nothing.
        """
        import autograd.numpy as np
        import autograd.numpy.random as npr
        from scipy.stats import nbinom
        import matplotlib.pyplot as plt
        import ssm
        from ssm.util import rle, find_permutation

        npr.seed(0)

        # Set the parameters of the HSMM
        T = 5000  # number of time bins
        K = 5  # number of discrete states
        D = 2  # number of observed dimensions

        # Make an HSMM with the true parameters
        true_hsmm = ssm.HSMM(K, D, observations="gaussian")
        print(true_hsmm.transitions.rs)
        z, y = true_hsmm.sample(T)
        z_test, y_test = true_hsmm.sample(T)
        true_ll = true_hsmm.log_probability(y)

        # Fit an HSMM and, for comparison, a plain HMM
        N_em_iters = 500

        print("Fitting Gaussian HSMM with EM")
        hsmm = ssm.HSMM(K, D, observations="gaussian")
        hsmm_em_lls = hsmm.fit(y, method="em", num_em_iters=N_em_iters)

        print("Fitting Gaussian HMM with EM")
        hmm = ssm.HMM(K, D, observations="gaussian")
        hmm_em_lls = hmm.fit(y, method="em", num_em_iters=N_em_iters)

        # Plot log likelihoods (fit model is typically better)
        plt.figure()
        plt.plot(hsmm_em_lls, ls='-', label="HSMM (EM)")
        plt.plot(hmm_em_lls, ls='-', label="HMM (EM)")
        plt.plot(true_ll * np.ones(N_em_iters), ':', label="true")
        plt.legend(loc="lower right")

        # Print the test likelihoods (true model is typically better)
        print("Test log likelihood")
        print("True HSMM: ", true_hsmm.log_likelihood(y_test))
        print("Fit HSMM:  ", hsmm.log_likelihood(y_test))
        print("Fit HMM: ", hmm.log_likelihood(y_test))

        # Relabel each fitted model's states to line up with the true labels,
        # then decode again with the permuted model.
        hsmm.permute(find_permutation(z, hsmm.most_likely_states(y)))
        hsmm_z = hsmm.most_likely_states(y)
        hmm.permute(find_permutation(z, hmm.most_likely_states(y)))
        # Decode with the HMM itself; the previous version called the HSMM
        # here, so every "HMM" panel silently showed HSMM results.
        hmm_z = hmm.most_likely_states(y)

        # Plot the true and inferred discrete states (first 1000 time bins)
        state_rows = [
            (z, "True $z$"),  # closing "$" added so the mathtext renders
            (hsmm_z, "HSMM Inferred $z$"),
            (hmm_z, "HMM Inferred $z$"),
        ]
        plt.figure(figsize=(8, 6))
        for row, (state_seq, ylabel) in enumerate(state_rows, start=1):
            plt.subplot(3, 1, row)
            plt.imshow(state_seq[None, :1000],
                       aspect="auto",
                       cmap="cubehelix",
                       vmin=0,
                       vmax=K - 1)
            plt.xlim(0, 1000)
            plt.ylabel(ylabel)
            plt.yticks([])
        # Only the bottom panel carries the shared x label.
        plt.xlabel("time")

        plt.tight_layout()

        # Plot the true and inferred duration distributions
        states, durations = rle(z)
        inf_states, inf_durations = rle(hsmm_z)
        hmm_inf_states, hmm_inf_durations = rle(hmm_z)
        max_duration = max(np.max(durations), np.max(inf_durations),
                           np.max(hmm_inf_durations))
        # Integer bin edges shared by all histograms and pmf curves.
        dd = np.arange(max_duration, step=1)

        plt.figure(figsize=(3 * K, 9))
        for k in range(K):
            # One entry per row of the figure:
            # (run states, run durations, r, p, line style, legend label).
            # The HMM row uses r = 1 with p = 1 - P[k, k], i.e. a geometric
            # law over "duration - 1" implied by the self-transition prob.
            panels = [
                (states, durations,
                 true_hsmm.transitions.rs[k],
                 1 - true_hsmm.transitions.ps[k],
                 '-k', 'true'),
                (inf_states, inf_durations,
                 hsmm.transitions.rs[k],
                 1 - hsmm.transitions.ps[k],
                 '-r', 'hsmm inf.'),
                (hmm_inf_states, hmm_inf_durations,
                 1,
                 1 - hmm.transitions.transition_matrix[k, k],
                 '-r', 'hmm inf.'),
            ]
            for row, (run_states, run_durations, r, p, style,
                      label) in enumerate(panels):
                plt.subplot(3, K, row * K + k + 1)
                # Shift durations by one so they start at zero like the pmf.
                plt.hist(run_durations[run_states == k] - 1, dd, density=True)
                plt.plot(dd, nbinom.pmf(dd, r, p), style, lw=2, label=label)
                if k == K - 1:
                    plt.legend(loc="lower right")
                plt.title("State {} (N={})".format(k + 1,
                                                   np.sum(run_states == k)))
        plt.tight_layout()

        plt.show()
Пример #4
0
    def plot_duration_distributions(cls, true_z, true_x, hmm, hsmm, hm_z,
                                    state_sel, img_file_path, max_duration=50):
        """Save a figure comparing state-duration histograms with model pmfs.

        One column is drawn per entry of ``state_sel`` and three rows:

        * row 1 -- run lengths found in ``true_z``;
        * row 2 -- run lengths in the HSMM's most likely states, with the
          negative binomial pmf ``NB(rs[k], 1 - ps[k])`` in red;
        * row 3 -- run lengths in the HMM's most likely states, with
          ``NB(1, 1 - P[k, k])`` in red, i.e. the duration law implied by
          the HMM's self-transition probability.

        Histograms show ``duration - 1`` so they start at zero like the pmf.
        ``N`` in each title is the number of runs of that state. Values
        outside the bin range fall outside the histogram.

        Args:
            true_z: reference state sequence (run-length encoded with
                ``ssm.util.rle``).
            true_x: observations handed to ``most_likely_states`` of both
                models.
            hmm: fitted model providing ``most_likely_states`` and
                ``transitions.transition_matrix``.
            hsmm: fitted model providing ``most_likely_states`` and
                ``transitions.rs`` / ``transitions.ps``.
            hm_z: lookup indexed by the entries of ``state_sel`` that yields
                the encoded state id (presumably a label -> state index
                mapping; confirm against the caller).
            state_sel: sized iterable of the labels to plot, one per column.
            img_file_path: destination passed to ``plt.savefig``.
            max_duration: number of integer bin edges ``0 .. max_duration-1``
                used for the histograms and the pmf support. Defaults to 50,
                the value that was previously hard-coded.
        """
        from scipy.stats import nbinom
        from ssm.util import rle

        hmm_z = hmm.most_likely_states(true_x)
        hsmm_z = hsmm.most_likely_states(true_x)

        # Run-length encode every sequence: e.g. states [0, 1, 2, 3, 1, 1]
        # with durations [1, 4, 5, 2, 4, 2] means one step in state 0, four
        # in state 1, five in state 2, ... so state 1 has runs [4, 4, 2].
        true_states, true_durations = rle(true_z)
        hmm_inf_states, hmm_inf_durations = rle(hmm_z)
        hsmm_inf_states, hsmm_inf_durations = rle(hsmm_z)

        dd = np.arange(max_duration)

        n_cols = len(state_sel)
        n_rows = 3

        fig = plt.figure(figsize=(3 * n_cols, 9))
        try:
            for col, act in enumerate(state_sel):
                enc_state = hm_z[act]

                # Row 1: durations of the true states (no pmf overlay).
                plt.subplot(n_rows, n_cols, col + 1)
                plt.hist(true_durations[true_states == enc_state] - 1,
                         dd,
                         density=True)
                plt.title("{} (N={})".format(
                    act, np.sum(true_states == enc_state)))

                # Row 3: durations of the states inferred by the HMM.
                plt.subplot(n_rows, n_cols, 2 * n_cols + col + 1)
                plt.hist(hmm_inf_durations[hmm_inf_states == enc_state] - 1,
                         dd,
                         density=True)
                stay_prob = hmm.transitions.transition_matrix[enc_state,
                                                              enc_state]
                plt.plot(dd,
                         nbinom.pmf(dd, 1, 1 - stay_prob),
                         '-r',
                         lw=2,
                         label='hmm')
                if col == n_cols - 1:
                    plt.legend(loc="lower right")
                plt.title("{} (N={})".format(
                    act, np.sum(hmm_inf_states == enc_state)))

                # Row 2: durations of the states inferred by the HSMM.
                plt.subplot(n_rows, n_cols, n_cols + col + 1)
                plt.hist(hsmm_inf_durations[hsmm_inf_states == enc_state] - 1,
                         dd,
                         density=True)
                plt.plot(dd,
                         nbinom.pmf(dd, hsmm.transitions.rs[enc_state],
                                    1 - hsmm.transitions.ps[enc_state]),
                         '-r',
                         lw=2,
                         label='hsmm')
                if col == n_cols - 1:
                    plt.legend(loc="lower right")
                plt.title("{} (N={})".format(
                    act, np.sum(hsmm_inf_states == enc_state)))

            plt.tight_layout()
            plt.savefig(img_file_path)
        finally:
            # Close (not just clear) the figure so repeated calls do not
            # accumulate open figures in pyplot's registry.
            plt.close(fig)
Пример #5
0
    def test_own_hsmm_example(self):
        """Fit an HSMM and an HMM to data sampled from a categorical HSMM.

        Samples train/test sequences from a reference HSMM, fits both model
        types with EM, prints test log likelihoods and shows three figures:
        EM log-likelihood traces, state rasters (true / HSMM / HMM) and
        per-state duration histograms with the model duration pmf on top.
        It blocks on ``plt.show()`` and asserts nothing.
        """
        import autograd.numpy as np
        import autograd.numpy.random as npr
        from scipy.stats import nbinom
        import matplotlib.pyplot as plt
        import ssm
        from ssm.util import rle, find_permutation

        # Seed the RNG (the call itself returns nothing worth printing).
        npr.seed(0)

        # Set the parameters of the HSMM
        T = 1000  # number of time bins  TODO: why can't this be set < 500?
        K = 8  # number of discrete states
        D = 5  # number of observed dimensions

        # Make an HSMM with the true parameters
        true_hsmm = ssm.HSMM(K, D, observations="categorical")
        z, y = true_hsmm.sample(T)
        z_test, y_test = true_hsmm.sample(T)
        true_ll = true_hsmm.log_probability(y)

        # Fit an HSMM and, for comparison, a plain HMM
        N_em_iters = 100

        print("Fitting Categorical HSMM with EM")
        hsmm = ssm.HSMM(K, D, observations="categorical")
        hsmm_em_lls = hsmm.fit(y, method="em", num_em_iters=N_em_iters)

        print("Fitting Categorical HMM with EM")
        hmm = ssm.HMM(K, D, observations="categorical")
        hmm_em_lls = hmm.fit(y, method="em", num_em_iters=N_em_iters)

        # Plot log likelihoods (fit model is typically better)
        plt.figure()
        plt.plot(hsmm_em_lls, ls='-', label="HSMM (EM)")
        plt.plot(hmm_em_lls, ls='-', label="HMM (EM)")
        plt.plot(true_ll * np.ones(N_em_iters), ':', label="true")
        plt.legend(loc="lower right")

        # Print the test likelihoods (true model is typically better)
        print("Test log likelihood")
        print("True HSMM: ", true_hsmm.log_likelihood(y_test))
        print("Fit HSMM:  ", hsmm.log_likelihood(y_test))
        print("Fit HMM: ", hmm.log_likelihood(y_test))

        # Align the fitted state labels with the true ones.
        # z is the true state sequence and the decoded sequence uses the
        # model's own arbitrary labels. Author's note on find_permutation:
        # it matches each row to a different column so that the summed cost
        # is minimal, i.e. it picks exactly one element per row and per
        # column of a cost matrix (NOTE(review): confirm against ssm.util).
        hsmm_decoded = hsmm.most_likely_states(y)
        hsmm.permute(find_permutation(z, hsmm_decoded))
        hsmm_z = hsmm.most_likely_states(y)

        hmm_decoded = hmm.most_likely_states(y)
        hmm.permute(find_permutation(z, hmm_decoded))
        # Decode with the HMM itself; the previous version called the HSMM
        # here, so every "HMM" panel silently showed HSMM results.
        hmm_z = hmm.most_likely_states(y)

        # Plot the true and inferred discrete states (first 1000 time bins)
        state_rows = [
            (z, "True $z$"),  # closing "$" added so the mathtext renders
            (hsmm_z, "HSMM Inferred $z$"),
            (hmm_z, "HMM Inferred $z$"),
        ]
        plt.figure(figsize=(8, 6))
        for row, (state_seq, ylabel) in enumerate(state_rows, start=1):
            plt.subplot(3, 1, row)
            plt.imshow(state_seq[None, :1000],
                       aspect="auto",
                       cmap="cubehelix",
                       vmin=0,
                       vmax=K - 1)
            plt.xlim(0, 1000)
            plt.ylabel(ylabel)
            plt.yticks([])
        # Only the bottom panel carries the shared x label.
        plt.xlabel("time")

        plt.tight_layout()

        # Plot the true and inferred duration distributions.
        # In each panel, N is the number of runs of that state and the bars
        # show how often a run lasted a given number of steps. The red curve
        # is a negative binomial pmf NB(r, p): for the HMM r = 1 with the
        # self-transition probability, for the HSMM the fitted rs[k], ps[k].
        # Run-length example: states [0, 1, 2, 3, 1, 1] with durations
        # [1, 4, 5, 2, 4, 2] means one step in state 0, four in state 1, ...
        # so the runs of state 1 are [4, 4, 2].
        true_states, true_durations = rle(z)
        hmm_inf_states, hmm_inf_durations = rle(hmm_z)
        hsmm_inf_states, hsmm_inf_durations = rle(hsmm_z)
        # Fixed plotting range: integer bin edges 0 .. 99.
        max_duration = 100
        dd = np.arange(max_duration, step=1)

        plt.figure(figsize=(3 * K, 9))
        for k in range(K):
            # Row 1: durations of the true states
            plt.subplot(3, K, k + 1)
            # Shift durations by one so they start at zero like the pmf.
            plt.hist(true_durations[true_states == k] - 1, dd, density=True)
            n = true_hsmm.transitions.rs[k]
            p = 1 - true_hsmm.transitions.ps[k]
            plt.plot(dd, nbinom.pmf(dd, n, p), '-k', lw=2, label='true')
            if k == K - 1:
                plt.legend(loc="lower right")
            plt.title("State {} (N={})".format(k + 1,
                                               np.sum(true_states == k)))

            # Row 3: durations of the states inferred by the HMM
            plt.subplot(3, K, 2 * K + k + 1)
            plt.hist(hmm_inf_durations[hmm_inf_states == k] - 1,
                     dd,
                     density=True)
            plt.plot(dd,
                     nbinom.pmf(dd, 1,
                                1 - hmm.transitions.transition_matrix[k, k]),
                     '-r',
                     lw=2,
                     label='hmm inf.')
            if k == K - 1:
                plt.legend(loc="lower right")
            plt.title("State {} (N={})".format(k + 1,
                                               np.sum(hmm_inf_states == k)))

            # Row 2: durations of the states inferred by the HSMM
            plt.subplot(3, K, K + k + 1)
            plt.hist(hsmm_inf_durations[hsmm_inf_states == k] - 1,
                     dd,
                     density=True)
            plt.plot(dd,
                     nbinom.pmf(dd, hsmm.transitions.rs[k],
                                1 - hsmm.transitions.ps[k]),
                     '-r',
                     lw=2,
                     label='hsmm inf.')
            if k == K - 1:
                plt.legend(loc="lower right")
            plt.title("State {} (N={})".format(k + 1,
                                               np.sum(hsmm_inf_states == k)))

        plt.tight_layout()

        plt.show()