Example #1
    def _baumwelch(self, obs: np.ndarray, alpha: np.ndarray, beta: np.ndarray, remap: bool = False) -> Tuple[np.ndarray, np.ndarray]:
        """Baum-Welch algorithm (forward-backward algorithm).

        :Parameters:
            - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
            - alpha: forward pass factor (TxN)
            - beta: backward pass factor (TxN)
            - remap: whether to recompute the emission probabilities

        :Returns: the xi and gamma parameters
        """
        if remap:
            self._map_b(obs)
        T = len(obs)
        # E-Step: xi(TxNxN), gamma(TxN)
        # xi[t][i][j]
        xi = np.full((T, self.n, self.n), -np.inf, dtype=self.precision)
        for t in np.arange(T-1):
            numer = alpha[t, :].reshape((self.n, 1)) \
                    + self.A \
                    + self.b[:, t+1].reshape((1, self.n)) \
                    + beta[t+1, :].reshape((1, self.n))
            denom = lse(numer)
            xi[t] = numer - denom
        # gamma[t][i]: sum xi over j
        gamma = lse(xi, axis=2)
        return xi, gamma
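All of these snippets reduce log-domain arrays through a helper named lse. Its definition is not shown in the source; a minimal sketch, assuming it is a thin wrapper around scipy.special.logsumexp (the wrapper name and signature here are assumptions):

    import numpy as np
    from scipy.special import logsumexp

    def lse(a: np.ndarray, axis=None) -> np.ndarray:
        # Log-sum-exp reduction. The snippets call it both without an axis
        # (reduce over all elements, as in the xi denominator above) and with
        # an axis or axis tuple, e.g. lse(xi, axis=2) and lse(xi_mix, axis=(0, 2)).
        return logsumexp(a, axis=axis)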
Example #2
    def train(self, obs, iters=1, eps=0.0001, verbose: bool = True):
        """Train the HMM model parameters.

        :Parameters:
            - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
            - iters: maximum number of iterations
            - eps: convergence tolerance on the log-likelihood
            - verbose: whether to print progress per iteration
        """
        for k in range(iters):
            # Training step
            self._map_b(obs)
            alpha = self._forward(obs)
            beta = self._backward(obs)
            xi, gamma = self._baumwelch(obs, alpha, beta)
            model = self._estimate(obs, alpha, beta, xi, gamma)
            # Update parameters
            prob_old = lse(alpha[-1])
            self._updatemodel(model)
            prob_new = self.calc_prob(obs)
            # Check convergence
            prob_d = abs(prob_old - prob_new)
            if verbose:
                print("Iter: {:3},"
                      " L(lambda|O) = {:.6e},"
                      " L(lambda_new|O) = {:.6e},"
                      " delta = {:.6e}"
                      .format(k, prob_old, prob_new, prob_d))
            if prob_d < eps:
                break
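The stopping rule compares the log-likelihood before and after the parameter update; in EM terms the loop terminates once

    \lvert \log P(O \mid \lambda_{\text{old}}) - \log P(O \mid \lambda_{\text{new}}) \rvert < \varepsilon

where log P(O|lambda) is the quantity returned by calc_prob below.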
Example #3
    def calc_prob(self, obs: np.ndarray) -> float:
        """Compute the log-likelihood of an observation sequence.

        :Parameters:
            - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set

        :Returns: log P(O|lambda)
        """
        return lse(self._forward(obs, True)[-1])
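The returned value marginalizes the last row of the forward lattice over all states:

    \log P(O \mid \lambda) = \operatorname{logsumexp}_{i}\, \alpha_{T-1,\,i}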
Example #4
    def _backward(self, obs: np.ndarray, remap: bool = False) -> np.ndarray:
        """Backward algorithm (mirrors the forward algorithm).

        ::

            Sketch of the beta lattice computation at (t, i):
                q1  q2 ... qN
            o0
            ...
            ot  t,i
            ... #   #       #   -> beta[t+1], A[i, :], B[j, o(t+1)]
            oT

        :Parameters:
            - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
            - remap: whether to recompute the emission probabilities

        :Returns: backward pass factor beta (TxN), also called the backward lattice or backward probabilities
        """
        if remap:
            self._map_b(obs)
        T = len(obs)
        beta = np.empty((T, self.n), dtype=self.precision)
        # Termination state: beta[T-1] = log(1) = 0
        beta[-1] = np.zeros(self.n, dtype=self.precision)
        # Recursion
        for t in np.arange(T-2, -1, -1):
            for i in np.arange(self.n):
                beta[t, i] = lse(beta[t+1] + self.A[i, :] + self.b[:, t+1])
        return beta
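Since self.A and self.b already hold log-probabilities, the recursion implemented above is, in standard notation:

    \beta_{T-1,\,i} = 0, \qquad
    \beta_{t,\,i} = \operatorname{logsumexp}_{j}\bigl(\log a_{ij} + \log b_j(o_{t+1}) + \beta_{t+1,\,j}\bigr)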
Example #5
    def _estimate(self, obs: np.ndarray, alpha, beta, xi, gamma) -> dict:
        """Compute the new model parameters (Maximization step).

        :Parameters:
            - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
            - alpha: forward pass factor (TxN)
            - beta: backward pass factor (TxN)
            - xi: Baum-Welch parameter (TxNxN)
            - gamma: Baum-Welch parameter (TxN)

        :Returns: HMM model parameters (A, B, pi)
        """
        # M-Step: new A,B,pi
        denom = lse(gamma, axis=0)
        A = lse(xi, axis=0) - denom.reshape((self.n, 1))
        B = np.empty((self.n, self.m), dtype=self.precision)
        for k in np.arange(self.m):
            B[:, k] = lse(gamma[k == obs, :], axis=0) - denom
        pi = gamma[0]
        # New model
        model = {}
        model['A'] = A
        model['B'] = B
        model['pi'] = pi
        return model
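In conventional (non-log) notation, the three re-estimates computed above are:

    \hat a_{ij} = \frac{\sum_t \xi_t(i, j)}{\sum_t \gamma_t(i)}, \qquad
    \hat b_j(k) = \frac{\sum_{t:\, o_t = k} \gamma_t(j)}{\sum_t \gamma_t(j)}, \qquad
    \hat \pi_i = \gamma_0(i)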
Example #6
    def _forward(self, obs: np.ndarray, remap: bool = False) -> np.ndarray:
        """Forward algorithm.

        ::

            Sketch of the alpha lattice computation at (t, j):
                q1  q2 ... qN
            o0
            ... #   #       #   -> alpha[t-1], A[:, j]
            ot  t,j             -> B[j, ot]
            ...
            oT

        :Parameters:
            - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
            - remap: whether to recompute the emission probabilities

        :Returns: forward pass factor alpha (TxN), also called the forward lattice or forward probabilities
        """
        if remap:
            self._map_b(obs)
        # Low-memory variant (keeps only the last row of alpha):
        # alpha = self.pi + self.b[:, 0]
        # p = np.copy(alpha)
        # for t in range(1, len(obs)):
        #     for j in range(self.n):
        #         alpha[j] = lse(p + self.A[:, j]) + self.b[j, t]
        #     p = np.copy(alpha)
        # return alpha
        T = len(obs)
        alpha = np.empty((T, self.n), dtype=self.precision)
        # Initialization
        alpha[0] = self.pi + self.b[:, 0]
        # Recursion
        for t in np.arange(1, T):
            for j in np.arange(self.n):
                alpha[t, j] = lse(alpha[t-1] + self.A[:, j]) + self.b[j, t]
        return alpha
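In standard notation, the initialization and recursion above are:

    \alpha_{0,\,j} = \log \pi_j + \log b_j(o_0), \qquad
    \alpha_{t,\,j} = \operatorname{logsumexp}_{i}\bigl(\alpha_{t-1,\,i} + \log a_{ij}\bigr) + \log b_j(o_t)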
Example #7
    def _map_b(self, obs: np.ndarray):
        """Compute emission probabilities with the GMM.

        Mapping between b and the GMM computation:

        - b: (NxT), b[j, t] is the probability of generating ot from state j
        - bm: (NxMxT), bm[j, m, t] is the m-th mixture component of that probability

        :Parameters:
            - obs: observation sequence
        """
        T = obs.shape[0]
        self.b = np.empty((self.n, T), dtype=self.precision)
        self.bm = np.empty((self.n, self.m, T), dtype=self.precision)
        # Could be computed directly with gaussian_mixture_distribution_log,
        # but then the individual mixture components would not be available:
        # for j in np.arange(self.n):
        #     self.b[j] = gaussian_mixture_distribution_log(
        #             self.w[j], obs, self.mu[j], self.si[j])
        for j in np.arange(self.n):
            for m in np.arange(self.m):
                self.bm[j, m] = gaussian_multi_distribution_log(
                    obs, self.mu[j, m], self.si[j, m])
            self.b[j] = lse(self.w[j].reshape(self.m, 1) + self.bm[j], axis=0)
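The final line is the log of the usual Gaussian-mixture emission density:

    \log b_j(o_t) = \operatorname{logsumexp}_{m}\bigl(\log w_{jm} + \log \mathcal{N}(o_t;\, \mu_{jm}, \Sigma_{jm})\bigr)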
Example #8
    def _estimate(self, obs: np.ndarray, alpha, beta, xi, gamma) -> dict:
        """Compute the new model parameters (Maximization step).

        :Parameters:
            - obs: observation sequence O[TxD]={o1, o2, ... oT}, values drawn from the observation set
            - alpha: forward pass factor (TxN)
            - beta: backward pass factor (TxN)
            - xi: Baum-Welch parameter (TxNxN)
            - gamma: Baum-Welch parameter (TxN)

        :Returns: HMM model parameters (A, pi, mu, si, w)
        """
        T = obs.shape[0]
        # M-Step: new A,pi,mu,si,w
        # A[NxN]
        A = lse(xi, axis=0) - lse(gamma, axis=0).reshape((self.n, 1))

        # pi[N]
        pi = gamma[0]

        # xi_mix[TxNxM]
        xi_mix = np.empty((T, self.n, self.m), dtype=self.precision)
        for t in np.arange(T):
            """
            # Equivalent 2-level for loop, showing what the array ops replace
            for j in np.arange(self.n):
                xi_mix[t, j, :] = (alpha[t, j] + beta[t, j] + self.w[j, :] + self.bm[j, :, t])
                xi_mix[t, j, :] -= lse(alpha[t] + beta[t])
                xi_mix[t, j, :] -= lse(self.w[j] + self.bm[j, :, t])
            """
            xi_mix[t] = (alpha[t] + beta[t]).reshape(
                self.n, 1) + self.w + self.bm[:, :, t]
            xi_mix[t] -= lse(alpha[t] + beta[t])
            xi_mix[t] -= lse(self.w + self.bm[:, :, t],
                             axis=1).reshape(self.n, 1)

        # w[NxM]
        w = lse(xi_mix, axis=0) - lse(xi_mix, axis=(0, 2)).reshape(self.n, 1)

        # Exponentiate xi_mix so means and covariances are computed in linear (non-log) space
        xi_mix = np.exp(xi_mix)

        # mu[NxMxD]
        """
        # Equivalent 2-level for loop
        mu = np.zeros((self.n, self.m, self.d), dtype=self.precision)
        for i in np.arange(self.n):
            for m in np.arange(self.m):
                mu[i, m] = np.dot(xi_mix[:, i, m].reshape(1, T), obs)
                mu[i, m] /= np.sum(xi_mix[:, i, m])
        """
        mu = np.dot(
            # dot(TxNxM -> NxMxT, TxD) -> NxMxD
            np.swapaxes(np.swapaxes(xi_mix, 0, 1), 1, 2),
            obs) / np.sum(xi_mix, axis=0).reshape(self.n, self.m, 1)

        # Sigma[NxMxDxD]
        """
        # Equivalent 2-level for loop
        si = np.zeros((self.n, self.m, self.d, self.d), dtype=self.precision)
        for i in np.arange(self.n):
            for m in np.arange(self.m):
                dt = obs - self.mu[i, m]  # TxD
                si[i, m] = np.sum(
                        xi_mix[:, i, m].reshape(T, 1, 1) * np.matmul(
                            # matmul(TxDx1, Tx1xD) -> TxDxD
                            dt.reshape(T, self.d, 1),
                            dt.reshape(T, 1, self.d)),
                        axis=0)
                si[i, m] /= np.sum(xi_mix[:, i, m])
        """
        # dt = 1x1xTxD - NxMx1xD = NxMxTxD
        dt = obs.reshape(1, 1, T, self.d) - self.mu.reshape(
            self.n, self.m, 1, self.d)
        si = np.sum(
                # TxNxM -> NxMxT -> NxMxTx1x1
                np.swapaxes(np.swapaxes(xi_mix, 0, 1), 1, 2).reshape(self.n, self.m, T, 1, 1) * \
                np.matmul(
                    # matmul(NxMxTxDx1, NxMxTx1xD) -> NxMxTxDxD
                    dt.reshape(self.n, self.m, T, self.d, 1),
                    dt.reshape(self.n, self.m, T, 1, self.d)),
                axis=2) / np.sum(xi_mix, axis=0).reshape(self.n, self.m, 1, 1)
        # Add a small diagonal term for numerical stability
        si += self.min_std * np.eye(self.d, dtype=self.precision)

        # New model
        model = {}
        model['A'] = A
        model['pi'] = pi
        model['w'] = w
        model['mu'] = mu
        model['si'] = si
        return model
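The per-component occupancy xi_mix computed above corresponds, after exponentiation, to:

    \xi_t(j, m) = \frac{\alpha_t(j)\, \beta_t(j)}{\sum_i \alpha_t(i)\, \beta_t(i)} \cdot
                  \frac{w_{jm}\, \mathcal{N}(o_t;\, \mu_{jm}, \Sigma_{jm})}{\sum_{m'} w_{jm'}\, \mathcal{N}(o_t;\, \mu_{jm'}, \Sigma_{jm'})}

and the Gaussian updates are the occupancy-weighted moments (note the covariance update uses the previous iteration's means self.mu, and the min_std diagonal term is added for numerical stability):

    \hat \mu_{jm} = \frac{\sum_t \xi_t(j, m)\, o_t}{\sum_t \xi_t(j, m)}, \qquad
    \hat \Sigma_{jm} = \frac{\sum_t \xi_t(j, m)\, (o_t - \mu_{jm})(o_t - \mu_{jm})^{\top}}{\sum_t \xi_t(j, m)} + \sigma_{\min} I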