Example #1
    def _forward(self, O, scale = True):
        '''
        Calculates the forward variable, alpha: the probability of the partial observation
        sequence O1 O2 ... Ot (until time t) and state Si at time t.

        PARAMETERS
        ----------
        O {TxD}: observation matrix with a sequence of T observations, each having dimension D
        scale {Boolean}: if True, rescale the forward variable at each time step to avoid numerical underflow (default True)
        
        RETURNS
        -------
        lnP {Float}: log probability of the observation sequence O
        lnAlpha {T,N}: log of the forward variable: the probability of the partial observation
                       sequence O1 O2 ... Ot (until time t) and state Si at time t.
        lnC (T,): log of the scaling coefficients for each observation
        '''

        O = unsqueeze(O,2)
        T, D = O.shape

        # check dimensions of provided observations agree with the trained emission distributions
        dim = self._B[0].mu.shape[1]
        if D != dim:
            raise ValueError('GHMM: observation dimension does not agree with the trained emission distributions for the model')

        # calculate lnP for each observation for each state's emission distribution
        # lnP_obs {T, N}
        lnP_obs = np.zeros([T,self.N])
        for i in range(self.N):
            lnP_obs[:,i] = self._B[i].calcLnP(O)

        # forward variable, alpha {T,N}
        lnAlpha = np.zeros([T,self.N])

        # initialize vector of scaling coefficients
        lnC = np.zeros(T)

        # Step 1: Initialization
        lnAlpha[0,:] = np.log(self._pi) + lnP_obs[0,:]
        if scale:
            lnC[0] = -logsumexp(lnAlpha[0,:])
            lnAlpha[0,:] += lnC[0]
            
        # Step 2: Induction
        for t in range(1,T):
            lnAlpha[t,:] = logsumexp(lnAlpha[[t-1],:].T + np.log(self._A), axis=0) + lnP_obs[t,:]
            if scale:
                lnC[t] = -logsumexp(lnAlpha[t,:])
                lnAlpha[t,:] += lnC[t]

        # Step 3: Termination
        if scale:
            lnP = -np.sum(lnC)
        else:
            lnP = logsumexp(lnAlpha[T-1,:])

        return lnP, lnAlpha, lnC
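
For reference, the same scaled log-space forward recursion as a standalone sketch, assuming scipy.special.logsumexp and taking the start probabilities, transition matrix and per-step emission log-probabilities as plain arrays (ln_pi {N,}, ln_A {N,N}, ln_b {T,N} are illustrative names, not part of the class above):

import numpy as np
from scipy.special import logsumexp

def forward_log(ln_pi, ln_A, ln_b):
    # ln_pi {N,}: log start probabilities; ln_A {N,N}: log transition matrix
    # ln_b {T,N}: log emission probability of each observation under each state
    T, N = ln_b.shape
    ln_alpha = np.zeros((T, N))
    ln_c = np.zeros(T)

    # Step 1: Initialization -- alpha_1(i) = pi_i * b_i(O_1), then rescale to sum to 1
    ln_alpha[0] = ln_pi + ln_b[0]
    ln_c[0] = -logsumexp(ln_alpha[0])
    ln_alpha[0] += ln_c[0]

    # Step 2: Induction -- alpha_t(j) = [sum_i alpha_{t-1}(i) * A_ij] * b_j(O_t)
    for t in range(1, T):
        ln_alpha[t] = logsumexp(ln_alpha[t - 1][:, None] + ln_A, axis=0) + ln_b[t]
        ln_c[t] = -logsumexp(ln_alpha[t])
        ln_alpha[t] += ln_c[t]

    # Step 3: Termination -- ln P(O) is minus the sum of the scaling terms
    return -np.sum(ln_c), ln_alpha, ln_c

Per-step scaling keeps every row of the forward variable normalized, so the log-likelihood is recovered as minus the sum of the scaling terms, exactly as in the termination step above.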
Example #2
    def logit_cost(self, theta, X, y):
        tt = X.shape[0]  # number of training examples
        theta = np.reshape(theta, (len(theta), 1))

        lgsum = utils.logsumexp(X.dot(theta))
        lgsumneg = utils.logsumexp(-X.dot(theta))
        J = (1. / tt) * (np.transpose(y).dot(lgsumneg) +
                         np.transpose(1 - y).dot(lgsum))

        return J
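
The cost above assumes a utils.logsumexp that behaves like an element-wise softplus, log(1 + e^z) = logaddexp(0, z). A self-contained sketch of the same average negative log-likelihood using only NumPy (logistic_cost and its 1-D theta are illustrative, not the original utils API):

import numpy as np

def logistic_cost(theta, X, y):
    # theta {D,}, X {N,D}, y {N,} with labels in {0, 1}
    z = X.dot(theta)
    # -ln P(y|x) = y * log(1 + e^-z) + (1 - y) * log(1 + e^z), computed stably
    return np.mean(y * np.logaddexp(0.0, -z) + (1 - y) * np.logaddexp(0.0, z))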
Example #3
    def _expect(self, X, verbose = False):
        ''' 
        Expectation step of the expectation maximization algorithm. 
        
        PARAMETERS
        ----------
        X {NxD}: training data

        RETURNS
        -------
        lnP (N,): ln[sum_M p(l)*p(Xi | l)]
            ln probabilities of each observation in the training data,
            marginalizing over mixture components to get ln[p(Xi)]
        posteriors {NxM}: p(l | Xi)
            Posterior probabilities of each mixture component for each observation.
        '''

        N, _ = X.shape
        lnP_Xi_l = np.zeros([N, self.M])

        # zero correction
        self._Sigma[self._Sigma == 0.0] += self._zeroCorr

        if hasattr(slinalg, 'solve_triangular'):
            # only in scipy since 0.9
            solve_triangular = slinalg.solve_triangular
        else:
            # slower, but works
            solve_triangular = slinalg.solve

        # for each mixture component
        for l in range(0,self.M):
            X_mu = X - self._mu[l,:]

            if self.covType == 'diag':
                sig_l = np.diag(self._Sigma[l,:,:])
                lnP_Xi_l[:,l] = -0.5 * (self.D * np.log(2.0*np.pi) + np.sum((X_mu ** 2) / sig_l, axis=1) + np.sum(np.log(sig_l)))

            elif self.covType == 'full':
                try:
                    # cholesky decomposition => U*U.T = _Sigma[l,:,:]
                    U = slinalg.cholesky(self._Sigma[l,:,:], lower=True)
                except slinalg.LinAlgError:
                    # reinitialization trick is from scikit learn GMM
                    if verbose:
                        print "Sigma is not positive definite. Reinitializing ..."
                    self._Sigma[l,:,:] = 1e-6 * np.eye(self.D)
                    U = 1000.0 * self._Sigma[l,:,:]
                    
                Q = solve_triangular(U, X_mu.T, lower=True)
                lnP_Xi_l[:,l] = -0.5 * (self.D * np.log(2.0 * np.pi) + 2.0 * np.sum(np.log(np.diag(U))) + np.sum(Q ** 2, axis=0))

        lnP_Xi_l += self._lnw
        
        # calculate sum of probabilities (marginalizing over mixtures)
        lnP = logsumexp(lnP_Xi_l, axis=1)

        posteriors = np.exp(lnP_Xi_l - lnP[:,np.newaxis])
        
        return lnP, posteriors
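
A shorter sketch of the same expectation step, leaning on scipy.stats.multivariate_normal for the per-component Gaussian log-densities and scipy.special.logsumexp for the marginalization; it assumes full covariances, and gmm_expect with its arguments (ln_w, mu, Sigma) are illustrative names rather than the class attributes above:

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def gmm_expect(X, ln_w, mu, Sigma):
    # X {N,D}; ln_w {M,}: log mixture weights; mu {M,D}; Sigma {M,D,D}
    N, _ = X.shape
    M = len(ln_w)
    ln_joint = np.zeros((N, M))
    for l in range(M):
        # ln[p(l) * p(Xi | l)] for every observation
        ln_joint[:, l] = ln_w[l] + multivariate_normal.logpdf(X, mean=mu[l], cov=Sigma[l])
    # ln p(Xi), marginalizing over the M components
    lnP = logsumexp(ln_joint, axis=1)
    # posterior responsibilities p(l | Xi)
    posteriors = np.exp(ln_joint - lnP[:, np.newaxis])
    return lnP, posteriors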
Example #4
 def logpartition(self):
     if self.nature == 'Bernoulli':
         self.logZ = np.logaddexp(0, self.weights).sum(-1)
     elif self.nature == 'Spin':
         self.logZ = np.logaddexp(self.weights, -self.weights).sum(-1)
     elif self.nature == 'Potts':
         self.logZ = utilities.logsumexp(self.weights, axis=-1).sum(-1)
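
The three branches are the per-unit log partition functions: log(1 + e^w) for a Bernoulli unit, log(e^w + e^-w) for a spin unit, and the log of the sum of e^(w_c) for a Potts unit. A quick numerical check of the identities used (only np.logaddexp and scipy.special.logsumexp assumed):

import numpy as np
from scipy.special import logsumexp

w = np.array([-1.5, 0.0, 2.0])

# Bernoulli: log(1 + e^w) == logaddexp(0, w)
assert np.allclose(np.logaddexp(0, w), np.log(1 + np.exp(w)))
# Spin: log(e^w + e^-w) == logaddexp(w, -w)
assert np.allclose(np.logaddexp(w, -w), np.log(np.exp(w) + np.exp(-w)))
# Potts: log of the sum of e^(w_c) over the last axis
assert np.allclose(logsumexp(w, axis=-1), np.log(np.exp(w).sum()))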
Example #5
    def partial_EM(self, data, cond_muh_ijk, indices, weights=None, eps=1e-4, maxiter=10, verbose=0):
        (i, j, k) = indices
        converged = False
        previous_L = utilities.average(
            self.likelihood(data), weights=weights) / self.N
        mini_epochs = 0
        if verbose:
            print('Partial EM %s, L = %.3f' % (mini_epochs, previous_L))
        while not converged:
            if self.nature in ['Bernoulli', 'Spin']:
                f = np.dot(data, self.weights[[i, j, k], :].T)
            elif self.nature == 'Potts':
                f = cy_utilities.compute_output_C(data, self.weights[[i, j, k], :, :], np.zeros([
                                                  data.shape[0], 3], dtype=curr_float))

            tmp = f - self.logZ[np.newaxis, [i, j, k]]
            tmp -= tmp.max(-1)[:, np.newaxis]
            cond_muh = np.exp(tmp) * self.muh[np.newaxis, [i, j, k]]
            cond_muh /= cond_muh.sum(-1)[:, np.newaxis]
            cond_muh *= cond_muh_ijk[:, np.newaxis]

            self.muh[[i, j, k]] = utilities.average(cond_muh, weights=weights)
            self.cum_muh = np.cumsum(self.muh)
            self.gh[[i, j, k]] = np.log(self.muh[[i, j, k]])
            self.gh -= self.gh.mean()
            if self.nature == 'Bernoulli':
                self.cond_muv[[i, j, k]] = utilities.average_product(
                    cond_muh, data, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis]
                self.weights[[i, j, k]] = np.log(
                    (self.cond_muv[[i, j, k]] + eps) / (1 - self.cond_muv[[i, j, k]] + eps))
                self.logZ[[i, j, k]] = np.logaddexp(
                    0, self.weights[[i, j, k]]).sum(-1)
            elif self.nature == 'Spin':
                self.cond_muv[[i, j, k]] = utilities.average_product(
                    cond_muh, data, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis]
                self.weights[[i, j, k]] = 0.5 * np.log(
                    (1 + self.cond_muv[[i, j, k]] + eps) / (1 - self.cond_muv[[i, j, k]] + eps))
                self.logZ[[i, j, k]] = np.logaddexp(
                    self.weights[[i, j, k]], -self.weights[[i, j, k]]).sum(-1)
            elif self.nature == 'Potts':
                self.cond_muv[[i, j, k]] = utilities.average_product(
                    cond_muh, data, c2=self.n_c, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis, np.newaxis]
                self.cum_cond_muv[[i, j, k]] = np.cumsum(
                    self.cond_muv[[i, j, k]], axis=-1)
                self.weights[[i, j, k]] = np.log(
                    self.cond_muv[[i, j, k]] + eps)
                self.weights[[i, j, k]] -= self.weights[[i, j, k]].mean(-1)[:, :, np.newaxis]
                self.logZ[[i, j, k]] = utilities.logsumexp(
                    self.weights[[i, j, k]], axis=-1).sum(-1)

            current_L = utilities.average(
                self.likelihood(data), weights=weights) / self.N
            mini_epochs += 1
            converged = (mini_epochs >= maxiter) | (
                np.abs(current_L - previous_L) < eps)
            previous_L = current_L.copy()
            if verbose:
                print('Partial EM %s, L = %.3f' % (mini_epochs, current_L))
        return current_L
Example #6
 def likelihood(self, data):
     if data.ndim == 1:
         data = data[np.newaxis, :]
     if self.nature in ['Bernoulli', 'Spin']:
         f = np.dot(data, self.weights.T)
     elif self.nature == 'Potts':
         f = cy_utilities.compute_output_C(data, self.weights, np.zeros(
             [data.shape[0], self.M], dtype=curr_float))
     return utilities.logsumexp((f - self.logZ[np.newaxis, :] + np.log(self.muh)[np.newaxis, :]), axis=1)
Example #7
 def pseudo_likelihood(self, x):
     if self.nature not in ['Bernoulli', 'Spin', 'Potts']:
         print('PL not supported for continuous data')
     else:
         fields = self.compute_fields_eff(x)
         if self.nature == 'Bernoulli':
             return (fields * x - np.logaddexp(fields, 0)).mean(1)
         elif self.nature == 'Spin':
             return (fields * x - np.logaddexp(fields, -fields)).mean(1)
         elif self.nature == 'Potts':
             return (cy_utilities.substitute_C(fields, x) -
                     utilities.logsumexp(fields, axis=2)).mean(1)
Example #8
 def likelihood_and_expectation(self, data):
     if self.nature in ['Bernoulli', 'Spin']:
         f = np.dot(data, self.weights.T)
     elif self.nature == 'Potts':
         f = cy_utilities.compute_output_C(data, self.weights, np.zeros(
             [data.shape[0], self.M], dtype=curr_float))
     L = utilities.logsumexp(
         (f - self.logZ[np.newaxis, :] + np.log(self.muh)[np.newaxis, :]), axis=1)
     cond_muh = np.exp(
         f - self.logZ[np.newaxis, :]) * self.muh[np.newaxis, :]
     cond_muh /= cond_muh.sum(-1)[:, np.newaxis]
     return L, cond_muh
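
Since cond_muh is a softmax over components, one possible refinement is to reuse the already-computed log normalizer L before exponentiating, which avoids overflow when f - logZ is large; the helper below is an illustrative sketch, not part of the class:

import numpy as np
from scipy.special import logsumexp

def posterior_over_components(f, logZ, muh):
    # f {B,M}: per-sample, per-component unnormalized log-likelihoods
    # logZ {M,}: per-component log partition functions; muh {M,}: mixture weights
    ln_joint = f - logZ[np.newaxis, :] + np.log(muh)[np.newaxis, :]
    L = logsumexp(ln_joint, axis=1)                  # ln P(x)
    cond_muh = np.exp(ln_joint - L[:, np.newaxis])   # P(m | x), each row sums to 1
    return L, cond_muh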
Example #9
    def __call__(self, a):
        if self._partition is None:
            aMax = np.max(a)
            return np.exp(a - aMax) / np.sum(np.exp(a - aMax))
        else:
            activation = np.zeros_like(a)
            pPart = 0
            for part in self._partition:
                lnAct = a[pPart:part] - logsumexp(a[pPart:part])
                # clamp values to avoid numerical overflow/underflow
                lnAct[lnAct <= -self.clamp] = -self.clamp
                lnAct[lnAct >= self.clamp] = self.clamp
                activation[pPart:part] = np.exp(lnAct)
                pPart = part
            # now calculate softmax over last partition boundary to the end
            lnAct = a[pPart:] - logsumexp(a[pPart:])
            # clamp values to avoid numerical overflow/underflow
            lnAct[lnAct <= -self.clamp] = -self.clamp
            lnAct[lnAct >= self.clamp] = self.clamp
            activation[pPart:] = np.exp(lnAct)

            return activation
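
The unpartitioned branch above is the usual max-shifted softmax; the same computation can be written with logsumexp, which is exactly what the partitioned branch does per block. A minimal sketch over an arbitrary axis (scipy.special.logsumexp assumed):

import numpy as np
from scipy.special import logsumexp

def softmax(a, axis=-1):
    # exponentiating a minus its log normalizer gives probabilities that sum to 1
    # along the chosen axis, without forming large intermediate exponentials
    return np.exp(a - logsumexp(a, axis=axis, keepdims=True))

# e.g. softmax(np.array([1.0, 2.0, 3.0])) -> probabilities summing to 1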
Example #10
    def merge_split(self, proposed_merge_split, eps=1e-6):
        i, j, k = proposed_merge_split
        old_mui = self.muh[i].copy()
        old_muj = self.muh[j].copy()
        old_muk = self.muh[k].copy()
        self.muh[i] = old_mui + old_muj
        self.muh[k] = old_muk / 2
        self.muh[j] = old_muk / 2
        self.gh = np.log(self.muh)
        self.gh -= self.gh.mean()
        self.cum_muh = np.cumsum(self.muh)

        old_cond_muvi = self.cond_muv[i].copy()
        old_cond_muvj = self.cond_muv[j].copy()
        old_cond_muvk = self.cond_muv[k].copy()

        self.cond_muv[i] = (old_cond_muvi * old_mui +
                            old_cond_muvj * old_muj) / (old_mui + old_muj)

        if self.nature == 'Potts':
            noise = np.random.rand(self.N, self.n_c)
            noise /= noise.sum(-1)[:, np.newaxis]
        elif self.nature == 'Bernoulli':
            noise = np.random.rand(self.N)
            noise /= noise.sum(-1)
        elif self.nature == 'Spin':
            noise = (2 * np.random.rand(self.N) - 1)

        self.cond_muv[j] = 0.95 * old_cond_muvk + 0.05 * noise

        if self.nature == 'Bernoulli':
            self.weights[[i, j]] = np.log(
                (self.cond_muv[[i, j]] + eps) / (1 - self.cond_muv[[i, j]] + eps))
            self.logZ[[i, j]] = np.logaddexp(0, self.weights[[i, j]]).sum(-1)
        elif self.nature == 'Spin':
            self.weights[[i, j]] = 0.5 * np.log(
                (1 + self.cond_muv[[i, j]] + eps) / (1 - self.cond_muv[[i, j]] + eps))
            self.logZ[[i, j]] = np.logaddexp(
                self.weights[[i, j]], -self.weights[[i, j]]).sum(-1)
        elif self.nature == 'Potts':
            self.cum_cond_muv[[i, j]] = np.cumsum(
                self.cond_muv[[i, j]], axis=-1)
            self.weights[[i, j]] = np.log(self.cond_muv[[i, j]] + eps)
            self.weights[[i, j]] -= self.weights[[i, j]].mean(-1)[:, :, np.newaxis]
            self.logZ[[i, j]] = utilities.logsumexp(
                self.weights[[i, j]], axis=-1).sum(-1)
Example #11
    def _backward(self, O, lnC):
        '''
        Calculates the backward variable, beta: the probability of the partial observation
        sequence Ot+1 Ot+2 ... OT (from time t+1 to the end) given state Si at time t

        PARAMETERS
        ----------
        O {TxD}: observation matrix with a sequence of T observations, each having dimension D
        lnC (T,): log of the scaling coefficients for each observation calculated from the forward pass
        
        RETURNS
        -------
        lnBeta {T,N}: log of the backward variable: the probability of the partial observation
                      sequence Ot+1 Ot+2 ... OT (from time t+1 to the end) given state Si at time t
        '''

        O = unsqueeze(O, 2)
        T, D = O.shape

        # check dimensions of provided observations agree with the trained emission distributions
        dim = self._B[0].mu.shape[1]
        if D != dim:
            raise ValueError(
                'GHMM: observation dimension does not agree with the trained emission distributions for the model'
            )

        # calculate lnP for each observation for each state's emission distribution
        # lnP_obs {T, N}
        lnP_obs = np.zeros([T, self.N])
        for i in range(0, self.N):
            lnP_obs[:, i] = self._B[i].calcLnP(O)

        # backward variable, beta {T,N}
        # Step 1: Initialization
        # since ln(1) = 0
        lnBeta = np.zeros([T, self.N]) + lnC[T - 1]

        # Step 2: Induction
        for t in reversed(range(T - 1)):
            lnBeta[t, :] = logsumexp(
                np.log(self._A) + lnP_obs[t + 1, :] + lnBeta[t + 1, :],
                axis=1) + lnC[t]

        return lnBeta
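
A standalone log-space version of the same backward recursion, meant to pair with the forward sketch after Example #1; ln_A {N,N}, ln_b {T,N} and ln_c {T,} are illustrative names, with ln_c assumed to come from a prior forward pass:

import numpy as np
from scipy.special import logsumexp

def backward_log(ln_A, ln_b, ln_c):
    # ln_A {N,N}: log transition matrix; ln_b {T,N}: log emission probabilities
    # ln_c {T,}: log scaling coefficients from the forward pass
    T, N = ln_b.shape
    ln_beta = np.zeros((T, N))

    # Step 1: Initialization -- beta_T(i) = 1, scaled like the forward variable
    ln_beta[T - 1] = ln_c[T - 1]

    # Step 2: Induction -- beta_t(i) = sum_j A_ij * b_j(O_{t+1}) * beta_{t+1}(j)
    for t in reversed(range(T - 1)):
        ln_beta[t] = logsumexp(ln_A + ln_b[t + 1] + ln_beta[t + 1], axis=1) + ln_c[t]

    return ln_beta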
Example #12
    def _backward(self, O, lnC):
        '''
        Calculates the backward variable, beta: the probability of the partial observation
        sequence Ot+1 Ot+2 ... OT (from time t+1 to the end) given state Si at time t

        PARAMETERS
        ----------
        O {TxD}: observation matrix with a sequence of T observations, each having dimension D
        lnC (T,): log of the scaling coefficients for each observation calculated from the forward pass
        
        RETURNS
        -------
        lnBeta {T,N}: log of the backward variable: the probability of the partial observation
                      sequence Ot+1 Ot+2 ... OT (from time t+1 to the end) given state Si at time t
        '''
        
        O = unsqueeze(O,2)
        T, D = O.shape

        # check dimensions of provided observations agree with the trained emission distributions
        dim = self._B[0].mu.shape[1]
        if D != dim:
            raise ValueError('GHMM: observation dimension does not agree with the trained emission distributions for the model')

        # calculate lnP for each observation for each state's emission distribution
        # lnP_obs {T, N}
        lnP_obs = np.zeros([T,self.N])
        for i in range(0,self.N):
            lnP_obs[:,i] = self._B[i].calcLnP(O)

        # backward variable, beta {T,N}
        # Step 1: Initialization
        # since ln(1) = 0
        lnBeta = np.zeros([T,self.N]) + lnC[T-1]

        # Step 2: Induction
        for t in reversed(range(T-1)):
            lnBeta[t,:] = logsumexp(np.log(self._A) + lnP_obs[t+1,:] + lnBeta[t+1,:], axis=1) + lnC[t]

        return lnBeta
Example #13
    def _expect(self, X, verbose=False):
        ''' 
        Expectation step of the expectation maximization algorithm. 
        
        PARAMETERS
        ----------
        X {NxD}: training data

        RETURNS
        -------
        lnP (N,): ln[sum_M p(l)*p(Xi | l)]
            ln probabilities of each observation in the training data,
            marginalizing over mixture components to get ln[p(Xi)]
        posteriors {NxM}: p(l | Xi)
            Posterior probabilities of each mixture component for each observation.
        '''

        N, _ = X.shape
        lnP_Xi_l = np.zeros([N, self.M])

        # zero correction
        self._Sigma[self._Sigma == 0.0] += self._zeroCorr

        if hasattr(slinalg, 'solve_triangular'):
            # only in scipy since 0.9
            solve_triangular = slinalg.solve_triangular
        else:
            # slower, but works
            solve_triangular = slinalg.solve

        # for each mixture component
        for l in range(0, self.M):
            X_mu = X - self._mu[l, :]

            if self.covType == 'diag':
                sig_l = np.diag(self._Sigma[l, :, :])
                lnP_Xi_l[:, l] = -0.5 * (self.D * np.log(2.0 * np.pi) + np.sum(
                    (X_mu**2) / sig_l, axis=1) + np.sum(np.log(sig_l)))

            elif self.covType == 'full':
                try:
                    # cholesky decomposition => U*U.T = _Sigma[l,:,:]
                    U = slinalg.cholesky(self._Sigma[l, :, :], lower=True)
                except slinalg.LinAlgError:
                    # reinitialization trick is from scikit learn GMM
                    if verbose:
                        print "Sigma is not positive definite. Reinitializing ..."
                    self._Sigma[l, :, :] = 1e-6 * np.eye(self.D)
                    U = 1000.0 * self._Sigma[l, :, :]

                Q = solve_triangular(U, X_mu.T, lower=True)
                lnP_Xi_l[:, l] = -0.5 * (self.D * np.log(2.0 * np.pi) +
                                         2.0 * np.sum(np.log(np.diag(U))) +
                                         np.sum(Q**2, axis=0))

        lnP_Xi_l += self._lnw

        # calculate sum of probabilities (marginalizing over mixtures)
        lnP = logsumexp(lnP_Xi_l, axis=1)

        posteriors = np.exp(lnP_Xi_l - lnP[:, np.newaxis])

        return lnP, posteriors
Example #14
    def _forward(self, O, scale=True):
        '''
        Calculates the forward variable, alpha: the probability of the partial observation
        sequence O1 O2 ... Ot (until time t) and state Si at time t.

        PARAMETERS
        ----------
        O {TxD}: observation matrix with a sequence of T observations, each having dimension D
        scale {Boolean}: if True, rescale the forward variable at each time step to avoid numerical underflow (default True)
        
        RETURNS
        -------
        lnP {Float}: log probability of the observation sequence O
        lnAlpha {T,N}: log of the forward variable: the probability of the partial observation
                       sequence O1 O2 ... Ot (until time t) and state Si at time t.
        lnC (T,): log of the scaling coefficients for each observation
        '''

        O = unsqueeze(O, 2)
        T, D = O.shape

        # check dimensions of provided observations agree with the trained emission distributions
        dim = self._B[0].mu.shape[1]
        if D != dim:
            raise ValueError(
                'GHMM: observation dimension does not agree with the trained emission distributions for the model'
            )

        # calculate lnP for each observation for each state's emission distribution
        # lnP_obs {T, N}
        lnP_obs = np.zeros([T, self.N])
        for i in range(self.N):
            lnP_obs[:, i] = self._B[i].calcLnP(O)

        # forward variable, alpha {T,N}
        lnAlpha = np.zeros([T, self.N])

        # initialize vector of scaling coefficients
        lnC = np.zeros(T)

        # Step 1: Initialization
        lnAlpha[0, :] = np.log(self._pi) + lnP_obs[0, :]
        if scale:
            lnC[0] = -logsumexp(lnAlpha[0, :])
            lnAlpha[0, :] += lnC[0]

        # Step 2: Induction
        for t in range(1, T):
            lnAlpha[t, :] = logsumexp(lnAlpha[[t - 1], :].T + np.log(self._A),
                                      axis=0) + lnP_obs[t, :]
            if scale:
                lnC[t] = -logsumexp(lnAlpha[t, :])
                lnAlpha[t, :] += lnC[t]

        # Step 3: Termination
        if scale:
            lnP = -np.sum(lnC)
        else:
            lnP = logsumexp(lnAlpha[T - 1, :])

        return lnP, lnAlpha, lnC