Example #1
    def setParamsFromBeta(self, K, beta=None):
        """ Set params to reasonable values given comp probabilities.

        Parameters
        --------
        K : int
            number of components
        beta : 1D array, size K. optional, default=[1/K 1/K ... 1/K]
            probability of each component

        Post Condition for EM
        --------
        Attribute w is set to the provided beta vector.
        Default behavior sets w to the uniform distribution.

        Post Condition for VB
        ---------
        Attribute theta is set so q(w) has mean of beta and moderate variance.
        """
        if beta is None:
            beta = 1.0 / K * np.ones(K)
        assert beta.ndim == 1
        assert beta.size == K

        self.K = int(K)
        if self.inferType == 'EM':
            self.w = beta.copy()
        else:
            self.theta = self.K * beta
            self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
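The VB branch above relies on the standard Dirichlet identity E[log w_k] = digamma(theta_k) - digamma(sum_j theta_j). A minimal standalone sketch (not part of bnpy) that checks this against a Monte Carlo estimate:

# Illustrative sketch (not bnpy): Monte Carlo check of the identity
# E[log w_k] = digamma(theta_k) - digamma(sum_j theta_j) for w ~ Dirichlet(theta).
import numpy as np
from scipy.special import digamma

K = 4
beta = np.full(K, 1.0 / K)
theta = K * beta                          # same update as the VB branch above
Elogw = digamma(theta) - digamma(theta.sum())

rng = np.random.default_rng(0)
w_samples = rng.dirichlet(theta, size=200000)
Elogw_mc = np.log(w_samples).mean(axis=0)
print(np.max(np.abs(Elogw - Elogw_mc)))   # small (Monte Carlo error only)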
Example #2
    def set_global_params(self,
                          hmodel=None,
                          K=None,
                          w=None,
                          beta=None,
                          theta=None,
                          **kwargs):
        """ Set global parameters to provided values.

        Post Condition for EM
        -------
        w set to valid vector with K components.

        Post Condition for VB
        -------
        theta set to define valid posterior over K components.
        """
        if hmodel is not None:
            self.setParamsFromHModel(hmodel)
        elif beta is not None:
            self.setParamsFromBeta(K, beta=beta)
        elif w is not None:
            self.setParamsFromBeta(K, beta=w)
        elif theta is not None and self.inferType.count('VB'):
            self.K = int(K)
            self.theta = theta
            self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
        else:
            raise ValueError("Unrecognized set_global_params args")
Example #3
def calcELBO_LinearTerms(SS=None,
                         StartStateCount=None,
                         TransStateCount=None,
                         rho=None,
                         omega=None,
                         Ebeta=None,
                         startTheta=None,
                         transTheta=None,
                         startAlpha=0,
                         alpha=0,
                         kappa=None,
                         gamma=None,
                         afterGlobalStep=0,
                         todict=0,
                         **kwargs):
    """ Calculate ELBO objective terms that are linear in suff stats.

    Returns
    -------
    L : scalar float
        L is sum of any term in ELBO that is const/linear wrt suff stats.
    """
    Ltop = L_top(rho=rho,
                 omega=omega,
                 alpha=alpha,
                 gamma=gamma,
                 kappa=kappa,
                 startAlpha=startAlpha)
    LdiffcDir = -c_Dir(transTheta) - c_Dir(startTheta)
    if afterGlobalStep:
        if todict:
            return dict(Lalloc=Ltop + LdiffcDir, Lslack=0)
        return Ltop + LdiffcDir

    K = rho.size
    if Ebeta is None:
        Ebeta = rho2beta(rho, returnSize='K+1')

    if SS is not None:
        StartStateCount = SS.StartStateCount
        TransStateCount = SS.TransStateCount
    # Augment suff stats to be sure they have a 0 in the final column,
    # which represents inactive states.
    if StartStateCount.size == K:
        StartStateCount = np.hstack([StartStateCount, 0])
    if TransStateCount.shape[-1] == K:
        TransStateCount = np.hstack([TransStateCount, np.zeros((K, 1))])

    LstartSlack = np.inner(StartStateCount + startAlpha * Ebeta - startTheta,
                           digamma(startTheta) - digamma(startTheta.sum()))

    alphaEbetaPlusKappa = alpha * np.tile(Ebeta, (K, 1))
    alphaEbetaPlusKappa[:, :K] += kappa * np.eye(K)
    digammaSum = digamma(np.sum(transTheta, axis=1))
    LtransSlack = np.sum((TransStateCount + alphaEbetaPlusKappa - transTheta) *
                         (digamma(transTheta) - digammaSum[:, np.newaxis]))

    if todict:
        return dict(Lalloc=Ltop + LdiffcDir, Lslack=LstartSlack + LtransSlack)
    return Ltop + LdiffcDir + LstartSlack + LtransSlack
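The helper c_Dir used above is bnpy's Dirichlet cumulant; its source is not shown in this snippet, so the sketch below assumes the usual convention (the log normalization constant of a Dirichlet, summed over rows for a 2D parameter array such as transTheta) purely for illustration:

# Hedged sketch (an assumption, not bnpy's actual source): c_Dir as the
# Dirichlet cumulant, summed over rows for a 2D parameter array.
import numpy as np
from scipy.special import gammaln

def c_Dir_sketch(avec):
    avec = np.asarray(avec, dtype=np.float64)
    if avec.ndim == 1:
        return gammaln(avec.sum()) - gammaln(avec).sum()
    # 2D case: one Dirichlet per row
    return np.sum(gammaln(avec.sum(axis=1))) - np.sum(gammaln(avec))

print(c_Dir_sketch(np.ones(3)))        # log Gamma(3) = log 2
print(c_Dir_sketch(np.ones((2, 3))))   # twice that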
Example #4
    def get_trans_prob_matrix(self):
        ''' Get matrix of transition probabilities for all K active states
        '''
        digammaSumVec = digamma(np.sum(self.transTheta, axis=1))
        expELogPi = digamma(self.transTheta) - digammaSumVec[:, np.newaxis]
        np.exp(expELogPi, out=expELogPi)
        return expELogPi[0:self.K, 0:self.K]
Example #5
    def setParamsFromCountVec(self, K, N=None):
        """ Set params to reasonable values given counts for each comp.

        Parameters
        --------
        K : int
            number of components
        N : 1D array, size K. optional, default=[1 1 1 1 ... 1]
            size of each component

        Post Condition for EM
        --------
        Attribute w is set to posterior mean given provided vector N.
        Default behavior sets w to uniform distribution.

        Post Condition for VB
        ---------
        Attribute theta is set so q(w) equals posterior given vector N.
        Default behavior has q(w) with mean of uniform and moderate variance.
        """
        if N is None:
            N = 1.0 * np.ones(K)
        assert N.ndim == 1
        assert N.size == K

        self.K = int(K)
        if self.inferType == 'EM':
            self.w = N + (self.gamma / K)
            self.w /= self.w.sum()
        else:
            self.theta = N + self.gamma / K
            self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
Example #6
    def get_init_prob_vector(self):
        ''' Get vector of initial probabilities for all K active states
        '''
        expELogPi0 = digamma(
            self.startTheta) - digamma(np.sum(self.startTheta))
        np.exp(expELogPi0, out=expELogPi0)
        return expELogPi0[0:self.K]
Example #7
    def from_dict(self, myDict):
        self.inferType = myDict['inferType']
        self.K = myDict['K']
        if self.inferType == 'EM':
            self.w = myDict['w']
        else:
            self.theta = myDict['theta']
            self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
Example #8
    def calc_local_params(self, Data, LP, **kwargs):
        ''' Calculate local parameters for each data item and each component.

        This is part of the E-step.
        Note that this is the main place we differ from FiniteMixtureModel.py

        Args
        -------
        Data : bnpy data object with Data.nObs observations
        LP : local param dict with fields
              E_log_soft_ev : Data.nObs x K x K array
                  E_log_soft_ev[n,l,m] = log p(data obs n | comps l, m)

        Returns
        -------
        LP : local param dict with fields
             resp : 3D array, size Data.nObs x K x K
                    resp[n,l,m] = posterior responsibility comps. l,m have for
                    item n
        '''

        if self.inferType.count('EM') > 0:
            raise NotImplementedError(
                'EM not implemented for FiniteSMSB (yet)')

        N = Data.nNodes
        K = self.K
        logSoftEv = LP['E_log_soft_ev']  # E x K x K
        logSoftEv[np.where(Data.sourceID == Data.destID), :, :] = 0
        logSoftEv = np.reshape(logSoftEv, (N, N, K, K))

        if 'respSingle' not in LP:
            LP['respSingle'] = np.ones((N, K)) / K
        resp = LP['respSingle']

        Elogpi = digamma(self.theta) - digamma(np.sum(self.theta))  # Size K

        respTerm = np.zeros(K)
        for lap in xrange(self.EStepLaps):
            for i in xrange(Data.nNodes):
                respTerm = np.einsum(
                    'jlm,jm->l', logSoftEv[i, :, :, :], resp) + \
                    np.einsum('jlm,jl->m', logSoftEv[:, i, :, :], resp)
                resp[i, :] = np.exp(Elogpi + respTerm)
                resp[i, :] /= np.sum(resp[i, :])

        # For now, do the stupid thing of building the N^2 x K resp matrix
        #   (soon to change when using sparse data)
        # np.einsum makes fullResp[i,j,l,m] = resp[i,l]*resp[j,m]
        fullResp = np.einsum('il,jm->ijlm', resp, resp)
        fullResp = fullResp.reshape((N**2, K, K))
        fullResp[np.where(Data.sourceID == Data.destID), :, :] = 0
        LP['resp'] = fullResp
        LP['respSingle'] = resp
        self.make_hard_asgn_local_params(Data, LP)

        return LP
Example #9
    def get_init_prob_vector(self):
        ''' Get vector of initial probabilities for all K active states
        '''
        if self.inferType == 'EM':
            pi0 = self.startPi
        else:
            pi0 = np.exp(
                digamma(self.startTheta) - digamma(np.sum(self.startTheta)))
        return pi0
Example #10
def Lalloc(Nvec=None, SS=None, gamma=0.5, theta=None, Elogw=None):
    assert theta is not None
    K = theta.size
    if Elogw is None:
        Elogw = digamma(theta) - digamma(theta.sum())
    if Nvec is None:
        Nvec = SS.N
    Lalloc = c_Dir(gamma / K * np.ones(K)) - c_Dir(theta)
    Lalloc_slack = np.inner(Nvec + gamma / K - theta, Elogw)
    return Lalloc + Lalloc_slack
Example #11
    def E_logPi(self):
        ''' Compute expected probability \pi for each node and state

        Returns
        -------
        ElogPi : nNodes x K
        '''
        ElogPi = digamma(self.theta) - \
            digamma(np.sum(self.theta, axis=1))[:, np.newaxis]
        return ElogPi
Example #12
    def get_trans_prob_matrix(self):
        ''' Get matrix of transition probabilities for all K active states
        '''
        if self.inferType == 'EM':
            EPiMat = self.transPi
        else:
            digammasumVec = digamma(np.sum(self.transTheta, axis=1))
            EPiMat = np.exp(
                digamma(self.transTheta) - digammasumVec[:, np.newaxis])
        return EPiMat
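A toy standalone sketch of the VB branch above (made-up transTheta, not bnpy): each row of EPiMat is exp(E[log pi]) under a row-wise Dirichlet posterior, so rows sum to slightly less than 1.

# Toy sketch (not from bnpy) of the VB branch of get_trans_prob_matrix.
import numpy as np
from scipy.special import digamma

transTheta = np.array([[5.0, 1.0, 1.0],
                       [1.0, 5.0, 1.0],
                       [1.0, 1.0, 5.0]])
digammaSumVec = digamma(transTheta.sum(axis=1))
EPiMat = np.exp(digamma(transTheta) - digammaSumVec[:, np.newaxis])
print(EPiMat.sum(axis=1))   # each row sums to slightly less than 1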
Example #13
    def E_logPi(self):
        ''' Compute expected value of log \pi for each node and state.

        Returns
        -------
        ElogPi : 2D array, nNodes x K
        '''
        sumtheta = self.theta.sum(axis=1)
        ElogPi = digamma(self.theta) - digamma(sumtheta)[:, np.newaxis]
        return ElogPi
Example #14
    def update_global_params_VB(self, SS, **kwargs):
        """ Update attribute theta to optimize the ELBO objective.

        Post Condition for VB
        -------
        theta set to valid posterior for SS.K components.
        """
        self.theta = self.gamma / SS.K + SS.N
        self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
        self.K = SS.K
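A minimal illustration of this update with a hypothetical stand-in for the suff-stat bag SS (only the N and K fields the method reads); toy values, not bnpy's SuffStatBag:

# Toy illustration with a hypothetical SS stand-in; values are made up.
from collections import namedtuple
import numpy as np
from scipy.special import digamma

ToySS = namedtuple('ToySS', ['N', 'K'])
SS = ToySS(N=np.array([12.0, 3.0, 0.5]), K=3)
gamma = 1.0

theta = gamma / SS.K + SS.N               # same formula as the update above
Elogw = digamma(theta) - digamma(theta.sum())
print(theta)
print(Elogw)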
Example #15
def updateRhoOmega(
        theta=None, thetaRem=None,
        initrho=None,
        omega=None,
        alpha=0.5, gamma=10,
        logFunc=None):
    ''' Update rho, omega via numerical optimization.

    Will set vector omega to reasonable fixed value,
    and do gradient descent to optimize the vector rho.

    Returns
    -------
    rho : 1D array, size K
    omega : 1D array, size K
    '''
    nDoc = theta.shape[0]
    K = theta.shape[1]
    # Verify initial rho
    assert initrho is not None
    assert initrho.size == K
    # Verify initial omega
    assert omega is not None
    assert omega.size == K
    # Compute summaries of theta needed to update rho
    # sumLogPi : 1D array, size K
    # sumLogPiRem : scalar
    digammasumtheta = digamma(theta.sum(axis=1) + thetaRem)
    ElogPi = digamma(theta) - digammasumtheta[:, np.newaxis]
    sumLogPi = np.sum(ElogPi, axis=0)
    ElogPiRem = digamma(thetaRem) - digammasumtheta
    sumLogPiRem = np.sum(ElogPiRem)
    # Do the optimization
    try:
        rho, omega, fofu, Info = \
            OptimizerRhoOmegaBetter.find_optimum_multiple_tries(
                nDoc=nDoc,
                sumLogPiActiveVec=sumLogPi,
                sumLogPiRem=sumLogPiRem,
                gamma=gamma,
                alpha=alpha,
                initrho=initrho,
                initomega=omega,
                do_grad_omega=0,
                do_grad_rho=1)
    except ValueError as error:
        if logFunc:
            logFunc('***** Rho optim failed. Remain at cur val. ' + \
                str(error))
        rho = initrho

    assert rho.size == K
    assert omega.size == K
    return rho, omega
Example #16
    def update_global_params_soVB(self, SS, rho, **kwargs):
        """ Update attribute theta to optimize stochastic ELBO objective.

        Post Condition for VB
        -------
        theta set to valid posterior for SS.K components.
        """
        thetaStar = self.gamma / SS.K + SS.N
        self.theta = rho * thetaStar + (1 - rho) * self.theta
        self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
        self.K = SS.K
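Here rho acts as the stochastic step size (distinct from the stick-breaking rho used elsewhere in these examples). A toy sketch of a few interpolated updates under an assumed decaying step-size schedule:

# Toy sketch (not bnpy): a few stochastic VB steps, interpolating between the
# current theta and the batch estimate thetaStar.
import numpy as np

gamma, K = 1.0, 3
theta = np.ones(K)                        # current global parameters
batch_N = np.array([8.0, 2.0, 0.0])       # suff stats from one toy batch
for t in range(1, 4):
    rho = (t + 1.0) ** -0.6               # assumed learning-rate schedule
    thetaStar = gamma / K + batch_N
    theta = rho * thetaStar + (1 - rho) * theta
    print(t, rho, theta)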
Example #17
def calcBetaExpectations(eta1, eta0):
    ''' Evaluate expected value of log u under Beta(u | eta1, eta0)

    Returns
    -------
    ElogU : 1D array, size K
    Elog1mU : 1D array, size K
    '''
    digammaBoth = digamma(eta0 + eta1)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth
    return ElogU, Elog1mU
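Other examples here (e.g. #21 and #23) build the Beta parameters from stick-breaking variables as eta1 = rho * omega and eta0 = (1 - rho) * omega. A short usage sketch with toy values under that convention:

# Usage sketch with toy values, assuming eta1 = rho * omega, eta0 = (1 - rho) * omega.
import numpy as np
from scipy.special import digamma

def calcBetaExpectations(eta1, eta0):
    digammaBoth = digamma(eta0 + eta1)
    return digamma(eta1) - digammaBoth, digamma(eta0) - digammaBoth

rho = np.array([0.9, 0.7, 0.5])
omega = np.array([10.0, 10.0, 10.0])
ElogU, Elog1mU = calcBetaExpectations(rho * omega, (1 - rho) * omega)
print(ElogU)
print(Elog1mU)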
Example #18
    def L_slack(self, SS):
        ''' Compute slack term of the allocation objective function.

        Returns
        -------
        L : scalar float
        '''
        ElogPi = digamma(self.theta) - \
            digamma(np.sum(self.theta, axis=1))[:, np.newaxis]
        Q = SS.NodeStateCount + self.alpha / SS.K - self.theta
        Lslack = np.sum(Q * ElogPi)
        return Lslack
Example #19
    def E_logPi(self, returnRem=0):
        ''' Compute expected probability \pi for each node and state

        Returns
        -------
        ElogPi : nNodes x K
        '''
        digammasumtheta = digamma(
            self.theta.sum(axis=1) + self.thetaRem)
        ElogPi = digamma(self.theta) - digammasumtheta[:, np.newaxis]
        if returnRem:
            ElogPiRem = digamma(self.thetaRem) - digammasumtheta
            return ElogPi, ElogPiRem
        return ElogPi
Example #20
    def E_logPi(self, returnRem=0):
        ''' Compute expected value of log \pi for each node and state.

        Returns
        -------
        ElogPi : 2D array, nNodes x K
        '''
        digammasumtheta = digamma(
            self.theta.sum(axis=1) + self.thetaRem)
        ElogPi = digamma(self.theta) - digammasumtheta[:, np.newaxis]
        if returnRem:
            ElogPiRem = digamma(self.thetaRem) - digammasumtheta
            return ElogPi, ElogPiRem
        return ElogPi
Example #21
def E_cDalphabeta_surrogate(alpha, rho, omega):
    ''' Compute expected value of cumulant function of alpha * beta.

    Returns
    -------
    csur : scalar float
    '''
    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digammaBoth = digamma(eta1 + eta0)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth
    OFFcoef = kvec(K)
    calpha = gammaln(alpha) + (K + 1) * np.log(alpha)
    return calpha + np.sum(ElogU) + np.inner(OFFcoef, Elog1mU)
Example #22
    def applyHardMergePairToLP(self, LP, kA, kB):
        ''' Apply hard merge pair to provided local parameters

        Returns
        --------
        mergeLP : dict of updated local parameters
        '''
        resp = np.delete(LP['resp'], kB, axis=1)
        theta = np.delete(LP['theta'], kB, axis=1)
        DocTopicCount = np.delete(LP['DocTopicCount'], kB, axis=1)

        resp[:, kA] += LP['resp'][:, kB]
        theta[:, kA] += LP['theta'][:, kB]
        DocTopicCount[:, kA] += LP['DocTopicCount'][:, kB]

        ElogPi = np.delete(LP['ElogPi'], kB, axis=1)
        ElogPi[:, kA] = digamma(theta[:, kA]) - LP['digammaSumTheta']

        return dict(resp=resp,
                    theta=theta,
                    thetaRem=LP['thetaRem'],
                    ElogPi=ElogPi,
                    ElogPiRem=LP['ElogPiRem'],
                    DocTopicCount=DocTopicCount,
                    digammaSumTheta=LP['digammaSumTheta'])
Example #23
def L_alloc(nDoc=None,
            rho=None,
            omega=None,
            alpha=None,
            gamma=None,
            todict=0,
            **kwargs):
    ''' Evaluate the top-level term of the surrogate objective
    '''
    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digammaBoth = digamma(eta1 + eta0)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth

    Ltop_c_p = K * c_Beta(1, gamma)
    Ltop_c_q = -c_Beta(eta1, eta0)
    Ltop_cDiff = Ltop_c_p + Ltop_c_q
    Ltop_logpDiff = np.inner(1.0 - eta1, ElogU) + \
        np.inner(gamma - eta0, Elog1mU)

    nDoc = np.asarray(nDoc)
    if nDoc.size > 1:
        LcDsur_const = 0
        LcDsur_rhoomega = 0
        for Kd in range(nDoc.size):
            LcDsur_const += nDoc[Kd] * Kd * np.log(alpha)
            LcDsur_rhoomega += nDoc[Kd] * (np.sum(ElogU[:Kd]) + \
                np.inner(kvec(Kd), Elog1mU[:Kd]))
    else:
        LcDsur_const = nDoc * K * np.log(alpha)
        LcDsur_rhoomega = nDoc * np.sum(ElogU) + \
            nDoc * np.inner(kvec(K), Elog1mU)

    Lalloc = Ltop_cDiff + Ltop_logpDiff + LcDsur_const + LcDsur_rhoomega

    if todict:
        return dict(Lalloc=Lalloc,
                    Lalloc_top_cDiff=Ltop_cDiff,
                    Lalloc_top_logpDiff=Ltop_logpDiff,
                    Lalloc_cDsur_const=LcDsur_const,
                    Lalloc_cDsur_rhoomega=LcDsur_rhoomega,
                    Lalloc_rhoomega=Ltop_c_q + Ltop_logpDiff + LcDsur_rhoomega)
    return Lalloc
Example #24
    def setParamsFromHModel(self, hmodel):
        """ Set parameters exactly as in provided HModel object.

        Parameters
        ------
        hmodel : bnpy.HModel
            The model to copy parameters from.

        Post Condition
        ------
        w or theta will be set exactly equal to hmodel's allocModel.
        """
        self.K = hmodel.allocModel.K
        if self.inferType == 'EM':
            self.w = hmodel.allocModel.w.copy()
        else:
            self.theta = hmodel.allocModel.theta.copy()
            self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
Example #25
    def initLPFromResp(self, Data, LP):
        ''' Fill in remaining local parameters given token-topic resp.

        Args
        ----
        LP : dict with fields
            * resp : 2D array, size N x K

        Returns
        -------
        LP : dict with fields
            * DocTopicCount
            * theta
            * ElogPi
        '''
        resp = LP['resp']
        K = resp.shape[1]
        DocTopicCount = np.zeros((Data.nDoc, K))
        for d in xrange(Data.nDoc):
            start = Data.doc_range[d]
            stop = Data.doc_range[d + 1]
            if hasattr(Data, 'word_count'):
                DocTopicCount[d, :] = np.dot(Data.word_count[start:stop],
                                             resp[start:stop, :])
            else:
                DocTopicCount[d, :] = np.sum(resp[start:stop, :], axis=0)

        remMass = np.minimum(0.1, 1.0 / (K * K))
        newEbeta = (1 - remMass) / K
        theta = DocTopicCount + self.alpha * newEbeta
        digammaSumTheta = digamma(theta.sum(axis=1))
        ElogPi = digamma(theta) - digammaSumTheta[:, np.newaxis]

        LP['DocTopicCount'] = DocTopicCount
        LP['theta'] = theta
        LP['ElogPi'] = ElogPi
        return LP
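A standalone sketch of the DocTopicCount aggregation above on a tiny made-up dataset (two documents, no word_count field); illustrative only, not bnpy's Data object:

# Toy sketch (hypothetical data): per-document aggregation of token-level
# responsibilities into DocTopicCount.
import numpy as np

resp = np.array([[0.9, 0.1],
                 [0.8, 0.2],
                 [0.2, 0.8],
                 [0.1, 0.9]])
doc_range = np.array([0, 2, 4])          # doc 0 = rows 0:2, doc 1 = rows 2:4
nDoc, K = len(doc_range) - 1, resp.shape[1]

DocTopicCount = np.zeros((nDoc, K))
for d in range(nDoc):
    start, stop = doc_range[d], doc_range[d + 1]
    DocTopicCount[d, :] = resp[start:stop, :].sum(axis=0)
print(DocTopicCount)   # [[1.7, 0.3], [0.3, 1.7]]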
Example #26
def L_top(nDoc=None,
          rho=None,
          omega=None,
          alpha=None,
          gamma=None,
          todict=0,
          **kwargs):
    ''' Evaluate the top-level term of the surrogate objective
    '''
    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digammaBoth = digamma(eta1 + eta0)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth

    ONcoef = nDoc + 1.0 - eta1
    OFFcoef = nDoc * kvec(K) + gamma - eta0

    calpha = nDoc * K * np.log(alpha)
    cDiff = K * c_Beta(1, gamma) - c_Beta(eta1, eta0)
    return calpha + cDiff + \
        np.inner(ONcoef, ElogU) + np.inner(OFFcoef, Elog1mU)
Example #27
def L_top(rho=None,
          omega=None,
          alpha=None,
          gamma=None,
          kappa=0,
          startAlpha=0,
          **kwargs):
    ''' Evaluate the top-level term of the surrogate objective
    '''
    if startAlpha == 0:
        startAlpha = alpha

    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digamma_omega = digamma(omega)
    ElogU = digamma(eta1) - digamma_omega
    Elog1mU = digamma(eta0) - digamma_omega
    diff_cBeta = K * c_Beta(1.0, gamma) - c_Beta(eta1, eta0)

    tAlpha = K * K * np.log(alpha) + K * np.log(startAlpha)
    if kappa > 0:
        coefU = K + 1.0 - eta1
        coef1mU = K * OptimizerRhoOmega.kvec(K) + 1.0 + gamma - eta0
        sumEBeta = np.sum(rho2beta(rho, returnSize='K'))
        tBeta = sumEBeta * (np.log(alpha + kappa) - np.log(kappa))
        tKappa = K * (np.log(kappa) - np.log(alpha + kappa))
    else:
        coefU = (K + 1) + 1.0 - eta1
        coef1mU = (K + 1) * OptimizerRhoOmega.kvec(K) + gamma - eta0
        tBeta = 0
        tKappa = 0

    diff_logU = np.inner(coefU, ElogU) \
        + np.inner(coef1mU, Elog1mU)
    return tAlpha + tKappa + tBeta + diff_cBeta + diff_logU
Example #28
def calcMergeTermsFromSeparateLP(Data=None,
                                 LPa=None,
                                 SSa=None,
                                 LPb=None,
                                 SSb=None,
                                 mUIDPairs=None):
    ''' Compute merge terms that combine two comps from separate LP dicts.
    
    Returns
    -------
    Mdict : dict of key, array-value pairs
    '''
    M = len(mUIDPairs)
    m_sumLogPi = np.zeros(M)
    m_gammalnTheta = np.zeros(M)
    m_slackTheta = np.zeros(M)
    m_Hresp = np.zeros(M)

    assert np.allclose(LPa['digammaSumTheta'], LPb['digammaSumTheta'])
    for m, (uidA, uidB) in enumerate(mUIDPairs):
        kA = SSa.uid2k(uidA)
        kB = SSb.uid2k(uidB)

        m_resp = LPa['resp'][:, kA] + LPb['resp'][:, kB]
        if hasattr(Data, 'word_count') and \
                Data.nUniqueToken == m_resp.shape[0]:
            m_Hresp[m] = -1 * calcRlogRdotv(m_resp[:, np.newaxis],
                                            Data.word_count)
        else:
            m_Hresp[m] = -1 * calcRlogR(m_resp[:, np.newaxis])

        DTC_vec = LPa['DocTopicCount'][:, kA] + LPb['DocTopicCount'][:, kB]
        theta_vec = LPa['theta'][:, kA] + LPb['theta'][:, kB]
        m_gammalnTheta[m] = np.sum(gammaln(theta_vec))
        ElogPi_vec = digamma(theta_vec) - LPa['digammaSumTheta']
        m_sumLogPi[m] = np.sum(ElogPi_vec)
        # slack = (Ndm - theta_dm) * E[log pi_dm]
        slack_vec = ElogPi_vec
        slack_vec *= (DTC_vec - theta_vec)
        m_slackTheta[m] = np.sum(slack_vec)
    return dict(Hresp=m_Hresp,
                gammalnTheta=m_gammalnTheta,
                slackTheta=m_slackTheta,
                sumLogPi=m_sumLogPi)
Example #29
    def find_optimum_rhoOmega(self, **kwargs):
        ''' Performs numerical optimization of rho and omega for M-step update.

        Note that the optimizer forces rho to be in [EPS, 1-EPS] for
        the sake of numerical stability

        Returns
        -------
        rho : 1D array, size K
        omega : 1D array, size K
        Info : dict of information about optimization.
        '''

        # Calculate expected log transition probability
        # using theta vectors for all K states plus initial state
        ELogPi = digamma(self.transTheta) \
            - digamma(np.sum(self.transTheta, axis=1))[:, np.newaxis]
        sumELogPi = np.sum(ELogPi, axis=0)
        startELogPi = digamma(self.startTheta) \
            - digamma(np.sum(self.startTheta))

        # Select initial rho, omega values for gradient descent
        if hasattr(self, 'rho') and self.rho.size == self.K:
            initRho = self.rho
        else:
            initRho = None

        if hasattr(self, 'omega') and self.omega.size == self.K:
            initOmega = self.omega
        else:
            initOmega = None

        # Do the optimization
        try:
            rho, omega, fofu, Info = \
                OptimizerRhoOmega.find_optimum_multiple_tries(
                    sumLogPi=sumELogPi,
                    sumLogPiActiveVec=None,
                    sumLogPiRemVec=None,
                    startAlphaLogPi=self.startAlpha * startELogPi,
                    nDoc=self.K + 1,
                    gamma=self.gamma,
                    alpha=self.transAlpha,
                    kappa=self.kappa,
                    initrho=initRho,
                    initomega=initOmega)
            self.OptimizerInfo = Info
            self.OptimizerInfo['fval'] = fofu

        except ValueError as error:
            if hasattr(self, 'rho') and self.rho.size == self.K:
                Log.error(
                    '***** Optim failed. Remain at cur val. ' +
                    str(error))
                rho = self.rho
                omega = self.omega
            else:
                Log.error('***** Optim failed. Set to prior. ' + str(error))
                omega = (self.gamma + 1) * np.ones(self.K)
                rho = 1 / float(1 + self.gamma) * np.ones(self.K)

        return rho, omega
Example #30
def calcLocalParams(Data, LP,
                    transTheta=None, startTheta=None,
                    limitMemoryLP=1,
                    hmm_feature_method_LP='forward+backward',
                    mPairIDs=None,
                    cslice=(0, None),
                    **kwargs):
    ''' Compute local parameters for provided dataset.

    Returns
    -------
    LP : dict of local params, with fields
        * resp : 2D array, nAtom x K
        if limitMemoryLP=0:
            * respPair : 3D array, nAtom x K x K
        if limitMemoryLP=1:
            * TransCount : 3D array, nSeq x K x K
    '''
    # Unpack soft evidence 2D array
    logLik = LP['E_log_soft_ev']
    nAtom, K = logLik.shape

    # Calculate trans prob 2D array
    digammaSumTransTheta = digamma(np.sum(transTheta[:K, :K + 1], axis=1))
    transPi = digamma(transTheta[:K, :K]) - digammaSumTransTheta[:, np.newaxis]
    np.exp(transPi, out=transPi)

    # Calculate LOG of start state prob vector
    logstartPi = digamma(startTheta[:K]) - digamma(np.sum(startTheta[:K + 1]))

    # Set starting probs to uniform,
    # because Line A below updates first state's logLik to include logstartPi
    startPi = np.ones(K)
    logMargPr = np.empty(Data.nDoc)
    resp = np.empty((nAtom, K))

    # Unpack pairs to track for merging.
    if mPairIDs is None:
        mPairIDs = np.zeros((0, 2))
        M = 0
    else:
        if len(mPairIDs) == 0:
            mPairIDs = np.zeros((0, 2))
            M = 0
        else:
            mPairIDs = as2D(mPairIDs)
            M = mPairIDs.shape[0]
    assert mPairIDs.shape[1] == 2
    if hmm_feature_method_LP == 'forward':
        fmsg = np.zeros_like(LP['E_log_soft_ev'])
        # Run forward algorithm on each sequence n
        for n in xrange(Data.nDoc):
            start = Data.doc_range[n]
            stop = Data.doc_range[n + 1]
            logLik_n = logLik[start:stop]
            # Adding in start state probs, in log space for stability.
            logLik_n[0] += logstartPi

            PiInit, PiMat, K = _parseInput_TransParams(startPi, transPi)
            logSoftEv = _parseInput_SoftEv(logLik_n, K)
            T = logSoftEv.shape[0]
            SoftEv, lognormC = expLogLik(logSoftEv)
            fmsg_n, margPrObs = FwdAlg(PiInit, PiMat, SoftEv)
            if not np.all(np.isfinite(margPrObs)):
                raise ValueError('NaN values found. Numerical badness!')
            fmsg[start:stop] = fmsg_n
        LP['fmsg'] = fmsg

    elif limitMemoryLP:
        # Track sufficient statistics directly at each sequence.
        TransCount = np.empty((Data.nDoc, K, K))
        Htable = np.empty((Data.nDoc, K, K))
        mHtable = np.zeros((2 * M, K))

        # Run forward backward algorithm on each sequence n
        for n in xrange(Data.nDoc):
            start = Data.doc_range[n]
            stop = Data.doc_range[n + 1]
            logLik_n = logLik[start:stop]
            # Adding in start state probs, in log space for stability.
            logLik_n[0] += logstartPi  # Line A

            # Run fwd-fwd alg and record result.
            resp_n, lp_n, TransCount_n, Htable_n, mHtable_n = \
                FwdBwdAlg_LimitMemory(startPi, transPi, logLik_n, mPairIDs)
            resp[start:stop] = resp_n
            logMargPr[n] = lp_n
            TransCount[n] = TransCount_n
            Htable[n] = Htable_n
            mHtable += mHtable_n

        LP['resp'] = resp
        LP['evidence'] = np.sum(logMargPr)
        LP['TransCount'] = TransCount
        LP['Htable'] = Htable
        LP['mHtable'] = mHtable
    else:
        # Track pair-wise assignment probs for each sequence
        respPair = np.empty((nAtom, K, K))

        # Run the forward backward algorithm on each sequence
        for n in xrange(Data.nDoc):
            start = Data.doc_range[n]
            stop = Data.doc_range[n + 1]
            logLik_n = logLik[start:stop]
            # Adding in start state probs, in log space for stability.
            logLik_n[0] += logstartPi  # Line A

            resp_n, respPair_n, lp_n = \
                FwdBwdAlg(startPi, transPi, logLik_n)
            resp[start:stop] = resp_n
            respPair[start:stop] = respPair_n
            logMargPr[n] = lp_n

        LP['evidence'] = np.sum(logMargPr)
        LP['resp'] = resp
        LP['respPair'] = respPair
    # ... end if statement on limitMemoryLP

    return LP