Example #1
    def __generalizedBessel(self, y, M, lambda_, tau, alpha):
        # M is the upper limit of the summation index; y, lambda_, and tau
        # must be 1-D arrays.
        W = np.array([(lambda_**j * (y / tau)**(j * alpha)) /
                      (gammaFunc(j + 1) * gammaFunc(j * alpha))
                      for j in range(1, M + 1)])
        # Summation over j = 1..M; sums out the leading axis, leaving one
        # value per sample.
        W = W.sum(axis=0)
        return W
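
A quick standalone check of the same series, assuming gammaFunc is scipy.special.gamma; generalized_bessel is a hypothetical free-function stand-in for the method:

import numpy as np
from scipy.special import gamma as gammaFunc

def generalized_bessel(y, M, lambda_, tau, alpha):
    # Same series as above: sum over j = 1..M of
    # lambda_**j * (y / tau)**(j * alpha) / (Gamma(j + 1) * Gamma(j * alpha)).
    terms = [(lambda_**j * (y / tau)**(j * alpha)) /
             (gammaFunc(j + 1) * gammaFunc(j * alpha))
             for j in range(1, M + 1)]
    return np.sum(terms, axis=0)

y = np.linspace(0.1, 2.0, 5)
print(generalized_bessel(y, M=50, lambda_=1.0, tau=1.0, alpha=2.0))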
Example #2
def loggammaFunc(num):
    # log(Gamma(num)). Below 170 the gamma value itself still fits in a
    # float, so take the log directly; above that, apply the recurrence
    # log(Gamma(n)) = log(n - 1) + log(Gamma(n - 1)) to avoid overflow.
    result = 0
    if num < 170:
        result = log(gammaFunc(num), e)
    else:
        result = log(num - 1, e) + loggammaFunc(num - 1)
    return result
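
A sketch of how this could be exercised, assuming the function above is in scope, log and e come from the math module, and gammaFunc is scipy.special.gamma; math.lgamma gives an independent reference value:

from math import lgamma

print(loggammaFunc(5.0))    # log(4!) = log(24), about 3.178
print(lgamma(5.0))          # standard-library reference value
print(loggammaFunc(500.0))  # large argument: the recurrence avoids overflow
print(lgamma(500.0))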
Example #3
    def GammaDist(self, gamma):
        # Gamma(al, be) density:
        # be**al / Gamma(al) * gamma**(al - 1) * exp(-be * gamma).
        return ((self.be**self.al) / gammaFunc(self.al)) * (gamma**(
            self.al - 1)) * mpmath.exp(-self.be * gamma)
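
A minimal sanity check, assuming gammaFunc is scipy.special.gamma; GammaPrior is a hypothetical host class standing in for whatever object carries the shape al and rate be, and the density should integrate to 1:

import mpmath
from scipy.special import gamma as gammaFunc

class GammaPrior:
    def __init__(self, al, be):
        self.al, self.be = al, be  # hypothetical shape/rate attributes

    def GammaDist(self, gamma):
        # Same expression as the method above.
        return ((self.be**self.al) / gammaFunc(self.al)) * (gamma**(
            self.al - 1)) * mpmath.exp(-self.be * gamma)

p = GammaPrior(al=2.0, be=3.0)
print(mpmath.quad(p.GammaDist, [0, mpmath.inf]))  # expect ~1.0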
Example #4
    def performLDA(self):
        for iteration in range(self.numIterations):
            self.logLDA()
            self.logOut('Variational Iteration : ' + str(iteration))
            # E-Step : Learning phi and gamma

            # initialize the variational parameter
            phi = []
            gamma = ndarray(shape=(self.intNumDoc, self.intNumTopic),
                            dtype=float)
            for d in range(self.intNumDoc):
                phi.append(
                    ndarray(shape=(self.numWordPerDoc[d], self.intNumTopic),
                            dtype=float))
                for n in range(self.numWordPerDoc[d]):
                    for k in range(self.intNumTopic):
                        phi[d][n][k] = 1.0 / float(self.intNumTopic)
            for k in range(self.intNumTopic):
                for d in range(self.intNumDoc):
                    gamma[d][k] = self.alpha[k] + float(
                        self.intUniqueWord) / float(self.intNumTopic)

            for d in range(self.intNumDoc):
                for iterationInternal in range(self.numInternalIterations):
                    # Learning phi
                    for n in range(self.numWordPerDoc[d]):
                        for k in range(self.intNumTopic):
                            phi[d][n][k] = self.beta[k][
                                self.corpusList[d][n]] * exp(
                                    polygamma(0, gamma[d][k]))
                        normalizeConstantPhi = sum(phi[d][n])
                        for k in range(self.intNumTopic):
                            phi[d][n][k] = phi[d][n][k] / normalizeConstantPhi
                    # Learning gamma
                    for k in range(self.intNumTopic):
                        gamma[d][k] = self.alpha[k]
                        for n in range(self.numWordPerDoc[d]):
                            gamma[d][k] = gamma[d][k] + phi[d][n][k]

            # M-Step : Learning alpha and beta

            # Learning Beta
            for k in range(self.intNumTopic):
                for d in range(self.intNumDoc):
                    for n in range(self.numWordPerDoc[d]):
                        self.beta[k][self.corpusList[d][n]] = self.beta[k][
                            self.corpusList[d][n]] + phi[d][n][k]
                normalizeConstantBeta = sum(self.beta[k])
                for v in range(self.intUniqueWord):
                    self.beta[k][v] = self.beta[k][v] / normalizeConstantBeta

            # Learning Alpha
            # calculate the current ELBO with respect to the current alpha
            ELBOMax = 0
            for d in range(self.intNumDoc):
                ELBOMax = ELBOMax + log(gammaFunc(sum(self.alpha)), e)
                for k in range(self.intNumTopic):
                    ELBOMax = ELBOMax - log(gammaFunc(self.alpha[k]), e)
                    ELBOMax = ELBOMax + (self.alpha[k] - 1) * (polygamma(
                        0, gamma[d][k]) - polygamma(0, sum(gamma[d])))
            tempAlpha = ndarray(shape=(self.intNumTopic), dtype=float)
            bestAlpha = ndarray(shape=(self.intNumTopic), dtype=float)
            for k in range(self.intNumTopic):
                bestAlpha[k] = self.alpha[k]
                tempAlpha[k] = self.alpha[k]
            self.logOut('Newton-Raphson Itr - ELBO : ' + str(ELBOMax) +
                        ' | alpha : ' + str(tempAlpha))
            # Newton-Raphson optimization
            for itr in range(self.numNewtonIteration):
                # Build the Hessian matrix and the gradient vector
                H = ndarray(shape=(self.intNumTopic, self.intNumTopic),
                            dtype=float)
                g = ndarray(shape=(self.intNumTopic), dtype=float)
                for k1 in range(self.intNumTopic):
                    g[k1] = float(self.intNumDoc) * (polygamma(
                        0, sum(tempAlpha)) - polygamma(0, tempAlpha[k1]))
                    for d in range(self.intNumDoc):
                        g[k1] = g[k1] + (polygamma(0, gamma[d][k1]) -
                                         polygamma(0, sum(gamma[d])))
                    for k2 in range(self.intNumTopic):
                        H[k1][k2] = 0
                        if k1 == k2:
                            H[k1][k2] = H[k1][k2] - float(
                                self.intNumDoc) * polygamma(1, tempAlpha[k1])
                        H[k1][k2] = H[k1][k2] + float(
                            self.intNumDoc) * polygamma(1, sum(tempAlpha))

                # Update alpha in the log domain
                deltaAlpha = np.dot(np.linalg.inv(H), g)

                for k in range(self.intNumTopic):
                    logAlphaK = log(tempAlpha[k], e)
                    logAlphaK = logAlphaK - deltaAlpha[k]
                    tempAlpha[k] = exp(logAlphaK)
                    if tempAlpha[k] < 0.00001:
                        tempAlpha[k] = 0.00001

                # calculate the ELBO with respect to the new alpha
                ELBOAfter = 0
                for d in range(self.intNumDoc):
                    ELBOAfter = ELBOAfter + log(gammaFunc(sum(tempAlpha)), e)
                    for k in range(self.intNumTopic):
                        ELBOAfter = ELBOAfter - log(gammaFunc(tempAlpha[k]), e)
                        ELBOAfter = ELBOAfter + (
                            tempAlpha[k] - 1) * (polygamma(0, gamma[d][k]) -
                                                 polygamma(0, sum(gamma[d])))

                self.logOut('Newton-Raphson Itr - ELBO : ' + str(ELBOAfter) +
                            ' | alpha : ' + str(tempAlpha))

                if ELBOMax <= ELBOAfter:
                    ELBOMax = ELBOAfter
                    for k in range(self.intNumTopic):
                        bestAlpha[k] = tempAlpha[k]

            self.alpha = bestAlpha
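
The alpha update above is a log-domain Newton-Raphson step on the Dirichlet parameter, as in variational inference for LDA. A vectorized sketch of one such step, assuming numpy and scipy.special.polygamma; it mirrors the explicit full-matrix inverse used in the loop above:

import numpy as np
from scipy.special import polygamma

def newton_alpha_step(alpha, gamma, eps=1e-5):
    # One log-domain Newton-Raphson step on alpha.
    # gamma has shape (numDoc, numTopic), matching the variational
    # parameters computed in the E-step above.
    D, K = gamma.shape
    # Gradient of the alpha-dependent ELBO terms.
    g = (D * (polygamma(0, alpha.sum()) - polygamma(0, alpha))
         + (polygamma(0, gamma)
            - polygamma(0, gamma.sum(axis=1, keepdims=True))).sum(axis=0))
    # Hessian: constant D * trigamma(sum(alpha)) in every entry, minus a
    # diagonal D * trigamma(alpha_k) correction.
    H = np.full((K, K), D * polygamma(1, alpha.sum()))
    H -= np.diag(D * polygamma(1, alpha))
    new_alpha = np.exp(np.log(alpha) - np.linalg.inv(H) @ g)
    return np.maximum(new_alpha, eps)  # same 1e-5 floor as the loop above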