Example #1
 def _compute_posteriors(self, fwdlattice, bwdlattice):
     # gamma is guaranteed to be correctly normalized by logprob at
     # all frames, unless we do approximate inference using pruning.
     # So, we will normalize each frame explicitly in case we
     # pruned too aggressively.
     log_gamma = fwdlattice + bwdlattice
     log_normalize(log_gamma, axis=1)
     return np.exp(log_gamma)
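None of the snippets on this page include the log_normalize helper itself. Example #1 calls an in-place variant that renormalizes each row of log_gamma before exponentiating; a minimal sketch, assuming it simply subtracts the log-sum-exp along the requested axis (the signature is inferred from the call above, not taken from the project):

from scipy.special import logsumexp

def log_normalize(a, axis=None):
    # Subtract the log-sum-exp along `axis` in place, so that exp(a)
    # sums to 1 over that axis. Hypothetical stand-in for the helper
    # called in Example #1, not the project's actual utility.
    a -= logsumexp(a, axis=axis, keepdims=True)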
Example #2
    def compute_rho(self, x):

        gamma = self.prior['gamma']
        log_like_x = self.caclulate_log_lik_x(x)
        # collapsed
        E_Nc_minus_n = np.sum(self.rho, 0, keepdims=True) - self.rho
        E_Nc_minus_n_cumsum_geq = np.fliplr(
            np.cumsum(np.fliplr(E_Nc_minus_n), axis=1))
        E_Nc_minus_n_cumsum = E_Nc_minus_n_cumsum_geq - E_Nc_minus_n

        # var_not_i = np.sum(self.rho * (1 - self.rho), 0, keepdims=True) - self.rho * (1 - self.rho)
        # var_not_i_eq_k = np.zeros((self.N, self.T))
        # for t in range(self.T):
        #     if t != 0:
        #         var_not_i_eq_k[:, t] = np.sum(E_Nc_minus_n[:, :t], 1)
        # var_not_i_eq_k = var_not_i_eq_k * E_greater_i
        # rho += (np.log(1 + E_Nc_minus_n) - var_not_i / (2 * ((1 + E_Nc_minus_n) ** 2))) + (
        #             np.log(gamma + E_greater_i) - var_not_i_eq_k / (2 * ((gamma + E_greater_i) ** 2))) + np.log(
        #     1 + gamma + E_Nc_minus_n + E_greater_i)

        first_term = np.log(1 + E_Nc_minus_n) - np.log(1 + gamma +
                                                       E_Nc_minus_n_cumsum_geq)
        first_term[:, self.T - 1] = 0
        dummy = np.log(gamma +
                       E_Nc_minus_n_cumsum) - np.log(1 + gamma +
                                                     E_Nc_minus_n_cumsum_geq)
        second_term = np.cumsum(dummy, axis=1) - dummy
        rho = log_like_x + (first_term + second_term)

        log_rho, log_n = log_normalize(rho)
        rho = np.exp(log_rho)
        return rho
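Example #2 (and most of the HDP snippets below) uses a tuple-returning form instead, log_rho, log_n = log_normalize(rho). A minimal NumPy sketch, assuming it normalizes over the last axis and also returns the log normalizer (a guess at the shared utility, not the projects' actual code):

import numpy as np
from scipy.special import logsumexp

def log_normalize(v):
    # Return (v - log_norm, log_norm), where log_norm is the log-sum-exp
    # over the last axis, so exp of the first result sums to 1 along it.
    v = np.asarray(v, dtype=float)
    log_norm = logsumexp(v, axis=-1, keepdims=True)
    return v - log_norm, log_norm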
Example #3
File: hdp.py  Project: aacharya/DSLDA2
    def __init__(self, T, K, D, size_vocab, eta, trsz, hdp_hyperparam):
        ''' this follows the convention of the HDP paper
            gamma, first level concentration
            alpha, second level concentration
            eta, the topic Dirichlet
            T, top level truncation level
            K, second level truncation level
            size_vocab, size of vocab
            hdp_hyperparam, the hyperparameter of hdp '''
    
        self.m_hdp_hyperparam = hdp_hyperparam

        self.m_T = T # higher level truncation
        self.m_K = K # for now, we assume all the same for the second level truncation
        self.m_size_vocab = size_vocab

        # print "%d %d %d" %(T, size_vocab, D)

        self.m_beta = np.random.gamma(1.0, 1.0, (T, size_vocab)) * D*100/(T*size_vocab)
        (log_m_beta, log_norm) = utils.log_normalize(self.m_beta)
        self.m_beta = np.exp(log_m_beta)
        self.save_topics("lambda.txt")
        self.m_eta = eta  

        self.m_alpha = hdp_hyperparam.m_alpha_a/hdp_hyperparam.m_alpha_b
        self.m_gamma = hdp_hyperparam.m_gamma_a/hdp_hyperparam.m_gamma_b
        self.m_var_sticks = np.zeros((2, T-1))
        self.m_var_sticks[0] = 1.0
        self.m_var_sticks[1] = self.m_gamma
        self.r = np.zeros((6, self.m_K))
        self.dmu = np.zeros((trsz, 6)) 

        # variational posterior parameters for hdp
        self.m_var_gamma_a = hdp_hyperparam.m_gamma_a
        self.m_var_gamma_b = hdp_hyperparam.m_gamma_b
Example #4
    def CStep(self, reg_irls):
        # cluster posterior probabilities p(c_i=g|X)
        self.h_ig = np.exp(utl.log_normalize(self.log_alphag_fg_xij))

        self.MAP()
        # c_ig the hard partition of the curves

        #Compute the optimized criterion
        cig_log_alphag_fg_xij = self.c_ig * self.log_alphag_fg_xij
        self.comp_loglik = sum(cig_log_alphag_fg_xij.sum(axis=1)) + reg_irls
Example #5
    def var_inf_2d(self, x, Elogsticks_1nd, ite):

        Elog_phi = self.caclulate_log_lik_x(x)

        second_max_iter = 2000 if self.second_max_iter == -1 else self.second_max_iter
        lambdas = 1
        self.init_second_params(x.shape[0], x)
        Elogsticks_2nd = self.expect_log_sticks(self.rho, self.T,
                                                self.prior['gamma'],
                                                self.prior['eta'])
        for i in range(second_max_iter):
            # compute var_theta

            if (i + 1) % (second_max_iter // 10) == 0:
                lambdas -= 0.1
            temp_var_theta = self.var_theta
            self.var_theta = self.rho.T.dot(Elog_phi) + Elogsticks_1nd
            log_var_theta, log_n = log_normalize(self.var_theta)
            self.var_theta = (
                1 - lambdas) * temp_var_theta + lambdas * np.exp(log_var_theta)

            temp_rho = self.rho
            self.rho = self.var_theta.dot(Elog_phi.T).T + Elogsticks_2nd
            log_rho, log_n = log_normalize(self.rho)
            self.rho = (1 - lambdas) * temp_rho + lambdas * np.exp(log_rho)
            Elogsticks_2nd = self.expect_log_sticks(self.rho, self.T,
                                                    self.prior['gamma'],
                                                    self.prior['eta'])

        self.temp_top_stick += np.sum(self.var_theta, 0)
        self.temp_k_ss += np.sum(self.rho.dot(self.var_theta), 0)
        dot_x = np.einsum("ijk, ilk -> ijl", x[:, :, np.newaxis],
                          x[:, :, np.newaxis])
        t_r = np.sum(
            self.rho[:, :, np.newaxis, np.newaxis] * dot_x[:, np.newaxis], 0)
        self.temp_xi_ss += np.sum(
            self.var_theta[:, :, np.newaxis, np.newaxis] * t_r[:, np.newaxis],
            0)

        return None
Example #6
def gmm_sample(means, stds, log_pais, num_samples):
    samples = torch.cat([
        gaussian_sample(mean, std, num_samples)[:, np.newaxis, :]
        for mean, std in zip(means, stds)
    ],
                        axis=1)
    # ixs = np.random.choice(k, size=num_samples, p=np.exp(log_weights))
    # weights = log_normalize(log_pais)
    # log_weights = log_normalize(log_pais)
    # print(torch.exp(log_weights))
    # print(log_weights)
    weights = torch.exp(log_normalize(log_pais))
    ixs = torch.multinomial(weights, num_samples, replacement=True)
    # ixs = np.random.choice(2, size=num_samples, p=weights.detach())

    return torch.stack([samples[i, ix, :] for i, ix in enumerate(ixs)])
Example #7
    def var_inf(self, x):

        begin = time.time()
        for ite in range(self.args.max_iter):
            # compute rho
            E_log_1_pi = np.roll(
                np.cumsum(digamma(self.h) - digamma(self.g + self.h)), 1)
            E_log_1_pi[0] = 0

            self.rho = self.caclulate_log_lik_x(x) + digamma(
                self.g) - digamma(self.g + self.h) + E_log_1_pi

            log_rho, log_n = log_normalize(self.rho)
            self.rho = np.exp(log_rho)

            # compute k
            self.k = self.u / self.v
            self.k[self.k > self.max_k] = self.max_k

            self.update_zeta_xi(x, self.rho)
            self.update_u_v(self.rho)
            self.update_g_h(self.rho)

            print(ite)
            if ite == self.args.max_iter - 1:
                times = time.time() - begin
                logger = open(os.path.join(LOG_DIR, "log_times_0.txt"), 'a')
                logger.write('nyu: times: {}\n'.format(times))
                logger.close()
                self.k = self.u / self.v
                self.k[self.k > self.max_k] = self.max_k
                self.pi = calculate_mix(self.g, self.h, self.T)
                self.calculate_new_com()
                if self.args.verbose:
                    print('mu: {}'.format(self.xi))
                    print('k: {}'.format(self.k))
                    print('pi: {}'.format(self.pi))
                    print('times: {}'.format(times))
Example #8
File: hdp.py  Project: qss2012/LDA-VEM
    def __init__(self, T, K, D, size_vocab, eta, trsz, hdp_hyperparam):
        ''' this follows the convention of the HDP paper
            gamma, first level concentration
            alpha, second level concentration
            eta, the topic Dirichlet
            T, top level truncation level
            K, second level truncation level
            size_vocab, size of vocab
            hdp_hyperparam, the hyperparameter of hdp '''

        self.m_hdp_hyperparam = hdp_hyperparam

        self.m_T = T  # higher level truncation
        self.m_K = K  # for now, we assume all the same for the second level truncation
        self.m_size_vocab = size_vocab

        # print "%d %d %d" %(T, size_vocab, D)

        self.m_beta = np.random.gamma(
            1.0, 1.0, (T, size_vocab)) * D * 100 / (T * size_vocab)
        (log_m_beta, log_norm) = utils.log_normalize(self.m_beta)
        self.m_beta = np.exp(log_m_beta)
        self.save_topics("lambda.txt")
        self.m_eta = eta

        self.m_alpha = hdp_hyperparam.m_alpha_a / hdp_hyperparam.m_alpha_b
        self.m_gamma = hdp_hyperparam.m_gamma_a / hdp_hyperparam.m_gamma_b
        self.m_var_sticks = np.zeros((2, T - 1))
        self.m_var_sticks[0] = 1.0
        self.m_var_sticks[1] = self.m_gamma
        self.r = np.zeros((6, self.m_K))
        self.dmu = np.zeros((trsz, 6))

        # variational posterior parameters for hdp
        self.m_var_gamma_a = hdp_hyperparam.m_gamma_a
        self.m_var_gamma_b = hdp_hyperparam.m_gamma_b
Example #9
def count_ps_from_beta_ref(n, beta):
    log_ws = [
        log_counts_to_cols(count) + (-beta * entropy_from_counts(count))
        for count in enumerate_counts_iter(n)
    ]
    return map(exp, log_normalize(log_ws))
Example #10
    def var_inf_2d(self, x, Elogsticks_1nd, ite):

        D = self.D
        Elog_phi = ((x.dot((self.xi * (self.u / self.v)[:, np.newaxis]).T)) +
                    (D / 2 - 1) * (digamma(self.u) - np.log(self.v)) -
                    (D / 2 * np.log(2 * np.pi)) -
                    (d_besseli(D / 2 - 1, self.k)) *
                    (self.u / self.v - self.k) -
                    np.log(iv((D / 2 - 1), self.k) + np.exp(-700)))

        second_max_iter = 5000 if self.second_max_iter == -1 else self.second_max_iter
        self.init_second_params(x.shape[0])
        likelihood = 0.0
        old_likelihood = 1
        converge = 1
        Elogsticks_2nd = self.expect_log_sticks(self.g, self.h, self.T)
        for i in range(second_max_iter):
            # compute var_theta

            self.var_theta = self.rho.T.dot(Elog_phi) + Elogsticks_1nd
            log_var_theta, log_n = log_normalize(self.var_theta)
            self.var_theta = np.exp(log_var_theta)

            self.rho = self.var_theta.dot(Elog_phi.T).T + Elogsticks_2nd
            log_rho, log_n = log_normalize(self.rho)
            self.rho = np.exp(log_rho)

            self.update_g_h(self.rho)
            Elogsticks_2nd = self.expect_log_sticks(self.g, self.h, self.T)

            likelihood = 0.0
            # compute likelihood
            likelihood += np.sum(
                (Elogsticks_1nd - log_var_theta) * self.var_theta)

            v = np.vstack((self.g, self.h))
            log_alpha = np.log(self.prior['gamma'])
            likelihood += (self.T - 1) * log_alpha
            dig_sum = digamma(np.sum(v, 0))
            likelihood += np.sum(
                (np.array([1.0, self.prior['gamma']])[:, np.newaxis] - v) *
                (digamma(v) - dig_sum))
            likelihood -= np.sum(gammaln(np.sum(v, 0))) - np.sum(gammaln(v))

            # Z part
            likelihood += np.sum((Elogsticks_2nd - log_rho) * self.rho)

            # X part, the data part
            likelihood += np.sum(self.rho.T *
                                 np.dot(self.var_theta, Elog_phi.T))

            if i > 0:
                converge = (likelihood - old_likelihood) / abs(old_likelihood)
            old_likelihood = likelihood

            if converge < self.args.threshold:
                break

        self.temp_top_stick += np.sum(self.var_theta, 0)
        self.temp_k_ss += np.sum(self.rho.dot(self.var_theta), 0)
        self.temp_xi_ss += self.var_theta.T.dot(self.rho.T.dot(x))

        if ite == self.args.max_iter - 1:
            self.container['rho'].append(self.rho)
            self.container['var_theta'].append(self.var_theta)

        return likelihood
Example #11
    def doc_e_step(self,
                   doc,
                   ss,
                   Elogbeta,
                   Elogsticks_1st,
                   var_converge,
                   fresh=False):

        Elogbeta_doc = Elogbeta[:, doc.words]
        v = np.zeros((2, self.m_K - 1))

        phi = np.ones((doc.length, self.m_K)) * 1.0 / self.m_K

        # the following line is of no use
        Elogsticks_2nd = expect_log_sticks(v)

        likelihood = 0.0
        old_likelihood = -1e100  # large negative but finite starting value
        converge = 1.0
        eps = 1e-100

        iter = 0
        max_iter = 100
        #(TODO): support second level optimization in the future
        while iter < max_iter and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # var_phi
            if iter < 3 and fresh:
                var_phi = np.dot(phi.T, (Elogbeta_doc * doc.counts).T)
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.dot(
                    phi.T, (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)

            # phi
            if iter < 3:
                phi = np.dot(var_phi, Elogbeta_doc).T
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)
            else:
                phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)

            # v
            phi_all = phi * np.array(doc.counts)[:, np.newaxis]
            v[0] = 1.0 + np.sum(phi_all[:, :self.m_K - 1], 0)
            phi_cum = np.flipud(np.sum(phi_all[:, 1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))
            Elogsticks_2nd = expect_log_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K - 1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum(
                (np.array([1.0, self.m_alpha])[:, np.newaxis] - v) *
                (sp.psi(v) - dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(
                sp.gammaln(v))

            # Z part
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T *
                                 np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood) / abs(old_likelihood)
            old_likelihood = likelihood

            if converge < 0:
                print "warning, likelihood is decreasing!"

            iter += 1

        # update the suff_stat ss
        ss.m_var_sticks_ss += np.sum(var_phi, 0)
        ss.m_var_beta_ss[:, doc.words] += np.dot(var_phi.T, phi.T * doc.counts)

        return (likelihood)
Example #12
    def doc_e_step(self,
                   doc,
                   ss,
                   Elogsticks_1st,
                   word_list,
                   unique_words,
                   var_converge,
                   max_iter=100):
        """
        e step for a single doc, update local hidden variables
        """

        batchids = [unique_words[id] for id in doc.words]

        Elogbeta_doc = self.m_Elogbeta[:, doc.words]
        ## very similar to the HDP equations
        v = np.zeros(
            (2, self.m_K - 1))  # pi[i] ~ Beta(1, alpha), i = 1, ..., T-1
        v[0] = 1.0
        v[1] = self.m_alpha

        # The following line is of no use.
        Elogsticks_2nd = expect_log_sticks(
            v)  # Elogsticks_2nd represents document level

        # back to the uniform
        phi = np.ones((len(
            doc.words), self.m_K)) * 1.0 / self.m_K  # this is phi[d, n, i]

        likelihood = 0.0
        old_likelihood = -1e100
        converge = 1.0
        eps = 1e-100

        iter = 0
        # not yet support second level optimization yet, to be done in the future
        while iter < max_iter and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # var_phi
            # var_phi seems to be zeta
            if iter < 3:
                var_phi = np.dot(phi.T, (Elogbeta_doc * doc.counts).T)
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.dot(
                    phi.T, (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)

            # phi
            if iter < 3:
                phi = np.dot(var_phi, Elogbeta_doc).T
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)
            else:
                phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)

            # v
            # v seems to be gamma
            phi_all = phi * np.array(doc.counts)[:, np.newaxis]
            v[0] = 1.0 + np.sum(phi_all[:, :self.m_K - 1], 0)
            phi_cum = np.flipud(np.sum(phi_all[:, 1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))
            Elogsticks_2nd = expect_log_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K - 1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum(
                (np.array([1.0, self.m_alpha])[:, np.newaxis] - v) *
                (sp.psi(v) - dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(
                sp.gammaln(v))

            # Z part
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T *
                                 np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood) / abs(old_likelihood)
            old_likelihood = likelihood

            #if converge < -0.000001:
            #print("warning, likelihood is decreasing!")

            iter += 1

        # update the suff_stat ss
        # this time it only contains information from one doc
        ss.m_var_sticks_ss += np.sum(var_phi, 0)
        ss.m_var_beta_ss[:, batchids] += np.dot(var_phi.T, phi.T * doc.counts)

        return (likelihood)
Example #13
    def doc_e_step(self, batch_count, doc, ss, Elogsticks_1st, word_list, unique_words, var_converge, max_iter=500):
        """
        e step for a single doc
        """

        batchids = [unique_words[id] for id in doc.words]  # map words to their ids within this batch, not global ids (doc.words stores the global ids)

        Elogbeta_doc = self.m_Elogbeta[:, doc.words] 
        ## very similar to the hdp equations
        v = np.zeros((2, self.m_K-1))  
        v[0] = 1.0
        v[1] = self.m_alpha

        # The following line is of no use.
        Elogsticks_2nd = expect_log_sticks(v)

        # back to the uniform
        phi = np.ones((len(doc.words), self.m_K)) * 1.0/self.m_K
        var_phi = np.dot(phi.T, (Elogbeta_doc * doc.counts).T) # K x T

        likelihood = 0.0
        old_likelihood = -1e100
        converge = 1.0 
        eps = 1e-100
        
        iter = 0
        # not yet support second level optimization yet, to be done in the future
        while iter < max_iter \
                and (converge < 0.0 or converge > var_converge):
            # print "%s [batch_count = %d] converge = %f" % (getTime(), batch_count, converge)
            # last_var_phi = var_phi
            # last_phi = phi
            # last_v = v.copy()

            ### update variational parameters
            # var_phi (phi in the equations); same update as the batch HDP
            if iter < 3:
                var_phi = np.dot(phi.T,  (Elogbeta_doc * doc.counts).T)
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.dot(phi.T,  (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)

            # phi (zeta in the equations); same update as the batch HDP
            if iter < 3:
                phi = np.dot(var_phi, Elogbeta_doc).T
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)
            else:
                phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)

            # v
            phi_all = phi * np.array(doc.counts)[:,np.newaxis]
            v[0] = 1.0 + np.sum(phi_all[:,:self.m_K-1], 0)
            phi_cum = np.flipud(np.sum(phi_all[:,1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))
            Elogsticks_2nd = expect_log_sticks(v)

            # meanchange_var_phi = np.mean(abs(var_phi - last_var_phi))
            # meanchange_phi = np.mean(abs(phi - last_phi))
            # meanchange_v = np.mean(abs(v - last_v))

            # print "%s [batch_count = %d iter = %d] meanchange_var_phi = %f, meanchange_phi = %f, meanchange_v = %f" \
            #       % (getTime(), batch_count, iter, meanchange_var_phi, meanchange_phi, meanchange_v)
            # if (meanchange_v < 0.001 and meanchange_phi < 0.001 and meanchange_var_phi < 0.001):
            #     break

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K-1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum((np.array([1.0, self.m_alpha])[:,np.newaxis]-v) * (sp.psi(v)-dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(sp.gammaln(v))

            # Z part
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T * np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood)/abs(old_likelihood)

            if converge < -0.000001:
                print "%s [batch_count = %d] warning, likelihood is decreasing! old_likelihood = %f new_likelihood = %f" % (getTime(), batch_count, old_likelihood, likelihood)

            old_likelihood = likelihood

            iter += 1
            
        # update the suff_stat ss 
        # this time it only contains information from one doc
        ss.m_var_sticks_ss += np.sum(var_phi, 0)   
        ss.m_var_beta_ss[:, batchids] += np.dot(var_phi.T, phi.T * doc.counts) # T x (number of unique words in the mini-batch)

        return(likelihood)
Example #14
File: hdp.py  Project: aacharya/DSLDA2
    def doc_e_step(self, doc, ss, trlabel, docnum, Elogbeta, Elogsticks_1st, Elogsticks_2nd, var_converge, fresh=False):

        Elogbeta_doc = Elogbeta[:, doc.words] 
        v = np.zeros((2, self.m_K-1))

        phi = np.ones((doc.length, self.m_K)) * 1.0/self.m_K  # should be zeta

        likelihood = 0.0
        old_likelihood = -1e1000
        converge = 1.0 
        eps = 1e-100
        
        iter = 0
        max_iter = 10
        #(TODO): support second level optimization in the future
        while iter < max_iter: #and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # smallphi
            
            var_phi = np.dot(phi.T, (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
            (log_var_phi, log_norm) = utils.log_normalize(var_phi)
            var_phi = np.exp(log_var_phi)

 
            # phi  #zeta
            sval   = np.zeros((1, self.m_K))
            nwords = np.sum(doc.counts)
            tmp    = (self.r[trlabel,:] - self.r)
            sval   = np.dot(self.dmu[docnum,:],tmp) 
            sval   = sval/nwords
            sval   = 0

            phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd + sval
            (log_phi, log_norm) = utils.log_normalize(phi)
            phi = np.exp(log_phi)
            phi_all = phi * np.array(doc.counts)[:,np.newaxis]


            # local sticks
            v[0] = 1.0 + np.sum(phi_all[:,:self.m_K-1], 0)      #a_{nt}
            phi_cum = np.flipud(np.sum(phi_all[:,1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum)) #b_{nt}
            Elogsticks_2nd = expect_log_sticks(v)
            
            if iter == max_iter - 1:
                self.write_local_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K-1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum((np.array([1.0, self.m_alpha])[:,np.newaxis]-v) * (sp.psi(v)-dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(sp.gammaln(v))

            # Z part 
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T * np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood)/abs(old_likelihood)
            old_likelihood = likelihood
            
            if converge < 0:
                print "warning, likelihood is decreasing!"
            
            iter = iter + 1
            
        # update the suff_stat ss 
        ss.m_var_sticks_ss += np.sum(var_phi, 0)   
        ss.m_var_beta_ss[:, doc.words] += np.dot(var_phi.T, phi.T * doc.counts)
        ss.m_var_zeta[docnum,:] = np.sum((phi.T * doc.counts).T,0)

        return(likelihood)
Example #15
File: hdp.py  Project: qss2012/LDA-VEM
    def doc_inference(self, doc, docnum, Elogbeta, Elogsticks_1st,
                      var_converge, m_var_zeta):

        Elogbeta_doc = Elogbeta[:, doc.words]
        v = np.zeros((2, self.m_K - 1))

        phi = np.ones(
            (doc.length, self.m_K)) * 1.0 / self.m_K  # should be zeta

        # the following line is of no use
        Elogsticks_2nd = expect_log_sticks(v)

        likelihood = 0.0
        old_likelihood = -1e1000
        converge = 1.0
        eps = 1e-100

        iter = 0
        max_iter = 100
        #(TODO): support second level optimization in the future
        while iter < 20:  #and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # var_phi

            var_phi = np.dot(phi.T,
                             (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
            (log_var_phi, log_norm) = utils.log_normalize(var_phi)
            var_phi = np.exp(log_var_phi)

            # phi  #zeta
            phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
            (log_phi, log_norm) = utils.log_normalize(phi)
            phi = np.exp(log_phi)
            phi_all = phi * np.array(doc.counts)[:, np.newaxis]

            # local sticks
            v[0] = 1.0 + np.sum(phi_all[:, :self.m_K - 1], 0)  #a_{jt}
            phi_cum = np.flipud(np.sum(phi_all[:, 1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))  #b_{jt}
            Elogsticks_2nd = expect_log_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K - 1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum(
                (np.array([1.0, self.m_alpha])[:, np.newaxis] - v) *
                (sp.psi(v) - dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(
                sp.gammaln(v))

            # Z part
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T *
                                 np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood) / abs(old_likelihood)
            old_likelihood = likelihood
            """if converge < 0:
                print "warning, likelihood is decreasing!" """

            iter = iter + 1

        m_var_zeta[docnum, :] = np.sum((phi.T * doc.counts).T, 0)

        return (likelihood, m_var_zeta)
Example #16
    def doc_e_step(self,
                   batch_count,
                   doc,
                   ss,
                   Elogsticks_1st,
                   Elogsticks_2nd,
                   word_list,
                   unique_words,
                   var_converge,
                   max_iter=500):
        """
        e step for a single doc
        """
        batchids = [unique_words[id] for id in doc.words
                    ]  # map words to their ids within this batch, not global ids (doc.words stores the global ids)
        Elogbeta_doc = self.m_Elogbeta[:, :, doc.words]
        Elogbeta_doc_noise = self.m_Elogbeta_noise[doc.words]
        # Elogtime_doc.shape = (T, K)
        Elogtime_doc = np.array([[scipy.stats.norm.logpdf(doc.time, self.m_mu_t[t][k], self.m_sigma_t[t][k])\
                                  for k in range(self.m_K)] for t in range(self.m_T)])
        # Eloglocation_doc.shape = (T, K)
        Eloglocation_doc = np.array([[scipy.stats.multivariate_normal.logpdf((doc.latitude, doc.longitude),\
                                                                             self.m_mu_l[t][k], self.m_sigma_l[t][k])\
                                      for k in range(self.m_K)] for t in range(self.m_T)])

        # initialize x_hat
        # np.shape(x_hat) = (doc.length)
        x_hat = np.ones(doc.length) / 2  # every word starts at 0.5
        x_hat_bar = x_hat

        # probability that the post belongs to each node; initialized uniformly,
        # so every node is equally likely
        # np.shape(phi) = (T, K)
        phi = np.ones((self.m_T, self.m_K)) * 1.0 / self.m_K
        # probability that the post belongs to each event
        # np.shape(Elogbeta_doc * doc.counts) = (T, K, N),
        # np.shape(x_hat) = (N,) = (N x 1)
        # np.shape(var_phi) = (T,) = (T x 1)
        var_phi = np.sum(np.dot((Elogbeta_doc * doc.counts), x_hat) * phi, 1)

        likelihood = 0.0
        old_likelihood = -1e100
        converge = 1.0
        eps = 1e-100

        iter = 0
        while iter < max_iter and (converge < 0.0 or converge > var_converge):
            if iter < 3:
                var_phi = np.sum(
                    np.dot((Elogbeta_doc * doc.counts), x_hat) * phi, 1)
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.sum(np.dot((Elogbeta_doc * doc.counts), x_hat) * phi, 1) + Elogsticks_1st \
                          + np.sum(phi * Elogtime_doc, 1) + np.sum(phi * Eloglocation_doc, 1)
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)

            if iter < 3:
                phi = np.dot((Elogbeta_doc * doc.counts), x_hat)
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)
            else:
                phi = np.dot(
                    (Elogbeta_doc * doc.counts), x_hat
                ) + Elogsticks_2nd + Elogtime_doc + Eloglocation_doc
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)

            # update x_hat; approximated here with a two-way multinomial, which differs from the notes
            # first transpose the 3-D array from (T, K, N) to (N, T, K), multiply elementwise by phi,
            # sum over the K axis, and finally multiply by var_phi
            x_hat = self.m_Elogx[0] + np.dot(
                np.sum(
                    np.transpose(Elogbeta_doc * doc.counts,
                                 (2, 0, 1)) * phi, 2), var_phi)
            x_hat_bar = self.m_Elogx[1] + Elogbeta_doc_noise
            # then stack the two columns, normalize with log_normalize, and get an N x 2 matrix
            (log_x_hat, log_norm) = utils.log_normalize(
                np.column_stack((x_hat, x_hat_bar)))
            x_hat_final = np.exp(log_x_hat)
            # split the two columns again; they are used below
            log_x_hat_bar = log_x_hat[:, 1]
            log_x_hat = log_x_hat[:, 0]
            x_hat = x_hat_final[:, 0]
            x_hat_bar = x_hat_final[:, 1]

            likelihood = 0.0
            # compute likelihood
            # sum up the likelihood terms of the document-level parameters
            # fifth and seventh terms of the expansion; np.shape(Elogsticks_1st) = (T,) np.shape(log_var_phi) = (T,) = np.shape(var_phi)
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)
            # sixth and eighth terms of the expansion; np.shape(Elogsticks_2nd) = (T, K) np.shape(log_phi) = (T, K) = np.shape(phi)
            likelihood += np.sum(
                np.sum((Elogsticks_2nd - log_phi) * phi, 1) * var_phi)
            # fourth and ninth terms of the expansion; np.shape(self.m_Elogx) = (2,) np.shape(log_x_hat) = np.shape(x_hat) = (N,)
            likelihood += np.sum(
                np.sum(self.m_Elogx[0] - log_x_hat) * x_hat) + np.sum(
                    np.sum(self.m_Elogx[1] - log_x_hat_bar) * x_hat_bar)
            # first term of the expansion, split into two parts: words generated by noise and words that are not
            # np.shape(Elogbeta_doc) = (T, K, N) np.shape(var_phi) = (T,) np.shape(Elogbeta_doc_noise) = (N,)
            likelihood += np.sum(np.sum(np.dot((Elogbeta_doc * doc.counts), x_hat) * phi, 1) * var_phi) + \
                np.dot((Elogbeta_doc_noise * doc.counts), x_hat_bar)
            # second term of the expansion
            likelihood += np.sum(np.sum(Elogtime_doc * phi, 1) * var_phi)
            # third term of the expansion
            likelihood += np.sum(np.sum(Eloglocation_doc * phi, 1) * var_phi)

            converge = (likelihood - old_likelihood) / abs(old_likelihood)

            if converge < -0.000001:
                print "%s [batch_count = %d iter = %d] warning, likelihood is decreasing! old_likelihood = %f new_likelihood = %f" % (
                    getTime(), batch_count, iter, old_likelihood, likelihood)

            old_likelihood = likelihood

            iter += 1
        # print "%s [batch_count = %d iter = %d]  new_likelihood = %f" % (getTime(), batch_count, iter, likelihood)

        # update the suff_stat ss
        # this time it only contains information from one doc
        st = phi * var_phi[:, np.newaxis]
        ss.m_var_sticks_1st_ss += var_phi
        ss.m_var_sticks_2nd_ss += st
        #
        ss.m_var_beta_ss[:, :, batchids] += np.ones(
            [self.m_T, self.m_K, doc.length]) * x_hat * doc.counts * (
                (phi * var_phi[:, np.newaxis])[:, :, np.newaxis])
        ss.m_var_beta_noise_ss[batchids] += x_hat_bar * doc.counts
        #
        ss.m_var_mu_time_ss_numerator += st * doc.time
        ss.m_var_mu_time_ss_denominator += st
        ss.m_var_sigma_time_ss_numerator += st * pow(doc.time - self.m_mu_t, 2)
        ss.m_var_sigma_time_ss_denominator += st
        # elementwise multiplication broadcasts from the trailing axes
        ss.m_var_mu_location_ss_numerator += np.ones([
            self.m_T, self.m_K, 2
        ]) * np.array([doc.latitude, doc.longitude]) * (
            (phi * var_phi[:, np.newaxis])[:, :, np.newaxis])
        ss.m_var_mu_location_ss_denominator += st[:, :, np.newaxis]
        ss.m_var_sigma_location_ss_numerator += np.array([[ np.array([(pow(doc.latitude - self.m_mu_l[t][k][0], 2), (doc.latitude - self.m_mu_l[t][k][0]) * (doc.longitude - self.m_mu_l[t][k][1])),\
                                                                      ((doc.latitude - self.m_mu_l[t][k][0]) * (doc.longitude - self.m_mu_l[t][k][1]), pow(doc.longitude - self.m_mu_l[t][k][1], 2))]) \
                                                            * phi[t][k] * var_phi[t] for k in range(self.m_K)] for t in range(self.m_T)])
        ss.m_var_sigma_location_ss_denominator += np.array([[ np.ones((2, 2))* phi[t][k] * var_phi[t] \
                                                              for k in range(self.m_K)] for t in range(self.m_T)])

        return (likelihood)
Example #17
    def doc_e_step(self, doc, ss, Elogsticks_1st, \
                   word_list, unique_words, var_converge, \
                   max_iter=100):
        """
        e step for a single doc
        """

        batchids = [unique_words[id] for id in doc.words]

        Elogbeta_doc = self.m_Elogbeta[:, doc.words] 
        ## very similar to the hdp equations
        v = np.zeros((2, self.m_K-1))  
        v[0] = 1.0
        v[1] = self.m_alpha

        # The following line is of no use.
        Elogsticks_2nd = expect_log_sticks(v)

        # back to the uniform
        phi = np.ones((len(doc.words), self.m_K)) * 1.0/self.m_K

        likelihood = 0.0
        old_likelihood = -1e100
        converge = 1.0 
        eps = 1e-100
        
        iter = 0
        # not yet support second level optimization yet, to be done in the future
        while iter < max_iter and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # var_phi 
            if iter < 3:
                var_phi = np.dot(phi.T,  (Elogbeta_doc * doc.counts).T)
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.dot(phi.T,  (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            
            # phi
            if iter < 3:
                phi = np.dot(var_phi, Elogbeta_doc).T
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)
            else:
                phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)

            # v
            phi_all = phi * np.array(doc.counts)[:,np.newaxis]
            v[0] = 1.0 + np.sum(phi_all[:,:self.m_K-1], 0)
            phi_cum = np.flipud(np.sum(phi_all[:,1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))
            Elogsticks_2nd = expect_log_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K-1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum((np.array([1.0, self.m_alpha])[:,np.newaxis]-v) * (sp.psi(v)-dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(sp.gammaln(v))

            # Z part 
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T * np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood)/abs(old_likelihood)
            old_likelihood = likelihood

            if converge < -0.000001:
                print "warning, likelihood is decreasing!"
            
            iter += 1
            
        # update the suff_stat ss 
        # this time it only contains information from one doc
        ss.m_var_sticks_ss += np.sum(var_phi, 0)   
        ss.m_var_beta_ss[:, batchids] += np.dot(var_phi.T, phi.T * doc.counts)

        return(likelihood)
Example #18
    def doc_e_step(self,
                   count,
                   doc,
                   ss,
                   Elogbeta,
                   Elogsticks_1st,
                   var_converge,
                   fresh=False):

        Elogbeta_doc = Elogbeta[:, doc.words]  # T x doc.length fancy indexing: pull out the parameters for this document's words
        v = np.zeros((2, self.m_K - 1))  # 2 x (K-1), zeros
        # this was not in the original code; since Elogsticks_2nd is not used right away, the next two lines could also be left out
        v[0] = 1.0
        v[1] = self.m_alpha

        phi = np.ones(
            (doc.length, self.m_K)) * 1.0 / self.m_K  # doc.length x K, normalized

        # the following line is of no use
        Elogsticks_2nd = expect_log_sticks(v)  # compute Eq[log pi_jt], K-dimensional

        likelihood = 0.0
        # this was originally -1e1000
        old_likelihood = -1e100
        converge = 1.0
        eps = 1e-100

        iter = 0
        max_iter = 100
        # (TODO): support second level optimization in the future
        while iter < max_iter and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # var_phi is actually phi in the equations
            if iter < 3 and fresh:
                var_phi = np.dot(phi.T, (Elogbeta_doc *
                                         doc.counts).T)  # the product is a K x T matrix
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.dot(
                    phi.T, (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)

            # phi is zeta in the equations
            if iter < 3:
                phi = np.dot(var_phi,
                             Elogbeta_doc).T  # the product is a doc.length x K matrix; K and T are swapped relative to the paper
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)
            else:
                phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)

            # v
            phi_all = phi * np.array(doc.counts)[:, np.newaxis]
            v[0] = 1.0 + np.sum(phi_all[:, :self.m_K - 1], 0)  # update the variational parameter a_jt
            phi_cum = np.flipud(np.sum(phi_all[:, 1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))  # update the variational parameter b_jt
            Elogsticks_2nd = expect_log_sticks(v)  # K-dimensional

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            # second and fifth terms of the likelihood expansion
            likelihood += np.sum(
                (Elogsticks_1st - log_var_phi) *
                var_phi)  # 1 x T minus K x T: the first operand is broadcast to K x T before subtracting

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            # expanding the B function in the fourth term gives (self.m_K - 1) * log_alpha
            likelihood += (self.m_K - 1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            # combine log pi_jt and log(1 - pi_jt) from the fourth and seventh terms; v.shape = (2, K-1)
            likelihood += np.sum(
                (np.array([1.0, self.m_alpha])[:, np.newaxis] - v) *
                (sp.psi(v) - dig_sum))
            # expansion of the B function in the seventh term
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(
                sp.gammaln(v))

            # Z part
            # third and sixth terms of the likelihood expansion
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T *
                                 np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood) / abs(old_likelihood)
            old_likelihood = likelihood

            # if converge < 0:
            #     print "warning, likelihood is decreasing!"
            if converge < -0.000001:
                print "%s [batch_count = %d] warning, likelihood is decreasing! old_likelihood = %f new_likelihood = %f" % (
                    getTime(), count, old_likelihood, likelihood)

            iter += 1

        # update the suff_stat ss
        # prepare for the M-step updates of u_k, v_k and lambda_kw
        ss.m_var_sticks_ss += np.sum(var_phi,
                                     0)  # column-wise sum; overall this accumulates sum_j sum_t var_phi
        ss.m_var_beta_ss[:, doc.words] += np.dot(
            var_phi.T, phi.T * doc.counts)  # the term that follows eta in the lambda update

        return (likelihood)
Example #19
    def doc_e_step(self, doc, ss, Elogbeta, Elogsticks_1st, var_converge, fresh=False):

        Elogbeta_doc = Elogbeta[:, doc.words] 
        v = np.zeros((2, self.m_K-1))

        phi = np.ones((doc.length, self.m_K)) * 1.0/self.m_K

        # the following line is of no use
        Elogsticks_2nd = expect_log_sticks(v)

        likelihood = 0.0
        old_likelihood = -1e100  # large negative but finite starting value
        converge = 1.0 
        eps = 1e-100
        
        iter = 0
        max_iter = 100
        #(TODO): support second level optimization in the future
        while iter < max_iter and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # var_phi 
            if iter < 3 and fresh:
                var_phi = np.dot(phi.T, (Elogbeta_doc * doc.counts).T)
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.dot(phi.T, (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
                (log_var_phi, log_norm) = utils.log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)

            # phi
            if iter < 3:
                phi = np.dot(var_phi, Elogbeta_doc).T
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)
            else: 
                phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
                (log_phi, log_norm) = utils.log_normalize(phi)
                phi = np.exp(log_phi)

            # v
            phi_all = phi * np.array(doc.counts)[:,np.newaxis]
            v[0] = 1.0 + np.sum(phi_all[:,:self.m_K-1], 0)
            phi_cum = np.flipud(np.sum(phi_all[:,1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))
            Elogsticks_2nd = expect_log_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K-1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum((np.array([1.0, self.m_alpha])[:,np.newaxis]-v) * (sp.psi(v)-dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(sp.gammaln(v))

            # Z part 
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T * np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood)/abs(old_likelihood)
            old_likelihood = likelihood

            if converge < 0:
                print "warning, likelihood is decreasing!"
            
            iter += 1
            
        # update the suff_stat ss 
        ss.m_var_sticks_ss += np.sum(var_phi, 0)   
        ss.m_var_beta_ss[:, doc.words] += np.dot(var_phi.T, phi.T * doc.counts)

        return(likelihood)
Example #20
    def EStep(self, mixModel, mixParam, phi, variance_type):
        """
        E-step
        """
        for g in range(0, mixModel.G):
            alpha_g = mixParam.alpha_g[g]
            beta_g = mixParam.beta_g[g, :, :]
            #Wg = self.param.Wg[g,:,:]
            pi_jgk = mixParam.pi_jgk[g, :, :]

            log_pijgk_fgk_xij = np.zeros((mixModel.n * mixModel.m, mixModel.K))
            for k in range(0, mixModel.K):
                beta_gk = beta_g[:, k]
                if variance_type == enums.variance_types.common:
                    sgk = mixParam.sigma_g[g]
                else:
                    #?
                    sgk = mixParam.sigma_g[g, k]

                temp = phi.XBeta @ beta_gk
                temp = temp.reshape((len(temp), 1))
                z = ((mixModel.XR - temp)**2) / sgk
                #print(sgk)
                temp = np.array([
                    np.log(pi_jgk[:, k]) - 0.5 *
                    (np.log(2 * np.pi) + np.log(sgk))
                ]).T - 0.5 * z
                log_pijgk_fgk_xij[:,
                                  k] = temp.T  # conditional pdf of x_ij given c_i = g and z_i = k

            log_pijgk_fgk_xij = np.minimum(log_pijgk_fgk_xij,
                                           np.log(sys.float_info.max))
            log_pijgk_fgk_xij = np.maximum(log_pijgk_fgk_xij,
                                           np.log(sys.float_info.min))

            pijgk_fgk_xij = np.exp(log_pijgk_fgk_xij)
            sumk_pijgk_fgk_xij = np.array([pijgk_fgk_xij.sum(axis=1)
                                           ]).T  # sum over k
            log_sumk_pijgk_fgk_xij = np.log(sumk_pijgk_fgk_xij)  #[nxm x 1]

            self.log_tau_ijgk[
                g, :, :] = log_pijgk_fgk_xij - log_sumk_pijgk_fgk_xij @ np.ones(
                    (1, mixModel.K))
            self.tau_ijgk[g, :, :] = np.exp(
                utl.log_normalize(self.log_tau_ijgk[g, :, :]))

            temp = np.reshape(log_sumk_pijgk_fgk_xij.T,
                              (mixModel.n, mixModel.m))
            self.log_fg_xij[:, g] = temp.sum(
                axis=1
            )  # [n x 1]: sum over j=1,...,m: fg_xij = prod_j sum_k pi_{jgk} N(x_{ij}, mu_{gk}, s_{gk})
            self.log_alphag_fg_xij[:, g] = np.log(
                alpha_g) + self.log_fg_xij[:, g]  # [nxg]

        self.log_alphag_fg_xij = np.minimum(self.log_alphag_fg_xij,
                                            np.log(sys.float_info.max))
        self.log_alphag_fg_xij = np.maximum(self.log_alphag_fg_xij,
                                            np.log(sys.float_info.min))

        # cluster posterior probabilities p(c_i=g|X)
        self.h_ig = np.exp(utl.log_normalize(self.log_alphag_fg_xij))
        # log-likelihood
        temp = np.exp(self.log_alphag_fg_xij)
        self.loglik = sum(np.log(temp.sum(axis=1)))
Example #21
def log_gmm(x, means, stds, log_pais):
    component_log_densities = torch.stack([log_gaussian(x, mu, std) for (mu, std) in zip(means, stds)]).T
    # log_weights = torch.log(pais)
    log_weights = log_normalize(log_pais)
    return torch.logsumexp(component_log_densities + log_weights, axis=-1, keepdims=False)
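In symbols, Example #21 evaluates the usual mixture log-density by log-sum-exp; assuming log_normalize returns normalized log weights \log w_k for K components:

\log p(x) \;=\; \log \sum_{k=1}^{K} w_k \, \mathcal{N}(x \mid \mu_k, \sigma_k^2) \;=\; \operatorname{logsumexp}_k\!\left( \log w_k + \log \mathcal{N}(x \mid \mu_k, \sigma_k^2) \right)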
Example #22
File: hdp.py  Project: qss2012/LDA-VEM
    def doc_e_step(self,
                   doc,
                   ss,
                   trlabel,
                   docnum,
                   Elogbeta,
                   Elogsticks_1st,
                   Elogsticks_2nd,
                   var_converge,
                   fresh=False):

        Elogbeta_doc = Elogbeta[:, doc.words]
        v = np.zeros((2, self.m_K - 1))

        phi = np.ones(
            (doc.length, self.m_K)) * 1.0 / self.m_K  # should be zeta

        likelihood = 0.0
        old_likelihood = -1e1000
        converge = 1.0
        eps = 1e-100

        iter = 0
        max_iter = 10
        #(TODO): support second level optimization in the future
        while iter < max_iter:  #and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # smallphi

            var_phi = np.dot(phi.T,
                             (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
            (log_var_phi, log_norm) = utils.log_normalize(var_phi)
            var_phi = np.exp(log_var_phi)

            # phi  #zeta
            sval = np.zeros((1, self.m_K))
            nwords = np.sum(doc.counts)
            tmp = (self.r[trlabel, :] - self.r)
            sval = np.dot(self.dmu[docnum, :], tmp)
            sval = sval / nwords
            sval = 0

            phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd + sval
            (log_phi, log_norm) = utils.log_normalize(phi)
            phi = np.exp(log_phi)
            phi_all = phi * np.array(doc.counts)[:, np.newaxis]

            # local sticks
            v[0] = 1.0 + np.sum(phi_all[:, :self.m_K - 1], 0)  #a_{nt}
            phi_cum = np.flipud(np.sum(phi_all[:, 1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))  #b_{nt}
            Elogsticks_2nd = expect_log_sticks(v)

            if iter == max_iter - 1:
                self.write_local_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K - 1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum(
                (np.array([1.0, self.m_alpha])[:, np.newaxis] - v) *
                (sp.psi(v) - dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(
                sp.gammaln(v))

            # Z part
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T *
                                 np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood) / abs(old_likelihood)
            old_likelihood = likelihood

            if converge < 0:
                print "warning, likelihood is decreasing!"

            iter = iter + 1

        # update the suff_stat ss
        ss.m_var_sticks_ss += np.sum(var_phi, 0)
        ss.m_var_beta_ss[:, doc.words] += np.dot(var_phi.T, phi.T * doc.counts)
        ss.m_var_zeta[docnum, :] = np.sum((phi.T * doc.counts).T, 0)

        return (likelihood)
Example #23
File: hdp.py  Project: aacharya/DSLDA2
    def doc_inference(self, doc, docnum, Elogbeta, Elogsticks_1st, var_converge, m_var_zeta):

        Elogbeta_doc = Elogbeta[:, doc.words] 
        v = np.zeros((2, self.m_K-1))         

        phi = np.ones((doc.length, self.m_K)) * 1.0/self.m_K  # should be zeta

        # the following line is of no use
        Elogsticks_2nd = expect_log_sticks(v)

        likelihood = 0.0
        old_likelihood = -1e1000
        converge = 1.0 
        eps = 1e-100
        
        iter = 0
        max_iter = 100
        #(TODO): support second level optimization in the future
        while iter < 20: #and (converge < 0.0 or converge > var_converge):
            ### update variational parameters
            # var_phi

            var_phi = np.dot(phi.T, (Elogbeta_doc * doc.counts).T) + Elogsticks_1st
            (log_var_phi, log_norm) = utils.log_normalize(var_phi)
            var_phi = np.exp(log_var_phi)

            # phi  #zeta
            phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd 
            (log_phi, log_norm) = utils.log_normalize(phi)
            phi = np.exp(log_phi)
            phi_all = phi * np.array(doc.counts)[:,np.newaxis]
              
            # local sticks
            v[0] = 1.0 + np.sum(phi_all[:,:self.m_K-1], 0)  #a_{jt}
            phi_cum = np.flipud(np.sum(phi_all[:,1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum)) #b_{jt}
            Elogsticks_2nd = expect_log_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K-1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum((np.array([1.0, self.m_alpha])[:,np.newaxis]-v) * (sp.psi(v)-dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(sp.gammaln(v))

            # Z part 
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(phi.T * np.dot(var_phi, Elogbeta_doc * doc.counts))

            converge = (likelihood - old_likelihood)/abs(old_likelihood)
            old_likelihood = likelihood
            
            """if converge < 0:
                print "warning, likelihood is decreasing!" """
            
            iter = iter + 1
        
        m_var_zeta[docnum,:] = np.sum((phi.T * doc.counts).T,0)

        return(likelihood, m_var_zeta)