Example #1
def log_likelihood(self):
    ll = 0
    for states in self.states_list:
        # Map each latent state sequence through the emission matrix C
        # to simplex probabilities, then score the observed counts
        psi = states.stateseq.dot(self.C.T) + self.mu
        pi = ln_psi_to_pi(psi)
        ll += np.sum(states.data * np.log(pi))
    return ll
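Every example on this page funnels an unconstrained real vector psi through ln_psi_to_pi. In pgmult this is the logistic-normal (softmax) map onto the probability simplex; the following is a minimal standalone sketch of that mapping, not the library's actual implementation:

import numpy as np

def ln_psi_to_pi_sketch(psi):
    # Row-wise softmax: subtract the row max for numerical stability,
    # exponentiate, and normalize so each row sums to one
    psi = np.atleast_2d(psi)
    e = np.exp(psi - psi.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

pi = ln_psi_to_pi_sketch(np.random.randn(3, 4))
assert np.allclose(pi.sum(axis=-1), 1.0)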
Example #2
def lda_initializer(model):
    T, V = model.T, model.V
    # Load the CTM topic-word matrix (stored as log beta) and the
    # per-document logistic-normal parameters lambda
    model.beta = np.exp(np.loadtxt('ctm-out/000-log-beta.dat')
                            .reshape((-1, V))).T
    lmbda = np.loadtxt('ctm-out/000-lambda.dat').reshape((-1, T))
    # Only documents with at least one word get an initialized theta
    nonempty_docs = np.asarray(model.data.sum(1) > 0).ravel()
    model.theta[nonempty_docs] = ln_psi_to_pi(lmbda)
    model.resample_z()
    return model
Example #3
        def log_joint_C(C):
            ll = 0
            for states in self.states_list:
                z = states.stateseq
                psi = z.dot(C.T) + self.mu
                pi = ln_psi_to_pi(psi)

                # TODO: Generalize for multinomial
                ll += np.nansum(states.data * np.log(pi))

            # Gaussian prior on the entries of C
            ll += (-0.5*C**2/self.sigma_C).sum()

            return ll
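Example #3 closes over the log joint of the emission matrix C: a multinomial log likelihood plus an isotropic Gaussian prior. As an illustration of how such a closure can be consumed, here is a self-contained sketch that finds a MAP estimate of C with scipy.optimize.minimize; the shapes, data, and hyperparameters are all invented for the example:

import numpy as np
from scipy.optimize import minimize

K, n = 5, 3                                  # categories, latent dimension
rng = np.random.default_rng(0)
z = rng.standard_normal((100, n))            # stand-in for states.stateseq
data = rng.multinomial(10, np.ones(K) / K, size=100)
mu, sigma_C = np.zeros(K), 1.0

def softmax(psi):
    e = np.exp(psi - psi.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def neg_log_joint(C_flat):
    C = C_flat.reshape((K, n))
    pi = softmax(z.dot(C.T) + mu)
    ll = np.nansum(data * np.log(pi))
    ll += (-0.5 * C**2 / sigma_C).sum()      # Gaussian prior on C
    return -ll

C_map = minimize(neg_log_joint, np.zeros(K * n)).x.reshape((K, n))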
Example #4
def compute_pred_likelihood(model, samples, test):
    Z_pred = get_inputs(test)

    preds = []
    for sample in samples:
        model.set_sample(sample)
        preds.append(model.predict(Z_pred, full_output=True)[1])

    psi_pred_mean = np.mean(preds, axis=0)

    if isinstance(model, pgmult.gp.MultinomialGP):
        pi_pred_mean = np.array([psi_to_pi(psi) for psi in psi_pred_mean])
    elif isinstance(model, pgmult.gp.LogisticNormalGP):
        from pgmult.internals.utils import ln_psi_to_pi
        pi_pred_mean = np.array([ln_psi_to_pi(psi) for psi in psi_pred_mean])
    else:
        raise NotImplementedError

    # Log multinomial coefficient plus the count-weighted log probabilities
    pll_gp = gammaln(test.data.sum(axis=1)+1).sum() - gammaln(test.data+1).sum()
    pll_gp += np.nansum(test.data * np.log(pi_pred_mean))
    return pll_gp
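The two gammaln terms above form the log multinomial coefficient, log N! - sum_k log x_k!, which together with sum_k x_k log pi_k gives the full multinomial log pmf. A small sketch verifying that identity against scipy on toy counts:

import numpy as np
from scipy.special import gammaln
from scipy.stats import multinomial

x = np.array([3, 1, 2])
pi = np.array([0.5, 0.2, 0.3])

# log N! - sum_k log x_k! + sum_k x_k log pi_k
ll = gammaln(x.sum() + 1) - gammaln(x + 1).sum() + np.nansum(x * np.log(pi))
assert np.isclose(ll, multinomial.logpmf(x, n=x.sum(), p=pi))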
Example #5
    def predictive_log_likelihood(self, Xtest, data_index=0, Npred=100):
        """
        Hacky way of computing the predictive log likelihood
        :param X_pred:
        :param data_index:
        :param M:
        :return:
        """
        Tpred = Xtest.shape[0]

        # Sample particle trajectories
        preds = self.states_list[data_index].sample_predictions(Tpred, Npred)
        preds = np.transpose(preds, [2,0,1])
        assert preds.shape == (Npred, Tpred, self.n)

        psis = np.array([pred.dot(self.C.T) + self.mu for pred in preds])
        pis = np.array([ln_psi_to_pi(psi) for psi in psis])

        # TODO: Generalize for multinomial
        lls = np.zeros(Npred)
        for m in range(Npred):
            # lls[m] = np.sum(
            #     [Multinomial(weights=pis[m,t,:], K=self.p).log_likelihood(Xtest[t][None,:])
            #      for t in range(Tpred)])
            lls[m] = np.nansum(Xtest * np.log(pis[m]))

        # Log of the Monte Carlo average of the predictive likelihoods
        hll = logsumexp(lls) - np.log(Npred)

        # Use bootstrap to compute error bars
        samples = np.random.choice(lls, size=(100, Npred), replace=True)
        hll_samples = logsumexp(samples, axis=1) - np.log(Npred)
        std_hll = hll_samples.std()

        return hll, std_hll
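The estimate above averages likelihoods, not log likelihoods, over the Npred samples, which is done stably in log space with logsumexp; the bootstrap then resamples the per-sample values to get an error bar. A standalone sketch of that recipe on synthetic numbers:

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(1)
lls = rng.normal(-100.0, 2.0, size=500)   # stand-ins for the lls above
Npred = lls.size

# log( (1/Npred) * sum_m exp(lls[m]) ), computed without underflow
hll = logsumexp(lls) - np.log(Npred)

# Bootstrap: resample the per-sample values and recompute the estimate
samples = rng.choice(lls, size=(100, Npred), replace=True)
std_hll = (logsumexp(samples, axis=1) - np.log(Npred)).std()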
Example #6
def pi(self):
    psi = self.stateseq.dot(self.C.T)
    return ln_psi_to_pi(psi)
Example #7
    # (tail of lda_initializer; the full function appears in Example #2)
    nonempty_docs = np.asarray(model.data.sum(1) > 0).ravel()
    model.theta[nonempty_docs] = ln_psi_to_pi(lmbda)
    model.resample_z()
    return model


fit_lda_gibbs = sampler_fitter(
    'fit_lda_gibbs', StandardLDA, 'resample', lda_initializer)
fit_lda_collapsed = sampler_fitter(
    'fit_lda_collapsed', StandardLDA, 'resample_collapsed', lda_initializer)
fit_lnctm_gibbs = sampler_fitter(
    'fit_lnctm_gibbs', LogisticNormalCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: lmbda))
fit_sbctm_gibbs = sampler_fitter(
    'fit_sbctm_gibbs', StickbreakingCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: pi_to_psi(ln_psi_to_pi(lmbda))))


########################
#  inspecting results  #
########################

def plot_sb_interpretable_results(sb_results, words):
    nwords = 5
    Sigma = sb_results[-1][-1]
    T = Sigma.shape[0]

    def get_topwords(topic):
        return words[np.argsort(sb_results[-1][0][:,topic])[-nwords:]]

    lim = np.abs(Sigma).max()
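The fit_sbctm_gibbs initializer above composes two maps: ln_psi_to_pi takes the CTM's logistic-normal lambdas onto the simplex, and pi_to_psi re-expresses that simplex point in stick-breaking coordinates. A sketch of such a sigmoid stick-breaking pair (the actual pgmult versions are vectorized and may differ in detail):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def psi_to_pi_sketch(psi):
    # Stick breaking: peel off a sigmoid(psi_k) fraction of the
    # remaining stick at each step; the last entry is the remainder
    pi, stick = [], 1.0
    for p in psi:
        pi.append(stick * sigmoid(p))
        stick -= pi[-1]
    return np.array(pi + [stick])

def pi_to_psi_sketch(pi):
    # Inverse: recover psi_k from the fraction of the remaining
    # stick that pi_k consumes
    psi, stick = [], 1.0
    for p in pi[:-1]:
        psi.append(np.log(p / (stick - p)))
        stick -= p
    return np.array(psi)

pi = np.array([0.2, 0.3, 0.1, 0.4])
assert np.allclose(psi_to_pi_sketch(pi_to_psi_sketch(pi)), pi)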
Example #8
def theta(self):
    return ln_psi_to_pi(self.psi)