示例#1
0
def fit_gp_multinomial_model(model, test, pi_train=None, N_samples=100, run=1):
    """
    Initialize ``model``, then either load cached results for this run or
    draw ``N_samples`` samples with ``model.resample_model``.

    Parameters
    ----------
    model : object
        Model to fit.  It is mutated in place.  When ``pi_train`` is given,
        ``pgmult.gp.LogisticNormalGP`` and ``pgmult.gp.MultinomialGP``
        instances get ``model.data_list[0]["psi"]`` overwritten; any other
        model type is left uninitialized.
    test : object
        Held-out data; passed to ``get_inputs`` and read through ``test.data``.
    pi_train : optional
        Initial probabilities used to set ``psi``.  When ``None`` the model
        is initialized with ``model.initialize_from_data()`` instead.
    N_samples : int
        Number of ``resample_model`` sweeps to run when nothing is cached.
    run : int
        Run index used to build the cached results path
        ``results/names/runNNN/results.pkl.gz``.

    Returns
    -------
    tuple
        ``(samples, lls, pred_lls, pred_pis, timestamps)``.  ``pred_pis`` is
        ``None`` when the results were loaded from the cache file, because
        the cached tuple holds only the other four entries.
    """
    if pi_train is not None:
        if isinstance(model, pgmult.gp.LogisticNormalGP):
            model.data_list[0]["psi"] = ln_pi_to_psi(pi_train) - model.mu
        elif isinstance(model, pgmult.gp.MultinomialGP):
            model.data_list[0]["psi"] = pi_to_psi(pi_train) - model.mu
            model.resample_omega()
    else:
        model.initialize_from_data()

    ### Inference
    results_base = os.path.join("results", "names", "run%03d" % run, "results")
    results_file = results_base + ".pkl.gz"
    if os.path.exists(results_file):
        # NOTE: unpickling runs arbitrary code; only load trusted result files.
        with gzip.open(results_file, "rb") as f:
            samples, lls, pred_lls, timestamps = pickle.load(f)
        # The cache does not store predictive probabilities.  Bind the name
        # so the return statement below does not raise UnboundLocalError.
        pred_pis = None

    else:
        Z_test = get_inputs(test)
        lls = [model.log_likelihood()]
        samples = [model.copy_sample()]
        pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
        pred_lls = [pred_ll]
        pred_pis = [pred_pi]
        times = [0]

        # Print initial values
        print("Initial LL: ", lls[0])
        print("Initial Pred LL: ", pred_lls[0])

        for itr in range(N_samples):
            print("Iteration ", itr)
            # Time only the resampling step, not the bookkeeping below.
            tic = time.time()
            model.resample_model(verbose=True)
            times.append(time.time() - tic)

            samples.append(model.copy_sample())
            lls.append(model.log_likelihood())
            # Reuse the test inputs computed once before the loop.
            pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
            pred_lls.append(pred_ll)
            pred_pis.append(pred_pi)

            print("Log likelihood: ", lls[-1])
            print("Pred Log likelihood: ", pred_ll)

        # NOTE: nothing in this function writes ``results_file``; the cache
        # branch above only triggers if the file was produced elsewhere.
        lls = np.array(lls)
        pred_lls = np.array(pred_lls)
        timestamps = np.cumsum(times)

    return samples, lls, pred_lls, pred_pis, timestamps
示例#2
0
def fit_lds_model_with_pmcmc(Xs, Xtest, D, N_samples=100):
    """
    Fit one ParticleSBMultinomialLDS per training sequence with pMCMC.

    A separate model is built for every entry of ``Xs`` and paired with the
    entry of ``Xtest`` at the same position.  After each sweep the summed
    predictive log likelihood over all pairs is recorded.  The training and
    test log likelihood slots are filled with NaN and the samples slot with
    None.

    Returns a ``Results(lls, test_lls, pred_lls, samples, timestamps)``.
    """
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting SBM-LDS with %d states using pMCMC" % D)

    def build_model():
        # Priors are assembled fresh for every model instance.
        init_prior = GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D))
        dynamics_prior = AutoRegression(
            nu_0=D+1, S_0=D*np.eye(D), M_0=np.zeros((D,D)), K_0=D*np.eye(D))
        emission_prior = Regression(
            nu_0=K+1, S_0=K*np.eye(K), M_0=np.zeros((K,D)), K_0=K*np.eye(D))
        return ParticleSBMultinomialLDS(
            init_dynamics_distn=init_prior,
            dynamics_distn=dynamics_prior,
            emission_distn=emission_prior,
            mu=pi_to_psi(np.ones(K)/K),
            sigma_C=1.0)

    models = [build_model() for _ in xrange(Nx)]

    # The passes stay separate (construct, set parameters, attach data) so
    # random draws happen in the same order as before.
    for lds in models:
        lds.A = 0.5*np.eye(D)
        lds.sigma_states = np.eye(D)
        lds.C = np.random.randn(K-1,D)
        lds.sigma_obs = 0.1*np.eye(K)

    for train_seq, lds in zip(Xs, models):
        lds.add_data(train_seq)

    def compute_pred_ll():
        # Sum the first element of each model's predictive result.
        return sum(lds.predictive_log_likelihood(test_seq, Npred=100)[0]
                   for test_seq, lds in zip(Xtest, models))

    def sweep():
        # Time the resampling of all models; scoring is not included.
        start = time.time()
        for lds in models:
            lds.resample_model()
        elapsed = time.time() - start
        return elapsed, None, np.nan, np.nan, compute_pred_ll()

    # Record the state before any resampling, with zero elapsed time.
    history = [(0, None, np.nan, np.nan, compute_pred_ll())]
    for _ in progprint_xrange(N_samples, perline=5):
        history.append(sweep())

    times, samples, lls, test_lls, pred_lls = map(np.array, zip(*history))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
示例#3
0
def fit_lds_model_with_pmcmc(Xs, Xtest, D, N_samples=100):
    """
    Fit a single ParticleSBMultinomialLDS to all sequences in ``Xs`` with pMCMC.

    Every sequence in ``Xs`` is attached to the same model.  The model's log
    likelihood and its predictive log likelihood on ``Xtest`` are recorded
    before sampling and after each of the ``N_samples`` sweeps.  The test
    log likelihood slot is filled with NaN and the samples slot with None.

    Returns a ``Results(lls, test_lls, pred_lls, samples, timestamps)``.
    """
    print("Fitting SBM-LDS with %d states using pMCMC" % D)

    init_prior = GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D))
    dynamics_prior = AutoRegression(
        nu_0=D+1, S_0=D*np.eye(D), M_0=np.zeros((D,D)), K_0=D*np.eye(D))
    emission_prior = Regression(
        nu_0=K+1, S_0=K*np.eye(K), M_0=np.zeros((K,D)), K_0=K*np.eye(D))
    model = ParticleSBMultinomialLDS(
        init_dynamics_distn=init_prior,
        dynamics_distn=dynamics_prior,
        emission_distn=emission_prior,
        mu=pi_to_psi(np.ones(K)/K), sigma_C=0.01)

    # Starting parameter values.
    model.A = 0.5*np.eye(D)
    model.sigma_states = np.eye(D)
    model.C = 0.01 * np.random.randn(K-1,D)
    model.sigma_obs = 0.1*np.eye(K)

    for train_seq in Xs:
        model.add_data(train_seq)

    def sweep():
        # Only the resampling step is timed.
        start = time.time()
        model.resample_model()
        elapsed = time.time() - start

        # Predictive score is evaluated before the training log likelihood.
        pred_ll = model.predictive_log_likelihood(Xtest, Npred=1000)
        train_ll = model.log_likelihood()
        return elapsed, None, train_ll, np.nan, pred_ll

    # Record the state before any resampling, with zero elapsed time.
    initial_ll = model.log_likelihood()
    initial_pred_ll = model.predictive_log_likelihood(Xtest, Npred=1000)
    history = [(0, None, initial_ll, np.nan, initial_pred_ll)]
    for _ in progprint_xrange(N_samples):
        history.append(sweep())

    times, samples, lls, test_lls, pred_lls = map(np.array, zip(*history))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
示例#4
0
文件: ctm.py 项目: ariddell/pgmult
    nonempty_docs = np.asarray(model.data.sum(1) > 0).ravel()
    model.theta[nonempty_docs] = ln_psi_to_pi(lmbda)
    model.resample_z()
    return model


# Module-level fitting entry points.  Each name is bound to whatever
# sampler_fitter returns for a (name, model class, method-name string,
# initializer) combination.
# NOTE(review): sampler_fitter, lda_initializer and make_ctm_initializer are
# defined outside this excerpt; presumably the third argument names the model
# method called on every iteration -- confirm against sampler_fitter.

# StandardLDA driven through its 'resample' method.
fit_lda_gibbs = sampler_fitter(
    'fit_lda_gibbs', StandardLDA, 'resample', lda_initializer)
# StandardLDA driven through its 'resample_collapsed' method.
fit_lda_collapsed = sampler_fitter(
    'fit_lda_collapsed', StandardLDA, 'resample_collapsed', lda_initializer)
# Logistic-normal correlated LDA; the initializer callback passes lmbda
# through unchanged.
fit_lnctm_gibbs = sampler_fitter(
    'fit_lnctm_gibbs', LogisticNormalCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: lmbda))
# Stick-breaking correlated LDA; the initializer callback maps lmbda through
# ln_psi_to_pi and then pi_to_psi.
fit_sbctm_gibbs = sampler_fitter(
    'fit_sbctm_gibbs', StickbreakingCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: pi_to_psi(ln_psi_to_pi(lmbda))))


########################
#  inspecting results  #
########################

def plot_sb_interpretable_results(sb_results, words):
    nwords = 5
    Sigma = sb_results[-1][-1]
    T = Sigma.shape[0]

    def get_topwords(topic):
        return words[np.argsort(sb_results[-1][0][:,topic])[-nwords:]]

    lim = np.abs(Sigma).max()
示例#5
0
文件: lda.py 项目: ariddell/pgmult
 def theta(self, theta):
     """Store ``theta`` on the instance as ``psi`` via ``pi_to_psi``."""
     converted = pi_to_psi(theta)
     self.psi = converted