import os
import gzip
import pickle
import time

import numpy as np

import pgmult.gp
from pgmult.utils import pi_to_psi, ln_pi_to_psi, ln_psi_to_pi

# Names used below but defined elsewhere in the original experiment scripts:
# get_inputs, K (the vocabulary size), ParticleSBMultinomialLDS, GaussianFixed,
# AutoRegression, Regression, progprint_xrange, StandardLDA,
# LogisticNormalCorrelatedLDA, StickbreakingCorrelatedLDA, sampler_fitter,
# lda_initializer, and make_ctm_initializer.


def fit_gp_multinomial_model(model, test, pi_train=None, N_samples=100, run=1):
    # Initialize the latent GP values, either from the given training
    # probabilities or from the data itself.
    if pi_train is not None:
        if isinstance(model, pgmult.gp.LogisticNormalGP):
            model.data_list[0]["psi"] = ln_pi_to_psi(pi_train) - model.mu
        elif isinstance(model, pgmult.gp.MultinomialGP):
            model.data_list[0]["psi"] = pi_to_psi(pi_train) - model.mu
        model.resample_omega()
    else:
        model.initialize_from_data()

    ### Inference
    results_base = os.path.join("results", "names", "run%03d" % run, "results")
    results_file = results_base + ".pkl.gz"
    if os.path.exists(results_file):
        with gzip.open(results_file, "r") as f:
            samples, lls, pred_lls, timestamps = pickle.load(f)
        # Cached results do not include the predictive probabilities;
        # signal their absence explicitly rather than raising a NameError.
        pred_pis = None
    else:
        Z_test = get_inputs(test)
        lls = [model.log_likelihood()]
        samples = [model.copy_sample()]
        pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
        pred_lls = [pred_ll]
        pred_pis = [pred_pi]
        times = [0]

        # Print initial values
        print("Initial LL: ", lls[0])
        print("Initial Pred LL: ", pred_lls[0])

        for itr in range(N_samples):
            print("Iteration ", itr)
            tic = time.time()
            model.resample_model(verbose=True)
            times.append(time.time() - tic)

            samples.append(model.copy_sample())
            lls.append(model.log_likelihood())
            pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
            pred_lls.append(pred_ll)
            pred_pis.append(pred_pi)

            print("Log likelihood: ", lls[-1])
            print("Pred Log likelihood: ", pred_ll)

            # Save this sample
            # with gzip.open(results_file + ".itr%03d.pkl.gz" % itr, "w") as f:
            #     pickle.dump(model, f, protocol=-1)

        lls = np.array(lls)
        pred_lls = np.array(pred_lls)
        timestamps = np.cumsum(times)

    return samples, lls, pred_lls, pred_pis, timestamps
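# For reference, a minimal sketch of the stick-breaking transform behind
# pi_to_psi and its inverse -- an illustration of the mapping, not pgmult's
# actual implementation (the *_sketch names are ours):

def pi_to_psi_sketch(pi):
    """Map a length-K probability vector to K-1 unconstrained values."""
    pi = np.asarray(pi, dtype=float)
    K = pi.shape[0]
    psi = np.zeros(K - 1)
    stick = 1.0                                     # mass not yet allocated
    for k in range(K - 1):
        psi[k] = np.log(pi[k] / (stick - pi[k]))    # logit(pi_k / stick)
        stick -= pi[k]
    return psi

def psi_to_pi_sketch(psi):
    """Inverse of pi_to_psi_sketch: recover the probability vector."""
    psi = np.asarray(psi, dtype=float)
    K = psi.shape[0] + 1
    pi = np.zeros(K)
    stick = 1.0
    for k in range(K - 1):
        pi[k] = stick / (1.0 + np.exp(-psi[k]))     # sigmoid(psi_k) * stick
        stick -= pi[k]
    pi[-1] = stick                                  # remainder of the stick
    return pi

# Round-trip sanity check:
#   pi = np.array([0.1, 0.2, 0.3, 0.4])
#   np.allclose(psi_to_pi_sketch(pi_to_psi_sketch(pi)), pi)   # -> True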
def fit_lds_model_with_pmcmc(Xs, Xtest, D, N_samples=100):
    """
    Fit a stick-breaking multinomial (SBM) LDS with particle MCMC,
    using an independent model for each of the input sequences.
    """
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting SBM-LDS with %d states using pMCMC" % D)
    models = [
        ParticleSBMultinomialLDS(
            init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1 * np.eye(D)),
            dynamics_distn=AutoRegression(
                nu_0=D + 1, S_0=D * np.eye(D), M_0=np.zeros((D, D)), K_0=D * np.eye(D)),
            emission_distn=Regression(
                nu_0=K + 1, S_0=K * np.eye(K), M_0=np.zeros((K, D)), K_0=K * np.eye(D)),
            mu=pi_to_psi(np.ones(K) / K),
            sigma_C=1.0)
        for _ in range(Nx)]

    for model in models:
        model.A = 0.5 * np.eye(D)
        model.sigma_states = np.eye(D)
        model.C = np.random.randn(K - 1, D)
        model.sigma_obs = 0.1 * np.eye(K)

    for X, model in zip(Xs, models):
        model.add_data(X)

    def compute_pred_ll():
        # Sum the predictive log likelihoods over the held-out sequences.
        pred_ll = 0
        for Xte, model in zip(Xtest, models):
            pred_ll += model.predictive_log_likelihood(Xte, Npred=100)[0]
        return pred_ll

    init_results = (0, None, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        for model in models:
            model.resample_model()
        toc = time.time() - tic
        return toc, None, np.nan, np.nan, compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
                            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)
    return Results(lls, test_lls, pred_lls, samples, timestamps)
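# A hypothetical driver for the per-sequence fitter above. We assume the
# module-level vocabulary size K is set and that each sequence is a T x K
# array of counts (shapes inferred from add_data / predictive_log_likelihood):
#
#   K = 10
#   Xs = [np.random.multinomial(5, np.ones(K) / K, size=50) for _ in range(3)]
#   Xtest = [np.random.multinomial(5, np.ones(K) / K, size=10) for _ in range(3)]
#   results = fit_lds_model_with_pmcmc(Xs, Xtest, D=2, N_samples=10)
#   # results.pred_lls against results.timestamps traces predictive LL
#   # over wall-clock time.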
def fit_lds_model_with_pmcmc(Xs, Xtest, D, N_samples=100):
    """
    Fit a stick-breaking multinomial (SBM) LDS with particle MCMC.

    Variant (from a separate experiment script) that shares a single
    model across all of the input sequences.
    """
    print("Fitting SBM-LDS with %d states using pMCMC" % D)
    model = ParticleSBMultinomialLDS(
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1 * np.eye(D)),
        dynamics_distn=AutoRegression(
            nu_0=D + 1, S_0=D * np.eye(D), M_0=np.zeros((D, D)), K_0=D * np.eye(D)),
        emission_distn=Regression(
            nu_0=K + 1, S_0=K * np.eye(K), M_0=np.zeros((K, D)), K_0=K * np.eye(D)),
        mu=pi_to_psi(np.ones(K) / K),
        sigma_C=0.01)

    model.A = 0.5 * np.eye(D)
    model.sigma_states = np.eye(D)
    model.C = 0.01 * np.random.randn(K - 1, D)
    model.sigma_obs = 0.1 * np.eye(K)

    for X in Xs:
        model.add_data(X)

    init_results = (0, None, model.log_likelihood(), np.nan,
                    model.predictive_log_likelihood(Xtest, Npred=1000))

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic
        pred_ll = model.predictive_log_likelihood(Xtest, Npred=1000)
        return toc, None, model.log_likelihood(), np.nan, pred_ll

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
                            [resample() for _ in progprint_xrange(N_samples)])))
    timestamps = np.cumsum(times)
    return Results(lls, test_lls, pred_lls, samples, timestamps)
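# Both variants return a Results container that is not defined in this
# excerpt. A minimal sketch consistent with the return statements above
# (the field names are our assumption, taken from the local variables):

import collections
Results = collections.namedtuple(
    "Results", ["lls", "test_lls", "pred_lls", "samples", "timestamps"])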
# (Fragment: tail of the initializer built by make_ctm_initializer; the
# enclosing function definition is not part of this excerpt.)
    nonempty_docs = np.asarray(model.data.sum(1) > 0).ravel()
    model.theta[nonempty_docs] = ln_psi_to_pi(lmbda)
    model.resample_z()
    return model


fit_lda_gibbs = sampler_fitter(
    'fit_lda_gibbs', StandardLDA, 'resample', lda_initializer)
fit_lda_collapsed = sampler_fitter(
    'fit_lda_collapsed', StandardLDA, 'resample_collapsed', lda_initializer)
fit_lnctm_gibbs = sampler_fitter(
    'fit_lnctm_gibbs', LogisticNormalCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: lmbda))
fit_sbctm_gibbs = sampler_fitter(
    'fit_sbctm_gibbs', StickbreakingCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: pi_to_psi(ln_psi_to_pi(lmbda))))


########################
#  inspecting results  #
########################

def plot_sb_interpretable_results(sb_results, words):
    nwords = 5
    Sigma = sb_results[-1][-1]        # topic covariance from the last sample
    T = Sigma.shape[0]

    def get_topwords(topic):
        # Top nwords words for a topic, by weight in the last sample.
        return words[np.argsort(sb_results[-1][0][:, topic])[-nwords:]]

    lim = np.abs(Sigma).max()         # symmetric color limit for the heatmap
    # (the remainder of this function is not included in this excerpt)
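# plot_sb_interpretable_results is truncated above. Judging from Sigma,
# get_topwords, and the symmetric color limit lim, it builds a labeled
# heatmap of the topic covariance. A self-contained sketch of that kind of
# figure (our illustration, not the original function body):

import matplotlib.pyplot as plt

def plot_topic_covariance_sketch(Sigma, topic_labels):
    """Heatmap of a T x T topic covariance with a symmetric color scale."""
    T = Sigma.shape[0]
    lim = np.abs(Sigma).max()
    fig, ax = plt.subplots()
    im = ax.imshow(Sigma, cmap="RdBu", vmin=-lim, vmax=lim)
    ax.set_xticks(np.arange(T))
    ax.set_yticks(np.arange(T))
    ax.set_xticklabels(topic_labels, rotation=90)
    ax.set_yticklabels(topic_labels)
    fig.colorbar(im, ax=ax, label="covariance")
    return fig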
    # (Fragment: the enclosing class and the matching @property getter are
    # not part of this excerpt; the @theta.setter decorator is inferred.)
    @theta.setter
    def theta(self, theta):
        # Store the distribution via its stick-breaking transform.
        self.psi = pi_to_psi(theta)