long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data)
chain_groups = len(long_patches) // 15  # components per group = 15

# form first proposal with Variational Bayes:
# run variational Bayes on the samples, using the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print('running VB1...')
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()

# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=False)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:, 0]
vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB1 done')

print('running VB2...')
# use the VB1 posterior as the prior for a second VB run (drop alpha0)
prior_for_vb2 = vb.posterior2prior()
prior_for_vb2.pop('alpha0')
vb2 = pypmc.mix_adapt.variational.GaussianInference(vb_weighted_samples[:vb2_N, 1:],
                                                    weights=vb_weights[:vb2_N],
                                                    initial_guess=vbmix, **prior_for_vb2)
vb2.run(1000, abs_tol=1e-5, rel_tol=1e-10, verbose=True)
vb2mix = vb2.make_mixture()
vb2_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vb2mix)
vb2_sampler.run(N_perp_ess, trace_sort=False)
vb2_weighted_samples = vb2_sampler.history[-1]
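# Note: `perp` and `ess` are not defined in this excerpt; pypmc provides
# equivalents in pypmc.tools.convergence. A minimal sketch of the standard
# normalized definitions (an assumption, consistent with values in [0, 1]):
import numpy as np

def perp(weights):
    # normalized perplexity: exp(entropy of the normalized weights) / N
    w = weights / weights.sum()
    entropy = -np.sum(w[w > 0] * np.log(w[w > 0]))
    return np.exp(entropy) / len(weights)

def ess(weights):
    # normalized effective sample size: (sum w)^2 / (N * sum w^2)
    w = weights / weights.sum()
    return 1.0 / (len(weights) * np.sum(w ** 2))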
# ***********************************************************************
# ****************** nothing below should be changed ! ******************
# ***********************************************************************

# use importance sampling to calculate perplexity and effective sample size

# define an ImportanceSampler object using ``final_proposal``
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, final_proposal)

# run N_perp_ess steps
sampler.run(N_perp_ess, trace_sort=True)

# get the weights from the samples that have just been generated
weighted_samples = sampler.history[-1]
weights = weighted_samples[:, 0]

# calculate perplexity and ess (do not shadow the ess function itself)
perplexity = perp(weights)
ess_value = ess(weights)

# save perplexity and ess
params.update([('perplexity', perplexity), ('ess', ess_value)])

# dump results
save_final_proposal(final_proposal, params)
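# `save_final_proposal` is defined elsewhere in the project. A hypothetical
# sketch (the name, signature, and pickle format are assumptions, not pypmc API):
import pickle

def save_final_proposal(mixture, params, filename='final_proposal.pkl'):
    # serialize the final proposal mixture together with the run parameters
    with open(filename, 'wb') as f:
        pickle.dump({'proposal': mixture, 'params': params}, f)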
chain_groups = len(long_patches) // 15  # components per group = 15

# form first proposal with PMC
print("running PMC")
pmcmix = pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], long_patches, copy=True)
pmcmix.prune(0.5 / len(long_patches))
for i in range(1000 - 1):
    print(i)
    # refine the mixture in place and drop components that lost all weight
    pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], pmcmix, copy=False)
    pmcmix.prune(0.5 / len(long_patches))

pmc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, pmcmix)
pmc_sampler.run(N_perp_ess, trace_sort=True)
pmc_weighted_samples = pmc_sampler.history[-1]
pmc_weights = pmc_weighted_samples[:, 0]
pmc_perp = perp(pmc_weights)
pmc_ess = ess(pmc_weights)
components_pmc = len(pmcmix)
print("PMC done")

# form a second proposal with Variational Bayes:
# run variational Bayes on the samples, using the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print("running VB...")
vb.run(1000, abs_tol=1e-5, rel_tol=1e-10, prune=0.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()

# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
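# The fixed 999 PMC refinement iterations above could be cut short once the
# mixture stops changing. A possible early exit (an assumption, not part of
# the original script) compares the component weights between iterations:
import numpy as np

previous_weights = pmcmix.weights.copy()
for i in range(1000 - 1):
    pypmc.mix_adapt.pmc.gaussian_pmc(stacked_data[::100], pmcmix, copy=False)
    pmcmix.prune(0.5 / len(long_patches))
    # only comparable if prune() removed no components this iteration
    if len(pmcmix.weights) == len(previous_weights) and \
            np.max(np.abs(pmcmix.weights - previous_weights)) < 1e-10:
        break
    previous_weights = pmcmix.weights.copy()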
stacked_data = np.vstack(mcmc_data)
print('Markov Chains done')

# form the "long_patches"
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data)
chain_groups = len(long_patches) // 15  # components per group = 15

# form first proposal with Variational Bayes:
# run variational Bayes on the samples, using the long patches as initial guess
vb = pypmc.mix_adapt.variational.GaussianInference(stacked_data[::100], initial_guess=long_patches)
print('running VB...')
vb.run(1000, abs_tol=1e-5, rel_tol=1e-5, prune=.5 * len(vb.data) / vb.K, verbose=True)
vbmix = vb.make_mixture()

# calculate perp/ess
vb_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, vbmix)
vb_sampler.run(N_perp_ess, trace_sort=True)
vb_weighted_samples = vb_sampler.history[-1]
vb_weights = vb_weighted_samples[:, 0]
vb_perp = perp(vb_weights)
vb_ess = ess(vb_weights)
components_vb = len(vbmix)
print('VB done')

# append this run's results to the output file
outfile = open('calculate_perp_ess_VB_loose_convergence.txt', 'a')
outfile.write('\n' + str(chain_groups) + ' ' + str(components_vb) + ' ' +
              str(vb_perp) + ' ' + str(vb_ess))
outfile.close()
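# `mcmc_data` is generated before this excerpt. One way to produce it with
# pypmc's adaptive local-random-walk chains -- a sketch only: the number of
# chains, batch sizes, start points, and the `history` access (matching the
# older pypmc interface used throughout this script) are all assumptions:
import numpy as np
import pypmc

mcmc_data = []
for n in range(10):                            # assumed: 10 independent chains
    start = np.random.randn(dim)               # assumed: `dim` is the target dimension
    local_prop = pypmc.density.gauss.LocalGauss(np.eye(dim) * 0.1)
    mc = pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, local_prop, start)
    for _ in range(20):                        # adapt the proposal covariance in batches
        mc.run(500)
        mc.adapt()
    mcmc_data.append(mc.history[:][500:])      # discard the first batch as burn-in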
long_patches = pypmc.mix_adapt.r_value.make_r_gaussmix(mcmc_data, K_g=5)
chain_groups = len(long_patches) // 5  # components per group = 5

# for hierarchical clustering (and VBMerge) form the "short_patches"
short_patches = pypmc.tools.patch_data(stacked_data, L=100)

# run hierarchical clustering
hc = pypmc.mix_adapt.hierarchical.Hierarchical(short_patches, long_patches, verbose=True)
print("running HC...")
hc.run()
hcmix = hc.g

hc_sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
hc_sampler.run(N_perp_ess, trace_sort=True)
hc_weighted_samples = hc_sampler.history[-1]
hc_weights = hc_weighted_samples[:, 0]
hc_perp = perp(hc_weights)
hc_ess = ess(hc_weights)
components_hc = len(hcmix)
print("HC done")

# importance sampling main loop
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, hcmix)
previous_perp = -np.inf
converge_step = None
for i in range(25):
    print("step", i)
    # run len(sampler.proposal) * N_c steps
# ---------------------- continue with importance sampling ----------------------

# define an ImportanceSampler object using ``reduced_proposal``
sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, reduced_proposal)

generating_components = []
perplexities = [0.]
for i in range(10):
    print("step", i)

    # run N_c steps per live component
    generating_components.append(sampler.run(N_c * (sampler.proposal.weights != 0).sum(),
                                             trace_sort=True))

    # get the weighted samples that have just been generated
    weighted_samples = sampler.history[-1]
    perplexities.append(perp(weighted_samples[:, 0]))

    # stop if the relative gain in perplexity drops below 5%
    if (perplexities[i + 1] - perplexities[i]) / perplexities[i + 1] < .05:
        statusfile.write('PMC converged in step ' + str(i) + '\n')
        break

    # update the proposal using PMC
    pypmc.mix_adapt.pmc.gaussian_pmc(weighted_samples[:, 1:], sampler.proposal,
                                     weights=weighted_samples[:, 0],
                                     latent=generating_components[-1],
                                     rb=True, mincount=20, copy=False)

    # plot
    plt.figure()
    plt.title('proposal after PMC update ' + str(i))
    plot_mixture(sampler.proposal, 0, 1, cutoff=.01)
    plotfile.savefig()

    statusfile.write('have %i live components after PMC step %i\n'
                     % ((sampler.proposal.weights != 0).sum(), i))

statusfile.write('Perplexities:\n')
for p in perplexities[1:]:  # [1:] skips the initial 0.
    statusfile.write(str(p) + '\n')
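# `plot_mixture` above is presumably pypmc.tools.plot_mixture, which draws one
# covariance ellipse per component in the (0, 1) coordinate plane and skips
# components with weight below `cutoff`. A sketch of the setup assumed earlier
# in the script for `plotfile` and `statusfile` (the file names are assumptions):
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from pypmc.tools import plot_mixture

plotfile = PdfPages('pmc_updates.pdf')   # one PDF page per PMC update
statusfile = open('status.txt', 'w')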