class Stage(object):
    """
    Stage, containing sampling results and intermediate sampler parameters.

    Parameters
    ----------
    handler : object, optional
        Stage handler used for path lookups and for loading results.
        Takes precedence over `homepath` if both are given.
    homepath : str, optional
        Used to create a ``SampleStage`` handler if no `handler` is given.
    stage_number : int
        Number of the stage this object refers to, default: -1
    backend : str
        Stored on the object and passed to ``SampleStage`` when the
        handler is created from `homepath`, default: 'csv'

    Raises
    ------
    TypeError
        If neither `handler` nor `homepath` is given.
    """

    number = None
    path = None
    step = None
    updates = None
    mtrace = None

    def __init__(self, handler=None, homepath=None, stage_number=-1,
                 backend='csv'):

        if handler is not None:
            self.handler = handler
        elif homepath is not None:
            self.handler = SampleStage(homepath, backend=backend)
        else:
            raise TypeError('Either handler or homepath have to be not None')

        self.backend = backend
        self.number = stage_number

    def load_results(self, varnames=None, model=None, stage_number=None,
                     chains=None, load='trace'):
        """
        Load stage results from sampling.

        Results are stored on the object: the trace in ``mtrace``, the
        sampler parameters in ``step`` and ``updates``. ``path`` and
        ``number`` are updated to the stage that was actually loaded.

        Parameters
        ----------
        varnames : list, optional
            of names of the variables to load. If None, the names of the
            unobserved random variables of `model` are used.
        model : :class:`pymc3.model.Model`
            Required if `varnames` is None or if sampler parameters are
            to be loaded.
        stage_number : int
            Number of stage to load, if None the number of this stage
        chains : list, optional
            of result chains to load
        load : str
            what to load and return 'full', 'trace', 'params'

        Raises
        ------
        ValueError
            If `load` is not one of the supported modes, if both
            `varnames` and `model` are None, or if sampler parameters are
            requested without a `model`.
        """
        # an unknown mode used to fall through silently, loading nothing
        if load not in ('full', 'trace', 'params'):
            raise ValueError(
                'Load mode "%s" is not supported!'
                ' Choose one of "full", "trace", "params"' % (load,))

        if varnames is None:
            if model is None:
                raise ValueError(
                    'Either "varnames" or "model" need to be not None!')

            varnames = [var.name for var in model.unobserved_RVs]

        if stage_number is None:
            stage_number = self.number

        self.path = self.handler.stage_path(stage_number)

        if not os.path.exists(self.path):
            # fall back to the stage the handler reports as highest sampled
            stage_number = self.handler.highest_sampled_stage()

            logger.info(
                'Stage results %s do not exist! Loading last completed'
                ' stage %s', self.path, stage_number)
            self.path = self.handler.stage_path(stage_number)

        self.number = stage_number

        if load == 'full':
            to_load = ['params', 'trace']
        else:
            to_load = [load]

        if 'trace' in to_load:
            self.mtrace = self.handler.load_multitrace(
                stage_number, varnames=varnames, chains=chains)

        if 'params' in to_load:
            if model is None:
                raise ValueError('To load sampler params model is required!')

            # the sampler parameters are loaded inside the model context
            with model:
                self.step, self.updates = self.handler.load_sampler_params(
                    stage_number)
def _test_sample(self, n_jobs, test_folder):
    """
    Sample a two-mode Gaussian test posterior and plot the diagnostics.

    The target is a mixture of two 4-dimensional Gaussians centred at
    +0.5 and -0.5 with weights 0.1 and 0.9, under a uniform prior on
    [-2, 2]. It is sampled through ``pt.pt_sample`` with a Metropolis
    step, the results are read back from `test_folder` and shown as a
    trace plot, acceptance / temperature-scaling curves and one matrix
    panel per history entry.

    Parameters
    ----------
    n_jobs : int
        Used as the number of chains for the step method and the sampler.
    test_folder : str
        Directory handed to the sampler as ``homepath`` and read back
        through ``SampleStage``.

    Notes
    -----
    Reads ``tune_interval``, ``n_samples``, ``swap_interval``,
    ``beta_tune_interval``, ``n_workers_posterior``, ``buffer_size``,
    ``burn`` and ``thin`` from the instance. Ends with ``plt.show()``.
    """
    logger.info('Running on %i cores...' % n_jobs)

    ndim = 4

    mean_a = num.ones(ndim) * (1. / 2)
    mean_b = -mean_a

    stdev = 0.1
    cov = num.power(stdev, 2) * num.eye(ndim)
    inv_cov = num.linalg.inv(cov)
    det_cov = num.linalg.det(cov)

    weight_a = stdev
    weight_b = (1 - stdev)

    def gaussian_logp(x, mean):
        # log density of a multivariate normal with the shared covariance
        return - 0.5 * ndim * tt.log(2 * num.pi) \
            - 0.5 * tt.log(det_cov) \
            - 0.5 * (x - mean).T.dot(inv_cov).dot(x - mean)

    def two_gaussians(x):
        logp_a = gaussian_logp(x, mean_a)
        logp_b = gaussian_logp(x, mean_b)
        return tt.log(
            weight_a * tt.exp(logp_a) + weight_b * tt.exp(logp_b))

    with pm.Model() as pt_model:
        X = pm.Uniform(
            'X',
            shape=ndim,
            lower=-2. * num.ones_like(mean_a),
            upper=2. * num.ones_like(mean_a),
            testval=-1. * num.ones_like(mean_a),
            transform=None)
        like = pm.Deterministic('tmp', two_gaussians(X))
        pm.Potential('like', like)

    with pt_model:
        step = metropolis.Metropolis(
            n_chains=n_jobs,
            likelihood_name=pt_model.deterministics[0].name,
            proposal_name='MultivariateCauchy',
            tune_interval=self.tune_interval)

    pt.pt_sample(
        step,
        n_chains=n_jobs,
        n_samples=self.n_samples,
        swap_interval=self.swap_interval,
        beta_tune_interval=self.beta_tune_interval,
        n_workers_posterior=self.n_workers_posterior,
        homepath=test_folder,
        progressbar=False,
        buffer_size=self.buffer_size,
        model=pt_model,
        rm_flag=False,
        keep_tmp=False)

    stage_handler = SampleStage(test_folder)
    mtrace = stage_handler.load_multitrace(-1, varnames=pt_model.vars)

    history_path = os.path.join(
        stage_handler.stage_path(-1), sample_p_outname)
    history = load_objects(history_path)

    n_steps = self.n_samples
    burn = self.burn
    thin = self.thin

    def burn_sample(x):
        # trace transform: drop the burn-in fraction of every chain and
        # thin the remainder; x is treated as chains stacked along axis 0
        if n_steps == 1:
            return x

        nchains = int(x.shape[0] / n_steps)
        # NOTE(review): the slice end is exclusive, so the "- 1" also
        # drops the final sample of each chain - confirm this is intended
        kept = [
            x[int((n_steps * k) + (n_steps * burn)):
              int(n_steps * (k + 1) - 1):
              thin]
            for k in range(nchains)]
        return num.vstack(kept)

    from pymc3 import traceplot
    from matplotlib import pyplot as plt

    with pt_model:
        traceplot(mtrace, transform=burn_sample)

    _, (acceptance_ax, scaling_ax) = plt.subplots(
        nrows=1, ncols=2, figsize=mpl_papersize('a5', 'portrait'))

    acceptance_ax.plot(history.acceptance, 'r')
    acceptance_ax.set_ylabel('Acceptance ratio')
    acceptance_ax.set_xlabel('Update interval')

    scaling_ax.plot(num.array(history.t_scales), 'k')
    scaling_ax.set_ylabel('Temperature scaling')
    scaling_ax.set_xlabel('Update interval')

    n_acceptances = len(history)
    ncol = 3
    nrow = int(num.ceil(n_acceptances / float(ncol)))
    a4_portrait = dict(nrows=nrow, ncols=ncol)

    fig2, axes1 = plt.subplots(
        figsize=mpl_papersize('a4', 'portrait'), **a4_portrait)
    axes1 = num.atleast_2d(axes1)

    fig3, axes2 = plt.subplots(
        figsize=mpl_papersize('a4', 'portrait'), **a4_portrait)
    axes2 = num.atleast_2d(axes2)

    acc_arrays = history.get_acceptance_matrixes_array()
    sc_arrays = history.get_sample_counts_array()

    # colour limits, reduced over the first two axes of each array
    scvmin = sc_arrays.min(0).min(0)
    scvmax = sc_arrays.max(0).max(0)
    accvmin = acc_arrays.min(0).min(0)
    accvmax = acc_arrays.max(0).max(0)

    def draw_panel(ax, matrix, vmin, vmax):
        ax.matshow(matrix, vmin=vmin, vmax=vmax, cmap='hot')
        ax.set_title('min %i, max%i' % (vmin, vmax))
        ax.get_xaxis().set_ticklabels([])

    # (no colorbars are drawn; the limits are given in the panel titles)
    for idx in range(ncol * nrow):
        rowi, coli = mod_i(idx, ncol)

        if idx >= n_acceptances:
            # grid cells without a history entry are removed
            try:
                fig2.delaxes(axes1[rowi, coli])
                fig3.delaxes(axes2[rowi, coli])
            except KeyError:
                pass
            continue

        draw_panel(
            axes1[rowi, coli], history.acceptance_matrixes[idx],
            accvmin[idx], accvmax[idx])
        draw_panel(
            axes2[rowi, coli], history.sample_counts[idx],
            scvmin[idx], scvmax[idx])

    fig2.suptitle('Accepted number of samples')
    fig2.tight_layout()
    fig3.tight_layout()
    fig3.suptitle('Total number of samples')
    plt.show()