def run(model, real_parameters, noise_used, log_prior_used):
    # Create some toy data
    times = np.linspace(1, 1000, 50)
    org_values = model.simulate(real_parameters, times)

    # Add noise
    noise = 10
    values = org_values + np.random.normal(0, noise, org_values.shape)
    real_parameters = np.array(real_parameters)

    # Create an object with links to the model and time series
    problem = pints.SingleOutputProblem(model, times, values)

    # Create a log-likelihood function with the assumed (known) noise level
    log_likelihood_used = pints.GaussianKnownSigmaLogLikelihood(problem, [noise_used])

    # Create a posterior log-density (log(likelihood * prior)) from the supplied prior
    log_posterior = pints.LogPosterior(log_likelihood_used, log_prior_used)

    # Choose starting points for 3 MCMC chains
    xs = [
        real_parameters,
        real_parameters * 1.01,
        real_parameters * 0.99,
    ]

    # Create MCMC routine with three chains
    mcmc = pints.MCMCController(log_posterior, 3, xs, method=pints.HaarioACMC)
    sample_size = 4000

    # Add stopping criterion
    mcmc.set_max_iterations(sample_size)

    # Start adapting after sample_size // 4 iterations
    mcmc.set_initial_phase_iterations(sample_size // 4)

    # Disable logging mode
    mcmc.set_log_to_screen(False)

    # Run!
    print('Running...')
    chains = mcmc.run()
    print('Done!')

    # Find the first iteration s (after the adaptation phase) at which the
    # chains pass the Rhat < 1.05 convergence check
    s = sample_size // 4 + 1  # HMC: s = 1
    b = False
    while s < sample_size:
        chains_cut = chains[:, sample_size // 4:s + 1]
        rhat = pints.rhat(chains_cut)
        s += 1
        if rhat[0] < 1.05:
            b = True
            break
    print(s)

    # Discard everything before s and return the first parameter of chain 0
    return chains[0][s:][:, 0]
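A minimal usage sketch for run() above, assuming a pints toy logistic model and an illustrative uniform prior; the parameter values and prior bounds below are assumptions, not part of the original code.

# Illustrative call to run(); model choice, true parameters and prior bounds are assumptions.
import numpy as np
import pints
import pints.toy

model = pints.toy.LogisticModel()
real_parameters = [0.015, 500]                      # growth rate, carrying capacity
log_prior = pints.UniformLogPrior([0.0, 0.0], [1.0, 1000.0])

# noise_used is the sigma assumed by GaussianKnownSigmaLogLikelihood;
# the data are generated inside run() with a true sigma of 10
samples = run(model, real_parameters, noise_used=10, log_prior_used=log_prior)
print(samples.mean(), samples.std())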
def run_mcmc(self, num_mcmc_samples, num_chains, iprint=True,
             method=pints.PopulationMCMC, enforce_convergence=False):
    """Run MCMC to obtain posterior samples.

    Parameters
    ----------
    num_mcmc_samples : int
        The total number of MCMC samples to run
    num_chains : int
        Number of separate MCMC chains to run
    iprint : bool, optional (True)
        Whether or not to print iteration number
    method : type, optional (pints.PopulationMCMC)
        Which MCMC method (pints.MCMCSampler) to use
    enforce_convergence : bool, optional (False)
        Whether to raise an error if the Rhat convergence statistic is
        greater than 1.05.

    Returns
    -------
    np.ndarray
        One MCMC chain with the first half discarded as burn-in, with shape
        (num_mcmc_samples // 2, num_parameters)
    """
    starting_points = self.get_initial_conditions(num_chains)

    mcmc = pints.MCMCController(
        self.posterior, num_chains, starting_points, method=method)
    mcmc.set_max_iterations(num_mcmc_samples)
    mcmc.set_log_to_screen(iprint)
    chains = mcmc.run()

    # Check convergence using Rhat on the second half of each chain
    rs = pints.rhat(chains[:, num_mcmc_samples // 2:, :])
    if max(rs) > 1.05:
        message = 'MCMC chains failed to converge, R={}'.format(str(rs))
        if enforce_convergence:
            raise RuntimeError(message)
        else:
            warnings.warn(message)

    # Get one chain and discard burn in
    chain = chains[0][num_mcmc_samples // 2:]
    return chain
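The convergence check in run_mcmc can be exercised on its own; a small sketch, assuming synthetic Gaussian chains in pints' (chains, iterations, parameters) layout.

# Standalone sketch of the Rhat check used above, on synthetic chains.
import numpy as np
import pints

n_chains, n_iters, n_params = 3, 2000, 2
chains = np.random.normal(0, 1, size=(n_chains, n_iters, n_params))

# Rhat on the second half of every chain; values near 1 indicate convergence
rs = pints.rhat(chains[:, n_iters // 2:, :])
print(rs, max(rs) <= 1.05)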
def _make_summary(self):
    """
    Calculates posterior summaries for all parameters.
    """
    stacked = np.vstack(self._chains)

    # Mean, std and quantiles
    self._mean = np.mean(stacked, axis=0)
    self._std = np.std(stacked, axis=0)
    self._quantiles = np.percentile(
        stacked, [2.5, 25, 50, 75, 97.5], axis=0)

    # Rhat
    self._rhat = pints.rhat(self._chains)

    # Effective sample size, summed over chains
    self._ess = np.zeros(self._n_parameters)
    for chain in self._chains:
        self._ess += pints.effective_sample_size(chain)
    if self._time is not None:
        self._ess_per_second = np.array(self._ess) / self._time

    # Create one summary row per parameter
    for i in range(self._n_parameters):
        row = [
            self._parameter_names[i],
            self._mean[i],
            self._std[i],
            self._quantiles[0, i],
            self._quantiles[1, i],
            self._quantiles[2, i],
            self._quantiles[3, i],
            self._quantiles[4, i],
            self._rhat[i],
            self._ess[i],
        ]
        if self._time is not None:
            row.append(self._ess_per_second[i])
        self._summary_list.append(row)
mcmc = pints.MCMCController(log_posterior, 3, xs, method=pints.HaarioACMC)
sample_size = Ldash * L // N_effective1[i]
mcmc.set_max_iterations(sample_size)
mcmc.set_initial_phase_iterations(sample_size // 4)
mcmc.set_log_to_screen(False)
chains = mcmc.run()

s = sample_size // 4 + 1  # HMC: s = 1
b = False
while s < sample_size - L:
    chains_cut = chains[:, sample_size // 4:s + 1]  # HMC: chains_cut = chains[:, 0:s+1]
    rhat = pints.rhat(chains_cut)
    s += 1
    if rhat[0] < 1.05:
        b = True
        break
if not b:
    d += 1
    continue

chain0 = chains[0]
N_effective2 = pints.effective_sample_size(chain0)
chain = chain0[s:]
N_effective3 = pints.effective_sample_size(chain)
np.savetxt('data/kCatsAllData.txt', noisy_data)

# Fit to this data using MCMC
default_params_noise = np.hstack([default_params, 0.1])
problem = pints.MultiOutputProblem(model, times, noisy_data)
log_prior = pints.UniformLogPrior(pints.RectangularBoundaries(
    0.1 * default_params_noise, 10 * default_params_noise))
log_likelihood = MultiplicativeGaussianLogLikelihood(problem)
log_posterior = pints.LogPosterior(log_likelihood, log_prior)

mcmc = pints.MCMCController(
    log_posterior, 3,
    [default_params_noise, default_params_noise * 0.8, default_params_noise * 1.25],
    method=pints.HaarioBardenetACMC)
mcmc.set_parallel(False)
mcmc.set_max_iterations(2000)
chains = mcmc.run()

reps = 1
while (max(pints.rhat(chains[:, :, :])) > 1.10
        or min(pints.effective_sample_size(chains[0, :, :-1])) < 450):
    mcmc = pints.MCMCController(log_posterior, 3, chains[:, -1, :],
                                method=pints.HaarioBardenetACMC)
    mcmc.set_parallel(False)
    mcmc.set_max_iterations(2000)
    mcmc.set_log_to_screen(False)
    chain = mcmc.run()
    new_chains = np.zeros(
        (chains.shape[0], chains.shape[1] + 2000, chains.shape[2]))
    new_chains[:, :-2000, :] = chains
    new_chains[:, -2000:, :] = chain[:, :, :]
    chains = new_chains
    reps += 1

print(pints.rhat(chains[:, :, :]))

for i in range(3):
    np.savetxt('results/kCatsAll_' + str(i + 1), chains[i, :, :])
def Algorithm1WithConvergence(L, N, model, log_prior, log_prior_used, times,
                              noise, noise_used, MCMCmethod, param=0):
    time_start = time.time()
    sum_p = 0
    sum_p_theta = 0
    sum_p_y = 0
    c = 1
    for i in range(c):
        print(i)
        res1 = []
        res2 = []
        thetatildeArray = []  # np.empty(N, dtype=float)
        ytildeArray = []  # np.empty(N, dtype=float)
        d = 0
        for n in range(N):
            print(n)
            thetatilde = log_prior.sample(n=1)[0]
            org_values = model.simulate(thetatilde, times)
            ytilde_n = org_values + np.random.normal(0, noise, org_values.shape)
            problem = pints.SingleOutputProblem(model, times, ytilde_n)
            log_likelihood_used = pints.GaussianKnownSigmaLogLikelihood(
                problem, [noise_used])
            log_posterior = pints.LogPosterior(log_likelihood_used, log_prior_used)

            # Start from thetatilde
            xs = [thetatilde, thetatilde * 1.01, thetatilde * 0.99]
            isinf = False
            for x in xs:
                # print(x)
                if math.isinf(log_posterior.evaluateS1(x)[0]):
                    isinf = True
                    d += 1
                    break
            if isinf:
                print('isinf:', isinf)
                continue

            # Run Markov chain L steps from thetatilde
            mcmc = pints.MCMCController(log_posterior, len(xs), xs, method=MCMCmethod)

            # Add stopping criterion
            sample_size = 3000
            mcmc.set_max_iterations(sample_size)

            # Start adapting after sample_size // 4 iterations
            mcmc.set_initial_phase_iterations(sample_size // 4)

            # Disable logging mode
            mcmc.set_log_to_screen(False)
            chains = mcmc.run()

            s = sample_size // 4 + 1
            b = False
            while s < sample_size:
                chains_cut = chains[:, sample_size // 4:s + 1]  # HMC: chains_cut = chains[:, 0:s+1]
                rhat = pints.rhat(chains_cut)
                s += 1
                if rhat[0] < 1.05:
                    print('converge')
                    b = True
                    break
            if not b:
                d += 1
                continue
            print(s)

            thetatilde_n = chains[0][(s + sample_size) // 2 - 1]
            print(thetatilde)
            thetatildeArray.append(thetatilde_n[param])
            ytildeArray.append(ytilde_n[param])
            res1.append((thetatilde_n[param], ytilde_n[param]))

        thetaArray = np.empty(N - d, dtype=float)
        yArray = np.empty(N - d, dtype=float)
        for n in range(N - d):
            theta_n = log_prior.sample(n=1)[0]
            org_values = model.simulate(theta_n, times)
            y_n = org_values + np.random.normal(0, noise, org_values.shape)
            thetaArray[n] = theta_n[param]
            yArray[n] = y_n[param]
            res2.append((theta_n[param], y_n[param]))

        p = ks2d2s(thetatildeArray, ytildeArray, thetaArray, yArray)
        statistic_theta, p_theta = ks_2samp(thetatildeArray, thetaArray)
        statistic_y, p_y = ks_2samp(ytildeArray, yArray)
        sum_p += p
        sum_p_theta += p_theta
        sum_p_y += p_y

    time_end = time.time()
    duration = time_end - time_start
    average_p = sum_p / c
    average_p_theta = sum_p_theta / c
    average_p_y = sum_p_y / c
    print('average_p:', average_p)
    print('average_p_theta:', average_p_theta)
    print('average_p_y:', average_p_y)
    return (average_p, average_p_theta, average_p_y, duration,
            thetatildeArray, thetaArray, ytildeArray, yArray)
def run_pints(problem, likelihood, x0, num_mcmc_samples, num_chains=3,
              log_prior=None, likelihood_args=None, enforce_convergence=False,
              mcmc_method=None):
    """Perform inference with Pints using a specified model and likelihood.

    Parameters
    ----------
    problem : pints.Problem
        Pints problem holding the times and data
    likelihood : pints.ProblemLogLikelihood
        Pints likelihood for the data
    x0 : array_like of float
        Starting point of model parameters.
    num_mcmc_samples : int
        Total number of MCMC samples.
    num_chains : int
        Number of separate MCMC chains.
    log_prior : pints.LogPrior
        Prior distribution on all parameters in the likelihood. If None, a
        uniform prior from 0 to 1e6 is chosen for all parameters.
    likelihood_args : list
        Any other arguments besides the pints problem which must be provided
        when instantiating the likelihood.
    enforce_convergence : bool
        Whether to raise an error if the chains have not converged. After
        finishing the MCMC chain, the Rhat value is calculated, and any value
        of Rhat greater than 1.05 is assumed to indicate lack of convergence.
    mcmc_method : type
        Any MCMC sampler class implemented in Pints.

    Returns
    -------
    np.ndarray
        MCMC samples of posterior. One chain is provided with the first half
        discarded as burn-in.
    """
    if likelihood_args is None:
        log_likelihood = likelihood(problem)
    else:
        log_likelihood = likelihood(problem, *likelihood_args)

    # Get the number of parameters to infer = model params plus noise params
    num_params = len(x0)

    if log_prior is None:
        log_prior = pints.UniformLogPrior([0] * num_params, [1e6] * num_params)
    log_posterior = pints.LogPosterior(log_likelihood, log_prior)

    x0 = [np.array(x0), 1.1 * np.array(x0), 0.9 * np.array(x0)]

    # Run MCMC
    if mcmc_method is None:
        mcmc = pints.MCMCController(log_posterior, num_chains, x0)
    else:
        mcmc = pints.MCMCController(log_posterior, num_chains, x0, method=mcmc_method)
    mcmc.set_max_iterations(num_mcmc_samples)
    mcmc.set_log_to_screen(True)
    chains = mcmc.run()

    # Check convergence
    rs = pints.rhat(chains[:, num_mcmc_samples // 2:, :])
    if max(rs) > 1.05:
        message = 'MCMC chains failed to converge, R={}'.format(str(rs))
        if enforce_convergence:
            pints.plot.trace(chains)
            plt.show()
            raise RuntimeError(message)
        else:
            warnings.warn(message)

    # Get one chain, discard first half burn in
    chain = chains[0][num_mcmc_samples // 2:]
    return chain
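A possible call to run_pints(), assuming a logistic toy problem with IID Gaussian noise; the data generation, starting point and sampler choice below are illustrative only.

# Illustrative call to run_pints(); data generation and starting point are assumptions.
import numpy as np
import pints
import pints.toy

model = pints.toy.LogisticModel()
times = np.linspace(0, 1000, 500)
values = model.simulate([0.015, 500], times)
data = values + np.random.normal(0, 5, values.shape)
problem = pints.SingleOutputProblem(model, times, data)

# pints.GaussianLogLikelihood adds one noise parameter, so x0 has three entries
x0 = [0.02, 400, 2.0]
chain = run_pints(problem, pints.GaussianLogLikelihood, x0,
                  num_mcmc_samples=4000, mcmc_method=pints.HaarioBardenetACMC)
print(chain.shape)  # (2000, 3): one chain with the first half discarded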
log_prior = pints.UniformLogPrior(pints.RectangularBoundaries(
    0.1 * default_params_noise, 10 * default_params_noise))
log_likelihood = MultiplicativeGaussianLogLikelihood(problem)
log_posterior = pints.LogPosterior(log_likelihood, log_prior)

mcmc = pints.MCMCController(
    log_posterior, 3,
    [default_params_noise, default_params_noise * 0.8, default_params_noise * 1.25],
    method=pints.HaarioBardenetACMC)
mcmc.set_parallel(False)
mcmc.set_max_iterations(2000)
chains = mcmc.run()

reps = 1
while max(pints.rhat(chains[:, :, :])) > 1.10:
    mcmc = pints.MCMCController(log_posterior, 3, chains[:, -1, :],
                                method=pints.HaarioBardenetACMC)
    mcmc.set_parallel(False)
    mcmc.set_max_iterations(2000)
    mcmc.set_log_to_screen(False)
    chain = mcmc.run()
    new_chains = np.zeros(
        (chains.shape[0], chains.shape[1] + 2000, chains.shape[2]))
    new_chains[:, :-2000, :] = chains
    new_chains[:, -2000:, :] = chain[:, :, :]
    chains = new_chains
    reps += 1

print(pints.rhat(chains[:, :, :]))
log_prior = pints.UniformLogPrior(pints.RectangularBoundaries(
    0.1 * default_params_noise, 10 * default_params_noise))
log_likelihood = MultiplicativeGaussianLogLikelihood(problem)
log_posterior = pints.LogPosterior(log_likelihood, log_prior)

mcmc = pints.MCMCController(
    log_posterior, 3,
    [default_params_noise, default_params_noise * 0.8, default_params_noise * 1.25],
    method=pints.HaarioBardenetACMC)
mcmc.set_parallel(False)
mcmc.set_max_iterations(200)
chains = mcmc.run()

reps = 1
while (max(pints.rhat(chains[:, :, :])) > 1.10
        or min(pints.effective_sample_size(chains[0, :, :-1])) < 450):
    mcmc = pints.MCMCController(log_posterior, 3, chains[:, -1, :],
                                method=pints.HaarioBardenetACMC)
    mcmc.set_parallel(False)
    mcmc.set_max_iterations(2000)
    mcmc.set_log_to_screen(False)
    chain = mcmc.run()
    new_chains = np.zeros(
        (chains.shape[0], chains.shape[1] + 2000, chains.shape[2]))
    new_chains[:, :-2000, :] = chains
    new_chains[:, -2000:, :] = chain[:, :, :]
    chains = new_chains
    reps += 1
def run_figure3(num_mcmc_samples=20000, num_mcmc_chains=3, num_runs=1,
                output_dir='./'):
    """Run the block noise process.

    This function runs the simulations and saves the results to pickle.
    """
    random.seed(1234)
    np.random.seed(1234)

    iid_runs = []
    correct_infer_runs = []
    block_runs_theta = []
    block_runs_cov = []
    for run in range(num_runs):
        # Make a synthetic time series
        times, values, data = generate_time_series(
            model='logistic', noise='blocks', n_times=500)

        # Make Pints model and problem
        model = pints.toy.LogisticModel()
        problem = pints.SingleOutputProblem(model, times, data)

        # Initial conditions for model parameters
        model_starting_point = [0.08, 50]

        # Run MCMC for IID posterior
        likelihood = pints.GaussianLogLikelihood
        x0 = model_starting_point + [2]
        posterior_iid = run_pints(problem, likelihood, x0, num_mcmc_samples)
        iid_runs.append(posterior_iid)

        # Run with fixed correct blocks
        likelihood = flexnoise.KnownBlocksCovarianceLogLikelihood
        kernel = flexnoise.kernels.LaplacianKernel(None)
        blocks = []
        for i in range(5):
            blocks.append(list(range(100 * i, 100 * i + 100)))
        x0 = model_starting_point.copy()
        for i in range(len(blocks)):
            x0 += [-1.0, 0.0]
        log_prior = pints.UniformLogPrior([0, 0] + [-1e6] * 10, [1e6] * 12)
        posterior_known_blocks_infer = run_pints(
            problem,
            likelihood,
            x0,
            num_mcmc_samples,
            likelihood_args=[kernel, blocks, True],
            log_prior=log_prior)
        posterior_known_blocks_infer = posterior_known_blocks_infer[:, :2]
        correct_infer_runs.append(posterior_known_blocks_infer)

        # Run block noise process
        num_mcmc_samples = 200
        model_prior = pints.UniformLogPrior([0] * 2, [1e6] * 2)
        kernel = flexnoise.kernels.LaplacianKernel
        bnp = flexnoise.BlockNoiseProcess(
            problem, kernel, np.array(model_starting_point), model_prior)
        theta_chains = []
        cov_chains = []
        for _ in range(num_mcmc_chains):
            theta, cov = bnp.run_mcmc(num_mcmc_samples)
            theta_chains.append(np.array(theta))
            cov_chains.append(np.array(cov))
        rs = pints.rhat(np.array(theta_chains))
        if max(rs) > 1.05:
            warnings.warn('MCMC chains failed to converge')
        block_runs_theta.append(theta)
        block_runs_cov.append(cov)

    # Save all results to pickle
    results = [
        iid_runs, correct_infer_runs, block_runs_theta, block_runs_cov,
        times, data, values, model, problem
    ]
    fname = os.path.join(output_dir, 'fig3_data.pkl')
    with open(fname, 'wb') as f:
        pickle.dump(results, f)
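If needed, the pickle written by run_figure3() can be reloaded as follows; a minimal sketch assuming the default output_dir='./'.

# Illustrative: reload the results saved by run_figure3() (default output_dir assumed).
import os
import pickle

with open(os.path.join('./', 'fig3_data.pkl'), 'rb') as f:
    (iid_runs, correct_infer_runs, block_runs_theta, block_runs_cov,
     times, data, values, model, problem) = pickle.load(f)

print(len(iid_runs), iid_runs[0].shape)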