def sample(self, n_samples, warmup=None, n_chains=4, threshold=None, initials=None,
           algorithm='nuts', n_evidence=None, **kwargs):
    r"""Sample the posterior distribution of BOLFI.

    Here the likelihood is defined through the cumulative density function
    of the standard normal distribution:

    L(\theta) \propto F((h-\mu(\theta)) / \sigma(\theta))

    where h is the threshold, and \mu(\theta) and \sigma(\theta) are the posterior mean and
    (noisy) standard deviation of the associated Gaussian process.

    The sampling is performed with an MCMC sampler (the No-U-Turn Sampler, NUTS).

    Parameters
    ----------
    n_samples : int
        Number of requested samples from the posterior for each chain. This includes warmup,
        and note that the effective sample size is usually considerably smaller.
    warmup : int, optional
        Length of warmup sequence in MCMC sampling. Defaults to n_samples//2
        (a falsy value such as 0 also falls back to this default).
    n_chains : int, optional
        Number of independent chains.
    threshold : float, optional
        The threshold (bandwidth) for posterior (give as log if log discrepancy).
    initials : np.array of shape (n_chains, n_params), optional
        Initial values for the sampled parameters for each chain.
        Defaults to best evidence points.
    algorithm : string, optional
        Sampling algorithm to use. Currently only 'nuts' is supported; the value is
        not inspected and NUTS is always used.
    n_evidence : int
        If the regression model is not fitted yet, specify the amount of evidence
    **kwargs
        Forwarded unchanged to ``mcmc.nuts``.

    Returns
    -------
    BolfiSample

    Raises
    ------
    ValueError
        If ``initials`` does not have shape (n_chains, n_params), or if there are not
        enough candidate starting points with a finite log-posterior for all chains.

    """
    if self.state['n_batches'] == 0:
        self.fit(n_evidence)

    # TODO: other MCMC algorithms

    posterior = self.extract_posterior(threshold)
    warmup = warmup or n_samples // 2

    # Unless given, select the evidence points with smallest discrepancy
    if initials is not None:
        initials = np.asarray(initials)
        if initials.shape != (n_chains, self.target_model.input_dim):
            raise ValueError("The shape of initials must be (n_chains, n_params).")
    else:
        inds = np.argsort(self.target_model.Y[:, 0])
        initials = np.asarray(self.target_model.X[inds])

    # Bound the search by the candidates actually available; this works for both
    # user-supplied and default initial points.
    n_candidates = len(initials)

    self.target_model.is_sampling = True  # enables caching for default RBF kernel
    try:
        tasks_ids = []
        ii_initial = 0

        # sampling is embarrassingly parallel, so depending on self.client this may parallelize
        for ii in range(n_chains):
            # discard bad initialization points; the bounds test comes first so that
            # running out of candidates never indexes past the end of `initials`
            while (ii_initial < n_candidates
                   and np.isinf(posterior.logpdf(initials[ii_initial]))):
                ii_initial += 1
            if ii_initial >= n_candidates:
                raise ValueError(
                    "BOLFI.sample: Cannot find enough acceptable initialization points!")

            seed = get_sub_seed(self.seed, ii)
            tasks_ids.append(
                self.client.apply(
                    mcmc.nuts,
                    n_samples,
                    initials[ii_initial],
                    posterior.logpdf,
                    posterior.gradient_logpdf,
                    n_adapt=warmup,
                    seed=seed,
                    **kwargs))

            ii_initial += 1

        # get results from completed tasks or run sampling (client-specific)
        chains = np.asarray([self.client.get_result(task_id) for task_id in tasks_ids])

        print(
            "{} chains of {} iterations acquired. Effective sample size and Rhat for each "
            "parameter:".format(n_chains, n_samples))
        for ii, node in enumerate(self.parameter_names):
            print(node, mcmc.eff_sample_size(chains[:, :, ii]),
                  mcmc.gelman_rubin(chains[:, :, ii]))
    finally:
        # Always leave sampling mode, even when initialization or a chain fails.
        self.target_model.is_sampling = False

    return BolfiSample(
        method_name='BOLFI',
        chains=chains,
        parameter_names=self.parameter_names,
        warmup=warmup,
        threshold=float(posterior.threshold),
        n_sim=self.state['n_sim'],
        seed=self.seed)
def test_ESS():
    """Check mcmc.eff_sample_size on chains_Stan against ess_Stan (abs. tolerance 0.01)."""
    # NOTE(review): chains_Stan / ess_Stan are presumably reference values produced
    # with Stan and defined elsewhere in the module -- confirm.
    estimated_ess = mcmc.eff_sample_size(chains_Stan)
    within_tolerance = np.isclose(estimated_ess, ess_Stan, atol=0.01)
    assert within_tolerance