示例#1
0
    def _set_rejection_round(self, round):
        """Set up the rejection sampler for the given round.

        Refreshes the round info using ``self.state['round']``, derives the
        seed for the round, and stores both a ``RandomState`` seeded with it
        (``self._round_random_state``) and a new ``Rejection`` instance
        (``self._rejection``) on the object.

        Parameters
        ----------
        round : int
            Round number. Round 0 uses ``self.seed`` directly; every other
            round uses a sub-seed derived from ``self.seed`` and the round.

        """
        self._update_round_info(self.state['round'])

        # A dedicated seed per round keeps the results of each round reproducible
        if round == 0:
            round_seed = self.seed
        else:
            round_seed = get_sub_seed(self.seed, round)

        self._round_random_state = np.random.RandomState(round_seed)

        self._rejection = Rejection(self.model,
                                    discrepancy_name=self.discrepancy_name,
                                    output_names=self.output_names,
                                    batch_size=self.batch_size,
                                    seed=round_seed,
                                    max_parallel_batches=self.max_parallel_batches)
示例#2
0
    def _init_new_round(self):
        """Start the round stored in ``self.state['round']``.

        Logs a banner for the round, derives the seed for the round, and
        creates a seeded ``RandomState`` and a new ``Rejection`` instance whose
        objective is set to ``self.objective['n_samples']`` samples with the
        current population threshold.
        """
        round_index = self.state['round']

        rule = '-' * 16
        banner = '%s Starting round %d %s' % (rule, round_index, rule)
        logger.info(banner)

        # A dedicated seed per round keeps the results of each round reproducible
        if round_index == 0:
            round_seed = self.seed
        else:
            round_seed = get_sub_seed(self.seed, round_index)
        self._round_random_state = np.random.RandomState(round_seed)

        self._rejection = Rejection(
            self.model,
            discrepancy_name=self.discrepancy_name,
            output_names=self.output_names,
            batch_size=self.batch_size,
            seed=round_seed,
            max_parallel_batches=self.max_parallel_batches)

        self._rejection.set_objective(
            self.objective['n_samples'],
            threshold=self.current_population_threshold)
示例#3
0
    def sample(self,
               n_samples,
               warmup=None,
               n_chains=4,
               threshold=None,
               initials=None,
               algorithm='nuts',
               n_evidence=None,
               **kwargs):
        r"""Sample the posterior distribution of BOLFI.

        Here the likelihood is defined through the cumulative density function
        of the standard normal distribution:

        L(\theta) \propto F((h-\mu(\theta)) / \sigma(\theta))

        where h is the threshold, and \mu(\theta) and \sigma(\theta) are the posterior mean and
        (noisy) standard deviation of the associated Gaussian process.

        The sampling is performed with an MCMC sampler (the No-U-Turn Sampler, NUTS).

        Parameters
        ----------
        n_samples : int
            Number of requested samples from the posterior for each chain. This includes warmup,
            and note that the effective sample size is usually considerably smaller.
        warmup : int, optional
            Length of warmup sequence in MCMC sampling. Defaults to n_samples//2
            (also used when a falsy value such as 0 is given).
        n_chains : int, optional
            Number of independent chains.
        threshold : float, optional
            The threshold (bandwidth) for posterior (give as log if log discrepancy).
        initials : np.array of shape (n_chains, n_params), optional
            Initial values for the sampled parameters for each chain.
            Defaults to the evidence points sorted by ascending discrepancy.
        algorithm : string, optional
            Sampling algorithm to use. Currently only 'nuts' is supported; the value is
            not consulted by this method and NUTS is always used.
        n_evidence : int
            If the regression model is not fitted yet, specify the amount of evidence.
        **kwargs
            Additional keyword arguments forwarded to ``mcmc.nuts``.

        Returns
        -------
        BolfiSample

        Raises
        ------
        ValueError
            If `initials` does not have shape (n_chains, n_params), or if there are not
            enough initialization points with a finite posterior log-density.

        """
        if self.state['n_batches'] == 0:
            self.fit(n_evidence)

        # TODO: other MCMC algorithms

        posterior = self.extract_posterior(threshold)
        warmup = warmup or n_samples // 2

        # Unless given, select the evidence points with smallest discrepancy
        if initials is not None:
            if np.asarray(initials).shape != (n_chains, self.target_model.input_dim):
                raise ValueError("The shape of initials must be (n_chains, n_params).")
        else:
            inds = np.argsort(self.target_model.Y[:, 0])
            initials = np.asarray(self.target_model.X[inds])

        # Number of candidate starting points, valid for both user-given and default initials
        n_candidates = len(initials)

        self.target_model.is_sampling = True  # enables caching for default RBF kernel
        try:
            tasks_ids = []
            ii_initial = 0

            # sampling is embarrassingly parallel, so depending on self.client this may
            # parallelize
            for ii in range(n_chains):
                seed = get_sub_seed(self.seed, ii)

                # discard bad initialization points; the bound is checked before indexing
                # so that running out of candidates gives the error below, not an IndexError
                while (ii_initial < n_candidates
                       and np.isinf(posterior.logpdf(initials[ii_initial]))):
                    ii_initial += 1
                if ii_initial >= n_candidates:
                    raise ValueError(
                        "BOLFI.sample: Cannot find enough acceptable initialization points!")

                tasks_ids.append(
                    self.client.apply(
                        mcmc.nuts,
                        n_samples,
                        initials[ii_initial],
                        posterior.logpdf,
                        posterior.gradient_logpdf,
                        n_adapt=warmup,
                        seed=seed,
                        **kwargs))
                ii_initial += 1

            # get results from completed tasks or run sampling (client-specific)
            chains = np.asarray([self.client.get_result(task_id) for task_id in tasks_ids])

            print(
                "{} chains of {} iterations acquired. Effective sample size and Rhat for each "
                "parameter:".format(n_chains, n_samples))
            for ii, node in enumerate(self.parameter_names):
                print(node, mcmc.eff_sample_size(chains[:, :, ii]),
                      mcmc.gelman_rubin(chains[:, :, ii]))
        finally:
            # always leave sampling mode, even when sampling fails
            self.target_model.is_sampling = False

        return BolfiSample(
            method_name='BOLFI',
            chains=chains,
            parameter_names=self.parameter_names,
            warmup=warmup,
            threshold=float(posterior.threshold),
            n_sim=self.state['n_sim'],
            seed=self.seed)
示例#4
0
    def sample(self,
               n_samples,
               warmup=None,
               n_chains=4,
               initials=None,
               algorithm='nuts',
               sigma_proposals=None,
               n_evidence=None,
               *args,
               **kwargs):
        """Sample from the posterior distribution of BOLFIRE.

        Sampling is performed with an MCMC sampler.

        Parameters
        ----------
        n_samples: int
            Number of requested samples from the posterior for each chain. This includes warmup,
            and note that the effective sample size is usually considerably smaller.
        warmup: int, optional
            Length of warmup sequence in MCMC sampling. Defaults to n_samples//2
            (also used when a falsy value such as 0 is given).
        n_chains: int, optional
            Number of independent chains.
        initials: np.ndarray (n_chains, n_params), optional
            Initial values for the sampled parameters for each chain.
            Defaults to the evidence points sorted by ascending target value.
        algorithm: str, optional
            Sampling algorithm to use, either 'nuts' or 'metropolis'.
        sigma_proposals: np.ndarray
            Standard deviations for Gaussian proposals of each parameter for Metropolis-Hastings.
            Required when `algorithm` is 'metropolis'.
        n_evidence: int, optional
            If the surrogate model is not fitted yet, specify the amount of evidence.
        *args
            Additional positional arguments forwarded to ``BOLFIRESample``.
        **kwargs
            Additional keyword arguments forwarded both to the selected sampler and to
            ``BOLFIRESample``.

        Returns
        -------
        BOLFIRESample

        Raises
        ------
        ValueError
            If `algorithm` is not supported, if `sigma_proposals` is missing or has the wrong
            length for Metropolis sampling, if `initials` does not have shape
            (n_chains, n_params), or if there are not enough initialization points with a
            finite posterior log-density.

        """
        # Fit posterior in case not done
        if self.state['n_batches'] == 0:
            self.fit(n_evidence)

        # Check algorithm
        if algorithm not in ['nuts', 'metropolis']:
            raise ValueError('The given algorithm is not supported.')

        # Check standard deviations of Gaussian proposals when using Metropolis-Hastings
        if algorithm == 'metropolis':
            if sigma_proposals is None:
                raise ValueError('Gaussian proposal standard deviations have '
                                 'to be provided for Metropolis-sampling.')
            elif sigma_proposals.shape[0] != self.target_model.input_dim:
                raise ValueError('The length of Gaussian proposal standard '
                                 'deviations must be n_params.')

        posterior = self.extract_result()
        warmup = warmup or n_samples // 2

        # Unless given, select the evidence points with best likelihood ratio
        if initials is not None:
            if np.asarray(initials).shape != (n_chains,
                                              self.target_model.input_dim):
                raise ValueError(
                    'The shape of initials must be (n_chains, n_params).')
        else:
            inds = np.argsort(self.target_model.Y[:, 0])
            initials = np.asarray(self.target_model.X[inds])

        # Number of candidate starting points, valid for both user-given and default initials
        n_candidates = len(initials)

        # Enable caching for default RBF kernel
        self.target_model.is_sampling = True
        try:
            tasks_ids = []
            ii_initial = 0
            for ii in range(n_chains):
                seed = get_sub_seed(self.seed, ii)

                # Discard bad initialization points; the bound is checked before indexing
                # so that running out of candidates gives the error below, not an IndexError
                while (ii_initial < n_candidates
                       and np.isinf(posterior.logpdf(initials[ii_initial]))):
                    ii_initial += 1
                if ii_initial >= n_candidates:
                    raise ValueError(
                        'BOLFIRE.sample: Cannot find enough acceptable '
                        'initialization points!')

                if algorithm == 'nuts':
                    tasks_ids.append(
                        self.client.apply(mcmc.nuts,
                                          n_samples,
                                          initials[ii_initial],
                                          posterior.logpdf,
                                          posterior.gradient_logpdf,
                                          n_adapt=warmup,
                                          seed=seed,
                                          **kwargs))

                elif algorithm == 'metropolis':
                    tasks_ids.append(
                        self.client.apply(mcmc.metropolis,
                                          n_samples,
                                          initials[ii_initial],
                                          posterior.logpdf,
                                          sigma_proposals,
                                          warmup,
                                          seed=seed,
                                          **kwargs))

                ii_initial += 1

            # Get results from completed tasks or run sampling (client-specific)
            chains = np.asarray([self.client.get_result(task_id) for task_id in tasks_ids])

            logger.info(f'{n_chains} chains of {n_samples} iterations acquired. '
                        'Effective sample size and Rhat for each parameter:')
            for ii, node in enumerate(self.parameter_names):
                logger.info(f'{node} {mcmc.eff_sample_size(chains[:, :, ii])} '
                            f'{mcmc.gelman_rubin_statistic(chains[:, :, ii])}')
        finally:
            # Always leave sampling mode, even when sampling fails
            self.target_model.is_sampling = False

        return BOLFIRESample(method_name='BOLFIRE',
                             chains=chains,
                             parameter_names=self.parameter_names,
                             warmup=warmup,
                             n_sim=self.state['n_sim'],
                             seed=self.seed,
                             *args,
                             **kwargs)