Example #1
File: snpe_base.py Project: boyali/sbi
    def _run_sims(
        self,
        round_,
        num_simulations_per_round,
    ):
        """
        Runs the simulations at the beginning of each round.

        Args:
            round_: int. Index of the current round.
            num_simulations_per_round: int. Number of simulations in the current
                round.

        Updates (in place):
            self._parameter_bank: torch.Tensor. theta used for training.
            self._observation_bank: torch.Tensor. x used for training.
            self._prior_masks: torch.Tensor. Masks of 0/1 for each prior sample,
                indicating whether the prior sample will be used in the current
                round.
        """
        # Generate parameters from prior in first round, and from most recent posterior
        # estimate in subsequent rounds.
        if round_ == 0:
            parameters, observations = simulate_in_batches(
                simulator=self._simulator,
                parameter_sample_fn=lambda num_samples: self._prior.sample(
                    (num_samples,)
                ),
                num_samples=np.maximum(
                    0, num_simulations_per_round - self._num_pilot_samples
                ),
                simulation_batch_size=self._simulation_batch_size,
                x_dim=self._true_observation.shape[1:],  # do not pass batch_dim
            )
            parameters = torch.cat(
                (parameters, self.pilot_parameters[:num_simulations_per_round]),
                dim=0,
            )
            observations = torch.cat(
                (observations, self.pilot_observations[:num_simulations_per_round]),
                dim=0,
            )
        else:
            parameters, observations = simulate_in_batches(
                simulator=self._simulator,
                parameter_sample_fn=lambda num_samples: self._neural_posterior.sample(
                    num_samples, context=self._true_observation
                ),
                num_samples=num_simulations_per_round,
                simulation_batch_size=self._simulation_batch_size,
                x_dim=self._true_observation.shape[1:],  # do not pass batch_dim
            )

        # Store (parameter, observation) pairs.
        self._parameter_bank.append(parameters)
        self._observation_bank.append(observations)
    self._prior_masks.append(
            torch.ones(num_simulations_per_round, 1)
            if round_ == 0
            else torch.zeros(num_simulations_per_round, 1)
        )
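
The round switch above is the core of sequential NPE: round 0 draws parameters from the prior, later rounds from the current posterior estimate conditioned on the true observation. A minimal self-contained sketch of that pattern, with hypothetical stand-in distributions:

import torch
from torch.distributions import MultivariateNormal

# Hypothetical stand-ins for the prior and a learned posterior estimate.
prior = MultivariateNormal(torch.zeros(2), torch.eye(2))
posterior_estimate = MultivariateNormal(torch.ones(2), 0.5 * torch.eye(2))

def propose(round_, num_samples):
    # Round 0 samples from the prior; subsequent rounds from the posterior.
    proposal = prior if round_ == 0 else posterior_estimate
    return proposal.sample((num_samples,))

theta_round0 = propose(0, 100)  # prior samples, shape (100, 2)
theta_round1 = propose(1, 100)  # "posterior" samples, shape (100, 2)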
Example #2
def test_benchmarking_sp(sim_batch_size):

    num_simulations = 100
    theta = torch.zeros(num_simulations, 2)
    show_pbar = True

    tic = time.time()
    simulate_in_batches(
        slow_linear_gaussian,
        theta,
        sim_batch_size,
        num_workers=1,
        show_progress_bars=show_pbar,
    )
    toc_sp = time.time() - tic

    tic = time.time()
    simulate_in_batches(
        slow_linear_gaussian,
        theta,
        sim_batch_size,
        num_workers=10,
        show_progress_bars=show_pbar,
    )
    toc_joblib = time.time() - tic

    # Allow joblib to be 10 percent slower.
    assert toc_joblib <= toc_sp * 1.1
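
The benchmark relies on `slow_linear_gaussian`, defined elsewhere in the test suite; the point is that each simulation is artificially slow, so parallelizing over 10 workers pays off, which is what the final assertion checks. A plausible sketch (the per-simulation delay is an assumption):

import time
import torch

def slow_linear_gaussian(theta):
    """Linear Gaussian simulator with an artificial delay per parameter set."""
    x = []
    for t in theta:
        time.sleep(0.05)  # assumed delay; emulates an expensive simulator
        x.append(t + torch.randn_like(t))
    return torch.stack(x)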
Example #3
def test_simulate_in_batches(
    num_sims,
    batch_size,
    simulator=diagonal_linear_gaussian,
    prior=BoxUniform(zeros(5), ones(5)),
):
    """Test combinations of num_sims and simulation_batch_size. """

    theta = prior.sample((num_sims,))
    simulate_in_batches(simulator, theta, batch_size)
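
Since this test provides defaults for the simulator and prior, it can be exercised directly. A self-contained sketch with assumed import paths and a toy stand-in for `diagonal_linear_gaussian`:

import torch
from sbi.utils import BoxUniform  # assumed import path
from sbi.simulators.simutils import simulate_in_batches  # assumed import path

def diagonal_linear_gaussian(theta):
    # Toy stand-in: identity mean with unit Gaussian noise.
    return theta + torch.randn_like(theta)

prior = BoxUniform(torch.zeros(5), torch.ones(5))
theta = prior.sample((100,))
x = simulate_in_batches(diagonal_linear_gaussian, theta, sim_batch_size=10)
assert x.shape == (100, 5)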
Example #4
def test_simulate_in_batches(
    num_sims,
    batch_size,
    simulator,
    prior=BoxUniform(zeros(5), ones(5)),
):
    """Test combinations of num_sims and simulation_batch_size."""

    simulator, prior = prepare_for_sbi(simulator, prior)
    theta = prior.sample((num_sims,))
    simulate_in_batches(simulator, theta, batch_size)
Example #5
def test_simulate_in_batches(
    num_samples,
    batch_size,
    simulator=linear_gaussian,
    prior=BoxUniform(torch.zeros(5), torch.ones(5)),
):
    """Test combinations of num_samples and simulation_batch_size."""

    simulate_in_batches(
        simulator,
        lambda n: prior.sample((n,)),
        num_samples,
        batch_size,
        torch.Size([5]),
    )
Example #6
    def __init__(
        self,
        simulator: Callable,
        prior,
        distance: Union[str, Callable] = "l2",
        num_workers: int = 1,
        simulation_batch_size: int = 1,
        show_progress_bars: bool = True,
    ) -> None:
        r"""Base class for Approximate Bayesian Computation methods.

        Args:
            simulator: A function that takes parameters $\theta$ and maps them to
                simulations, or observations, `x`, $\mathrm{sim}(\theta)\to x$. Any
                regular Python callable (i.e. function or class with `__call__` method)
                can be used.
            prior: A probability distribution that expresses prior knowledge about the
                parameters, e.g. which ranges are meaningful for them. Any
                object with `.log_prob()` and `.sample()` (for example, a PyTorch
                distribution) can be used.
            distance: Distance function to compare observed and simulated data. Can be
                a custom function or one of `l1`, `l2`, `mse`.
            num_workers: Number of parallel workers to use for simulations.
            simulation_batch_size: Number of parameter sets that the simulator
                maps to data x at once. If None, we simulate all parameter sets at the
                same time. If >= 1, the simulator has to process data of shape
                (simulation_batch_size, parameter_dimension).
            show_progress_bars: Whether to show a progress bar during simulation and
                sampling.
        """

        self.prior = prior
        self._simulator = simulator
        self._show_progress_bars = show_progress_bars

        # Select distance function.
        if isinstance(distance, str):
            distances = ["l1", "l2", "mse"]
            assert (
                distance in distances
            ), f"Distance function str must be one of {distances}."
            self.distance = self.choose_distance_function(distance_type=distance)

        self._batched_simulator = lambda theta: simulate_in_batches(
            simulator=self._simulator,
            theta=theta,
            sim_batch_size=simulation_batch_size,
            num_workers=num_workers,
            show_progress_bars=self._show_progress_bars,
        )

        self.logger = logging.getLogger(__name__)
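
The string options map to standard distances between observed and simulated data. Hypothetical standalone equivalents (not the library's own `choose_distance_function` output) could look like:

import torch

# Each compares an observed summary x_o against a batch of simulated x,
# returning one distance per simulation.
def l1(x_o, x):
    return torch.abs(x_o - x).sum(dim=-1)

def l2(x_o, x):
    return torch.norm(x_o - x, dim=-1)

def mse(x_o, x):
    return ((x_o - x) ** 2).mean(dim=-1)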
Example #7
def simulate_for_sbi(
    simulator: Callable,
    proposal: Any,
    num_simulations: int,
    num_workers: int = 1,
    simulation_batch_size: int = 1,
    show_progress_bar: bool = True,
) -> Tuple[Tensor, Tensor]:
    r"""
    Returns ($\theta, x$) pairs obtained from sampling the proposal and simulating.

    This function performs two steps:

    - Sample parameters $\theta$ from the `proposal`.
    - Simulate these parameters to obtain $x$.

    Args:
        simulator: A function that takes parameters $\theta$ and maps them to
            simulations, or observations, `x`, $\text{sim}(\theta)\to x$. Any
            regular Python callable (i.e. function or class with `__call__` method)
            can be used.
        proposal: Probability distribution that the parameters $\theta$ are sampled
            from.
        num_simulations: Number of simulations that are run.
        num_workers: Number of parallel workers to use for simulations.
        simulation_batch_size: Number of parameter sets that the simulator
            maps to data x at once. If None, we simulate all parameter sets at the
            same time. If >= 1, the simulator has to process data of shape
            (simulation_batch_size, parameter_dimension).
        show_progress_bar: Whether to show a progress bar while simulating. This
            does not affect whether a progress bar is shown while drawing samples
            from the proposal.

    Returns: Sampled parameters $\theta$ and simulation-outputs $x$.
    """

    check_if_proposal_has_default_x(proposal)

    theta = proposal.sample((num_simulations,))

    x = simulate_in_batches(
        simulator,
        theta,
        simulation_batch_size,
        num_workers,
        show_progress_bar,
    )

    return theta, x
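
A minimal end-to-end call, assuming the import paths below and a toy simulator:

import torch
from sbi.inference import simulate_for_sbi  # assumed import path
from sbi.utils import BoxUniform  # assumed import path

def toy_simulator(theta):
    # Adds unit Gaussian noise to each parameter set.
    return theta + torch.randn_like(theta)

proposal = BoxUniform(torch.zeros(3), torch.ones(3))
theta, x = simulate_for_sbi(
    toy_simulator,
    proposal,
    num_simulations=1000,
    simulation_batch_size=100,
)
# theta and x both have shape (1000, 3).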
Example #8
File: base.py Project: ulamaca/sbi
    def __init__(
        self,
        simulator: Callable,
        prior,
        num_workers: int = 1,
        simulation_batch_size: int = 1,
        device: str = "cpu",
        logging_level: Union[int, str] = "WARNING",
        summary_writer: Optional[SummaryWriter] = None,
        show_progress_bars: bool = True,
        show_round_summary: bool = False,
    ):
        r"""
        Base class for inference methods.

        Args:
            simulator: A function that takes parameters $\theta$ and maps them to
                simulations, or observations, `x`, $\mathrm{sim}(\theta)\to x$. Any
                regular Python callable (i.e. function or class with `__call__` method)
                can be used.
            prior: A probability distribution that expresses prior knowledge about the
                parameters, e.g. which ranges are meaningful for them. Any
                object with `.log_prob()` and `.sample()` (for example, a PyTorch
                distribution) can be used.
            num_workers: Number of parallel workers to use for simulations.
            simulation_batch_size: Number of parameter sets that the simulator
                maps to data x at once. If None, we simulate all parameter sets at the
                same time. If >= 1, the simulator has to process data of shape
                (simulation_batch_size, parameter_dimension).
            device: torch device on which to compute, e.g. gpu or cpu.
            logging_level: Minimum severity of messages to log. One of the strings
               "INFO", "WARNING", "DEBUG", "ERROR" and "CRITICAL".
            summary_writer: A `SummaryWriter` to control, among others, log
                file location (default is `<current working directory>/logs`.)
            show_progress_bars: Whether to show a progress bar during simulation and
                sampling.
            show_round_summary: Whether to show the validation loss and leakage after
                each round.
        """

        # We set the device globally by setting the default tensor type for all tensors.
        assert device in (
            "gpu",
            "cpu",
        ), "Currently, only 'gpu' or 'cpu' are supported as devices."

        self._device = configure_default_device(device)

        self._simulator, self._prior = simulator, prior

        self._show_progress_bars = show_progress_bars
        self._show_round_summary = show_round_summary

        self._batched_simulator = lambda theta: simulate_in_batches(
            self._simulator,
            theta,
            simulation_batch_size,
            num_workers,
            self._show_progress_bars,
        )

        # Initialize roundwise (theta, x, prior_masks) for storage of parameters,
        # simulations and masks indicating if simulations came from prior.
        self._theta_roundwise, self._x_roundwise, self._prior_masks = [], [], []

        # Initialize list that indicates the round from which simulations were drawn.
        self._data_round_index = []

        self._round = 0

        # XXX We could instantiate here the Posterior for all children. Two problems:
        #     1. We must dispatch to right PotentialProvider for mcmc based on name
        #     2. `method_family` cannot be resolved only from `self.__class__.__name__`,
        #         since SRE, AALR demand different handling but are both in SRE class.

        self._summary_writer = (
            self._default_summary_writer() if summary_writer is None else summary_writer
        )

        # Logging during training (by SummaryWriter).
        self._summary = dict(
            median_observation_distances=[],
            epochs=[],
            best_validation_log_probs=[],
        )
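
Storing `_batched_simulator` as a lambda freezes the batching configuration at construction time, so downstream code only ever passes `theta`. The same effect in a self-contained sketch, using `functools.partial` and a hypothetical stand-in for `simulate_in_batches`:

import torch
from functools import partial

def simulate(simulator, theta, sim_batch_size=1):
    # Hypothetical stand-in: run the simulator batch by batch along dim 0.
    return torch.cat([simulator(batch) for batch in theta.split(sim_batch_size)])

noisy_identity = lambda theta: theta + torch.randn_like(theta)

# Freeze everything except theta, mirroring the lambda in __init__.
batched_simulator = partial(simulate, noisy_identity, sim_batch_size=50)

x = batched_simulator(torch.zeros(200, 2))  # shape (200, 2)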
Example #9
File: snpe_base.py Project: boyali/sbi
    def __init__(
        self,
        simulator: Callable,
        prior,
        true_observation: Tensor,
        num_pilot_samples: int = 100,
        density_estimator=None,
        calibration_kernel: Optional[Callable] = None,
        z_score_obs: bool = True,
        simulation_batch_size: int = 1,
        use_combined_loss: bool = False,
        retrain_from_scratch_each_round: bool = False,
        discard_prior_samples: bool = False,
        device: Optional[torch.device] = None,
        sample_with_mcmc: bool = False,
        mcmc_method: str = "slice-np",
        summary_writer: Optional[SummaryWriter] = None,
    ):
        """
        See NeuralInference docstring for all other arguments.

        Args:         
            num_pilot_samples: number of simulations that are run when
                instantiating an object. Used to z-score the observations.   
            density_estimator: neural density estimator
            calibration_kernel: a function to calibrate the context
            z_score_obs: whether to z-score the data features x
            use_combined_loss: whether to jointly neural_net prior samples 
                using maximum likelihood. Useful to prevent density leaking when using box uniform priors.
            retrain_from_scratch_each_round: whether to retrain the conditional
                density estimator for the posterior from scratch each round.
            discard_prior_samples: whether to discard prior samples from round
                two onwards.
        """

        super().__init__(
            simulator,
            prior,
            true_observation,
            simulation_batch_size,
            device,
            summary_writer,
        )

        self.z_score_obs = z_score_obs

        self._num_pilot_samples = num_pilot_samples
        self._use_combined_loss = use_combined_loss
        self._discard_prior_samples = discard_prior_samples

        self._prior_masks = []
        self._model_bank = []

        self._retrain_from_scratch_each_round = retrain_from_scratch_each_round

        # run prior samples
    self.pilot_parameters, self.pilot_observations = simulate_in_batches(
            simulator=self._simulator,
            parameter_sample_fn=lambda num_samples: self._prior.sample(
                (num_samples,)
            ),
            num_samples=num_pilot_samples,
            simulation_batch_size=self._simulation_batch_size,
            x_dim=self._true_observation.shape[1:],  # do not pass batch_dim
        )

        # create the deep neural density estimator
        if density_estimator is None:
            density_estimator = utils.posterior_nn(
                model="maf",
                prior=self._prior,
                context=self._true_observation,
            )
        # create the neural posterior which can sample(), log_prob()
        self._neural_posterior = Posterior(
            algorithm_family="snpe",
            neural_net=density_estimator,
            prior=prior,
            context=self._true_observation,
            sample_with_mcmc=sample_with_mcmc,
            mcmc_method=mcmc_method,
            get_potential_function=PotentialFunctionProvider(),
        )

        # obtain z-score for observations and define embedding net
        if self.z_score_obs:
            self.obs_mean = torch.mean(self.pilot_observations, dim=0)
            self.obs_std = torch.std(self.pilot_observations, dim=0)
        else:
            self.obs_mean = torch.zeros(self._true_observation.shape)
            self.obs_std = torch.ones(self._true_observation.shape)

        # new embedding_net contains z-scoring
        if not isinstance(
            self._neural_posterior.neural_net, MultivariateGaussianMDN
        ):
            embedding = nn.Sequential(
                utils.Normalize(self.obs_mean, self.obs_std),
                self._neural_posterior.neural_net._embedding_net,
            )
            self._neural_posterior.set_embedding_net(embedding)
        elif z_score_obs:
            warnings.warn("z-scoring of observation not implemented for MDNs")

        # calibration kernels proposed in Lueckmann, Goncalves et al. 2017
        if calibration_kernel is None:
            self.calibration_kernel = lambda context_input: torch.ones(
                [len(context_input)])
        else:
            self.calibration_kernel = calibration_kernel

        # If we're retraining from scratch each round,
        # keep a copy of the original untrained model for reinitialization.
        self._untrained_neural_posterior = deepcopy(self._neural_posterior)

        # extra SNPE-specific fields summary_writer
        self._summary.update({"rejection_sampling_acceptance_rates": []})
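
`utils.Normalize` is prepended to the embedding net so that z-scoring happens inside the network itself. Its implementation is not shown in this snippet; a hypothetical equivalent:

import torch
from torch import nn

class NormalizeSketch(nn.Module):
    """Hypothetical equivalent of utils.Normalize: z-scores inputs with fixed statistics."""

    def __init__(self, mean: torch.Tensor, std: torch.Tensor):
        super().__init__()
        # Buffers move with the module between devices but are not trained.
        self.register_buffer("mean", mean)
        self.register_buffer("std", std)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return (x - self.mean) / self.std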
Example #10
File: abcrf.py Project: mackelab/sbibm
    log = sbibm.get_logger(__name__)
    log.info(f"Starting to run RF-ABC")

    prior = task.get_prior()
    simulator = task.get_simulator()
    if observation is None:
        observation = task.get_observation(num_observation)

    # Simulate training data set
    log.info(f"Generating data set as reference table")
    thetas = prior(num_samples=num_simulations)

    xs = simulate_in_batches(
        simulator,
        theta=thetas,
        sim_batch_size=batch_size,
        num_workers=1,
        show_progress_bars=True,
    )

    assert not thetas.isnan().any()
    assert not xs.isnan().any()
    assert not observation.isnan().any()

    dim_thetas = thetas.shape[1]
    dim_xs = xs.shape[1]

    names_thetas = [f"t{i}" for i in range(dim_thetas)]
    names_xs = [f"x{i}" for i in range(dim_xs)]

    np_thetas = thetas.numpy().astype(np.float64)