Exemplo n.º 1
0
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    batch_size: int = 1,
    **kwargs: Any,
) -> torch.Tensor:
    """Runtime baseline

    Draws `num_simulations` samples from the prior and simulates them in
    batches, discards the outcomes, and returns a tensor of NaNs.

    Args:
        task: Task instance
        num_samples: Number of rows of the returned NaN tensor
        num_simulations: Simulation budget
        batch_size: Batch size for simulations
        **kwargs: Accepted for call compatibility; not used by this function

    Returns:
        Tensor of shape `(num_samples, task.dim_parameters)` filled with NaN

    Raises:
        ValueError: If simulations are requested with `batch_size` below 1
    """
    prior = task.get_prior()
    simulator = task.get_simulator()

    if num_simulations > 0 and batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    batch_size = min(batch_size, num_simulations)

    # Split the budget into full batches plus one smaller trailing batch, so
    # that the whole budget is spent even when it is not a multiple of
    # `batch_size` (otherwise the assertion below could never hold).
    batch_sizes = []
    if batch_size > 0:
        num_full_batches, remainder = divmod(num_simulations, batch_size)
        batch_sizes = [batch_size] * num_full_batches
        if remainder:
            batch_sizes.append(remainder)

    for current_batch_size in tqdm(batch_sizes):
        # Outcomes are deliberately discarded; only the runtime matters here.
        simulator(prior(num_samples=current_batch_size))

    assert simulator.num_simulations == num_simulations

    return torch.full((num_samples, task.dim_parameters), float("nan"))
Exemplo n.º 2
0
def run_rejection_abc(
    task: Task,
    num_simulations: int,
    population_size: int,
    observation: Optional[torch.Tensor] = None,
    distance: str = "l2",
    batch_size: int = 1000,
):
    """Run rejection ABC with a fixed simulation budget.

    Args:
        task: Task instance providing the simulator and the prior distribution
        num_simulations: Simulation budget, also used as the simulator call limit
        population_size: Divided by `num_simulations` to obtain the quantile
        observation: Observation passed to the inference call as `x_o`
        distance: Distance passed to `MCABC`
        batch_size: Simulation batch size passed to `MCABC`

    Returns:
        The posterior and the distances as returned by the `MCABC` call
    """
    abc_settings = dict(
        simulator=task.get_simulator(max_calls=num_simulations),
        prior=task.get_prior_dist(),
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
    )
    rejection_abc = MCABC(**abc_settings)

    # No epsilon threshold is given; acceptance is controlled by the quantile.
    acceptance_quantile = population_size / num_simulations
    abc_posterior, abc_distances = rejection_abc(
        x_o=observation,
        num_simulations=num_simulations,
        eps=None,
        quantile=acceptance_quantile,
        return_distances=True,
    )
    return abc_posterior, abc_distances
Exemplo n.º 3
0
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_rounds: int = 10,
    neural_net: str = "nsf",
    hidden_features: int = 50,
    simulation_batch_size: int = 1000,
    training_batch_size: int = 10000,
    num_atoms: int = 10,
    automatic_transforms_enabled: bool = False,
    z_score_x: bool = True,
    z_score_theta: bool = True,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs (S)NPE from `sbi`

    Exactly one of `num_observation` and `observation` must be given.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_rounds: Number of rounds
        neural_net: Neural network to use, one of maf / mdn / made / nsf
        hidden_features: Number of hidden features in network
        simulation_batch_size: Batch size for simulator
        training_batch_size: Batch size for training network
        num_atoms: Number of atoms, -1 means same as `training_batch_size`
        automatic_transforms_enabled: Whether to enable automatic transforms
        z_score_x: Whether to z-score x
        z_score_theta: Whether to z-score theta

    Returns:
        Samples from posterior, number of simulator calls, log probability of true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = logging.getLogger(__name__)

    if num_rounds == 1:
        log.info("Running NPE")
        num_simulations_per_round = num_simulations
    else:
        log.info("Running SNPE")
        # NOTE(review): the budget is floored per round, so the final budget
        # assertion presumably only holds when `num_simulations` is a multiple
        # of `num_rounds` -- confirm against the simulator's call counter.
        num_simulations_per_round = math.floor(num_simulations / num_rounds)

    # Batch sizes can never exceed what a single round provides.
    if simulation_batch_size > num_simulations_per_round:
        simulation_batch_size = num_simulations_per_round
        log.warning("Reduced simulation_batch_size to num_simulation_per_round")

    if training_batch_size > num_simulations_per_round:
        training_batch_size = num_simulations_per_round
        log.warning("Reduced training_batch_size to num_simulation_per_round")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    simulator = task.get_simulator(max_calls=num_simulations)

    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]

    if automatic_transforms_enabled:
        prior = wrap_prior_dist(prior, transforms)
        simulator = wrap_simulator_fn(simulator, transforms)

    density_estimator_fun = posterior_nn(
        model=neural_net.lower(),
        hidden_features=hidden_features,
        z_score_x=z_score_x,
        z_score_theta=z_score_theta,
    )

    inference_method = inference.SNPE_C(prior, density_estimator=density_estimator_fun)
    posteriors = []
    # The first round simulates from the prior; later rounds simulate from the
    # posterior of the previous round, conditioned on the observation.
    proposal = prior

    for _ in range(num_rounds):
        theta, x = inference.simulate_for_sbi(
            simulator,
            proposal,
            num_simulations=num_simulations_per_round,
            simulation_batch_size=simulation_batch_size,
        )

        density_estimator = inference_method.append_simulations(
            theta, x, proposal=proposal
        ).train(
            num_atoms=num_atoms,
            training_batch_size=training_batch_size,
            retrain_from_scratch_each_round=False,
            discard_prior_samples=False,
            use_combined_loss=False,
            show_train_summary=True,
        )
        posterior = inference_method.build_posterior(
            density_estimator, sample_with_mcmc=False
        )
        proposal = posterior.set_default_x(observation)
        posteriors.append(posterior)

    # Only the posterior of the last round is used for the final samples.
    posterior = wrap_posterior(posteriors[-1], transforms)

    assert simulator.num_simulations == num_simulations

    samples = posterior.sample((num_samples,)).detach()

    if num_observation is not None:
        true_parameters = task.get_true_parameters(num_observation=num_observation)
        log_prob_true_parameters = posterior.log_prob(true_parameters)
        return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        return samples, simulator.num_simulations, None
Exemplo n.º 4
0
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_rounds: int = 10,
    neural_net: str = "resnet",
    hidden_features: int = 50,
    simulation_batch_size: int = 1000,
    training_batch_size: int = 10000,
    num_atoms: int = 10,
    automatic_transforms_enabled: bool = True,
    mcmc_method: str = "slice_np_vectorized",
    mcmc_parameters: Dict[str, Any] = {
        "num_chains": 100,
        "thin": 10,
        "warmup_steps": 100,
        "init_strategy": "sir",
        "sir_batch_size": 1000,
        "sir_num_batches": 100,
    },
    z_score_x: bool = True,
    z_score_theta: bool = True,
    variant: str = "B",
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs (S)NRE from `sbi`

    Exactly one of `num_observation` and `observation` must be given.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_simulations: Simulation budget
        num_rounds: Number of rounds
        neural_net: Neural network to use, one of linear / mlp / resnet
        hidden_features: Number of hidden features in network
        simulation_batch_size: Batch size for simulator
        training_batch_size: Batch size for training network
        num_atoms: Number of atoms, -1 means same as `training_batch_size`
        automatic_transforms_enabled: Whether to enable automatic transforms
        mcmc_method: MCMC method
        mcmc_parameters: MCMC parameters; the passed mapping is copied and never
            modified. Note that `warmup_steps` is always overridden with 25.
        z_score_x: Whether to z-score x
        z_score_theta: Whether to z-score theta
        variant: Can be used to switch between SNRE-A (AALR) and -B (SRE)

    Returns:
        Samples from posterior, number of simulator calls, and None (the log
        probability of the true parameters is not computed here)

    Raises:
        NotImplementedError: If `variant` is neither "A" nor "B"
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = logging.getLogger(__name__)

    # Work on a private copy: the settings are modified below, and mutating the
    # shared default dict (or the caller's dict) would leak state into later
    # calls, e.g. "latest_sample" being used from the very first round.
    mcmc_parameters = dict(mcmc_parameters)

    if num_rounds == 1:
        log.info("Running NRE")
        num_simulations_per_round = num_simulations
    else:
        log.info("Running SNRE")
        num_simulations_per_round = math.floor(num_simulations / num_rounds)

    # Batch sizes can never exceed what a single round provides.
    if simulation_batch_size > num_simulations_per_round:
        simulation_batch_size = num_simulations_per_round
        log.warning("Reduced simulation_batch_size to num_simulation_per_round")

    if training_batch_size > num_simulations_per_round:
        training_batch_size = num_simulations_per_round
        log.warning("Reduced training_batch_size to num_simulation_per_round")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    simulator = task.get_simulator(max_calls=num_simulations)

    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    if automatic_transforms_enabled:
        prior = wrap_prior_dist(prior, transforms)
        simulator = wrap_simulator_fn(simulator, transforms)

    classifier = classifier_nn(
        model=neural_net.lower(),
        hidden_features=hidden_features,
        z_score_x=z_score_x,
        z_score_theta=z_score_theta,
    )
    if variant == "A":
        inference_class = inference.SNRE_A
        inference_method_kwargs = {}
    elif variant == "B":
        inference_class = inference.SNRE_B
        # Only variant B is trained with the `num_atoms` argument.
        inference_method_kwargs = {"num_atoms": num_atoms}
    else:
        raise NotImplementedError

    inference_method = inference_class(classifier=classifier, prior=prior)

    posteriors = []
    proposal = prior
    # NOTE(review): this unconditionally overrides any `warmup_steps` supplied
    # by the caller -- confirm that this is intended.
    mcmc_parameters["warmup_steps"] = 25

    for r in range(num_rounds):
        theta, x = inference.simulate_for_sbi(
            simulator,
            proposal,
            num_simulations=num_simulations_per_round,
            simulation_batch_size=simulation_batch_size,
        )

        density_estimator = inference_method.append_simulations(
            theta, x, from_round=r
        ).train(
            training_batch_size=training_batch_size,
            retrain_from_scratch_each_round=False,
            discard_prior_samples=False,
            show_train_summary=True,
            **inference_method_kwargs,
        )
        # From the third round on, chains are initialised from the latest
        # samples instead of the configured init strategy.
        if r > 1:
            mcmc_parameters["init_strategy"] = "latest_sample"
        posterior = inference_method.build_posterior(
            density_estimator,
            mcmc_method=mcmc_method,
            mcmc_parameters=mcmc_parameters,
        )
        # Copy hyperparameters, e.g., mcmc_init_samples for "latest_sample" strategy.
        if r > 0:
            posterior.copy_hyperparameters_from(posteriors[-1])
        proposal = posterior.set_default_x(observation)
        posteriors.append(posterior)

    posterior = wrap_posterior(posteriors[-1], transforms)

    assert simulator.num_simulations == num_simulations

    samples = posterior.sample((num_samples,)).detach()

    return samples, simulator.num_simulations, None
Exemplo n.º 5
0
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_chains: int = 10,
    num_warmup: int = 1000,
) -> (torch.Tensor, int, Optional[torch.Tensor]):
    """Runs BOLFI from elfi package

    Exactly one of `num_observation` and `observation` must be given.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_chains: Number of chains
        num_warmup: Warmup steps

    Returns:
        Samples from posterior, number of simulator calls, and None (the log
        probability of the true parameters is not computed yet)
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    # NOTE: configures the root logger as a side effect of calling this function.
    logging.basicConfig(level=logging.INFO)

    log = logging.getLogger(__name__)
    log.warning("ELFI is not fully supported yet!")

    # Initialize model object
    m = elfi.ElfiModel()

    # Prior
    bounds = build_prior(task=task, model=m)

    # Observation
    if observation is None:
        observation = task.get_observation(num_observation)
    observation = observation.numpy()

    # Simulator; the node is registered on the model under the task's name and
    # takes one "parameter_<dim>" node per parameter dimension as input.
    simulator = task.get_simulator(max_calls=num_simulations)
    elfi.Simulator(
        Simulator(simulator),
        *[m[f"parameter_{dim}"] for dim in range(task.dim_parameters)],
        observed=observation,
        name=task.name,
    )

    # Euclidean distance
    elfi.Distance("euclidean", m[task.name], name="distance")

    # Log distance
    elfi.Operation(np.log, m["distance"], name="log_distance")

    # Inference; every chain contributes an equal share, rounded up so that at
    # least `num_samples` draws are available in total.
    num_samples_per_chain = ceil(num_samples / num_chains)
    bolfi = elfi.BOLFI(model=m, target_name="log_distance", bounds=bounds)
    bolfi.fit(n_evidence=num_simulations)
    result_bolfi = bolfi.sample(
        num_samples_per_chain + num_warmup,
        warmup=num_warmup,
        n_chains=num_chains,
        info_freq=100,
    )

    # Flatten to rows of parameters and drop the surplus caused by rounding up.
    samples = torch.from_numpy(
        result_bolfi.samples_array.astype(np.float32)
    ).reshape(-1, task.dim_parameters)[:num_samples, :]

    assert samples.shape[0] == num_samples

    # TODO: return log prob of true parameters

    return samples, simulator.num_simulations, None
Exemplo n.º 6
0
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_top_samples: Optional[int] = 100,
    quantile: Optional[float] = None,
    eps: Optional[float] = None,
    distance: str = "l2",
    batch_size: int = 1000,
    save_distances: bool = False,
    kde_bandwidth: Optional[str] = "cv",
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    lra: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs REJ-ABC from `sbi`

    Choose one of `num_top_samples`, `quantile`, `eps`. Exactly one of
    `num_observation` and `observation` must be given.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_top_samples: If given, will use `top=True` with num_top_samples
        quantile: Quantile to use
        eps: Epsilon threshold to use
        distance: Distance to use
        batch_size: Batch size for simulator
        save_distances: If True, stores distances of samples to disk
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        sass: If True, summary statistics are learned as in
            Fearnhead & Prangle 2012.
        sass_fraction: Fraction of simulation budget to use for sass.
        sass_feature_expansion_degree: Degree of polynomial expansion of the summary
            statistics.
        lra: If True, posterior samples are adjusted with
            linear regression as in Beaumont et al. 2002.
    Returns:
        Samples from posterior, number of simulator calls, log probability of true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    assert not (num_top_samples is None and quantile is None and eps is None)

    log = sbibm.get_logger(__name__)
    log.info("Running REJ-ABC")

    prior = task.get_prior_dist()
    simulator = task.get_simulator(max_calls=num_simulations)
    kde = kde_bandwidth is not None
    if observation is None:
        observation = task.get_observation(num_observation)

    if num_top_samples is not None and quantile is None:
        if sass:
            # Part of the budget goes to the sass pilot run, so the quantile
            # refers to the simulations that remain afterwards.
            quantile = num_top_samples / (
                num_simulations - int(sass_fraction * num_simulations)
            )
        else:
            quantile = num_top_samples / num_simulations

    inference_method = MCABC(
        simulator=simulator,
        prior=prior,
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
    )
    # Returns samples or kde posterior in output.
    output, summary = inference_method(
        x_o=observation,
        num_simulations=num_simulations,
        eps=eps,
        quantile=quantile,
        return_summary=True,
        kde=kde,
        # The bandwidth is only meaningful when a KDE is actually fitted.
        kde_kwargs={"bandwidth": kde_bandwidth} if kde else {},
        lra=lra,
        sass=sass,
        sass_expansion_degree=sass_feature_expansion_degree,
        sass_fraction=sass_fraction,
    )

    assert simulator.num_simulations == num_simulations

    if save_distances:
        save_tensor_to_csv("distances.csv", summary["distances"])

    if kde:
        kde_posterior = output
        samples = kde_posterior.sample(num_samples)

        # LPTP can only be returned with KDE posterior.
        if num_observation is not None:
            true_parameters = task.get_true_parameters(num_observation=num_observation)
            log_prob_true_parameters = kde_posterior.log_prob(
                true_parameters.squeeze()
            )
            return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        # NOTE(review): without KDE the accepted samples are returned as they
        # are, so their number presumably need not equal `num_samples`.
        samples = output

    return samples, simulator.num_simulations, None
Exemplo n.º 7
0
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    population_size: Optional[int] = None,
    distance: str = "l2",
    epsilon_decay: float = 0.2,
    distance_based_decay: bool = True,
    ess_min: Optional[float] = None,
    initial_round_factor: int = 5,
    batch_size: int = 1000,
    kernel: str = "gaussian",
    kernel_variance_scale: float = 0.5,
    use_last_pop_samples: bool = True,
    algorithm_variant: str = "C",
    save_summary: bool = False,
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    lra: bool = False,
    lra_sample_weights: bool = True,
    kde_bandwidth: Optional[str] = "cv",
    kde_sample_weights: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs SMC-ABC from `sbi`

    Epsilon can be scheduled in two ways:
    1) Exponential decay: eps_t+1 = epsilon_decay * eps_t
    2) Distance based decay: the new eps is the "epsilon_decay" quantile of the
        distances of the accepted simulations in the previous population. This
        is used if `distance_based_decay` is set to True.

    Exactly one of `num_observation` and `observation` must be given.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        population_size: If None, uses heuristic: 1000 if `num_simulations` is greater
            than 10k, else 100
        distance: Distance function, options = {l1, l2, mse}
        epsilon_decay: Decay for epsilon; treated as quantile in case of distance
            based decay.
        distance_based_decay: Whether to determine new epsilon from quantile of
            distances of the previous population.
        ess_min: Threshold for resampling a population if effective sampling size is
            too small.
        initial_round_factor: Used to determine initial round size
        batch_size: Batch size for the simulator
        kernel: Kernel distribution used to perturb the particles.
        kernel_variance_scale: Scaling factor for kernel variance.
        use_last_pop_samples: If True, samples of a population that was quit due to
            budget are used by filling up missing particles from the previous
            population.
        algorithm_variant: There are three SMCABC variants implemented: A, B, and C.
            See docstrings in SBI package for more details.
        save_summary: Whether to save a summary containing all populations, distances,
            etc. to file.
        sass: If True, summary statistics are learned as in
            Fearnhead & Prangle 2012.
        sass_fraction: Fraction of simulation budget to use for sass.
        sass_feature_expansion_degree: Degree of polynomial expansion of the summary
            statistics.
        lra: If True, posterior samples are adjusted with
            linear regression as in Beaumont et al. 2002.
        lra_sample_weights: Whether to weigh LRA samples
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        kde_sample_weights: Whether to weigh KDE samples

    Returns:
        Samples from posterior, number of simulator calls, log probability of true params if computable
    """
    # Exactly one source for the observation has to be specified.
    assert num_observation is not None or observation is not None
    assert num_observation is None or observation is None

    log = sbibm.get_logger(__name__)
    variant_references = dict(
        A="Toni 2010", B="Sisson et al. 2007", C="Beaumont et al. 2009"
    )
    log.info(f"Running SMC-ABC as in {variant_references[algorithm_variant]}.")

    prior_dist = task.get_prior_dist()
    simulator = task.get_simulator(max_calls=num_simulations)
    if observation is None:
        observation = task.get_observation(num_observation)

    # Heuristic default, then never more particles than simulations.
    if population_size is None:
        population_size = 1000 if num_simulations > 10_000 else 100
    population_size = min(population_size, num_simulations)

    # Initial round: a multiple of the population size, bounded from below by
    # the population size and from above by half of the budget.
    largest_initial_round = max(0.5 * num_simulations, population_size)
    initial_round_size = clip_int(
        value=initial_round_factor * population_size,
        minimum=population_size,
        maximum=largest_initial_round,
    )

    smc_abc = SMCABC(
        simulator=simulator,
        prior=prior_dist,
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
        kernel=kernel,
        algorithm_variant=algorithm_variant,
    )
    posterior, run_summary = smc_abc(
        x_o=observation,
        num_particles=population_size,
        num_initial_pop=initial_round_size,
        num_simulations=num_simulations,
        epsilon_decay=epsilon_decay,
        distance_based_decay=distance_based_decay,
        ess_min=ess_min,
        kernel_variance_scale=kernel_variance_scale,
        use_last_pop_samples=use_last_pop_samples,
        return_summary=True,
        lra=lra,
        lra_with_weights=lra_sample_weights,
        sass=sass,
        sass_fraction=sass_fraction,
        sass_expansion_degree=sass_feature_expansion_degree,
    )

    if save_summary:
        log.info("Saving smcabc summary to csv.")
        summary_frame = pd.DataFrame.from_dict(run_summary)
        summary_frame.to_csv("summary.csv", index=False)

    assert simulator.num_simulations == num_simulations

    if kde_bandwidth is None:
        samples = posterior.sample((num_samples,)).detach()
    else:
        particles = posterior._samples

        log.info(
            f"KDE on {particles.shape[0]} samples with bandwidth option {kde_bandwidth}"
        )

        particle_weights = None
        if kde_sample_weights:
            particle_weights = posterior._log_weights.exp()

        density = get_kde(
            particles,
            bandwidth=kde_bandwidth,
            sample_weight=particle_weights,
        )
        samples = density.sample(num_samples)

    if num_observation is None:
        return samples, simulator.num_simulations, None

    # True parameters are only available for numbered observations.
    true_parameters = task.get_true_parameters(num_observation=num_observation)
    log_prob_true_parameters = posterior.log_prob(true_parameters)
    return samples, simulator.num_simulations, log_prob_true_parameters
Exemplo n.º 8
0
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    population_size: Optional[int] = None,
    distance: Optional[str] = "l2",
    initial_round_factor: int = 5,
    batch_size: int = 1000,
    epsilon_decay: Optional[float] = 0.5,
    kernel: Optional[str] = "gaussian",
    kernel_variance_scale: Optional[float] = 0.5,
    population_strategy: Optional[str] = "constant",
    use_last_pop_samples: bool = False,
    num_workers: int = 1,
    sass: bool = False,
    sass_sample_weights: bool = False,
    sass_feature_expansion_degree: int = 1,
    sass_fraction: float = 0.5,
    lra: bool = False,
    lra_sample_weights: bool = True,
    kde_bandwidth: Optional[str] = None,
    kde_sample_weights: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """ABC-SMC using pyabc toolbox

    Exactly one of `num_observation` and `observation` must be given.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        population_size: If None, uses heuristic: 1000 if `num_simulations` is greater
            than 10k, else 100
        distance: Distance function, options = {l1, l2, mse}
        initial_round_factor: Multiplied with the population size to obtain the
            size of the initial REJ-ABC run that determines the first epsilon
        batch_size: Batch size passed on to the REJ-ABC and pyabc runs
        epsilon_decay: Decay for epsilon, quantile based.
        kernel: Kernel distribution used to perturb the particles.
        kernel_variance_scale: Scaling factor for kernel variance.
        population_strategy: Only "constant" is supported
        use_last_pop_samples: Passed on to the pyabc runs
        num_workers: If greater than 1, a multicore sampler with that many
            processes is used, otherwise a single core sampler
        sass: If True, summary statistics are learned as in
            Fearnhead & Prangle 2012.
        sass_sample_weights: Whether to weigh SASS samples
        sass_feature_expansion_degree: Degree of polynomial expansion of the summary
            statistics.
        sass_fraction: Fraction of simulation budget to use for sass.
        lra: If True, posterior samples are adjusted with
            linear regression as in Beaumont et al. 2002.
        lra_sample_weights: Whether to weigh LRA samples
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        kde_sample_weights: Whether to weigh KDE samples
    Returns:
        Samples from posterior, number of simulator calls, log probability of true params if computable

    Raises:
        NotImplementedError: If `population_strategy` is not "constant"
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)
    log = sbibm.get_logger(__name__)
    # Database in the temp directory with a name that is unique per run.
    # `randint` needs integer bounds: float bounds such as 1e9 are rejected by
    # recent Python versions.
    db = "sqlite:///" + os.path.join(
        tempfile.gettempdir(),
        f"pyabc_{time.time()}_{random.randint(0, 10**9)}.db",
    )

    # Wrap sbibm prior and simulator for pyABC
    prior = wrap_prior(task)
    simulator = PyAbcSimulator(task)
    distance_str = distance

    if observation is None:
        observation = task.get_observation(num_observation)

    # Population size strategy
    if population_size is None:
        population_size = 100
        if num_simulations > 10_000:
            population_size = 1000

    # Find initial epsilon with rej abc run.
    initial_round_size = clip_int(
        value=initial_round_factor * population_size,
        minimum=population_size,
        maximum=max(0.5 * num_simulations, population_size),
    )
    log.info(
        f"Running REJ-ABC with {initial_round_size} samples to find initial epsilon."
    )
    _, distances = run_rejection_abc(
        task,
        initial_round_size,
        population_size,
        observation,
        distance_str,
        batch_size,
    )
    initial_epsilon = distances[-1].item()

    # Wrap observation and distance for pyabc.
    distance = get_distance(distance_str)
    observation = np.atleast_1d(np.array(observation, dtype=float).squeeze())
    # Define quantile based epsilon decay.
    epsilon = pyabc.epsilon.QuantileEpsilon(
        initial_epsilon=initial_epsilon, alpha=epsilon_decay
    )

    # Perturbation kernel
    transition = pyabc.transition.MultivariateNormalTransition(
        scaling=kernel_variance_scale
    )

    population_size = min(population_size, num_simulations)

    if population_strategy == "constant":
        population_size_strategy = population_size
    elif population_strategy == "adaptive":
        # TODO: an adaptive strategy (pyabc.populationstrategy.AdaptivePopulationSize)
        # is not wired up yet.
        raise NotImplementedError("Not implemented atm.")
    else:
        # Fail with a clear error instead of an unbound variable further down.
        raise NotImplementedError(
            f"Unknown population_strategy: {population_strategy!r}"
        )

    # Multiprocessing
    if num_workers > 1:
        sampler = pyabc.sampler.MulticoreParticleParallelSampler(n_procs=num_workers)
    else:
        sampler = pyabc.sampler.SingleCoreSampler(check_max_eval=False)

    # Collect kwargs
    kwargs = dict(
        parameter_priors=[prior],
        distance_function=distance,
        population_size=population_size_strategy,
        transitions=[transition],
        eps=epsilon,
        sampler=sampler,
    )

    # Semi-automatic summary statistics.
    if sass:
        num_pilot_simulations = int(sass_fraction * num_simulations)
        log.info(f"SASS pilot run with {num_pilot_simulations} simulations.")
        kwargs["models"] = [simulator]

        # Run pyabc with fixed budget.
        pilot_theta, pilot_weights = run_pyabc(
            task,
            db,
            num_pilot_simulations,
            observation,
            pyabc_kwargs=kwargs,
            use_last_pop_samples=use_last_pop_samples,
            distance_str=distance_str,
            batch_size=batch_size,
        )

        # Regression
        # TODO: Posterior does not return xs, which we would need for
        # regression adjustment. So we will resimulate, which is
        # unnecessary. Should ideally change `inference_method` to return xs
        # if requested instead. This step thus does not count towards budget
        pilot_x = task.get_simulator(max_calls=None)(pilot_theta)

        # Run SASS.
        sumstats_transform = get_sass_transform(
            theta=pilot_theta,
            x=pilot_x,
            expansion_degree=sass_feature_expansion_degree,
            sample_weight=pilot_weights if sass_sample_weights else None,
        )

        # Update simulator to use sass summary stats.
        def sumstats_simulator(theta):
            # Pyabc simulator returns dict.
            x = simulator(theta)["data"].reshape(1, -1)
            # Transform return Tensor.
            sx = sumstats_transform(x)
            return dict(data=sx.numpy().squeeze())

        # The observation has to live in the same summary statistics space.
        observation = sumstats_transform(observation.reshape(1, -1))
        observation = np.atleast_1d(np.array(observation, dtype=float).squeeze())
        log.info("Finished learning summary statistics.")
    else:
        sumstats_simulator = simulator
        num_pilot_simulations = 0

    log.info(
        f"Running ABC-SMC-pyabc with {num_simulations - num_pilot_simulations} simulations"
    )
    kwargs["models"] = [sumstats_simulator]

    # Run pyabc with fixed budget.
    particles, weights = run_pyabc(
        task,
        db,
        num_simulations=num_simulations - num_pilot_simulations,
        observation=observation,
        pyabc_kwargs=kwargs,
        use_last_pop_samples=use_last_pop_samples,
        distance_str=distance_str,
        batch_size=batch_size,
    )

    if lra:
        log.info("Running linear regression adjustment.")
        # TODO: Posterior does not return xs, which we would need for
        # regression adjustment. So we will resimulate, which is
        # unnecessary. Should ideally change `inference_method` to return xs
        # if requested instead.
        xs = task.get_simulator(max_calls=None)(particles)

        # NOTE: If posterior is bounded we should do the regression in
        # unbounded space, as described in https://arxiv.org/abs/1707.01254
        transform_to_unbounded = True
        transforms = task._get_transforms(transform_to_unbounded)["parameters"]

        # Update the particles with LRA.
        particles = run_lra(
            theta=particles,
            x=xs,
            observation=torch.tensor(observation, dtype=torch.float32).unsqueeze(0),
            sample_weight=weights if lra_sample_weights else None,
            transforms=transforms,
        )

        # TODO: Maybe set weights uniform because they can't be updated?
        # weights = torch.ones(particles.shape[0]) / particles.shape[0]

    if kde_bandwidth is not None:
        samples = particles

        log.info(
            f"KDE on {samples.shape[0]} samples with bandwidth option {kde_bandwidth}"
        )
        kde = get_kde(
            samples,
            bandwidth=kde_bandwidth,
            sample_weight=weights if kde_sample_weights else None,
        )
        samples = kde.sample(num_samples)
    else:
        log.info(f"Sampling {num_samples} samples from trace")
        samples = sample_with_weights(particles, weights, num_samples=num_samples)

    log.info(f"Unique samples: {torch.unique(samples, dim=0).shape[0]}")

    # The call counter lives on the sbibm simulator wrapped by the pyabc simulator.
    return samples, simulator.simulator.num_simulations, None
Exemplo n.º 9
0
Arquivo: sl.py Projeto: mackelab/sbibm
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_simulations_per_step: int = 100,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    automatic_transforms_enabled: bool = False,
    mcmc_method: str = "slice_np",
    mcmc_parameters: Optional[Dict[str, Any]] = None,
    diag_eps: float = 0.0,
) -> (torch.Tensor, int, Optional[torch.Tensor]):
    """Runs synthetic likelihood (SL)

    Wraps the task's simulator in a `SynthLikNet` likelihood estimator, plugs
    it into a `LikelihoodBasedPosterior` conditioned on the observation, and
    draws `num_samples` samples from that posterior.

    Exactly one of `num_observation` and `observation` must be provided.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget. NOTE(review): not enforced in this
            function -- the corresponding check is commented out below.
        num_simulations_per_step: Number of simulations per MCMC step
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        automatic_transforms_enabled: Whether to enable automatic transforms
        mcmc_method: MCMC method. NOTE(review): accepted for interface parity
            but not forwarded to the posterior in this function -- confirm
            whether it should be.
        mcmc_parameters: MCMC parameters passed to the posterior; `None`
            (default) means an empty dict
        diag_eps: Epsilon applied to diagonal

    Returns:
        Samples from posterior, number of simulator calls, log probability of true params if computable
    """
    assert not (
        num_observation is None and observation is None
    ), "Either `num_observation` or `observation` must be provided"
    assert not (
        num_observation is not None and observation is not None
    ), "Only one of `num_observation` and `observation` may be provided"

    # A fresh dict per call: a `{}` default in the signature would be shared
    # between calls and could leak state if anything downstream mutates it.
    if mcmc_parameters is None:
        mcmc_parameters = {}

    log = logging.getLogger(__name__)

    log.info("Running SL")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    simulator = task.get_simulator()

    # Prior and simulator are wrapped with the same parameter transforms so
    # that both operate in the same (possibly transformed) parameter space.
    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    prior = wrap_prior_dist(prior, transforms)
    simulator = wrap_simulator_fn(simulator, transforms)

    likelihood_estimator = SynthLikNet(
        simulator=simulator,
        num_simulations_per_step=num_simulations_per_step,
        diag_eps=diag_eps,
    )

    # The synthetic likelihood takes the place of the neural likelihood
    # estimator, hence the "snle" method family.
    posterior = LikelihoodBasedPosterior(
        method_family="snle",
        neural_net=likelihood_estimator,
        prior=prior,
        x_shape=observation.shape,
        mcmc_parameters=mcmc_parameters,
    )

    posterior.set_default_x(observation)

    # Wrap the posterior with the same transforms used for prior and simulator.
    posterior = wrap_posterior(posterior, transforms)

    # assert simulator.num_simulations == num_simulations

    samples = posterior.sample((num_samples,)).detach()

    return samples, simulator.num_simulations, None