def run_rejection_abc(
    task: Task,
    num_simulations: int,
    population_size: int,
    observation: Optional[torch.Tensor] = None,
    distance: str = "l2",
    batch_size: int = 1000,
):
    """Return posterior and distances from rejection ABC with a fixed budget."""
    inferer = MCABC(
        simulator=task.get_simulator(max_calls=num_simulations),
        prior=task.get_prior_dist(),
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
    )
    posterior, distances = inferer(
        x_o=observation,
        num_simulations=num_simulations,
        eps=None,
        quantile=population_size / num_simulations,
        return_distances=True,
    )

    return posterior, distances
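# Usage sketch (hypothetical): runs rejection ABC on the two_moons task,
# keeping the 100 closest of 10,000 simulations. Assumes `sbibm` is installed
# and that `run_rejection_abc` above is importable.
import sbibm

task = sbibm.get_task("two_moons")
posterior, distances = run_rejection_abc(
    task=task,
    num_simulations=10_000,
    population_size=100,  # accepted quantile = 100 / 10_000 = 0.01
    observation=task.get_observation(num_observation=1),
)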
def get_proposal(
    task: Task,
    samples: torch.Tensor,
    prior_weight: float = 0.01,
    bounded: bool = True,
    density_estimator: str = "flow",
    flow_model: str = "nsf",
    **kwargs: Any,
) -> DenfensiveProposal:
    """Gets proposal distribution by performing density estimation on `samples`.

    If `prior_weight` > 0., the proposal is defensive, i.e., the prior is mixed in.

    Args:
        task: Task instance
        samples: Samples to fit
        prior_weight: Prior weight
        bounded: If True, will automatically transform proposal density to
            bounded space
        density_estimator: Density estimator
        flow_model: Flow to use if `density_estimator` is `flow`
        kwargs: Passed on to `get_flow` or `get_kde`

    Returns:
        Proposal distribution
    """
    tic = time.time()
    log = sbibm.get_logger(__name__)
    log.info("Get proposal distribution called")

    prior_dist = task.get_prior_dist()
    transform = task._get_transforms(
        automatic_transforms_enabled=bounded)["parameters"]

    if density_estimator == "flow":
        density_estimator_ = get_flow(
            model=flow_model, dim_distribution=task.dim_parameters, **kwargs)
        density_estimator_ = train_flow(
            density_estimator_, samples, transform=transform)
    elif density_estimator == "kde":
        density_estimator_ = get_kde(X=samples, transform=transform, **kwargs)
    else:
        raise NotImplementedError

    proposal_dist = DenfensiveProposal(
        dim=task.dim_parameters,
        proposal=density_estimator_,
        prior=prior_dist,
        prior_weight=prior_weight,
    )

    log.info(f"Proposal distribution is set up, took {time.time()-tic:.3f}sec")

    return proposal_dist
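# Minimal sketch of the defensive-mixture idea behind `DenfensiveProposal`
# (class name and internals here are illustrative, not the library class):
# with probability `prior_weight` draw from the prior, otherwise from the
# fitted density, so the proposal keeps full prior support even where the
# density fit is poor.
import torch

class DefensiveMixtureSketch:
    def __init__(self, proposal, prior, prior_weight: float = 0.01):
        self.proposal = proposal
        self.prior = prior
        self.prior_weight = prior_weight

    def sample(self, num_samples: int) -> torch.Tensor:
        # Replace a random `prior_weight` fraction of draws with prior draws.
        draws = self.proposal.sample((num_samples,))
        from_prior = torch.rand(num_samples) < self.prior_weight
        n_prior = int(from_prior.sum())
        if n_prior > 0:
            draws[from_prior] = self.prior.sample((n_prior,))
        return draws

    def log_prob(self, theta: torch.Tensor) -> torch.Tensor:
        # Mixture density log(w * prior + (1 - w) * proposal), in log space.
        w = torch.tensor(self.prior_weight)
        return torch.logsumexp(
            torch.stack([
                torch.log(w) + self.prior.log_prob(theta),
                torch.log1p(-w) + self.proposal.log_prob(theta),
            ]),
            dim=0,
        )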
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_rounds: int = 10,
    neural_net: str = "nsf",
    hidden_features: int = 50,
    simulation_batch_size: int = 1000,
    training_batch_size: int = 10000,
    num_atoms: int = 10,
    automatic_transforms_enabled: bool = False,
    z_score_x: bool = True,
    z_score_theta: bool = True,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs (S)NPE from `sbi`

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_rounds: Number of rounds
        neural_net: Neural network to use, one of maf / mdn / made / nsf
        hidden_features: Number of hidden features in network
        simulation_batch_size: Batch size for simulator
        training_batch_size: Batch size for training network
        num_atoms: Number of atoms, -1 means same as `training_batch_size`
        automatic_transforms_enabled: Whether to enable automatic transforms
        z_score_x: Whether to z-score x
        z_score_theta: Whether to z-score theta

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = logging.getLogger(__name__)

    if num_rounds == 1:
        log.info("Running NPE")
        num_simulations_per_round = num_simulations
    else:
        log.info("Running SNPE")
        num_simulations_per_round = math.floor(num_simulations / num_rounds)

    if simulation_batch_size > num_simulations_per_round:
        simulation_batch_size = num_simulations_per_round
        log.warning("Reduced simulation_batch_size to num_simulations_per_round")

    if training_batch_size > num_simulations_per_round:
        training_batch_size = num_simulations_per_round
        log.warning("Reduced training_batch_size to num_simulations_per_round")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    simulator = task.get_simulator(max_calls=num_simulations)

    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    if automatic_transforms_enabled:
        prior = wrap_prior_dist(prior, transforms)
        simulator = wrap_simulator_fn(simulator, transforms)

    density_estimator_fun = posterior_nn(
        model=neural_net.lower(),
        hidden_features=hidden_features,
        z_score_x=z_score_x,
        z_score_theta=z_score_theta,
    )
    inference_method = inference.SNPE_C(
        prior, density_estimator=density_estimator_fun)

    posteriors = []
    proposal = prior

    for _ in range(num_rounds):
        theta, x = inference.simulate_for_sbi(
            simulator,
            proposal,
            num_simulations=num_simulations_per_round,
            simulation_batch_size=simulation_batch_size,
        )

        density_estimator = inference_method.append_simulations(
            theta, x, proposal=proposal
        ).train(
            num_atoms=num_atoms,
            training_batch_size=training_batch_size,
            retrain_from_scratch_each_round=False,
            discard_prior_samples=False,
            use_combined_loss=False,
            show_train_summary=True,
        )
        posterior = inference_method.build_posterior(
            density_estimator, sample_with_mcmc=False)
        proposal = posterior.set_default_x(observation)
        posteriors.append(posterior)

    posterior = wrap_posterior(posteriors[-1], transforms)

    assert simulator.num_simulations == num_simulations

    samples = posterior.sample((num_samples,)).detach()

    if num_observation is not None:
        true_parameters = task.get_true_parameters(num_observation=num_observation)
        log_prob_true_parameters = posterior.log_prob(true_parameters)
        return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        return samples, simulator.num_simulations, None
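# Usage sketch (hypothetical): single-round, amortized NPE on the two_moons
# task with a budget of 10,000 simulations. Assumes `sbibm` is installed and
# that this `run` lives in a module such as `sbibm.algorithms.sbi.snpe`.
import sbibm

task = sbibm.get_task("two_moons")
samples, num_sims, log_prob_true = run(
    task=task,
    num_samples=1_000,
    num_simulations=10_000,
    num_observation=1,
    num_rounds=1,  # num_rounds=1 runs NPE; num_rounds>1 runs sequential SNPE
)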
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_rounds: int = 10,
    neural_net: str = "resnet",
    hidden_features: int = 50,
    simulation_batch_size: int = 1000,
    training_batch_size: int = 10000,
    num_atoms: int = 10,
    automatic_transforms_enabled: bool = True,
    mcmc_method: str = "slice_np_vectorized",
    mcmc_parameters: Dict[str, Any] = {
        "num_chains": 100,
        "thin": 10,
        "warmup_steps": 100,
        "init_strategy": "sir",
        "sir_batch_size": 1000,
        "sir_num_batches": 100,
    },
    z_score_x: bool = True,
    z_score_theta: bool = True,
    variant: str = "B",
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs (S)NRE from `sbi`

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_rounds: Number of rounds
        neural_net: Neural network to use, one of linear / mlp / resnet
        hidden_features: Number of hidden features in network
        simulation_batch_size: Batch size for simulator
        training_batch_size: Batch size for training network
        num_atoms: Number of atoms, -1 means same as `training_batch_size`
        automatic_transforms_enabled: Whether to enable automatic transforms
        mcmc_method: MCMC method
        mcmc_parameters: MCMC parameters
        z_score_x: Whether to z-score x
        z_score_theta: Whether to z-score theta
        variant: Can be used to switch between SNRE-A (AALR) and -B (SRE)

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = logging.getLogger(__name__)

    if num_rounds == 1:
        log.info("Running NRE")
        num_simulations_per_round = num_simulations
    else:
        log.info("Running SNRE")
        num_simulations_per_round = math.floor(num_simulations / num_rounds)

    if simulation_batch_size > num_simulations_per_round:
        simulation_batch_size = num_simulations_per_round
        log.warning("Reduced simulation_batch_size to num_simulations_per_round")

    if training_batch_size > num_simulations_per_round:
        training_batch_size = num_simulations_per_round
        log.warning("Reduced training_batch_size to num_simulations_per_round")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    simulator = task.get_simulator(max_calls=num_simulations)

    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    if automatic_transforms_enabled:
        prior = wrap_prior_dist(prior, transforms)
        simulator = wrap_simulator_fn(simulator, transforms)

    classifier = classifier_nn(
        model=neural_net.lower(),
        hidden_features=hidden_features,
        z_score_x=z_score_x,
        z_score_theta=z_score_theta,
    )
    if variant == "A":
        inference_class = inference.SNRE_A
        inference_method_kwargs = {}
    elif variant == "B":
        inference_class = inference.SNRE_B
        inference_method_kwargs = {"num_atoms": num_atoms}
    else:
        raise NotImplementedError

    inference_method = inference_class(classifier=classifier, prior=prior)

    posteriors = []
    proposal = prior
    # Use fewer MCMC warmup steps than the default of 100 set in the signature.
    mcmc_parameters["warmup_steps"] = 25

    for r in range(num_rounds):
        theta, x = inference.simulate_for_sbi(
            simulator,
            proposal,
            num_simulations=num_simulations_per_round,
            simulation_batch_size=simulation_batch_size,
        )

        density_estimator = inference_method.append_simulations(
            theta, x, from_round=r
        ).train(
            training_batch_size=training_batch_size,
            retrain_from_scratch_each_round=False,
            discard_prior_samples=False,
            show_train_summary=True,
            **inference_method_kwargs,
        )
        if r > 1:
            mcmc_parameters["init_strategy"] = "latest_sample"
        posterior = inference_method.build_posterior(
            density_estimator,
            mcmc_method=mcmc_method,
            mcmc_parameters=mcmc_parameters,
        )
        # Copy hyperparameters, e.g., mcmc_init_samples for "latest_sample" strategy.
        if r > 0:
            posterior.copy_hyperparameters_from(posteriors[-1])
        proposal = posterior.set_default_x(observation)
        posteriors.append(posterior)

    posterior = wrap_posterior(posteriors[-1], transforms)

    assert simulator.num_simulations == num_simulations

    samples = posterior.sample((num_samples,)).detach()

    return samples, simulator.num_simulations, None
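# Usage sketch (hypothetical): sequential SNRE-B on the two_moons task, 10
# rounds of 1,000 simulations each. Assumes `sbibm` is installed and that this
# `run` lives in a module such as `sbibm.algorithms.sbi.snre`.
import sbibm

task = sbibm.get_task("two_moons")
samples, num_sims, _ = run(
    task=task,
    num_samples=1_000,
    num_simulations=10_000,
    num_observation=1,
    variant="B",  # "A" selects SNRE-A (AALR), "B" selects SNRE-B (SRE)
)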
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_top_samples: Optional[int] = 100,
    quantile: Optional[float] = None,
    eps: Optional[float] = None,
    distance: str = "l2",
    batch_size: int = 1000,
    save_distances: bool = False,
    kde_bandwidth: Optional[str] = "cv",
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    lra: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs REJ-ABC from `sbi`

    Choose one of `num_top_samples`, `quantile`, `eps`.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_top_samples: If given, will use `top=True` with num_top_samples
        quantile: Quantile to use
        eps: Epsilon threshold to use
        distance: Distance to use
        batch_size: Batch size for simulator
        save_distances: If True, stores distances of samples to disk
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        sass: If True, summary statistics are learned as in Fearnhead & Prangle 2012.
        sass_fraction: Fraction of simulation budget to use for sass.
        sass_feature_expansion_degree: Degree of polynomial expansion of the
            summary statistics.
        lra: If True, posterior samples are adjusted with linear regression as
            in Beaumont et al. 2002.

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)
    assert not (num_top_samples is None and quantile is None and eps is None)

    log = sbibm.get_logger(__name__)
    log.info("Running REJ-ABC")

    prior = task.get_prior_dist()
    simulator = task.get_simulator(max_calls=num_simulations)
    kde = kde_bandwidth is not None
    if observation is None:
        observation = task.get_observation(num_observation)

    if num_top_samples is not None and quantile is None:
        if sass:
            quantile = num_top_samples / (
                num_simulations - int(sass_fraction * num_simulations)
            )
        else:
            quantile = num_top_samples / num_simulations

    inference_method = MCABC(
        simulator=simulator,
        prior=prior,
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
    )
    # Returns samples or a KDE posterior in `output`.
    output, summary = inference_method(
        x_o=observation,
        num_simulations=num_simulations,
        eps=eps,
        quantile=quantile,
        return_summary=True,
        kde=kde,
        kde_kwargs={"kde_bandwidth": kde_bandwidth} if kde else {},
        lra=lra,
        sass=sass,
        sass_expansion_degree=sass_feature_expansion_degree,
        sass_fraction=sass_fraction,
    )

    assert simulator.num_simulations == num_simulations

    if save_distances:
        save_tensor_to_csv("distances.csv", summary["distances"])

    if kde:
        kde_posterior = output
        samples = kde_posterior.sample(num_samples)

        # The log probability of the true parameters can only be returned
        # with a KDE posterior.
        if num_observation is not None:
            true_parameters = task.get_true_parameters(
                num_observation=num_observation)
            log_prob_true_parameters = kde_posterior.log_prob(
                true_parameters.squeeze())
            return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        samples = output

    return samples, simulator.num_simulations, None
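# Usage sketch (hypothetical): REJ-ABC on the two_moons task, keeping the top
# 100 of 10,000 simulations and smoothing the accepted samples with a
# cross-validated KDE. Assumes `sbibm` is installed and that this `run` lives
# in a module such as `sbibm.algorithms.sbi.mcabc`.
import sbibm

task = sbibm.get_task("two_moons")
samples, num_sims, log_prob_true = run(
    task=task,
    num_samples=1_000,
    num_simulations=10_000,
    num_observation=1,
    num_top_samples=100,  # implies quantile = 0.01 when sass is disabled
    kde_bandwidth="cv",
)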
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    batch_size: int = 100000,
    proposal_dist: Optional[DenfensiveProposal] = None,
    **kwargs: Any,
) -> torch.Tensor:
    """Random samples from Sequential Importance Resampling (SIR) as a baseline

    SIR is also referred to as weighted bootstrap [1]. The prior is used as a
    proposal, so that the weights become the likelihood; this has also been
    referred to as likelihood weighting in the literature.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        batch_size: Batch size for simulations
        proposal_dist: If specified, will be used as a proposal distribution
            instead of prior
        kwargs: Not used

    Returns:
        Random samples from reference posterior

    [1] A. F. M. Smith and A. E. Gelfand. Bayesian statistics without tears: a
        sampling-resampling perspective. The American Statistician,
        46(2):84-88, 1992. doi:10.1080/00031305.1992.10475856.
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    tic = time.time()
    log = sbibm.get_logger(__name__)
    log.info("Sequential Importance Resampling (SIR)")

    prior_dist = task.get_prior_dist()
    if proposal_dist is None:
        proposal_dist = prior_dist

    log_prob_fn = task._get_log_prob_fn(
        num_observation=num_observation,
        observation=observation,
        implementation="experimental",
        posterior=True,
    )

    batch_size = min(batch_size, num_simulations)
    num_batches = int(num_simulations / batch_size)

    particles = []
    log_weights = []
    for i in tqdm(range(num_batches)):
        batch_draws = proposal_dist.sample((batch_size,))
        log_weights.append(
            log_prob_fn(batch_draws) - proposal_dist.log_prob(batch_draws))
        particles.append(batch_draws)
    log.info("Finished sampling")

    particles = torch.cat(particles)
    log_weights = torch.cat(log_weights)

    probs = torch.exp(log_weights.view(-1))
    probs /= probs.sum()

    indices = torch.arange(0, len(probs))
    idxs = choice(indices, num_samples, True, probs)
    samples = particles[idxs, :]
    log.info("Finished resampling")

    num_unique = torch.unique(samples, dim=0).shape[0]
    log.info(f"Unique particles: {num_unique} out of {len(samples)}")

    toc = time.time()
    log.info(f"Finished after {toc-tic:.3f} seconds")

    return samples
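# Self-contained sketch of the SIR idea on a conjugate 1-D Gaussian (all names
# here are illustrative): draw from the prior, weight by the likelihood, then
# resample with replacement in proportion to the weights.
import torch

prior = torch.distributions.Normal(0.0, 1.0)
likelihood_scale = 0.5
x_o = torch.tensor(1.2)

# Prior draws and their log importance weights, log p(x_o | theta).
draws = prior.sample((100_000,))
log_weights = torch.distributions.Normal(draws, likelihood_scale).log_prob(x_o)

# Normalize weights and resample with replacement.
probs = torch.softmax(log_weights, dim=0)
idxs = torch.multinomial(probs, num_samples=1_000, replacement=True)
posterior_samples = draws[idxs]

# Analytic check: the posterior mean is x_o / (1 + likelihood_scale**2).
print(posterior_samples.mean(), x_o / (1 + likelihood_scale**2))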
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    population_size: Optional[int] = None,
    distance: str = "l2",
    epsilon_decay: float = 0.2,
    distance_based_decay: bool = True,
    ess_min: Optional[float] = None,
    initial_round_factor: int = 5,
    batch_size: int = 1000,
    kernel: str = "gaussian",
    kernel_variance_scale: float = 0.5,
    use_last_pop_samples: bool = True,
    algorithm_variant: str = "C",
    save_summary: bool = False,
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    lra: bool = False,
    lra_sample_weights: bool = True,
    kde_bandwidth: Optional[str] = "cv",
    kde_sample_weights: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs SMC-ABC from `sbi`

    SMC-ABC supports two different ways of scheduling epsilon:
    1) Exponential decay: eps_{t+1} = epsilon_decay * eps_t
    2) Distance-based decay: the new eps is determined from the `epsilon_decay`
       quantile of the distances of the accepted simulations in the previous
       population. This is used if `distance_based_decay` is set to True.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        population_size: If None, uses heuristic: 1000 if `num_simulations` is
            greater than 10k, else 100
        distance: Distance function, options = {l1, l2, mse}
        epsilon_decay: Decay for epsilon; treated as quantile in case of
            distance-based decay.
        distance_based_decay: Whether to determine new epsilon from quantile of
            distances of the previous population.
        ess_min: Threshold for resampling a population if effective sample size
            is too small.
        initial_round_factor: Used to determine initial round size
        batch_size: Batch size for the simulator
        kernel: Kernel distribution used to perturb the particles.
        kernel_variance_scale: Scaling factor for kernel variance.
        use_last_pop_samples: If True, samples of a population that was quit
            due to budget are used by filling up missing particles from the
            previous population.
        algorithm_variant: There are three SMC-ABC variants implemented: A, B,
            and C. See docstrings in the `sbi` package for more details.
        save_summary: Whether to save a summary containing all populations,
            distances, etc. to file.
        sass: If True, summary statistics are learned as in Fearnhead & Prangle 2012.
        sass_fraction: Fraction of simulation budget to use for sass.
        sass_feature_expansion_degree: Degree of polynomial expansion of the
            summary statistics.
        lra: If True, posterior samples are adjusted with linear regression as
            in Beaumont et al. 2002.
        lra_sample_weights: Whether to weigh LRA samples
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        kde_sample_weights: Whether to weigh KDE samples

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = sbibm.get_logger(__name__)
    smc_papers = dict(A="Toni 2010", B="Sisson et al. 2007", C="Beaumont et al. 2009")
    log.info(f"Running SMC-ABC as in {smc_papers[algorithm_variant]}.")

    prior = task.get_prior_dist()
    simulator = task.get_simulator(max_calls=num_simulations)
    if observation is None:
        observation = task.get_observation(num_observation)

    if population_size is None:
        population_size = 100
        if num_simulations > 10_000:
            population_size = 1000

    population_size = min(population_size, num_simulations)

    initial_round_size = clip_int(
        value=initial_round_factor * population_size,
        minimum=population_size,
        maximum=max(0.5 * num_simulations, population_size),
    )

    inference_method = SMCABC(
        simulator=simulator,
        prior=prior,
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
        kernel=kernel,
        algorithm_variant=algorithm_variant,
    )
    posterior, summary = inference_method(
        x_o=observation,
        num_particles=population_size,
        num_initial_pop=initial_round_size,
        num_simulations=num_simulations,
        epsilon_decay=epsilon_decay,
        distance_based_decay=distance_based_decay,
        ess_min=ess_min,
        kernel_variance_scale=kernel_variance_scale,
        use_last_pop_samples=use_last_pop_samples,
        return_summary=True,
        lra=lra,
        lra_with_weights=lra_sample_weights,
        sass=sass,
        sass_fraction=sass_fraction,
        sass_expansion_degree=sass_feature_expansion_degree,
    )

    if save_summary:
        log.info("Saving smcabc summary to csv.")
        pd.DataFrame.from_dict(summary).to_csv("summary.csv", index=False)

    assert simulator.num_simulations == num_simulations

    if kde_bandwidth is not None:
        samples = posterior._samples
        log.info(
            f"KDE on {samples.shape[0]} samples with bandwidth option {kde_bandwidth}"
        )
        kde = get_kde(
            samples,
            bandwidth=kde_bandwidth,
            sample_weight=posterior._log_weights.exp()
            if kde_sample_weights else None,
        )
        samples = kde.sample(num_samples)
    else:
        samples = posterior.sample((num_samples,)).detach()

    if num_observation is not None:
        true_parameters = task.get_true_parameters(num_observation=num_observation)
        log_prob_true_parameters = posterior.log_prob(true_parameters)
        return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        return samples, simulator.num_simulations, None
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_simulations_per_step: int = 100,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    automatic_transforms_enabled: bool = False,
    mcmc_method: str = "slice_np",
    mcmc_parameters: Dict[str, Any] = {},
    diag_eps: float = 0.0,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs Synthetic Likelihood (SL) using `sbi`

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_simulations_per_step: Number of simulations per MCMC step
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        automatic_transforms_enabled: Whether to enable automatic transforms
        mcmc_method: MCMC method
        mcmc_parameters: MCMC parameters
        diag_eps: Epsilon applied to diagonal

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = logging.getLogger(__name__)
    log.info("Running SL")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    simulator = task.get_simulator()

    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    prior = wrap_prior_dist(prior, transforms)
    simulator = wrap_simulator_fn(simulator, transforms)

    likelihood_estimator = SynthLikNet(
        simulator=simulator,
        num_simulations_per_step=num_simulations_per_step,
        diag_eps=diag_eps,
    )

    posterior = LikelihoodBasedPosterior(
        method_family="snle",
        neural_net=likelihood_estimator,
        prior=prior,
        x_shape=observation.shape,
        mcmc_parameters=mcmc_parameters,
    )
    posterior.set_default_x(observation)

    posterior = wrap_posterior(posterior, transforms)

    # Unlike the other algorithms, SL simulates inside each MCMC step, so the
    # total number of simulator calls is not fixed upfront:
    # assert simulator.num_simulations == num_simulations

    samples = posterior.sample((num_samples,)).detach()

    return samples, simulator.num_simulations, None
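# Usage sketch (hypothetical): synthetic likelihood on the two_moons task with
# 100 simulations per MCMC step. Assumes `sbibm` is installed and that this
# `run` lives in a module such as `sbibm.algorithms.sbi.sl`. The reported
# number of simulator calls depends on the length of the MCMC run, not on
# `num_simulations`.
import sbibm

task = sbibm.get_task("two_moons")
samples, num_sims, _ = run(
    task=task,
    num_samples=1_000,
    num_simulations=10_000,  # nominal; SL does not enforce this budget
    num_simulations_per_step=100,
    num_observation=1,
)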