def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_top_samples: Optional[int] = 100,
    quantile: Optional[float] = None,
    eps: Optional[float] = None,
    distance: str = "l2",
    batch_size: int = 1000,
    save_distances: bool = False,
    kde_bandwidth: Optional[str] = "cv",
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    lra: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs REJ-ABC from `sbi`

    Choose one of `num_top_samples`, `quantile`, `eps`.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_top_samples: If given, will use `top=True` with num_top_samples
        quantile: Quantile to use
        eps: Epsilon threshold to use
        distance: Distance to use
        batch_size: Batch size for simulator
        save_distances: If True, stores distances of samples to disk
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        sass: If True, summary statistics are learned as in
            Fearnhead & Prangle 2012.
        sass_fraction: Fraction of simulation budget to use for sass.
        sass_feature_expansion_degree: Degree of polynomial expansion of the
            summary statistics.
        lra: If True, posterior samples are adjusted with linear regression
            as in Beaumont et al. 2002.

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    # Exactly one way of specifying the observation, and at least one of the
    # three acceptance-threshold specifications, must be given.
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)
    assert not (num_top_samples is None and quantile is None and eps is None)

    log = sbibm.get_logger(__name__)
    log.info("Running REJ-ABC")

    prior = task.get_prior_dist()
    simulator = task.get_simulator(max_calls=num_simulations)
    kde = kde_bandwidth is not None
    if observation is None:
        observation = task.get_observation(num_observation)

    # Translate `num_top_samples` into an acceptance quantile; when SASS is
    # on, part of the budget is spent learning summary statistics, so the
    # quantile is taken w.r.t. the remaining simulations only.
    if num_top_samples is not None and quantile is None:
        if sass:
            quantile = num_top_samples / (
                num_simulations - int(sass_fraction * num_simulations)
            )
        else:
            quantile = num_top_samples / num_simulations

    inference_method = MCABC(
        simulator=simulator,
        prior=prior,
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
    )
    # Returns samples or kde posterior in output.
    output, summary = inference_method(
        x_o=observation,
        num_simulations=num_simulations,
        eps=eps,
        quantile=quantile,
        return_summary=True,
        kde=kde,
        # BUG FIX: previously `{} if run_kde else {"kde_bandwidth": ...}` —
        # `run_kde` was undefined (NameError) and the condition was inverted.
        # `sbi`'s get_kde takes `bandwidth`, passed only when KDE is enabled.
        kde_kwargs={"bandwidth": kde_bandwidth} if kde else {},
        lra=lra,
        sass=sass,
        sass_expansion_degree=sass_feature_expansion_degree,
        sass_fraction=sass_fraction,
    )
    assert simulator.num_simulations == num_simulations

    if save_distances:
        save_tensor_to_csv("distances.csv", summary["distances"])

    if kde:
        kde_posterior = output
        # BUG FIX: draw the requested number of posterior samples;
        # previously sampled `num_simulations` instead of `num_samples`.
        samples = kde_posterior.sample(num_samples)

        # LPTP can only be returned with KDE posterior.
        if num_observation is not None:
            true_parameters = task.get_true_parameters(
                num_observation=num_observation
            )
            log_prob_true_parameters = kde_posterior.log_prob(
                true_parameters.squeeze()
            )
            return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        samples = output

    return samples, simulator.num_simulations, None
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_rounds: int = 10,
    neural_net: str = "nsf",
    hidden_features: int = 50,
    simulation_batch_size: int = 1000,
    training_batch_size: int = 10000,
    num_atoms: int = 10,
    automatic_transforms_enabled: bool = False,
    z_score_x: bool = True,
    z_score_theta: bool = True,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs (S)NPE from `sbi`

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_rounds: Number of rounds
        neural_net: Neural network to use, one of maf / mdn / made / nsf
        hidden_features: Number of hidden features in network
        simulation_batch_size: Batch size for simulator
        training_batch_size: Batch size for training network
        num_atoms: Number of atoms, -1 means same as `training_batch_size`
        automatic_transforms_enabled: Whether to enable automatic transforms
        z_score_x: Whether to z-score x
        z_score_theta: Whether to z-score theta

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = logging.getLogger(__name__)

    # A single round is plain (amortized) NPE; more rounds make it sequential,
    # with the budget split evenly across rounds.
    if num_rounds == 1:
        log.info("Running NPE")
        num_simulations_per_round = num_simulations
    else:
        log.info("Running SNPE")
        num_simulations_per_round = math.floor(num_simulations / num_rounds)

    # Batch sizes cannot exceed the per-round simulation count.
    # FIX: use `warning` — `Logger.warn` is deprecated since Python 3.3.
    if simulation_batch_size > num_simulations_per_round:
        simulation_batch_size = num_simulations_per_round
        log.warning("Reduced simulation_batch_size to num_simulation_per_round")

    if training_batch_size > num_simulations_per_round:
        training_batch_size = num_simulations_per_round
        log.warning("Reduced training_batch_size to num_simulation_per_round")

    prior = task.get_prior_dist()
    if observation is None:
        observation = task.get_observation(num_observation)

    simulator = task.get_simulator(max_calls=num_simulations)

    transforms = task._get_transforms(automatic_transforms_enabled)["parameters"]
    if automatic_transforms_enabled:
        prior = wrap_prior_dist(prior, transforms)
        simulator = wrap_simulator_fn(simulator, transforms)

    density_estimator_fun = posterior_nn(
        model=neural_net.lower(),
        hidden_features=hidden_features,
        z_score_x=z_score_x,
        z_score_theta=z_score_theta,
    )
    inference_method = inference.SNPE_C(
        prior, density_estimator=density_estimator_fun
    )

    posteriors = []
    proposal = prior

    for _ in range(num_rounds):
        theta, x = inference.simulate_for_sbi(
            simulator,
            proposal,
            num_simulations=num_simulations_per_round,
            simulation_batch_size=simulation_batch_size,
        )

        density_estimator = inference_method.append_simulations(
            theta, x, proposal=proposal
        ).train(
            num_atoms=num_atoms,
            training_batch_size=training_batch_size,
            retrain_from_scratch_each_round=False,
            discard_prior_samples=False,
            use_combined_loss=False,
            show_train_summary=True,
        )
        posterior = inference_method.build_posterior(
            density_estimator, sample_with_mcmc=False
        )
        # Next round's proposal is the current posterior at the observation.
        proposal = posterior.set_default_x(observation)
        posteriors.append(posterior)

    # Undo any parameter transforms so samples live in the original space.
    posterior = wrap_posterior(posteriors[-1], transforms)

    assert simulator.num_simulations == num_simulations

    samples = posterior.sample((num_samples,)).detach()

    if num_observation is not None:
        true_parameters = task.get_true_parameters(num_observation=num_observation)
        log_prob_true_parameters = posterior.log_prob(true_parameters)
        return samples, simulator.num_simulations, log_prob_true_parameters
    else:
        return samples, simulator.num_simulations, None
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    population_size: Optional[int] = None,
    distance: str = "l2",
    epsilon_decay: float = 0.2,
    distance_based_decay: bool = True,
    ess_min: Optional[float] = None,
    initial_round_factor: int = 5,
    batch_size: int = 1000,
    kernel: str = "gaussian",
    kernel_variance_scale: float = 0.5,
    use_last_pop_samples: bool = True,
    algorithm_variant: str = "C",
    save_summary: bool = False,
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    lra: bool = False,
    lra_sample_weights: bool = True,
    kde_bandwidth: Optional[str] = "cv",
    kde_sample_weights: bool = False,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs SMC-ABC from `sbi`

    Epsilon can be scheduled in one of two ways:

    1) Exponential decay: ``eps_t+1 = epsilon_decay * eps_t``
    2) Distance-based decay: the next eps is the `epsilon_decay` quantile of
       the accepted distances of the previous population (enabled via
       `distance_based_decay`).

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        population_size: If None, uses heuristic: 1000 if `num_simulations` is
            greater than 10k, else 100
        distance: Distance function, options = {l1, l2, mse}
        epsilon_decay: Decay for epsilon; treated as quantile in case of
            distance based decay.
        distance_based_decay: Whether to determine new epsilon from quantile of
            distances of the previous population.
        ess_min: Threshold for resampling a population if effective sampling
            size is too small.
        initial_round_factor: Used to determine initial round size
        batch_size: Batch size for the simulator
        kernel: Kernel distribution used to perturb the particles.
        kernel_variance_scale: Scaling factor for kernel variance.
        use_last_pop_samples: If True, samples of a population that was quit
            due to budget are used by filling up missing particles from the
            previous population.
        algorithm_variant: One of the three implemented SMCABC variants:
            A, B, or C. See docstrings in the SBI package for details.
        save_summary: Whether to save a summary containing all populations,
            distances, etc. to file.
        sass: If True, summary statistics are learned as in
            Fearnhead & Prangle 2012.
        sass_fraction: Fraction of simulation budget to use for sass.
        sass_feature_expansion_degree: Degree of polynomial expansion of the
            summary statistics.
        lra: If True, posterior samples are adjusted with linear regression as
            in Beaumont et al. 2002.
        lra_sample_weights: Whether to weigh LRA samples
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
        kde_sample_weights: Whether to weigh KDE samples

    Returns:
        Samples from posterior, number of simulator calls, log probability of
        true params if computable
    """
    # The observation must be specified exactly one way.
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    log = sbibm.get_logger(__name__)
    smc_papers = dict(
        A="Toni 2010", B="Sisson et al. 2007", C="Beaumont et al. 2009"
    )
    log.info(f"Running SMC-ABC as in {smc_papers[algorithm_variant]}.")

    prior = task.get_prior_dist()
    simulator = task.get_simulator(max_calls=num_simulations)
    if observation is None:
        observation = task.get_observation(num_observation)

    # Heuristic default population size, capped by the simulation budget.
    if population_size is None:
        population_size = 1000 if num_simulations > 10_000 else 100
    population_size = min(population_size, num_simulations)

    # First population is larger than later ones, but never exceeds half the
    # budget (while staying at least one population in size).
    initial_round_size = clip_int(
        value=initial_round_factor * population_size,
        minimum=population_size,
        maximum=max(0.5 * num_simulations, population_size),
    )

    inference_method = SMCABC(
        simulator=simulator,
        prior=prior,
        simulation_batch_size=batch_size,
        distance=distance,
        show_progress_bars=True,
        kernel=kernel,
        algorithm_variant=algorithm_variant,
    )
    posterior, summary = inference_method(
        x_o=observation,
        num_particles=population_size,
        num_initial_pop=initial_round_size,
        num_simulations=num_simulations,
        epsilon_decay=epsilon_decay,
        distance_based_decay=distance_based_decay,
        ess_min=ess_min,
        kernel_variance_scale=kernel_variance_scale,
        use_last_pop_samples=use_last_pop_samples,
        return_summary=True,
        lra=lra,
        lra_with_weights=lra_sample_weights,
        sass=sass,
        sass_fraction=sass_fraction,
        sass_expansion_degree=sass_feature_expansion_degree,
    )

    if save_summary:
        log.info("Saving smcabc summary to csv.")
        pd.DataFrame.from_dict(summary,).to_csv("summary.csv", index=False)

    assert simulator.num_simulations == num_simulations

    if kde_bandwidth is None:
        # Sample the particle-based posterior directly.
        samples = posterior.sample((num_samples,)).detach()
    else:
        # Resample through a KDE fit on the final particle population,
        # optionally weighted by the particle weights.
        particles = posterior._samples
        log.info(
            f"KDE on {particles.shape[0]} samples with bandwidth option {kde_bandwidth}"
        )
        weights = (
            posterior._log_weights.exp() if kde_sample_weights else None
        )
        kde = get_kde(particles, bandwidth=kde_bandwidth, sample_weight=weights)
        samples = kde.sample(num_samples)

    if num_observation is None:
        return samples, simulator.num_simulations, None

    true_parameters = task.get_true_parameters(num_observation=num_observation)
    log_prob_true_parameters = posterior.log_prob(true_parameters)
    return samples, simulator.num_simulations, log_prob_true_parameters