def posterior_to_xarray(self):
    """Convert the posterior to an xarray dataset."""
    var_names = get_default_varnames(self.trace.varnames, include_transformed=False)
    data = {}
    data_warmup = {}
    for var_name in var_names:
        if self.warmup_trace:
            data_warmup[var_name] = np.array(
                self.warmup_trace.get_values(var_name, combine=False, squeeze=False)
            )
        if self.posterior_trace:
            data[var_name] = np.array(
                self.posterior_trace.get_values(var_name, combine=False, squeeze=False)
            )
    return (
        dict_to_dataset(
            data,
            library=pymc,
            coords=self.coords,
            dims=self.dims,
            attrs=self.attrs,
        ),
        dict_to_dataset(
            data_warmup,
            library=pymc,
            coords=self.coords,
            dims=self.dims,
            attrs=self.attrs,
        ),
    )
def __init__(self, model):
    self.model = model
    self.var_names = get_default_varnames(self.model.named_vars, include_transformed=False)
    self.var_list = self.model.named_vars.values()
    self.transform_map = {
        v.transformed: v.name for v in self.var_list if hasattr(v, "transformed")
    }
    self._deterministics = None
def sample_numpyro_nuts(
    draws: int = 1000,
    tune: int = 1000,
    chains: int = 4,
    target_accept: float = 0.8,
    random_seed: Optional[int] = None,
    initvals: Optional[Union[StartDict, Sequence[Optional[StartDict]]]] = None,
    model: Optional[Model] = None,
    var_names=None,
    progress_bar: bool = True,
    keep_untransformed: bool = False,
    chain_method: str = "parallel",
    idata_kwargs: Optional[Dict] = None,
    nuts_kwargs: Optional[Dict] = None,
):
    """
    Draw samples from the posterior using the NUTS method from the ``numpyro`` library.

    Parameters
    ----------
    draws : int, default 1000
        The number of samples to draw. The tuning samples are discarded by default.
    tune : int, default 1000
        Number of iterations to tune. Samplers adjust the step sizes, scalings or
        similar during tuning. Tuning samples will be drawn in addition to the
        number specified in the ``draws`` argument.
    chains : int, default 4
        The number of chains to sample.
    target_accept : float in [0, 1].
        The step size is tuned such that we approximate this acceptance rate.
        Higher values like 0.9 or 0.95 often work better for problematic posteriors.
    random_seed : int, optional
        Random seed used by the sampling steps. If ``None``, a seed is drawn from
        the model's random number generator.
    initvals : StartDict or Sequence[Optional[StartDict]], optional
        Initial values for the sampled variables, either one dict shared by all
        chains or one dict per chain.
    model : Model, optional
        Model to sample from. The model needs to have free random variables. When
        inside a ``with`` model context, it defaults to that model, otherwise the
        model must be passed explicitly.
    var_names : iterable of str, optional
        Names of variables for which to compute the posterior samples. Defaults to
        all variables in the posterior.
    progress_bar : bool, default True
        Whether or not to display a progress bar in the command line. The bar shows
        the percentage of completion, the sampling speed in samples per second
        (SPS), and the estimated remaining time until completion ("expected time of
        arrival"; ETA).
    keep_untransformed : bool, default False
        Include untransformed variables in the posterior samples.
    chain_method : str, default "parallel"
        Specify how samples should be drawn. The choices include "sequential",
        "parallel", and "vectorized".
    idata_kwargs : dict, optional
        Keyword arguments for :func:`arviz.from_dict`. It also accepts a boolean as
        value for the ``log_likelihood`` key to indicate that the pointwise log
        likelihood should not be included in the returned object.
    nuts_kwargs : dict, optional
        Keyword arguments passed on to :class:`numpyro.infer.NUTS`.

    Returns
    -------
    InferenceData
        ArviZ ``InferenceData`` object that contains the posterior samples, together
        with their respective sample stats and pointwise log likelihood values
        (unless skipped with ``idata_kwargs``).
""" import numpyro from numpyro.infer import MCMC, NUTS model = modelcontext(model) if var_names is None: var_names = model.unobserved_value_vars vars_to_sample = list( get_default_varnames(var_names, include_transformed=keep_untransformed)) coords = { cname: np.array(cvals) if isinstance(cvals, tuple) else cvals for cname, cvals in model.coords.items() if cvals is not None } if hasattr(model, "RV_dims"): dims = { var_name: [dim for dim in dims if dim is not None] for var_name, dims in model.RV_dims.items() } else: dims = {} if random_seed is None: random_seed = model.rng_seeder.randint(2**30, dtype=np.int64) tic1 = datetime.now() print("Compiling...", file=sys.stdout) init_params = _get_batched_jittered_initial_points( model=model, chains=chains, initvals=initvals, random_seed=random_seed, ) logp_fn = get_jaxified_logp(model, negative_logp=False) if nuts_kwargs is None: nuts_kwargs = {} nuts_kernel = NUTS( potential_fn=logp_fn, target_accept_prob=target_accept, adapt_step_size=True, adapt_mass_matrix=True, dense_mass=False, **nuts_kwargs, ) pmap_numpyro = MCMC( nuts_kernel, num_warmup=tune, num_samples=draws, num_chains=chains, postprocess_fn=None, chain_method=chain_method, progress_bar=progress_bar, ) tic2 = datetime.now() print("Compilation time = ", tic2 - tic1, file=sys.stdout) print("Sampling...", file=sys.stdout) map_seed = jax.random.PRNGKey(random_seed) if chains > 1: map_seed = jax.random.split(map_seed, chains) pmap_numpyro.run( map_seed, init_params=init_params, extra_fields=( "num_steps", "potential_energy", "energy", "adapt_state.step_size", "accept_prob", "diverging", ), ) raw_mcmc_samples = pmap_numpyro.get_samples(group_by_chain=True) tic3 = datetime.now() print("Sampling time = ", tic3 - tic2, file=sys.stdout) print("Transforming variables...", file=sys.stdout) mcmc_samples = {} for v in vars_to_sample: jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=[v]) result = jax.vmap(jax.vmap(jax_fn))(*raw_mcmc_samples)[0] mcmc_samples[v.name] = result tic4 = datetime.now() print("Transformation time = ", tic4 - tic3, file=sys.stdout) if idata_kwargs is None: idata_kwargs = {} else: idata_kwargs = idata_kwargs.copy() if idata_kwargs.pop("log_likelihood", True): log_likelihood = _get_log_likelihood(model, raw_mcmc_samples) else: log_likelihood = None attrs = { "sampling_time": (tic3 - tic2).total_seconds(), } posterior = mcmc_samples az_trace = az.from_dict( posterior=posterior, log_likelihood=log_likelihood, observed_data=find_observations(model), sample_stats=_sample_stats_to_xarray(pmap_numpyro), coords=coords, dims=dims, attrs=make_attrs(attrs, library=numpyro), **idata_kwargs, ) return az_trace
def sample_blackjax_nuts(
    draws=1000,
    tune=1000,
    chains=4,
    target_accept=0.8,
    random_seed=10,
    initvals=None,
    model=None,
    var_names=None,
    keep_untransformed=False,
    chain_method="parallel",
    idata_kwargs=None,
):
    """
    Draw samples from the posterior using the NUTS method from the ``blackjax`` library.

    Parameters
    ----------
    draws : int, default 1000
        The number of samples to draw. The tuning samples are discarded by default.
    tune : int, default 1000
        Number of iterations to tune. Samplers adjust the step sizes, scalings or
        similar during tuning. Tuning samples will be drawn in addition to the
        number specified in the ``draws`` argument.
    chains : int, default 4
        The number of chains to sample.
    target_accept : float in [0, 1].
        The step size is tuned such that we approximate this acceptance rate.
        Higher values like 0.9 or 0.95 often work better for problematic posteriors.
    random_seed : int, default 10
        Random seed used by the sampling steps.
    initvals : StartDict or Sequence[Optional[StartDict]], optional
        Initial values for the sampled variables, either one dict shared by all
        chains or one dict per chain.
    model : Model, optional
        Model to sample from. The model needs to have free random variables. When
        inside a ``with`` model context, it defaults to that model, otherwise the
        model must be passed explicitly.
    var_names : iterable of str, optional
        Names of variables for which to compute the posterior samples. Defaults to
        all variables in the posterior.
    keep_untransformed : bool, default False
        Include untransformed variables in the posterior samples.
    chain_method : str, default "parallel"
        Specify how samples should be drawn. The choices include "parallel" and
        "vectorized".
    idata_kwargs : dict, optional
        Keyword arguments for :func:`arviz.from_dict`. It also accepts a boolean as
        value for the ``log_likelihood`` key to indicate that the pointwise log
        likelihood should not be included in the returned object.

    Returns
    -------
    InferenceData
        ArviZ ``InferenceData`` object that contains the posterior samples, together
        with their respective sample stats and pointwise log likelihood values
        (unless skipped with ``idata_kwargs``).
""" import blackjax model = modelcontext(model) if var_names is None: var_names = model.unobserved_value_vars vars_to_sample = list( get_default_varnames(var_names, include_transformed=keep_untransformed)) coords = { cname: np.array(cvals) if isinstance(cvals, tuple) else cvals for cname, cvals in model.coords.items() if cvals is not None } if hasattr(model, "RV_dims"): dims = { var_name: [dim for dim in dims if dim is not None] for var_name, dims in model.RV_dims.items() } else: dims = {} tic1 = datetime.now() print("Compiling...", file=sys.stdout) init_params = _get_batched_jittered_initial_points( model=model, chains=chains, initvals=initvals, random_seed=random_seed, ) if chains == 1: init_params = [np.stack(init_params)] init_params = [ np.stack(init_state) for init_state in zip(*init_params) ] logprob_fn = get_jaxified_logp(model) seed = jax.random.PRNGKey(random_seed) keys = jax.random.split(seed, chains) get_posterior_samples = partial( _blackjax_inference_loop, logprob_fn=logprob_fn, tune=tune, draws=draws, target_accept=target_accept, ) tic2 = datetime.now() print("Compilation time = ", tic2 - tic1, file=sys.stdout) print("Sampling...", file=sys.stdout) # Adapted from numpyro if chain_method == "parallel": map_fn = jax.pmap elif chain_method == "vectorized": map_fn = jax.vmap else: raise ValueError( "Only supporting the following methods to draw chains:" ' "parallel" or "vectorized"') states, _ = map_fn(get_posterior_samples)(keys, init_params) raw_mcmc_samples = states.position tic3 = datetime.now() print("Sampling time = ", tic3 - tic2, file=sys.stdout) print("Transforming variables...", file=sys.stdout) mcmc_samples = {} for v in vars_to_sample: jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=[v]) result = jax.vmap(jax.vmap(jax_fn))(*raw_mcmc_samples)[0] mcmc_samples[v.name] = result tic4 = datetime.now() print("Transformation time = ", tic4 - tic3, file=sys.stdout) if idata_kwargs is None: idata_kwargs = {} else: idata_kwargs = idata_kwargs.copy() if idata_kwargs.pop("log_likelihood", True): log_likelihood = _get_log_likelihood(model, raw_mcmc_samples) else: log_likelihood = None attrs = { "sampling_time": (tic3 - tic2).total_seconds(), } posterior = mcmc_samples az_trace = az.from_dict( posterior=posterior, log_likelihood=log_likelihood, observed_data=find_observations(model), coords=coords, dims=dims, attrs=make_attrs(attrs, library=blackjax), **idata_kwargs, ) return az_trace
def find_MAP(
    start=None,
    vars=None,
    method="L-BFGS-B",
    return_raw=False,
    include_transformed=True,
    progressbar=True,
    maxeval=5000,
    model=None,
    *args,
    seed: Optional[int] = None,
    **kwargs,
):
    """Find the local maximum a posteriori point given a model.

    `find_MAP` should not be used to initialize the NUTS sampler. Simply call
    ``pymc.sample()`` and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start : `dict` of parameter values (Defaults to `model.initial_point`)
    vars : list
        List of variables to optimize and set to optimum (Defaults to all
        continuous).
    method : string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete variables
        are specified in `vars`, then `Powell` which will perform better). For
        instructions on use of a callable, refer to SciPy's documentation of
        `optimize.minimize`.
    return_raw : bool
        Whether to return the full output of scipy.optimize.minimize (Defaults to
        `False`).
    include_transformed : bool, optional, defaults to True
        Flag for reporting automatically transformed variables in addition to
        original variables.
    progressbar : bool, optional, defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval : int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model : Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used `find_MAP` to initialize the NUTS sampler, but this
    is not an effective way of choosing starting values for sampling. The
    initialization of NUTS has since been greatly enhanced and wrapped inside
    ``pymc.sample()``, so you should avoid using `find_MAP` for that purpose.
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
        if not vars:
            raise ValueError("Model has no unobserved continuous variables.")
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    ipfn = make_initial_point_fn(
        model=model,
        jitter_rvs={},
        return_transformed=True,
        overrides=start,
    )
    if seed is None:
        seed = model.rng_seeder.randint(2**30, dtype=np.int64)
    start = ipfn(seed)
    model.check_start_vals(start)

    x0 = DictToArrayBijection.map(start)

    # TODO: If the mapping is fixed, we can simply create graphs for the
    # mapping and avoid all this bijection overhead
    def logp_func(x):
        return DictToArrayBijection.mapf(model.fastlogp_nojac)(RaveledVars(x, x0.point_map_info))

    try:
        # This might be needed for calls to `dlogp_func`
        # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars)

        def dlogp_func(x):
            return DictToArrayBijection.mapf(model.fastdlogp_nojac(vars))(
                RaveledVars(x, x0.point_map_info)
            )

        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available "
            "(e.g. vars contains discrete variables). MAP "
            "estimates may not be accurate for the default "
            "parameters. Defaulting to non-gradient minimization "
            "'Powell'."
        )
        method = "Powell"

    if compute_gradient:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func)
    else:
        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

    try:
        opt_result = minimize(
            cost_func, x0.data, method=method, jac=compute_gradient, *args, **kwargs
        )
        mx0 = opt_result["x"]
    except (KeyboardInterrupt, StopIteration) as e:
        mx0, opt_result = cost_func.previous_x, None
        if isinstance(e, StopIteration):
            pm._log.info(e)
    finally:
        last_v = cost_func.n_eval
        if progressbar:
            assert isinstance(cost_func.progress, ProgressBar)
            cost_func.progress.total = last_v
            cost_func.progress.update(last_v)
            print(file=sys.stdout)

    mx0 = RaveledVars(mx0, x0.point_map_info)

    vars = get_default_varnames(model.unobserved_value_vars, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(DictToArrayBijection.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
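# Illustrative usage sketch for find_MAP above (hypothetical model and data).
# With only continuous free variables, the L-BFGS-B default is used and a dict
# mapping variable names to their MAP values is returned.
import numpy as np
import pymc as pm

with pm.Model():
    mu = pm.Normal("mu", 0.0, 10.0)
    pm.Normal("obs", mu, 1.0, observed=np.array([0.5, 1.5, 1.0]))  # fake data
    map_estimate = find_MAP()

print(map_estimate["mu"])  # approximately the posterior mode of mu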
def __init__(self, model):
    self.model = model
    self._all_var_names = get_default_varnames(self.model.named_vars, include_transformed=False)
    self.var_list = self.model.named_vars.values()
def sample_numpyro_nuts(
    draws=1000,
    tune=1000,
    chains=4,
    target_accept=0.8,
    random_seed=10,
    model=None,
    var_names=None,
    progress_bar=True,
    keep_untransformed=False,
    chain_method="parallel",
):
    from numpyro.infer import MCMC, NUTS

    model = modelcontext(model)

    if var_names is None:
        var_names = model.unobserved_value_vars

    vars_to_sample = list(get_default_varnames(var_names, include_transformed=keep_untransformed))

    coords = {
        cname: np.array(cvals) if isinstance(cvals, tuple) else cvals
        for cname, cvals in model.coords.items()
        if cvals is not None
    }

    if hasattr(model, "RV_dims"):
        dims = {
            var_name: [dim for dim in dims if dim is not None]
            for var_name, dims in model.RV_dims.items()
        }
    else:
        dims = {}

    tic1 = pd.Timestamp.now()
    print("Compiling...", file=sys.stdout)

    rv_names = [rv.name for rv in model.value_vars]
    init_state = [model.initial_point[rv_name] for rv_name in rv_names]
    init_state_batched = jax.tree_map(lambda x: np.repeat(x[None, ...], chains, axis=0), init_state)

    logp_fn = get_jaxified_logp(model)

    nuts_kernel = NUTS(
        potential_fn=logp_fn,
        target_accept_prob=target_accept,
        adapt_step_size=True,
        adapt_mass_matrix=True,
        dense_mass=False,
    )

    pmap_numpyro = MCMC(
        nuts_kernel,
        num_warmup=tune,
        num_samples=draws,
        num_chains=chains,
        postprocess_fn=None,
        chain_method=chain_method,
        progress_bar=progress_bar,
    )

    tic2 = pd.Timestamp.now()
    print("Compilation time = ", tic2 - tic1, file=sys.stdout)

    print("Sampling...", file=sys.stdout)

    seed = jax.random.PRNGKey(random_seed)
    map_seed = jax.random.split(seed, chains)

    if chains == 1:
        init_params = init_state
        map_seed = seed
    else:
        init_params = init_state_batched

    pmap_numpyro.run(
        map_seed,
        init_params=init_params,
        extra_fields=(
            "num_steps",
            "potential_energy",
            "energy",
            "adapt_state.step_size",
            "accept_prob",
            "diverging",
        ),
    )

    raw_mcmc_samples = pmap_numpyro.get_samples(group_by_chain=True)

    tic3 = pd.Timestamp.now()
    print("Sampling time = ", tic3 - tic2, file=sys.stdout)

    print("Transforming variables...", file=sys.stdout)
    mcmc_samples = {}
    for v in vars_to_sample:
        fgraph = FunctionGraph(model.value_vars, [v], clone=False)
        optimize_graph(fgraph, include=["fast_run"], exclude=["cxx_only", "BlasOpt"])
        jax_fn = jax_funcify(fgraph)
        result = jax.vmap(jax.vmap(jax_fn))(*raw_mcmc_samples)[0]
        mcmc_samples[v.name] = result

    tic4 = pd.Timestamp.now()
    print("Transformation time = ", tic4 - tic3, file=sys.stdout)

    posterior = mcmc_samples
    az_trace = az.from_dict(
        posterior=posterior,
        log_likelihood=_get_log_likelihood(model, raw_mcmc_samples),
        observed_data=find_observations(model),
        sample_stats=_sample_stats_to_xarray(pmap_numpyro),
        coords=coords,
        dims=dims,
    )

    return az_trace