def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    # Create a model for test
    with Model() as model:
        mu = Normal('mu', mu=mu0, sd=sigma)
        y = Normal('y', mu=mu, sd=1, observed=y_obs)

    vars = inputvars(model.vars)

    # Create variational gradient tensor
    grad, elbo, shared, uw = variational_gradient_estimate(
        vars, model, n_mcsamples=10000, random_seed=1)

    # Variational posterior parameters
    uw_ = np.array([1.88, np.log(1)])

    # Calculate elbo computed with MonteCarlo
    f = function([uw], elbo)
    elbo_mc = f(uw_)

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * uw_[0]**2 - 2 * (y_obs[0] + y_obs[1] + mu0) * uw_[0] +
        y_obs[0]**2 + y_obs[1]**2 + mu0**2 + 3 * np.log(2 * np.pi)) +
        0.5 * (np.log(2 * np.pi) + 1))

    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
def __init__(
    self,
    draws=10000,
    model=None,
    random_seed=-1,
    chain=0,
    frac_validate=0.8,
    alpha=(0, 0),
    rho=0.01,
    verbose=False,
):
    self.draws = draws
    self.model = model
    self.random_seed = random_seed
    self.chain = chain
    self.frac_validate = frac_validate
    self.alpha = alpha
    self.rho = rho
    self.verbose = verbose

    self.model = modelcontext(model)

    if self.random_seed != -1:
        np.random.seed(self.random_seed)

    self.variables = inputvars(self.model.vars)
    self.log_marginal_likelihood = 0
    self.log_volume_factor = np.zeros(1)
    self.prior_weight = np.ones(self.draws) / self.draws
    self.posterior_weights = np.array([])
    self.log_evidences = np.array([])
    self.cumul_evidences = np.zeros(1)
    self.likelihood_logp_thresh = np.array([-np.inf])
    self.posterior_logp_thresh = np.array([])
def test_elbo():
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    # Create a model for test
    with Model() as model:
        mu = Normal('mu', mu=mu0, sd=sigma)
        Normal('y', mu=mu, sd=1, observed=y_obs)

    vars = inputvars(model.vars)

    # Create variational gradient tensor
    elbo, _ = _calc_elbo(vars, model, n_mcsamples=10000, random_seed=1)

    # Variational posterior parameters
    uw_ = np.array([1.88, np.log(1)])

    # Calculate elbo computed with MonteCarlo
    uw_shared = shared(uw_, 'uw_shared')
    elbo = CallableTensor(elbo)(uw_shared)
    f = function([], elbo)
    elbo_mc = f()

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * uw_[0]**2 - 2 * (y_obs[0] + y_obs[1] + mu0) * uw_[0] +
        y_obs[0]**2 + y_obs[1]**2 + mu0**2 + 3 * np.log(2 * np.pi)) +
        0.5 * (np.log(2 * np.pi) + 1))

    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
def test_elbo(self):
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])

    # Create a model for test
    with Model() as model:
        mu = Normal('mu', mu=mu0, sd=sigma)
        Normal('y', mu=mu, sd=1, observed=y_obs)

    model_vars = inputvars(model.vars)

    # Create variational gradient tensor
    elbo, _ = _calc_elbo(model_vars, model, n_mcsamples=10000,
                         random_seed=self.random_seed)

    # Variational posterior parameters
    uw_ = np.array([1.88, np.log(1)])

    # Calculate elbo computed with MonteCarlo
    uw_shared = shared(uw_, 'uw_shared')
    elbo = CallableTensor(elbo)(uw_shared)
    f = function([], elbo)
    elbo_mc = f()

    # Exact value
    elbo_true = (-0.5 * (
        3 + 3 * uw_[0]**2 - 2 * (y_obs[0] + y_obs[1] + mu0) * uw_[0] +
        y_obs[0]**2 + y_obs[1]**2 + mu0**2 + 3 * np.log(2 * np.pi)) +
        0.5 * (np.log(2 * np.pi) + 1))

    np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
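# Why `elbo_true` in the tests above is exact (a short derivation added here for
# clarity, not part of the original test code), with q(mu) = N(uw_[0], 1) and all
# standard deviations equal to 1 as in the tests:
#
#   log p(y, mu) = -0.5 * [ (y1 - mu)**2 + (y2 - mu)**2 + (mu - mu0)**2 + 3*log(2*pi) ]
#   E_q[(y_i - mu)**2] = (y_i - uw_[0])**2 + 1
#   E_q[(mu - mu0)**2] = (uw_[0] - mu0)**2 + 1
#
# so E_q[log p(y, mu)] = -0.5 * ( 3 + 3*uw_[0]**2 - 2*(y1 + y2 + mu0)*uw_[0]
#                                 + y1**2 + y2**2 + mu0**2 + 3*log(2*pi) ),
# and the entropy of a unit-variance Gaussian is 0.5 * (log(2*pi) + 1).
# The ELBO is E_q[log p(y, mu)] plus that entropy, which is the closed form the
# Monte Carlo estimate is compared against.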
def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", model=None):
    _log.warning("The BART model is experimental. Use with caution.")
    model = modelcontext(model)
    vars = inputvars(vars)
    self.bart = vars[0].distribution

    self.tune = True
    self.idx = 0
    self.iter = 0
    self.sum_trees = []
    self.chunk = chunk

    if chunk == "auto":
        self.chunk = max(1, int(self.bart.m * 0.1))
    self.bart.chunk = self.chunk
    self.num_particles = num_particles
    self.log_num_particles = np.log(num_particles)
    self.indices = list(range(1, num_particles))
    self.max_stages = max_stages
    self.old_trees_particles_list = []
    for i in range(self.bart.m):
        p = ParticleTree(self.bart.trees[i], self.bart.prior_prob_leaf_node)
        self.old_trees_particles_list.append(p)

    shared = make_shared_replacements(vars, model)
    self.likelihood_logp = logp([model.datalogpt], vars, shared)
    super().__init__(vars, shared)
def __init__(
    self,
    draws=1000,
    kernel="metropolis",
    n_steps=25,
    parallel=False,
    start=None,
    cores=None,
    tune_steps=True,
    p_acc_rate=0.99,
    threshold=0.5,
    epsilon=1.0,
    dist_func="absolute_error",
    sum_stat=False,
    progressbar=False,
    model=None,
    random_seed=-1,
):
    self.draws = draws
    self.kernel = kernel
    self.n_steps = n_steps
    self.parallel = parallel
    self.start = start
    self.cores = cores
    self.tune_steps = tune_steps
    self.p_acc_rate = p_acc_rate
    self.threshold = threshold
    self.epsilon = epsilon
    self.dist_func = dist_func
    self.sum_stat = sum_stat
    self.progressbar = progressbar
    self.model = model
    self.random_seed = random_seed

    self.model = modelcontext(model)

    if self.random_seed != -1:
        np.random.seed(self.random_seed)

    if self.cores is None:
        self.cores = _cpu_count()

    self.beta = 0
    self.max_steps = n_steps
    self.proposed = draws * n_steps
    self.acc_rate = 1
    self.acc_per_chain = np.ones(self.draws)
    self.model.marginal_log_likelihood = 0
    self.variables = inputvars(self.model.vars)
    dimension = sum(v.dsize for v in self.variables)
    self.scalings = np.ones(self.draws) * min(1, 2.38 ** 2 / dimension)
    self.discrete = np.concatenate(
        [[v.dtype in discrete_types] * (v.dsize or 1) for v in self.variables]
    )
    self.any_discrete = self.discrete.any()
    self.all_discrete = self.discrete.all()
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, use_single_leapfrog=False,
             potential=None, integrator="leapfrog", **theano_kwargs):
    """Superclass to implement Hamiltonian/hybrid monte carlo

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays interpreted matrix
        diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat
        it as a precision matrix/vector
    model : pymc3 Model instance.  default=Context model
    blocked: Boolean, default True
    use_single_leapfrog: Boolean, will leapfrog steps take a single step
        at a time.  default False.
    potential : Potential, optional
        An object that represents the Hamiltonian with methods `velocity`,
        `energy`, and `random` methods.
    **theano_kwargs: passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    if scaling is None and potential is None:
        scaling = model.test_point

    if isinstance(scaling, dict):
        scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    self.step_size = step_scale / (model.ndim ** 0.25)

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov, as_cov=False)

    shared = make_shared_replacements(vars, model)
    if theano_kwargs is None:
        theano_kwargs = {}

    self.H, self.compute_energy, self.compute_velocity, self.leapfrog, self.dlogp = get_theano_hamiltonian_functions(
        vars, shared, model.logpt, self.potential, use_single_leapfrog,
        integrator, **theano_kwargs)

    super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, potential=None,
             integrator="leapfrog", dtype=None, **theano_kwargs):
    """Set up Hamiltonian samplers with common structures.

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays interpreted matrix
        diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat
        it as a precision matrix/vector
    model : pymc3 Model instance
    blocked: bool, default=True
    potential : Potential, optional
        An object that represents the Hamiltonian with methods `velocity`,
        `energy`, and `random` methods.
    **theano_kwargs: passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    super(BaseHMC, self).__init__(vars, blocked=blocked, model=model,
                                  dtype=dtype, **theano_kwargs)

    size = self._logp_dlogp_func.size

    if scaling is None and potential is None:
        mean = floatX(np.zeros(size))
        var = floatX(np.ones(size))
        potential = QuadPotentialDiagAdapt(size, mean, var, 10)

    if isinstance(scaling, dict):
        point = Point(scaling, model=model)
        scaling = guess_scaling(point, model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    self.step_size = step_scale / (size ** 0.25)

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov)

    self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func)
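# A quick worked example (not from the source) of the 1/n**(1/4) step-size scaling
# mentioned in the docstring above: with the default step_scale and a
# 100-dimensional logp function,
step_scale, size = 0.25, 100
step_size = step_scale / size ** 0.25   # ~0.079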
def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None, **kwargs):
    self.model = modelcontext(model)
    chol = get_chol(prior_cov, prior_chol)
    self.prior_chol = tt.as_tensor_variable(chol)

    if vars is None:
        vars = self.model.cont_vars
    vars = inputvars(vars)

    super().__init__(vars, [self.model.fastlogp], **kwargs)
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, use_single_leapfrog=False, **theano_kwargs):
    """Superclass to implement Hamiltonian/hybrid monte carlo

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays interpreted matrix
        diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat
        it as a precision matrix/vector
    state
        State object
    model : pymc3 Model instance.  default=Context model
    blocked: Boolean, default True
    use_single_leapfrog: Boolean, will leapfrog steps take a single step
        at a time.  default False.
    **theano_kwargs: passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    if scaling is None:
        scaling = model.test_point

    if isinstance(scaling, dict):
        scaling = guess_scaling(Point(scaling, model=model), model=model, vars=vars)

    n = scaling.shape[0]
    self.step_size = step_scale / (n ** 0.25)
    self.potential = quad_potential(scaling, is_cov, as_cov=False)

    shared = make_shared_replacements(vars, model)
    if theano_kwargs is None:
        theano_kwargs = {}

    self.H, self.compute_energy, self.leapfrog, self._vars = get_theano_hamiltonian_functions(
        vars, shared, model.logpt, self.potential, use_single_leapfrog, **theano_kwargs)

    super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
def __init__(self, vars=None, w=1.0, tune=True, model=None, iter_limit=np.inf, **kwargs):
    self.model = modelcontext(model)
    self.w = w
    self.tune = tune
    self.n_tunes = 0.0
    self.iter_limit = iter_limit

    if vars is None:
        vars = self.model.cont_vars
    vars = inputvars(vars)

    super().__init__(vars, [self.model.fastlogp], **kwargs)
def __new__(cls, *args, **kwargs):
    blocked = kwargs.get("blocked")
    if blocked is None:
        # Try to look up default value from class
        blocked = getattr(cls, "default_blocked", True)
        kwargs["blocked"] = blocked

    model = modelcontext(kwargs.get("model"))
    kwargs.update({"model": model})

    # vars can either be first arg or a kwarg
    if "vars" not in kwargs and len(args) >= 1:
        vars = args[0]
        args = args[1:]
    elif "vars" in kwargs:
        vars = kwargs.pop("vars")
    else:  # Assume all model variables
        vars = model.vars

    # get the actual inputs from the vars
    vars = inputvars(vars)

    if len(vars) == 0:
        raise ValueError("No free random variables to sample.")

    if not blocked and len(vars) > 1:
        # In this case we create a separate sampler for each var
        # and append them to a CompoundStep
        steps = []
        for var in vars:
            step = super().__new__(cls)
            # If we don't return the instance we have to manually
            # call __init__
            step.__init__([var], *args, **kwargs)
            # Hack for creating the class correctly when unpickling.
            step.__newargs = ([var],) + args, kwargs
            steps.append(step)

        return CompoundStep(steps)
    else:
        step = super().__new__(cls)
        # Hack for creating the class correctly when unpickling.
        step.__newargs = (vars,) + args, kwargs
        return step
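# A hypothetical sketch (model and variables are illustrative, not from the source)
# of what the `blocked` handling in __new__ above does: with blocked=False and more
# than one free variable, a CompoundStep holding one sampler per variable is
# returned instead of a single blocked sampler.
with pm.Model():
    a = pm.Normal("a", 0.0, 1.0)
    b = pm.Normal("b", 0.0, 1.0)
    blocked_step = pm.Metropolis([a, b], blocked=True)    # one sampler updating both
    compound_step = pm.Metropolis([a, b], blocked=False)  # CompoundStep of two samplers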
def __init__(
    self,
    draws=2000,
    kernel="metropolis",
    n_steps=25,
    start=None,
    tune_steps=True,
    p_acc_rate=0.85,
    threshold=0.5,
    save_sim_data=False,
    save_log_pseudolikelihood=True,
    model=None,
    random_seed=-1,
    chain=0,
):
    self.draws = draws
    self.kernel = kernel.lower()
    self.n_steps = n_steps
    self.start = start
    self.tune_steps = tune_steps
    self.p_acc_rate = p_acc_rate
    self.threshold = threshold
    self.save_sim_data = save_sim_data
    self.save_log_pseudolikelihood = save_log_pseudolikelihood
    self.model = model
    self.random_seed = random_seed
    self.chain = chain

    self.model = modelcontext(model)

    if self.random_seed != -1:
        np.random.seed(self.random_seed)

    self.beta = 0
    self.max_steps = n_steps
    self.proposed = draws * n_steps
    self.acc_rate = 1
    self.variables = inputvars(self.model.vars)
    self.weights = np.ones(self.draws) / self.draws
    self.log_marginal_likelihood = 0
    self.sim_data = []
    self.log_pseudolikelihood = []
def fixed_hessian(point, vars=None, model=None):
    """
    Returns a fixed Hessian for any chain location.

    Parameters
    ----------
    model: Model (optional if in `with` context)
    point: dict
    vars: list
        Variables for which Hessian is to be calculated.
    """
    model = modelcontext(model)
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    point = Point(point, model=model)

    bij = DictToArrayBijection(ArrayOrdering(vars), point)
    rval = np.ones(bij.map(point).size) / 10
    return rval
def __init__(self, vars=None, model=None, point=None):
    self.model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if point is None:
        point = self.model.test_point
    else:
        pm.util.update_start_vals(point, self.model.test_point, self.model)

    # Fit all the parameters by default
    if vars is None:
        vars = self.model.cont_vars
    self.vars = inputvars(vars)
    allinmodel(self.vars, self.model)

    # Work out the relevant bijection map
    point = Point(point, model=self.model)
    self.bijection = DictToArrayBijection(ArrayOrdering(self.vars), point)

    # Pre-compile the theano model and gradient
    nlp = -self.model.logpt
    grad = theano.grad(nlp, self.vars, disconnected_inputs="ignore")
    self.func = get_theano_function_for_var([nlp] + grad, model=self.model)
def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1,
                   minibatch_RVs=None, minibatch_tensors=None, minibatches=None,
                   global_RVs=None, local_RVs=None, observed_RVs=None,
                   encoder_params=None, total_size=None, optimizer=None,
                   learning_rate=.001, epsilon=.1, random_seed=None, mode=None):
    """Perform mini-batch ADVI.

    This function implements a mini-batch automatic differentiation variational
    inference (ADVI; Kucukelbir et al., 2015) with the meanfield approximation.
    Autoencoding variational Bayes (AEVB; Kingma and Welling, 2014) is also
    supported.

    For explanation, we classify random variables in probabilistic models into
    three types. Observed random variables
    :math:`{\cal Y}=\{\mathbf{y}_{i}\}_{i=1}^{N}` are :math:`N` observations.
    Each :math:`\mathbf{y}_{i}` can be a set of observed random variables,
    i.e., :math:`\mathbf{y}_{i}=\{\mathbf{y}_{i}^{k}\}_{k=1}^{V_{o}}`, where
    :math:`V_{k}` is the number of the types of observed random variables in
    the model.

    The next ones are global random variables
    :math:`\Theta=\{\\theta^{k}\}_{k=1}^{V_{g}}`, which are used to calculate
    the probabilities for all observed samples.

    The last ones are local random variables
    :math:`{\cal Z}=\{\mathbf{z}_{i}\}_{i=1}^{N}`, where
    :math:`\mathbf{z}_{i}=\{\mathbf{z}_{i}^{k}\}_{k=1}^{V_{l}}`.
    These RVs are used only in AEVB.

    The goal of ADVI is to approximate the posterior distribution
    :math:`p(\Theta,{\cal Z}|{\cal Y})` by variational posterior
    :math:`q(\Theta)\prod_{i=1}^{N}q(\mathbf{z}_{i})`. All of these terms
    are normal distributions (mean-field approximation).

    :math:`q(\Theta)` is parametrized with its means and standard deviations.
    These parameters are denoted as :math:`\gamma`. While :math:`\gamma` is
    a constant, the parameters of :math:`q(\mathbf{z}_{i})` are dependent on
    each observation. Therefore these parameters are denoted as
    :math:`\\xi(\mathbf{y}_{i}; \\nu)`, where :math:`\\nu` is the parameters
    of :math:`\\xi(\cdot)`. For example, :math:`\\xi(\cdot)` can be a
    multilayer perceptron or convolutional neural network.

    In addition to :math:`\\xi(\cdot)`, we can also include deterministic
    mappings for the likelihood of observations. We denote the parameters of
    the deterministic mappings as :math:`\eta`. An example of such mappings is
    the deconvolutional neural network used in the convolutional VAE example
    in the PyMC3 notebook directory.

    This function maximizes the evidence lower bound (ELBO)
    :math:`{\cal L}(\gamma, \\nu, \eta)` defined as follows:

    .. math::

        {\cal L}(\gamma,\\nu,\eta) & =
        \mathbf{c}_{o}\mathbb{E}_{q(\Theta)}\left[
        \sum_{i=1}^{N}\mathbb{E}_{q(\mathbf{z}_{i})}\left[
        \log p(\mathbf{y}_{i}|\mathbf{z}_{i},\Theta,\eta)
        \\right]\\right] \\\\ &
        - \mathbf{c}_{g}KL\left[q(\Theta)||p(\Theta)\\right]
        - \mathbf{c}_{l}\sum_{i=1}^{N}
            KL\left[q(\mathbf{z}_{i})||p(\mathbf{z}_{i})\\right],

    where :math:`KL[q(v)||p(v)]` is the Kullback-Leibler divergence

    .. math::

        KL[q(v)||p(v)] = \int q(v)\log\\frac{q(v)}{p(v)}dv,

    :math:`\mathbf{c}_{o/g/l}` are vectors for weighting each term of ELBO.
    More precisely, we can write each of the terms in ELBO as follows:

    .. math::

        \mathbf{c}_{o}\log p(\mathbf{y}_{i}|\mathbf{z}_{i},\Theta,\eta) & = &
        \sum_{k=1}^{V_{o}}c_{o}^{k}
            \log p(\mathbf{y}_{i}^{k}|
                   {\\rm pa}(\mathbf{y}_{i}^{k},\Theta,\eta)) \\\\
        \mathbf{c}_{g}KL\left[q(\Theta)||p(\Theta)\\right] & = &
        \sum_{k=1}^{V_{g}}c_{g}^{k}KL\left[
            q(\\theta^{k})||p(\\theta^{k}|{\\rm pa(\\theta^{k})})\\right] \\\\
        \mathbf{c}_{l}KL\left[q(\mathbf{z}_{i}||p(\mathbf{z}_{i})\\right] & = &
        \sum_{k=1}^{V_{l}}c_{l}^{k}KL\left[
            q(\mathbf{z}_{i}^{k})||
            p(\mathbf{z}_{i}^{k}|{\\rm pa}(\mathbf{z}_{i}^{k}))\\right],

    where :math:`{\\rm pa}(v)` denotes the set of parent variables of
    :math:`v` in the directed acyclic graph of the model.

    When using mini-batches, :math:`c_{o}^{k}` and :math:`c_{l}^{k}` should be
    set to :math:`N/M`, where :math:`M` is the number of observations in each
    mini-batch. Another weighting scheme was proposed in
    (Blundell et al., 2015) for accelerating model fitting.

    For working with ADVI, we need to give the probabilistic model
    (:code:`model`), the three types of RVs (:code:`observed_RVs`,
    :code:`global_RVs` and :code:`local_RVs`), the tensors to which
    mini-batched samples are supplied (:code:`minibatches`) and parameters of
    deterministic mappings :math:`\\xi` and :math:`\eta`
    (:code:`encoder_params`) as input arguments.

    :code:`observed_RVs` is an :code:`OrderedDict` of the form
    :code:`{y_k: c_k}`, where :code:`y_k` is a random variable defined in the
    PyMC3 model. :code:`c_k` is a scalar (:math:`c_{o}^{k}`) and it can be a
    shared variable.

    :code:`global_RVs` is an :code:`OrderedDict` of the form
    :code:`{t_k: c_k}`, where :code:`t_k` is a random variable defined in the
    PyMC3 model. :code:`c_k` is a scalar (:math:`c_{g}^{k}`) and it can be a
    shared variable.

    :code:`local_RVs` is an :code:`OrderedDict` of the form
    :code:`{z_k: ((m_k, s_k), c_k)}`, where :code:`z_k` is a random variable
    defined in the PyMC3 model. :code:`c_k` is a scalar (:math:`c_{l}^{k}`)
    and it can be a shared variable. :code:`(m_k, s_k)` is a pair of tensors
    of means and log standard deviations of the variational distribution;
    samples drawn from the variational distribution replace :code:`z_k`.
    It should be noted that if :code:`z_k` has a transformation that changes
    the dimension (e.g., StickBreakingTransform), the variational distribution
    must have the same dimension. For example, if :code:`z_k` is distributed
    with Dirichlet distribution with :code:`p` choices, :math:`m_k` and
    :code:`s_k` has the shape :code:`(n_samples_in_minibatch, p - 1)`.

    :code:`minibatch_tensors` is a list of tensors (can be shared variables)
    to which mini-batch samples are set during the optimization. These tensors
    are observations (:code:`obs=`) in :code:`observed_RVs`.

    :code:`minibatches` is a generator of a list of :code:`numpy.ndarray`.
    Each item of the list will be set to tensors in :code:`minibatch_tensors`.

    :code:`encoder_params` is a list of shared variables of the parameters
    :math:`\\nu` and :math:`\eta`. We do not need to include the variational
    parameters of the global variables, :math:`\gamma`, because these are
    automatically created and updated in this function.

    The following is a list of example notebooks using advi_minibatch:

    - docs/source/notebooks/GLM-hierarchical-advi-minibatch.ipynb
    - docs/source/notebooks/bayesian_neural_network_advi.ipynb
    - docs/source/notebooks/convolutional_vae_keras_advi.ipynb
    - docs/source/notebooks/gaussian-mixture-model-advi.ipynb
    - docs/source/notebooks/lda-advi-aevb.ipynb

    Parameters
    ----------
    vars : object
        List of random variables. If None, variational posteriors (normal
        distribution) are fit for all RVs in the given model.
    start : Dict or None
        Initial values of parameters (variational means).
    model : Model
        Probabilistic model.
    n : int
        Number of iterations updating parameters.
    n_mcsamples : int
        Number of Monte Carlo samples to approximate ELBO.
    minibatch_RVs : list of ObservedRVs
        Random variables in the model for which mini-batch tensors are set.
        When this argument is given, both of arguments local_RVs and
        observed_RVs must be None.
    minibatch_tensors : list of (tensors or shared variables)
        Tensors used to create ObservedRVs in minibatch_RVs.
    minibatches : generator of list
        Generates a set of minibatches when calling next(). The length of the
        returned list must be the same with the number of random variables in
        `minibatch_tensors`.
    total_size : int
        Total size of training samples. This is used to appropriately scale
        the log likelihood terms corresponding to mini-batches in ELBO.
    observed_RVs : Ordered dict
        Include a scaling constant for the corresponding RV. See the above
        description.
    global_RVs : Ordered dict or None
        Include a scaling constant for the corresponding RV. See the above
        description. If :code:`None`, it is set to
        :code:`{v: 1 for v in grvs}`, where :code:`grvs` is
        :code:`list(set(vars) - set(list(local_RVs) + list(observed_RVs)))`.
    local_RVs : Ordered dict or None
        Include encoded variational parameters and a scaling constant for
        the corresponding RV. See the above description.
    encoder_params : list of theano shared variables
        Parameters of encoder.
    optimizer : (loss, list of shared variables) -> dict or OrderedDict
        A function that returns parameter updates given loss and shared
        variables of parameters. If :code:`None` (default), a default
        Adagrad optimizer is used with parameters :code:`learning_rate`
        and :code:`epsilon` below.
    learning_rate : float
        Base learning rate for adagrad. This parameter is ignored when
        :code:`optimizer` is set.
    epsilon : float
        Offset in denominator of the scale of learning rate in Adagrad.
        This parameter is ignored when :code:`optimizer` is set.
    random_seed : int
        Seed to initialize random state.

    Returns
    -------
    ADVIFit
        Named tuple, which includes 'means', 'stds', and 'elbo_vals'.

    References
    ----------
    - Kingma, D. P., & Welling, M. (2014).
      Auto-Encoding Variational Bayes. stat, 1050, 1.
    - Kucukelbir, A., Ranganath, R., Gelman, A., & Blei, D. (2015).
      Automatic variational inference in Stan. In Advances in neural
      information processing systems (pp. 568-576).
    - Blundell, C., Cornebise, J., Kavukcuoglu, K., & Wierstra, D. (2015).
      Weight Uncertainty in Neural Network. In Proceedings of the 32nd
      International Conference on Machine Learning (ICML-15) (pp. 1613-1622).
    """
    if encoder_params is None:
        encoder_params = []

    model = pm.modelcontext(model)
    vars = inputvars(vars if vars is not None else model.vars)
    start = start if start is not None else model.test_point

    if not pm.model.all_continuous(vars):
        raise ValueError('Model can not include discrete RVs for ADVI.')

    _check_minibatches(minibatch_tensors, minibatches)

    if encoder_params is None:
        encoder_params = []

    # Prepare optimizer
    if optimizer is None:
        optimizer = adagrad_optimizer(learning_rate, epsilon)

    # For backward compatibility in how input arguments are given
    local_RVs, observed_RVs = _get_rvss(minibatch_RVs, local_RVs, observed_RVs,
                                        minibatch_tensors, total_size)

    # Replace local_RVs with transformed variables
    def get_transformed(v):
        if hasattr(v, 'transformed'):
            return v.transformed
        return v
    local_RVs = OrderedDict([(get_transformed(v), (uw, s))
                             for v, (uw, s) in local_RVs.items()])

    # Get global variables
    grvs = list(set(vars) - set(list(local_RVs) + list(observed_RVs)))
    if global_RVs is None:
        global_RVs = OrderedDict({v: 1 for v in grvs})
    elif len(grvs) != len(global_RVs):
        _value_error('global_RVs ({}) must have all global RVs: {}'.format(
            [v for v in global_RVs], grvs))

    # ELBO wrt variational parameters
    elbo, uw_l, uw_g = _make_elbo_t(observed_RVs, global_RVs, local_RVs,
                                    model.potentials, n_mcsamples, random_seed)

    # Replacements tensors of variational parameters in the graph
    replaces = dict()

    # Variational parameters for global RVs
    if 0 < len(global_RVs):
        uw_global_shared, bij = _init_uw_global_shared(start, global_RVs)
        replaces.update({uw_g: uw_global_shared})

    # Variational parameters for local RVs, encoded from samples in
    # mini-batches
    if 0 < len(local_RVs):
        uws = [uw for _, (uw, _) in local_RVs.items()]
        uw_local_encoded = tt.concatenate([uw[0].ravel() for uw in uws] +
                                          [uw[1].ravel() for uw in uws])
        replaces.update({uw_l: uw_local_encoded})

    # Replace tensors of variational parameters in ELBO
    elbo = theano.clone(elbo, OrderedDict(replaces), strict=False)

    # Replace input shared variables with tensors
    def is_shared(t):
        return isinstance(t, theano.compile.sharedvalue.SharedVariable)
    tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
    updates = OrderedDict(
        {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)})
    elbo = theano.clone(elbo, updates, strict=False)

    # Create parameter update function used in the training loop
    params = encoder_params
    if 0 < len(global_RVs):
        params += [uw_global_shared]
    updates = OrderedDict(optimizer(loss=-1 * elbo, param=params))
    f = theano.function(tensors, elbo, updates=updates, mode=mode)

    # Optimization loop
    elbos = np.empty(n)
    progress = tqdm.trange(n)
    for i in progress:
        e = f(*next(minibatches))
        if np.isnan(e):
            raise FloatingPointError('NaN occurred in ADVI optimization.')
        elbos[i] = e
        if n < 10:
            progress.set_description('ELBO = {:,.2f}'.format(elbos[i]))
        elif i % (n // 10) == 0 and i > 0:
            avg_elbo = infmean(elbos[i - n // 10:i])
            progress.set_description('Average ELBO = {:,.2f}'.format(avg_elbo))

    pm._log.info('Finished minibatch ADVI: ELBO = {:,.2f}'.format(elbos[-1]))

    # Variational parameters of global RVs
    if 0 < len(global_RVs):
        l = int(uw_global_shared.get_value(borrow=True).size / 2)
        u = bij.rmap(uw_global_shared.get_value(borrow=True)[:l])
        w = bij.rmap(uw_global_shared.get_value(borrow=True)[l:])
        # w is in log space
        for var in w.keys():
            w[var] = np.exp(w[var])
    else:
        u = dict()
        w = dict()

    return ADVIFit(u, w, elbos)
def __init__(self, vars=None, out_vars=None, covariance=None, scale=1.,
             n_chains=100, tune=True, tune_interval=100, model=None,
             check_bound=True, likelihood_name='like',
             proposal_name='MultivariateNormal', coef_variation=1., **kwargs):

    model = modelcontext(model)

    if vars is None:
        vars = model.vars
    vars = inputvars(vars)

    if out_vars is None:
        out_vars = model.unobserved_RVs
    out_varnames = [out_var.name for out_var in out_vars]

    self.scaling = np.atleast_1d(scale)

    if covariance is None and proposal_name == 'MultivariateNormal':
        self.covariance = np.eye(sum(v.dsize for v in vars))
        scale = self.covariance

    self.tune = tune
    self.check_bnd = check_bound
    self.tune_interval = tune_interval
    self.steps_until_tune = tune_interval

    self.proposal_name = proposal_name
    self.proposal_dist = choose_proposal(self.proposal_name, scale=scale)
    self.proposal_samples_array = self.proposal_dist(n_chains)

    self.stage_sample = 0
    self.accepted = 0

    self.beta = 0
    self.stage = 0
    self.chain_index = 0
    self.resampling_indexes = np.arange(n_chains)

    self.coef_variation = coef_variation
    self.n_chains = n_chains
    self.likelihoods = np.zeros(n_chains)

    self.likelihood_name = likelihood_name
    self._llk_index = out_varnames.index(likelihood_name)
    self.discrete = np.concatenate(
        [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars])
    self.any_discrete = self.discrete.any()
    self.all_discrete = self.discrete.all()

    # create initial population
    self.population = []
    self.array_population = np.zeros(n_chains)
    for i in range(self.n_chains):
        dummy = pm.Point({v.name: v.random() for v in vars}, model=model)
        self.population.append(dummy)

    self.population[0] = model.test_point
    self.chain_previous_lpoint = copy.deepcopy(self.population)

    shared = make_shared_replacements(vars, model)
    self.logp_forw = logp_forw(out_vars, vars, shared)
    self.check_bnd = logp_forw([model.varlogpt], vars, shared)

    super(ATMCMC, self).__init__(vars, out_vars, shared)
def optimize(start=None, vars=None, model=None, return_info=False,
             verbose=True, **kwargs):
    """Maximize the log prob of a PyMC3 model using scipy

    All extra arguments are passed directly to the ``scipy.optimize.minimize``
    function.

    Args:
        start: The PyMC3 coordinate dictionary of the starting position
        vars: The variables to optimize
        model: The PyMC3 model
        return_info: Return both the coordinate dictionary and the result of
            ``scipy.optimize.minimize``
        verbose: Print the success flag and log probability to the screen

    """
    from scipy.optimize import minimize

    model = pm.modelcontext(model)

    # Work out the full starting coordinates
    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)

    # Fit all the parameters by default
    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    allinmodel(vars, model)

    # Work out the relevant bijection map
    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)

    # Pre-compile the theano model and gradient
    nlp = -model.logpt
    grad = theano.grad(nlp, vars, disconnected_inputs="ignore")
    func = get_theano_function_for_var([nlp] + grad, model=model)

    if verbose:
        names = [
            get_untransformed_name(v.name)
            if is_transformed_name(v.name)
            else v.name
            for v in vars
        ]
        sys.stderr.write("optimizing logp for variables: [{0}]\n".format(
            ", ".join(names)))
        bar = tqdm.tqdm()

    # This returns the objective function and its derivatives
    def objective(vec):
        res = func(*get_args_for_theano_function(bij.rmap(vec), model=model))
        d = dict(zip((v.name for v in vars), res[1:]))
        g = bij.map(d)
        if verbose:
            bar.set_postfix(logp="{0:e}".format(-res[0]))
            bar.update()
        return res[0], g

    # Optimize using scipy.optimize
    x0 = bij.map(start)
    initial = objective(x0)[0]
    kwargs["jac"] = True
    info = minimize(objective, x0, **kwargs)

    # Only accept the output if it is better than it was
    x = info.x if (np.isfinite(info.fun) and info.fun < initial) else x0

    # Coerce the output into the right format
    vars = get_default_varnames(model.unobserved_RVs, True)
    point = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(x)))
    }

    if verbose:
        bar.close()
        sys.stderr.write("message: {0}\n".format(info.message))
        sys.stderr.write("logp: {0} -> {1}\n".format(-initial, -info.fun))
        if not np.isfinite(info.fun):
            logger.warning("final logp not finite, returning initial point")
            logger.warning(
                "this suggests that something is wrong with the model")
            logger.debug("{0}".format(info))

    if return_info:
        return point, info
    return point
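# A hypothetical usage sketch for optimize() above (the model and variable names are
# illustrative, not from the source):
with pm.Model() as model:
    m = pm.Normal("m", mu=0.0, sd=10.0)
    pm.Normal("obs", mu=m, sd=1.0, observed=[0.3, -0.1, 0.5])
    # start from the test point and optimize only `m`
    map_soln = optimize(start=model.test_point, vars=[m])
    # also return scipy.optimize.minimize's result object
    map_soln, info = optimize(return_info=True)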
def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1,
                   minibatch_RVs=None, minibatch_tensors=None, minibatches=None,
                   local_RVs=None, observed_RVs=None, encoder_params=[],
                   total_size=None, optimizer=None, learning_rate=.001,
                   epsilon=.1, random_seed=None, verbose=1, dp_par=None):
    """Perform mini-batch ADVI.

    This function implements a mini-batch ADVI with the meanfield
    approximation. Autoencoding variational inference is also supported.

    The log probability terms for mini-batches, corresponding to RVs in
    minibatch_RVs, are scaled to (total_size) / (the number of samples in each
    mini-batch), where total_size is an argument for the total data size.

    minibatch_tensors is a list of tensors (can be shared variables) to which
    mini-batch samples are set during the optimization. In most cases, these
    tensors are observations for RVs in the model.

    local_RVs and observed_RVs are used for autoencoding variational Bayes.
    Both of these RVs are associated with each of given samples. The
    difference is that local_RVs are unknown and their posterior distributions
    are approximated.

    local_RVs are Ordered dict, whose keys and values are RVs and a tuple of
    two objects. The first is the theano expression of variational parameters
    (mean and log of std) of the approximate posterior, which are encoded from
    given samples by an arbitrary deterministic function, e.g., MLP. The other
    one is a scaling constant to be multiplied to the log probability term
    corresponding to the RV.

    observed_RVs are also Ordered dict with RVs as the keys, but whose values
    are only the scaling constant as in local_RVs. In this case, total_size is
    ignored.

    If local_RVs is None (thus not using autoencoder), the following two
    settings are equivalent:

    - observed_RVs=OrderedDict([(rv, total_size / minibatch_size)])
    - minibatch_RVs=[rv], total_size=total_size

    where minibatch_size is minibatch_tensors[0].shape[0].

    The variational parameters and the parameters of the autoencoder are
    simultaneously optimized with given optimizer, which is a function that
    returns a dictionary of parameter updates as provided to Theano function.
    See the docstring of pymc3.variational.advi().

    Parameters
    ----------
    vars : object
        List of random variables. If None, variational posteriors (normal
        distribution) are fit for all RVs in the given model.
    start : Dict or None
        Initial values of parameters (variational means).
    model : Model
        Probabilistic model.
    n : int
        Number of iterations updating parameters.
    n_mcsamples : int
        Number of Monte Carlo samples to approximate ELBO.
    minibatch_RVs : list of ObservedRVs
        Random variables in the model for which mini-batch tensors are set.
        When this argument is given, both of arguments local_RVs and
        global_RVs must be None.
    minibatch_tensors : list of (tensors or shared variables)
        Tensors used to create ObservedRVs in minibatch_RVs.
    minibatches : generator of list
        Generates a set of minibatches when calling next(). The length of the
        returned list must be the same with the number of random variables in
        `minibatch_tensors`.
    total_size : int
        Total size of training samples. This is used to appropriately scale
        the log likelihood terms corresponding to mini-batches in ELBO.
    local_RVs : Ordered dict
        Include encoded variational parameters and a scaling constant for
        the corresponding RV. See the above description.
    observed_RVs : Ordered dict
        Include a scaling constant for the corresponding RV. See the above
        description.
    encoder_params : list of theano shared variables
        Parameters of encoder.
    optimizer : (loss, tensor) -> dict or OrderedDict
        A function that returns parameter updates given loss and parameter
        tensor. If :code:`None` (default), a default Adagrad optimizer is
        used with parameters :code:`learning_rate` and :code:`epsilon` below.
    learning_rate : float
        Base learning rate for adagrad. This parameter is ignored when an
        optimizer is given.
    epsilon : float
        Offset in denominator of the scale of learning rate in Adagrad.
        This parameter is ignored when an optimizer is given.
    random_seed : int
        Seed to initialize random state.

    Returns
    -------
    ADVIFit
        Named tuple, which includes 'means', 'stds', and 'elbo_vals'.
    """
    theano.config.compute_test_value = 'ignore'

    model = modelcontext(model)
    vars = inputvars(vars if vars is not None else model.vars)
    start = start if start is not None else model.test_point
    check_discrete_rvs(vars)
    _check_minibatches(minibatch_tensors, minibatches)

    # Prepare optimizer
    if optimizer is None:
        optimizer = adagrad_optimizer(learning_rate, epsilon)

    # For backward compatibility in how input arguments are given
    local_RVs, observed_RVs = _get_rvss(minibatch_RVs, local_RVs, observed_RVs,
                                        minibatch_tensors, total_size)

    # Replace local_RVs with transformed variables
    ds = model.deterministics

    def get_transformed(v):
        if v in ds:
            return v.transformed
        return v
    local_RVs = OrderedDict([(get_transformed(v), (uw, s))
                             for v, (uw, s) in local_RVs.items()])

    # Get global variables
    global_RVs = list(set(vars) - set(list(local_RVs) + list(observed_RVs)))

    # Ordering for concatenation of random variables
    global_order = ArrayOrdering([v for v in global_RVs])
    local_order = ArrayOrdering([v for v in local_RVs])

    # ELBO wrt variational parameters
    inarray_g, uw_g, replace_g = _join_global_RVs(global_RVs, global_order)
    inarray_l, uw_l, replace_l = _join_local_RVs(local_RVs, local_order)
    logpt = _make_logpt(global_RVs, local_RVs, observed_RVs, model)
    replace = replace_g
    if replace_l is not None:
        replace.update(replace_l)
    logp = theano.clone(logpt, replace, strict=False)
    elbo = _elbo_t(logp, uw_g, uw_l, inarray_g, inarray_l,
                   n_mcsamples, random_seed)
    del logpt

    # Variational parameters for global RVs
    uw_global_shared, bij = _init_uw_global_shared(start, global_RVs,
                                                   global_order)

    # Variational parameters for local RVs, encoded from samples in
    # mini-batches
    if 0 < len(local_RVs):
        uws = [uw for _, (uw, _) in local_RVs.items()]
        uw_local_encoded = tt.concatenate([uw[0].ravel() for uw in uws] +
                                          [uw[1].ravel() for uw in uws])

    # Replace tensors in ELBO
    updates = {uw_g: uw_global_shared, uw_l: uw_local_encoded} \
        if 0 < len(local_RVs) else \
        {uw_g: uw_global_shared}
    elbo = theano.clone(elbo, updates, strict=False)

    # Replace input shared variables with tensors
    def is_shared(t):
        return isinstance(t, theano.compile.sharedvalue.SharedVariable)
    tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
    updates = OrderedDict(
        {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)})
    elbo = theano.clone(elbo, updates, strict=False)

    # Create parameter update function used in the training loop
    params = [uw_global_shared] + encoder_params
    updates = OrderedDict()
    for param in params:
        # g = tt.grad(elbo, wrt=param)
        # updates.update(adagrad(g, param, learning_rate, epsilon, n=10))
        updates.update(
            optimizer(likeloss=-1 * elbo[0], entroloss=-1 * elbo[1],
                      param=param, dp_par=dp_par, n_par=len(vars)))

    f = theano.function(tensors, tt.add(elbo[1], tt.sum(elbo[0], axis=0)),
                        updates=updates)

    # Optimization loop
    elbos = np.empty(n)
    l = int(uw_global_shared.get_value(borrow=True).size / 2)
    for i in range(n):
        u_old = bij.rmap(uw_global_shared.get_value(borrow=True)[:l])
        w_old = bij.rmap(uw_global_shared.get_value(borrow=True)[l:])
        e = f(*next(minibatches))
        if np.isnan(e):
            print('NaNs produced at iteration {}'.format(i))
            for var in w_old.keys():
                w_old[var] = np.exp(w_old[var])
            return ADVIFit(u_old, w_old, elbos[:i])
        elbos[i] = e
        if verbose and not i % (n // 10):
            if not i:
                print('Iteration {0} [{1}%]: ELBO = {2}'.format(
                    i, 100 * i // n, e.round(2)))
            else:
                avg_elbo = elbos[i - n // 10:i].mean()
                print('Iteration {0} [{1}%]: Average ELBO = {2}'.format(
                    i, 100 * i // n, avg_elbo.round(2)))

    if verbose:
        print('Finished [100%]: ELBO = {}'.format(elbos[-1].round(2)))

    u = bij.rmap(uw_global_shared.get_value(borrow=True)[:l])
    w = bij.rmap(uw_global_shared.get_value(borrow=True)[l:])

    # w is in log space
    for var in w.keys():
        w[var] = np.exp(w[var])

    return ADVIFit(u, w, elbos)
def __init__(self, n0=10, init_samples=None, k_trunc=np.inf, eps_z=.01, nf_iter=2,
             N=10, t_ess=0.5, beta_max=1, model=None, random_seed=-1, chain=0,
             frac_validate=0.0, iteration=None, alpha_w=(0, 0), alpha_uw=(0, 0),
             verbose=False, n_component=None, interp_nbin=None, KDE=True,
             bw_factor_min=1.0, bw_factor_max=1.0, bw_factor_num=1, rel_bw=1,
             edge_bins=None, ndata_wT=None, MSWD_max_iter=None, NBfirstlayer=True,
             logit=False, Whiten=False, trainable_qw=False, sgd_steps=0,
             knots_trainable=5, batchsize=None, nocuda=False, patch=False,
             shape=[28, 28, 1], bounds=None):

    self.N = N
    self.n0 = n0
    self.model = model
    self.chain = chain

    # Init method params.
    self.init_samples = init_samples
    self.random_seed = random_seed

    # Set the torch seed.
    if self.random_seed != 1:
        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)

    # Separating out so I can keep track. These are SINF params.
    assert 0.0 <= frac_validate <= 1.0
    self.frac_validate = frac_validate
    self.iteration = iteration
    self.alpha_uw = alpha_uw
    self.alpha_w = alpha_w
    self.k_trunc = k_trunc
    self.verbose = verbose
    self.n_component = n_component
    self.interp_nbin = interp_nbin
    self.KDE = KDE
    self.bw_factors = np.linspace(bw_factor_min, bw_factor_max, bw_factor_num)
    self.edge_bins = edge_bins
    self.ndata_wT = ndata_wT
    self.MSWD_max_iter = MSWD_max_iter
    self.NBfirstlayer = NBfirstlayer
    self.logit = logit
    self.Whiten = Whiten
    self.batchsize = batchsize
    self.nocuda = nocuda
    self.patch = patch
    self.shape = shape

    # Convert the array of bounds passed in as [[x1min, x2min, ...], [x1max, x2max, ...]]
    # to what SINF wants: [[x1min, x1max], [x2min, x2max], ...]
    if bounds is not None:
        bounds_sinf = list([list(b) for b in bounds.T])
    else:
        # Get the dimensionality from the initial samples, assuming (N, d) shape.
        bounds_sinf = [[None, None] for i in range(init_samples.shape[1])]
    self.bounds = bounds_sinf

    # Trainable SINF.
    self.trainable_qw = trainable_qw
    self.sgd_steps = sgd_steps
    self.knots_trainable = knots_trainable

    # NFO.
    self.t_ess = t_ess
    self.beta_max = beta_max
    self.beta = 0  # initial value of beta before iterating, match smc
    self.rel_bw = rel_bw

    self.model = modelcontext(model)
    self.variables = inputvars(self.model.vars)
def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
             model=None, blocked=True, use_single_leapfrog=False,
             potential=None, integrator="leapfrog", **theano_kwargs):
    """Superclass to implement Hamiltonian/hybrid monte carlo

    Parameters
    ----------
    vars : list of theano variables
    scaling : array_like, ndim = {1,2}
        Scaling for momentum distribution. 1d arrays interpreted matrix
        diagonal.
    step_scale : float, default=0.25
        Size of steps to take, automatically scaled down by 1/n**(1/4)
    is_cov : bool, default=False
        Treat scaling as a covariance matrix/vector if True, else treat
        it as a precision matrix/vector
    model : pymc3 Model instance.  default=Context model
    blocked: Boolean, default True
    use_single_leapfrog: Boolean, will leapfrog steps take a single step
        at a time.  default False.
    potential : Potential, optional
        An object that represents the Hamiltonian with methods `velocity`,
        `energy`, and `random` methods.
    **theano_kwargs: passed to theano functions
    """
    model = modelcontext(model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)

    if scaling is None and potential is None:
        size = sum(np.prod(var.dshape, dtype=int) for var in vars)
        mean = floatX(np.zeros(size))
        var = floatX(np.ones(size))
        potential = QuadPotentialDiagAdapt(size, mean, var, 10)

    if isinstance(scaling, dict):
        point = Point(scaling, model=model)
        scaling = guess_scaling(point, model=model, vars=vars)

    if scaling is not None and potential is not None:
        raise ValueError("Can not specify both potential and scaling.")

    self.step_size = step_scale / (model.ndim ** 0.25)

    if potential is not None:
        self.potential = potential
    else:
        self.potential = quad_potential(scaling, is_cov)

    shared = make_shared_replacements(vars, model)
    if theano_kwargs is None:
        theano_kwargs = {}

    self.H, self.compute_energy, self.compute_velocity, self.leapfrog, self.dlogp = get_theano_hamiltonian_functions(
        vars, shared, model.logpt, self.potential, use_single_leapfrog,
        integrator, **theano_kwargs)

    super(BaseHMC, self).__init__(vars, shared, blocked=blocked)
def find_MAP(start=None, vars=None, method="L-BFGS-B", return_raw=False,
             include_transformed=True, progressbar=True, maxeval=5000,
             model=None, *args, **kwargs):
    """Finds the local maximum a posteriori point given a model.

    find_MAP should not be used to initialize the NUTS sampler. Simply call
    pymc3.sample() and it will automatically initialize NUTS in a better way.

    Parameters
    ----------
    start: `dict` of parameter values (Defaults to `model.test_point`)
    vars: list
        List of variables to optimize and set to optimum (Defaults to all
        continuous).
    method: string or callable
        Optimization algorithm (Defaults to 'L-BFGS-B' unless discrete
        variables are specified in `vars`, then `Powell` which will perform
        better). For instructions on use of a callable, refer to SciPy's
        documentation of `optimize.minimize`.
    return_raw: bool
        Whether to return the full output of scipy.optimize.minimize
        (Defaults to `False`)
    include_transformed: bool, optional defaults to True
        Flag for reporting automatically transformed variables in addition
        to original variables.
    progressbar: bool, optional defaults to True
        Whether or not to display a progress bar in the command line.
    maxeval: int, optional, defaults to 5000
        The maximum number of times the posterior distribution is evaluated.
    model: Model (optional if in `with` context)
    *args, **kwargs
        Extra args passed to scipy.optimize.minimize

    Notes
    -----
    Older code examples used find_MAP() to initialize the NUTS sampler, but
    this is not an effective way of choosing starting values for sampling.
    As a result, we have greatly enhanced the initialization of NUTS and
    wrapped it inside pymc3.sample() and you should thus avoid this method.
    """
    model = modelcontext(model)

    if start is None:
        start = model.test_point
    else:
        update_start_vals(start, model.test_point, model)
    check_start_vals(start, model)

    if vars is None:
        vars = model.cont_vars
    vars = inputvars(vars)
    disc_vars = list(typefilter(vars, discrete_types))
    allinmodel(vars, model)

    start = Point(start, model=model)
    bij = DictToArrayBijection(ArrayOrdering(vars), start)
    logp_func = bij.mapf(model.fastlogp_nojac)
    x0 = bij.map(start)

    try:
        dlogp_func = bij.mapf(model.fastdlogp_nojac(vars))
        compute_gradient = True
    except (AttributeError, NotImplementedError, tg.NullTypeGradError):
        compute_gradient = False

    if disc_vars or not compute_gradient:
        pm._log.warning(
            "Warning: gradient not available."
            + "(E.g. vars contains discrete variables). MAP "
            + "estimates may not be accurate for the default "
            + "parameters. Defaulting to non-gradient minimization "
            + "'Powell'.")
        method = "Powell"

    if "fmin" in kwargs:
        fmin = kwargs.pop("fmin")
        warnings.warn(
            "In future versions, set the optimization algorithm with a string. "
            'For example, use `method="L-BFGS-B"` instead of '
            '`fmin=sp.optimize.fmin_l_bfgs_b"`.')

        cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        # Check to see if minimization function actually uses the gradient
        if "fprime" in getargspec(fmin).args:
            def grad_logp(point):
                return nan_to_num(-dlogp_func(point))
            opt_result = fmin(cost_func, x0, fprime=grad_logp, *args, **kwargs)
        else:
            # Check to see if minimization function uses a starting value
            if "x0" in getargspec(fmin).args:
                opt_result = fmin(cost_func, x0, *args, **kwargs)
            else:
                opt_result = fmin(cost_func, *args, **kwargs)

        if isinstance(opt_result, tuple):
            mx0 = opt_result[0]
        else:
            mx0 = opt_result
    else:
        # remove 'if' part, keep just this 'else' block after version change
        if compute_gradient:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func, dlogp_func)
        else:
            cost_func = CostFuncWrapper(maxeval, progressbar, logp_func)

        try:
            opt_result = minimize(cost_func, x0, method=method,
                                  jac=compute_gradient, *args, **kwargs)
            mx0 = opt_result["x"]  # r -> opt_result
        except (KeyboardInterrupt, StopIteration) as e:
            mx0, opt_result = cost_func.previous_x, None
            if isinstance(e, StopIteration):
                pm._log.info(e)
        finally:
            last_v = cost_func.n_eval
            if progressbar:
                assert isinstance(cost_func.progress, ProgressBar)
                cost_func.progress.total = last_v
                cost_func.progress.update(last_v)
                print()

    vars = get_default_varnames(model.unobserved_RVs, include_transformed)
    mx = {
        var.name: value
        for var, value in zip(vars, model.fastfn(vars)(bij.rmap(mx0)))
    }

    if return_raw:
        return mx, opt_result
    else:
        return mx
def __init__(
    self,
    draws=2000,
    start=None,
    threshold=0.5,
    model=None,
    random_seed=-1,
    chain=0,
    frac_validate=0.1,
    iteration=None,
    alpha=(0, 0),
    k_trunc=0.5,
    pareto=False,
    epsilon=1e-3,
    local_thresh=3,
    local_step_size=0.1,
    local_grad=True,
    nf_local_iter=0,
    max_line_search=2,
    verbose=False,
    n_component=None,
    interp_nbin=None,
    KDE=True,
    bw_factor=0.5,
    edge_bins=None,
    ndata_wT=None,
    MSWD_max_iter=None,
    NBfirstlayer=True,
    logit=False,
    Whiten=False,
    batchsize=None,
    nocuda=False,
    patch=False,
    shape=[28, 28, 1],
):
    self.draws = draws
    self.start = start
    self.threshold = threshold
    self.model = model
    self.random_seed = random_seed
    self.chain = chain
    self.frac_validate = frac_validate
    self.iteration = iteration
    self.alpha = alpha
    self.k_trunc = k_trunc
    self.pareto = pareto
    self.epsilon = epsilon
    self.local_thresh = local_thresh
    self.local_step_size = local_step_size
    self.local_grad = local_grad
    self.nf_local_iter = nf_local_iter
    self.max_line_search = max_line_search
    self.verbose = verbose
    self.n_component = n_component
    self.interp_nbin = interp_nbin
    self.KDE = KDE
    self.bw_factor = bw_factor
    self.edge_bins = edge_bins
    self.ndata_wT = ndata_wT
    self.MSWD_max_iter = MSWD_max_iter
    self.NBfirstlayer = NBfirstlayer
    self.logit = logit
    self.Whiten = Whiten
    self.batchsize = batchsize
    self.nocuda = nocuda
    self.patch = patch
    self.shape = shape

    self.model = modelcontext(model)

    if self.random_seed != -1:
        np.random.seed(self.random_seed)

    self.beta = 0
    self.variables = inputvars(self.model.vars)
    self.weights = np.ones(self.draws) / self.draws
    # self.sinf_logq = np.array([])
    self.log_marginal_likelihood = 0
def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1,
                   minibatch_RVs=None, minibatch_tensors=None, minibatches=None,
                   local_RVs=None, observed_RVs=None, encoder_params=None,
                   total_size=None, optimizer=None, learning_rate=.001,
                   epsilon=.1, random_seed=None):
    """Perform mini-batch ADVI.

    This function implements a mini-batch ADVI with the meanfield
    approximation. Autoencoding variational inference is also supported.

    The log probability terms for mini-batches, corresponding to RVs in
    minibatch_RVs, are scaled to (total_size) / (the number of samples in each
    mini-batch), where total_size is an argument for the total data size.

    minibatch_tensors is a list of tensors (can be shared variables) to which
    mini-batch samples are set during the optimization. In most cases, these
    tensors are observations for RVs in the model.

    local_RVs and observed_RVs are used for autoencoding variational Bayes.
    Both of these RVs are associated with each of given samples. The
    difference is that local_RVs are unknown and their posterior distributions
    are approximated.

    local_RVs are Ordered dict, whose keys and values are RVs and a tuple of
    two objects. The first is the theano expression of variational parameters
    (mean and log of std) of the approximate posterior, which are encoded from
    given samples by an arbitrary deterministic function, e.g., MLP. The other
    one is a scaling constant to be multiplied to the log probability term
    corresponding to the RV.

    observed_RVs are also Ordered dict with RVs as the keys, but whose values
    are only the scaling constant as in local_RVs. In this case, total_size is
    ignored.

    If local_RVs is None (thus not using autoencoder), the following two
    settings are equivalent:

    - observed_RVs=OrderedDict([(rv, total_size / minibatch_size)])
    - minibatch_RVs=[rv], total_size=total_size

    where minibatch_size is minibatch_tensors[0].shape[0].

    The variational parameters and the parameters of the autoencoder are
    simultaneously optimized with given optimizer, which is a function that
    returns a dictionary of parameter updates as provided to Theano function.
    See the docstring of pymc3.variational.advi().

    Parameters
    ----------
    vars : object
        List of random variables. If None, variational posteriors (normal
        distribution) are fit for all RVs in the given model.
    start : Dict or None
        Initial values of parameters (variational means).
    model : Model
        Probabilistic model.
    n : int
        Number of iterations updating parameters.
    n_mcsamples : int
        Number of Monte Carlo samples to approximate ELBO.
    minibatch_RVs : list of ObservedRVs
        Random variables in the model for which mini-batch tensors are set.
        When this argument is given, both of arguments local_RVs and
        observed_RVs must be None.
    minibatch_tensors : list of (tensors or shared variables)
        Tensors used to create ObservedRVs in minibatch_RVs.
    minibatches : generator of list
        Generates a set of minibatches when calling next(). The length of the
        returned list must be the same with the number of random variables in
        `minibatch_tensors`.
    total_size : int
        Total size of training samples. This is used to appropriately scale
        the log likelihood terms corresponding to mini-batches in ELBO.
    local_RVs : Ordered dict
        Include encoded variational parameters and a scaling constant for
        the corresponding RV. See the above description.
    observed_RVs : Ordered dict
        Include a scaling constant for the corresponding RV. See the above
        description.
    encoder_params : list of theano shared variables
        Parameters of encoder.
    optimizer : (loss, list of shared variables) -> dict or OrderedDict
        A function that returns parameter updates given loss and shared
        variables of parameters. If :code:`None` (default), a default Adagrad
        optimizer is used with parameters :code:`learning_rate` and
        :code:`epsilon` below.
    learning_rate : float
        Base learning rate for adagrad. This parameter is ignored when an
        optimizer is given.
    epsilon : float
        Offset in denominator of the scale of learning rate in Adagrad.
        This parameter is ignored when an optimizer is given.
    random_seed : int
        Seed to initialize random state.

    Returns
    -------
    ADVIFit
        Named tuple, which includes 'means', 'stds', and 'elbo_vals'.
    """
    theano.config.compute_test_value = 'ignore'

    model = pm.modelcontext(model)
    vars = inputvars(vars if vars is not None else model.vars)
    start = start if start is not None else model.test_point
    check_discrete_rvs(vars)
    _check_minibatches(minibatch_tensors, minibatches)

    if encoder_params is None:
        encoder_params = []

    # Prepare optimizer
    if optimizer is None:
        optimizer = adagrad_optimizer(learning_rate, epsilon)

    # For backward compatibility in how input arguments are given
    local_RVs, observed_RVs = _get_rvss(minibatch_RVs, local_RVs, observed_RVs,
                                        minibatch_tensors, total_size)

    # Replace local_RVs with transformed variables
    ds = model.deterministics

    def get_transformed(v):
        if v in ds:
            return v.transformed
        return v
    local_RVs = OrderedDict(
        [(get_transformed(v), (uw, s)) for v, (uw, s) in local_RVs.items()]
    )

    # Get global variables
    global_RVs = list(set(vars) - set(list(local_RVs) + list(observed_RVs)))

    # Ordering for concatenation of random variables
    global_order = pm.ArrayOrdering([v for v in global_RVs])
    local_order = pm.ArrayOrdering([v for v in local_RVs])

    # ELBO wrt variational parameters
    inarray_g, uw_g, replace_g = _join_global_RVs(global_RVs, global_order)
    inarray_l, uw_l, replace_l = _join_local_RVs(local_RVs, local_order)
    logpt = _make_logpt(global_RVs, local_RVs, observed_RVs, model)
    replace = replace_g
    replace.update(replace_l)
    logp = theano.clone(logpt, replace, strict=False)
    elbo = _elbo_t(logp, uw_g, uw_l, inarray_g, inarray_l,
                   n_mcsamples, random_seed)
    del logpt

    # Replacements tensors of variational parameters in the graph
    replaces = dict()

    # Variational parameters for global RVs
    if 0 < len(global_RVs):
        uw_global_shared, bij = _init_uw_global_shared(start, global_RVs,
                                                       global_order)
        replaces.update({uw_g: uw_global_shared})

    # Variational parameters for local RVs, encoded from samples in
    # mini-batches
    if 0 < len(local_RVs):
        uws = [uw for _, (uw, _) in local_RVs.items()]
        uw_local_encoded = tt.concatenate([uw[0].ravel() for uw in uws] +
                                          [uw[1].ravel() for uw in uws])
        replaces.update({uw_l: uw_local_encoded})

    # Replace tensors of variational parameters in ELBO
    elbo = theano.clone(elbo, OrderedDict(replaces), strict=False)

    # Replace input shared variables with tensors
    def is_shared(t):
        return isinstance(t, theano.compile.sharedvalue.SharedVariable)
    tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors]
    updates = OrderedDict(
        {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)}
    )
    elbo = theano.clone(elbo, updates, strict=False)

    # Create parameter update function used in the training loop
    params = encoder_params
    if 0 < len(global_RVs):
        params += [uw_global_shared]
    updates = OrderedDict(optimizer(loss=-1 * elbo, param=params))
    f = theano.function(tensors, elbo, updates=updates)

    # Optimization loop
    elbos = np.empty(n)
    progress = tqdm.trange(n)
    for i in progress:
        e = f(*next(minibatches))
        elbos[i] = e
        if i % (n // 10) == 0 and i > 0:
            avg_elbo = elbos[i - n // 10:i].mean()
            progress.set_description('Average ELBO = {:,.2f}'.format(avg_elbo))

    pm._log.info('Finished minibatch ADVI: ELBO = {:,.2f}'.format(elbos[-1]))

    # Variational parameters of global RVs
    if 0 < len(global_RVs):
        l = int(uw_global_shared.get_value(borrow=True).size / 2)
        u = bij.rmap(uw_global_shared.get_value(borrow=True)[:l])
        w = bij.rmap(uw_global_shared.get_value(borrow=True)[l:])
        # w is in log space
        for var in w.keys():
            w[var] = np.exp(w[var])
    else:
        u = dict()
        w = dict()

    return ADVIFit(u, w, elbos)
def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1,
                   minibatch_RVs=None, minibatch_tensors=None,
                   minibatches=None, global_RVs=None, local_RVs=None,
                   observed_RVs=None, encoder_params=None, total_size=None,
                   optimizer=None, learning_rate=.001, epsilon=.1,
                   random_seed=None, mode=None):
    """Perform mini-batch ADVI.

    This function implements mini-batch automatic differentiation variational
    inference (ADVI; Kucukelbir et al., 2015) with the mean-field
    approximation. Autoencoding variational Bayes (AEVB; Kingma and Welling,
    2014) is also supported.

    For explanation, we classify random variables in probabilistic models into
    three types. Observed random variables
    :math:`{\cal Y}=\{\mathbf{y}_{i}\}_{i=1}^{N}` are :math:`N` observations.
    Each :math:`\mathbf{y}_{i}` can be a set of observed random variables,
    i.e., :math:`\mathbf{y}_{i}=\{\mathbf{y}_{i}^{k}\}_{k=1}^{V_{o}}`, where
    :math:`V_{o}` is the number of the types of observed random variables in
    the model.

    The next ones are global random variables
    :math:`\Theta=\{\\theta^{k}\}_{k=1}^{V_{g}}`, which are used to calculate
    the probabilities for all observed samples.

    The last ones are local random variables
    :math:`{\cal Z}=\{\mathbf{z}_{i}\}_{i=1}^{N}`, where
    :math:`\mathbf{z}_{i}=\{\mathbf{z}_{i}^{k}\}_{k=1}^{V_{l}}`. These RVs are
    used only in AEVB.

    The goal of ADVI is to approximate the posterior distribution
    :math:`p(\Theta,{\cal Z}|{\cal Y})` by the variational posterior
    :math:`q(\Theta)\prod_{i=1}^{N}q(\mathbf{z}_{i})`. All of these terms are
    normal distributions (mean-field approximation).

    :math:`q(\Theta)` is parametrized with its means and standard deviations.
    These parameters are denoted as :math:`\gamma`. While :math:`\gamma` is a
    constant, the parameters of :math:`q(\mathbf{z}_{i})` depend on each
    observation. Therefore these parameters are denoted as
    :math:`\\xi(\mathbf{y}_{i}; \\nu)`, where :math:`\\nu` is the set of
    parameters of :math:`\\xi(\cdot)`. For example, :math:`\\xi(\cdot)` can be
    a multilayer perceptron or a convolutional neural network.

    In addition to :math:`\\xi(\cdot)`, we can also include deterministic
    mappings for the likelihood of observations. We denote the parameters of
    the deterministic mappings as :math:`\eta`. An example of such mappings is
    the deconvolutional neural network used in the convolutional VAE example
    in the PyMC3 notebook directory.

    This function maximizes the evidence lower bound (ELBO)
    :math:`{\cal L}(\gamma, \\nu, \eta)` defined as follows:

    .. math::

        {\cal L}(\gamma,\\nu,\eta) & =
        \mathbf{c}_{o}\mathbb{E}_{q(\Theta)}\left[
        \sum_{i=1}^{N}\mathbb{E}_{q(\mathbf{z}_{i})}\left[
        \log p(\mathbf{y}_{i}|\mathbf{z}_{i},\Theta,\eta)
        \\right]\\right] \\\\ &
        - \mathbf{c}_{g}KL\left[q(\Theta)||p(\Theta)\\right]
        - \mathbf{c}_{l}\sum_{i=1}^{N}
            KL\left[q(\mathbf{z}_{i})||p(\mathbf{z}_{i})\\right],

    where :math:`KL[q(v)||p(v)]` is the Kullback-Leibler divergence

    .. math::

        KL[q(v)||p(v)] = \int q(v)\log\\frac{q(v)}{p(v)}dv,

    and :math:`\mathbf{c}_{o/g/l}` are vectors for weighting each term of the
    ELBO. More precisely, we can write each of the terms in the ELBO as
    follows:

    .. math::

        \mathbf{c}_{o}\log p(\mathbf{y}_{i}|\mathbf{z}_{i},\Theta,\eta) & = &
        \sum_{k=1}^{V_{o}}c_{o}^{k}
            \log p(\mathbf{y}_{i}^{k}|
                   {\\rm pa}(\mathbf{y}_{i}^{k},\Theta,\eta)) \\\\
        \mathbf{c}_{g}KL\left[q(\Theta)||p(\Theta)\\right] & = &
        \sum_{k=1}^{V_{g}}c_{g}^{k}KL\left[
            q(\\theta^{k})||p(\\theta^{k}|{\\rm pa(\\theta^{k})})\\right] \\\\
        \mathbf{c}_{l}KL\left[q(\mathbf{z}_{i})||p(\mathbf{z}_{i})\\right] & = &
        \sum_{k=1}^{V_{l}}c_{l}^{k}KL\left[
            q(\mathbf{z}_{i}^{k})||
            p(\mathbf{z}_{i}^{k}|{\\rm pa}(\mathbf{z}_{i}^{k}))\\right],

    where :math:`{\\rm pa}(v)` denotes the set of parent variables of
    :math:`v` in the directed acyclic graph of the model.

    When using mini-batches, :math:`c_{o}^{k}` and :math:`c_{l}^{k}` should be
    set to :math:`N/M`, where :math:`M` is the number of observations in each
    mini-batch. Another weighting scheme was proposed in
    (Blundell et al., 2015) for accelerating model fitting.

    For working with ADVI, we need to give the probabilistic model
    (:code:`model`), the three types of RVs (:code:`observed_RVs`,
    :code:`global_RVs` and :code:`local_RVs`), the tensors to which
    mini-batched samples are supplied (:code:`minibatches`), and the
    parameters of the deterministic mappings :math:`\\xi` and :math:`\eta`
    (:code:`encoder_params`) as input arguments.

    :code:`observed_RVs` is an :code:`OrderedDict` of the form
    :code:`{y_k: c_k}`, where :code:`y_k` is a random variable defined in the
    PyMC3 model. :code:`c_k` is a scalar (:math:`c_{o}^{k}`) and it can be a
    shared variable.

    :code:`global_RVs` is an :code:`OrderedDict` of the form
    :code:`{t_k: c_k}`, where :code:`t_k` is a random variable defined in the
    PyMC3 model. :code:`c_k` is a scalar (:math:`c_{g}^{k}`) and it can be a
    shared variable.

    :code:`local_RVs` is an :code:`OrderedDict` of the form
    :code:`{z_k: ((m_k, s_k), c_k)}`, where :code:`z_k` is a random variable
    defined in the PyMC3 model. :code:`c_k` is a scalar (:math:`c_{l}^{k}`)
    and it can be a shared variable. :code:`(m_k, s_k)` is a pair of tensors
    of means and log standard deviations of the variational distribution;
    samples drawn from the variational distribution replace :code:`z_k`. It
    should be noted that if :code:`z_k` has a transformation that changes the
    dimension (e.g., StickBreakingTransform), the variational distribution
    must have the same dimension. For example, if :code:`z_k` is distributed
    with a Dirichlet distribution with :code:`p` choices, :code:`m_k` and
    :code:`s_k` have the shape :code:`(n_samples_in_minibatch, p - 1)`.

    :code:`minibatch_tensors` is a list of tensors (can be shared variables)
    to which mini-batch samples are set during the optimization. These
    tensors are observations (:code:`obs=`) in :code:`observed_RVs`.

    :code:`minibatches` is a generator of a list of :code:`numpy.ndarray`.
    Each item of the list will be set to tensors in
    :code:`minibatch_tensors`.

    :code:`encoder_params` is a list of shared variables of the parameters
    :math:`\\nu` and :math:`\eta`. We do not need to include the variational
    parameters of the global variables, :math:`\gamma`, because these are
    automatically created and updated in this function.

    The following is a list of example notebooks using advi_minibatch:

    - docs/source/notebooks/GLM-hierarchical-advi-minibatch.ipynb
    - docs/source/notebooks/bayesian_neural_network_advi.ipynb
    - docs/source/notebooks/convolutional_vae_keras_advi.ipynb
    - docs/source/notebooks/gaussian-mixture-model-advi.ipynb
    - docs/source/notebooks/lda-advi-aevb.ipynb

    Parameters
    ----------
    vars : object
        List of random variables. If None, variational posteriors (normal
        distribution) are fit for all RVs in the given model.
    start : Dict or None
        Initial values of parameters (variational means).
    model : Model
        Probabilistic model.
    n : int
        Number of iterations updating parameters.
    n_mcsamples : int
        Number of Monte Carlo samples to approximate the ELBO.
    minibatch_RVs : list of ObservedRVs
        Random variables in the model for which mini-batch tensors are set.
        When this argument is given, both of the arguments local_RVs and
        observed_RVs must be None.
    minibatch_tensors : list of (tensors or shared variables)
        Tensors used to create ObservedRVs in minibatch_RVs.
    minibatches : generator of list
        Generates a set of minibatches when calling next(). The length of the
        returned list must be the same as the number of random variables in
        `minibatch_tensors`.
    total_size : int
        Total size of training samples. This is used to appropriately scale
        the log likelihood terms corresponding to mini-batches in the ELBO.
    observed_RVs : Ordered dict
        Include a scaling constant for the corresponding RV. See the above
        description.
    global_RVs : Ordered dict or None
        Include a scaling constant for the corresponding RV. See the above
        description. If :code:`None`, it is set to
        :code:`{v: 1 for v in grvs}`, where :code:`grvs` is
        :code:`list(set(vars) - set(list(local_RVs) + list(observed_RVs)))`.
    local_RVs : Ordered dict or None
        Include encoded variational parameters and a scaling constant for the
        corresponding RV. See the above description.
    encoder_params : list of theano shared variables
        Parameters of the encoder.
    optimizer : (loss, list of shared variables) -> dict or OrderedDict
        A function that returns parameter updates given loss and shared
        variables of parameters. If :code:`None` (default), a default Adagrad
        optimizer is used with parameters :code:`learning_rate` and
        :code:`epsilon` below.
    learning_rate : float
        Base learning rate for Adagrad. This parameter is ignored when
        :code:`optimizer` is set.
    epsilon : float
        Offset in denominator of the scale of learning rate in Adagrad. This
        parameter is ignored when :code:`optimizer` is set.
    random_seed : int
        Seed to initialize random state.

    Returns
    -------
    ADVIFit
        Named tuple, which includes 'means', 'stds', and 'elbo_vals'.

    References
    ----------
    - Kingma, D. P., & Welling, M. (2014). Auto-Encoding Variational Bayes.
      stat, 1050, 1.
    - Kucukelbir, A., Ranganath, R., Gelman, A., & Blei, D. (2015). Automatic
      variational inference in Stan. In Advances in Neural Information
      Processing Systems (pp. 568-576).
    - Blundell, C., Cornebise, J., Kavukcuoglu, K., & Wierstra, D. (2015).
      Weight Uncertainty in Neural Network. In Proceedings of the 32nd
      International Conference on Machine Learning (ICML-15) (pp. 1613-1622).
""" import warnings warnings.warn('Old ADVI interface is deprecated and be removed in future, use pm.ADVI instead', DeprecationWarning, stacklevel=2) if encoder_params is None: encoder_params = [] model = pm.modelcontext(model) vars = inputvars(vars if vars is not None else model.vars) start = start if start is not None else model.test_point if not pm.model.all_continuous(vars): raise ValueError('Model can not include discrete RVs for ADVI.') _check_minibatches(minibatch_tensors, minibatches) if encoder_params is None: encoder_params = [] # Prepare optimizer if optimizer is None: optimizer = adagrad_optimizer(learning_rate, epsilon) # For backward compatibility in how input arguments are given local_RVs, observed_RVs = _get_rvss(minibatch_RVs, local_RVs, observed_RVs, minibatch_tensors, total_size) # Replace local_RVs with transformed variables def get_transformed(v): if hasattr(v, 'transformed'): return v.transformed return v local_RVs = OrderedDict( [(get_transformed(v), (uw, s)) for v, (uw, s) in local_RVs.items()] ) # Get global variables grvs = list(set(vars) - set(list(local_RVs) + list(observed_RVs))) if global_RVs is None: global_RVs = OrderedDict({v: 1 for v in grvs}) _value_error(len(grvs) == len(global_RVs), 'global_RVs ({}) must have all global RVs: {}'.format( [v for v in global_RVs], grvs) ) # ELBO wrt variational parameters elbo, uw_l, uw_g = _make_elbo_t(observed_RVs, global_RVs, local_RVs, model.potentials, n_mcsamples, random_seed) # Replacements tensors of variational parameters in the graph replaces = dict() # Variational parameters for global RVs if 0 < len(global_RVs): uw_global_shared, bij = _init_uw_global_shared(start, global_RVs) replaces.update({uw_g: uw_global_shared}) # Variational parameters for local RVs, encoded from samples in # mini-batches if 0 < len(local_RVs): uws = [uw for _, (uw, _) in local_RVs.items()] uw_local_encoded = tt.concatenate([uw[0].ravel() for uw in uws] + [uw[1].ravel() for uw in uws]) replaces.update({uw_l: uw_local_encoded}) # Replace tensors of variational parameters in ELBO elbo = theano.clone(elbo, OrderedDict(replaces), strict=False) # Replace input shared variables with tensors def is_shared(t): return isinstance(t, theano.compile.sharedvalue.SharedVariable) tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors] updates = OrderedDict( {t: t_ for t, t_ in zip(minibatch_tensors, tensors) if is_shared(t)} ) elbo = theano.clone(elbo, updates, strict=False) # Create parameter update function used in the training loop params = encoder_params if 0 < len(global_RVs): params += [uw_global_shared] updates = OrderedDict(optimizer(loss=-1 * elbo, param=params)) f = theano.function(tensors, elbo, updates=updates, mode=mode) # Optimization loop elbos = np.empty(n) progress = tqdm.trange(n) for i in progress: e = f(*next(minibatches)) if np.isnan(e): raise FloatingPointError('NaN occurred in ADVI optimization.') elbos[i] = e if n < 10: progress.set_description('ELBO = {:,.2f}'.format(elbos[i])) elif i % (n // 10) == 0 and i > 0: avg_elbo = infmean(elbos[i - n // 10:i]) progress.set_description('Average ELBO = {:,.2f}'.format(avg_elbo)) pm._log.info('Finished minibatch ADVI: ELBO = {:,.2f}'.format(elbos[-1])) # Variational parameters of global RVs if 0 < len(global_RVs): l = int(uw_global_shared.get_value(borrow=True).size / 2) u = bij.rmap(uw_global_shared.get_value(borrow=True)[:l]) w = bij.rmap(uw_global_shared.get_value(borrow=True)[l:]) # w is in log space for var in w.keys(): w[var] = np.exp(w[var]) else: u = dict() w = 
dict() return ADVIFit(u, w, elbos)
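# A minimal usage sketch of the advi_minibatch interface documented above,
# using the backward-compatible minibatch_RVs path. Illustrative only: the
# toy data, variable names, and generator are made up; attribute names on the
# returned ADVIFit follow the docstring ('means', 'stds', 'elbo_vals').
import numpy as np
import pymc3 as pm
import theano

data = np.random.randn(1000)                 # assumed toy data set
batch_size = 100
x_shared = theano.shared(data[:batch_size])  # holds the current mini-batch


def create_minibatches():
    # Yields one ndarray per entry of `minibatch_tensors`, forever.
    rng = np.random.RandomState(0)
    while True:
        idx = rng.choice(len(data), batch_size, replace=False)
        yield [data[idx]]


with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=10)
    x = pm.Normal('x', mu=mu, sd=1, observed=x_shared)

    fit = advi_minibatch(
        n=5000,
        minibatch_RVs=[x],
        minibatch_tensors=[x_shared],
        minibatches=create_minibatches(),
        total_size=len(data),   # rescales mini-batch likelihood to full data
    )

# fit.means and fit.stds hold the variational parameters; fit.elbo_vals the
# per-iteration ELBO trace.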
def __init__(self, vars=None, out_vars=None, covariance=None, scale=1.,
             n_chains=100, tune=True, tune_interval=100, model=None,
             check_bound=True, likelihood_name='like', backend='csv',
             proposal_name='MultivariateNormal', **kwargs):

    model = modelcontext(model)

    if vars is None:
        vars = model.vars

    vars = inputvars(vars)

    if out_vars is None:
        out_vars = model.unobserved_RVs

    out_varnames = [out_var.name for out_var in out_vars]

    self.scaling = utility.scalar2floatX(num.atleast_1d(scale))

    self.tune = tune
    self.check_bound = check_bound
    self.tune_interval = tune_interval
    self.steps_until_tune = tune_interval

    self.stage_sample = 0
    self.cumulative_samples = 0
    self.accepted = 0

    self.beta = 1.
    self.stage = 0
    self.chain_index = 0

    # needed to use the same parallel implementation function as for SMC
    self.resampling_indexes = num.arange(n_chains)
    self.n_chains = n_chains

    self.likelihood_name = likelihood_name
    self._llk_index = out_varnames.index(likelihood_name)
    self.backend = backend

    self.discrete = num.concatenate(
        [[v.dtype in discrete_types] * (v.dsize or 1) for v in vars])
    self.any_discrete = self.discrete.any()
    self.all_discrete = self.discrete.all()

    # create initial population
    self.population = []
    self.array_population = num.zeros(n_chains)
    logger.info('Creating initial population for {}'
                ' chains ...'.format(self.n_chains))
    for i in range(self.n_chains):
        self.population.append(
            Point({v.name: v.random() for v in vars}, model=model))

    self.population[0] = model.test_point

    shared = make_shared_replacements(vars, model)
    self.logp_forw = logp_forw(out_vars, vars, shared)
    self.check_bnd = logp_forw([model.varlogpt], vars, shared)

    super(Metropolis, self).__init__(vars, out_vars, shared)

    # init proposal
    if covariance is None and proposal_name in multivariate_proposals:
        t0 = time()
        self.covariance = init_proposal_covariance(
            bij=self.bij, vars=vars, model=model, pop_size=1000)
        t1 = time()
        logger.info('Time for proposal covariance init: %f' % (t1 - t0))
        scale = self.covariance
    elif covariance is None:
        scale = num.ones(sum(v.dsize for v in vars))
    else:
        scale = covariance

    self.proposal_name = proposal_name
    self.proposal_dist = choose_proposal(
        self.proposal_name, scale=scale)

    self.proposal_samples_array = self.proposal_dist(n_chains)

    self.chain_previous_lpoint = [[]] * self.n_chains
    self._tps = None
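# A small self-contained sketch of how the `self.discrete` mask above is
# built: every model variable contributes one boolean flag per element, so
# vector-valued discrete variables are marked element-wise. The stand-in
# `Var` objects and `discrete_types` set here are assumptions for
# illustration, not the sampler's real variable classes.
from collections import namedtuple

import numpy as np

discrete_types = {'int16', 'int32', 'int64'}
Var = namedtuple('Var', ['dtype', 'dsize'])   # hypothetical stand-in

example_vars = [Var('float64', 3), Var('int64', 2), Var('float64', 1)]
discrete = np.concatenate(
    [[v.dtype in discrete_types] * (v.dsize or 1) for v in example_vars])
# discrete -> [False, False, False, True, True, False]
# any_discrete / all_discrete then summarise whether mixed proposals are needed
print(discrete.any(), discrete.all())   # True False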