def cdf(self, x):
    # Evaluate the frozen distribution's CDF at `x`.
    #
    # `x` is normalised by the wrapped distribution's `_process_quantiles`
    # for this instance's dimension, handed to its private `_cdf` together
    # with the stored mean and `cov_info.U`, and the result is squeezed.
    #
    # NOTE(review): the original body carried two further variants after the
    # first `return` (a plain delegation to `self._dist.cdf(x)` and a call
    # passing `self.cov`, `self.maxpts`, `self.abseps`, `self.releps`).
    # They could never execute and were removed; confirm that the variant
    # kept here is the intended one.
    x = self._dist._process_quantiles(x, self.dim)
    out = self._dist._cdf(x, self.mean, self.cov_info.U)
    return _squeeze_output(out)
def logpdf(x, mean=None, cov=None, allow_singular=False, coef=1, psd=None,
           return_psd=False):
    """
    Evaluate `_logpdf` at `x`, building default parameters where needed.

    Parameters
    ----------
    x : ndarray
        Points to evaluate.  The size of its last axis is used as the
        dimension whenever a default `mean` or `cov` has to be created.
    mean : ndarray, optional
        Defaults to a float64 zero vector of length ``x.shape[-1]``.
    cov : ndarray, optional
        Defaults to the float64 identity matrix of size ``x.shape[-1]``.
    allow_singular : bool, optional
        Forwarded to `_PSD` when a decomposition is built here.
    coef : optional
        Forwarded unchanged to `_logpdf` (default 1).
    psd : optional
        Pre-built decomposition exposing ``U``, ``log_pdet`` and ``rank``.
        When given, `cov` and `allow_singular` are not used to build one.
    return_psd : bool, optional
        If true, the decomposition that was used is returned as well.

    Returns
    -------
    out or (out, psd)
        The squeezed result of `_logpdf`, paired with the decomposition
        when `return_psd` is true.
    """
    # Defaults are created lazily so `x.shape` is only touched when needed.
    if mean is None:
        mean = np.zeros(x.shape[-1], dtype=np.float64)
    if cov is None:
        cov = np.eye(x.shape[-1], dtype=np.float64)
    if psd is None:
        psd = _PSD(cov, allow_singular=allow_singular)

    log_density = _logpdf(x, mean, psd.U, psd.log_pdet, psd.rank, coef)
    squeezed = _squeeze_output(log_density)
    if return_psd:
        return squeezed, psd
    return squeezed
def infer(self, *args, **kwargs):
    # Run inference with the sampler selected by the optional 'sampler'
    # keyword.  That keyword is removed from `kwargs` before the remaining
    # arguments are forwarded to the sampler's own `infer`.
    requested = kwargs.pop('sampler', None)
    sampler = self._get_sampler(requested)

    # The sampler returns a pair; only the first element is squeezed.
    c_n, phi_c = sampler.infer(*args, **kwargs)
    squeezed_c_n = _squeeze_output(c_n)
    return squeezed_c_n, phi_c
def pdf(self, x, location=None, scale=1, dof=None, allow_singular=False):
    """
    Multivariate Student's t probability density function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    location : ndarray
        Location of the distribution
    scale : array_like
        Scale matrix of the distribution
    dof : scalar
        Degrees-of-freedom of the distribution
    allow_singular : bool, optional
        Forwarded to `_PSD` when the scale matrix is decomposed.

    Returns
    -------
    pdf : ndarray or scalar
        Probability density function evaluated at `x`

    """
    dim, location, scale, dof = self._process_parameters(
        None, location, scale, dof)
    quantiles = self._process_quantiles(x, dim)
    scale_info = _PSD(scale, allow_singular=allow_singular)

    # The density is obtained by exponentiating the log-density.
    log_density = self._logpdf(quantiles, location, scale_info.U,
                               scale_info.log_pdet, scale_info.rank, dof)
    return _squeeze_output(np.exp(log_density))
def logpdf(self, x, df, scale):
    """
    Log of the Wishart probability density function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
        Each quantile must be a symmetric positive definite matrix.
    %(_doc_default_callparams)s

    Notes
    -----
    %(_doc_callparams_note)s

    Returns
    -------
    pdf : ndarray
        Log of the probability density function evaluated at `x`

    """
    dim, df, scale = self._process_parameters(df, scale)
    quantiles = self._process_quantiles(x, dim)

    # With scale = C C^T (C lower triangular), log(det(scale)) is twice the
    # sum of the logs of C's diagonal entries.
    chol = scipy.linalg.cholesky(scale, lower=True)
    log_det_scale = 2 * np.sum(np.log(chol.diagonal()))

    return _squeeze_output(
        self._logpdf(quantiles, dim, df, scale, log_det_scale, chol))
def rvs(self, location=None, scale=1, dof=None, size=1, random_state=None):
    """
    Draw random samples from a multivariate Student's t distribution.

    Parameters
    ----------
    location : ndarray
        Location of the distribution
    scale : array_like
        Scale matrix of the distribution
    dof : scalar
        Degrees-of-freedom of the distribution
    size : int
        Number of samples to draw
    random_state : np.random.RandomState, optional

    Returns
    -------
    rvs : ndarray or scalar
        Random variates of size (`size`, `N`), where `N` is the
        dimension of the random variable.

    """
    _, location, scale, dof = self._process_parameters(
        None, location, scale, dof)

    # Infinite degrees of freedom: sample from a multivariate normal with
    # the same location and scale, and squeeze the result.
    if dof == np.inf:
        rng = self._get_random_state(random_state)
        return _squeeze_output(
            rng.multivariate_normal(location, scale, size))

    # Otherwise defer to the dedicated helper; its result is returned as is.
    return _multivariate_t_random(location, scale, dof, size, random_state)
def logpdf(self, x, df, scale):
    """
    Log of the inverse Wishart probability density function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
        Each quantile must be a symmetric positive definite matrix.
    %(_doc_default_callparams)s

    Notes
    -----
    %(_doc_callparams_note)s

    Returns
    -------
    pdf : ndarray
        Log of the probability density function evaluated at `x`

    """
    dim, df, scale = self._process_parameters(df, scale)
    x = self._process_quantiles(x, dim)

    # slogdet works in log space, so it does not overflow or underflow the
    # way log(det(scale)) does for large or badly scaled matrices.
    sign, log_abs_det = np.linalg.slogdet(scale)
    # Keep the results log(det(scale)) gave for a non-positive determinant:
    # -inf for a zero determinant (sign == 0) and nan for a negative one.
    # Indexing with [()] turns a 0-d array back into a scalar.
    log_det_scale = np.where(sign >= 0, log_abs_det, np.nan)[()]

    out = self._logpdf(x, dim, df, scale, log_det_scale)
    return _squeeze_output(out)
def _cdf(self, x, df, mean, cov, maxpts, abseps, releps):
    """
    Placeholder for the cumulative distribution function.

    Parameters
    ----------
    x : ndarray
        Points at which to evaluate the cumulative distribution function.
    df : float
        Degrees of freedom of the distribution
    mean : ndarray
        Mean of the distribution
    cov : array_like
        Covariance matrix of the distribution
    maxpts: integer
        The maximum number of points to use for integration
    abseps: float
        Absolute error tolerance
    releps: float
        Relative error tolerance

    Raises
    ------
    NotImplementedError
        Always; no computation is performed and the arguments are unused.

    Notes
    -----
    As this function does no argument checking, it should not be
    called directly; use 'cdf' instead.

    .. versionadded:: 1.0.0

    """
    # The statements that used to follow this raise were unreachable and
    # never used `df`, so they have been dropped.
    raise NotImplementedError
def rvs(self, df, scale, size=1):
    """
    Draw random samples from a Wishart distribution.

    Parameters
    ----------
    %(_doc_default_callparams)s
    size : integer or iterable of integers, optional
        Number of samples to draw (default 1).

    Notes
    -----
    %(_doc_callparams_note)s

    Returns
    -------
    rvs : ndarray
        Random variates of shape (`size`) + (`dim`, `dim`), where `dim` is
        the dimension of the scale matrix.

    """
    n, shape = self._process_size(size)
    dim, df, scale = self._process_parameters(df, scale)

    # The sampler works from the lower Cholesky factor of the scale matrix.
    chol = scipy.linalg.cholesky(scale, lower=True)
    samples = self._rvs(n, shape, dim, df, chol)
    return _squeeze_output(samples)
def cdf(self, x, mean=None, cov=1, allow_singular=False):
    """
    Multivariate laplace cumulative distribution function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_mvl_doc_default_callparams)s

    Returns
    -------
    cdf : ndarray or scalar
        Cumulative distribution function evaluated at `x`

    Notes
    -----
    %(_mvl_doc_callparams_note)s

    .. versionadded:: 1.0.0

    """
    dim, mean, cov = self._process_parameters(None, mean, cov)
    quantiles = self._process_quantiles(x, dim)
    cov_info = _PSD(cov, allow_singular=allow_singular)
    return _squeeze_output(self._cdf(quantiles, mean, cov_info.U))
def pdf(self, x, mean=None, cov=1, allow_singular=False):
    """
    Multivariate laplace probability density function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_mvl_doc_default_callparams)s

    Returns
    -------
    pdf : ndarray or scalar
        Probability density function evaluated at `x`

    Notes
    -----
    %(_mvl_doc_callparams_note)s

    """
    dim, mean, cov = self._process_parameters(None, mean, cov)
    quantiles = self._process_quantiles(x, dim)
    cov_info = _PSD(cov, allow_singular=allow_singular)

    # The density is obtained by exponentiating the log-density.
    log_density = self._logpdf(quantiles, mean, cov_info.U,
                               cov_info.log_pdet, cov_info.rank)
    return _squeeze_output(np.exp(log_density))
def var(self, df, scale):
    """
    Variance of the Wishart distribution

    Parameters
    ----------
    %(_doc_default_callparams)s

    Returns
    -------
    var : float
        The variance of the distribution

    """
    dim, df, scale = self._process_parameters(df, scale)
    variance = self._var(dim, df, scale)
    return _squeeze_output(variance)
def mean(self, df, scale):
    """
    Mean of the Wishart distribution

    Parameters
    ----------
    %(_doc_default_callparams)s

    Returns
    -------
    mean : float
        The mean of the distribution

    """
    dim, df, scale = self._process_parameters(df, scale)
    expectation = self._mean(dim, df, scale)
    return _squeeze_output(expectation)
def mode(self, df, scale):
    """
    Mode of the inverse Wishart distribution

    Parameters
    ----------
    %(_doc_default_callparams)s

    Returns
    -------
    mode : float
        The Mode of the distribution

    """
    dim, df, scale = self._process_parameters(df, scale)
    peak = self._mode(dim, df, scale)
    return _squeeze_output(peak)
def var(self, df, scale):
    """
    Variance of the inverse Wishart distribution

    Only valid if the degrees of freedom are greater than the dimension of
    the scale matrix plus three.

    Parameters
    ----------
    %(_doc_default_callparams)s

    Returns
    -------
    var : float
        The variance of the distribution

    """
    dim, df, scale = self._process_parameters(df, scale)
    variance = self._var(dim, df, scale)

    # A `None` from the private helper is passed through without squeezing.
    if variance is None:
        return variance
    return _squeeze_output(variance)
def mean(self, df, scale):
    """
    Mean of the inverse Wishart distribution

    Only valid if the degrees of freedom are greater than the dimension of
    the scale matrix plus one.

    Parameters
    ----------
    %(_doc_default_callparams)s

    Returns
    -------
    mean : float or None
        The mean of the distribution

    """
    dim, df, scale = self._process_parameters(df, scale)
    expectation = self._mean(dim, df, scale)

    # A `None` from the private helper is passed through without squeezing.
    if expectation is None:
        return expectation
    return _squeeze_output(expectation)
def rvs(self, df, mean=None, cov=1, size=1, random_state=None):
    r"""
    Draw random samples from a multivariate Student's T distribution.

    Parameters
    ----------
    %(_mvt_doc_default_callparams)s
    size : integer, optional
        Number of samples to draw (default 1).
    %(_doc_random_state)s

    Returns
    -------
    rvs : ndarray or scalar
        Random variates of size (`size`, `N`), where `N` is the
        dimension of the random variable.

    Notes
    -----
    %(_mvt_doc_callparams_note)s

    Sampling is based on the observation that

    .. math::

        X = \mu + \frac{Y}{\sqrt{\frac{U}{\nu}}}
          = \mu + Y\sqrt{\frac{\nu}{U}}

    has a :math:`t_\nu({\boldsymbol {\mu}}, {\boldsymbol {\Sigma}})`
    distribution when :math:`Y` has a
    :math:`N(0,{\boldsymbol {\Sigma}})` distribution and independently
    :math:`U` has a :math:`\chi^2_{\boldsymbol {\nu}}` distribution.

    """
    # The docstring is a raw string so that the LaTeX backslashes are kept
    # literally instead of being read as escape sequences.
    dim, df, mean, cov = self._process_parameters(None, df, mean, cov)
    random_state = self._get_random_state(random_state)

    # Y, shape (*size, dim)
    norm_comp = random_state.multivariate_normal(np.zeros(dim), cov, size)
    # U, shape size
    chi2_comp = random_state.chisquare(df, size)

    # The trailing axis added to U lets it broadcast across the components.
    out = mean + norm_comp * np.sqrt(df / chi2_comp[..., np.newaxis])
    return _squeeze_output(out)
def cdf(self, x):
    # Evaluate the wrapped distribution's private `_cdf` at `x`, using the
    # location, scale, degrees of freedom and integration settings stored
    # on this instance, then squeeze the result.
    dist = self._dist
    quantiles = dist._process_quantiles(x, self.dim)
    values = dist._cdf(quantiles, self.location, self.scale, self.dof,
                       self.maxpts, self.abseps, self.releps)
    return _squeeze_output(values)
def logpdf(self, x):
    # Evaluate the wrapped distribution's private `_logpdf` at `x`, reusing
    # the covariance decomposition stored in `self.cov_info`, then squeeze
    # the result.
    dist = self._dist
    info = self.cov_info
    quantiles = dist._process_quantiles(x, self.dim)
    log_density = dist._logpdf(quantiles, self.mean, info.U,
                               info.log_pdet, info.rank)
    return _squeeze_output(log_density)
def mode(self):
    # Delegate to the wrapped inverse Wishart object's private `_mode`
    # with the stored dimension, degrees of freedom and scale.
    peak = self._invwishart._mode(self.dim, self.df, self.scale)
    return _squeeze_output(peak)
def var(self):
    # Delegate to the wrapped inverse Wishart object's private `_var`.
    # A `None` result is passed through without squeezing.
    variance = self._invwishart._var(self.dim, self.df, self.scale)
    if variance is None:
        return variance
    return _squeeze_output(variance)
def logpdf(self, x):
    # Evaluate the wrapped Wishart object's private `_logpdf` at `x`,
    # reusing the `log_det_scale` and `C` values stored on this instance,
    # then squeeze the result.
    wishart = self._wishart
    quantiles = wishart._process_quantiles(x, self.dim)
    log_density = wishart._logpdf(quantiles, self.dim, self.df, self.scale,
                                  self.log_det_scale, self.C)
    return _squeeze_output(log_density)
def mode(self):
    # Delegate to the wrapped Wishart object's private `_mode`.
    # A `None` result is passed through without squeezing.
    peak = self._wishart._mode(self.dim, self.df, self.scale)
    if peak is None:
        return peak
    return _squeeze_output(peak)
def rvs(self, size=1):
    # Draw samples through the wrapped Wishart object's private `_rvs`,
    # using the stored dimension, degrees of freedom and `C`, then squeeze
    # the result.
    wishart = self._wishart
    n, shape = wishart._process_size(size)
    samples = wishart._rvs(n, shape, self.dim, self.df, self.C)
    return _squeeze_output(samples)
def var(self):
    # Delegate to the wrapped Wishart object's private `_var` with the
    # stored dimension, degrees of freedom and scale.
    variance = self._wishart._var(self.dim, self.df, self.scale)
    return _squeeze_output(variance)
def rvs(self, w_0, V_0, a_0, b_0, size=1):
    # Draw `size` rows, each a sample from `self._mnorm` followed by the
    # `self._invgamma` draw that scaled its covariance.
    _, w_0, V_0 = _process_parameters(None, w_0, V_0)

    # All inverse-gamma draws are taken first, in one call.
    variances = self._invgamma.rvs(a=a_0, scale=b_0, size=size)

    # One `self._mnorm` draw per variance, with covariance variance * V_0;
    # the variance itself is appended as the last entry of the row.
    rows = []
    for variance in variances:
        weights = self._mnorm.rvs(mean=w_0, cov=variance * V_0, size=1)
        rows.append(np.append(weights, variance))

    return _squeeze_output(np.vstack(rows))
def draw(self, size, random_state=None):
    """
    Draw from the Chinese restaurant process.

    Parameters
    ----------
    size
        Passed to `self._process_size`, which yields ``(n, m, shape)``:
        `n` examples are drawn for every index of `shape`, and ``m * n``
        is the total number of indicator slots allocated.
    random_state : optional
        Passed through `self._get_random_state`.

    Returns
    -------
    x_n, c_n
        The drawn examples and their component indicators, each passed
        through `_squeeze_output`.
    """
    model = self._mixture_model
    n, m, shape = self._process_size(size)
    rng = self._get_random_state(random_state)

    # Component indicators, one per example.  The buffer is uninitialised;
    # every slot is written in the loop below.
    c_n = np.empty(m * n, dtype=int).reshape(shape + (n,))

    # Maximum number of components is number of examples
    c_max = n

    # Examples.
    # TODO: Make this truly model-agnostic, i.e. get rid of mm.dim and
    # dtype=float
    x_n = np.empty(m * n * model.dim, dtype=float).reshape(
        shape + (n, model.dim))

    for index in np.ndindex(shape):
        process_param = self.DrawParam(self, rng)

        counts = np.zeros(c_max, dtype=int)
        active = set()
        inactive = set(range(c_max))

        # One parameter object per potential component, created up front.
        components = [model.DrawParam(model, rng) for _ in range(c_max)]

        for i in range(n):
            # Offer one currently unused component as the candidate "new"
            # component for example i.
            candidate = inactive.pop()
            active.add(candidate)

            # Log prior per component; components that are not active keep
            # -inf and hence get probability zero.
            log_dist = np.full(c_max, -np.inf, dtype=float)
            for k in active:
                log_dist[k] = process_param.log_prior(i + 1, counts[k])

            # Normalization is required before sampling from log_dist.
            log_dist -= _logsumexp(c_max, log_dist)
            chosen = rng.choice(a=c_max, p=np.exp(log_dist))

            c_n[index + (i,)] = chosen
            counts[chosen] += 1
            x_n[index + (i,)] = components[chosen].draw_x_n()

            # Hand the candidate back if it was not the one selected.
            if chosen != candidate:
                active.remove(candidate)
                inactive.add(candidate)

    # TODO: Make it possible to return the component parameters
    return _squeeze_output(x_n), _squeeze_output(c_n)
def logpdf(self, x):
    # Evaluate the wrapped distribution's private `_logpdf` at `x`, reusing
    # the scale decomposition stored in `self.scale_info`, then squeeze the
    # result.
    dist = self._dist
    info = self.scale_info
    quantiles = dist._process_quantiles(x, self.dim)
    log_density = dist._logpdf(quantiles, self.location, info.U,
                               info.log_pdet, info.rank, self.dof)
    return _squeeze_output(log_density)
def draw(self, size, random_state=None):
    """
    Draw from the Chinese restaurant process.

    Parameters
    ----------
    size
        Passed to `self._process_size`, which yields ``(n, m, shape)``:
        `n` examples are drawn for every index of `shape`, and ``m * n``
        is the total number of indicator slots allocated.
    random_state : optional
        Passed through `self._get_random_state`.

    Returns
    -------
    x_n, c_n
        The drawn examples and their component indicators, each passed
        through `_squeeze_output`.
    """
    mixture = self._mixture_model
    n, m, shape = self._process_size(size)
    random_state = self._get_random_state(random_state)

    # Output buffers: indicators have shape `shape + (n,)` and examples
    # `shape + (n, dim)`.  Both start uninitialised and are filled below.
    c_n = np.empty(m * n, dtype=int).reshape(shape + (n,))
    # Maximum number of components is number of examples
    c_max = n
    # TODO: Make this truly model-agnostic, i.e. get rid of mm.dim and
    # dtype=float
    x_n = np.empty(m * n * mixture.dim, dtype=float).reshape(
        shape + (n, mixture.dim))

    for index in np.ndindex(shape):
        process_param = self.DrawParam(self, random_state)
        n_per_component = np.zeros(c_max, dtype=int)
        in_use = set()
        unused = set(range(c_max))

        # One parameter object per potential component, created up front.
        mixture_params = [
            mixture.DrawParam(mixture, random_state) for _ in range(c_max)
        ]

        for i in range(n):
            # Take an unused component as the proposed new component.
            proposed = unused.pop()
            in_use.add(proposed)

            # Components that are not in use keep log-probability -inf.
            log_dist = np.full(c_max, -np.inf, dtype=float)
            for k in in_use:
                # Calculate the process prior
                log_dist[k] = process_param.log_prior(
                    i + 1, n_per_component[k])

            # Sample from log_dist. Normalization is required
            log_dist -= _logsumexp(c_max, log_dist)
            picked = random_state.choice(a=c_max, p=np.exp(log_dist))

            slot = index + (i,)
            c_n[slot] = picked
            # Update component counter
            n_per_component[picked] += 1
            x_n[slot] = mixture_params[picked].draw_x_n()

            # Cleanup: a proposal that was not picked becomes unused again.
            if picked != proposed:
                in_use.remove(proposed)
                unused.add(proposed)

    # TODO: Make it possible to return the component parameters
    return _squeeze_output(x_n), _squeeze_output(c_n)