import numpy as np
from scipy.special import logsumexp as sp_logsumexp


def log_likelihood(X, weights, means, covars, scale):
    """Modified sklearn GMM function predicting distribution membership.

    Returns the mixture LL for points X. Used by :func:`~assign_samples` and
    :func:`~PopPUNK.plot.plot_contours`

    Args:
        X (numpy.array)
            n x 2 array of core and accessory distances for n samples
        weights (numpy.array)
            Component weights from :func:`~fit2dMultiGaussian`
        means (numpy.array)
            Component means from :func:`~fit2dMultiGaussian`
        covars (numpy.array)
            Component covariances from :func:`~fit2dMultiGaussian`
        scale (numpy.array)
            Scaling of core and accessory distances from
            :func:`~fit2dMultiGaussian`

    Returns:
        logprob (numpy.array)
            The log of the probabilities under the mixture model
        lpr (numpy.array)
            The components of the log probability from each mixture component
    """
    # Weighted per-component log density of each scaled point:
    # log N(x_i / scale | mu_k, Sigma_k) + log w_k.
    # log_multivariate_normal_density is expected to be defined alongside
    # this function (PopPUNK bundles a port of the old scikit-learn helper);
    # a hedged stand-in is sketched below.
    lpr = (log_multivariate_normal_density(X / scale, means, covars)
           + np.log(weights))
    # Marginalise over components in log space to get the mixture LL
    logprob = sp_logsumexp(lpr, axis=1)
    return (logprob, lpr)
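# Hedged stand-in and usage sketch. log_likelihood depends on
# log_multivariate_normal_density, which PopPUNK defines elsewhere (a port of
# the old scikit-learn helper); the minimal version below, built on
# scipy.stats, assumes one full covariance matrix per mixture component, and
# the fitted parameters in the demo are made up.
from scipy.stats import multivariate_normal


def log_multivariate_normal_density(X, means, covars):
    # Shape (n, k): log N(x_i | mu_k, Sigma_k) for every point/component pair
    return np.column_stack([
        multivariate_normal.logpdf(X, mean=m, cov=c)
        for m, c in zip(means, covars)
    ])


def _demo_log_likelihood():
    weights = np.array([0.7, 0.3])
    means = np.array([[0.2, 0.3], [0.8, 0.9]])
    covars = np.array([np.eye(2) * 0.05, np.eye(2) * 0.02])
    scale = np.array([0.5, 0.1])
    X = np.array([[0.1, 0.03], [0.4, 0.09]])  # core/accessory distances
    logprob, lpr = log_likelihood(X, weights, means, covars, scale)
    # Normalising lpr by logprob gives each point's component responsibilities
    resp = np.exp(lpr - logprob[:, np.newaxis])
    return logprob, resp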
def logsumexp(a, axis=None):
    """Compute the log of the sum of exponentials, log(e^{a_1} + ... + e^{a_n}), of a.

    Avoids numerical overflow by subtracting the maximum before
    exponentiating.

    Parameters
    ----------
    a : array_like
        The vector to exponentiate and sum
    axis : int, optional
        The axis along which to apply the operation. Default is None.

    Returns
    -------
    log(sum(exp(a), axis=axis))

    Notes
    -----
    This function was taken from the mailing list
    http://mail.scipy.org/pipermail/scipy-user/2009-October/022931.html

    This should be superseded by the ufunc when it is finished.
    """
    if axis is None:
        # Use the scipy version over the flattened array.
        return sp_logsumexp(a)
    a = np.asarray(a)
    shp = list(a.shape)
    shp[axis] = 1
    # Shift by the per-axis maximum so np.exp cannot overflow,
    # then add the shift back after the log
    a_max = a.max(axis=axis)
    s = np.log(np.exp(a - a_max.reshape(shp)).sum(axis=axis))
    lse = a_max + s
    return lse
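# Quick sanity check with arbitrary values: a naive
# np.log(np.exp(a).sum(axis=1)) overflows to inf on the first row and
# underflows to -inf on the second, while the shifted version stays finite
# and matches scipy's implementation.
def _demo_logsumexp():
    a = np.array([[1000.0, 1000.5],
                  [-1000.0, -999.5]])
    stable = logsumexp(a, axis=1)  # approx [1000.974, -999.026]
    assert np.allclose(stable, sp_logsumexp(a, axis=1))
    return stable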
def PDI(trace, model):
    """Posterior dispersion indices: for each data point x_n, compare the
    spread of p(x_n | theta) across posterior samples with its average.
    Points whose likelihood varies strongly are poorly captured."""
    from pymc3.stats import _log_post_trace  # private PyMC3 helper

    # Pointwise log-likelihoods, shape (nsamples, N_datapoints)
    log_px = _log_post_trace(trace, model)
    # Log posterior predictive density of data point n:
    # lppd_n = log E_{posterior}[ p(x_n | theta) ]
    lppd_n = sp_logsumexp(log_px, axis=0, b=1.0 / log_px.shape[0])
    mu_n = np.exp(lppd_n)
    var_log_n = np.var(log_px, axis=0)
    mu_log_n = np.mean(log_px, axis=0)
    var_n = np.var(np.exp(log_px), axis=0)
    # Variance-to-mean ratios on the natural and log scales, plus the
    # "widely applicable" variant
    pdi = np.divide(var_n, mu_n)
    pdi_log = np.divide(var_log_n, mu_log_n)
    wapdi = np.divide(var_log_n, np.log(mu_n))  # np.log(mu_n) == lppd_n
    return pdi, pdi_log, wapdi
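# Hedged end-to-end sketch: the model and data below are made up, and
# _log_post_trace is assumed importable as pymc3.stats._log_post_trace
# (a private helper in the PyMC3 3.x series; its location varies across
# versions, so this is an assumption rather than a stable API).
def _demo_PDI():
    import pymc3 as pm

    # 99 well-behaved points plus one deliberate outlier
    data = np.concatenate([np.random.normal(0.0, 1.0, 99), [8.0]])
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=0.0, sd=10.0)
        pm.Normal('obs', mu=mu, sd=1.0, observed=data)
        trace = pm.sample(1000, tune=1000)

    pdi, pdi_log, wapdi = PDI(trace, model)
    # The outlier (index 99) should stand out in wapdi relative to the bulk
    return pdi, pdi_log, wapdi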