def sample_aux_vars(betas, num_matches, time_range, covariates=None):
    pg = PyPolyaGamma()
    if covariates is None:
        covariates = identity_matrix(len(betas))
    if covariates.ndim == 2:
        num_players = len(covariates)
        aux_vars = [
            np.matrix([
                [
                    pg.pgdraw(num_matches[t][i, j],
                              (covariates[i] - covariates[j]).dot(betas[t]))  # entries
                    for j in range(num_players)  # columns
                ]
                for i in range(num_players)  # rows
            ])
            for t in time_range  # index of matrix-list
        ]
    else:
        num_players = len(covariates[0])
        aux_vars = [
            np.matrix([
                [
                    pg.pgdraw(num_matches[t][i, j],
                              (covariates[t][i] - covariates[t][j]).dot(betas[t]))  # entries
                    for j in range(num_players)  # columns
                ]
                for i in range(num_players)  # rows
            ])
            for t in time_range  # index of matrix-list
        ]
    return aux_vars
class BinomialBayesianTensorFiltering(GaussianBayesianTensorFiltering):
    def __init__(self, nrows, ncols, ndepth, pg_seed=42, **kwargs):
        super().__init__(nrows, ncols, ndepth, **kwargs)

        # Initialize the Polya-Gamma sampler
        from pypolyagamma import PyPolyaGamma
        self.pg = PyPolyaGamma(seed=pg_seed)

        self.nu2 = np.zeros((nrows, ncols, ndepth))
        self.nu2_flat = np.zeros(np.prod(self.nu2.shape))
        self.sample_nu2 = True

    def _resample_W(self, data):
        Y, N = data
        kappa = (Y - N / 2) * self.nu2
        super()._resample_W(kappa)

    def _resample_V(self, data):
        Y, N = data
        kappa = (Y - N / 2) * self.nu2
        super()._resample_V(kappa)

    def _resample_nu2(self, data):
        '''Update the latent variables, which lead to variance terms in the
        gaussian sampler steps.'''
        Y, N = data
        Mu = np.einsum('nk,mtk->nmt', self.W, self.V)
        # missing = np.isnan(Y)
        # for s in np.ndindex(Y.shape):
        #     if missing[s]:
        #         continue
        #     self.nu2[s] = 1/self.pg.pgdraw(N[s], Mu[s])
        # print(N.flatten()[:5], Mu.flatten()[:5], self.nu2_flat[:5])
        with np.errstate(divide='ignore'):
            self.pg.pgdrawv(N.flatten(), Mu.flatten(), self.nu2.reshape(-1))
            self.nu2 = 1 / self.nu2
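# --- Added standalone sketch (not part of the class above) ---
# A minimal, self-contained illustration of the augmentation step `_resample_nu2`
# relies on: for binomial data Y ~ Bin(N, sigmoid(Mu)), draw omega ~ PG(N, Mu) with
# the vectorized `pgdrawv` (which fills a preallocated array in place) and use
# 1/omega as the Gaussian variances paired with kappa = Y - N/2. Values are toys.
import numpy as np
from pypolyagamma import PyPolyaGamma

pg = PyPolyaGamma(seed=0)
N = np.array([5., 3., 8.])          # binomial trial counts
Mu = np.array([0.2, -1.0, 0.7])     # current logits
omega = np.empty_like(Mu)
pg.pgdrawv(N, Mu, omega)            # fills `omega` in place
nu2 = 1.0 / omega                   # per-entry variances for the Gaussian updates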
def nb_fit_bayes(Z):
    import numpy as np
    from pypolyagamma import PyPolyaGamma
    from scipy.stats import norm

    # `ilogit` (inverse logit) and `negBinomRatio` (log-ratio of negative-binomial
    # likelihoods) are assumed to be defined elsewhere in the module.
    results = []
    pgr = PyPolyaGamma(seed=0)
    model_logr = np.zeros(Z.shape[0])
    model_Psi = np.zeros(Z.shape)
    model_r = np.exp(model_logr)
    model_P = ilogit(model_Psi)
    prior_logr_sd = 100.
    Omegas = np.zeros_like(Z)
    for step in range(3000):
        # Random-walk MCMC for log(r)
        for mcmc_step in range(30):
            candidate_logr = model_logr + np.random.normal(0, 1, size=Z.shape[0])
            candidate_r = np.exp(candidate_logr)
            accept_prior = norm.logpdf(candidate_logr, loc=0, scale=prior_logr_sd) \
                - norm.logpdf(model_logr, loc=0, scale=prior_logr_sd)
            accept_likelihood = negBinomRatio(Z, candidate_r[:, np.newaxis],
                                              model_r[:, np.newaxis],
                                              model_P, model_P, log=True).sum(axis=1)
            accept_probs = np.exp(np.clip(accept_prior + accept_likelihood, -10, 1))
            accept_indices = np.random.random(size=Z.shape[0]) <= accept_probs
            model_logr[accept_indices] = candidate_logr[accept_indices]
            model_r = np.exp(model_logr)

        # Polya-Gamma sampler -- Marginal test version only
        N_ij = Z + model_r[:, np.newaxis]
        [
            pgr.pgdrawv(N_ij[i], np.repeat(model_Psi[i, 0], Z.shape[1]), Omegas[i])
            for i in range(Z.shape[0])
        ]

        # Sample the logits using only the expressed values -- Marginal test version only
        v = 1 / (Omegas.sum(axis=1) + 1 / 100.**2)
        m = v * (Z.sum(axis=1) - Z.shape[1] * model_r) / 2.
        model_Psi = np.random.normal(loc=m, scale=np.sqrt(v))[:, np.newaxis]
        model_P = ilogit(model_Psi)

        if step > 1000 and (step % 2) == 0:
            results.append([model_r, model_P[:, 0]])
            # print(model_r, model_P[:,0])
    return np.array(results)
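# --- Added sketch (assumption) ---
# The function above calls `ilogit` and `negBinomRatio` without defining them. One
# plausible minimal implementation, assuming `negBinomRatio` returns the (log) ratio
# of negative-binomial likelihoods under two parameter settings:
import numpy as np
from scipy.special import expit, gammaln

def ilogit(x):
    # Inverse logit (logistic function).
    return expit(x)

def negBinomRatio(Z, r_new, r_old, p_new, p_old, log=True):
    # log NB(Z | r_new, p_new) - log NB(Z | r_old, p_old); the z! terms cancel.
    log_ratio = (gammaln(Z + r_new) - gammaln(r_new)
                 - gammaln(Z + r_old) + gammaln(r_old)
                 + r_new * np.log(1 - p_new) - r_old * np.log(1 - p_old)
                 + Z * (np.log(p_new) - np.log(p_old)))
    return log_ratio if log else np.exp(log_ratio)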
def pg_tree_posterior(states, omega, R, path, depth, nthreads=None):
    '''
    Sample Polya-Gamma w_n,t|x_t,z_{t+1}, where the subscript n denotes the hyperplane for
    which we are augmenting with the Polya-Gamma. Thus we augment all the logistic
    regressions taken while traversing down the tree.
    :param states: This variable contains the continuous latent states. It is a list of numpy arrays
    :param omega: list for storing polya-gamma variables
    :param R: normal vectors of the hyperplanes, where the bias term is the last element in
              each array. The format is a list of arrays.
    :param path: path taken through the tree at time t. a list of numpy arrays
    :param depth: maximum depth of the tree
    :return: a list of pg rvs for each time series
    '''
    for idx in range(len(states)):
        T = states[idx][0, :].size
        b = np.ones(T * (depth - 1))
        if nthreads is None:
            nthreads = cpu_count()
        v = np.ones((depth - 1, T))
        out = np.empty(T * (depth - 1))

        # Compute parameters for conditional
        for d in range(depth - 1):
            for t in range(T):
                index = int(path[idx][d, t] - 1)  # Find which node you went through
                v[d, t] = np.matmul(R[d][:-1, index], np.array(states[idx][:, t])) + R[d][-1, index]

        seeds = np.random.randint(2 ** 16, size=nthreads)
        ppgs = [PyPolyaGamma(seed) for seed in seeds]

        # Sample in parallel
        pypolyagamma.pgdrawvpar(ppgs, b, v.flatten(order='F'), out)
        omega[idx] = out.reshape((depth - 1, T), order='F')
    return omega
def pg_spike_train(X, Y, C, Omega, D_out, nthreads=None, N=1, neg_bin=False):
    """
    Sample Polya-Gamma wy|Y,C,D,X where Y are spike trains and X are the continuous latent states
    :param X: List of continuous latent states
    :param Y: list of spike trains
    :param C: emission parameters. bias parameter is appended to last column.
    :param Omega: list used for storing polya-gamma variables
    :param D_out: Dimension of output, i.e. number of neurons
    :param nthreads: Number of threads for parallel sampling.
    :param N: Maximum number of spikes N for a binomial distribution, or number of failures
              in a negative binomial
    :param neg_bin: Boolean flag dictating whether likelihood is negative binomial
    :return:
    """
    # `n_cpu` (CPU count) and `na` (np.newaxis) are assumed to be defined at module level.
    for idx in range(len(X)):
        T = X[idx][0, 1:].size
        b = N * np.ones(T * D_out)
        if neg_bin:
            b += Y[idx].flatten(order='F')
        if nthreads is None:
            nthreads = n_cpu
        out = np.empty(T * D_out)
        V = C[:, :-1] @ X[idx][:, 1:] + C[:, -1][:, na]  # Ignore the first point of the time series

        seeds = np.random.randint(2 ** 16, size=nthreads)
        ppgs = [PyPolyaGamma(seed) for seed in seeds]
        pypolyagamma.pgdrawvpar(ppgs, b, V.flatten(order='F'), out)
        Omega[idx] = out.reshape((D_out, T), order='F')
    return Omega
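# --- Added standalone example ---
# Both spike-train helpers fan the draws out over several PyPolyaGamma instances via
# the module-level `pgdrawvpar`. A minimal version of that pattern with toy sizes:
import numpy as np
import pypolyagamma
from pypolyagamma import PyPolyaGamma

nthreads = 4
b = np.ones(1000)                    # PG shape parameters
c = np.random.randn(1000)            # PG tilts (e.g. logits)
out = np.empty(1000)
seeds = np.random.randint(2 ** 16, size=nthreads)
ppgs = [PyPolyaGamma(seed) for seed in seeds]
pypolyagamma.pgdrawvpar(ppgs, b, c, out)   # parallel draws, written into `out`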
def __init__(self, V, K, X=None, b=None, sigmasq_b=1.0, sigmasq_prior_prms=None, name=None): self.V, self.K = V, K # Initialize prior sigmasq_prior_prms = sigmasq_prior_prms if sigmasq_prior_prms is not None else {} self.sigmasq_x_prior = self._sigmasq_x_prior_class(K, **sigmasq_prior_prms) self.sigmasq_b = sigmasq_b # Initialize parameters self.X = np.sqrt(self.sigmasq_x) * npr.randn(V, K) if X is None else X * np.ones((V, K)) self.b = np.zeros((V, V)) if b is None else b * np.ones((V, V)) # Models encapsulate data # A: observed adjacency matrix # m: mask for network n specifying which features to use # mask: mask specifying which entries in A were observed/hidden self.As = [] self.ms = [] self.masks = [] # Polya-gamma RNGs num_threads = get_omp_num_threads() seeds = npr.randint(2 ** 16, size=num_threads) self.ppgs = [PyPolyaGamma(seed) for seed in seeds] # Name the model self.name = name if name is not None else "lsm_K{}".format(K)
def _smpl_fn(cls, rng, b, c, size):
    pg = PyPolyaGamma(rng.randint(2 ** 16))

    if not size and b.shape == c.shape == ():
        return pg.pgdraw(b, c)
    else:
        b, c = np.broadcast_arrays(b, c)
        out_shape = b.shape + tuple(size or ())

        smpl_val = np.empty(out_shape, dtype="double")

        b = np.tile(b, tuple(size or ()) + (1,))
        c = np.tile(c, tuple(size or ()) + (1,))

        pg.pgdrawv(
            np.asarray(b.flat).astype("double", copy=True),
            np.asarray(c.flat).astype("double", copy=True),
            np.asarray(smpl_val.flat),
        )
        return smpl_val
def _sample_reference_posterior( self, num_samples: int, num_observation: Optional[int] = None, ) -> torch.Tensor: from pypolyagamma import PyPolyaGamma from tqdm import tqdm self.dim_data = 10 # stimulus_I = torch.load(self.path / "files" / "stimulus_I.pt") design_matrix = torch.load(self.path / "files" / "design_matrix.pt") true_parameters = self.get_true_parameters(num_observation) self.raw = True observation_raw = self.get_observation(num_observation) self.raw = False mcmc_num_samples_warmup = 25000 mcmc_thinning = 25 mcmc_num_samples = mcmc_num_samples_warmup + mcmc_thinning * num_samples pg = PyPolyaGamma() X = design_matrix.numpy() obs = observation_raw.numpy() Binv = self.prior_params["precision_matrix"].numpy() sample = true_parameters.numpy().reshape(-1) # Init at true parameters samples = [] for j in tqdm(range(mcmc_num_samples)): psi = np.dot(X, sample) w = np.array([pg.pgdraw(1, b) for b in psi]) O = np.diag(w) # noqa: E741 V = np.linalg.inv(np.dot(np.dot(X.T, O), X) + Binv) m = np.dot(V, np.dot(X.T, obs.reshape(-1) - 1 * 0.5)) sample = np.random.multivariate_normal(np.ravel(m), V) samples.append(sample) samples = np.asarray(samples).astype(np.float32) samples_subset = samples[mcmc_num_samples_warmup::mcmc_thinning, :] reference_posterior_samples = torch.from_numpy(samples_subset) return reference_posterior_samples
def rng_fn(cls, rng, b, c, size):
    pg = PyPolyaGamma(rng.randint(2**16))

    if not size and b.shape == c.shape == ():
        return pg.pgdraw(b, c)
    else:
        b, c = np.broadcast_arrays(b, c)
        size = tuple(size or ())

        if len(size) > 0:
            b = np.broadcast_to(b, size)
            c = np.broadcast_to(c, size)

        smpl_val = np.empty(b.shape, dtype="double")

        pg.pgdrawv(
            np.asarray(b.flat).astype("double", copy=True),
            np.asarray(c.flat).astype("double", copy=True),
            np.asarray(smpl_val.flat),
        )
        return smpl_val
class BasicRandom(): """ Generators of random variables from the basic distributions used in Bayesian sparse regression. """ def __init__(self, seed=None): self.np_random = np.random self.pg = None self.ts = None self.set_seed(seed) def set_seed(self, seed): self.np_random.seed(seed) pg_seed = np.random.randint(1, 1 + np.iinfo(np.uint32).max) ts_seed = np.random.randint(1, 1 + np.iinfo(np.uint32).max) self.pg = PyPolyaGamma(seed=pg_seed) self.ts = ExpTiltedStableDist(seed=ts_seed) def get_state(self): rand_gen_state = { 'numpy': self.np_random.get_state(), 'tilted_stable': self.ts.get_state(), 'pypolyagamma': self.pg # Don't know how to access the internal state, so just save # the object itself. } return rand_gen_state def set_state(self, rand_gen_state): self.np_random.set_state(rand_gen_state['numpy']) self.ts.set_state(rand_gen_state['tilted_stable']) self.pg = rand_gen_state['pypolyagamma'] def polya_gamma(self, shape, tilt, size): omega = np.zeros(size) self.pg.pgdrawv(shape, tilt, omega) return omega def tilted_stable(self, char_exponent, tilt): return self.ts.rv(char_exponent, tilt)
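# --- Added usage example (illustrative values) ---
# Drawing Polya-Gamma and exponentially tilted stable variates through the BasicRandom
# wrapper above, assuming the surrounding module's imports (PyPolyaGamma,
# ExpTiltedStableDist, numpy) are available.
import numpy as np

rng = BasicRandom(seed=123)
shape = np.ones(5)                  # PG shape parameters
tilt = np.full(5, 0.25)             # PG tilt parameters
omega = rng.polya_gamma(shape, tilt, size=5)
x = rng.tilted_stable(char_exponent=0.5, tilt=2.0)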
def logisticAndReject(self, X, Y): pg = PyPolyaGamma() # use N(0, I) prior n = X.shape[0] # Output layer #out_fit = LinearRegression(fit_intercept = False).fit(self.layers[self.nlayer-1].h, Y) #self.layers[self.nlayer].W = out_fit.coef_ prior = np.random.normal(0, 1, size=self.hid_dim) w = np.zeros(n) for k in range(n): w[k] = pg.pgdraw( 1, np.dot(self.layers[self.nlayer - 1].h[k, :], prior)) kappa = self.layers[self.nlayer].h[:, 0] - 0.5 omega = np.diag(w) Vw = np.linalg.inv( np.dot(np.dot(np.transpose(self.layers[self.nlayer].h), omega), self.layers[self.nlayer].h) + 1)[0] mw = Vw * np.dot(np.transpose(self.layers[self.nlayer].h), kappa)[0] self.layers[self.nlayer].W[:, 0] = np.random.normal(mw, Vw) # Hidden layers for l in range(self.nlayer - 1, 0, -1): for j in range(self.hid_dim): # Draw prior beta curr = np.random.normal(0, 1, size=self.hid_dim) for t in range(self.mc_iter): # Draw latent w w = np.zeros(n) for k in range(n): w[k] = pg.pgdraw( 1, np.dot(self.layers[l - 1].h[k, :], curr)) # Draw posterior beta kappa = self.layers[l].h[:, j] - 0.5 omega = np.diag(w) Vw = np.linalg.inv( np.dot(np.dot(np.transpose(self.layers[l].h), omega), self.layers[l].h) + np.eye(self.hid_dim)) mw = np.dot(Vw, np.dot(np.transpose(self.layers[l].h), kappa)) curr = np.random.multivariate_normal(mw, Vw) self.layers[l].W[:, j] = curr
def logisticAndReject(self, X, Y): pg = PyPolyaGamma() # use N(0, I) prior n = X.shape[0] # Output layer out_fit = LinearRegression(fit_intercept=False).fit( self.layers[self.nlayer - 1].h, Y) self.layers[self.nlayer].W = out_fit.coef_ # Hidden layers for l in range(self.nlayer - 1, 0, -1): # for j in range(self.hid_dim): # Draw prior beta #prior = np.random.normal(0, 1, size = self.hid_dim) # Draw latent w #w = np.zeros(n) #for k in range(n): # w[k] = pg.pgdraw(1, np.dot(self.layers[l-1].h[k,:], prior)) # Draw posterior beta #kappa = self.layers[l].h[:,j] - 0.5 #omega = np.diag(w) #Vw = np.linalg.inv(np.dot(np.dot(np.transpose(self.layers[l].h), omega), self.layers[l].h) + np.eye(self.hid_dim)) #mw = np.dot(Vw, np.dot(np.transpose(self.layers[l].h), kappa)) #self.layers[l].W[:,j] = np.random.multivariate_normal(mw, Vw) # Propose propW = np.zeros(self.layers[l].W.shape) logalpha = 0 for j in range(self.hid_dim): hid_fit = LogisticRegression(fit_intercept=False).fit( self.layers[l - 1].h, self.layers[l].h[:, j]) propW[:, j] = hid_fit.coef_ + np.random.normal(size=len(propW[:, j])) prop_hW = expit(np.dot(self.layers[l - 1].h, propW[:, j])) curr_hW = expit( np.dot(self.layers[l - 1].h, self.layers[l].W[:, j])) # Accept-Reject logalpha = sum( self.layers[l].h[:, j] * np.log(prop_hW / curr_hW) + (1 - self.layers[l].h[:, j]) * np.log((1 - prop_hW) / (1 - curr_hW))) if np.log(np.random.uniform()) < logalpha: self.layers[l].W[:, j] = propW[:, j]
def pg_spike_train(X, C, Omega, D_out, nthreads=None):
    '''
    Sample Polya-Gamma wy|Y,C,D,X where Y are spike trains and X are the continuous latent states
    :param X: continuous latent states
    :param C: emission parameters. bias parameter is appended to last column.
    :param Omega: list used for storing polya-gamma variables
    :param D_out: Dimension of output, i.e. number of neurons
    :return: polya gamma samples from conditional posterior in a list of numpy arrays
    '''
    for idx in range(len(X)):
        T = X[idx][0, 1:].size
        b = np.ones(T * D_out)
        if nthreads is None:
            nthreads = cpu_count()
        out = np.empty(T * D_out)
        V = C[:, :-1] @ X[idx][:, 1:] + C[:, -1][:, na]  # Ignore the initial point of the time series

        seeds = np.random.randint(2 ** 16, size=nthreads)
        ppgs = [PyPolyaGamma(seed) for seed in seeds]
        pypolyagamma.pgdrawvpar(ppgs, b, V.flatten(order='F'), out)
        Omega[idx] = out.reshape((D_out, T), order='F')
    return Omega
import argparse

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from polyagamma import polyagamma
from pypolyagamma import PyPolyaGamma

sns.set_style("darkgrid")
rng = np.random.default_rng(0)
pg = PyPolyaGamma(0)
data = {
    "devroye": None,
    "alternate": None,
    "gamma": None,
    "saddle": None,
    "$pypolyagamma$": None,
}


def plot_densities(h=1, z=0, size=1000):
    for method in data:
        if method == "$pypolyagamma$":
            data[method] = [pg.pgdraw(h, z) for _ in range(size)]
        else:
            data[method] = polyagamma(h=h, z=z, method=method,
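# --- Added sanity check (not in the original script) ---
# The PG(h, z) mean has the closed form h / (2 z) * tanh(z / 2) (Polson et al., 2013),
# so the empirical mean from either sampler should match it.
import numpy as np
from pypolyagamma import PyPolyaGamma

pg_check = PyPolyaGamma(seed=1)
h, z, n_draws = 2.0, 1.5, 50000
draws = np.array([pg_check.pgdraw(h, z) for _ in range(n_draws)])
analytic_mean = h / (2 * z) * np.tanh(z / 2)
print(draws.mean(), analytic_mean)   # should agree to roughly two decimal places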
def __init__(self, X, cov_params, base_measure, lmbda=None, burnin=1000, num_integration_points=1000, max_iterations=2000, update_hyperparams=True, update_basemeasure=True, nthreads=1, gp_mu=0, sample_hyperparams_iter=10): """ Initialises class of Gibbs sampler. Sampled data is saved in dictionary 'self.data'. The dictionary self.data contains all the sampled data. 'X' are the locations (observations and latent), 'g' the GP at these locations, 'lmbda' the max rate of latent Poisson process, 'cov_params' the kernel parameters, 'M' the number of latent events, 'time' the time for samples, 'bm_params' the base measure parameters, 'gp_mu' the mean of the GP prior. :param X: Data. :type X: numpy.ndarray [instances x features] :param cov_params: Kernel hyperparameters. List with first entry the prefactor and second a D-dimensional array with length scales. :type cov_params: list :param base_measure: :type base_measure: BaseMeasure :param lmbda: Initial value for max. Poisson rate. If None it will be equal to number of data points. Default is None. :type lmbda: float :param burnin: Number of iteration before the posterior will be sampled. Default=1000. :type burnin: int :param num_integration_points: Number of integration points. Only used for predictive likelihood. Default=1000. :type num_integration_points: int :param max_iterations: Number of iterations the posterior is sampled. Default=2000. :type max_iterations: int :param update_hyperparams: Whether GP hyperparameters should be sampled. Default=True. :type update_hyperparams: bool :param update_basemeasure: Whether base measure parameters should be sampled. Can only be done for certain base measure ('normal', 'laplace', 'standard_t'). Default=True. :type update_basemeasure: bool :param nthreads: Number of threads used for PG sampling. Default=1. :type nthreads: int :param gp_mu: Mean of GP prior. :type gp_mu: float :param sample_hyperparams_iter: Every x^th step hyperparameters are sampler. Default=0. :type sample_hyperparams_iter: float """ self.max_iterations = int(max_iterations) self.D = X.shape[1] self.cov_params = cov_params self.X = X self.N = self.X.shape[0] self.base_measure = base_measure self.noise = 1e-4 if lmbda is None: self.lmbda = self.N / 1. else: self.lmbda = lmbda seeds = numpy.random.randint(2**16, size=nthreads) self.pg = [PyPolyaGamma(seed) for seed in seeds] self.M = int(self.lmbda) self.M_save = numpy.empty(self.max_iterations) # Position of all events (first N are the actual observed ones) self.X_all = numpy.empty([self.N + self.M, self.D]) self.X_all[:self.N] = self.X self.X_all[self.N:] = base_measure.sample_density(self.M) self.marks = numpy.empty(self.N + self.M) self.K = self.cov_func(self.X_all, self.X_all) self.K += self.noise * numpy.eye(self.K.shape[0]) self.L = numpy.linalg.cholesky(self.K) self.L_inv = solve_triangular(self.L, numpy.eye(self.L.shape[0]), lower=True, check_finite=False) self.K_inv = self.L_inv.T.dot(self.L_inv) self.gp_mu = gp_mu self.pred_log_likelihood = [] self.g = numpy.zeros([self.N + self.M]) # Probability of insertion or deletion proposal self.num_iterations = 0 self.burnin = int(burnin) self.num_integration_points = num_integration_points self.place_integration_points() self.update_hyperparams = update_hyperparams self.update_basemeasure = update_basemeasure self.update_hyperparams_iter = sample_hyperparams_iter self.data = { 'X': [], 'g': [], 'lmbda': [], 'cov_params': [], 'M': [], 'time': [], 'bm_params': [], 'gp_mu': [] }
def pg_mcmc(true_params, obs, duration=100, dt=1, seed=None, prior_dist=None): """Polya-Gamma sampler for GLM Returns ------- array : samples from posterior """ if prior_dist is None: prior_dist = smoothing_prior(n_params=true_params.size, seed=seed) # seeding np.random.seed(seed) pg = PyPolyaGamma() # seed=seed # observation I = obs['I'].reshape(1,-1) S_obs = obs['data'].reshape(-1) # simulation protocol num_param_inf = len(true_params) dt = 1 t = np.arange(0, duration, dt) N = 1 # Number of trials M = num_param_inf-1 # Length of the filter # build covariate matrix X, such that X * h returns convolution of x with filter h X = np.zeros(shape=(len(t), M)) for j in range(M): X[j:,j] = I[0,0:len(t)-j] # prior # smoothing prior on h; N(0, 1) on b0. Smoothness encouraged by penalyzing # 2nd order differences of elements of filter #prior_dist = prior(n_params=true_params.size, seed=seed) Binv = prior_dist.P # The sampler consists of two iterative Gibbs updates # 1) sample auxiliary variables: w ~ PG(N, psi) # 2) sample parameters: beta ~ N(m, V); V = inv(X'O X + Binv), m = V*(X'k), k = y - N/2 nsamp = 500000 # samples to evaluate the posterior # add a column of 1s to the covariate matrix X, in order to model the offset too X = np.concatenate((np.ones(shape=(len(t), 1)), X), axis=1) beta = true_params*1. BETA = np.zeros((M+1,nsamp)) for j in tqdm(range(1, nsamp)): psi = np.dot(X, beta) w = np.array([pg.pgdraw(N, b) for b in psi]) O = np.diag(w) V = np.linalg.inv(np.dot(np.dot(X.T, O), X) + Binv) m = np.dot(V, np.dot(X.T, S_obs - N * 0.5)) beta = np.random.multivariate_normal(np.ravel(m), V) BETA[:,j] = beta # burn-in burn_in = 100000 BETA_sub_samp = BETA[:, burn_in:nsamp:30] # return sampling results return BETA_sub_samp
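# --- Added sketch (assumption) ---
# `smoothing_prior` is not shown above. One plausible construction of the precision
# matrix `prior_dist.P` it supplies, matching the comments (N(0, 1) on the offset b0
# and a penalty on second-order differences of the M filter coefficients), would be:
import numpy as np

def smoothing_prior_precision(n_params, lam=1.0, eps=1e-6):
    M = n_params - 1
    D2 = np.zeros((M - 2, M))
    for i in range(M - 2):
        D2[i, i:i + 3] = [1.0, -2.0, 1.0]           # second-order difference stencil
    P = np.zeros((n_params, n_params))
    P[0, 0] = 1.0                                   # N(0, 1) prior on the offset
    P[1:, 1:] = lam * D2.T @ D2 + eps * np.eye(M)   # small ridge keeps it invertible
    return P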
# Consider a simple binomial model with unknown probability.
# Model the probability as the logistic of a scalar Gaussian.
N = 10
mu = 0.0
sigmasq = 1.0
x_true = npr.normal(mu, np.sqrt(sigmasq))
p_true = logistic(x_true)
y = npr.binomial(N, p_true)

# Gibbs sample the posterior distribution p(x | y).
# Introduce PG(N,0) auxiliary variables to render
# the model conjugate. First, initialize the PG
# sampler and the model parameters.
N_samples = 10000
pg = PyPolyaGamma(seed=0)
xs = np.zeros(N_samples)
omegas = np.ones(N_samples)

# Now run the Gibbs sampler
for i in range(1, N_samples):
    # Sample omega given x, y from its PG conditional
    omegas[i] = pg.pgdraw(N, xs[i-1])
    # Sample x given omega, y from its Gaussian conditional
    sigmasq_hat = 1. / (1. / sigmasq + omegas[i])
    mu_hat = sigmasq_hat * (mu / sigmasq + (y - N / 2.))
    xs[i] = npr.normal(mu_hat, np.sqrt(sigmasq_hat))

# Compute the true posterior density
xx = np.linspace(x_true - 3., x_true + 3, 1000)
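# --- Added completion sketch (the snippet above stops before the comparison) ---
# One way to finish it: evaluate the exact posterior p(x | y) on the grid `xx` by
# combining the binomial likelihood with the Gaussian prior and normalizing numerically.
from scipy.special import expit
from scipy.stats import binom, norm

log_post = binom.logpmf(y, N, expit(xx)) + norm.logpdf(xx, mu, np.sqrt(sigmasq))
post = np.exp(log_post - log_post.max())
post /= np.trapz(post, xx)
# A histogram of the Gibbs samples `xs` (after discarding burn-in) should match `post`.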
class GibbSampler(SGCP_Sampler):
    def __init__(self, *args, **kwargs):
        super(GibbSampler, self).__init__(*args, **kwargs)
        self.pg = PyPolyaGamma(seed=np.random.randint(2 ** 16, size=None))

    def run(self):
        print('Starting Gibbs')
        latent_events = np.random.rand(self.M, self.dim) * self.diff
        latent_marks = np.random.rand(self.M, 1) * 2 ** 10  # distribute on space
        marks = np.random.rand(self.N, 1) * 2 ** 10  # distribute on space
        start = time.time()
        for k in range(self.maxiter):
            if k == 1:
                loop_start = time.time()
            if k == 2:
                print('Approximately %.2f min to go' % (loop * self.maxiter / 60))

            if self.inducing_points is not None:
                if k == 0:
                    self.events_base = self.inducing_points
                self.K = self.cov_function(self.events_base, self.events_base, self.kernelparameter)
                self.K += np.eye(len(self.K)) * self.noise
                self.L = np.linalg.cholesky(self.K)
                self.L_inv = np.linalg.solve(self.L, np.eye(self.L.shape[0]))
                self.K_inv = self.L_inv.T @ self.L_inv

                self.sample_upper_bound(latent_marks.shape[0])
                self.sample_gaussian_induced(latent_events, marks, latent_marks)
                if self.sample_kernel_parameter:
                    if (k % 10) == 0:
                        self.sample_kernelparameter()
                intensity = self.sample_results()
                latent_events, g_M, g_N = self.sample_latent_events_induced()
                latent_marks = self.sample_latent_marks(g_M)
                marks = self.sample_marks(g_N)
            else:
                self.events_base = np.concatenate((self.observed_events, latent_events), axis=0)
                self.K = self.cov_function(self.events_base, self.events_base, self.kernelparameter)
                self.K += np.eye(len(self.K)) * self.noise
                self.L = np.linalg.cholesky(self.K)
                self.L_inv = np.linalg.solve(self.L, np.eye(self.L.shape[0]))
                self.K_inv = self.L_inv.T @ self.L_inv

                self.sample_upper_bound(latent_marks.shape[0])
                self.sample_gaussian(marks, latent_marks)
                if self.sample_kernel_parameter:
                    if (k % 10) == 0:
                        self.sample_kernelparameter()  # updates the kernels
                intensity = self.sample_results()
                latent_events, g_M = self.sample_latent_events()
                latent_marks = self.sample_latent_marks(g_M)
                marks = self.sample_marks(np.array(self.g[:self.N, :]))

            if ((k > 0) or (k == self.maxiter - 1)) and (k % 50 == 0):
                if self.inducing_points is not None:
                    print('%d with inducing points' % k)
                else:
                    print(k)
            self.llambdas[k] = self.upper_bound
            self.latent_M[k] = latent_marks.shape[0]
            self.intensities[k, :] = intensity
            # self.log_likelihoods[k, :] = log_likelihood
            if k == 1:
                loop = time.time() - loop_start

        self.time = (time.time() - start) / 60
        print('Done in %.2f min' % self.time)
        self.mean_intensities = np.mean(self.intensities[self.burnin:], axis=0)

    ######################################################################

    def sample_gaussian_induced(self, latent_events, marks, latent_marks):
        L_ind = len(self.inducing_points)
        kN = self.cov_function(self.inducing_points, self.observed_events, self.kernelparameter)
        kM = self.cov_function(self.inducing_points, latent_events, self.kernelparameter)
        BN = kN[np.newaxis, ::] * kN[::, np.newaxis]  # (L,L,N)
        BM = kM[np.newaxis, ::] * kM[::, np.newaxis]  # (L,L,M)
        wN = np.repeat(marks, L_ind, axis=1)
        wN = np.repeat(wN[:, :, np.newaxis], L_ind, axis=2).T
        wM = np.repeat(latent_marks, L_ind, axis=1)
        wM = np.repeat(wM[:, :, np.newaxis], L_ind, axis=2).T
        B = np.sum(BN * wN, axis=2) + np.sum(BM * wM, axis=2)
        BLinv = np.linalg.solve(B + self.K, np.eye(L_ind))
        sigmaL = self.K @ BLinv @ self.K
        muL = 0.5 * self.K @ BLinv @ (np.sum(kN, axis=1, keepdims=True)
                                      - np.sum(kM, axis=1, keepdims=True))
        self.g = Utils.sample_gaussian(muL, sigmaL)  # + np.eye(L_ind) * self.noise)

    def sample_latent_events_induced(self):
        xx = 0
        while (xx == 0):
            latent_events, g_J, g_N = self.sample_latent_process_induced()
            xx = len(latent_events)
        return latent_events, g_J, g_N

    def sample_latent_process_induced(self):
        J = np.random.poisson(lam=self.vol * self.upper_bound, size=None)  # nb_events
        events = np.random.rand(J, self.dim) * self.diff
        g = self.sample_cond(np.concatenate((events, self.observed_events), axis=0))
        g_J = np.array(g[:len(events)])
        g_N = np.array(g[len(events):])
        R = np.random.rand(J) * self.upper_bound
        idx = R < self.upper_bound * SGCP_Sampler.sigmoid(-g_J.flatten())
        acc_events = events[idx, :]
        return acc_events, g_J[idx, :], g_N

    ######################################################################

    def sample_gaussian(self, marks, latent_marks):
        M = latent_marks.shape[0]
        marks_concat = np.concatenate((marks, latent_marks), axis=0)
        sigma = np.diag(1. / marks_concat.flatten())
        sigma_NM = sigma - sigma @ np.linalg.solve(sigma + self.K, np.eye(self.N + M)) @ sigma
        u = np.concatenate((np.full((self.N, 1), 1. / 2, ), np.full((M, 1), -1. / 2)), axis=0)
        mean_NM = sigma_NM @ u
        self.g = Utils.sample_gaussian(mean_NM, sigma_NM)  # + np.eye(sigma_NM.shape[0]) * self.noise)

    def sample_latent_process(self):
        J = np.random.poisson(lam=self.vol * self.upper_bound, size=None)  # nb_events
        events = np.random.rand(J, self.dim) * self.diff
        g_J = self.sample_cond(events)
        R = np.random.rand(J) * self.upper_bound
        idx = R < self.upper_bound * SGCP_Sampler.sigmoid(-g_J.flatten())
        acc_events = events[idx, :]
        return acc_events, g_J[idx, :]

    def sample_latent_events(self):
        xx = 0
        while (xx == 0):
            latent_events, g_J = self.sample_latent_process()
            xx = len(latent_events)
        return latent_events, g_J

    ######################################################################

    def sample_upper_bound(self, M):
        self.upper_bound = np.random.gamma(shape=self.alpha + M + self.N,
                                           scale=1. / (self.beta + self.vol))

    def sample_latent_marks(self, g_M):
        M = g_M.shape[0]
        latent_marks = np.zeros([M, 1])
        for i in range(M):
            latent_marks[i, :] = self.pg.pgdraw(1, g_M[i, :])
        return latent_marks

    def sample_kernelparameter(self):
        prop = np.random.randn(self.dim + 1)
        alpha = np.exp(np.log(self.kernelparameter[0]) + prop[0] * 0.05)
        beta = np.exp(np.log(self.kernelparameter[1]) + prop[1:] * 0.05)
        proposal = [alpha, beta]
        K = self.cov_function(self.events_base, self.events_base, proposal)
        K += np.eye(K.shape[0]) * self.noise
        L = np.linalg.cholesky(K)
        L_inv = np.linalg.solve(L, np.eye(L.shape[0]))
        K_inv = L_inv.T @ L_inv
        prop = - np.sum(np.log(L.diagonal())) - 0.5 * self.g.T @ K_inv @ self.g
        old = - np.sum(np.log(self.L.diagonal())) - 0.5 * self.g.T @ self.K_inv @ self.g
        A = min(0, np.asscalar(prop - old))
        u = np.log(np.random.rand())
        if u < A:
            self.K = K
            self.L = L
            self.L_inv = L_inv
            self.K_inv = K_inv
            self.kernelparameter = proposal
            print(self.kernelparameter)

    def predict(self, Xtest):
        # predict unknown function values of Xtest
        C = self.cov_function(self.events_base, Xtest, self.kernelparameter)
        K_test = self.cov_function(Xtest, Xtest, self.kernelparameter)
        mean_predict = C.T @ self.K_inv @ self.g
        cov_predict = K_test - C.T @ self.K_inv @ C
        return mean_predict, cov_predict  # posterior mean and covariance

    def sample_cond(self, Xtest):
        mean, cov = self.predict(Xtest)
        tmp = Utils.sample_gaussian(mean, cov + np.eye(cov.shape[0]) * self.noise)
        return tmp

    def sample_marks(self, g_N):
        marks = np.zeros([self.N, 1])
        for i in range(self.N):
            marks[i, :] = self.pg.pgdraw(1, g_N[i, :])
        return marks

    def sample_results(self):
        self.events.append(self.events_base)
        self.gaussians.append(self.g)
        g = self.sample_cond(self.grid_events)
        return self.upper_bound * SGCP_Sampler.sigmoid(g.flatten())
class WEIFTM():

    NO_TOPIC = -1

    def __init__(self, n_topics, alpha_0=.1, beta_0=.01, sig_0=1,
                 topic_sparsity=.3, delta_0=1):
        self.n_topics = n_topics
        self.alpha_0 = alpha_0
        self.beta_0 = beta_0
        self.sig_0 = sig_0
        self.topic_sparsity = topic_sparsity
        self.delta_0 = delta_0
        self.log_likelihoods = []
        self.accuracies = []

    def get_documents_from_directory(self, directory_path):
        self.labels = {}
        count = 0
        class_count = -1
        classes = set()
        documents = []
        for (path, dirs, files) in os.walk(directory_path):
            files.sort()
            cl = path.strip(os.path.sep).split(os.path.sep)[-1]
            for file_path in files:
                if file_path.endswith('.txt'):
                    document_path = os.path.join(path, file_path)
                    try:
                        file = open(document_path, 'r')
                        document = file.read()
                        file.close()
                        documents.append(document)
                        if cl not in classes:
                            classes.add(cl)
                            class_count += 1
                        self.labels[count] = class_count
                        count += 1
                    except Exception as e:
                        print(e)
        return documents

    def get_documents_from_csv(self, csv_path, text_name="text", class_name="class"):
        with open(csv_path, 'r', encoding='utf8', errors='ignore') as csv_file:
            dataframe = pd.read_csv(StringIO(csv_file.read()))
        # dataframe = dataframe.iloc[np.random.permutation(dataframe.shape[0])[:10]]
        # dataframe = dataframe.reset_index()
        dataframe = dataframe.fillna(value={class_name: ''})
        dataframe[class_name] = LabelEncoder().fit_transform(dataframe[class_name])
        self.labels = dict(dataframe[class_name])
        return list(dataframe[text_name])

    def get_embedding_vocabulary(self, embedding_path):
        vocabulary = set()
        with open(embedding_path) as emb_file:
            for line in emb_file:
                if line != "":
                    word = line.strip().split(" ", 1)[0]
                    vocabulary.add(word)
        return vocabulary

    def load_corpus(self, documents, vocabulary, custom_stop_words=[]):
        preprocessed_documents = preprocess_tweets(documents, vocabulary, custom_stop_words)
        self.dictionary = corpora.Dictionary(preprocessed_documents)
        self.n_words = len(self.dictionary)
        self.corpus = [
            self.dictionary.doc2bow(document)
            for document in preprocessed_documents
        ]
        self.n_documents = len(self.corpus)

    def load_embeddings(self, embedding_size, embedding_path, corpus_dir,
                        use_pca=False, pca_var=.97):
        self.embedding_size = embedding_size
        cache_dir = "./cache/{}/".format(
            corpus_dir.strip(os.path.sep).strip('.csv').split(os.path.sep)[-1])
        embedding_cache_path = cache_dir + "embedding{}.npy".format(embedding_size)
        if os.path.isfile(embedding_cache_path):
            self.f = np.load(embedding_cache_path)
        else:
            vocabulary = set(self.dictionary.values())
            self.f = np.empty((self.n_words, self.embedding_size))
            with open(embedding_path) as emb_file:
                for line in emb_file:
                    if line != "":
                        word, str_embedding = line.strip().split(" ", 1)
                        if word in vocabulary:
                            word_index = self.dictionary.token2id[word]
                            self.f[word_index] = np.array(str_embedding.split(" "), dtype=float)
            if not os.path.isdir(cache_dir):
                os.makedirs(cache_dir)
            np.save(embedding_cache_path, self.f)

        if use_pca == True:
            self._embedding_PCA(pca_var)
        self.f_outer = np.array([np.outer(f_v, f_v) for f_v in self.f])

    def _embedding_PCA(self, var_percent):
        self.pca = PCA(self.embedding_size)
        self.f_raw = self.f
        self.pca.fit(self.f_raw)
        n_components = np.argmax(
            np.cumsum(self.pca.explained_variance_ratio_) > var_percent)
        self.f = self.pca.transform(self.f_raw)[:, :n_components]
        self.embedding_size_raw = self.embedding_size
        self.embedding_size = n_components

    def initialize_parameters(self):
        self._init_b()
        self._init_n_m_Z()
        self._init_lamb()
        self._init_c()
        self._init_pi()
        self._init_embedding_aux_params()

    def _init_b(self):
        self.b = np.random.binomial(1, self.topic_sparsity,
                                    (self.n_topics, self.n_words))
        self.b_sum_ax1 = np.sum(self.b, axis=1)

    def _init_n_m_Z(self):
        self.n = np.zeros((self.n_topics, self.n_words))
        self.m = np.zeros((self.n_documents, self.n_topics))
        self.Z = []
        for document_index, document in enumerate(self.corpus):
            Z_document = []
            for word_occurrence_tuple in document:
                word_index = word_occurrence_tuple[0]
                count = word_occurrence_tuple[1]
                for _ in range(count):
                    nonzero_b = self.b[:, word_index].nonzero()[0]
                    if len(nonzero_b) == 0:
                        topic_assignment = WEIFTM.NO_TOPIC
                    else:
                        topic_assignment = np.random.choice(nonzero_b)
                        self.n[topic_assignment, word_index] += 1
                        self.m[document_index, topic_assignment] += 1
                    Z_document.append([word_index, topic_assignment])
            self.Z.append(Z_document)

    def _init_lamb(self):
        sig_I_lamb = self.sig_0**2 * np.eye(self.embedding_size)
        self.lamb = np.random.multivariate_normal(
            np.zeros(self.embedding_size), sig_I_lamb, size=self.n_topics)
        self.sig_I_lamb_inv = self.sig_0**-2 * np.eye(self.embedding_size)

    def _init_c(self):
        sig_I_c = self.sig_0**2 * np.eye(self.n_topics)
        self.c = np.random.multivariate_normal(
            np.zeros(self.n_topics), sig_I_c).reshape((-1, 1))

    def _init_pi(self):
        self.pi = np.matmul(self.lamb, self.f.T) + self.c

    def _init_embedding_aux_params(self):
        self.pg = PyPolyaGamma()
        self.gamma = np.empty((self.n_topics, self.n_words))
        self.gamma_sum_ax1 = np.zeros(self.n_topics)
        self.SIGMA_inv = np.empty(
            (self.n_topics, self.embedding_size, self.embedding_size))
        self.b_cgam = np.empty((self.n_topics, self.n_words))
        self.b_cgam_sum_ax1 = np.zeros(self.n_topics)
        self.MU = np.empty((self.n_topics, self.embedding_size))

        for k in range(self.n_topics):
            for word_index in range(self.n_words):
                self.gamma[k, word_index] = self.pg.pgdraw(1, self.pi[k, word_index])
                self.gamma_sum_ax1[k] += self.gamma[k, word_index]
            self.SIGMA_inv[k] = np.matmul(self.f_outer.T, self.gamma[k]) + self.sig_I_lamb_inv
            self.b_cgam[k] = self.b[k] - .5 - self.c[k] * self.gamma[k]
            self.b_cgam_sum_ax1[k] = np.sum(self.b_cgam[k])

        self.b_cgam_f = np.matmul(self.b_cgam, self.f)
        for k in range(self.n_topics):
            SIGMA_k = np.linalg.inv(self.SIGMA_inv[k])
            self.MU[k] = np.matmul(SIGMA_k, self.b_cgam_f[k])

    def train(self, iters=10):
        for i in range(iters):
            start_time = time.time()
            self._gibbs_sample()
            print("gibbs", time.time() - start_time)
            # start_time = time.time()
            self.log_likelihoods.append(self._compute_total_log_likelihood())
            # print("log_likelihood", time.time() - start_time)
            self.accuracies.append(self.get_classification_accuracy())
        return self.log_likelihoods, self.accuracies

    def _gibbs_sample(self):
        # gibbs_iter_time = time.time()
        for document_index, Z_document in enumerate(self.Z):
            document_length = len(Z_document)
            for token_index, Z_token_pair in enumerate(Z_document):
                # print("gibbs iter", time.time() - gibbs_iter_time)
                # gibbs_iter_time = time.time()
                # print(token_index, "/", document_length, document_index, "/", self.n_documents)
                word_index = Z_token_pair[0]
                topic_assignment = Z_token_pair[1]
                if topic_assignment != WEIFTM.NO_TOPIC:
                    self.n[topic_assignment, word_index] -= 1
                    self.m[document_index, topic_assignment] -= 1

                # start_time = time.time()
                self._sample_b(word_index)
                # print("sample_b", time.time() - start_time)

                # start_time = time.time()
                topic_assignment = self._sample_z(document_index, word_index)
                # print("sample_z", time.time() - start_time)
                Z_token_pair[1] = topic_assignment
                if topic_assignment != WEIFTM.NO_TOPIC:
                    self.n[topic_assignment, word_index] += 1
                    self.m[document_index, topic_assignment] += 1

                # start_time = time.time()
                self._sample_embeddings(word_index)
                # print("sample_embeddings", time.time() - start_time)

    def _sample_b(self, word_index):
        b_not_v = self.b_sum_ax1 - self.b[:, word_index]
        b_not_v[b_not_v == 0] += self.delta_0
        b_not_v_beta = b_not_v * self.beta_0
        num_a = b_not_v_beta + np.sum(self.n, axis=1)
        num_b = self.beta_0
        num = beta_function(num_a, num_b)
        denom = beta_function(b_not_v_beta, self.beta_0)
        activation = sigmoid(self.pi[:, word_index])
        p_1 = num * activation / denom
        p_0 = 1 - activation
        p = p_1 / (p_1 + p_0)
        self.b_sum_ax1 -= self.b[:, word_index]
        self.b[:, word_index] |= np.random.binomial(1, p)
        self.b_sum_ax1 += self.b[:, word_index]

    def _sample_z(self, document_index, word_index):
        if self.b[:, word_index].sum() == 0:
            topic_assignment = WEIFTM.NO_TOPIC
        else:
            p = (self.alpha_0 + self.m[document_index]) \
                * (self.n[:, word_index].flatten() + self.beta_0) \
                / (self.n[:, word_index] + self.beta_0).sum() \
                * self.b[:, word_index]
            p /= p.sum()
            topic_assignment = np.random.multinomial(1, p).argmax()
        return topic_assignment

    def _sample_embeddings(self, word_index):
        for k in range(self.n_topics):
            # sample gamma
            old_gamma_k_word_index = self.gamma[k, word_index]
            self.gamma[k, word_index] = self.pg.pgdraw(1, self.pi[k, word_index])
            self.gamma_sum_ax1[k] += self.gamma[k, word_index] - old_gamma_k_word_index

            # sample lamb
            self.SIGMA_inv[k] += (self.gamma[k, word_index]
                                  - old_gamma_k_word_index) * self.f_outer[word_index]
            SIGMA_k = np.linalg.inv(self.SIGMA_inv[k])
            old_b_cgam_k_word_index = self.b_cgam[k, word_index]
            self.b_cgam[k, word_index] = self.b[k, word_index] - .5 \
                - self.c[k] * self.gamma[k, word_index]
            self.b_cgam_sum_ax1[k] += self.b_cgam[k, word_index] - old_b_cgam_k_word_index
            self.b_cgam_f[k] = self.b_cgam[k, word_index] * self.f[word_index]
            self.MU[k] = np.matmul(SIGMA_k, self.b_cgam_f[k])
            self.lamb[k] = np.random.multivariate_normal(self.MU[k], SIGMA_k)

            # sample c
            sig_k = (self.gamma_sum_ax1[k] + self.sig_0**-2)**-1
            mu_k = sig_k * self.b_cgam_sum_ax1[k]
            self.c[k] = np.random.normal(mu_k, sig_k)

        # update pi
        self.pi = np.matmul(self.lamb, self.f.T) + self.c

    def dirichlet_pdf_log(self, x, alpha):
        return np.sum(np.log(np.power(x, alpha - 1))) \
            - np.sum(np.log(gamma_function(alpha))) \
            + np.log(gamma_function(np.sum(alpha)))

    def _compute_total_log_likelihood(self):
        log_likelihood = 0
        theta = self.get_theta()
        log_theta = np.log(theta)
        phi = self.get_phi()
        log_phi = np.log(phi)
        ALPHA = self.alpha_0 * np.ones(self.n_topics)

        for document_index in range(self.n_documents):
            # theta
            # log_likelihood += np.log(dirichlet.pdf(theta[document_index], ALPHA))
            log_likelihood += self.dirichlet_pdf_log(theta[document_index], ALPHA)
            for token_index in range(len(self.Z[document_index])):
                word_index, topic_index = self.Z[document_index][token_index]
                if topic_index != WEIFTM.NO_TOPIC:
                    # w
                    log_likelihood += log_phi[topic_index, word_index]
                    # z
                    log_likelihood += log_theta[document_index, topic_index]

        log_likelihood += np.sum(np.log(bernoulli.pmf(self.b, sigmoid(self.pi))))

        for k in range(self.n_topics):
            # phi
            b_k_nonzero = self.b[k].nonzero()[0]
            BETA = self.beta_0 * np.ones(b_k_nonzero.shape[0])
            # log_likelihood += np.log(dirichlet.pdf(phi[k][b_k_nonzero], BETA))
            log_likelihood += self.dirichlet_pdf_log(phi[k][b_k_nonzero], BETA)
            # c
            log_likelihood += np.log(norm.pdf(self.c[k], 0, self.sig_0))
            for l in range(self.embedding_size):
                # lamb
                log_likelihood += np.log(norm.pdf(self.lamb[k, l], 0, self.sig_0))
        return log_likelihood

    def get_phi(self):
        n_b = (self.n + self.beta_0) * self.b
        return n_b / n_b.sum(axis=1).reshape(-1, 1)

    def get_theta(self):
        return (self.m + self.alpha_0) / (self.m + self.alpha_0).sum(axis=1).reshape(-1, 1)

    def print_phi(self, n_words):
        phi = self.get_phi()
        for topic_index, topic in enumerate(phi):
            labelled_probabilities = [(self.dictionary[word_index], prob)
                                      for word_index, prob in enumerate(topic)]
            sorted_probabilities = sorted(labelled_probabilities,
                                          key=lambda x: x[1],
                                          reverse=True)[:n_words]
            print('Topic {}:'.format(topic_index), sorted_probabilities)

    def print_theta(self):
        theta = self.get_theta()
        for document_index, document in enumerate(theta):
            print('Document {}:'.format(document_index),
                  '; Label {}'.format(self.labels[document_index]), document)

    def get_classification_accuracy(self):
        theta = self.get_theta()
        predictions = [distribution.argmax() for distribution in theta]
        prediction_set = set(predictions)
        label_set = set(self.labels.values())
        accuracies = []
        if self.n_topics >= len(label_set):
            for tup in itertools.permutations(prediction_set, len(label_set)):
                count = 0.
                for index in self.labels:
                    if tup[self.labels[index]] == predictions[index]:
                        count += 1.
                accuracies.append(count / len(predictions))
        else:
            for tup in itertools.permutations(label_set, self.n_topics):
                count = 0.
                for index in self.labels:
                    if self.labels[index] == tup[predictions[index]]:
                        count += 1.
                accuracies.append(count / len(predictions))
        return max(accuracies)

    def plot(self, values, ylabel, path):
        title = path.strip(os.path.sep).strip('.csv').split(os.path.sep)[-1]
        plt.title(title)
        plt.xlabel('epoch')
        plt.ylabel(ylabel)
        plt.plot(values)
        plt.show()

    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop("pg")
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)

    def save(self, path):
        pickle.dump(self, open(path, "wb"))

    @staticmethod
    def load(path):
        return pickle.load(open(path, "rb"))
class _BaseLogisticRFLVM(_BaseRFLVM): def __init__(self, rng, data, n_burn, n_iters, latent_dim, n_clusters, n_rffs, dp_prior_obs, dp_df, disp_prior, bias_var): """Initialize base class for logistic RFLVMs. """ # `_BaseRFLVM` will call `_init_specific_params`, and these need to be # set first. self.disp_prior = disp_prior self.bias_var = bias_var super().__init__(rng=rng, data=data, n_burn=n_burn, n_iters=n_iters, latent_dim=latent_dim, n_clusters=n_clusters, n_rffs=n_rffs, dp_prior_obs=dp_prior_obs, dp_df=dp_df) # Polya-gamma augmentation. self.pg = PyPolyaGamma() prior_Sigma = np.eye(self.M + 1) prior_Sigma[-1, -1] = np.sqrt(self.bias_var) self.inv_B = np.linalg.inv(prior_Sigma) mu_A_b = np.zeros(self.M + 1) self.inv_B_b = self.inv_B @ mu_A_b self.omega = np.empty(self.Y.shape) # Linear coefficients `beta`. b0 = np.zeros(self.M + 1) B0 = np.eye(self.M + 1) self.beta = self.rng.multivariate_normal(b0, B0, size=self._j_func()) # ----------------------------------------------------------------------------- # Public API. # ----------------------------------------------------------------------------- def log_likelihood(self, **kwargs): """Generalized, differentiable log likelihood function. """ # This function can be called for two reasons: # # 1. Optimize the log likelihood w.r.t. `X`. # 2. Evaluate the log likelihood w.r.t. a MH-proposed `W`. # X = kwargs.get('X', self.X) W = kwargs.get('W', self.W) phi_X = self.phi(X, W, add_bias=True) psi = phi_X @ self.beta.T LL = self._log_c_func() \ + self._a_func() * psi \ - self._b_func() * np.log(1 + np.exp(psi)) return LL.sum() # ----------------------------------------------------------------------------- # Polya-gamma augmentation. # ----------------------------------------------------------------------------- def _sample_beta(self): """Sample `β|ω ~ N(m, V)`. See (Polson 2013). """ phi_X = self.phi(self.X, self.W, add_bias=True) for j in range(self.J): # This really computes: phi_X.T @ np.diag(omega[:, j]) @ phi_X J = (phi_X * self.omega[:, j][:, None]).T @ phi_X + \ self.inv_B h = phi_X.T @ self._kappa_func(j) + self.inv_B_b joint_sample = self._sample_gaussian(J=J, h=h) self.beta[j] = joint_sample def _sample_omega(self): """Sample `ω|β ~ PG(b, x*β)`. See (Polson 2013). """ phi_X = self.phi(self.X, self.W, add_bias=True) psi = phi_X @ self.beta.T b = self._b_func() self.pg.pgdrawv(b.ravel(), psi.ravel(), self.omega.ravel()) self.omega = self.omega.reshape(self.Y.shape) def _a_func(self, j=None): """This function returns `a(y)`. See the comment at the top of this file and (Polson 2013). """ raise NotImplementedError() def _b_func(self, j=None): """This function returns `b(y)`. See the comment at the top of this file and (Polson 2013). """ raise NotImplementedError() def _log_c_func(self): """This function returns `log c(y)`. This is the normalizer in logistic models and is only used in the log likelihood calculation. See the comment at the top of this file and (Polson 2013). """ raise NotImplementedError() def _j_func(self): """Return number of features to iterate over. This is required because multinomial models decompose the multinomial distribution into `J-1` binomial distributions. """ raise NotImplementedError() def _kappa_func(self, j): """This function returns `kappa(y)`. See the comment at the top of this file and (Polson 2013). """ return self._a_func(j) - (self._b_func(j) / 2.0)
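# --- Added example subclass (hypothetical, not part of the original code) ---
# The abstract hooks above follow the Polson et al. (2013) form
#     p(y | psi) = c(y) * exp(psi)^a(y) / (1 + exp(psi))^b(y),
# so a Bernoulli model (y in {0, 1}) would implement them with a(y) = y, b(y) = 1,
# log c(y) = 0, and one coefficient vector per observed feature:
class _BernoulliRFLVMSketch(_BaseLogisticRFLVM):

    def _a_func(self, j=None):
        if j is not None:
            return self.Y[:, j]
        return self.Y

    def _b_func(self, j=None):
        if j is not None:
            return np.ones(self.Y.shape[0])
        return np.ones(self.Y.shape)

    def _log_c_func(self):
        return 0.0

    def _j_func(self):
        return self.Y.shape[1]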
def s_blk(g_num, b_mu_ll, q_mu, b_v, q_v, b_mat_mu, q_arr, b_mu_lk, b_mat_v, ob, g_ij, z_i): #sample b_lk q n_lk, n_lk1, m_l, m_l1 = get_nlk(ob, g_num, g_ij, z_i) for l in range(g_num): for k in range(l, g_num): samplenum = 100 if l == k: b = np.zeros((samplenum, 2)) b[0, 0] = b_mu_ll b[0, 1] = q_mu mu = np.array([b_mu_ll, q_mu]) var = np.array(([b_v, 0], [0, q_v])) pg = PyPolyaGamma(seed=0) omegas = np.ones(2) x = np.array(([1, 0], [1, 1])) k_arr = np.array( [n_lk1[l, l] - n_lk[l, l] / 2, m_l1[l] - m_l[l] / 2]) for t in range(1, samplenum): omegas[0] = pg.pgdraw(n_lk[l, l], b[t - 1, 0]) omegas[1] = pg.pgdraw(m_l[l], np.sum(b[t - 1, :])) omega = np.array(([omegas[0], 0], [0, omegas[1]])) v = inv( np.dot(np.dot(np.transpose(x), omega), x) + inv(var)) m = np.dot( v, np.dot(np.transpose(x), np.transpose(k_arr)) + np.dot(inv(var), mu)) s = npr.multivariate_normal(m, v) b[t, 0] = np.copy(s[0]) b[t, 1] = np.copy(s[1]) b_mat_mu[l, l] = np.sum(b[50:samplenum, 0]) / (samplenum - 50) q_arr[l] = np.sum(b[50:samplenum, 1]) / (samplenum - 50) else: b = np.zeros((samplenum, 2)) b[0, 0] = b_mu_lk b[0, 1] = b_mu_lk mu = np.array([b_mu_lk, b_mu_lk]) var = np.copy(b_mat_v[:, :, l, k]) pg = PyPolyaGamma(seed=0) omegas = np.ones(2) k_arr = np.array([ n_lk1[l, k] - n_lk[l, k] / 2, n_lk1[k, l] - n_lk[k, l] / 2 ]) x = np.array(([1, 0], [0, 1])) for t in range(1, samplenum): omegas[0] = pg.pgdraw(n_lk[l, k], b[t - 1, 0]) omegas[1] = pg.pgdraw(n_lk[k, l], b[t - 1, 1]) omega = np.array(([omegas[0], 0], [0, omegas[1]])) v = inv( np.dot(np.dot(np.transpose(x), omega), x) + inv(var)) m = np.dot( v, np.dot(np.transpose(x), np.transpose(k_arr)) + np.dot(inv(var), mu)) s = npr.multivariate_normal(m, v) b[t, 0] = np.copy(s[0]) b[t, 1] = np.copy(s[1]) b_mat_mu[l, k] = np.sum(b[50:samplenum, 0]) / (samplenum - 50) b_mat_mu[k, l] = np.sum(b[50:samplenum, 1]) / (samplenum - 50)