def __init__(
        self,
        A,       ## profile matrix
        pkappa,  ## [mean, var] for kappa
        ptau,    ## [mean, var] for tau
        SCexpr,  ## L-by-N, single cell expression
        G,       ## L-by-1, single cell types
        itype    ## cell ids in each type
        ):
    """Store the data, copy the parameters and create the Polya-Gamma samplers.

    ``A``, ``pkappa`` and ``ptau`` are copied into float arrays, so later
    changes to the caller's objects do not affect this instance. ``A`` may be
    any 2-D array-like; ``self.N`` and ``self.K`` are its two dimensions.
    ``SCexpr`` must provide ``.shape``, ``.sum(axis=1)`` and elementwise
    ``== 0`` (e.g. a numpy array).

    The samplers are obtained from ``self.initialize_polya_gamma_samplers()``.
    An earlier, unused draw of random seeds in this constructor was removed;
    it only advanced numpy's global RNG without affecting the samplers' count.
    """
    ## data: never changed
    self.SCexpr = SCexpr
    self.G = G
    self.L = SCexpr.shape[0]
    self.SCrd = SCexpr.sum(axis=1)  ## read depths
    self.itype = itype

    ## parameters: can only be changed by self.update_parameters()
    self.A = np.array(A, dtype=float, copy=True)
    self.pkappa = np.array(pkappa, dtype=float, copy=True)
    self.ptau = np.array(ptau, dtype=float, copy=True)

    ## take the dimensions from the converted copy so that a plain
    ## nested list is accepted for A as well
    (self.N, self.K) = self.A.shape

    ## zero-expressed entries
    self.izero = np.where(self.SCexpr == 0)

    ## for sampling from Polya-Gamma
    self.ppgs = self.initialize_polya_gamma_samplers()
def sample_w(self):
    """
    Draw the augmenting w variables from their conditional posterior.
    For details about the augmentation see the paper.

    A new set of Polya-gamma samplers (one per OpenMP thread, seeded from
    numpy's global RNG) is created on every call. For each time series in
    ``self.saliencies_ts`` one vector is drawn with ``pgdrawvpar`` using a
    first parameter of all ones and a second parameter of
    ``|b * (saliency_ts - s_0)|``.

    :return: samples for w_i from a polyagamma distribution.
        list of lists of arrays num_images x num_subjects x T(image, subject).
    """
    thread_count = pypolyagamma.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    samplers = [pypolyagamma.PyPolyaGamma(seed) for seed in sampler_seeds]

    samples = []
    for series_group in self.saliencies_ts:
        group_draws = []
        samples.append(group_draws)
        for saliency_ts in series_group:
            length = saliency_ts.shape[0]
            shape_param = np.ones(length)
            draws = np.zeros(length)
            tilt = np.abs(self.b.value * (saliency_ts - self.s_0.value))
            # pgdrawvpar writes its result into the preallocated array
            pypolyagamma.pgdrawvpar(samplers, shape_param, tilt, draws)
            group_draws.append(draws)
    return samples
def __init__(self, V, K, X=None, b=None, sigmasq_b=1.0,
             sigmasq_prior_prms=None, name=None):
    """Set up the prior, parameters, data containers and samplers.

    :param V: stored as ``self.V``; first dimension of ``X`` and both
        dimensions of ``b``
    :param K: stored as ``self.K``; second dimension of ``X``
    :param X: optional initial value, broadcast against a (V, K) array of
        ones; when omitted, a random draw scaled by ``sqrt(self.sigmasq_x)``
    :param b: optional initial value, broadcast against a (V, V) array of
        ones; zeros when omitted
    :param sigmasq_b: stored as ``self.sigmasq_b``
    :param sigmasq_prior_prms: optional keyword arguments forwarded to
        ``self._sigmasq_x_prior_class``
    :param name: model name; defaults to ``"lsm_K<K>"``
    """
    self.V = V
    self.K = K

    # Prior
    if sigmasq_prior_prms is None:
        sigmasq_prior_prms = {}
    self.sigmasq_x_prior = self._sigmasq_x_prior_class(K, **sigmasq_prior_prms)
    self.sigmasq_b = sigmasq_b

    # Parameters
    if X is None:
        self.X = np.sqrt(self.sigmasq_x) * npr.randn(V, K)
    else:
        self.X = X * np.ones((V, K))

    if b is None:
        self.b = np.zeros((V, V))
    else:
        self.b = b * np.ones((V, V))

    # Models encapsulate data
    #   As:    observed adjacency matrices
    #   ms:    per-network masks specifying which features to use
    #   masks: masks specifying which entries in A were observed/hidden
    self.As, self.ms, self.masks = [], [], []

    # One Polya-gamma RNG per OpenMP thread
    thread_count = get_omp_num_threads()
    sampler_seeds = npr.randint(2 ** 16, size=thread_count)
    self.ppgs = [PyPolyaGamma(s) for s in sampler_seeds]

    # Name the model
    if name is None:
        name = "lsm_K{}".format(K)
    self.name = name
def __init__(self, model, covariates=None, data=None, mask=None,
             stateseq=None, gaussian_states=None, **kwargs):
    """Initialize the parent states object, then the Polya-gamma machinery.

    All arguments are forwarded unchanged to the parent initializer. After
    that, per-thread Polya-gamma samplers are created unless ``self.ppgs``
    already exists, the transition auxiliary variables are set to ones, and,
    when both ``stateseq`` and ``gaussian_states`` were supplied, those
    auxiliary variables are resampled once.
    """
    super(PGRecurrentSLDSStates, self).__init__(
        model,
        covariates=covariates,
        data=data,
        mask=mask,
        stateseq=stateseq,
        gaussian_states=gaussian_states,
        **kwargs)

    # Only build the samplers if nothing has set them yet
    if not hasattr(self, 'ppgs'):
        import pypolyagamma as ppg

        thread_count = ppg.get_omp_num_threads()
        sampler_seeds = np.random.randint(2**16, size=thread_count)
        self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]

    # Auxiliary variables for the transitions, shape (T - 1, num_states - 1)
    omega_shape = (self.T - 1, self.num_states - 1)
    self.trans_omegas = np.ones(omega_shape)

    # With both discrete and continuous states given, resample once
    states_given = stateseq is not None and gaussian_states is not None
    if states_given:
        self.resample_transition_auxiliary_variables()
def __init__(self, N, B, **kwargs):
    """Initialize the parent class and create the Polya-gamma samplers.

    ``N``, ``B`` and any keyword arguments are passed straight to the parent
    initializer. ``self.ppgs`` then receives one ``PyPolyaGamma`` sampler per
    OpenMP thread, each seeded with an integer below 2**16 drawn from
    ``npr``.
    """
    super(_SparsePGRegressionBase, self).__init__(N, B, **kwargs)

    # pypolyagamma is imported lazily, only when an instance is built
    import pypolyagamma as ppg

    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = npr.randint(2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]
def __init__(self, N, B, **kwargs):
    """Run the parent initializer, then attach per-thread Polya-gamma samplers.

    The samplers are stored in ``self.ppgs``; there is one per OpenMP thread
    reported by ``pypolyagamma`` and each gets its own random seed drawn
    from ``npr.randint(2 ** 16, ...)``.
    """
    super(_SparsePGRegressionBase, self).__init__(N, B, **kwargs)

    # Local import: the dependency is only needed once an object is created
    import pypolyagamma as ppg

    n_threads = ppg.get_omp_num_threads()
    seed_values = npr.randint(2 ** 16, size=n_threads)
    self.ppgs = [ppg.PyPolyaGamma(value) for value in seed_values]
def initialize_polya_gamma_samplers():
    """Create one Polya-gamma sampler per OpenMP thread.

    The thread count is taken from the ``OMP_NUM_THREADS`` environment
    variable when it is set, otherwise from ``ppg.get_omp_num_threads()``.

    OpenMP permits ``OMP_NUM_THREADS`` to be a comma-separated list with one
    entry per nesting level (e.g. ``"4,2"``). Only the first (outermost)
    entry is used here, so such a setting no longer fails in ``int()``.

    :return: list of ``ppg.PyPolyaGamma`` objects, each seeded with an
        integer below 2**16 drawn from numpy's global RNG
    :raises ValueError: if the first ``OMP_NUM_THREADS`` entry is not an
        integer
    :raises AssertionError: if the resulting thread count is not positive
    """
    env_value = os.environ.get("OMP_NUM_THREADS")
    if env_value is not None:
        # Keep only the outermost nesting level of a list such as "4,2"
        num_threads = int(env_value.split(",")[0])
    else:
        num_threads = ppg.get_omp_num_threads()
    assert num_threads > 0, "number of OpenMP threads must be positive"

    # Choose random seeds
    seeds = np.random.randint(2**16, size=num_threads)
    return [ppg.PyPolyaGamma(seed) for seed in seeds]
def initialize_polya_gamma_samplers(self):
    """Create one Polya-gamma sampler per OpenMP thread.

    The thread count is stored in ``self.num_threads``. It is taken from the
    ``OMP_NUM_THREADS`` environment variable when it is set, otherwise from
    ``ppg.get_omp_num_threads()``.

    OpenMP permits ``OMP_NUM_THREADS`` to be a comma-separated list with one
    entry per nesting level (e.g. ``"4,2"``). Only the first (outermost)
    entry is used here, so such a setting no longer fails in ``int()``.

    :return: list of ``ppg.PyPolyaGamma`` objects, each seeded with an
        integer below 2**16 drawn from numpy's global RNG
    :raises ValueError: if the first ``OMP_NUM_THREADS`` entry is not an
        integer
    :raises AssertionError: if the resulting thread count is not positive
    """
    env_value = os.environ.get("OMP_NUM_THREADS")
    if env_value is not None:
        # Keep only the outermost nesting level of a list such as "4,2"
        self.num_threads = int(env_value.split(",")[0])
    else:
        self.num_threads = ppg.get_omp_num_threads()
    assert self.num_threads > 0, "number of OpenMP threads must be positive"

    # Choose random seeds
    seeds = np.random.randint(2**16, size=self.num_threads)
    return [ppg.PyPolyaGamma(seed) for seed in seeds]
def __init__(self, N, D, **kwargs):
    """Initialize the parent model, the samplers, and redraw F and mu_0.

    After the parent initializer has run, one Polya-gamma sampler per OpenMP
    thread is stored in ``self.ppgs``. ``self.F`` is then overwritten with an
    (N, D) Gaussian draw scaled by ``sqrt(self.sigma_F)`` and ``self.mu_0``
    with ``self.mu_mu_0`` plus a scalar Gaussian draw scaled by
    ``sqrt(self.sigma_mu0)``. Finally ``self.resample()`` is called once.
    """
    super(_GibbsLogisticEigenmodel, self).__init__(N, D, **kwargs)

    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(0, 2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]

    # DEBUG: random re-initialization of F and mu_0
    f_scale = np.sqrt(self.sigma_F)
    self.F = f_scale * np.random.randn(self.N, self.D)
    mu0_scale = np.sqrt(self.sigma_mu0)
    self.mu_0 = self.mu_mu_0 + mu0_scale * np.random.randn()

    self.resample()
def sample_w_i(S, J_i):
    """
    Draw one Polya-gamma sample per row of S.

    New samplers (one per OpenMP thread, seeded from numpy's global RNG) are
    created on every call. The draw uses a first parameter of all ones and
    ``np.dot(S, J_i)`` as second parameter.

    :param S: observation matrix
    :param J_i: neuron i's couplings
    :return: samples for w_i from a polyagamma distribution
    """
    thread_count = pypolyagamma.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    samplers = [pypolyagamma.PyPolyaGamma(s) for s in sampler_seeds]

    n_rows = S.shape[0]
    shape_param = np.ones(n_rows)
    w_i = np.zeros(n_rows)
    # The result is written into the preallocated w_i
    pypolyagamma.pgdrawvpar(samplers, shape_param, np.dot(S, J_i), w_i)
    return w_i
def test_parallel(verbose=False):
    """Smoke-test the parallel vectorized sampler ``pgdrawvpar``.

    Seeds numpy's global RNG with 0, builds one sampler per OpenMP thread and
    draws five values with first parameter 14 and second parameter 0. The
    drawn values are printed when ``verbose`` is true; they are not checked.

    :return: True
    """
    np.random.seed(0)

    n_draws = 5
    thread_count = pypolyagamma.get_omp_num_threads()

    out = np.zeros(n_draws)
    shape_param = np.full(n_draws, 14.0)
    tilt_param = np.zeros(n_draws)

    sampler_seeds = np.random.randint(2**16, size=thread_count)
    samplers = [pypolyagamma.PyPolyaGamma(s) for s in sampler_seeds]
    pypolyagamma.pgdrawvpar(samplers, shape_param, tilt_param, out)

    if verbose:
        print(out)
    return True
def __init__(self, S, C):
    """Store the data and allocate the model state.

    :param S: 2-D array; its shape is stored as ``(self.T, self.N)``
    :param C: exclusive upper bound for the random integer labels in
        ``self.c`` and number of columns of ``self.psis``
    """
    self.S = S
    self.C = C
    self.T, self.N = S.shape

    # One random integer label in [0, C) per column of S
    self.c = np.random.randint(0, C, size=self.N)
    self.psis = np.zeros((self.T, self.C))

    from pybasicbayes.distributions.gaussian import ScalarGaussianNIX
    self.gaussian = ScalarGaussianNIX(
        mu_0=0, kappa_0=1, sigmasq_0=1.0, nu_0=2.0)

    # One Polya-gamma sampler per OpenMP thread
    import pypolyagamma as ppg
    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]

    self.omega = np.zeros((self.T, self.N))
def __init__(self, S, C):
    """Keep the inputs and create labels, activations, prior and samplers.

    :param S: 2-D array of shape (T, N)
    :param C: number of columns of ``self.psis``; the entries of ``self.c``
        are random integers in [0, C)
    """
    self.S = S
    self.C = C
    n_rows, n_cols = S.shape
    self.T, self.N = n_rows, n_cols

    # Random initial label for each of the N columns
    self.c = np.random.randint(0, C, size=self.N)
    self.psis = np.zeros((self.T, self.C))

    from pybasicbayes.distributions.gaussian import ScalarGaussianNIX
    nix_hypers = dict(mu_0=0, kappa_0=1, sigmasq_0=1.0, nu_0=2.0)
    self.gaussian = ScalarGaussianNIX(**nix_hypers)

    # Polya-gamma samplers, one per OpenMP thread
    import pypolyagamma as ppg
    n_threads = ppg.get_omp_num_threads()
    seed_values = np.random.randint(2**16, size=n_threads)
    self.ppgs = [ppg.PyPolyaGamma(value) for value in seed_values]

    self.omega = np.zeros((self.T, self.N))
def __init__(self, X=None, psi=None):
    """
    Store the observations and set up samplers and auxiliary variables.

    At least one of ``X`` and ``psi`` must be given. When only ``psi`` is
    given, the observations are generated with ``self.rvs(psi)``.

    :param X: TxN matrix of observations
    :param psi: used only to generate ``X`` when ``X`` is omitted
    """
    assert X is not None or psi is not None
    if psi is not None and X is None:
        X = self.rvs(psi)
    assert X.ndim == 2

    self.X = X
    n_rows, n_cols = X.shape
    self.T, self.N = n_rows, n_cols

    # One Polya-gamma sampler per OpenMP thread
    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]

    # Auxiliary variables omega start at one
    self.omega = np.ones((self.T, self.N))
def __init__(self, model, data=None, mask=None, **kwargs):
    """Initialize the parent states and, for count emissions, the PG state.

    All arguments are forwarded to the parent initializer. If the emission
    distribution is a ``_PGLogisticRegressionBase``, ``self.has_count_data``
    is set to True, one Polya-gamma sampler per OpenMP thread is stored in
    ``self.ppgs`` and the auxiliary variables ``self.omega`` are initialized
    to a (T, D_emission) array of ones. Otherwise ``self.has_count_data`` is
    False and neither attribute is created here.
    """
    super(LDSStatesCountData, self).__init__(
        model, data=data, mask=mask, **kwargs)

    # Check if the emission matrix is a count regression
    from pypolyagamma.distributions import _PGLogisticRegressionBase
    if isinstance(self.emission_distn, _PGLogisticRegressionBase):
        self.has_count_data = True

        # Initialize the Polya-gamma samplers
        import pypolyagamma as ppg
        num_threads = ppg.get_omp_num_threads()
        seeds = np.random.randint(2**16, size=num_threads)
        self.ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

        # Initialize auxiliary variables, omega.
        # The builtin float is used because the np.float alias was removed
        # from NumPy; the resulting dtype (float64) is the same.
        self.omega = np.ones((self.T, self.D_emission), dtype=float)
    else:
        self.has_count_data = False
def __init__(self, S, D):
    """Store the data and allocate zero-initialized model state.

    :param S: 2-D array; its shape is stored as ``(self.T, self.N)``
    :param D: stored as ``self.D``; second dimension of ``self.Z`` and
        ``self.A``
    """
    self.S = S
    self.D = D
    self.T, self.N = S.shape

    self.Z = np.zeros((self.N, self.D))
    self.omega = np.zeros((self.T, self.N))

    # Initialize regression model
    # (the pybasicbayes Regression set-up below is currently disabled)
    # from pybasicbayes.distributions.regression import Regression
    # S_0 = np.eye(self.T)
    # K_0 = np.eye(self.D+1)
    # M_0 = np.zeros((self.T, self.D+1))
    # nu_0 = self.T+2
    # self.regression = Regression(nu_0, S_0, M_0, K_0, affine=True)
    self.A = np.zeros((self.T, self.D))
    self.bias = np.zeros((self.T,))

    # One Polya-gamma sampler per OpenMP thread
    import pypolyagamma as ppg
    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]
def __init__(self, S, D):
    """Keep the inputs and create zero-filled arrays plus the samplers.

    :param S: 2-D array of shape (T, N)
    :param D: second dimension of the zero-initialized ``self.Z`` (N x D)
        and ``self.A`` (T x D)
    """
    self.S = S
    n_rows, n_cols = S.shape
    self.T, self.N = n_rows, n_cols
    self.D = D

    self.Z = np.zeros((self.N, self.D))
    self.omega = np.zeros((self.T, self.N))

    # Initialize regression model
    # (kept for reference; the Regression object is not used at present)
    # from pybasicbayes.distributions.regression import Regression
    # S_0 = np.eye(self.T)
    # K_0 = np.eye(self.D+1)
    # M_0 = np.zeros((self.T, self.D+1))
    # nu_0 = self.T+2
    # self.regression = Regression(nu_0, S_0, M_0, K_0, affine=True)
    self.A = np.zeros((self.T, self.D))
    self.bias = np.zeros((self.T, ))

    # Polya-gamma samplers, one per OpenMP thread
    import pypolyagamma as ppg
    n_threads = ppg.get_omp_num_threads()
    seed_values = np.random.randint(2**16, size=n_threads)
    self.ppgs = [ppg.PyPolyaGamma(value) for value in seed_values]
import matplotlib.pyplot as plt
from scipy.stats import norm, probplot

# Use a simple Normal-Bernoulli model
#   z ~ N(z | 0, 1)
#   x ~ [Bern(x | \sigma(z))]^{1/T} = Bern(x | \sigma(z / T))
# Where T is the temperature of the tempered distribution in [1, \inf)
# When T=1 we target the posterior. When T=\inf we target the prior
T = 2.0
mu_z = 0.0
sigma_z = 1.0

# Initialize Polya-gamma samplers:
# one sampler per OpenMP thread, each with its own random seed below 2**16
num_threads = ppg.get_omp_num_threads()
seeds = np.random.randint(2**16, size=num_threads)
ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

def kappa(x):
    # Compute kappa = [a(x) - b(x)/2.] / T
    # for the Bernoulli model where a(x) = x and b(x) = 1
    # (reads the module-level temperature T)
    return (x - 0.5) / T

def resample_z(x, omega):
    # Resample z from its Gaussian conditional
    # NOTE(review): only the first two statements of this function are
    # visible here; the rest of the body is missing from this excerpt.
    # NOTE(review): 1. / sigma_z is used as the prior precision, which
    # treats sigma_z as a variance rather than a standard deviation
    # (identical for the value 1.0 used above) -- confirm the intent.
    prior_J = 1. / sigma_z
    prior_h = prior_J * mu_z
# One subplot per column n: plot the first T_plot values of psi[:, n] and
# mark the nonzero entries of S[:T_plot, n] with black dots at height one.
# NOTE(review): this fragment uses Python 2 syntax (xrange, print statement)
# and relies on N, T_plot, psi, S, lns, N_samples and the resample_*
# functions being defined earlier in the script.
for n in xrange(N):
    plt.subplot(N,1,n+1)
    # Keep the handle of the red line in lns; presumably it is updated
    # later on -- verify against the rest of the script
    lns.append(plt.plot(psi[:T_plot,n], 'r')[0])
    plt.plot(psi[:T_plot,n], 'b')
    spks = np.where(S[:T_plot,n])[0]
    plt.plot(spks, np.ones_like(spks), 'ko', markerfacecolor="k")
    # The y-range always contains [0.9, 1.1] and the full range of psi
    plt.ylim((min(0.9, psi.min()-0.1), max(1.1, psi.max()+0.1)))
plt.show()

# Do some inference
# Instantiate the auxiliary variables
omega = np.zeros_like(psi)
# One Polya-gamma sampler per OpenMP thread, each with its own random seed
num_threads = ppg.get_omp_num_threads()
seeds = np.random.randint(2**16, size=num_threads)
ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

# Collect samples
b_samples = []
W_samples = []
A_samples = []
psi_samples = []
# Each iteration resamples omega, then A, then W and b
for itr in xrange(N_samples):
    print "Iteration ", itr
    resample_omega()
    resample_A()
    resample_W_b()
def test_parallel2():
    """Test multiple cases of OMP.

    Exercises ``pgdrawvpar`` for every combination of

    * the number of draws ``n`` being one less than, one more than, or equal
      to the number of samplers ``nthreads``, and
    * ``nthreads`` being equal to, one less than, or one more than the
      number of OpenMP threads ``num_threads``.

    The nine cases run in the same order as before (cases 1-3: n < nthreads,
    cases 4-6: n > nthreads, cases 7-9: n == nthreads), so the seeds drawn
    from numpy's global RNG are unchanged. The drawn values are not checked;
    the test only verifies that the calls complete.

    :return: None when fewer than two OpenMP threads are available (the
        test is skipped), True otherwise
    """
    num_threads = pypolyagamma.get_omp_num_threads()
    if num_threads < 2:
        return

    np.random.seed(0)

    # n relative to nthreads: n < nthreads, n > nthreads, n == nthreads
    for n_offset in (-1, 1, 0):
        # nthreads relative to num_threads: equal, fewer, more
        for thread_offset in (0, -1, 1):
            nthreads = num_threads + thread_offset
            n = nthreads + n_offset

            v3 = np.zeros(n)
            a = 14 * np.ones(n)
            b = 0 * np.ones(n)
            seeds = np.random.randint(2**16, size=nthreads)
            ppgs = [pypolyagamma.PyPolyaGamma(seed) for seed in seeds]
            pypolyagamma.pgdrawvpar(ppgs, a, b, v3)

    return True
def __init__(self, population, trunc=200):
    """Attach the population and create the Polya-gamma samplers.

    :param population: stored as ``self.population``; its ``N`` attribute is
        copied to ``self.N``
    :param trunc: second argument passed to every ``PyPolyaGamma`` sampler
    """
    self.population = population

    # One sampler per OpenMP thread, each with its own random seed
    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s, trunc) for s in sampler_seeds]

    self.N = self.population.N
def __init__(self, model, data=None, **kwargs):
    """Initialize the states and build the zero-inflation masked data.

    The data must be provided in sparse row format. This makes it easy to
    iterate over rows: for each row t it is easy to get the output
    dimensions n such that y_{t,n} > 0.

    After the parent initializer has run, one Polya-gamma sampler per OpenMP
    thread is stored in ``self.ppgs``. When ``data`` is given, a random
    exposure mask is sampled for every row: each entry is exposed when a
    uniform draw is below ``self.rho``, and every entry stored in ``data``
    is always exposed. ``self.masked_data`` holds the data values at the
    exposed positions (including explicit zeros) as a (T, D_emission)
    ``csr_matrix``, and the auxiliary variables are resampled once. When
    ``data`` is None, ``self.masked_data`` and ``self.omega`` are None.

    :raises AssertionError: if ``data`` is given but is not a ``csr_matrix``
    """
    super(LDSStatesZeroInflatedCountData, self).__init__(
        model, data=data, **kwargs)

    # Initialize the Polya-gamma samplers
    num_threads = ppg.get_omp_num_threads()
    seeds = np.random.randint(2**16, size=num_threads)
    self.ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

    # Initialize the masked data
    if data is not None:
        assert isinstance(data, csr_matrix), \
            "Data must be a sparse row matrix for zero-inflated models"

        # Initialize a sparse matrix of masked data. The mask
        # specifies which observations were "exposed" and which
        # were deterministically zero. In other words, the mask
        # gives the data values at the places where z_{t,n} = 1.
        T, N = self.T, self.D_emission
        indptr = [0]
        indices = []
        vals = []
        for t in range(T):
            # Get the nonzero entries in the t-th row
            row = slice(data.indptr[t], data.indptr[t + 1])
            ns_t = data.indices[row]
            y_t = np.zeros(N)
            y_t[ns_t] = data.data[row]

            # Sample zero inflation mask; stored entries are always exposed
            z_t = np.random.rand(N) < self.rho
            z_t[ns_t] = True

            # Construct the sparse matrix row by row
            t_inds = np.where(z_t)[0]
            indices.append(t_inds)
            vals.append(y_t[t_inds])
            indptr.append(indptr[-1] + t_inds.size)

        # Construct a sparse matrix
        vals = np.concatenate(vals)
        indices = np.concatenate(indices)
        indptr = np.array(indptr)
        self.masked_data = csr_matrix((vals, indices, indptr), shape=(T, N))

        self.resample_auxiliary_variables()
    else:
        self.masked_data = None
        self.omega = None