Пример #1
0
 def __init__(
         self,
         A,  ## profile matrix
         pkappa,  ## [mean, var] for kappa
         ptau,  ## [mean, var] for tau
         SCexpr,  ## L-by-N, single cell expression
         G,  ## L-by-1, single cell types
         itype  ## cell ids in each type
 ):
     """Store the data, copy the parameters and create the Polya-Gamma samplers.

     ``A`` must expose ``.shape`` with two entries (unpacked into ``self.N``
     and ``self.K``); ``SCexpr`` must expose ``.shape`` and ``.sum(axis=1)``.
     ``A``, ``pkappa`` and ``ptau`` are copied into float arrays so later
     changes to the caller's objects do not leak into this instance.
     """
     ## data: never changed
     self.SCexpr = SCexpr
     self.G = G
     self.L = SCexpr.shape[0]
     self.N, self.K = A.shape
     self.SCrd = SCexpr.sum(axis=1)  ## read depths
     self.itype = itype
     ## parameters: can only be changed by self.update_parameters()
     self.A = np.array(A, dtype=float, copy=True)
     self.pkappa = np.array(pkappa, dtype=float, copy=True)
     self.ptau = np.array(ptau, dtype=float, copy=True)
     ## zero-expressed entries (index arrays as returned by np.where)
     self.izero = np.where(self.SCexpr == 0)
     ## for sampling from Polya-Gamma
     ## The helper is called without arguments, so it has to pick the thread
     ## count and the seeds itself; the thread count and seeds that used to be
     ## computed here were never used and only advanced the global RNG.
     self.ppgs = self.initialize_polya_gamma_samplers()
Пример #2
0
    def sample_w(self):
        """
        Draw the augmenting w variables from their conditional posterior.
        The augmentation scheme itself is described in the paper.
        :return: nested list indexed as [image][subject]; every entry is an
                array of Polya-Gamma draws whose length is the first dimension
                of the corresponding saliency time series.
        """
        # Fresh samplers on every call: one per OpenMP thread, randomly seeded
        thread_count = pypolyagamma.get_omp_num_threads()
        samplers = [pypolyagamma.PyPolyaGamma(seed)
                    for seed in np.random.randint(2**16, size=thread_count)]

        samples = []
        for image_series in self.saliencies_ts:
            image_samples = []
            for series in image_series:
                length = series.shape[0]
                ones = np.ones(length)
                draws = np.zeros(length)  # filled in place by pgdrawvpar
                tilt = np.abs(self.b.value * (series - self.s_0.value))
                pypolyagamma.pgdrawvpar(samplers, ones, tilt, draws)
                image_samples.append(draws)
            samples.append(image_samples)
        return samples
Пример #3
0
    def __init__(self, V, K, X=None, b=None, sigmasq_b=1.0,
                 sigmasq_prior_prms=None, name=None):
        """Set up the prior, the parameters, empty data containers and the samplers.

        :param V: first dimension of ``X`` and both dimensions of ``b``
        :param K: second dimension of ``X``; also forwarded to the prior class
        :param X: optional initial value, broadcast against a (V, K) array of ones;
                  drawn at random when omitted
        :param b: optional initial value, broadcast against a (V, V) array of ones;
                  zeros when omitted
        :param sigmasq_b: stored unchanged on the instance
        :param sigmasq_prior_prms: optional keyword arguments for the prior class
        :param name: model name; defaults to "lsm_K<K>"
        """
        self.V, self.K = V, K

        # Prior
        if sigmasq_prior_prms is None:
            sigmasq_prior_prms = {}
        self.sigmasq_x_prior = self._sigmasq_x_prior_class(K, **sigmasq_prior_prms)
        self.sigmasq_b = sigmasq_b

        # Parameters
        # NOTE(review): self.sigmasq_x is not assigned here -- presumably a
        # class-level attribute/property backed by the prior; confirm.
        if X is None:
            self.X = np.sqrt(self.sigmasq_x) * npr.randn(V, K)
        else:
            self.X = X * np.ones((V, K))

        if b is None:
            self.b = np.zeros((V, V))
        else:
            self.b = b * np.ones((V, V))

        # Data attached to the model, all empty at construction time:
        #   As:    observed adjacency matrices
        #   ms:    per-network masks specifying which features to use
        #   masks: masks specifying which entries of A were observed/hidden
        self.As, self.ms, self.masks = [], [], []

        # One randomly seeded Polya-gamma RNG per OpenMP thread
        thread_count = get_omp_num_threads()
        self.ppgs = [PyPolyaGamma(seed)
                     for seed in npr.randint(2 ** 16, size=thread_count)]

        # Model name
        if name is None:
            name = "lsm_K{}".format(K)
        self.name = name
Пример #4
0
    def __init__(self,
                 model,
                 covariates=None,
                 data=None,
                 mask=None,
                 stateseq=None,
                 gaussian_states=None,
                 **kwargs):
        """Initialize the parent states object, then the Polya-gamma machinery.

        All arguments are forwarded unchanged to the parent constructor.
        Samplers are only created when the instance does not already carry a
        ``ppgs`` attribute after the parent constructor has run.
        """
        super(PGRecurrentSLDSStates, self).__init__(
            model, covariates=covariates, data=data, mask=mask,
            stateseq=stateseq, gaussian_states=gaussian_states,
            **kwargs)

        # Create the Polya gamma samplers unless they are already present
        if not hasattr(self, 'ppgs'):
            import pypolyagamma as ppg

            thread_count = ppg.get_omp_num_threads()
            self.ppgs = [ppg.PyPolyaGamma(seed)
                         for seed in np.random.randint(2**16, size=thread_count)]

        # Auxiliary variables for the transitions start at one
        omega_shape = (self.T - 1, self.num_states - 1)
        self.trans_omegas = np.ones(omega_shape)

        # Without both the discrete and the continuous states there is
        # nothing to condition the auxiliary variables on yet
        if stateseq is None or gaussian_states is None:
            return
        self.resample_transition_auxiliary_variables()
Пример #5
0
    def __init__(self, N, B, **kwargs):
        """Run the parent constructor, then attach one Polya-gamma sampler per OpenMP thread."""
        super(_SparsePGRegressionBase, self).__init__(N, B, **kwargs)

        # Samplers are seeded with random integers below 2**16
        import pypolyagamma as ppg
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in npr.randint(2**16, size=thread_count)]
Пример #6
0
    def __init__(self, N, B, **kwargs):
        """Run the parent constructor, then attach one Polya-gamma sampler per OpenMP thread."""
        super(_SparsePGRegressionBase, self).__init__(N, B, **kwargs)

        # Samplers are seeded with random integers below 2**16
        import pypolyagamma as ppg
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in npr.randint(2 ** 16, size=thread_count)]
Пример #7
0
def initialize_polya_gamma_samplers():
    """Create one randomly seeded Polya-Gamma sampler per OpenMP thread.

    The thread count is taken from the ``OMP_NUM_THREADS`` environment
    variable when it is set, otherwise from ``ppg.get_omp_num_threads()``.

    Returns:
        A list of ``ppg.PyPolyaGamma`` objects, one per thread.

    Raises:
        ValueError: if ``OMP_NUM_THREADS`` is set but its first entry is not
            an integer.
        AssertionError: if the resulting thread count is not positive.
    """
    env_value = os.environ.get("OMP_NUM_THREADS")
    if env_value is not None:
        # OpenMP accepts a comma-separated list (one entry per nesting
        # level); the first entry is the outermost level, which is the one
        # that matters here. int() tolerates surrounding whitespace.
        num_threads = int(env_value.split(",")[0])
    else:
        num_threads = ppg.get_omp_num_threads()
    # Raised explicitly so the check survives `python -O`; the exception
    # type is kept as AssertionError for existing callers.
    if not num_threads > 0:
        raise AssertionError(
            "number of OpenMP threads must be positive, got %r" % (num_threads,))

    # Choose random seeds
    seeds = np.random.randint(2**16, size=num_threads)
    return [ppg.PyPolyaGamma(seed) for seed in seeds]
Пример #8
0
    def initialize_polya_gamma_samplers(self):
        """Create one randomly seeded Polya-Gamma sampler per OpenMP thread.

        The thread count is taken from the ``OMP_NUM_THREADS`` environment
        variable when it is set, otherwise from ``ppg.get_omp_num_threads()``,
        and is stored on the instance as ``self.num_threads``.

        Returns:
            A list of ``ppg.PyPolyaGamma`` objects, one per thread.

        Raises:
            ValueError: if ``OMP_NUM_THREADS`` is set but its first entry is
                not an integer.
            AssertionError: if the resulting thread count is not positive.
        """
        env_value = os.environ.get("OMP_NUM_THREADS")
        if env_value is not None:
            # OpenMP accepts a comma-separated list (one entry per nesting
            # level); the first entry is the outermost level, which is the
            # one that matters here. int() tolerates surrounding whitespace.
            self.num_threads = int(env_value.split(",")[0])
        else:
            self.num_threads = ppg.get_omp_num_threads()
        # Raised explicitly so the check survives `python -O`; the exception
        # type is kept as AssertionError for existing callers.
        if not self.num_threads > 0:
            raise AssertionError(
                "number of OpenMP threads must be positive, got %r"
                % (self.num_threads,))

        # Choose random seeds
        seeds = np.random.randint(2**16, size=self.num_threads)
        return [ppg.PyPolyaGamma(seed) for seed in seeds]
Пример #9
0
    def __init__(self, N, D, **kwargs):
        """Run the parent constructor, create the samplers, randomize F and mu_0, then resample once."""
        super(_GibbsLogisticEigenmodel, self).__init__(N, D, **kwargs)

        # One Polya-gamma sampler per OpenMP thread, randomly seeded
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in np.random.randint(0, 2**16, size=thread_count)]

        # DEBUG: overwrite F and mu_0 with Gaussian draws scaled by the
        # square roots of sigma_F and sigma_mu0
        feature_scale = np.sqrt(self.sigma_F)
        self.F = feature_scale * np.random.randn(self.N, self.D)
        bias_noise = np.sqrt(self.sigma_mu0) * np.random.randn()
        self.mu_0 = self.mu_mu_0 + bias_noise

        self.resample()
Пример #10
0
def sample_w_i(S, J_i):
    """
    Draw the auxiliary variables w_i with the parallel Polya-Gamma sampler.

    :param S: observation matrix; its first dimension sets the number of draws
    :param J_i: neuron i's couplings, combined with S through np.dot
    :return: array with one Polya-Gamma draw per row of S
    """
    # Fresh samplers on every call: one per OpenMP thread, randomly seeded
    thread_count = pypolyagamma.get_omp_num_threads()
    samplers = [pypolyagamma.PyPolyaGamma(seed)
                for seed in np.random.randint(2**16, size=thread_count)]

    num_rows = S.shape[0]
    draws = np.zeros(num_rows)  # filled in place by pgdrawvpar
    pypolyagamma.pgdrawvpar(samplers, np.ones(num_rows), np.dot(S, J_i), draws)
    return draws
Пример #11
0
def test_parallel(verbose=False):
    """Smoke-test the parallel vectorized draw with five samples; print them when verbose."""
    np.random.seed(0)

    num_draws = 5
    thread_count = pypolyagamma.get_omp_num_threads()
    out = np.zeros(num_draws)
    first_arg = 14 * np.ones(num_draws)
    second_arg = 0 * np.ones(num_draws)
    samplers = [pypolyagamma.PyPolyaGamma(seed)
                for seed in np.random.randint(2**16, size=thread_count)]
    # Draws are written into `out` in place
    pypolyagamma.pgdrawvpar(samplers, first_arg, second_arg, out)

    if verbose:
        print(out)
    return True
Пример #12
0
def test_parallel(verbose=False):
    """Smoke-test the parallel vectorized draw with five samples; print them when verbose."""
    np.random.seed(0)

    num_draws = 5
    thread_count = pypolyagamma.get_omp_num_threads()
    out = np.zeros(num_draws)
    first_arg = 14 * np.ones(num_draws)
    second_arg = 0 * np.ones(num_draws)
    samplers = [pypolyagamma.PyPolyaGamma(seed)
                for seed in np.random.randint(2**16, size=thread_count)]
    # Draws are written into `out` in place
    pypolyagamma.pgdrawvpar(samplers, first_arg, second_arg, out)

    if verbose:
        print(out)
    return True
Пример #13
0
    def __init__(self, S, C):
        """Store the data, draw random labels and allocate the sampler state.

        :param S: 2-D array; its shape is unpacked into (self.T, self.N)
        :param C: exclusive upper bound of the random integer labels in
                  ``self.c`` and the number of columns of ``self.psis``
        """
        self.S = S
        self.C = C
        self.T, self.N = S.shape
        # One random label in [0, C) per column of S
        self.c = np.random.randint(0, C, size=self.N)
        self.psis = np.zeros((self.T, self.C))

        from pybasicbayes.distributions.gaussian import ScalarGaussianNIX
        nix_hypers = dict(mu_0=0, kappa_0=1, sigmasq_0=1.0, nu_0=2.0)
        self.gaussian = ScalarGaussianNIX(**nix_hypers)

        # One Polya-gamma sampler per OpenMP thread, randomly seeded
        import pypolyagamma as ppg
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in np.random.randint(2**16, size=thread_count)]

        # Auxiliary variables, same shape as S
        self.omega = np.zeros((self.T, self.N))
Пример #14
0
    def __init__(self, S, C):
        """Store the data, draw random labels and allocate the sampler state.

        :param S: 2-D array; its shape is unpacked into (self.T, self.N)
        :param C: exclusive upper bound of the random integer labels in
                  ``self.c`` and the number of columns of ``self.psis``
        """
        self.S = S
        self.C = C
        self.T, self.N = S.shape
        # One random label in [0, C) per column of S
        self.c = np.random.randint(0, C, size=self.N)
        self.psis = np.zeros((self.T, self.C))

        from pybasicbayes.distributions.gaussian import ScalarGaussianNIX
        nix_hypers = dict(mu_0=0, kappa_0=1, sigmasq_0=1.0, nu_0=2.0)
        self.gaussian = ScalarGaussianNIX(**nix_hypers)

        # One Polya-gamma sampler per OpenMP thread, randomly seeded
        import pypolyagamma as ppg
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in np.random.randint(2**16, size=thread_count)]

        # Auxiliary variables, same shape as S
        self.omega = np.zeros((self.T, self.N))
Пример #15
0
    def __init__(self, X=None, psi=None):
        """
        At least one of X and psi must be given.

        :param X: TxN matrix of observations
        :param psi: when X is omitted, X is generated as ``self.rvs(psi)``
        """
        assert X is not None or psi is not None
        if X is None and psi is not None:
            X = self.rvs(psi)

        assert X.ndim == 2
        self.X = X
        self.T, self.N = X.shape

        # One Polya-gamma sampler per OpenMP thread, randomly seeded
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in np.random.randint(2**16, size=thread_count)]

        # Auxiliary variables omega start at one, same shape as X
        self.omega = np.ones((self.T, self.N))
Пример #16
0
    def __init__(self, model, data=None, mask=None, **kwargs):
        """Initialize the parent states object and, for count emissions, the Polya-gamma state.

        All arguments are forwarded unchanged to the parent constructor.
        ``self.has_count_data`` records whether ``self.emission_distn`` is a
        ``_PGLogisticRegressionBase``; only in that case are ``self.ppgs`` and
        ``self.omega`` created.
        """
        super(LDSStatesCountData, self). \
            __init__(model, data=data, mask=mask, **kwargs)

        # Check if the emission matrix is a count regression
        from pypolyagamma.distributions import _PGLogisticRegressionBase
        if isinstance(self.emission_distn, _PGLogisticRegressionBase):
            self.has_count_data = True

            # Initialize the Polya-gamma samplers, one per OpenMP thread
            import pypolyagamma as ppg
            num_threads = ppg.get_omp_num_threads()
            seeds = np.random.randint(2**16, size=num_threads)
            self.ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

            # Initialize auxiliary variables, omega.
            # The builtin `float` replaces the `np.float` alias, which was
            # removed in NumPy 1.24 and raises AttributeError there.
            self.omega = np.ones((self.T, self.D_emission), dtype=float)
        else:
            self.has_count_data = False
Пример #17
0
    def __init__(self, S, D):
        """Store the data and allocate zero-initialized state and the samplers.

        :param S: 2-D array; its shape is unpacked into (self.T, self.N)
        :param D: second dimension of ``self.Z`` and ``self.A``
        """
        self.S = S
        self.T, self.N = S.shape
        self.D = D

        num_rows, num_cols, latent_dim = self.T, self.N, self.D
        self.Z = np.zeros((num_cols, latent_dim))
        self.omega = np.zeros((num_rows, num_cols))

        # NOTE: a pybasicbayes ``Regression`` prior (nu_0 = T + 2, S_0 = eye(T),
        # M_0 = zeros((T, D + 1)), K_0 = eye(D + 1), affine=True) was sketched
        # here but is disabled; A and bias are plain zero arrays instead.
        self.A = np.zeros((num_rows, latent_dim))
        self.bias = np.zeros((num_rows,))

        # One Polya-gamma sampler per OpenMP thread, randomly seeded
        import pypolyagamma as ppg
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in np.random.randint(2**16, size=thread_count)]
Пример #18
0
    def __init__(self, S, D):
        """Store the data and allocate zero-initialized state and the samplers.

        :param S: 2-D array; its shape is unpacked into (self.T, self.N)
        :param D: second dimension of ``self.Z`` and ``self.A``
        """
        self.S = S
        self.T, self.N = S.shape
        self.D = D

        num_rows, num_cols, latent_dim = self.T, self.N, self.D
        self.Z = np.zeros((num_cols, latent_dim))
        self.omega = np.zeros((num_rows, num_cols))

        # NOTE: a pybasicbayes ``Regression`` prior (nu_0 = T + 2, S_0 = eye(T),
        # M_0 = zeros((T, D + 1)), K_0 = eye(D + 1), affine=True) was sketched
        # here but is disabled; A and bias are plain zero arrays instead.
        self.A = np.zeros((num_rows, latent_dim))
        self.bias = np.zeros((num_rows,))

        # One Polya-gamma sampler per OpenMP thread, randomly seeded
        import pypolyagamma as ppg
        thread_count = ppg.get_omp_num_threads()
        self.ppgs = [ppg.PyPolyaGamma(seed)
                     for seed in np.random.randint(2**16, size=thread_count)]
Пример #19
0
import matplotlib.pyplot as plt

from scipy.stats import norm, probplot

# Use a simple Normal-Bernoulli model
# z ~ N(z | 0, 1)
# x ~ [Bern(x | \sigma(z))]^{1/T} = Bern(x | \sigma(z / T))
# Where T is the temperature of the tempered distribution in [1, \inf)
# When T=1 we target the posterior. When T=\inf we target the prior
T = 2.0  # temperature; kappa() divides by it
mu_z = 0.0  # prior mean of z
# NOTE(review): resample_z uses 1. / sigma_z as the prior precision, so
# sigma_z acts as a variance rather than a standard deviation -- confirm.
sigma_z = 1.0

# Initialize Polya-gamma samplers: one per OpenMP thread, each seeded with a
# random integer below 2**16 drawn from NumPy's global RNG
num_threads = ppg.get_omp_num_threads()
seeds = np.random.randint(2**16, size=num_threads)
ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]


def kappa(x):
    """Return the tempered kappa, [a(x) - b(x)/2] / T, using the module-level temperature T."""
    # Bernoulli model: a(x) = x and b(x) = 1, so the numerator is x - 1/2
    centered = x - 0.5
    return centered / T


def resample_z(x, omega):
    # Resample z from its Gaussian conditional
    prior_J = 1. / sigma_z
    prior_h = prior_J * mu_z
Пример #20
0
        for n in xrange(N):
            plt.subplot(N,1,n+1)
            lns.append(plt.plot(psi[:T_plot,n], 'r')[0])
            plt.plot(psi[:T_plot,n], 'b')
            spks = np.where(S[:T_plot,n])[0]
            plt.plot(spks, np.ones_like(spks), 'ko', markerfacecolor="k")

            plt.ylim((min(0.9, psi.min()-0.1), max(1.1, psi.max()+0.1)))
        plt.show()


    # Do some inference
    # Instantiate the auxiliary variables
    omega = np.zeros_like(psi)

    num_threads = ppg.get_omp_num_threads()
    seeds = np.random.randint(2**16, size=num_threads)
    ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

    # Collect samples
    b_samples = []
    W_samples = []
    A_samples = []
    psi_samples = []

    for itr in xrange(N_samples):
        print "Iteration ", itr
        resample_omega()
        resample_A()
        resample_W_b()
Пример #21
0
def test_parallel2():
    """Test multiple cases of OMP"""
    num_threads = pypolyagamma.get_omp_num_threads()
    if num_threads < 2:
        return

    np.random.seed(0)

    # Each entry is (sampler-count offset relative to num_threads,
    #                draw-count offset relative to the sampler count).
    cases = [
        (0, -1),   # Case 1: n < nthreads, nthreads = num_threads
        (-1, -1),  # Case 2: n < nthreads, nthreads < num_threads
        (+1, -1),  # Case 3: n < nthreads, nthreads > num_threads
        (0, +1),   # Case 4: n > nthreads, nthreads = num_threads
        (-1, +1),  # Case 5: n > nthreads, nthreads < num_threads
        (+1, +1),  # Case 6: n > nthreads, nthreads > num_threads
        (0, 0),    # Case 7: n = nthreads, nthreads = num_threads
        (-1, 0),   # Case 8: n = nthreads, nthreads < num_threads
        (+1, 0),   # Case 9: n = nthreads, nthreads > num_threads
    ]

    for sampler_offset, draw_offset in cases:
        sampler_count = num_threads + sampler_offset
        draw_count = sampler_count + draw_offset
        out = np.zeros(draw_count)
        first_arg = 14 * np.ones(draw_count)
        second_arg = 0 * np.ones(draw_count)
        case_seeds = np.random.randint(2**16, size=sampler_count)
        samplers = [pypolyagamma.PyPolyaGamma(seed) for seed in case_seeds]
        pypolyagamma.pgdrawvpar(samplers, first_arg, second_arg, out)

    return True
Пример #22
0
 def __init__(self, population, trunc=200):
     """Keep the population and build one Polya-gamma sampler per OpenMP thread.

     ``trunc`` is passed as the second argument to every ``ppg.PyPolyaGamma``.
     """
     self.population = population
     thread_count = ppg.get_omp_num_threads()
     self.ppgs = [ppg.PyPolyaGamma(seed, trunc)
                  for seed in np.random.randint(2**16, size=thread_count)]
     # Mirror the population's N on this object
     self.N = self.population.N
Пример #23
0
 def __init__(self, population, trunc=200):
     """Keep the population and build one Polya-gamma sampler per OpenMP thread.

     ``trunc`` is passed as the second argument to every ``ppg.PyPolyaGamma``.
     """
     self.population = population
     thread_count = ppg.get_omp_num_threads()
     self.ppgs = [ppg.PyPolyaGamma(seed, trunc)
                  for seed in np.random.randint(2**16, size=thread_count)]
     # Mirror the population's N on this object
     self.N = self.population.N
Пример #24
0
def test_parallel2():
    """Test multiple cases of OMP"""
    num_threads = pypolyagamma.get_omp_num_threads()
    if num_threads < 2:
        return

    np.random.seed(0)

    # Each entry is (sampler-count offset relative to num_threads,
    #                draw-count offset relative to the sampler count).
    cases = [
        (0, -1),   # Case 1: n < nthreads, nthreads = num_threads
        (-1, -1),  # Case 2: n < nthreads, nthreads < num_threads
        (+1, -1),  # Case 3: n < nthreads, nthreads > num_threads
        (0, +1),   # Case 4: n > nthreads, nthreads = num_threads
        (-1, +1),  # Case 5: n > nthreads, nthreads < num_threads
        (+1, +1),  # Case 6: n > nthreads, nthreads > num_threads
        (0, 0),    # Case 7: n = nthreads, nthreads = num_threads
        (-1, 0),   # Case 8: n = nthreads, nthreads < num_threads
        (+1, 0),   # Case 9: n = nthreads, nthreads > num_threads
    ]

    for sampler_offset, draw_offset in cases:
        sampler_count = num_threads + sampler_offset
        draw_count = sampler_count + draw_offset
        out = np.zeros(draw_count)
        first_arg = 14 * np.ones(draw_count)
        second_arg = 0 * np.ones(draw_count)
        case_seeds = np.random.randint(2**16, size=sampler_count)
        samplers = [pypolyagamma.PyPolyaGamma(seed) for seed in case_seeds]
        pypolyagamma.pgdrawvpar(samplers, first_arg, second_arg, out)

    return True
Пример #25
0
    def __init__(self, model, data=None, **kwargs):
        """Initialize the parent states object, the samplers and the masked data.

        ``data`` must be a ``csr_matrix`` when given: the row-compressed
        layout makes it cheap to find, for each row t, the output dimensions
        n with y_{t,n} > 0. Without data, ``self.masked_data`` and
        ``self.omega`` are set to None.

        Raises:
            AssertionError: if ``data`` is given but is not a ``csr_matrix``.
        """
        super(LDSStatesZeroInflatedCountData, self).\
            __init__(model, data=data, **kwargs)

        # Initialize the Polya-gamma samplers, one per OpenMP thread
        num_threads = ppg.get_omp_num_threads()
        seeds = np.random.randint(2**16, size=num_threads)
        self.ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

        if data is None:
            self.masked_data = None
            self.omega = None
            return

        # Raised explicitly so the check survives `python -O`; the exception
        # type is kept as AssertionError for existing callers.
        if not isinstance(data, csr_matrix):
            raise AssertionError(
                "Data must be a sparse row matrix for zero-inflated models")

        # Initialize a sparse matrix of masked data. The mask specifies
        # which observations were "exposed" and which were
        # deterministically zero. In other words, the mask gives the data
        # values at the places where z_{t,n} = 1.
        T, N = self.T, self.D_emission
        indptr = [0]
        indices = []
        vals = []
        for t in range(T):
            # Get the nonzero entries in the t-th row
            row = slice(data.indptr[t], data.indptr[t + 1])
            ns_t = data.indices[row]
            y_t = np.zeros(N)
            y_t[ns_t] = data.data[row]

            # Sample the zero inflation mask; stored entries are always exposed
            z_t = np.random.rand(N) < self.rho
            z_t[ns_t] = True

            # Record the exposed columns and their values for this row
            t_inds = np.where(z_t)[0]
            indices.append(t_inds)
            vals.append(y_t[t_inds])
            indptr.append(indptr[-1] + t_inds.size)

        # Construct a sparse matrix
        self.masked_data = csr_matrix(
            (np.concatenate(vals), np.concatenate(indices), np.array(indptr)),
            shape=(T, N))

        # DEBUG alternative (disabled): expose every entry by building
        # masked_data from data.toarray() with a full index grid.

        self.resample_auxiliary_variables()