def __init__(self,
                 alpha_0=None,
                 beta_0=None,
                 alphas_0=None,
                 betas_0=None,
                 r_support=None,
                 r_probs=None,
                 r_discrete_distn=None,
                 r=None,
                 ps=None):

        assert (r_discrete_distn is not None) \
                ^ (r_support is not None and r_probs is not None)
        if r_discrete_distn is not None:
            r_support, = np.where(r_discrete_distn)
            r_probs = r_discrete_distn[r_support]
            r_support += 1
        self.r_support = np.asarray(r_support)
        self.rho_0 = self.rho_mf = np.log(r_probs)

        assert (alpha_0 is not None and beta_0 is not None) \
                ^ (alphas_0 is not None and betas_0 is not None)
        alphas_0 = alphas_0 if alphas_0 is not None else [alpha_0] * len(r_support)
        betas_0 = betas_0 if betas_0 is not None else [beta_0] * len(r_support)
        ps = ps if ps is not None else [None] * len(r_support)
        self._fixedr_distns = \
            [self._fixedr_class(r=r,p=p,alpha_0=alpha_0,beta_0=beta_0)
                    for r,p,alpha_0,beta_0 in zip(r_support,ps,alphas_0,betas_0)]

        # for init
        self.ridx = sample_discrete(r_probs)
        self.r = r_support[self.ridx]
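All of these snippets revolve around pybasicbayes.util.stats.sample_discrete, which draws integer indices from a vector of non-negative (not necessarily normalized) weights. As a rough mental model (the library's actual implementation may differ in detail), it behaves like this inverse-CDF sketch:

import numpy as np

def sample_discrete_sketch(weights, size=None):
    """Draw index i with probability weights[i] / weights.sum().

    Illustrative stand-in for pybasicbayes.util.stats.sample_discrete;
    the weights need not be normalized.
    """
    weights = np.asarray(weights, dtype=np.float64)
    assert weights.ndim == 1 and (weights >= 0).all()
    cdf = np.cumsum(weights)                      # unnormalized CDF
    u = np.random.random(size) * cdf[-1]          # uniform draws scaled to total mass
    return np.searchsorted(cdf, u, side='right')  # invert the CDF

Each example below just builds an appropriate weight vector and uses the sampled index to pick a state, a parent spike, a mixture component, or a table.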
Example #2
File: utils.py Project: yinsenm/pgmult
def downsample_data_slow(X, n):
    """
    Downsample each row of X such that it sums to n by randomly removing entries
    """
    from pybasicbayes.util.stats import sample_discrete
    assert X.ndim == 2

    Xsub = X.copy()

    for i in range(Xsub.shape[0]):
        Mi = int(Xsub[i].sum())
        assert Mi >= n
        # if Mi > 1e8: print("Warning: Mi is really large!")
        p = Xsub[i] / float(Mi)

        # Randomly remove entries one at a time until the row sums to n
        for m in range(Mi-n):
            k = sample_discrete(p)
            assert Xsub[i,k] > 0
            Xsub[i,k] -= 1
            p = Xsub[i] / float(Xsub[i].sum())

        assert Xsub[i].sum() == n

    return Xsub
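A minimal usage sketch for the function above; the matrix values are made up for illustration:

import numpy as np

np.random.seed(0)
X = np.array([[5, 3, 2],
              [1, 0, 9]])
Xsub = downsample_data_slow(X, n=4)    # each row now sums to 4
assert (Xsub.sum(axis=1) == 4).all()
assert (Xsub <= X).all()               # entries can only shrink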
Example #3
    def resample(self,data=[]):
        alpha_n, betas_n, posterior_discrete = self._posterior_hypparams(
                *self._get_statistics(data))

        r_idx = sample_discrete(posterior_discrete)
        self.r = self.r_support[r_idx]
        self.p = np.random.beta(alpha_n, betas_n[r_idx])
Example #6
 def _generate(self, N):
     # run a CRP forwards
     alpha_0 = self.alpha_0
     self.z = np.zeros(N, dtype=np.int32)
     for n in range(N):
         self.z[n] = sample_discrete(
             np.concatenate((np.bincount(self.z[:n]), (alpha_0, ))))
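The same forward simulation works standalone; a sketch with a made-up concentration parameter that draws a random partition of N items:

import numpy as np
from pybasicbayes.util.stats import sample_discrete

def crp_assignments(N, alpha_0=2.0):
    # seat customer n at an existing table with weight equal to its size,
    # or at a new table with weight alpha_0
    z = np.zeros(N, dtype=np.int32)
    for n in range(N):
        z[n] = sample_discrete(
            np.concatenate((np.bincount(z[:n]).astype(np.float64), (alpha_0,))))
    return z

print(crp_assignments(10))   # e.g. [0 0 1 0 2 1 0 0 3 0]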
Example #7
    def generate(self,
                 T=100,
                 keep=True,
                 init_data=None,
                 covariates=None,
                 with_noise=True):
        from pybasicbayes.util.stats import sample_discrete
        # Generate from the prior and raise exception if unstable
        K, n = self.num_states, self.D

        # Prepare the covariates
        if covariates is None:
            covariates = np.zeros((T, 0))
        else:
            assert covariates.shape[0] == T

        # Initialize discrete state sequence
        pi_0 = self.init_state_distn.pi_0
        dss = np.empty(T, dtype=np.int32)
        dss[0] = sample_discrete(pi_0.ravel())

        data = np.empty((T, n), dtype='double')
        if init_data is None:
            data[0] = np.random.randn(n)
        else:
            data[0] = init_data

        for t in range(1, T):
            # Sample discrete state given previous continuous state and covariates
            cov_t = np.column_stack((data[t - 1:t], covariates[t]))
            A = self.trans_distn.get_trans_matrices(cov_t)[0]
            dss[t] = sample_discrete(A[dss[t - 1], :])

            # Sample continuous state given current discrete state
            if with_noise:
                data[t] = self.obs_distns[dss[t]].rvs(cov_t, return_xy=False)
            else:
                data[t] = self.obs_distns[dss[t]].predict(cov_t)

            assert np.all(np.isfinite(data[t])), "RARHMM appears to be unstable!"

        # TODO:
        # if keep:
        #     ...

        return data, dss
Example #8
    def generate_states(self,
                        initial_condition=None,
                        with_noise=True,
                        stateseq=None):
        """
        Jointly sample the discrete and continuous states
        """
        from pybasicbayes.util.stats import sample_discrete
        # Generate from the prior and raise exception if unstable
        T, K, n = self.T, self.num_states, self.D_latent

        # Initialize discrete state sequence
        dss = -1 * np.ones(T, dtype=np.int32) if stateseq is None else stateseq
        gss = np.empty((T, n), dtype='double')

        if initial_condition is None:
            init_state_distn = np.ones(self.num_states) / float(
                self.num_states)
            dss[0] = sample_discrete(init_state_distn.ravel())
            gss[0] = self.init_dynamics_distns[dss[0]].rvs()
        else:
            dss[0] = initial_condition[0]
            gss[0] = initial_condition[1]

        for t in range(1, T):
            # Sample discrete state given previous continuous state
            A = self.trans_distn.get_trans_matrices(gss[t - 1:t])[0]
            if with_noise:
                # Sample discrete state from recurrent transition matrix
                if dss[t] == -1:
                    dss[t] = sample_discrete(A[dss[t - 1], :])

                # Sample continuous state given current discrete state
                gss[t] = self.dynamics_distns[dss[t-1]].\
                    rvs(x=np.hstack((gss[t-1][None,:], self.inputs[t-1][None,:])),
                        return_xy=False)
            else:
                # Pick the most likely next discrete state and continuous state
                if dss[t] == -1:
                    dss[t] = np.argmax(A[dss[t - 1], :])

                gss[t] = self.dynamics_distns[dss[t-1]]. \
                    predict(np.hstack((gss[t-1][None,:], self.inputs[t-1][None,:])))
            assert np.all(np.isfinite(gss[t])), "SLDS appears to be unstable!"

        self.stateseq = dss
        self.gaussian_states = gss
Example #9
    def energy(self,data):
        # TODO TODO this function is horrible
        assert data.ndim == 1

        if np.isnan(data).any():
            return 0.

        from .util.stats import sample_discrete
        likes = np.array([c.log_likelihood(data) for c in self.components]).reshape((-1,))
        likes += np.log(self.weights.weights)
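        # shift by the max for numerical stability; sample_discrete accepts
        # unnormalized weights, so the result need not sum to one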
        label = sample_discrete(np.exp(likes - likes.max()))

        return self.components[label].energy(data)
Example #11
    def sample(self, z, x, i, n):
        """ Sample the next state given the previous time index

            :param z:       TxNxD buffer of particle states
            :param x:       NxD output buffer for observations
            :param i:       Time index to sample
            :param n:       Particle index to sample
        """
        psi = np.dot(self.C, z[i, n, :]) + self.mu
        pi = psi_to_pi(psi)

        from pybasicbayes.util.stats import sample_discrete
        s = sample_discrete(pi)
        x[i, :] = 0
        x[i, s] = 1
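psi_to_pi maps the unconstrained vector psi to a point on the probability simplex. In pgmult this is, to my understanding, a stick-breaking construction; the sketch below is written from that definition and is an assumption, not code from the library:

import numpy as np

def psi_to_pi_sketch(psi):
    # stick-breaking: sigmoid(psi[k]) is the fraction of the remaining
    # stick given to component k; the last component takes what is left
    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
    K = psi.shape[0] + 1
    pi = np.zeros(K)
    stick = 1.0
    for k in range(K - 1):
        pi[k] = sigmoid(psi[k]) * stick
        stick -= pi[k]
    pi[-1] = stick
    return pi    # non-negative and sums to one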
Example #12
    def generate_states(self, initial_condition=None, with_noise=True, stateseq=None):
        """
        Generate discrete and continuous states. Note that the handling of
        'with_noise' differs slightly from the pySLDS implementation: rather
        than selecting the most likely discrete state, we randomly sample the
        discrete states.
        """
        if stateseq is None:
            As = self.trans_matrix
            self.stateseq = -1 * np.ones(self.T, dtype=np.int32)
            self.stateseq[0] = np.random.choice(self.num_states)
            for t in range(1, self.T):
                self.stateseq[t] = sample_discrete(As[t-1, self.stateseq[t-1], :].ravel())

        else:
            assert stateseq.shape == (self.T,)
            self.stateseq = stateseq.astype(np.int32)
Example #14
    def rvs(self,customer_counts):
        # could replace this with one of the faster C versions I have lying
        # around, but at least the Python version is clearer
        assert isinstance(customer_counts,list) or isinstance(customer_counts,int)
        if isinstance(customer_counts,int):
            customer_counts = [customer_counts]

        restaurants = []
        for num in customer_counts:
            # a CRP with num customers
            tables = []
            for c in range(num):
                newidx = sample_discrete(np.array(tables + [self.concentration]))
                if newidx == len(tables):
                    tables += [1]
                else:
                    tables[newidx] += 1

            restaurants.append(tables)

        return restaurants if len(restaurants) > 1 else restaurants[0]
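A usage sketch, assuming a hypothetical object crp exposing the concentration attribute this method reads:

crp.concentration = 1.5      # hypothetical value
tables = crp.rvs(10)         # table sizes for one CRP with 10 customers, e.g. [5, 3, 2]
assert sum(tables) == 10

both = crp.rvs([10, 5])      # a list argument yields one table list per count
assert [sum(t) for t in both] == [10, 5]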
Example #17
 def resample(self,data=[]):
     n, alpha_n, posterior_discrete, r_support = self._posterior_hypparams(
             *self._get_statistics(data)) # NOTE: pass out r_support b/c feasible subset
     self.r = r_support[sample_discrete(posterior_discrete)]
     self.p = np.random.beta(alpha_n - n*self.r, self.beta_0 + n*self.r)
Example #18
 def _resample_r(self,data):
     self.ridx = sample_discrete(
             self._posterior_hypparams(self._get_statistics(data)))
     self.r = self.r_support[self.ridx]
     return self
Example #19
 def _resample_r_from_mf(self):
     lognorm = logsumexp(self.rho_mf)
     self.ridx = sample_discrete(np.exp(self.rho_mf - lognorm))
     self.r = self.r_support[self.ridx]
Example #20
 def rvs(self,size=None):
     return sample_discrete(self.weights,size)
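Because sample_discrete takes an optional size argument, this rvs returns either a single index or an array of them. A quick check with made-up weights:

import numpy as np
from pybasicbayes.util.stats import sample_discrete

weights = np.array([0.2, 0.5, 0.3])
one = sample_discrete(weights)               # a single index in {0, 1, 2}
many = sample_discrete(weights, size=1000)   # array of 1000 indices
# empirical frequencies should be close to the weights
print(np.bincount(many, minlength=3) / 1000.0)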
Example #22
    def resample_Z_python(self):
        from pybasicbayes.util.stats import sample_discrete

        # TODO: Call cython function to resample parents
        S, C, Z, dt_max = self.S, self.C, self.Z, self.dt_max
        lambda0 = self.model.bias_model.lambda0
        W = self.model.weight_model.W
        impulse = self.model.impulse_model.impulse

        # Also compute number of parents assigned to background rate and
        # to specific connections
        self.bkgd_ss = np.zeros(self.K)
        self.weight_ss = np.zeros((self.K, self.K))
        self.imp_ss = np.zeros((self.K, self.K))

        # Resample parents
        for n in range(self.N):

            if n == 0:
                Z[n] = -1
                self.bkgd_ss[C[n]] += 1
                continue

            # Compute the probability of each parent spike
            p_par = np.zeros(n)
            denom = 0

            # First parent is just the background rate of this process
            p_bkgd = lambda0[C[n]]
            denom += p_bkgd

            # Iterate backward from the most recent to compute probabilities of each parent spike
            for par in range(n - 1, -1, -1):
                dt = S[n] - S[par]

                # Since the spikes are sorted, we can stop if we reach a potential
                # parent that occurred greater than dt_max in the past
                if dt > dt_max:
                    p_par[par] = 0
                    break

                p_par[par] = W[C[par], C[n]] * impulse(dt, C[par], C[n])
                denom += p_par[par]

            # Now sample forward, starting from the minimum viable parent
            min_par = par
            p_par = np.concatenate([[p_bkgd], p_par[min_par:n]])

            # Sample from the discrete distribution p_par
            i_par = sample_discrete(p_par)

            if i_par == 0:
                # Sampled the background rate
                Z[n] = -1
                self.bkgd_ss[C[n]] += 1

            else:
                # Sampled one of the preceding spikes
                Z[n] = (i_par - 1) + min_par
                Cp = C[Z[n]]
                dt = S[n] - S[Z[n]]

                self.weight_ss[Cp, C[n]] += 1
                self.imp_ss[Cp, C[n]] += np.log(dt) - np.log(dt_max - dt)
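The core move here is the same as in the other examples: stack the background rate and the candidate-parent excitation weights into one unnormalized vector and let sample_discrete pick an entry. A stripped-down sketch of that single step (the names are illustrative, not the model's API):

import numpy as np
from pybasicbayes.util.stats import sample_discrete

def sample_parent_sketch(bkgd_rate, parent_weights):
    """Return -1 for the background process, else the index of the parent spike.

    bkgd_rate:      scalar background intensity for this spike's process
    parent_weights: array of W * impulse terms, one per candidate parent
    """
    p = np.concatenate([[bkgd_rate], parent_weights])
    i = sample_discrete(p)    # unnormalized weights are fine
    return -1 if i == 0 else i - 1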
Example #23
    def resample_Z_python(self):
        from pybasicbayes.util.stats import sample_discrete

        # TODO: Call cython function to resample parents
        S, C, Z, dt_max = self.S, self.C, self.Z, self.dt_max
        lambda0 = self.model.bias_model.lambda0
        W = self.model.weight_model.W_effective
        impulse = self.model.impulse_model.impulse
        translate_dt = self.model.impulse_model.translate_dt

        # Also compute number of parents assigned to background rate and
        # to specific connections
        self.bkgd_ss = np.zeros(self.K)
        self.weight_ss = np.zeros((self.K, self.K))
        self.imp_ss = np.zeros((3, self.K, self.K))

        # Resample parents
        for n in range(self.N):

            if n == 0:
                Z[n] = -1
                self.bkgd_ss[C[n]] += 1
                continue

            # Compute the probability of each parent spike
            p_par = np.zeros(n)
            denom = 0

            # First parent is just the background rate of this process
            p_bkgd = lambda0[C[n]]
            denom += p_bkgd

            # Iterate backward from the most recent to compute probabilities of each parent spike
            for par in range(n - 1, -1, -1):
                dt = S[n] - S[par]

                if dt < 1e-8:
                    continue

                if dt > dt_max - 1e-8:
                    break

                p_par[par] = W[C[par], C[n]] * impulse(dt, C[par], C[n])
                denom += p_par[par]

            # Now sample forward, starting from the minimum viable parent
            min_par = par
            p_par = np.concatenate([[p_bkgd], p_par[min_par:n]])

            # Sample from the discrete distribution p_par
            i_par = sample_discrete(p_par)

            if i_par == 0:
                # Sampled the background rate
                Z[n] = -1
                self.bkgd_ss[C[n]] += 1
            else:
                # Sampled one of the preceding spikes
                Z[n] = (i_par - 1) + min_par
                Cp = C[Z[n]]
                # dt = S[n] - S[Z[n]]
                dt = translate_dt(S[n] - S[Z[n]])
                self.weight_ss[Cp, C[n]] += 1
                self.imp_ss[0, Cp, C[n]] += 1
                self.imp_ss[1, Cp, C[n]] += np.log(dt) - np.log(dt_max - dt)
        self.Z = Z

        # accumulate squared deviations of the impulse statistics around their means
        mu = np.divide(self.imp_ss[1], self.imp_ss[0])
        for n in range(self.N):
            par = Z[n]
            if par > -1:
                # dt = S[n] - S[par]
                dt = translate_dt(S[n] - S[par])
                sdt = np.log(dt) - np.log(dt_max - dt)
                self.imp_ss[2, C[par], C[n]] += (sdt - mu[C[par], C[n]])**2