Example #1
    def draw_m(self, it, x, j, Kmax, verbose):
        """
        Helper function which does the draws from the m_jt full conditional,
        which implicitly determines the overall clustering structure for each document.
        Updates the counts and the samples matrices at iteration `it`.
        Called by gibbs_direct()
        """

        k_next = self.direct_samples[it, :]
        self.m_ *= 0  # reset the m counts
        # Cycle through the k values of each restaurant
        j_idx, k_idx = np.where(self.q_ > 0)  # find the occupied clusters
        for i in np.random.permutation(len(j_idx)):
            jj, kk = j_idx[i], k_idx[i]
            max_m = self.q_[jj, kk]

            abk = self.a0_ * self.beta_samples[it, kk]
            m_range = np.arange(max_m) + 1
            log_s = np.array([self.stir_.stirlog(max_m, m) for m in m_range])
            m_dist = np.exp(
                logg(abk) - logg(abk + max_m) + log_s + m_range * np.log(abk))

            m_dist[np.logical_not(np.isfinite(m_dist))] = 0
            m_dist += 1e-10
            m_dist /= np.sum(m_dist)

            mm1 = np.random.choice(m_range, p=m_dist)
            self.m_[jj, kk] = mm1
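
The weights sampled here follow the standard HDP table-count conditional: P(m_jk = m) is proportional to Gamma(a0*beta_k) / Gamma(a0*beta_k + n_jk) * |s(n_jk, m)| * (a0*beta_k)^m, where |s(n, m)| is the unsigned Stirling number of the first kind. Below is a minimal self-contained sketch of the same draw, assuming `logg` aliases scipy.special.gammaln and substituting a plain Stirling recursion for `self.stir_.stirlog` (both are assumptions, not taken from this example):

import numpy as np
from scipy.special import gammaln  # assumed equivalent of `logg`

def log_stirling_row(n):
    """log |s(n, m)| for m = 1..n (unsigned Stirling numbers of the first kind)."""
    # Recursion |s(n, m)| = (n-1)|s(n-1, m)| + |s(n-1, m-1)| on raw values;
    # fine for small n, see the log-space sketch after Example #2 otherwise.
    s = np.zeros((n + 1, n + 1))
    s[0, 0] = 1.0
    for nn in range(1, n + 1):
        for m in range(1, nn + 1):
            s[nn, m] = (nn - 1) * s[nn - 1, m] + s[nn - 1, m - 1]
    return np.log(s[n, 1:n + 1])

def draw_m_jk(n_jk, alpha_beta_k, rng=np.random.default_rng()):
    """Draw m_jk, the number of tables serving dish k in restaurant j,
    given n_jk customers there eating dish k and alpha_beta_k = a0 * beta_k."""
    m_range = np.arange(1, n_jk + 1)
    log_w = (gammaln(alpha_beta_k) - gammaln(alpha_beta_k + n_jk)
             + log_stirling_row(n_jk) + m_range * np.log(alpha_beta_k))
    w = np.exp(log_w - log_w.max())           # stabilise before normalising
    return rng.choice(m_range, p=w / w.sum())

print(draw_m_jk(5, 0.8))                      # most often 1, 2, or 3 here
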
Example #2
    def draw_m(self, it, x, j, Kmax, verbose):
        """
        Helper function which does the draws from the m_jt full conditional.
        Updates the counts and the samples matrices at iteration `it`.
        Called by gibbs_direct()
        """

        k_next = self.direct_samples[it, :, 1]
        self.m_ *= 0  # reset the m counts
        # Cycle through the k values of each restaurant
        j_idx, k_idx = np.where(self.q_ > 0)  # find the consumed dishes
        for i in np.random.permutation(len(j_idx)):
            jj, kk = j_idx[i], k_idx[i]
            max_m = self.q_[jj, kk]

            abk = self.a0_ * self.beta_samples[it, kk]
            m_range = np.arange(max_m) + 1
            log_s = np.array([self.stir_.stirlog(max_m, m) for m in m_range])
            m_dist = np.exp(
                logg(abk) - logg(abk + max_m) + log_s + m_range * np.log(abk))
            """MOSTLY FIXED.  m_dist should be a proper distribution"""
            m_dist[np.logical_not(np.isfinite(m_dist))] = 0
            m_dist += 1e-10

            mm1 = np.random.choice(m_range, p=m_dist / np.sum(m_dist))
            self.m_[jj, kk] = mm1
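
Both versions rely on `self.stir_.stirlog(n, m)`, which by the formula above is presumably the log of the unsigned Stirling number of the first kind |s(n, m)|. A hedged sketch of such a helper (an assumed stand-in, not the repository's own class), built with a log-space recursion so that large counts do not overflow:

import numpy as np

class LogStirling:
    """Cache of log |s(n, m)|, the unsigned Stirling numbers of the first kind."""

    def __init__(self, n_max):
        logS = np.full((n_max + 1, n_max + 1), -np.inf)
        logS[0, 0] = 0.0                            # |s(0, 0)| = 1
        for n in range(1, n_max + 1):
            log_nm1 = np.log(n - 1) if n > 1 else -np.inf
            for m in range(1, n + 1):
                # |s(n, m)| = (n-1)|s(n-1, m)| + |s(n-1, m-1)|, done in log space
                logS[n, m] = np.logaddexp(log_nm1 + logS[n - 1, m],
                                          logS[n - 1, m - 1])
        self._log = logS

    def stirlog(self, n, m):
        return self._log[n, m]

stir = LogStirling(50)
print(np.exp(stir.stirlog(5, 2)))               # ~50, since |s(5, 2)| = 50
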
Example #3
def mnom_fk_cust(i, x, k, Kmax, L, ha, new=False):
    """
    Computes the mixture components for a given customer across all k values.
    MODEL: base measure H ~ Dirichlet(L, ha_1,...,ha_L),
                        F(x|phi) ~ Multinomial(n_ji, phi_1,...,phi_L)
    All components are calculated exactly in log-space and then exponentiated.
    X can be a dense or a sparse csr-style matrix.
    
    returns: (Kmax,) vector; if new=True, returns a scalar
    """
    
    xi, ni = x[i, :], np.sum(x[i, :])
    log_con = logg(ni + 1) - np.sum(logg(xi + np.ones(L))) # term constant for all k
    # Calculate the case where k has no members
    
    if new:
        fknew_cust = np.exp( log_con + np.sum(logg(xi + ha)) - logg(np.sum(xi + ha)) + 
                             logg(np.sum(ha)) - np.sum(logg(ha)) )
        return fknew_cust        
    
    # Get subset of customers eating kk; each entry is a (#, L) matrix
    x_kks = [x[k == kk, :] for kk in range(Kmax)]  
    
    # Compute params from Dirichlet kernel tricks done in fk function
    a_bot = np.vstack([np.sum(x_kk, axis=0) for x_kk in x_kks]) + ha[None, :]    # (Kmax, L)
    a_bot[k[i], :] -= xi                         # offset if xi is in this subset
    a_top = np.apply_along_axis(lambda row: row + xi, 1, a_bot)
    fk_cust = np.exp( log_con + np.sum(logg(a_top), axis=1) - logg(np.sum(a_top, axis=1)) +
                      logg(np.sum(a_bot, axis=1)) - np.sum(logg(a_bot), axis=1) )
     
    # Convert back to a dense array in case X was sparse
    return np.asarray(fk_cust).ravel()
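
A hypothetical call, assuming `x` is an (N, L) matrix of per-customer counts, `k` the (N,) vector of current dish assignments, and `ha` the (L,) Dirichlet hyperparameter (shapes inferred from the code above; `logg` again taken to be scipy.special.gammaln):

import numpy as np
from scipy.special import gammaln as logg   # assumed alias used by the example

rng = np.random.default_rng(0)
N, L, Kmax = 8, 4, 3
x = rng.integers(0, 5, size=(N, L))         # toy count matrix
k = rng.integers(0, Kmax, size=N)           # current dish assignments
ha = np.full(L, 0.5)                        # symmetric Dirichlet(0.5) base measure

fk = mnom_fk_cust(0, x, k, Kmax, L, ha)                # (Kmax,) existing-dish terms
fk_new = mnom_fk_cust(0, x, k, Kmax, L, ha, new=True)  # scalar for a fresh dish
print(fk, fk_new)
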
Example #4
def mnom_fk_tabl(jj, tt, x, j, t, k, Kmax, L, ha, new=False):
    """
    Computes the mixture components for a given table (jj, tt) across all k values.
    MODEL: base measure H ~ Dirichlet(L, ha_1,...,ha_L),
                        F(x|phi) ~ Multinomial(n_ji, phi_1,...,phi_L)
    All components are calculated exactly in log-space and then exponentiated.
    
    returns: (Kmax,) vector; if new=True, returns a scalar
    """
    
    x_jt = x[np.logical_and(j == jj, t == tt), :]                                # (|T|, L)
    kk = k[np.logical_and(j == jj, t == tt)]
    n_jt = np.sum(x_jt, axis=1)                                                  # (|T|,)
    sum_jt = np.sum(x_jt, axis=0)                                                # (L,)
    log_con = np.sum(logg(n_jt + 1)) - np.sum(logg(x_jt + 1))    # term constant for all k
    
    fknew_tabl = np.exp( log_con + np.sum(logg(sum_jt + ha)) - logg(np.sum(sum_jt + ha)) + 
                         logg(np.sum(ha)) - np.sum(logg(ha)) )
    # If table jt doesn't exist, just return the "new" mixture component
    if x_jt.shape[0] == 0:
        #print(f"WARNING: table {(jj, tt)} does not exist currently")
        new = True
    if new: return fknew_tabl
    
    # Get subset of customers eating kk; each entry is a (#, L) matrix
    x_kks = [x[k == kk, :] for kk in range(Kmax)]
      
    # Compute params from Dirichlet kernel tricks done in fk function
    a_bot = np.vstack([np.sum(x_kk, axis=0) for x_kk in x_kks]) + ha[None, :]    # (Kmax, L)
    a_bot[kk[0], :] -= sum_jt                       # offset if table x_jt is in this subset
    a_top = a_bot + sum_jt[None, :]
    fk_tabl = np.exp( log_con + np.sum(logg(a_top), axis=1) - logg(np.sum(a_top, axis=1)) +
                      logg(np.sum(a_bot, axis=1)) - np.sum(logg(a_bot), axis=1) )

    return fk_tabl
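
A hypothetical call for the table-level version; the toy data keeps every customer at a table on the same dish, as the Chinese-restaurant-franchise bookkeeping assumes (names and shapes inferred from the signature):

import numpy as np

rng = np.random.default_rng(1)
N, L, Kmax = 12, 4, 3
x = rng.integers(0, 5, size=(N, L))
j = np.repeat([0, 1], 6)                  # restaurant (document) per customer
t = np.tile([0, 1], 6)                    # table within the restaurant
dish_of = rng.integers(0, Kmax, size=(2, 2))
k = dish_of[j, t]                         # dish shared by everyone at a table
ha = np.full(L, 0.5)

fk = mnom_fk_tabl(0, 1, x, j, t, k, Kmax, L, ha)                # (Kmax,) vector
fk_new = mnom_fk_tabl(0, 1, x, j, t, k, Kmax, L, ha, new=True)  # scalar
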
Example #5
def pois_fk_cust(i, x, k, Kmax, ha, hb, new=False):
    """
    Computes the mixture components for a given customer across all k values.
    MODEL: base measure H ~ Gamma(ha, hb), F(x|phi) ~ Poisson(phi)
    All components are calculated exactly in log-space and then exponentiated.
    
    returns: (Kmax,) vector; if new=True, returns a scalar
    """

    x = x.flatten()  # reshape to 1D, since gibbs routine passes in a 2D array

    # Calculate the case where k has no members
    fknew_cust = np.exp(-logg(x[i] + 1) + logg(x[i] + ha) - logg(ha) -
                        (x[i] + ha) * np.log(1 + hb) + ha * np.log(hb))
    if new: return fknew_cust

    x_kks = [x[k == kk] for kk in range(Kmax)]  # subset of customers eating kk
    xi_in = np.zeros(Kmax)  # offset if x[i] is in this subset
    xi_in[k[i]] = 1

    # Compute (a,b) params from gamma kernel tricks done in fk function
    av = np.array(list(map(np.sum, x_kks))) - xi_in * x[i] + ha
    bv = np.array(list(map(len, x_kks))) - xi_in + hb
    fk_cust = np.exp(-logg(x[i] + 1) + logg(x[i] + av) - logg(av) -
                     (x[i] + av) * np.log(1 + bv) + av * np.log(bv))

    return fk_cust
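
A hypothetical call for the Gamma-Poisson case; with new=True the value is the negative-binomial predictive of x[i] under the Gamma(ha, hb) prior (hyperparameter names taken from the signature, `logg` assumed to be scipy.special.gammaln):

import numpy as np
from scipy.special import gammaln as logg   # assumed alias

rng = np.random.default_rng(2)
N, Kmax = 10, 3
x = rng.poisson(4.0, size=(N, 1))           # 2-D, as the Gibbs routine passes it
k = rng.integers(0, Kmax, size=N)
ha, hb = 1.0, 1.0                           # Gamma(shape, rate) hyperparameters

fk = pois_fk_cust(0, x, k, Kmax, ha, hb)                # (Kmax,) vector
fk_new = pois_fk_cust(0, x, k, Kmax, ha, hb, new=True)  # scalar
print(fk, fk_new)
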
Example #6
def pois_fk_tabl(jj, tt, x, j, t, k, Kmax, ha, hb, new=False):
    """
    Computes the mixture components for a given table across all k values.
    MODEL: base measure H ~ Gamma(ha, hb), F(x|phi) ~ Poisson(phi)
    All components are calculated exactly in log-space and then exponentiated.
    
    returns: (Kmax,) vector; if new=True, the new-table component repeated as a (Kmax,) vector
    """
    
    x = x.flatten()  # reshape to 1D, since gibbs routine passes in a 2D array
    x_jt = x[np.logical_and(j == jj, t == tt)]
    kk = k[np.logical_and(j == jj, t == tt)]
    
    fknew_tabl = np.exp( -np.sum(logg(x_jt + 1)) + logg(np.sum(x_jt) + ha) - logg(ha) -
                         (np.sum(x_jt) + ha)*np.log(len(x_jt) + hb) + ha*np.log(hb) )
    # If table jt doesn't exist, just return the "new" mixture component
    if len(x_jt) == 0:
        #print(f"WARNING: table {(jj, tt)} does not exist currently")
        new = True
    if new: return np.full(Kmax, fknew_tabl)
    
    x_kks = [x[k == kk] for kk in range(Kmax)]  # subset of customers at tables serving kk
    xjt_in = np.zeros(Kmax)                     # offset if table x_jt is in this subset
    xjt_in[kk[0]] = 1
      
    # Compute (a,b) params from gamma kernel tricks done in fk function
    av = np.array(list(map(np.sum, x_kks))) - xjt_in*np.sum(x_jt) + ha
    bv = np.array(list(map(len, x_kks))) - xjt_in*len(x_jt) + hb
    fk_tabl = np.exp( -np.sum(logg(x_jt + 1)) + logg(np.sum(x_jt) + av) - logg(av) -
                       (np.sum(x_jt) + av)*np.log(len(x_jt) + bv) + av*np.log(bv) )
     
    return fk_tabl
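
For context, a minimal sketch (not part of these examples) of how such per-dish components are typically combined when re-sampling a table's dish in the Chinese restaurant franchise: existing dishes are weighted by the number of tables already serving them, a new dish by the top-level concentration. All names below are hypothetical.

import numpy as np

def sample_dish_for_table(fk, fk_new, m_k, gamma, rng=np.random.default_rng()):
    """fk:     (Kmax,) per-dish components, e.g. from pois_fk_tabl(...)
    fk_new: scalar new-dish component, e.g. pois_fk_tabl(..., new=True)[0]
    m_k:    (Kmax,) number of tables currently serving each dish
    gamma:  concentration of the top-level Dirichlet process"""
    w = np.append(m_k * fk, gamma * fk_new)   # unnormalised p(k_jt = k | rest)
    return rng.choice(len(w), p=w / w.sum())  # index Kmax means "open a new dish"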