示例#1
0
    def _update_mins(self, mask=None):
        """Refresh ``self.min_DIMS``, the minimum of g- and y+ (m).

        That minimum is sampled from a Bessel; instead of its mean, the mode
        of the Bessel distribution is used as a point estimate.

        mask -- optional; when not None, ``self.min_DIMS`` is multiplied by
                it after the update.
        """
        # Variance term for mu, built from the theta moments.
        theta_sq_DK_M = np.square(self.theta_E_DK_M)
        mu_var_DIMS = (parafac(theta_sq_DK_M + self.theta_V_DK_M) -
                       parafac(theta_sq_DK_M))

        # Geometric expectation of lambda, plus a variance term computed
        # from the first shape entry only.
        lam_geo_2DIMS = np.exp(
            psi(self.lam_shp_2DIMS) - np.log(self.lam_rte_DIMS))
        lam_pos_var_DIMS = (
            self.lam_shp_2DIMS[0] / np.square(self.lam_rte_DIMS))

        # Delta-method approximation of a geometric expectation:
        # E[f(x)] ~= f(E[x]) + f"(E[x])V[x]/2
        centre_DIMS = self.mu_G_DIMS + lam_geo_2DIMS[0]
        total_var_DIMS = mu_var_DIMS + lam_pos_var_DIMS
        correction_DIMS = np.exp(
            -total_var_DIMS / (2 * np.square(centre_DIMS)))
        rate_geo_DIMS = centre_DIMS * correction_DIMS

        # Bessel parameters: a from the rates, nu from the observed data.
        a_DIMS = 2 * np.sqrt(lam_geo_2DIMS[1] * rate_geo_DIMS)
        dense_input = isinstance(self.data_DIMS, skt.dtensor)
        nu_DIMS = (self.data_DIMS.copy() if dense_input
                   else self.data_DIMS.toarray())

        # Mode of the Bessel: floor((sqrt(a^2 + nu^2) - nu) / 2)
        root_DIMS = np.sqrt(np.square(a_DIMS) + np.square(nu_DIMS))
        self.min_DIMS = np.floor_divide(root_DIMS - nu_DIMS, 2)

        if mask is None:
            return
        self.min_DIMS *= mask
示例#2
0
 def _update_mu(self, mask=None):
     """Recompute ``self.mu_G_DIMS`` from the current theta moments.

     The value is exp(log(m) - v / (2 * m^2)) with m = parafac(theta_E_DK_M)
     and v = parafac(theta_V_DK_M). When ``mask`` is not None the result is
     multiplied by it.
     """
     mean_DIMS = parafac(self.theta_E_DK_M)
     # Second-order correction added to the log of the mean.
     correction_DIMS = -parafac(self.theta_V_DK_M) / (
         2 * np.square(mean_DIMS))
     self.mu_G_DIMS = np.exp(np.log(mean_DIMS) + correction_DIMS)
     if mask is None:
         return
     self.mu_G_DIMS *= mask
示例#3
0
    def _elbo(self, priv, mask=None):
        """Computes the Evidence Lower Bound (ELBO).

        The bound is evaluated against the model's own state (e.g.
        ``self.y_E_DIMS`` and the theta / lambda parameters); no data tensor
        is passed in.

        Arguments
        ---------

        priv : number
            Privacy parameter. The privacy-related terms are only added to
            the bound when ``priv > 0``.

        mask : None, skt.dtensor or skt.sptensor containing 1s and 0s
            Masks sections of the data, with 1s indicating data regions that
            are kept.

        Returns
        -------

        bound : the accumulated value of the bound.
        """
        # Per-component term that is subtracted from the bound; how it is
        # obtained depends on the type of the mask.
        # NOTE(review): for a mask of any other type none of the branches
        # runs, uttkrp_K stays unassigned and the statement after this chain
        # raises.
        if mask is None:
            uttkrp_K = self.sum_theta_E_MK.prod(axis=0)
        elif isinstance(mask, skt.dtensor):
            uttkrp_DK = mask.uttkrp(self.theta_E_DK_M, 0)
            uttkrp_K = (self.theta_E_DK_M[0] * uttkrp_DK).sum(axis=0)
        elif isinstance(mask, skt.sptensor):
            uttkrp_DK = sp_uttkrp(mask.vals, mask.subs, 0, self.theta_E_DK_M)
            uttkrp_K = (self.theta_E_DK_M[0] * uttkrp_DK).sum(axis=0)

        bound = -uttkrp_K.sum()

        # Indices and values of the entries of y_E_DIMS that contribute to
        # the log term: the stored entries of a sparse tensor, otherwise the
        # nonzero entries.
        if isinstance(self.y_E_DIMS, skt.sptensor):
            subs_I_M = self.y_E_DIMS.subs
            vals_I = self.y_E_DIMS.vals
        else:
            subs_I_M = self.y_E_DIMS.nonzero()
            vals_I = self.y_E_DIMS[subs_I_M]
        # Reconstruction evaluated only at those indices.
        nz_recon_I = self._reconstruct_nz(subs_I_M)

        bound += (vals_I * np.log(nz_recon_I)).sum()

        # One gamma term per mode, with prior parameters (alpha,
        # alpha * beta_M[m]) and the mode's shape / rate parameters.
        K = self.n_components
        for m in range(self.n_modes):
            bound += _gamma_bound_term(pa=self.alpha,
                                       pb=self.alpha * self.beta_M[m],
                                       qa=self.theta_shp_DK_M[m],
                                       qb=self.theta_rte_DK_M[m]).sum()
            bound += K * self.mode_dims[m] * self.alpha * np.log(
                self.beta_M[m])

        # Privacy variables
        if priv > 0:
            # digamma(shape) - log(rate) for the lambda parameters.
            log_lam_E_2DIMS = digamma(self.lam_shp_2DIMS) - np.log(
                self.lam_rte_DIMS)
            bound -= np.sum(parafac(self.theta_E_DK_M))
            # Only nonzero entries of y_pos_E_DIMS enter the log.
            bound -= np.sum(
                np.log(self.y_pos_E_DIMS[np.nonzero(self.y_pos_E_DIMS)]))
            bound += np.log(priv) * np.sum(self.g_pos_E_DIMS +
                                           self.g_neg_E_DIMS)
            bound += np.sum(self.lam_shp_2DIMS / self.lam_rte_DIMS) / priv
            bound -= np.sum(self.g_pos_E_DIMS * log_lam_E_2DIMS)
            bound += np.sum(
                self.y_pos_E_DIMS *
                np.log(self.mu_G_DIMS + np.exp(log_lam_E_2DIMS[0])))
        return bound
示例#4
0
    def reconstruct(self, mask=None, version='geometric', drop_diag=False):
        """Rebuild the data tensor from point estimates of the factors.

        Currently supported only up to 5-way tensors.

        mask -- accepted but not used by this method.
        version -- 'geometric' uses ``self.theta_G_DK_M``, 'arithmetic'
                   uses ``self.theta_E_DK_M``; anything else fails the
                   assertion.
        drop_diag -- when true, entries selected by an identity mask of size
                     ``Y_pred.shape[0]`` are set to 0.

        Returns the result of ``parafac`` on copies of the chosen factors.
        """
        assert version in ('geometric', 'arithmetic')
        if version == 'geometric':
            chosen_DK_M = self.theta_G_DK_M
        elif version == 'arithmetic':
            chosen_DK_M = self.theta_E_DK_M

        # Work on copies so the stored factors are never handed to parafac.
        Y_pred = parafac([factor_DK.copy() for factor_DK in chosen_DK_M])
        if not drop_diag:
            return Y_pred
        on_diagonal = np.identity(Y_pred.shape[0]).astype(bool)
        Y_pred[on_diagonal] = 0
        return Y_pred
示例#5
0
def generate(shp=(30, 30, 20, 10), K=5, alpha=0.1, beta=0.1):
    """Sample a count tensor from the BPTF generative model.

    PARAMS:
    shp -- (tuple) shape of the generated count tensor
    K -- (int) number of latent components
    alpha -- (float) shape parameter of gamma prior over factors
    beta -- (float) rate parameter of gamma prior over factors

    RETURNS:
    Mu -- (np.ndarray) true Poisson rates
    Y -- (np.ndarray) generated count tensor
    """
    # One (D, K) factor matrix per mode; the rate beta is passed to
    # rn.gamma as its reciprocal.
    factors_DK_M = []
    for dim in shp:
        factors_DK_M.append(rn.gamma(alpha, 1. / beta, size=(dim, K)))

    Mu = parafac(factors_DK_M)
    assert Mu.shape == shp
    return Mu, rn.poisson(Mu)
示例#6
0
def generate(shp=(30, 30, 20, 10), K=5, alpha=0.1, beta=0.1):
    """Draw Poisson rates and counts from the BPTF model.

    PARAMS:
    shp -- (tuple) shape of the generated count tensor
    K -- (int) number of latent components
    alpha -- (float) shape parameter of gamma prior over factors
    beta -- (float) rate parameter of gamma prior over factors

    RETURNS:
    Mu -- (np.ndarray) true Poisson rates
    Y -- (np.ndarray) generated count tensor
    """

    def draw_factor(n_rows):
        # Gamma draw for a single mode; beta enters as its reciprocal.
        return rn.gamma(alpha, 1. / beta, size=(n_rows, K))

    rates = parafac([draw_factor(n_rows) for n_rows in shp])
    assert rates.shape == shp
    counts = rn.poisson(rates)
    return rates, counts
示例#7
0
文件: bptf.py 项目: mrthat/bptf
    def reconstruct(self, weights=None, drop_diag=False, geom=True):
        """Reconstruct data using point estimates of latent factors.

        Currently supported only up to 5-way tensors.

        PARAMS:
        weights -- (dict or None) maps a mode index to a matrix that is used
                   in place of that mode's factor matrix; None or an empty
                   dict leaves every factor untouched
        drop_diag -- (bool) when true, entries selected by an identity mask
                     of size Y_pred.shape[0] are set to 0
        geom -- (bool) use self.G_DK_M when true, self.E_DK_M otherwise

        RETURNS:
        Y_pred -- result of parafac on the (possibly substituted) factors
        """
        source_DK_M = self.G_DK_M if geom else self.E_DK_M
        # Copies keep the stored factors out of parafac's hands.
        tmp = [factor_DK.copy() for factor_DK in source_DK_M]
        if weights:
            assert all(m in range(self.n_modes) for m in weights)
            # dict.items() exists on Python 2 and 3; iteritems() is
            # Python 2 only and raises AttributeError on Python 3.
            for m, weight_matrix in weights.items():
                tmp[m] = weight_matrix
        Y_pred = parafac(tmp)
        if drop_diag:
            diag_idx = np.identity(Y_pred.shape[0]).astype(bool)
            Y_pred[diag_idx] = 0
        return Y_pred
示例#8
0
    def reconstruct(self, weights=None, drop_diag=False, geom=True):
        """Reconstruct data using point estimates of latent factors.

        Currently supported only up to 5-way tensors.

        PARAMS:
        weights -- (dict or None) maps a mode index to a matrix that is used
                   in place of that mode's factor matrix; None or an empty
                   dict leaves every factor untouched
        drop_diag -- (bool) when true, entries selected by an identity mask
                     of size Y_pred.shape[0] are set to 0
        geom -- (bool) use self.G_DK_M when true, self.E_DK_M otherwise

        RETURNS:
        Y_pred -- result of parafac on the (possibly substituted) factors
        """
        source_DK_M = self.G_DK_M if geom else self.E_DK_M
        # Copies keep the stored factors out of parafac's hands.
        tmp = [factor_DK.copy() for factor_DK in source_DK_M]
        if weights:
            assert all(m in range(self.n_modes) for m in weights)
            # dict.items() exists on Python 2 and 3; iteritems() is
            # Python 2 only and raises AttributeError on Python 3.
            for m, weight_matrix in weights.items():
                tmp[m] = weight_matrix
        Y_pred = parafac(tmp)
        if drop_diag:
            diag_idx = np.identity(Y_pred.shape[0]).astype(bool)
            Y_pred[diag_idx] = 0
        return Y_pred
示例#9
0
 def _init_all_components(self, mode_dims):
     """Initialise the component of every mode and the starting mu.

     Stores ``mode_dims`` on the instance, calls ``_init_component`` once
     per (mode index, dimension) pair, then sets ``mu_G_DIMS`` to
     ``parafac(self.theta_E_DK_M)``. ``mode_dims`` must contain exactly
     ``self.n_modes`` entries.
     """
     n_given = len(mode_dims)
     assert n_given == self.n_modes
     self.mode_dims = mode_dims
     for mode_idx, mode_dim in enumerate(mode_dims):
         self._init_component(mode_idx, mode_dim)
     self.mu_G_DIMS = parafac(self.theta_E_DK_M)
示例#10
0
def main(n_docs,
         n_words,
         alpha,
         beta,
         rank,
         priv,
         n_iters=200,
         out_file='test_out.npz'):
    """Fit BPPTF with and without privacy modelling on a synthetic matrix.

    Data are read from 'test_data.npz' when that file loads and matches the
    requested shapes. Otherwise gamma factors, Poisson counts and two-sided
    geometric noise are sampled and written to 'test_data.npz'. Two models
    are fitted to the noisy data: one with ``priv`` and a "naive" one with
    0.0. The inferred quantities of the first are saved to ``out_file``.
    When neither dimension exceeds 100, a five-panel heatmap comparison is
    written to 'test_output.pdf'.

    PARAMS:
    n_docs -- number of rows (D) of the data matrix
    n_words -- number of columns (V) of the data matrix
    alpha -- shape passed to np.random.gamma when sampling factors
    beta -- rate of the gamma draws (passed as scale 1 / beta)
    rank -- number of components (K)
    priv -- privacy parameter passed to two_sided_geometric and the model
    n_iters -- max_iter given to both models
    out_file -- path of the .npz file receiving the inferred quantities
    """
    expected_shape = (n_docs, n_words)
    try:
        # The context manager closes the archive, so the handle is not left
        # open while the fallback below rewrites the same path.
        with np.load('test_data.npz') as dat_file:
            data_DV = dat_file['Y_DV']
            noisy_data_DV = dat_file['noisy_data_DV']
            phi_KV = dat_file['phi_KV']
            theta_DK = dat_file['theta_DK']
        # Explicit checks instead of assert: they must still reject a stale
        # cache when Python runs with -O.
        if data_DV.shape != expected_shape:
            raise ValueError('cached Y_DV has an unexpected shape')
        if phi_KV.shape != (rank, n_words):
            raise ValueError('cached phi_KV has an unexpected shape')
        poisson_priors_DV = parafac((theta_DK, phi_KV.T))
    except Exception:
        # Best-effort cache: a missing, unreadable or mismatching file means
        # the data are regenerated. KeyboardInterrupt and SystemExit are
        # deliberately not caught.
        theta_DK = np.random.gamma(alpha, 1. / beta, (n_docs, rank))
        phi_KV = np.random.gamma(alpha, 1. / beta, (rank, n_words))
        poisson_priors_DV = parafac((theta_DK, phi_KV.T))
        # Sample true data and noisy data
        data_DV = np.random.poisson(poisson_priors_DV, expected_shape)
        noisy_data_DV = data_DV + two_sided_geometric(priv,
                                                      size=expected_shape)
        np.savez_compressed('test_data.npz',
                            Y_DV=data_DV,
                            noisy_data_DV=noisy_data_DV,
                            phi_KV=phi_KV,
                            theta_DK=theta_DK,
                            mu_DV=poisson_priors_DV)

    assert (poisson_priors_DV.shape == expected_shape)

    # Model that accounts for the privacy noise.
    bpptf_model = BPPTF(n_modes=2,
                        n_components=rank,
                        verbose=True,
                        max_iter=n_iters,
                        true_mu=poisson_priors_DV,
                        tol=1e-4)
    (new_theta, new_phi) = bpptf_model.fit_transform(noisy_data_DV,
                                                     priv,
                                                     version='arithmetic')
    new_mu = parafac((new_theta, new_phi))

    np.savez_compressed(out_file,
                        inferred_mu_DV=new_mu,
                        inferred_theta_DK=new_theta,
                        inferred_phi_KV=new_phi.T)

    # Baseline fitted to the same noisy data with a privacy value of 0.0.
    naive_model = BPPTF(n_modes=2,
                        n_components=rank,
                        verbose=True,
                        max_iter=n_iters,
                        true_mu=poisson_priors_DV,
                        tol=1e-4)
    (naive_theta, naive_phi) = naive_model.fit_transform(noisy_data_DV,
                                                         0.0,
                                                         version='arithmetic')
    naive_mu = parafac((naive_theta, naive_phi))

    # Plotting is skipped when either dimension exceeds 100.
    if n_docs > 100 or n_words > 100:
        return

    sns.set(context='poster', style='white', font='serif')
    data_max = np.max(noisy_data_DV)
    kwargs = {
        'cmap': 'bwr',
        'vmin': -data_max,
        'vmax': data_max,
        'square': True,
        'cbar': False,
        'xticklabels': False,
        'yticklabels': False
    }

    # (matrix, title) for each panel, left to right.
    panels = [
        (poisson_priors_DV, '(a) True parameters'),
        (data_DV, '(b) Actual data'),
        (noisy_data_DV, '(c) Observed data'),
        (naive_mu, '(d) Naive method, \nmae = {:.3f}'.format(
            np.mean(np.abs(naive_mu - poisson_priors_DV)))),
        (new_mu, '(e) Our method, \nmae = {:.3f}'.format(
            np.mean(np.abs(new_mu - poisson_priors_DV)))),
    ]

    plt.figure(figsize=(22, 17))
    for position, (panel_DV, title) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        sns.heatmap(panel_DV, **kwargs)
        plt.title(title)

    plt.savefig('test_output.pdf', bbox_inches='tight')