Example #1
    def sample(self, y, n=None):
        """Sample from the fitted probabilistic CCA model.

        :param n: The number of samples.
        :return:  Two views of n samples each.
        """
        k = self.latent_dim
        Lambda, Psi_diag = self.tile_params()

        if n and n > y.shape[1]:
            raise AttributeError('More samples than estimated z variables.')
        elif not n:
            n = y.shape[1]

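        # Estimate each latent variable from its observation: E[z | y].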
        z = torch.empty(k, n)
        for i in range(n):
            yi = y[:, i]
            z[:, i] = self.E_z_given_y(Lambda, Psi_diag, yi)

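        # Project the latent variables back to each view's mean.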
        m1 = self.Lambda1 @ z
        m2 = self.Lambda2 @ z

        y1 = torch.empty(self.p1, n)
        y2 = torch.empty(self.p2, n)

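        # Draw one observation per sample from N(mean, diag(Psi)).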
        for i in range(n):
            y1[:, i] = MVN(m1[:, i], diag(self.Psi1_diag)).sample()
            y2[:, i] = MVN(m2[:, i], diag(self.Psi2_diag)).sample()

        return y1.t(), y2.t()
Example #2
    def sample(self, y, n_samples):
        """Sample from the fitted probabilistic CCA model.

        :param n: The number of samples.
        :return:  Two views of n samples each.
        """
        k = 3 * self.latent_dim if self.private_z else self.latent_dim

        Lambda, Psi_diag = self.tile_params()

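        # Invert (diag(Psi) + Lambda @ Lambda.T) via the Woodbury identity.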
        PLL_inv = LA.woodbury_inv(Psi_diag, Lambda, Lambda.t(), k)
        z = self.E_z_given_y(Lambda, PLL_inv, y)

        m = Lambda @ z
        m1 = m[:self.p1]
        m2 = m[self.p1:]

        y1 = torch.empty(self.p1, n_samples)
        y2 = torch.empty(self.p2, n_samples)

        for i in range(n_samples):
            # Randomly select a latent variable (randint is inclusive on
            # both ends, so the upper bound must be z.shape[1] - 1).
            r = random.randint(0, z.shape[1] - 1)
            # Sample y using the mean for the chosen latent variable.
            y1[:, i] = MVN(m1[:, r], diag(self.Psi1_diag)).sample()
            y2[:, i] = MVN(m2[:, r], diag(self.Psi2_diag)).sample()

        return y1.t(), y2.t()
Example #3
 def init_VL_sampler(self):
     from torch.distributions.multivariate_normal import MultivariateNormal as MVN
     view_mvn_path = self.cfgs.get('view_mvn_path', 'checkpoints/view_light/view_mvn.pth')
     light_mvn_path = self.cfgs.get('light_mvn_path', 'checkpoints/view_light/light_mvn.pth')
     view_mvn = torch.load(view_mvn_path)
     light_mvn = torch.load(light_mvn_path)
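     # Cache the prior means and full-covariance Gaussians on the GPU.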
     self.view_mean = view_mvn['mean'].cuda()
     self.light_mean = light_mvn['mean'].cuda()
     self.view_mvn = MVN(view_mvn['mean'].cuda(), view_mvn['cov'].cuda())
     self.light_mvn = MVN(light_mvn['mean'].cuda(), light_mvn['cov'].cuda())
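
For context, a minimal usage sketch (a hypothetical helper, not part of the project, assuming only the view_mvn and light_mvn attributes set above):

 def sample_view_light(self, batch_size):
     # Hypothetical helper: draw a batch of viewpoint and lighting codes
     # from the Gaussian priors loaded by init_VL_sampler.
     views = self.view_mvn.sample((batch_size, ))
     lights = self.light_mvn.sample((batch_size, ))
     return views, lights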
Example #4
    def get_projections(self, data, J, projection='two'):
        """
        Get projections for ACS approximate procedure
        :param data: (Object) Data object to get projections for
        :param J: (int) Number of projections to use
        :param projection: (str) Type of projection to use (currently only 'two' supported)
        :return: (torch.tensor) Projections
        """
        projections = []
        with torch.no_grad():
            theta_mean, theta_cov = self.linear._compute_posterior(
                self.encode(self.x_train), self.y_train)
            jitter = utils.to_gpu(torch.eye(len(theta_cov)) * 1e-4)
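            # A small diagonal jitter keeps the posterior covariance
            # positive definite.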
            try:
                theta_samples = MVN(theta_mean.flatten(),
                                    theta_cov + jitter).sample(torch.Size([J]))
            except Exception:
                # Sampling can still fail if the covariance is not PSD;
                # drop into the debugger to inspect.
                import pdb
                pdb.set_trace()

            dataloader = DataLoader(Dataset(data, 'unlabeled'),
                                    batch_size=len(data.index['unlabeled']),
                                    shuffle=False)
            for (x, _) in dataloader:
                x = utils.to_gpu(x)
                if projection == 'two':
                    for theta_sample in theta_samples:
                        projections.append(
                            self._compute_expected_ll(x, theta_sample))
                else:
                    raise NotImplementedError

        return utils.to_gpu(torch.sqrt(1 / torch.FloatTensor(
            [J]))) * torch.cat(projections, dim=1), torch.zeros(len(x))
Example #5
	def test_neg_loglik(self):
		""" Compute negative log-likelihood of test set. """
		self.q_params = self.all_variationals[-1][0] 
		samples, _ = self.sample_q(self.config["bbb_nsamples"])
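		# Average the forward passes over posterior weight samples.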
		results = np.apply_along_axis(lambda w: self.forward(self.X_test, weights=torch.Tensor(w)).numpy(), 1, samples)
		means = torch.tensor(np.mean(results, axis=0))
		return -1 * MVN(means, self.config["sigma_noise"] * torch.eye(self.Ydim)).log_prob(self.Y_test).sum()
Example #6
 def test_neg_loglik(self):
     """ Compute negative log-likelihood of test set. """
     results = np.apply_along_axis(
         lambda w: self.forward(self.X_test,
                                weights=torch.Tensor(w)).numpy(),
         1, self.particles)
     means = torch.tensor(np.mean(results, axis=0))
     return -1 * MVN(means, self.config["sigma_noise"] *
                     torch.eye(self.Ydim)).log_prob(self.Y_test).sum()
Example #7
def two_gaussians(n,
                  covariance=[1, 0, 0, 1],
                  transforms=[(lambda x: x), (lambda y: y)]):
    sampler = MVN(loc=torch.zeros(2),
                  covariance_matrix=torch.Tensor(
                      [covariance[0:2], covariance[2:4]]))
    X, Y = sampler.sample((n, )).t()
    X, Y = transforms[0](X), transforms[1](Y)
    return X.view(-1, 1), Y.view(-1, 1)
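
A quick usage sketch (values are illustrative, not from the project): draw correlated pairs and square the second coordinate.

X, Y = two_gaussians(1000,
                     covariance=[1.0, 0.8, 0.8, 1.0],
                     transforms=[(lambda x: x), (lambda y: y ** 2)])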
Example #8
    def get_projections(self,
                        data,
                        J,
                        projection='two',
                        gamma=0,
                        transform=None,
                        **kwargs):
        """
        Get projections for ACS approximate procedure
        :param data: (Object) Data object to get projections for
        :param J: (int) Number of projections to use
        :param projection: (str) Type of projection to use (currently only 'two' supported)
        :return: (torch.tensor) Projections
        """
        ent = lambda py: torch.distributions.Categorical(probs=py).entropy()
        projections = []
        feat_x = []
        with torch.no_grad():
            mean, cov = self.linear._compute_posterior()
            jitter = to_gpu(torch.eye(len(cov)) * 1e-6)
            theta_samples = MVN(mean,
                                cov + jitter).sample(torch.Size([J])).view(
                                    J, -1, self.linear.out_features)
            '''
            dataloader = DataLoader(Dataset(data, 'unlabeled', transform=transform),
                                    batch_size=256, shuffle=False)
            '''

            idx_lb = data.index['unlabeled']
            handler = DataHandler(X=data.X[idx_lb],
                                  Y=data.Y[idx_lb],
                                  transform=self.args['transform'])
            dataloader = DataLoader(handler,
                                    shuffle=False,
                                    batch_size=256,
                                    num_workers=0)

            for (x, _, _) in dataloader:
                x = to_gpu(x)
                feat_x.append(self.encode(x))

            feat_x = torch.cat(feat_x)
            py = self._compute_predictive_posterior(self.linear(
                feat_x, num_samples=100),
                                                    logits=False)
            ent_x = ent(py)
            if projection == 'two':
                for theta_sample in theta_samples:
                    projections.append(
                        self._compute_expected_ll(feat_x, theta_sample, py) +
                        gamma * ent_x[:, None])
            else:
                raise NotImplementedError

        return to_gpu(torch.sqrt(1 / torch.FloatTensor([J]))) * torch.cat(
            projections, dim=1), ent_x
Example #9
    def sample(self, y, n_samples=None, one_sample_per_y=False):
        """Sample from the fitted probabilistic CCA model.

        :param y:         Observations of shape (n_features, n_samples).
        :param n_samples: The number of samples.
        :return:          Two views of n samples each.
        """
        k = 3 * self.latent_dim
        if one_sample_per_y:
            if n_samples and n_samples != y.shape[1]:
                msg = 'When sampling once per `y`, `n_samples` must be the ' \
                      'number of samples of `y`.'
                raise AttributeError(msg)
            n_samples = y.shape[1]

        Lambda, Psi_diag = self.tile_params()
        PLL_inv = LA.woodbury_inv(Psi_diag, Lambda, Lambda.t(), k)
        z = self.E_z_given_y(Lambda, PLL_inv, y)

        m = Lambda @ z
        m1 = m[:self.p1]
        m2 = m[self.p1:]

        y1r = torch.empty(self.p1, n_samples, device=device)
        y2r = torch.empty(self.p2, n_samples, device=device)

        for i in range(n_samples):
            if one_sample_per_y:
                # Sample based on the estimated mean for the current `y`.
                j = i
            else:
                # Sample based on a randomly chosen latent variable.
                j = random.randint(0, z.shape[1] - 1)
            y1r[:, i] = MVN(m1[:, j], diag(exp(self.log_Psi1_diag))).sample()
            y2r[:, i] = MVN(m2[:, j], diag(exp(self.log_Psi2_diag))).sample()

        return y1r.t(), y2r.t()
Example #10
 def log_likelihood(self, batch_indices=None):
     """ Computes log-likelihood term. """
     if batch_indices is None:
         batch = self.X_train
         target = self.Y_train
         multiplier = 1
     else:
         batch = self.X_train[batch_indices]
         target = self.Y_train[batch_indices]
         multiplier = (self.N_train / len(batch_indices))
     means = self.forward(X=batch)
     if self.Ydim == 1:
         return multiplier * self.noise_dist.log_prob(means - target).sum()
     return multiplier * MVN(
         means, self.config["sigma_noise"] *
         torch.eye(self.Ydim)).log_prob(target).sum()
Example #11
 def log_likelihood(self, batch_indices=None):
     """ Computes the likelihood. """
     if batch_indices is None:
         batch = self.X_train
         target = self.Y_train
         multiplier = 1
     else:
         batch = self.X_train[batch_indices]
         target = self.Y_train[batch_indices]
         multiplier = (self.N_train / len(batch_indices))
     means = self.forward(X=batch)
     if self.Ydim == 1:
         return multiplier * Normal(
             0, self.sigma_noise).log_prob(means - target).sum()
     return multiplier * MVN(means, self.sigma_noise *
                             torch.eye(self.Ydim)).log_prob(target).sum()
Example #12
    def positive_gaussian_cocp(self):
        """ Conditional output-constrained prior: mixture of Gaussian.
			Assume uniform mixing weights for each mixture.
			Assume isotropic Gaussian. 
		"""
        nn_mean = self.forward(X=self._cr_pos_xsamples)
        index = 0
        log_prob = torch.tensor(0.0)
        for i, (dom, ifunc) in enumerate(
                self.dconstraints['positive_gaussian_cocp']):
            sub_nsamples = self._cr_ylens[i] * self.ocp_nsamples
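            # Log-density of the network output under each Gaussian
            # mixture component.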
            dist = MVN(self._cr_pos_ysamples[index:index + sub_nsamples, :],
                       self.cocp_gaussian_sigma_c *
                       torch.eye(self.Ydim)).log_prob(nn_mean[index:index +
                                                              sub_nsamples, :])
            dist += torch.log(torch.tensor(1 / self._cr_ylens[i]))
            log_prob += torch.logsumexp(torch.stack(dist.split(
                self.ocp_nsamples),
                                                    dim=0),
                                        dim=0).sum()
            index += sub_nsamples
        return log_prob
Example #13
def nce_test():
    """
    Test implementation of NCE for Gaussian
    """
    #specify data size
    data_dim = 5
    Td = 100000
    noise_ratio = 50
    Tn = Td * noise_ratio
    Td_batch = 1000
    Tn_batch = Td_batch * noise_ratio

    #create Pd and create artificial data
    cov_base = th.tensor(make_spd_matrix(data_dim), dtype=th.float)
    tril_mat = th.tril(cov_base)
    cov_mat = th.matmul(tril_mat, tril_mat.t())
    true_c = -0.5 * th.log(th.abs(th.det(cov_mat))) - (data_dim / 2) * th.log(
        2 * th.tensor(np.pi))
    p_data = MVN(th.zeros(data_dim), scale_tril=tril_mat)
    data_labels = th.ones(Td)
    data_sample = th.utils.data.TensorDataset(p_data.sample((Td, )),
                                              data_labels)
    data_loader = th.utils.data.DataLoader(data_sample,
                                           batch_size=Td_batch,
                                           shuffle=True)

    #specify noise parameters for later use
    noise_cov_mat = th.eye(data_dim)

    #set up the model to be estimated
    cov_model = th.tensor(make_spd_matrix(data_dim), dtype=th.float)
    tril_mat_model = th.tril(cov_model)
    model = UnnormMVGaussian(th.zeros(data_dim), scale_tril=tril_mat_model)
    model.scale_tril.requires_grad = True
    model.normalizing_constant.requires_grad = True

    #set up optimization parameters
    start_epoch = 0
    end_epoch = 1000
    start_lr = 0.001
    momentum = 0.9
    decay_epochs = [50, 100, 250, 500, 750]
    decay_gamma = 0.1
    optimizer = th.optim.Adam([model.scale_tril, model.normalizing_constant],
                              lr=start_lr)
    lr_sched = th.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=decay_epochs,
                                                 gamma=decay_gamma)

    #train
    for epoch in range(start_epoch, end_epoch):
        print(epoch)
        for i, (data_batch, data_labels) in enumerate(data_loader):
            #sample noise data for current input batch
            noise_distr = MVN(th.zeros(data_dim), noise_cov_mat)
            noise_batch = noise_distr.sample((Tn_batch, ))
            noise_labels = th.zeros(Tn_batch)
            #combine data and noise samples
            joint_batch = th.cat((data_batch, noise_batch), 0)
            joint_labels = th.cat((data_labels, noise_labels), 0)

            #forward pass
            log_P_model = model.log_prob(joint_batch)
            log_P_noise = noise_distr.log_prob(joint_batch)
            log_P_diff = log_P_model - log_P_noise + 1e-20
            loss = NCE_loss(log_P_diff, joint_labels, Td_batch, noise_ratio)
            print(loss.item(), true_c.item(),
                  model.normalizing_constant.item())
            print(F.mse_loss(model.scale_tril, p_data.scale_tril))

            #backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            noise_cov_mat = th.chain_matmul(model.scale_tril.detach(),
                                            model.scale_tril.detach().t())

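
NCE_loss is defined elsewhere in the project; a minimal sketch consistent with how it is called here (logistic classification of data vs. noise on the log-ratio, with the noise ratio as prior odds, following Gutmann & Hyvärinen) might look like:

import torch as th
import torch.nn.functional as F

def NCE_loss(log_P_diff, labels, Td_batch, noise_ratio):
    # Hypothetical reconstruction: P(data | x) = sigmoid(log p_model(x)
    # - log p_noise(x) - log nu); trained with binary cross-entropy.
    logits = log_P_diff - th.log(th.tensor(float(noise_ratio)))
    return F.binary_cross_entropy_with_logits(
        logits, labels, reduction='sum') / Td_batch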
Example #14
File: vae.py  Project: JunwenBai/DAPC
def vdapc_loss(latent_dist,
               latent_sample,
               latent_mask,
               T,
               cov,
               post_L,
               alpha=0.,
               beta=0.,
               gamma=1.,
               zeta=1.):
    batch_size, seq_len, d = latent_sample.shape

    ### Junwen: compute the log prob terms for each 3*3 block
    # Weiran: sample across different utts.
    latent_mu = latent_dist[0].reshape(-1, d)
    latent_logvar = latent_dist[1].reshape(-1, d)
    mask = latent_mask.reshape(-1).float()
    # This gives indices of valid samples.
    idx = mask.nonzero()[:, 0]
    if idx.shape[0] > 2000:
        step = idx.shape[0] // 2000
        latent_mu_sub = latent_mu[idx[::step], :]
        latent_logvar_sub = latent_logvar[idx[::step], :]
    else:
        latent_mu_sub = latent_mu[idx, :]
        latent_logvar_sub = latent_logvar[idx, :]

    block_log_pz, block_log_qz, block_log_prod_qzi, block_log_q_zCx = _get_log_pz_qz_prodzi_qzCx(
        latent_sample.reshape(-1, d), (latent_mu, latent_logvar),
        (latent_mu_sub, latent_logvar_sub))
    block_mi_loss = torch.sum(
        (block_log_q_zCx - block_log_qz) * mask) / torch.sum(mask)
    block_tc_loss = torch.sum(
        (block_log_qz - block_log_prod_qzi) * mask) / torch.sum(mask)
    block_kl_loss = torch.sum(
        (block_log_prod_qzi - block_log_pz) * mask) / torch.sum(mask)

    ### Junwen: compute the log prob terms for each 24*24 block
    latent_sample_2T = latent_sample.reshape(batch_size, seq_len * d).unfold(
        1, 2 * T * d, d).reshape(-1, 2 * T * d)
    latent_mu = latent_mu.reshape(batch_size, seq_len * d).unfold(
        1, 2 * T * d, d).reshape(-1, 2 * T * d)
    latent_logvar = latent_logvar.reshape(batch_size, seq_len * d).unfold(
        1, 2 * T * d, d).reshape(-1, 2 * T * d)
    mask = latent_mask.reshape(batch_size, seq_len).unfold(
        1, 2 * T, 1).reshape(-1, 2 * T).all(1).float()
    log_q_zCx = log_density_gaussian(latent_sample_2T, latent_mu,
                                     latent_logvar).sum(1)

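    # Score each 2T-frame window under the Gaussian temporal prior N(0, cov).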
    mvn = MVN(torch.zeros(2 * T * d, device=cov.device), covariance_matrix=cov)
    latent_sample_2T = post_L(latent_sample_2T)
    log_pz = mvn.log_prob(latent_sample_2T)
    kl_loss = torch.sum((log_q_zCx - log_pz) * mask) / torch.sum(mask)

    # the choice of the losses could be arbitrary combination of diff terms for diff-size blocks
    loss = alpha * block_mi_loss + beta * block_tc_loss + gamma * block_kl_loss + zeta * kl_loss

    print(
        "vae losses: block_mi_loss=%f, block_tc_loss=%f, block_kl_loss=%f, kl_loss=%f"
        % (block_mi_loss, block_tc_loss, block_kl_loss, kl_loss))
    return loss
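
log_density_gaussian is assumed here to return the element-wise log-density of a diagonal Gaussian; a minimal sketch of that assumption:

import math
import torch

def log_density_gaussian(x, mu, logvar):
    # Element-wise log N(x; mu, exp(logvar)) for a diagonal Gaussian.
    return -0.5 * (math.log(2 * math.pi) + logvar
                   + (x - mu) ** 2 * torch.exp(-logvar))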
Example #15
def two_gaussians(n, covariance=[1, 0, 0, 1]):
    sampler = MVN(loc=torch.zeros(2),
                  covariance_matrix=torch.Tensor(
                      [covariance[0:2], covariance[2:4]]))
    X, Y = sampler.sample((n, )).t()
    return X.view(-1, 1), Y.view(-1, 1)