Example #1

import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.distributions import Normal

# BaseVAE, ConvVAE and the nns helper module are project-local; their import paths
# are assumed here to match the surrounding repository.

class ConvFactorVAE(BaseVAE):
    """Class that implements the Factor Variational Auto-Encoder (based on CNN)"""

    def __init__(self, disc_hiddens=[1000, 1000, 1000],
                 gamma=30, input_size=(3, 64, 64),
                 kernel_sizes=[32, 32, 64, 64],
                 hidden_size=256, dim_z=32,
                 binary=True, **kwargs):
        """initialize neural networks
        :param disc_hiddens: list of int, numbers of hidden units of each layer in discriminator
        :param gamma: weight for total correlation term in loss function
        """
        super(ConvFactorVAE, self).__init__()
        self.gamma = gamma
        self.dim_z = dim_z
        self.binary = binary
        self.input_size = input_size
        self.hidden_size = hidden_size

        # VAE networks
        self.vae = ConvVAE(input_size, kernel_sizes, hidden_size, dim_z, binary, **kwargs)
        # inherit some attributes
        self.channel_sizes = self.vae.channel_sizes

        # discriminator networks
        D_act = nn.LeakyReLU
        D_act_args = {"negative_slope": 0.2, "inplace": False}
        D_output_dim = 2
        self.discriminator = nns.create_mlp(
            self.dim_z, disc_hiddens, act_layer=D_act, act_args=D_act_args)
        self.discriminator = nn.Sequential(
            self.discriminator,
            nn.Linear(disc_hiddens[-1], D_output_dim))

    def encode(self, x):
        """vae encode"""
        return self.vae.encode(x)

    def decode(self, code):
        """vae decode"""
        return self.vae.decode(code)

    def reparameterize(self, mu, logvar):
        """reparameterization trick"""
        return self.vae.reparameterize(mu, logvar)

    def forward(self, input, no_dec=False):
        """autoencoder forward computation"""
        encoded = self.encode(input)
        mu, logvar = encoded
        z = self.reparameterize(mu, logvar) # latent variable z

        if no_dec:
            # no decoding
            return z.clone() # avoid inplace operation

        return self.decode(z), encoded, z

    def sample_latent(self, num, device, **kwargs):
        """vae sample latent"""
        return self.vae.sample_latent(num, device, **kwargs)

    def sample(self, num, device, **kwargs):
        """vae sample"""
        return self.vae.sample(num, device, **kwargs)

    def decoded_to_output(self, decoded, **kwargs):
        """vae transform decoded result to output"""
        return self.vae.decoded_to_output(decoded, **kwargs)

    def reconstruct(self, input, **kwargs):
        """vae reconstruct"""
        return self.vae.reconstruct(input, **kwargs)

    def permute_dims(self, z):
        """permute separately each dimension of the z randomly in a batch
        :param z: [B x D] tensor
        :return: [B x D] tensor with each dim of D dims permuted randomly
        """
        B, D = z.size()
        # generate randomly permuted batch on each dimension
        permuted = []
        for i in range(D):
            ind = torch.randperm(B)
            permuted.append(z[:, i][ind].view(-1, 1))

        return torch.cat(permuted, dim=1)

    def loss_function(self, *inputs, **kwargs):
        """loss function described in the paper (eq. (2))"""
        optim_part = kwargs['optim_part'] # the part to optimize

        if optim_part == 'vae':
            # update VAE
            decoded = inputs[0]
            encoded = inputs[1]
            Dz = inputs[2]
            x = inputs[3]

            flat_input_size = np.prod(self.input_size)
            mu, logvar = encoded
            # KL divergence term
            KLD = -0.5 * (1 + logvar - mu.pow(2) - logvar.exp()).sum(1).mean()
            if self.binary:
                # likelihood term under a Bernoulli decoder
                MLD = F.binary_cross_entropy(decoded.view(-1, flat_input_size),
                                             x.view(-1, flat_input_size),
                                             reduction='sum').div(x.size(0))
            else:
                # likelihood term under a Gaussian decoder
                mean_dec, logvar_dec = decoded
                recon_x_distribution = Normal(loc=mean_dec.view(-1, flat_input_size),
                                              scale=torch.exp(0.5*logvar_dec.view(-1, flat_input_size)))
                MLD = -recon_x_distribution.log_prob(x.view(-1, flat_input_size)).sum(1).mean()

            # total correlation estimate: the difference of the two discriminator
            # logits is log D(z) - log(1 - D(z)), i.e. the density-ratio estimate
            tc_loss = (Dz[:, :1] - Dz[:, 1:]).mean()

            return {
                "loss": KLD + MLD + self.gamma * tc_loss,
                "KLD": KLD,
                "MLD": MLD,
                "tc_loss": tc_loss}
        elif optim_part == 'discriminator':
            # update discriminator
            Dz = inputs[0]
            Dz_pperm = inputs[1]
            device = Dz.device

            ones = torch.ones(Dz.size(0), dtype=torch.long).to(device)
            zeros = torch.zeros(Dz.size(0), dtype=torch.long).to(device)

            D_tc_loss = 0.5 * (F.cross_entropy(Dz, zeros) +
                               F.cross_entropy(Dz_pperm, ones))

            return {"loss": D_tc_loss, "D_tc_loss": D_tc_loss}

        else:
            raise ValueError("no such network to optimize: {}".format(optim_part))
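
The two branches of loss_function above correspond to the alternating FactorVAE updates: the VAE step is penalized by the total-correlation estimate produced by the discriminator, while the discriminator is trained to tell real latent codes from dimension-wise permuted ones. Below is a minimal training-loop sketch of that alternation; the data loader, optimizer settings and hyper-parameters are illustrative assumptions, not taken from the repository.

# Sketch of the alternating FactorVAE update (all names outside the class are assumed)
model = ConvFactorVAE(gamma=30, dim_z=32)
opt_vae = torch.optim.Adam(model.vae.parameters(), lr=1e-4)
opt_disc = torch.optim.Adam(model.discriminator.parameters(), lr=1e-4)

for x, x2 in loader:  # loader is assumed to yield two independent batches per step
    # (1) VAE step: reconstruction + KL + gamma * total correlation
    decoded, encoded, z = model(x)
    Dz = model.discriminator(z)
    vae_out = model.loss_function(decoded, encoded, Dz, x, optim_part='vae')
    opt_vae.zero_grad()
    vae_out['loss'].backward()
    opt_vae.step()

    # (2) discriminator step: real codes (label 0) vs. permuted codes (label 1)
    z_prime = model(x2, no_dec=True)
    z_perm = model.permute_dims(z_prime).detach()
    disc_out = model.loss_function(model.discriminator(z.detach()),
                                   model.discriminator(z_perm),
                                   optim_part='discriminator')
    opt_disc.zero_grad()
    disc_out['loss'].backward()
    opt_disc.step()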
Example #2

import cv2
import numpy as np

# This snippet comes from inside a larger test script: ConvVAE and reset_graph are
# project-local, and z_size, batch_size, crop_size and crops are defined earlier
# in the enclosing function.
    print("batch_size {}".format(batch_size))
    print("crop_size {}".format(crop_size))

    cv2.imshow("crop",crops[0])
    cv2.waitKey(0)

    reset_graph()
    test_vae = ConvVAE(z_size=z_size,
                       batch_size=batch_size,
                       is_training=False,
                       reuse=False,
                       gpu_mode=True)

    # show reconstruction example
    test_vae.load_json("../../models/0/vae_{}.json".format(180000))
    z = test_vae.encode(crops)
    print(z.shape)

    rec = test_vae.decode(z)
    print(rec.shape)

    np.save("../../output/z_{}.npy".format(batch_size), z)
    np.save("../../output/rec_{}.npy".format(batch_size), rec)

    #for img_idx, img in enumerate(test_batch):
    vis = np.concatenate((crops[0], rec[0]), axis=1)
    cv2.imshow("org vs. rec",cv2.resize(vis,(0,0),fx=4.0,fy=4.0))

    key = cv2.waitKey(0) & 0xFF