示例#1
0
    def load_model_and_dataset(checkpt_filename):
        """Load a trained VAE and its dataset loader from a checkpoint.

        Args:
            checkpt_filename: path to a checkpoint saved as a dict with
                'args' (training namespace) and 'state_dict' keys.

        Returns:
            (vae, loader): the restored model in eval mode and the data
            loader built from the checkpoint's args.

        Raises:
            ValueError: if ``args.dist`` is not 'normal', 'laplace' or 'flow'.
        """
        checkpt = torch.load(checkpt_filename)
        args = checkpt['args']
        state_dict = checkpt['state_dict']

        # backwards compatibility: older checkpoints predate the conv flag
        if not hasattr(args, 'conv'):
            args.conv = False

        from vae_quant import VAE, setup_data_loaders

        # select prior p(z) and posterior q(z|x) distribution families
        if args.dist == 'normal':
            prior_dist = dist.Normal()
            q_dist = dist.Normal()
        elif args.dist == 'laplace':
            prior_dist = dist.Laplace()
            q_dist = dist.Laplace()
        elif args.dist == 'flow':
            prior_dist = flows.FactorialNormalizingFlow(dim=args.latent_dim,
                                                        nsteps=32)
            q_dist = dist.Normal()
        else:
            # fail fast instead of hitting a NameError on prior_dist below
            raise ValueError('unknown distribution: {}'.format(args.dist))
        vae = VAE(z_dim=args.latent_dim,
                  use_cuda=True,
                  prior_dist=prior_dist,
                  q_dist=q_dist,
                  conv=args.conv)
        # strict=False tolerates architecture drift between save and load
        vae.load_state_dict(state_dict, strict=False)
        vae.eval()

        # dataset loader
        loader = setup_data_loaders(args, use_cuda=True)
        return vae, loader
示例#2
0
    def load_model_and_dataset(checkpt_filename):
        """Load a trained VAE, its dataset loader, and the training args.

        Args:
            checkpt_filename: path to a checkpoint saved as a dict with
                'args' and 'state_dict' keys.

        Returns:
            (vae, loader, args).

        Raises:
            ValueError: if ``args.dist`` names an unknown distribution family.
        """
        print('Loading model and dataset.')
        # map_location keeps the load CPU-safe regardless of where it was saved
        checkpt = torch.load(checkpt_filename,
                             map_location=lambda storage, loc: storage)
        args = checkpt['args']
        state_dict = checkpt['state_dict']

        # model: checkpoints without args.dist default to the normal family
        if not hasattr(args, 'dist') or args.dist == 'normal':
            prior_dist = dist.Normal()
            q_dist = dist.Normal()
        elif args.dist == 'laplace':
            prior_dist = dist.Laplace()
            q_dist = dist.Laplace()
        elif args.dist == 'flow':
            prior_dist = flows.FactorialNormalizingFlow(dim=args.latent_dim,
                                                        nsteps=4)
            q_dist = dist.Normal()
        else:
            # fail fast instead of hitting a NameError on prior_dist below
            raise ValueError('unknown distribution: {}'.format(args.dist))
        vae = VAE(z_dim=args.latent_dim,
                  use_cuda=True,
                  prior_dist=prior_dist,
                  q_dist=q_dist,
                  conv=args.conv)
        vae.load_state_dict(state_dict, strict=False)

        # dataset loader
        loader = setup_data_loaders(args)
        return vae, loader, args
示例#3
0
    def __init__(self, z_dim, use_cuda=False, prior_dist=None, q_dist=None,
                 include_mutinfo=True, tcvae=False, conv=False, mss=False):
        """VAE with configurable prior/posterior distribution families.

        Args:
            z_dim: dimensionality of the latent code z.
            use_cuda: move all parameters to GPU after construction.
            prior_dist: family for p(z); defaults to a fresh dist.Normal().
            q_dist: family for q(z|x); defaults to a fresh dist.Normal().
            include_mutinfo: keep the mutual-information term in the objective.
            tcvae: use the beta-TC-VAE objective decomposition.
            conv: use convolutional encoder/decoder instead of MLPs.
            mss: use minibatch stratified sampling.
        """
        super(VAE, self).__init__()

        # Avoid mutable default arguments: a dist instance created in the
        # signature would be evaluated once and shared by every VAE built
        # without an explicit argument.
        prior_dist = dist.Normal() if prior_dist is None else prior_dist
        q_dist = dist.Normal() if q_dist is None else q_dist

        self.use_cuda = use_cuda
        self.z_dim = z_dim
        self.include_mutinfo = include_mutinfo
        self.tcvae = tcvae
        self.lamb = 0
        self.beta = 1
        self.mss = mss
        self.x_dist = dist.Bernoulli()

        # Model-specific
        # distribution family of p(z)
        self.prior_dist = prior_dist
        self.q_dist = q_dist
        # hyperparameters for prior p(z): two values per latent dimension
        self.register_buffer('prior_params', torch.zeros(self.z_dim, 2))

        # create the encoder and decoder networks; the encoder emits
        # nparams values per latent dimension for the chosen q family
        if conv:
            self.encoder = ConvEncoder(z_dim * self.q_dist.nparams)
            self.decoder = ConvDecoder(z_dim)
        else:
            self.encoder = MLPEncoder(z_dim * self.q_dist.nparams)
            self.decoder = MLPDecoder(z_dim)

        if use_cuda:
            # calling cuda() here will put all the parameters of
            # the encoder and decoder networks into gpu memory
            self.cuda()
示例#4
0
    def __init__(self,
                 z_dim=10,
                 beta=1.0,
                 learning_rate=5e-4,
                 fade_in_duration=5000,
                 flags=None,
                 chn_num=1,
                 train_seq=1,
                 image_size=64):
        """Store hyperparameters, then build the network and optimizer.

        Args:
            z_dim: latent dimensionality.
            beta: weight on the KL/TC penalty of the objective.
            learning_rate: optimizer learning rate.
            fade_in_duration: number of steps used for layer fade-in.
            flags: config object; ``flags.KL`` and ``flags.fadein`` are read
                below, so despite the None default it must be provided.
            chn_num: number of image channels.
            train_seq: index of the current training sequence.
            image_size: input image height/width in pixels.
        """
        self.flags = flags
        self.activation = tf.nn.leaky_relu
        self.z_dim = z_dim
        self.layer_num = 4
        self.learning_rate = learning_rate
        self.beta = beta
        self.chn_num = chn_num
        self.fade_in_duration = fade_in_duration
        self.train_seq = train_seq
        self.image_size = image_size
        # NOTE(review): flags=None would raise AttributeError here — confirm
        # callers always pass a populated flags object.
        self.pre_KL = flags.KL
        self.fadein = flags.fadein

        # distribution families: posterior q(z|x), likelihood p(x|z), prior p(z)
        self.q_dist = dist.Normal()
        self.x_dist = dist.Bernoulli()
        self.prior_dist = dist.Normal()
        # prior hyperparameters: two values per latent dimension
        self.prior_params = torch.zeros(self.z_dim, 2)

        self._create_network()
        self._create_loss_optimizer()
示例#5
0
    def __init__(self,
                 z_dim,
                 use_cuda=False,
                 prior_dist=None,
                 q_dist=None,
                 x_dist=None,
                 include_mutinfo=True,
                 tcvae=False,
                 conv=False,
                 mss=False,
                 dataset='',
                 mse_sigma=0.01,
                 DIP=False,
                 DIP_type=2,
                 lambda_od=2.0,
                 lambda_d=2.0):
        """VAE with optional DIP regularization and dataset-specific conv nets.

        Args:
            z_dim: latent dimensionality.
            use_cuda: move all parameters to GPU after construction.
            prior_dist: family for p(z); defaults to a fresh dist.Normal().
            q_dist: family for q(z|x); defaults to a fresh dist.Normal().
            x_dist: likelihood family for p(x|z); defaults to dist.Bernoulli().
            include_mutinfo: keep the mutual-information term in the objective.
            tcvae: use the beta-TC-VAE objective decomposition.
            conv: use convolutional encoder/decoder instead of MLPs.
            mss: use minibatch stratified sampling.
            dataset: selects conv architectures ('celeba', 'cars3d', or
                anything else for the default ConvEncoder/ConvDecoder).
            mse_sigma: accepted for interface compatibility; not stored here.
            DIP, DIP_type, lambda_od, lambda_d: DIP regularizer settings.
        """
        super(VAE, self).__init__()

        # Avoid mutable default arguments: distribution instances created in
        # the signature would be evaluated once and shared by every VAE built
        # without explicit arguments.
        prior_dist = dist.Normal() if prior_dist is None else prior_dist
        q_dist = dist.Normal() if q_dist is None else q_dist
        x_dist = dist.Bernoulli() if x_dist is None else x_dist

        self.use_cuda = use_cuda
        self.z_dim = z_dim
        self.include_mutinfo = include_mutinfo
        self.tcvae = tcvae
        self.lamb = 0
        self.beta = 1
        self.mss = mss
        self.conv = conv
        self.x_dist = x_dist

        self.DIP = DIP
        self.DIP_type = DIP_type
        self.lambda_od = lambda_od
        self.lambda_d = lambda_d

        # Model-specific
        # distribution family of p(z)
        self.prior_dist = prior_dist
        self.q_dist = q_dist
        # hyperparameters for prior p(z): two values per latent dimension
        self.register_buffer('prior_params', torch.zeros(self.z_dim, 2))

        # create the encoder and decoder networks; the encoder emits
        # nparams values per latent dimension for the chosen q family
        if conv:
            if dataset == 'celeba':
                self.encoder = ConvEncoderCelebA(z_dim * self.q_dist.nparams)
                self.decoder = ConvDecoderCelebA(z_dim)
            elif dataset == 'cars3d':
                self.encoder = ConvEncoderCars3d(z_dim * self.q_dist.nparams)
                self.decoder = ConvDecoderCars3d(z_dim)
            else:
                self.encoder = ConvEncoder(z_dim * self.q_dist.nparams)
                self.decoder = ConvDecoder(z_dim)
        else:
            self.encoder = MLPEncoder(z_dim * self.q_dist.nparams)
            self.decoder = MLPDecoder(z_dim)

        if use_cuda:
            # calling cuda() here will put all the parameters of
            # the encoder and decoder networks into gpu memory
            self.cuda()
def load_model_and_dataset(checkpt_filename):
    """Load a trained SensVAE plus its train and CelebA test loaders.

    Args:
        checkpt_filename: path to a checkpoint saved as a dict with
            'args' and 'state_dict' keys.

    Returns:
        (vae, loader, test_loader, args).

    Raises:
        ValueError: if ``args.dist`` is not 'normal', 'laplace' or 'flow'.
    """
    checkpt = torch.load(checkpt_filename)
    args = checkpt['args']
    state_dict = checkpt['state_dict']

    # backwards compatibility: older checkpoints predate the conv flag
    if not hasattr(args, 'conv'):
        args.conv = False

    # celeba uses a Gaussian likelihood; other datasets are binarized
    x_dist = dist.Normal() if args.dataset == 'celeba' else dist.Bernoulli()
    a_dist = dist.Bernoulli()

    # model
    if args.dist == 'normal':
        prior_dist = dist.Normal()
        q_dist = dist.Normal()
    elif args.dist == 'laplace':
        prior_dist = dist.Laplace()
        q_dist = dist.Laplace()
    elif args.dist == 'flow':
        prior_dist = flows.FactorialNormalizingFlow(dim=args.latent_dim,
                                                    nsteps=32)
        q_dist = dist.Normal()
    else:
        # fail fast instead of hitting a NameError on prior_dist below
        raise ValueError('unknown distribution: {}'.format(args.dist))
    vae = SensVAE(z_dim=args.latent_dim,
                  use_cuda=True,
                  prior_dist=prior_dist,
                  q_dist=q_dist,
                  include_mutinfo=not args.exclude_mutinfo,
                  tcvae=args.tcvae,
                  conv=args.conv,
                  mss=args.mss,
                  n_chan=3 if args.dataset == 'celeba' else 1,
                  sens_idx=SENS_IDX,
                  x_dist=x_dist,
                  a_dist=a_dist)

    # strict=False tolerates architecture drift between save and load
    vae.load_state_dict(state_dict, strict=False)
    # these hyperparameters are not part of the state dict
    vae.beta = args.beta
    vae.beta_sens = args.beta_sens
    vae.eval()

    # dataset loader
    loader = setup_data_loaders(args, use_cuda=True)

    # test loader
    test_set = dset.CelebA(mode='test')
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args.batch_size,
                             shuffle=False,
                             **kwargs)

    return vae, loader, test_loader, args
示例#7
0
def load_model_and_dataset(checkpt_filename):
    """Load a trained VAE (or InfoGAN wrapped as a VAE) and its dataset.

    Args:
        checkpt_filename: path to a checkpoint saved as a dict with
            'args' and 'state_dict' keys.

    Returns:
        (vae, dataset, args) — note the dataset itself, not the loader.

    Raises:
        ValueError: if ``args.dist`` names an unknown distribution family.
    """
    print('Loading model and dataset.')
    # map_location keeps the load CPU-safe regardless of where it was saved
    checkpt = torch.load(checkpt_filename,
                         map_location=lambda storage, loc: storage)
    args = checkpt['args']
    state_dict = checkpt['state_dict']

    # backwards compatibility: older checkpoints predate the conv flag
    if not hasattr(args, 'conv'):
        args.conv = False

    # checkpoints without args.dist default to the normal family
    if not hasattr(args, 'dist') or args.dist == 'normal':
        prior_dist = dist.Normal()
        q_dist = dist.Normal()
    elif args.dist == 'laplace':
        prior_dist = dist.Laplace()
        q_dist = dist.Laplace()
    elif args.dist == 'flow':
        prior_dist = flows.FactorialNormalizingFlow(dim=args.latent_dim,
                                                    nsteps=32)
        q_dist = dist.Normal()
    else:
        # fail fast instead of hitting a NameError on prior_dist below
        raise ValueError('unknown distribution: {}'.format(args.dist))

    # model
    if hasattr(args, 'ncon'):
        # InfoGAN checkpoint: restore the GAN, then graft its encoder and
        # decoder onto a VAE shell so downstream code sees one interface
        model = infogan.Model(args.latent_dim,
                              n_con=args.ncon,
                              n_cat=args.ncat,
                              cat_dim=args.cat_dim,
                              use_cuda=True,
                              conv=args.conv)
        model.load_state_dict(state_dict, strict=False)
        vae = vae_quant.VAE(z_dim=args.ncon,
                            use_cuda=True,
                            prior_dist=prior_dist,
                            q_dist=q_dist,
                            conv=args.conv)
        vae.encoder = model.encoder
        vae.decoder = model.decoder
    else:
        vae = vae_quant.VAE(z_dim=args.latent_dim,
                            use_cuda=True,
                            prior_dist=prior_dist,
                            q_dist=q_dist,
                            conv=args.conv)
        vae.load_state_dict(state_dict, strict=False)

    # dataset loader
    loader = vae_quant.setup_data_loaders(args)
    return vae, loader.dataset, args
示例#8
0
 def __init__(self, args, device):
     """Build the distributions, encoders and decoder of the axis-aware VAE.

     Args:
         args: config read for latent_dim, image_size and beta (and passed
             through to AxisEncoder).
         device: stored on the instance; not used during construction.
     """
     super(AxisVAE, self).__init__()
     self.z_dim = args.latent_dim
     self.img_size = args.image_size
     self.device = device
     # likelihood p(x|z), prior p(z) and posterior q(z|x) families
     self.x_dist = dist.Normal()
     self.prior_dist = dist.Normal()
     self.q_dist = dist.Normal()
     # prior hyperparameters: two values per latent dim, kept as a buffer
     # so they move with the module but are not trained
     self.register_buffer('prior_params', torch.zeros(self.z_dim, 2))
     # separate distributions for the per-axis latents
     self.axis_x_dist = dist.Normal()
     self.axis_y_dist = dist.Normal()
     # encoder emits nparams values per latent dimension for q
     self.encoder = Encoder(self.z_dim * self.q_dist.nparams)
     self.encoder_axis = AxisEncoder(self.z_dim, args)
     self.decoder = Decoder(self.z_dim)
     self.beta = args.beta
示例#9
0
    def __init__(self, z_dim, use_cuda=False, prior_dist=None, q_dist=None,
                 include_mutinfo=True, tcvae=False, conv=False, mss=False, n_chan=1,
                 sens_idx=None, x_dist=None, a_dist=None,
                 clf_samps=False):
        """VAE with sensitive-attribute modeling.

        Args:
            z_dim: latent dimensionality.
            use_cuda: move all parameters to GPU after construction.
            prior_dist: family for p(z); defaults to a fresh dist.Normal().
            q_dist: family for q(z|x); defaults to a fresh dist.Normal().
            include_mutinfo: keep the mutual-information term in the objective.
            tcvae: use the beta-TC-VAE objective decomposition.
            conv: use convolutional encoder/decoder instead of MLPs.
            mss: use minibatch stratified sampling.
            n_chan: number of image channels for the conv networks.
            sens_idx: indices of sensitive latent dims; defaults to [].
            x_dist: likelihood family for p(x|z); defaults to dist.Bernoulli().
            a_dist: family for the sensitive attributes; defaults to
                dist.Bernoulli().
            clf_samps: classifier-on-samples flag, stored for later use.
        """
        super(SensVAE, self).__init__()

        # Avoid mutable default arguments: the original signature created
        # dist instances and a [] once at def time, silently shared across
        # every SensVAE constructed without explicit arguments.
        prior_dist = dist.Normal() if prior_dist is None else prior_dist
        q_dist = dist.Normal() if q_dist is None else q_dist
        x_dist = dist.Bernoulli() if x_dist is None else x_dist
        a_dist = dist.Bernoulli() if a_dist is None else a_dist
        sens_idx = [] if sens_idx is None else sens_idx

        self.use_cuda = use_cuda
        self.z_dim = z_dim
        self.include_mutinfo = include_mutinfo
        self.tcvae = tcvae
        self.lamb = 0
        self.beta = 1
        self.beta_sens = 1
        # ^ the values of these hyperparams are correctly set later on
        self.mss = mss
        self.x_dist = x_dist
        self.a_dist = a_dist
        self.clf_samps = clf_samps
        self.n_chan = n_chan
        self.sens_idx = sens_idx

        # Model-specific
        # distribution family of p(z)
        self.prior_dist = prior_dist
        self.q_dist = q_dist
        # hyperparameters for prior p(z): two values per latent dimension
        self.register_buffer('prior_params', torch.zeros(self.z_dim, 2))

        # create the encoder and decoder networks; the encoder emits
        # nparams values per latent dimension for the chosen q family
        if conv:
            self.encoder = ConvEncoder(z_dim * self.q_dist.nparams, n_chan)
            self.decoder = ConvDecoder(z_dim, n_chan)
        else:
            self.encoder = MLPEncoder(z_dim * self.q_dist.nparams)
            self.decoder = MLPDecoder(z_dim)

        if use_cuda:
            # calling cuda() here will put all the parameters of
            # the encoder and decoder networks into gpu memory
            self.cuda()
示例#10
0
 def __init__(self, args, device, eps_range=(1e-4, 5e-4)):
     """Deep-GP head: per-latent-dimension RBF feature extractors.

     Args:
         args: config read for latent_dim and axis_dim (and passed through
             to AxisNet).
         device: stored on the instance for later use.
         eps_range: (low, high) epsilon bounds, stored as-is.
     """
     super(DGP, self).__init__()
     self.device = device
     self.z_dim = args.latent_dim
     self.axis_dim = args.axis_dim
     self.rbf_feature_size = 8
     self.eps_range = eps_range
     # one axis network per spatial axis
     self.x_axis_net = AxisNet(args)
     self.y_axis_net = AxisNet(args)
     self.axis_dist = dist.Normal()
     # one feature extractor per latent dimension; each maps the
     # concatenated (x, y) axis features to rbf_feature_size outputs
     self.extractors = nn.ModuleList(
         LargeFeatureExtractor(2 * self.axis_dim, self.rbf_feature_size)
         for _ in range(self.z_dim))
     # RBF kernel coefficients
     self.rbf_coef_sigma = 1
     self.rbf_coef_l = 1
示例#11
0
def main():
    """Train a (beta-TC-)VAE from command-line arguments.

    Parses args, builds the data loader and model, runs the training loop
    with periodic logging/checkpointing, then saves an ELBO decomposition
    and latent-vs-ground-truth plots. Returns the trained VAE.
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-d', '--dataset', default='shapes', type=str, help='dataset name',
        choices=['shapes', 'faces'])
    parser.add_argument('-dist', default='normal', type=str, choices=['normal', 'laplace', 'flow'])
    parser.add_argument('-n', '--num-epochs', default=50, type=int, help='number of training epochs')
    parser.add_argument('-b', '--batch-size', default=2048, type=int, help='batch size')
    parser.add_argument('-l', '--learning-rate', default=1e-3, type=float, help='learning rate')
    parser.add_argument('-z', '--latent-dim', default=10, type=int, help='size of latent dimension')
    parser.add_argument('--beta', default=1, type=float, help='ELBO penalty term')
    parser.add_argument('--tcvae', action='store_true')
    parser.add_argument('--exclude-mutinfo', action='store_true')
    parser.add_argument('--beta-anneal', action='store_true')
    parser.add_argument('--lambda-anneal', action='store_true')
    parser.add_argument('--mss', action='store_true', help='use the improved minibatch estimator')
    parser.add_argument('--conv', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--visdom', action='store_true', help='whether plotting in visdom is desired')
    parser.add_argument('--save', default='test1')
    parser.add_argument('--log_freq', default=200, type=int, help='num iterations per log')
    args = parser.parse_args()

    # torch.cuda.set_device(args.gpu)

    # data loader
    train_loader = setup_data_loaders(args, use_cuda=True)

    # setup the VAE; args.dist is restricted by argparse choices above,
    # so exactly one branch fires
    if args.dist == 'normal':
        prior_dist = dist.Normal()
        q_dist = dist.Normal()
    elif args.dist == 'laplace':
        prior_dist = dist.Laplace()
        q_dist = dist.Laplace()
    elif args.dist == 'flow':
        prior_dist = FactorialNormalizingFlow(dim=args.latent_dim, nsteps=32)
        q_dist = dist.Normal()

    vae = VAE(z_dim=args.latent_dim, use_cuda=True, prior_dist=prior_dist, q_dist=q_dist,
        include_mutinfo=not args.exclude_mutinfo, tcvae=args.tcvae, conv=args.conv, mss=args.mss)

    # setup the optimizer
    optimizer = optim.Adam(vae.parameters(), lr=args.learning_rate)

    # setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env=args.save, port=4500)

    train_elbo = []

    # training loop
    dataset_size = len(train_loader.dataset)
    num_iterations = len(train_loader) * args.num_epochs
    iteration = 0
    # initialize loss accumulator
    elbo_running_mean = utils.RunningAverageMeter()
    while iteration < num_iterations:
        for i, x in enumerate(train_loader):
            iteration += 1
            batch_time = time.time()
            vae.train()
            anneal_kl(args, vae, iteration)
            optimizer.zero_grad()
            # transfer to GPU; fixed: `async` has been a reserved keyword
            # (SyntaxError) since Python 3.7 — the argument is non_blocking
            x = x.cuda(non_blocking=True)
            # wrap the mini-batch in a PyTorch Variable
            x = Variable(x)
            # do ELBO gradient and accumulate loss
            obj, elbo = vae.elbo(x, dataset_size)
            if utils.isnan(obj).any():
                raise ValueError('NaN spotted in objective.')
            # maximize the ELBO by minimizing its negation
            obj.mean().mul(-1).backward()
            print("obj value: ", obj.mean().mul(-1).cpu())
            elbo_running_mean.update(elbo.mean().item())
            optimizer.step()

            # report training diagnostics
            if iteration % args.log_freq == 0:
                train_elbo.append(elbo_running_mean.avg)
                print('[iteration %03d] time: %.2f \tbeta %.2f \tlambda %.2f training ELBO: %.4f (%.4f)' % (
                    iteration, time.time() - batch_time, vae.beta, vae.lamb,
                    elbo_running_mean.val, elbo_running_mean.avg))

                vae.eval()

                # plot training and test ELBOs
                if args.visdom:
                    display_samples(vae, x, vis)
                    plot_elbo(train_elbo, vis)

                utils.save_checkpoint({
                    'state_dict': vae.state_dict(),
                    'args': args}, args.save, 0)
                # eval() on a name built from argparse-constrained choices
                # ('shapes'/'faces'), so this cannot execute arbitrary input
                eval('plot_vs_gt_' + args.dataset)(vae, train_loader.dataset,
                    os.path.join(args.save, 'gt_vs_latent_{:05d}.png'.format(iteration)))

    # Report statistics after training
    vae.eval()
    utils.save_checkpoint({
        'state_dict': vae.state_dict(),
        'args': args}, args.save, 0)
    dataset_loader = DataLoader(train_loader.dataset, batch_size=10, num_workers=1, shuffle=False)
    logpx, dependence, information, dimwise_kl, analytical_cond_kl, marginal_entropies, joint_entropy = \
        elbo_decomposition(vae, dataset_loader)
    torch.save({
        'logpx': logpx,
        'dependence': dependence,
        'information': information,
        'dimwise_kl': dimwise_kl,
        'analytical_cond_kl': analytical_cond_kl,
        'marginal_entropies': marginal_entropies,
        'joint_entropy': joint_entropy
    }, os.path.join(args.save, 'elbo_decomposition.pth'))
    eval('plot_vs_gt_' + args.dataset)(vae, dataset_loader.dataset, os.path.join(args.save, 'gt_vs_latent.png'))
    return vae
示例#12
0
    def beta_tc_loss(self, x, x_hat, mu_z, logstd_var, z, alphas, rep_as, dataset_size):
        """Monte-Carlo estimate of the beta-TC-VAE ELBO decomposition.

        Follows "Isolating Sources of Disentanglement in VAEs"
        (Chen et al., 2018), extended with Gumbel-Softmax terms for the
        discrete latents.

            Inputs:
                x: float Tensor of shape (self.height, self.width)
                    input to the network
                x_hat: float Tensor of shape (self.height, self.width)
                    output of the network
                mu_z: float Tensor of shape (self.z_dim)
                    output of encoder, mean of distribution q
                logstd_var: float Tensor of shape (self.z_dim)
                    output of encoder, log variance (halved below to a log
                    std unless self.computes_std) of distribution q
                z: float Tensor of shape (self.z_dim)
                    output of reparameterization, sample from q(z|x)
                alphas, rep_as: per-discrete-latent Gumbel-Softmax
                    parameters and reparameterized samples
                dataset_size: size of the full dataset, used in the
                    minibatch log q(z) correction term
            Output:
                float Tensors of scalars: the modified ELBO, the
                reconstruction term, and the continuous/discrete mutual
                information, total correlation and dimension-wise KL terms
        """
        # convert encoder's log variance to a log std when necessary
        if not self.computes_std:
            logstd_var = logstd_var / 2
        prior_dist, q_dist = dist.Normal(), dist.Normal()
        prior_params= torch.zeros(self.z_dim[0], 2)
        batch_size = x.size(0)
        x = x.view(batch_size, 1, self.height, self.width)
        # broadcast the prior parameters over the batch
        expanded_size = (batch_size,) + prior_params.size()
        prior_params = prior_params.expand(expanded_size).cuda().requires_grad_()
        # stack (mean, log std) as the last dim, as dist.Normal expects
        z_params = torch.cat([mu_z.view(batch_size,self.num_latent_dims,1),
                                logstd_var.view(batch_size,self.num_latent_dims,1)],dim=2)

        # reconstruction term, scaled from per-pixel mean to per-image sum
        if self.output_type == 'binary':
            recons = F.binary_cross_entropy_with_logits(x_hat, x, reduction='mean') * self.width*self.height
        else:
            recons = F.mse_loss(x_hat,x, reduction='mean') * self.width*self.height
        # continuous part of the latent code
        z_cont = z[:,:self.num_latent_dims]
        logpz = prior_dist.log_density(z_cont, params=prior_params).view(batch_size, -1).sum(1)
        logqz_condx = q_dist.log_density(z_cont, params=z_params).view(batch_size, -1).sum(1)

        # compute log q(z) ~= log 1/(NM) sum_m=1^M q(z|x_m) = - log(MN) + logsumexp_m(q(z|x_m))
        _logqz = q_dist.log_density(
            z_cont.view(batch_size, 1, self.z_dim[0]),
            z_params.view(1, batch_size, self.z_dim[0], 2)
        )

        logqz_prodmarginals = (torch.logsumexp(_logqz, dim=1, keepdim=False) - log(batch_size * dataset_size)).sum(1)
        logqz = (torch.logsumexp(_logqz.sum(2), dim=1, keepdim=False) - log(batch_size * dataset_size))

        # Monte Carlo estimation of the three continuous terms

        # mutual information
        mi = (logqz_condx - logqz).mean()
        # total correlation
        tc = (logqz - logqz_prodmarginals).mean()
        # dimension-wise KL, called regularization here
        reg = (logqz_prodmarginals - logpz).mean()

        # same decomposition for the discrete (Gumbel-Softmax) latents
        _logqy_s = []
        for alpha, sample_i in zip(alphas,rep_as):
            _logqy_s.append(dist.Gumbel_Softmax.log_density(alpha.view(1,batch_size,-1)
                            ,sample_i.view(batch_size,1,-1),self.temperature).view(1,batch_size,batch_size))
        
        _logqy_s = torch.cat(_logqy_s,dim=0)

        logqy_s_prodmarginals = (torch.logsumexp(_logqy_s, dim=1, keepdim=False) - log(batch_size * dataset_size)).sum(0)
        logqy = (torch.logsumexp(_logqy_s.sum(0), dim=1, keepdim=False) - log(batch_size * dataset_size))

        # log q(y|x): sum of per-latent conditional densities
        logqy_condx = torch.zeros_like(logqy)
        for alpha, sample_i in zip(alphas,rep_as):
            logqy_condx += dist.Gumbel_Softmax.log_density(alpha,sample_i,self.temperature)
        # mutual information
        mi_disc = (logqy_condx - logqy).mean()
        # total correlation
        tc_disc = (logqy - logqy_s_prodmarginals).mean()
        # dimension-wise KL, called regularization here
        # NOTE(review): unlike the continuous `reg`, this subtracts logqy
        # rather than a prior log p(y), so reg_disc == -tc_disc by
        # construction — confirm whether a log-prior term was intended.
        reg_disc = (logqy_s_prodmarginals - logqy).mean()

        # weighted recombination of the decomposition terms
        modified_elbo = recons + \
                        self.alpha *(mi) + \
                        self.beta * tc + \
                        self.gamma *reg +\
                        self.alpha_disc * mi_disc +\
                        self.beta_disc* tc_disc +\
                        self.gamma_disc * reg_disc

        return modified_elbo, recons, mi, tc, torch.abs(reg), mi_disc, tc_disc, torch.abs(reg_disc)
示例#13
0
def load_model(checkpt_filename, use_cuda=True):
    """Restore a VAE from a checkpoint file.

    Args:
        checkpt_filename: path to a checkpoint saved as a dict with
            'args' and 'state_dict' keys.
        use_cuda: move the restored model to GPU.

    Returns:
        (vae, args) on success, or (None, None) if the file cannot be read.

    Raises:
        ValueError: if args names an unknown prior or posterior family.
    """
    print('Loading model and dataset.')
    try:
        # map_location keeps the load CPU-safe regardless of save device
        checkpt = torch.load(checkpt_filename,
                             map_location=lambda storage, loc: storage)
    except Exception as err:
        print('error: reading file {}'.format(err))
        return None, None

    args = checkpt['args']
    state_dict = checkpt['state_dict']

    # backwards compatibility with older checkpoints
    if not hasattr(args, 'conv'):
        args.conv = False

    if not hasattr(args, 'pnorm'):
        args.pnorm = 4.0 / 3.0

    # fixed: the attribute is 'q_dist' (argparse converts dashes), so the
    # old hasattr(args, 'q-dist') was always False; and the old body used
    # '==' (a no-op comparison) instead of '=' to set the default
    if not hasattr(args, 'q_dist'):
        args.q_dist = 'normal'

    if not hasattr(args, 'var_clipping'):
        args.var_clipping = 0

    # setup the VAE prior
    if args.dist == 'normal':
        prior_dist = dist.Normal()
    elif args.dist == 'laplace':
        prior_dist = dist.Laplace()
    elif args.dist == 'flow':
        prior_dist = FactorialNormalizingFlow(dim=args.latent_dim, nsteps=32)
    elif args.dist == 'lpnested':
        # the Lp-nested structure may be given directly, as a pnested
        # literal, or built from the (p0, (p1,n1), (p2,n2), (p3,n3)) spec
        if not args.isa == '':
            pnested = parseISA(ast.literal_eval(args.isa))
        elif not args.pnested == '':
            pnested = ast.literal_eval(args.pnested)
        else:
            pnested = parseISA([
                args.p0,
                [(args.p1, args.n1), (args.p2, args.n2), (args.p3, args.n3)]
            ])

        print('using Lp-nested prior, pnested = ({}) {}'.format(
            type(pnested), pnested))
        prior_dist = LpNestedAdapter(p=pnested, scale=args.scale)
        # the Lp-nested structure dictates the latent dimension
        args.latent_dim = prior_dist.dimz()
        print('using Lp-nested prior, changed latent dimension to {}'.format(
            args.latent_dim))
    elif args.dist == 'studentt':
        print('using student-t prior, scale = {}'.format(args.scale))
        prior_dist = StudentTAdapter(scale=args.scale)
    elif args.dist == 'lpnorm':
        prior_dist = LpNestedAdapter(p=[args.pnorm, [[1.0]] * args.latent_dim],
                                     scale=args.scale)
    else:
        # fail fast instead of hitting a NameError on prior_dist below
        raise ValueError('unknown prior distribution: {}'.format(args.dist))

    if args.q_dist == 'normal':
        q_dist = dist.Normal()
    elif args.q_dist == 'laplace':
        q_dist = dist.Laplace()
    else:
        raise ValueError('unknown posterior distribution: {}'.format(
            args.q_dist))

    vae = vae_quant.VAE(z_dim=args.latent_dim,
                        use_cuda=use_cuda,
                        prior_dist=prior_dist,
                        q_dist=q_dist,
                        include_mutinfo=not args.exclude_mutinfo,
                        tcvae=args.tcvae,
                        conv=args.conv,
                        mss=args.mss,
                        var_clipping=(args.var_clipping != 0),
                        dataset=args.dataset)
    # use the already-extracted state_dict for consistency
    vae.load_state_dict(state_dict)
    if use_cuda:
        vae.cuda()

    return vae, args
示例#14
0
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-d',
                        '--dataset',
                        default='faces',
                        type=str,
                        help='dataset name',
                        choices=['shapes', 'faces'])
    parser.add_argument('-dist',
                        default='normal',
                        type=str,
                        choices=['normal', 'laplace', 'flow'])
    parser.add_argument('-x_dist',
                        default='normal',
                        type=str,
                        choices=['normal', 'bernoulli'])
    parser.add_argument('-n',
                        '--num-epochs',
                        default=50,
                        type=int,
                        help='number of training epochs')
    parser.add_argument('-b',
                        '--batch-size',
                        default=2048,
                        type=int,
                        help='batch size')
    parser.add_argument('-l',
                        '--learning-rate',
                        default=1e-3,
                        type=float,
                        help='learning rate')
    parser.add_argument('-z',
                        '--latent-dim',
                        default=10,
                        type=int,
                        help='size of latent dimension')
    parser.add_argument('--beta',
                        default=1,
                        type=float,
                        help='ELBO penalty term')
    parser.add_argument('--tcvae', action='store_true')
    parser.add_argument('--exclude-mutinfo', action='store_false')
    parser.add_argument('--beta-anneal', action='store_true')
    parser.add_argument('--lambda-anneal', action='store_true')
    parser.add_argument('--mss',
                        action='store_true',
                        help='use the improved minibatch estimator')
    parser.add_argument('--conv', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--visdom',
                        action='store_true',
                        help='whether plotting in visdom is desired')
    parser.add_argument('--save', default='test2')
    parser.add_argument('--log_freq',
                        default=50,
                        type=int,
                        help='num iterations per log')
    parser.add_argument(
        '-problem',
        default='Climate_ORNL',
        type=str,
        choices=['HEP_SL', 'Climate_ORNL', 'Climate_C', 'Nuclear_Physics'])
    parser.add_argument('--VIB', action='store_true', help='VIB regression')
    parser.add_argument('--UQ',
                        action='store_true',
                        help='Uncertainty Quantification - likelihood')
    parser.add_argument('-name_S',
                        '--name_save',
                        default=[],
                        type=str,
                        help='name to save file')
    parser.add_argument('--classification', action='store_true')
    parser.add_argument('--Func_reg', action='store_true')

    args = parser.parse_args()

    torch.cuda.set_device(args.gpu)

    # data loader
    train_loader = setup_data_loaders(args, use_cuda=True)

    # setup the VAE
    if args.dist == 'normal':
        prior_dist = dist.Normal()
        q_dist = dist.Normal()
    elif args.dist == 'laplace':
        prior_dist = dist.Laplace()
        q_dist = dist.Laplace()
    elif args.dist == 'flow':
        prior_dist = FactorialNormalizingFlow(dim=args.latent_dim, nsteps=32)
        q_dist = dist.Normal()

    # setup the likelihood distribution
    if args.x_dist == 'normal':
        x_dist = dist.Normal()
    elif args.x_dist == 'bernoulli':
        x_dist = dist.Bernoulli()
    else:
        raise ValueError('x_dist can be Normal or Bernoulli')

    vae = VAE(z_dim=args.latent_dim,
              beta=args.beta,
              use_cuda=True,
              prior_dist=prior_dist,
              q_dist=q_dist,
              x_dist=x_dist,
              x_dist_name=args.x_dist,
              include_mutinfo=not args.exclude_mutinfo,
              tcvae=args.tcvae,
              conv=args.conv,
              mss=args.mss,
              problem=args.problem,
              VIB=args.VIB,
              UQ=args.UQ,
              classification=args.classification)

    if (args.Func_reg):
        args.latent_dim2 = 4
        args.beta2 = 0.0
        prior_dist2 = dist.Normal()
        q_dist2 = dist.Normal()
        x_dist2 = dist.Normal()
        args.x_dist2 = dist.Normal()
        args.tcvae2 = False
        args.conv2 = False
        args.problem2 = 'Climate_ORNL'
        args.VIB2 = True
        args.UQ2 = False
        args.classification2 = False

        vae2 = VAE(z_dim=args.latent_dim2,
                   beta=args.beta2,
                   use_cuda=True,
                   prior_dist=prior_dist2,
                   q_dist=q_dist2,
                   x_dist=x_dist2,
                   x_dist_name=args.x_dist2,
                   include_mutinfo=not args.exclude_mutinfo,
                   tcvae=args.tcvae2,
                   conv=args.conv2,
                   mss=args.mss,
                   problem=args.problem2,
                   VIB=args.VIB2,
                   UQ=args.UQ2,
                   classification=args.classification2)

    # setup the optimizer
    #optimizer = optim.Adam(vae.parameters(), lr=args.learning_rate)
    if (args.Func_reg):
        params = list(vae.parameters()) + list(vae2.parameters())
        optimizer = optim.RMSprop(params, lr=args.learning_rate)
    else:
        optimizer = optim.RMSprop(vae.parameters(), lr=args.learning_rate)
    # setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env=args.save, port=4500)

    train_elbo = []
    train_rmse = []
    train_mae = []
    train_elbo1 = []
    train_elbo2 = []
    train_elbo3 = []
    train_elbo4 = []
    train_rmse2 = []
    train_mae2 = []
    # training loop
    dataset_size = len(train_loader.dataset)
    num_iterations = len(train_loader) * args.num_epochs
    print("num_iteration", len(train_loader), args.num_epochs)
    iteration = 0
    print("likelihood function", args.x_dist, x_dist)

    train_iter = iter(train_loader)
    images = train_iter.next()

    img_max = train_loader.dataset.__getmax__()

    # initialize loss accumulator
    elbo_running_mean = utils.RunningAverageMeter()
    elbo_running_rmse = utils.RunningAverageMeter()
    elbo_running_mae = utils.RunningAverageMeter()
    elbo_running_mean1 = utils.RunningAverageMeter()
    elbo_running_mean2 = utils.RunningAverageMeter()
    elbo_running_mean3 = utils.RunningAverageMeter()
    elbo_running_mean4 = utils.RunningAverageMeter()
    elbo_running_rmse2 = utils.RunningAverageMeter()
    elbo_running_mae2 = utils.RunningAverageMeter()
    #plot the data to visualize

    x_test = train_loader.dataset.imgs_test
    x_train = train_loader.dataset.imgs

    def count_parameters(model):
        trainable = sum(p.numel() for p in model.parameters()
                        if p.requires_grad)
        total = sum(p.numel() for p in model.parameters())
        return (trainable, total)

    while iteration < num_iterations:
        for i, xy in enumerate(train_loader):
            iteration += 1
            batch_time = time.time()
            vae.train()
            #anneal_kl(args, vae, iteration)
            optimizer.zero_grad()
            # transfer to GPU
            if (args.problem == 'HEP_SL'):
                x = xy[0]
                x = x.float()
                x = x.cuda()
                x = Variable(x)

                y = xy[1]
                y = y.cuda()
                y = Variable(y)

                label = xy[2]
                label = label.cuda()
                label = Variable(label)

            # Get the Training Objective
            obj, elbo, x_mean_pred, z_params1, _, _ = vae.elbo(
                x, y, label, dataset_size)
            if utils.isnan(obj).any():
                raise ValueError('NaN spotted in objective.')

            obj.mean().mul(-1).backward()
            elbo_running_mean.update(elbo.mean().data)  #[0])
            optimizer.step()

            # report training diagnostics
            if iteration % args.log_freq == 0:
                train_elbo.append(elbo_running_mean.avg)

                if (args.VIB):
                    if not args.classification:
                        if (args.UQ):
                            A = x_mean_pred.cpu().data.numpy()[:, :, 0]
                        else:
                            A = x_mean_pred.cpu().data.numpy()
                        B = y.cpu().data.numpy()
                    else:
                        A = x_mean_pred.cpu().data.numpy()
                        B = label.cpu().data.numpy()
                else:
                    A = x_mean_pred.cpu().data.numpy()
                    B = x.cpu().data.numpy()

                rmse = np.sqrt((np.square(A - B)).mean(axis=None))
                mae = np.abs(A - B).mean(axis=None)

                elbo_running_rmse.update(rmse)
                elbo_running_mae.update(mae)

                train_rmse.append(elbo_running_rmse.avg)
                train_mae.append(elbo_running_mae.avg)

                print(
                    '[iteration %03d] time: %.2f \tbeta %.2f \tlambda %.2f training ELBO: %.4f (%.4f) RMSE: %.4f (%.4f) MAE: %.4f (%.4f)'
                    % (iteration, time.time() - batch_time, vae.beta, vae.lamb,
                       elbo_running_mean.val, elbo_running_mean.avg,
                       elbo_running_rmse.val, elbo_running_rmse.avg,
                       elbo_running_mae.val, elbo_running_mae.avg))

                utils.save_checkpoint(
                    {
                        'state_dict': vae.state_dict(),
                        'args': args
                    }, args.save, 0)

                print("max pred:", np.max(A), "max input:", np.max(B),
                      "min pred:", np.min(A), "min input:", np.min(B))

    if (args.problem == 'HEP_SL'):
        x_test = train_loader.dataset.imgs_test
        x_test = x_test.cuda()
        y_test = train_loader.dataset.lens_p_test
        y_test = y_test.cuda()
        label_test = train_loader.dataset.label_test
        label_test = label_test.cuda()

    utils.save_checkpoint({
        'state_dict': vae.state_dict(),
        'args': args
    }, args.save, 0)
    name_save = args.name_save

    Viz_plot.Convergence_plot(train_elbo, train_rmse, train_mae, name_save,
                              args.save)
    Viz_plot.display_samples_pred_mlp(vae, x_test, y_test, label_test,
                                      args.problem, args.VIB, name_save,
                                      args.UQ, args.classification, args.save,
                                      img_max)

    # Report statistics after training
    vae.eval()
    return vae
示例#15
0
def main():
    """Train a SensVAE on CelebA, optionally auditing the learned latent
    representation with per-subgroup fairness classifiers.

    Workflow: parse CLI args -> build data loaders, VAE and optimizer ->
    training loop with periodic logging / validation / checkpointing ->
    return the trained VAE in eval mode.

    Returns:
        The trained SensVAE instance (set to eval mode).
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-d', '--dataset', default='celeba', type=str, help='dataset name',
        choices=['celeba'])
    parser.add_argument('-dist', default='normal', type=str, choices=['normal', 'laplace', 'flow'])
    parser.add_argument('-n', '--num-epochs', default=50, type=int, help='number of training epochs')
    parser.add_argument('-b', '--batch-size', default=2048, type=int, help='batch size')
    parser.add_argument('-l', '--learning-rate', default=1e-3, type=float, help='learning rate')
    parser.add_argument('-z', '--latent-dim', default=100, type=int, help='size of latent dimension')
    parser.add_argument('--beta', default=1, type=float, help='ELBO penalty term')
    parser.add_argument('--beta_sens', default=20, type=float, help='Relative importance of predicting sensitive attributes')
    #parser.add_argument('--sens_idx', default=[13, 15, 20], type=list, help='Relative importance of predicting sensitive attributes')
    parser.add_argument('--tcvae', action='store_true')
    parser.add_argument('--exclude-mutinfo', action='store_true')
    parser.add_argument('--beta-anneal', action='store_true')
    parser.add_argument('--lambda-anneal', action='store_true')
    parser.add_argument('--mss', action='store_true', help='use the improved minibatch estimator')
    parser.add_argument('--conv', action='store_true')
    parser.add_argument('--clf_samps', action='store_true')
    parser.add_argument('--clf_means', action='store_false', dest='clf_samps')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--visdom', action='store_true', help='whether plotting in visdom is desired')
    parser.add_argument('--save', default='betatcvae-celeba')
    parser.add_argument('--log_freq', default=200, type=int, help='num iterations per log')
    parser.add_argument('--audit', action='store_true',
            help='after each epoch, audit the repr wrt fair clf task')
    args = parser.parse_args()
    print(args)

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    # tensorboard writer plus a plain-text train.log (recreated each run)
    writer = SummaryWriter(args.save)
    writer.add_text('args', json.dumps(vars(args), sort_keys=True, indent=4))

    log_file = os.path.join(args.save, 'train.log')
    if os.path.exists(log_file):
        os.remove(log_file)

    print(vars(args))
    print(vars(args), file=open(log_file, 'w'))

    torch.cuda.set_device(args.gpu)

    # data loader
    loaders = setup_data_loaders(args, use_cuda=True)

    # setup the VAE: prior/posterior distribution families
    if args.dist == 'normal':
        prior_dist = dist.Normal()
        q_dist = dist.Normal()
    elif args.dist == 'laplace':
        prior_dist = dist.Laplace()
        q_dist = dist.Laplace()
    elif args.dist == 'flow':
        prior_dist = FactorialNormalizingFlow(dim=args.latent_dim, nsteps=32)
        q_dist = dist.Normal()

    # celeba pixels are modeled as Gaussian, other datasets as Bernoulli;
    # sensitive attributes are always binary
    x_dist = dist.Normal() if args.dataset == 'celeba' else dist.Bernoulli()
    a_dist = dist.Bernoulli()
    vae = SensVAE(z_dim=args.latent_dim, use_cuda=True, prior_dist=prior_dist, 
            q_dist=q_dist, include_mutinfo=not args.exclude_mutinfo, 
            tcvae=args.tcvae, conv=args.conv, mss=args.mss, 
            n_chan=3 if args.dataset == 'celeba' else 1, sens_idx=SENS_IDX,
            x_dist=x_dist, a_dist=a_dist, clf_samps=args.clf_samps)

    if args.audit:
        # one MLP auditor per sensitive-attribute subgroup, each with its own
        # representation-scrubbing and attribute-extraction functions
        audit_label_fn = get_label_fn(
                dict(data=dict(name='celeba', label_fn='H'))
                )
        audit_repr_fns = dict()
        audit_attr_fns = dict()
        audit_models = dict()
        audit_train_metrics = dict()
        audit_validation_metrics = dict()
        for attr_fn_name in CELEBA_SENS_IDX.keys():
            model = MLPClassifier(args.latent_dim, 1000, 2)
            model.cuda()
            audit_models[attr_fn_name] = model
            audit_repr_fns[attr_fn_name] = get_repr_fn(
                dict(data=dict(
                    name='celeba', repr_fn='remove_all', attr_fn=attr_fn_name))
                )
            audit_attr_fns[attr_fn_name] = get_attr_fn(
                dict(data=dict(name='celeba', attr_fn=attr_fn_name))
                )

    # setup the optimizer
    optimizer = optim.Adam(vae.parameters(), lr=args.learning_rate)
    if args.audit:
        Adam = optim.Adam
        audit_optimizers = dict()
        for k, v in audit_models.items():
            audit_optimizers[k] = Adam(v.parameters(), lr=args.learning_rate)


    # setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env=args.save, port=3776)

    train_elbo = []
    train_tc = []

    # training loop
    dataset_size = len(loaders['train'].dataset)
    num_iterations = len(loaders['train']) * args.num_epochs
    iteration = 0
    # initialize loss accumulators
    elbo_running_mean = utils.RunningAverageMeter()
    tc_running_mean = utils.RunningAverageMeter()
    clf_acc_meters = {'clf_acc{}'.format(s): utils.RunningAverageMeter() for s in vae.sens_idx}

    val_elbo_running_mean = utils.RunningAverageMeter()
    val_tc_running_mean = utils.RunningAverageMeter()
    val_clf_acc_meters = {'val_clf_acc{}'.format(s): utils.RunningAverageMeter() for s in vae.sens_idx}


    while iteration < num_iterations:
        bar = tqdm(range(len(loaders['train'])))
        for i, (x, a) in enumerate(loaders['train']):
            bar.update()
            iteration += 1
            batch_time = time.time()
            vae.train()
            #anneal_kl(args, vae, iteration)  # TODO try annealing beta/beta_sens
            vae.beta = args.beta
            vae.beta_sens = args.beta_sens
            optimizer.zero_grad()
            # transfer to GPU
            # NOTE(fix): `async=True` is a SyntaxError on Python >= 3.7
            # (`async` became a keyword); the supported kwarg is non_blocking.
            x = x.cuda(non_blocking=True)
            a = a.float()
            a = a.cuda(non_blocking=True)
            # wrap the mini-batch in a PyTorch Variable
            x = Variable(x)
            a = Variable(a)
            # do ELBO gradient and accumulate loss
            obj, elbo, metrics = vae.elbo(x, a, dataset_size)
            if utils.isnan(obj).any():
                raise ValueError('NaN spotted in objective.')
            obj.mean().mul(-1).backward()
            elbo_running_mean.update(elbo.mean().data.item())
            tc_running_mean.update(metrics['tc'])
            # NOTE(review): zipping two dicts assumes clf-accuracy entries of
            # `metrics` align positionally with `clf_acc_meters` — verify
            # against SensVAE.elbo's metrics ordering.
            for (s, meter), (_, acc) in zip(clf_acc_meters.items(), metrics.items()):
                clf_acc_meters[s].update(acc.data.item())
            optimizer.step()

            if args.audit:
                for model in audit_models.values():
                    model.train()
                # now re-encode x and take a step to train each audit classifier
                for opt in audit_optimizers.values():
                    opt.zero_grad()
                with torch.no_grad():
                    zs, z_params = vae.encode(x)
                    if args.clf_samps:
                        z = zs
                    else:
                        z_mu = z_params.select(-1, 0)
                        z = z_mu
                    a_all = a
                for subgroup, model in audit_models.items():
                    # noise out sensitive dims of latent code
                    z_ = z.clone()
                    a_all_ = a_all.clone()
                    # subsample to just sens attr of interest for this subgroup
                    a_ = audit_attr_fns[subgroup](a_all_)
                    # noise out sensitive dims for this subgroup
                    z_ = audit_repr_fns[subgroup](z_, None, None)
                    y_ = audit_label_fn(a_all_).long()

                    loss, _, metrics = model(z_, y_, a_)
                    loss.backward()
                    audit_optimizers[subgroup].step()
                    metrics_dict = {}
                    metrics_dict.update(loss=loss.detach().item())
                    for k, v in metrics.items():
                        if v.numel() > 1:
                            k += '-avg'
                            v = v.float().mean()
                        metrics_dict.update({k:v.detach().item()})
                    audit_train_metrics[subgroup] = metrics_dict

            # report training diagnostics
            if iteration % args.log_freq == 0:
                if args.audit:
                    for subgroup, metrics in audit_train_metrics.items():
                        for metric_name, metric_value in metrics.items():
                            writer.add_scalar(
                                    '{}/{}'.format(subgroup, metric_name),
                                    metric_value, iteration)

                train_elbo.append(elbo_running_mean.avg)
                writer.add_scalar('train_elbo', elbo_running_mean.avg, iteration)
                train_tc.append(tc_running_mean.avg)
                writer.add_scalar('train_tc', tc_running_mean.avg, iteration)
                msg = '[iteration %03d] time: %.2f \tbeta %.2f \tlambda %.2f training ELBO: %.4f (%.4f) training TC %.4f (%.4f)' % (
                    iteration, time.time() - batch_time, vae.beta, vae.lamb,
                    elbo_running_mean.val, elbo_running_mean.avg,
                    tc_running_mean.val, tc_running_mean.avg)
                for k, v in clf_acc_meters.items():
                    msg += ' {}: {:.2f}'.format(k, v.avg)
                    writer.add_scalar(k, v.avg, iteration)
                print(msg)
                print(msg, file=open(log_file, 'a'))

                vae.eval()
                ################################################################
                # evaluate validation metrics on vae and auditors
                for x, a in loaders['validation']:
                    # transfer to GPU (see non_blocking note above)
                    x = x.cuda(non_blocking=True)
                    a = a.float()
                    a = a.cuda(non_blocking=True)
                    # wrap the mini-batch in a PyTorch Variable
                    x = Variable(x)
                    a = Variable(a)
                    # compute ELBO on the validation batch
                    obj, elbo, metrics = vae.elbo(x, a, dataset_size)
                    if utils.isnan(obj).any():
                        raise ValueError('NaN spotted in objective.')
                    #
                    val_elbo_running_mean.update(elbo.mean().data.item())
                    val_tc_running_mean.update(metrics['tc'])
                    for (s, meter), (_, acc) in zip(
                            val_clf_acc_meters.items(), metrics.items()):
                        val_clf_acc_meters[s].update(acc.data.item())

                if args.audit:
                    # audit on the last validation batch (x, a from loop above)
                    for model in audit_models.values():
                        model.eval()
                    with torch.no_grad():
                        zs, z_params = vae.encode(x)
                        if args.clf_samps:
                            z = zs
                        else:
                            z_mu = z_params.select(-1, 0)
                            z = z_mu
                        a_all = a
                    for subgroup, model in audit_models.items():
                        # noise out sensitive dims of latent code
                        z_ = z.clone()
                        a_all_ = a_all.clone()
                        # subsample to just sens attr of interest for this subgroup
                        a_ = audit_attr_fns[subgroup](a_all_)
                        # noise out sensitive dims for this subgroup
                        z_ = audit_repr_fns[subgroup](z_, None, None)
                        y_ = audit_label_fn(a_all_).long()

                        # BUG FIX: the original also ran loss.backward() and
                        # audit_optimizers[subgroup].step() here, i.e. trained
                        # the auditors on validation data. Evaluate only.
                        with torch.no_grad():
                            loss, _, metrics = model(z_, y_, a_)
                        metrics_dict = {}
                        metrics_dict.update(val_loss=loss.detach().item())
                        for k, v in metrics.items():
                            k = 'val_' + k  # denote a validation metric
                            if v.numel() > 1:
                                k += '-avg'
                                v = v.float().mean()
                            metrics_dict.update({k:v.detach().item()})
                        audit_validation_metrics[subgroup] = metrics_dict

                    # after iterating through validation set, write summaries.
                    # BUG FIX: this loop referenced audit_validation_metrics
                    # unguarded, raising NameError whenever --audit was absent.
                    for subgroup, metrics in audit_validation_metrics.items():
                        for metric_name, metric_value in metrics.items():
                            writer.add_scalar(
                                    '{}/{}'.format(subgroup, metric_name),
                                    metric_value, iteration)
                writer.add_scalar('val_elbo', val_elbo_running_mean.avg, iteration)
                writer.add_scalar('val_tc', val_tc_running_mean.avg, iteration)
                for k, v in val_clf_acc_meters.items():
                    writer.add_scalar(k, v.avg, iteration)

                ################################################################
                # finally, plot training and test ELBOs
                if args.visdom:
                    display_samples(vae, x, vis)
                    plot_elbo(train_elbo, vis)
                    plot_tc(train_tc, vis)

                utils.save_checkpoint({
                    'state_dict': vae.state_dict(),
                    'args': args}, args.save, iteration // len(loaders['train']))
                eval('plot_vs_gt_' + args.dataset)(vae, loaders['train'].dataset,
                    os.path.join(args.save, 'gt_vs_latent_{:05d}.png'.format(iteration)))

    # Report statistics after training
    vae.eval()
    utils.save_checkpoint({
        'state_dict': vae.state_dict(),
        'args': args}, args.save, 0)
    dataset_loader = DataLoader(loaders['train'].dataset, batch_size=1000, num_workers=1, shuffle=False)
    if False:  # disabled: full ELBO decomposition is expensive
        logpx, dependence, information, dimwise_kl, analytical_cond_kl, marginal_entropies, joint_entropy = \
            elbo_decomposition(vae, dataset_loader)
        torch.save({
            'logpx': logpx,
            'dependence': dependence,
            'information': information,
            'dimwise_kl': dimwise_kl,
            'analytical_cond_kl': analytical_cond_kl,
            'marginal_entropies': marginal_entropies,
            'joint_entropy': joint_entropy
        }, os.path.join(args.save, 'elbo_decomposition.pth'))
    eval('plot_vs_gt_' + args.dataset)(vae, dataset_loader.dataset, os.path.join(args.save, 'gt_vs_latent.png'))

    # sentinel file so external schedulers can detect completion
    for file in [open(os.path.join(args.save, 'done'), 'w'), sys.stdout]:
        print('done', file=file)

    return vae
示例#16
0
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument(
        '-d',
        '--dataset',
        default='shapes',
        type=str,
        help='dataset name',
        choices=['shapes', 'faces', 'celeba', 'cars3d', '3dchairs'])
    parser.add_argument('-dist',
                        default='normal',
                        type=str,
                        choices=['normal', 'lpnorm', 'lpnested'])
    parser.add_argument('-n',
                        '--num-epochs',
                        default=50,
                        type=int,
                        help='number of training epochs')
    parser.add_argument(
        '--num-iterations',
        default=0,
        type=int,
        help='number of iterations (overrides number of epochs if >0)')
    parser.add_argument('-b',
                        '--batch-size',
                        default=2048,
                        type=int,
                        help='batch size')
    parser.add_argument('-l',
                        '--learning-rate',
                        default=1e-3,
                        type=float,
                        help='learning rate')
    parser.add_argument('-z',
                        '--latent-dim',
                        default=10,
                        type=int,
                        help='size of latent dimension')
    parser.add_argument('-p',
                        '--pnorm',
                        default=4.0 / 3.0,
                        type=float,
                        help='p value of the Lp-norm')
    parser.add_argument(
        '--pnested',
        default='',
        type=str,
        help=
        'nested list representation of the Lp-nested prior, e.g. [2.1, [ [2.2, [ [1.0], [1.0], [1.0], [1.0] ] ], [2.2, [ [1.0], [1.0], [1.0], [1.0] ] ], [2.2, [ [1.0], [1.0], [1.0], [1.0] ] ] ] ]'
    )
    parser.add_argument(
        '--isa',
        default='',
        type=str,
        help=
        'shorthand notation of ISA Lp-nested norm, e.g. [2.1, [(2.2, 4), (2.2, 4), (2.2, 4)]]'
    )
    parser.add_argument('--p0', default=2.0, type=float, help='p0 of ISA')
    parser.add_argument('--p1', default=2.1, type=float, help='p1 of ISA')
    parser.add_argument('--n1', default=6, type=int, help='n1 of ISA')
    parser.add_argument('--p2', default=2.1, type=float, help='p2 of ISA')
    parser.add_argument('--n2', default=6, type=int, help='n2 of ISA')
    parser.add_argument('--p3', default=2.1, type=float, help='p3 of ISA')
    parser.add_argument('--n3', default=6, type=int, help='n3 of ISA')
    parser.add_argument('--scale',
                        default=1.0,
                        type=float,
                        help='scale of LpNested distribution')
    parser.add_argument('--q-dist',
                        default='normal',
                        type=str,
                        choices=['normal', 'laplace'])
    parser.add_argument('--x-dist',
                        default='bernoulli',
                        type=str,
                        choices=['normal', 'bernoulli'])
    parser.add_argument('--beta',
                        default=1,
                        type=float,
                        help='ELBO penalty term')
    parser.add_argument('--tcvae', action='store_true')
    parser.add_argument('--exclude-mutinfo', action='store_true')
    parser.add_argument('--beta-anneal', action='store_true')
    parser.add_argument('--lambda-anneal', action='store_true')
    parser.add_argument('--mss',
                        action='store_true',
                        help='use the improved minibatch estimator')
    parser.add_argument('--conv', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--visdom',
                        action='store_true',
                        help='whether plotting in visdom is desired')
    parser.add_argument('--save', default='test1')
    parser.add_argument('--id', default='1')
    parser.add_argument(
        '--seed',
        default=-1,
        type=int,
        help=
        'seed for pytorch and numpy random number generator to allow reproducibility (default/-1: use random seed)'
    )
    parser.add_argument('--log_freq',
                        default=200,
                        type=int,
                        help='num iterations per log')
    parser.add_argument('--use-mse-loss', action='store_true')
    parser.add_argument('--mse-sigma',
                        default=0.01,
                        type=float,
                        help='sigma of mean squared error loss')
    parser.add_argument('--dip', action='store_true', help='use DIP-VAE')
    parser.add_argument('--dip-type',
                        default=1,
                        type=int,
                        help='DIP type (1 or 2)')
    parser.add_argument('--lambda-od',
                        default=2.0,
                        type=float,
                        help='DIP: lambda weight off-diagonal')
    parser.add_argument('--clip',
                        default=0.0,
                        type=float,
                        help='Gradient clipping (0 disabled)')
    parser.add_argument('--test', action='store_true', help='run test')
    parser.add_argument(
        '--trainingsetsize',
        default=0,
        type=int,
        help='Subsample the trainingset (0 use original training data)')
    args = parser.parse_args()

    # initialize seeds for reproducibility
    if not args.seed == -1:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    if args.gpu != -1:
        print('Using CUDA device {}'.format(args.gpu))
        torch.cuda.set_device(args.gpu)
        use_cuda = True
    else:
        print('CUDA disabled')
        use_cuda = False

    # data loader
    train_loader = setup_data_loaders(args.dataset,
                                      args.batch_size,
                                      use_cuda=use_cuda,
                                      len_subset=args.trainingsetsize)

    # setup the VAE
    if args.dist == 'normal':
        prior_dist = dist.Normal()
    elif args.dist == 'laplace':
        prior_dist = dist.Laplace()
    elif args.dist == 'lpnested':
        if not args.isa == '':
            pnested = parseISA(ast.literal_eval(args.isa))
        elif not args.pnested == '':
            pnested = ast.literal_eval(args.pnested)
        else:
            pnested = parseISA([
                args.p0,
                [(args.p1, args.n1), (args.p2, args.n2), (args.p3, args.n3)]
            ])

        print('using Lp-nested prior, pnested = ({}) {}'.format(
            type(pnested), pnested))
        prior_dist = LpNestedAdapter(p=pnested, scale=args.scale)
        args.latent_dim = prior_dist.dimz()
        print('using Lp-nested prior, changed latent dimension to {}'.format(
            args.latent_dim))
    elif args.dist == 'lpnorm':
        prior_dist = LpNestedAdapter(p=[args.pnorm, [[1.0]] * args.latent_dim],
                                     scale=args.scale)

    if args.q_dist == 'normal':
        q_dist = dist.Normal()
    elif args.q_dist == 'laplace':
        q_dist = dist.Laplace()

    if args.x_dist == 'normal':
        x_dist = dist.Normal(sigma=args.mse_sigma)
    elif args.x_dist == 'bernoulli':
        x_dist = dist.Bernoulli()

    if args.dip_type == 1:
        lambda_d = 10.0 * args.lambda_od
    else:
        lambda_d = args.lambda_od

    vae = VAE(z_dim=args.latent_dim,
              use_cuda=use_cuda,
              prior_dist=prior_dist,
              q_dist=q_dist,
              x_dist=x_dist,
              include_mutinfo=not args.exclude_mutinfo,
              tcvae=args.tcvae,
              conv=args.conv,
              mss=args.mss,
              dataset=args.dataset,
              mse_sigma=args.mse_sigma,
              DIP=args.dip,
              DIP_type=args.dip_type,
              lambda_od=args.lambda_od,
              lambda_d=lambda_d)

    # setup the optimizer
    optimizer = optim.Adam([{
        'params': vae.parameters()
    }, {
        'params': prior_dist.parameters(),
        'lr': 5e-4
    }],
                           lr=args.learning_rate)

    # setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(env=args.save, port=4500)

    train_elbo = []

    # training loop
    dataset_size = len(train_loader.dataset)
    if args.num_iterations == 0:
        num_iterations = len(train_loader) * args.num_epochs
    else:
        num_iterations = args.num_iterations
    iteration = 0
    obj_best_snapshot = float('-inf')
    best_checkpoint_updated = False

    trainingcurve_filename = os.path.join(args.save, 'trainingcurve.csv')
    if not os.path.exists(trainingcurve_filename):
        with open(trainingcurve_filename, 'w') as fd:
            fd.write(
                'iteration,num_iterations,time,elbo_running_mean_val,elbo_running_mean_avg\n'
            )

    # initialize loss accumulator
    elbo_running_mean = utils.RunningAverageMeter()
    nan_detected = False
    while iteration < num_iterations and not nan_detected:
        for i, x in enumerate(train_loader):
            iteration += 1
            batch_time = time.time()
            vae.train()
            anneal_kl(args, vae, iteration)
            optimizer.zero_grad()
            # transfer to GPU
            if use_cuda:
                x = x.cuda()  # async=True)
            # wrap the mini-batch in a PyTorch Variable
            x = Variable(x)
            # do ELBO gradient and accumulate loss
            #with autograd.detect_anomaly():
            obj, elbo, logpx = vae.elbo(prior_dist,
                                        x,
                                        dataset_size,
                                        use_mse_loss=args.use_mse_loss,
                                        mse_sigma=args.mse_sigma)
            if utils.isnan(obj).any():
                print('NaN spotted in objective.')
                print('lpnested: {}'.format(prior_dist.prior.p))
                print("gradient abs max {}".format(
                    max([g.abs().max() for g in gradients])))
                #raise ValueError('NaN spotted in objective.')
                nan_detected = True
                break
            elbo_running_mean.update(elbo.mean().item())

            # save checkpoint of best ELBO
            if obj.mean().item() > obj_best_snapshot:
                obj_best_snapshot = obj.mean().item()
                best_checkpoint = {
                    'state_dict': vae.state_dict(),
                    'state_dict_prior_dist': prior_dist.state_dict(),
                    'args': args,
                    'iteration': iteration,
                    'obj': obj_best_snapshot,
                    'elbo': elbo.mean().item(),
                    'logpx': logpx.mean().item()
                }
                best_checkpoint_updated = True

            #with autograd.detect_anomaly():
            obj.mean().mul(-1).backward()

            gradients = list(
                filter(lambda p: p.grad is not None, vae.parameters()))

            if args.clip > 0:
                torch.nn.utils.clip_grad_norm_(vae.parameters(), args.clip)

            optimizer.step()

            # report training diagnostics
            if iteration % args.log_freq == 0:
                train_elbo.append(elbo_running_mean.avg)
                time_ = time.time() - batch_time
                print(
                    '[iteration %03d/%03d] time: %.2f \tbeta %.2f \tlambda %.2f \tobj %.4f \tlogpx %.4f training ELBO: %.4f (%.4f)'
                    % (iteration, num_iterations, time_, vae.beta, vae.lamb,
                       obj.mean().item(), logpx.mean().item(),
                       elbo_running_mean.val, elbo_running_mean.avg))

                p0, p1list = backwardsParseISA(prior_dist.prior.p)
                print('lpnested: {}, {}'.format(p0, p1list))
                print("gradient abs max {}".format(
                    max([g.abs().max() for g in gradients])))

                with open(os.path.join(args.save, 'trainingcurve.csv'),
                          'a') as fd:
                    fd.write('{},{},{},{},{}\n'.format(iteration,
                                                       num_iterations, time_,
                                                       elbo_running_mean.val,
                                                       elbo_running_mean.avg))

                if best_checkpoint_updated:
                    print(
                        'Update best checkpoint [iteration %03d] training ELBO: %.4f'
                        % (best_checkpoint['iteration'],
                           best_checkpoint['elbo']))
                    utils.save_checkpoint(best_checkpoint, args.save, 0)
                    best_checkpoint_updated = False

                vae.eval()
                prior_dist.eval()

                # plot training and test ELBOs
                if args.visdom:
                    if args.dataset == 'celeba':
                        num_channels = 3
                    else:
                        num_channels = 1
                    display_samples(vae, prior_dist, x, vis, num_channels)
                    plot_elbo(train_elbo, vis)

                if iteration % (10 * args.log_freq) == 0:
                    utils.save_checkpoint(
                        {
                            'state_dict': vae.state_dict(),
                            'state_dict_prior_dist': prior_dist.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'args': args,
                            'iteration': iteration,
                            'obj': obj.mean().item(),
                            'torch_random_state': torch.get_rng_state(),
                            'numpy_random_state': np.random.get_state()
                        },
                        args.save,
                        prefix='latest-optimizer-model-')
                    if not args.dataset == 'celeba' and not args.dataset == '3dchairs':
                        eval('plot_vs_gt_' + args.dataset)(
                            vae, train_loader.dataset,
                            os.path.join(
                                args.save,
                                'gt_vs_latent_{:05d}.png'.format(iteration)))

    # Report statistics of best snapshot after training
    vae.load_state_dict(best_checkpoint['state_dict'])
    prior_dist.load_state_dict(best_checkpoint['state_dict_prior_dist'])

    vae.eval()
    prior_dist.eval()

    if args.dataset == 'shapes':
        data_set = dset.Shapes()
    elif args.dataset == 'faces':
        data_set = dset.Faces()
    elif args.dataset == 'cars3d':
        data_set = dset.Cars3d()
    elif args.dataset == 'celeba':
        data_set = dset.CelebA()
    elif args.dataset == '3dchairs':
        data_set = dset.Chairs()
    else:
        raise ValueError('Unknown dataset ' + str(args.dataset))

    print("loaded dataset {} of size {}".format(args.dataset, len(data_set)))

    dataset_loader = DataLoader(data_set,
                                batch_size=1000,
                                num_workers=0,
                                shuffle=False)

    logpx, dependence, information, dimwise_kl, analytical_cond_kl, elbo_marginal_entropies, elbo_joint_entropy = \
        elbo_decomposition(vae, prior_dist, dataset_loader)
    torch.save(
        {
            'args': args,
            'logpx': logpx,
            'dependence': dependence,
            'information': information,
            'dimwise_kl': dimwise_kl,
            'analytical_cond_kl': analytical_cond_kl,
            'marginal_entropies': elbo_marginal_entropies,
            'joint_entropy': elbo_joint_entropy
        }, os.path.join(args.save, 'elbo_decomposition.pth'))
    print('logpx: {:.2f}'.format(logpx))
    if not args.dataset == 'celeba' and not args.dataset == '3dchairs':
        eval('plot_vs_gt_' + args.dataset)(vae, dataset_loader.dataset,
                                           os.path.join(
                                               args.save, 'gt_vs_latent.png'))

        metric, metric_marginal_entropies, metric_cond_entropies = eval(
            'disentanglement_metrics.mutual_info_metric_' + args.dataset)(
                vae, dataset_loader.dataset)
        torch.save(
            {
                'args': args,
                'metric': metric,
                'marginal_entropies': metric_marginal_entropies,
                'cond_entropies': metric_cond_entropies,
            }, os.path.join(args.save, 'disentanglement_metric.pth'))
        print('MIG: {:.2f}'.format(metric))

        if args.dist == 'lpnested':
            p0, p1list = backwardsParseISA(prior_dist.prior.p)
            print('p0: {}'.format(p0))
            print('p1: {}'.format(p1list))
            torch.save(
                {
                    'args': args,
                    'logpx': logpx,
                    'dependence': dependence,
                    'information': information,
                    'dimwise_kl': dimwise_kl,
                    'analytical_cond_kl': analytical_cond_kl,
                    'elbo_marginal_entropies': elbo_marginal_entropies,
                    'elbo_joint_entropy': elbo_joint_entropy,
                    'metric': metric,
                    'metric_marginal_entropies': metric_marginal_entropies,
                    'metric_cond_entropies': metric_cond_entropies,
                    'p0': p0,
                    'p1': p1list
                }, os.path.join(args.save, 'combined_data.pth'))
        else:
            torch.save(
                {
                    'args': args,
                    'logpx': logpx,
                    'dependence': dependence,
                    'information': information,
                    'dimwise_kl': dimwise_kl,
                    'analytical_cond_kl': analytical_cond_kl,
                    'elbo_marginal_entropies': elbo_marginal_entropies,
                    'elbo_joint_entropy': elbo_joint_entropy,
                    'metric': metric,
                    'metric_marginal_entropies': metric_marginal_entropies,
                    'metric_cond_entropies': metric_cond_entropies,
                }, os.path.join(args.save, 'combined_data.pth'))

        if args.dist == 'lpnested':
            if args.dataset == 'shapes':
                eval('plot_vs_gt_' + args.dataset)(
                    vae,
                    dataset_loader.dataset,
                    os.path.join(args.save, 'gt_vs_grouped_latent.png'),
                    eval_subspaces=True)

                metric_subspaces, metric_marginal_entropies_subspaces, metric_cond_entropies_subspaces = eval(
                    'disentanglement_metrics.mutual_info_metric_' +
                    args.dataset)(vae,
                                  dataset_loader.dataset,
                                  eval_subspaces=True)
                torch.save(
                    {
                        'args': args,
                        'metric': metric_subspaces,
                        'marginal_entropies':
                        metric_marginal_entropies_subspaces,
                        'cond_entropies': metric_cond_entropies_subspaces,
                    },
                    os.path.join(args.save,
                                 'disentanglement_metric_subspaces.pth'))
                print('MIG grouped by subspaces: {:.2f}'.format(
                    metric_subspaces))

                torch.save(
                    {
                        'args': args,
                        'logpx': logpx,
                        'dependence': dependence,
                        'information': information,
                        'dimwise_kl': dimwise_kl,
                        'analytical_cond_kl': analytical_cond_kl,
                        'elbo_marginal_entropies': elbo_marginal_entropies,
                        'elbo_joint_entropy': elbo_joint_entropy,
                        'metric': metric,
                        'metric_marginal_entropies': metric_marginal_entropies,
                        'metric_cond_entropies': metric_cond_entropies,
                        'metric_subspaces': metric_subspaces,
                        'metric_marginal_entropies_subspaces':
                        metric_marginal_entropies_subspaces,
                        'metric_cond_entropies_subspaces':
                        metric_cond_entropies_subspaces,
                        'p0': p0,
                        'p1': p1list
                    }, os.path.join(args.save, 'combined_data.pth'))

    return vae
    def beta_tc_loss(self, x, x_hat, z_params, z, dataset_size):
        """Monte Carlo estimate of the decomposed (beta-TC-VAE) ELBO.

        Follows "Isolating Sources of Disentanglement in VAEs"
        (Chen et al., 2018): the KL term is split into mutual
        information, total correlation and dimension-wise KL, weighted
        by self.alpha, self.beta and self.gamma respectively.

        Inputs:
            x: float Tensor, input batch; reshaped below to
                (batch, self.nc, self.height, self.width)
            x_hat: float Tensor, network output (logits when
                self.output_type == 'binary')
            z_params: float Tensor of shape (batch, self.z_dim, 2),
                per-dimension parameters of q(z|x); slot 1 is halved
                below when self.computes_std is False (log-variance ->
                log-std)
            z: float Tensor of shape (batch, self.z_dim),
                reparameterized sample from q(z|x)
            dataset_size: int, total number of training examples, used
                in the minibatch estimate of log q(z)
        Output:
            tuple of scalar float Tensors:
                (modified_elbo, recons, mi, tc, abs(reg))
        """
        # NOTE(review): this mutates the caller's z_params in place —
        # confirm callers do not reuse the tensor afterwards.
        if not self.computes_std:
            z_params[:, :, 1] = z_params[:, :, 1] / 2

        prior_dist, q_dist = dist.Normal(), dist.Normal()
        # Standard-normal prior: (mean, log-std) both zero per dim.
        prior_params = torch.zeros(self.z_dim, 2)
        batch_size = x.size(0)
        x = x.view(batch_size, self.nc, self.height, self.width)
        expanded_size = (batch_size, ) + prior_params.size()
        prior_params = prior_params.expand(
            expanded_size).cuda().requires_grad_()

        if self.output_type == 'binary':
            # Mean BCE over all elements, rescaled to a per-image sum.
            recons = F.binary_cross_entropy_with_logits(
                x_hat, x, reduction='mean') * self.width * self.height
        else:
            recons = F.mse_loss(x_hat * 255, x * 255, reduction='sum') / 255
        # log p(z) and log q(z|x), summed over latent dimensions.
        logpz = prior_dist.log_density(z, params=prior_params).view(
            batch_size, -1).sum(1)
        logqz_condx = q_dist.log_density(z,
                                         params=z_params).view(batch_size,
                                                               -1).sum(1)

        # compute log q(z) ~= log 1/(NM) sum_m=1^M q(z|x_m) = - log(MN) + logsumexp_m(q(z|x_m))
        # _logqz[i, m, j] = log q(z_j(x_i) | x_m)
        _logqz = q_dist.log_density(
            z.view(batch_size, 1, self.z_dim),
            z_params.view(1, batch_size, self.z_dim, 2))

        logqz_prodmarginals = (torch.logsumexp(_logqz, dim=1, keepdim=False) -
                               log(batch_size * dataset_size)).sum(1)
        logqz = (torch.logsumexp(_logqz.sum(2), dim=1, keepdim=False) -
                 log(batch_size * dataset_size))

        # Monte Carlo estimates of the three KL components:

        # mutual information I(x; z)
        mi = (logqz_condx - logqz).mean()
        # total correlation TC(z)
        tc = (logqz - logqz_prodmarginals).mean()
        # dimension-wise KL, here named regularization
        reg = (logqz_prodmarginals - logpz).mean()
        modified_elbo = recons + \
                        self.alpha *(mi) + \
                        self.beta * tc + \
                        self.gamma *reg

        return modified_elbo, recons, mi, tc, torch.abs(reg)
示例#18
0
def mutual_info_metric_faces(vae, shapes_dataset):
    """Compute the mutual-information-gap inputs for the 3D faces data.

    Estimates marginal latent entropies and, for each ground-truth
    factor (azimuth, elevation, lighting), the conditional latent
    entropies, then combines them via compute_metric_faces.
    """
    loader = DataLoader(shapes_dataset,
                        batch_size=1000,
                        num_workers=1,
                        shuffle=False)

    N = len(loader.dataset)  # number of data samples
    K = 10  # number of latent variables
    nparams = dist.Normal().nparams
    vae.eval()

    print('Computing q(z|x) distributions.')
    qz_params = torch.Tensor(N, K, nparams)

    offset = 0
    for batch in loader:
        bs = batch.size(0)
        images = batch.view(bs, 1, 64, 64).cuda()

        z, mu, logvar, y = vae(images)
        params = torch.cat([mu.view(bs, K, 1), logvar.view(bs, K, 1)], dim=2)

        qz_params[offset:offset + bs] = params.view(bs, K, nparams).data
        offset += bs

    # Factor layout: 50 identities x 21 azimuths x 11 elevations x 11 lightings.
    qz_params = qz_params.view(50, 21, 11, 11, K, nparams).cuda()
    qz_samples = dist.Normal().sample(params=qz_params)

    print('Estimating marginal entropies.')
    marginal_entropies = estimate_entropies(
        qz_samples.view(N, K).transpose(0, 1),
        qz_params.view(N, K, nparams),
        dist.Normal())
    marginal_entropies = marginal_entropies.cpu()

    cond_entropies = torch.zeros(3, K)
    # (row in cond_entropies, factor name, tensor axis, number of values)
    factors = ((0, 'azimuth', 1, 21),
               (1, 'elevation', 2, 11),
               (2, 'lighting', 3, 11))

    for row, factor_name, axis, nvals in factors:
        print('Estimating conditional entropies for {}.'.format(factor_name))
        for value in range(nvals):
            # select(axis, value) == integer-indexing that axis.
            samples_slice = qz_samples.select(axis, value).contiguous()
            params_slice = qz_params.select(axis, value).contiguous()

            cond_entropies_i = estimate_entropies(
                samples_slice.view(N // nvals, K).transpose(0, 1),
                params_slice.view(N // nvals, K, nparams),
                dist.Normal())

            cond_entropies[row] += cond_entropies_i.cpu() / nvals

    metric = compute_metric_faces(marginal_entropies, cond_entropies)
    return metric, marginal_entropies, cond_entropies
示例#19
0
def mutual_info_metric_shapes(vae, shapes_dataset):
    """Compute the mutual-information-gap inputs for the dSprites data.

    Estimates marginal latent entropies and, for each ground-truth
    factor (scale, orientation, x/y position), the conditional latent
    entropies, then combines them via compute_metric_shapes.
    """
    loader = DataLoader(shapes_dataset,
                        batch_size=1000,
                        num_workers=1,
                        shuffle=False)

    N = len(loader.dataset)  # number of data samples
    K = 10  # number of latent variables
    nparams = dist.Normal().nparams
    vae.eval()

    print('Computing q(z|x) distributions.')
    qz_params = torch.Tensor(N, K, nparams)

    offset = 0
    for batch in loader:
        bs = batch.size(0)
        images = batch.view(bs, 1, 64, 64).cuda()

        z, mu, logvar, y = vae(images)
        params = torch.cat([mu.view(bs, K, 1), logvar.view(bs, K, 1)], dim=2)

        qz_params[offset:offset + bs] = params.view(bs, K, nparams).data
        offset += bs

    # Factor layout: 3 shapes x 6 scales x 40 orientations x 32 x-pos x 32 y-pos.
    qz_params = qz_params.view(3, 6, 40, 32, 32, K, nparams).cuda()
    qz_samples = dist.Normal().sample(params=qz_params)

    print('Estimating marginal entropies.')
    marginal_entropies = estimate_entropies(
        qz_samples.view(N, K).transpose(0, 1),
        qz_params.view(N, K, nparams),
        dist.Normal())
    marginal_entropies = marginal_entropies.cpu()

    cond_entropies = torch.zeros(4, K)
    # (row in cond_entropies, factor name, tensor axis, number of values)
    factors = ((0, 'scale', 1, 6),
               (1, 'orientation', 2, 40),
               (2, 'pos x', 3, 32),
               (3, 'pox y', 4, 32))

    for row, factor_name, axis, nvals in factors:
        print('Estimating conditional entropies for {}.'.format(factor_name))
        for value in range(nvals):
            # select(axis, value) == integer-indexing that axis.
            samples_slice = qz_samples.select(axis, value).contiguous()
            params_slice = qz_params.select(axis, value).contiguous()

            cond_entropies_i = estimate_entropies(
                samples_slice.view(N // nvals, K).transpose(0, 1),
                params_slice.view(N // nvals, K, nparams),
                dist.Normal())

            cond_entropies[row] += cond_entropies_i.cpu() / nvals

    metric = compute_metric_shapes(marginal_entropies, cond_entropies)
    return metric, marginal_entropies, cond_entropies
示例#20
0
    def __init__(self,
                 z_dim,
                 t_dim,
                 use_cuda=False,
                 tcvae=False,
                 indepLs=False,
                 useSepaUnit=1,
                 modelNum=1,
                 ngf=32,
                 h_dim=256,
                 imCh=1):
        """VAE with a continuous latent vector z (Normal) and a
        categorical latent y (Gumbel-Softmax relaxation).

        Args:
            z_dim: dimensionality of the continuous latent z.
            t_dim: number of classes of the categorical latent y.
            use_cuda: move all parameters to the GPU when True.
            tcvae: flag stored for the loss computation.
            indepLs: flag stored for the loss computation.
            useSepaUnit: flag stored for the loss computation.
            modelNum: encoder/decoder architecture selector;
                1 = 32x32 conv nets (mnist, fashion-mnist),
                2 = 64x64 conv nets (shapes).
            ngf: base number of conv feature maps.
            h_dim: hidden width of the encoder/decoder heads.
            imCh: number of image channels.

        Raises:
            ValueError: if modelNum is not 1 or 2. (Previously an
                unknown modelNum silently left self.encoder /
                self.decoder unset, causing a confusing
                AttributeError later.)
        """
        super(VAE_idpVec_catY, self).__init__()

        self.use_cuda = use_cuda

        self.z_dim = z_dim
        self.n_class = t_dim

        self.ngf = ngf
        self.imCh = imCh

        self.tcvae = tcvae
        self.indepLs = indepLs
        self.useSepaUnit = useSepaUnit

        ###################################################################
        # Set latent and data distributions
        self.prior_dist_z = dist.Normal()
        self.q_dist_z = dist.Normal()

        self.prior_dist_y = dist.GumbelSoftmax_catY(nClass=self.n_class)
        self.q_dist_y = dist.GumbelSoftmax_catY(nClass=self.n_class)

        self.x_dist = dist.Bernoulli()

        ###################################################################
        # Loss weights: placeholders, overwritten by
        # set_lossWeight(args, vae) in the main code.

        self.beta_z, self.beta_y = 1.0, 1.0
        self.alpha_z, self.alpha_y = 1.0, 1.0
        self.gamma_z, self.gamma_y = 1.0, 1.0

        self.beta_h, self.alpha_h = 1.0, 1.0
        self.gamma_h_z, self.gamma_h_y = 1.0, 1.0

        self.lamb_indep = 1.0
        self.lamb_recon = 1.0
        self.temperature = .67

        self.lamb_cls = 1.0
        self.crit_cls = nn.NLLLoss(reduction='none')

        ###################################################################
        # Prior hyperparameters: standard normal for z, uniform
        # log-probabilities for y. Buffers so they follow .cuda().
        self.register_buffer('prior_params_z', torch.zeros(self.z_dim, 2))
        self.register_buffer(
            'prior_params_y',
            torch.zeros(self.n_class).fill_(1.0 / self.n_class).log())

        # create the encoder and decoder networks
        if modelNum == 1:  # im32 mnist, fashion-mnist
            self.encoder = enc_conv_im32(z_dim=z_dim * self.q_dist_z.nparams,
                                         y_dim=t_dim,
                                         imCh=imCh,
                                         ngf=ngf,
                                         h_dim=h_dim,
                                         useBias=True)

            self.decoder = dec_conv_im32(z_dim=z_dim,
                                         y_dim=t_dim,
                                         imCh=imCh,
                                         ngf=ngf,
                                         h_dim=h_dim,
                                         useBias=True)

        elif modelNum == 2:  # im64 shapes
            self.encoder = enc_conv_im64(z_dim=z_dim * self.q_dist_z.nparams,
                                         y_dim=t_dim,
                                         imCh=imCh,
                                         ngf=ngf,
                                         h_dim=h_dim,
                                         useBias=True)

            self.decoder = dec_conv_im64(z_dim=z_dim,
                                         y_dim=t_dim,
                                         imCh=imCh,
                                         ngf=ngf,
                                         h_dim=h_dim,
                                         useBias=True)

        else:
            raise ValueError('Unknown modelNum: {}'.format(modelNum))

        if use_cuda:
            self.cuda()
示例#21
0
    def __init__(self,
                 z_dim,
                 beta,
                 use_cuda=False,
                 prior_dist=None,
                 q_dist=None,
                 x_dist=None,
                 x_dist_name='bernoulli',
                 include_mutinfo=True,
                 tcvae=False,
                 conv=False,
                 mss=False,
                 problem='HEP_SL',
                 VIB=False,
                 UQ=False,
                 classification=False):
        """VAE for the HEP_SL problem with optional VIB / TC-VAE modes.

        Args:
            z_dim: dimensionality of the latent code z.
            beta: KL / total-correlation weight.
            use_cuda: move all parameters to the GPU when True.
            prior_dist: distribution family of p(z);
                defaults to a fresh dist.Normal() per instance.
            q_dist: distribution family of q(z|x);
                defaults to a fresh dist.Normal() per instance.
            x_dist: distribution family of p(x|z);
                defaults to a fresh dist.Bernoulli() per instance.
            x_dist_name: string tag for the x distribution.
            include_mutinfo: keep the mutual-information term.
            tcvae: use the total-correlation decomposition.
            conv: use the convolutional encoder (the only implemented
                path; see below).
            mss: use minibatch stratified sampling.
            problem: problem identifier; 'HEP_SL' is supported.
            VIB: use an information-bottleneck decoder head.
            UQ: in VIB regression mode, predict 6 outputs (with
                uncertainty) instead of 3.
            classification: in VIB mode, use the binary-classification
                decoder head.

        Note:
            The distribution defaults were previously constructed in
            the signature (``prior_dist=dist.Normal()``), which Python
            evaluates once at class-definition time, so every VAE
            instance shared the same distribution objects. They are
            now built per instance.
        """
        super(VAE, self).__init__()

        # Build default distribution objects here so each instance
        # gets its own (signature defaults are evaluated only once).
        if prior_dist is None:
            prior_dist = dist.Normal()
        if q_dist is None:
            q_dist = dist.Normal()
        if x_dist is None:
            x_dist = dist.Bernoulli()

        self.use_cuda = use_cuda
        self.z_dim = z_dim
        self.include_mutinfo = include_mutinfo
        self.tcvae = tcvae
        self.lamb = 0
        self.beta = beta
        self.mss = mss
        self.x_dist = x_dist
        self.VIB = VIB
        self.conv = conv
        self.x_dist_name = x_dist_name
        self.problem = problem
        self.UQ = UQ
        self.classification = classification

        # Model-specific
        # distribution family of p(z)
        self.prior_dist = prior_dist
        self.q_dist = q_dist
        # hyperparameters for prior p(z): (mean, log-std) all zero,
        # i.e. a standard normal. Buffer so it follows .cuda().
        self.register_buffer('prior_params', torch.zeros(self.z_dim, 2))

        # create the encoder and decoder networks
        if conv:
            self.encoder = ConvEncoder_HEP_SL(z_dim * self.q_dist.nparams)

            if (self.VIB):
                if not self.classification:
                    if (self.UQ):
                        out_dim = 6
                    else:
                        if (self.problem == 'HEP_SL'):
                            out_dim = 3
                        else:
                            # Previously out_dim was left unbound on
                            # this path, producing a confusing
                            # NameError below.
                            raise ValueError(
                                'Unsupported problem for VIB '
                                'regression: ' + str(problem))
                    self.decoder = MLPDecoder_y(z_dim, out_dim)
                else:
                    self.decoder = MLPDecoder_label(
                        z_dim)  # binary classification
            else:
                if (self.problem == 'HEP_SL'):
                    self.decoder = ConvDecoder_HEP_SL(z_dim)

        else:
            # NOTE(review): non-conv path builds no encoder/decoder;
            # any later use of self.encoder raises AttributeError.
            print("not implemented")

        if use_cuda:
            # calling cuda() here will put all the parameters of
            # the encoder and decoder networks into gpu memory
            self.cuda()