Example #1
def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'GBU_{}'.format(opt.dataset)
    exp_params = 'Eu{}_Rls{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA)

    out_dir = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}_{}.txt'.format(exp_info, opt.exp_idx)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # GAN's D loss on real samples
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # GAN's D loss on fake samples
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)

            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.3f}; Euc_ls: {:.3f}; reg_ls: {:.3f}; G_loss: {:.3f}; D_loss_real: {:.3f};' \
                       ' D_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%'\
                        .format(it, Wasserstein_D.item(), Euclidean_loss.item(), reg_loss.item(),
                                G_loss.item(), D_loss_real.item(), D_loss_fake.item(), acc_real * 100, acc_fake * 100)
            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model_ZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                # best_acc = result.acc_list[-1]
                save_model(
                    it, netG, netD, opt.manualSeed, log_text,
                    out_subdir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                        result.acc_list[-1]))

            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)
            if result_gzsl.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model_GZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                # best_acc_gzsl = result.acc_list[-1]
                save_model(
                    it, netG, netD, opt.manualSeed, log_text, out_subdir +
                    '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                        result_gzsl.best_acc, result_gzsl.best_acc_S_T,
                        result_gzsl.best_acc_U_T))

            netG.train()

        if it % opt.save_interval == 0 and it:
            save_model(it, netG, netD, opt.manualSeed, log_text,
                       out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it),
                   'red')
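
Examples 1, 4 and 5 call calc_gradient_penalty but never show it. Below is a minimal sketch of the standard WGAN-GP interpolation penalty, assuming netD returns a (critic score, class logits) pair as in the listings above; the built-in weight lambda_gp=10 is an assumption, since the listings apply their own opt.Adv_LAMBDA scaling on top.

import torch
from torch import autograd


def calc_gradient_penalty(netD, real_data, fake_data, lambda_gp=10.0):
    """WGAN-GP: penalise the critic's gradient norm on real/fake interpolates."""
    batch_size = real_data.size(0)
    alpha = torch.rand(batch_size, 1, device=real_data.device)
    alpha = alpha.expand_as(real_data)

    interpolates = (alpha * real_data +
                    (1 - alpha) * fake_data).requires_grad_(True)
    disc_interpolates, _ = netD(interpolates)  # critic score, class logits

    gradients = autograd.grad(outputs=disc_interpolates.sum(),
                              inputs=interpolates,
                              create_graph=True)[0]
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * lambda_gp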
Example #2
def train():
    dataset = DATA_LOADER(opt)
    opt.C_dim = dataset.att_dim
    opt.X_dim = dataset.feature_dim
    opt.y_dim = dataset.ntrain_class

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result_zsl_knn = Result()
    result_gzsl_soft = Result()

    netG = Glow(classes=opt.y_dim, condition_dim=opt.C_dim).cuda()

    out_dir = 'out/{}/shuffle'.format(opt.dataset)
    os.makedirs(out_dir, exist_ok=True)
    print("The output dictionary is {}".format(out_dir))

    log_dir = out_dir + '/log_{}.txt'.format(opt.dataset)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            train_z = checkpoint['latent_z'].cuda()
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    initial = True
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr)

    for it in range(start_step, opt.niter + 1):
        blobs = data_layer.forward()
        feat_data = blobs['data']  # image data
        labels = blobs['labels'].astype(int)  # class labels
        idx = blobs['idx'].astype(int)

        C = np.array([dataset.train_att[i, :] for i in labels])
        L = torch.from_numpy(labels).cuda()
        C = torch.from_numpy(C.astype('float32')).cuda()
        X = torch.from_numpy(feat_data).cuda()
        X = X.view(*X.shape, 1, 1)

        if initial is True:
            # one warm-up forward pass so the flow can run its data-dependent initialisation
            netG(x=X, y_onehot=C, reverse=False)
            initial = False

        z, nll, vaeloss, y_logit = netG(x=X, y_onehot=C, reverse=False)

        loss_generative = Glow.loss_generative(nll)
        loss_classes = Glow.loss_class(y_logit, L)
        loss = loss_generative + vaeloss + loss_classes * 0.01
        netG.zero_grad()
        optimizerG.zero_grad()
        loss.backward()
        optimizerG.step()

        if it % opt.disp_interval == 0 and it:
            log_text = 'Iter-[{}/{}]; epoch: {} Gloss: {:.3f} vaeloss: {:.3f} clsloss: {:.3f}'.format(
                it, opt.niter, it // opt.evl_interval, float(loss_generative),
                float(vaeloss), float(loss_classes))
            log_print(log_text, log_dir)

        if it % opt.evl_interval == 0 and it:
            netG.eval()
            gen_feat, gen_label = synthesize_feature_test(
                netG, dataset, 300, 0.5, opt)
            """ ZSL"""

            acc = eval_zsl_knn(gen_feat.numpy(), gen_label.numpy(), dataset)
            result_zsl_knn.update(it, acc)
            log_print("{}nn Classifer: ".format(opt.Knn), log_dir)
            log_print(
                "Accuracy is {:.2f}%, Best_acc [{:.2f}% | Iter-{}]".format(
                    acc, result_zsl_knn.best_acc, result_zsl_knn.best_iter),
                log_dir)

            gen_feat, gen_label = synthesize_feature_test(
                netG, dataset, opt.nSample, 1.0, opt)
            """ GZSL"""
            # note: generated unseen-class labels are shifted by ntrain_class
            train_X = torch.cat((dataset.train_feature, gen_feat), 0)
            train_Y = torch.cat(
                (dataset.train_label, gen_label + dataset.ntrain_class), 0)

            cls = classifier.CLASSIFIER(
                train_X, train_Y, dataset,
                dataset.ntrain_class + dataset.ntest_class, True,
                opt.classifier_lr, 0.5, 25, opt.nSample, True)
            result_gzsl_soft.update_gzsl(it, cls.acc_unseen, cls.acc_seen,
                                         cls.H)
            log_print("GZSL Softmax:", log_dir)
            log_print(
                "U->T {:.2f}%  S->T {:.2f}%  H {:.2f}%  Best_H [{:.2f}% {:.2f}% {:.2f}% | Iter-{}]"
                .format(cls.acc_unseen, cls.acc_seen, cls.H,
                        result_gzsl_soft.best_acc_U_T,
                        result_gzsl_soft.best_acc_S_T,
                        result_gzsl_soft.best_acc, result_gzsl_soft.best_iter),
                log_dir)

            if result_zsl_knn.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_ZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(
                    it, netG, opt.manualSeed, log_text,
                    out_dir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                        result_zsl_knn.acc_list[-1]))

            if result_gzsl_soft.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_GZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(
                    it, netG, opt.manualSeed, log_text, out_dir +
                    '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                        result_gzsl_soft.best_acc,
                        result_gzsl_soft.best_acc_S_T,
                        result_gzsl_soft.best_acc_U_T))
            netG.train()

        if it % opt.save_interval == 0 and it:
            save_model(it, netG, opt.manualSeed, log_text,
                       out_dir + '/Iter_{:d}.tar'.format(it))
            print('Save model to ' + out_dir + '/Iter_{:d}.tar'.format(it))
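
log_print is used throughout Examples 2, 3 and 6 but is not listed. A minimal sketch consistent with how it is called (a message plus the path of the log file opened above):

def log_print(text, log_path):
    """Echo a message to stdout and append it to the run's log file."""
    print(text)
    with open(log_path, 'a') as f:
        f.write(text + '\n')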
Example #3
def train():
    dataset = DATA_LOADER(opt)
    opt.C_dim = dataset.att_dim
    opt.X_dim = dataset.feature_dim
    opt.Z_dim = opt.latent_dim
    opt.y_dim = dataset.ntrain_class
    opt.niter = int(dataset.ntrain / opt.batchsize) * opt.nepoch

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result_zsl_knn = Result()
    result_gzsl_soft = Result()

    netG = Conditional_Generator(opt).cuda()
    netG.apply(weights_init)
    print(netG)
    train_z = torch.FloatTensor(len(dataset.train_feature),
                                opt.Z_dim).normal_(0, opt.latent_var).cuda()

    out_dir = 'out/{}/nSample-{}_nZ-{}_sigma-{}_langevin_s-{}_step-{}'.format(
        opt.dataset, opt.nSample, opt.Z_dim, opt.sigma, opt.langevin_s,
        opt.langevin_step)
    os.makedirs(out_dir, exist_ok=True)
    print("The output dictionary is {}".format(out_dir))

    log_dir = out_dir + '/log_{}.txt'.format(opt.dataset)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            train_z = checkpoint['latent_z'].cuda()
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    optimizerG = optim.Adam(netG.parameters(),
                            lr=opt.lr,
                            weight_decay=opt.weight_decay)
    # range(start_step, opt.niter+1)
    for it in range(start_step, opt.niter + 1):
        blobs = data_layer.forward()
        feat_data = blobs['data']  # image data
        labels = blobs['labels'].astype(int)  # class labels
        idx = blobs['idx'].astype(int)

        C = np.array([dataset.train_att[i, :] for i in labels])
        C = torch.from_numpy(C.astype('float32')).cuda()
        X = torch.from_numpy(feat_data).cuda()
        Z = train_z[idx].cuda()
        optimizer_z = torch.optim.Adam([Z],
                                       lr=opt.lr,
                                       weight_decay=opt.weight_decay)

        # Alternatingly update weights w and infer latent_batch z
        for em_step in range(2):  # EM_STEP
            # update w
            for _ in range(1):
                pred = netG(Z, C)
                loss = getloss(pred, X, Z, opt)

                loss.backward()
                torch.nn.utils.clip_grad_norm_(netG.parameters(), 1)
                optimizerG.step()
                optimizerG.zero_grad()

            # infer z
            for _ in range(opt.langevin_step):
                U_tau = torch.FloatTensor(Z.shape).normal_(0,
                                                           opt.sigma_U).cuda()
                pred = netG(Z, C)
                loss = getloss(pred, X, Z, opt)
                loss = opt.langevin_s * 2 / 2 * loss

                loss.backward()
                torch.nn.utils.clip_grad_norm_([Z], 1)
                optimizer_z.step()
                optimizer_z.zero_grad()
                if it < opt.niter / 3:
                    Z.data += opt.langevin_s * U_tau
        # update Z
        train_z[idx, ] = Z.data

        if it % opt.disp_interval == 0 and it:
            log_text = 'Iter-[{}/{}]; loss: {:.3f}'.format(
                it, opt.niter, loss.item())
            log_print(log_text, log_dir)

        if it % opt.evl_interval == 0 and it:
            netG.eval()
            gen_feat, gen_label = synthesize_feature_test(netG, dataset, opt)
            """ ZSL"""
            acc = eval_zsl_knn(gen_feat.numpy(), gen_label.numpy(), dataset)
            result_zsl_knn.update(it, acc)
            log_print("{}nn Classifer: ".format(opt.Knn), log_dir)
            log_print(
                "Accuracy is {:.2f}%, Best_acc [{:.2f}% | Iter-{}]".format(
                    acc, result_zsl_knn.best_acc, result_zsl_knn.best_iter),
                log_dir)
            """ GZSL"""
            # note: generated unseen-class labels are shifted by ntrain_class
            train_X = torch.cat((dataset.train_feature, gen_feat), 0)
            train_Y = torch.cat(
                (dataset.train_label, gen_label + dataset.ntrain_class), 0)

            cls = classifier.CLASSIFIER(
                train_X, train_Y, dataset,
                dataset.ntrain_class + dataset.ntest_class, True,
                opt.classifier_lr, 0.5, 25, opt.nSample, True)
            result_gzsl_soft.update_gzsl(it, cls.acc_unseen, cls.acc_seen,
                                         cls.H)
            log_print("GZSL Softmax:", log_dir)
            log_print(
                "U->T {:.2f}%  S->T {:.2f}%  H {:.2f}%  Best_H [{:.2f}% {:.2f}% {:.2f}% | Iter-{}]"
                .format(cls.acc_unseen, cls.acc_seen, cls.H,
                        result_gzsl_soft.best_acc_U_T,
                        result_gzsl_soft.best_acc_S_T,
                        result_gzsl_soft.best_acc, result_gzsl_soft.best_iter),
                log_dir)

            if result_zsl_knn.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_ZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(
                    it, netG, train_z, opt.manualSeed, log_text,
                    out_dir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                        result_zsl_knn.acc_list[-1]))

            if result_gzsl_soft.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_GZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(
                    it, netG, train_z, opt.manualSeed, log_text, out_dir +
                    '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                        result_gzsl_soft.best_acc,
                        result_gzsl_soft.best_acc_S_T,
                        result_gzsl_soft.best_acc_U_T))
            netG.train()

        if it % opt.save_interval == 0 and it:
            save_model(it, netG, train_z, opt.manualSeed, log_text,
                       out_dir + '/Iter_{:d}.tar'.format(it))
            print('Save model to ' + out_dir + '/Iter_{:d}.tar'.format(it))
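
getloss in Examples 3 and 6 is not shown. Given opt.sigma and the alternating update of generator weights and latent codes, one plausible reading is the usual alternating back-propagation objective: a Gaussian reconstruction term plus a unit-Gaussian prior on the latent code. The sketch below is that assumption (PyTorch form; Example 6 would need the TensorFlow equivalent), not the repository's definition.

import torch


def getloss(pred, X, Z, opt):
    """Assumed ABP-style objective: reconstruction under noise sigma + prior on Z."""
    recon = 1.0 / (2 * opt.sigma ** 2) * torch.sum((pred - X) ** 2)
    prior = 0.5 * torch.sum(Z ** 2)
    return (recon + prior) / X.size(0)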
Example #4
def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    start_step = 0

    nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_sem[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # GAN's D loss on real samples
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # GAN's D loss on fake samples
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)

            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)
        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_sem[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()

            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1)
                        == y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])

            # log_text = 'Iter-{}; Was_D: {:.3f}; Euc_ls: {:.3f}; reg_ls: {:.3f}; G_loss: {:.3f}; D_loss_real: {:.3f};' \
            #            ' D_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%'\
            #             .format(it, Wasserstein_D.data[0],  Euclidean_loss.data[0], reg_loss.data[0],
            #                     G_loss.data[0], D_loss_real.data[0], D_loss_fake.data[0], acc_real * 100, acc_fake * 100)
            log_text = 'Iter-{} *********************'.format(it)
            print(log_text)
            # with open(log_dir, 'a') as f:
            #     f.write(log_text+'\n')

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            # eval_fakefeat_test_Hit(it, netG, dataset, param)
            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)

            netG.train()
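
reset_grad(nets) is called after every optimiser step in Examples 1, 4 and 5 but is not listed. A minimal sketch that matches that usage:

def reset_grad(nets):
    """Zero the accumulated gradients of every network in the list."""
    for net in nets:
        net.zero_grad()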
Example #5
def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(), dataset.train_feature.numpy(), opt)

    netG2 = _netG2_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG2.apply(weights_init)
    print(netG2)
    netD2 = _netD2_att(dataset.text_dim, dataset.train_cls_num).cuda()
    netD2.apply(weights_init)
    print(netD2)


    exp_info = 'GBU_{}_PretrainG2D2'.format(opt.dataset)
    exp_params = 'Eu{}_Rls{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA)

    out_dir = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint(" The output dictionary is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}_{}.txt'.format(exp_info, opt.exp_idx)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG2.load_state_dict(checkpoint['state_dict_G2'])
            netD2.load_state_dict(checkpoint['state_dict_D2'])

            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    nets = [netG2, netD2]

    optimizerD2 = optim.Adam(netD2.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG2 = optim.Adam(netG2.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000+1):
        """D2"""
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
            z2 = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # real loss
            D2_real, C2_real = netD2(text_feat)
            D2_loss_real = torch.mean(D2_real)
            C2_loss_real = F.cross_entropy(C2_real, y_true)
            DC2_loss = -D2_loss_real + C2_loss_real
            DC2_loss.backward()

            # fake loss
            text_sample = netG2(z, X).detach()
            D2_fake, C2_fake = netD2(text_sample)
            D2_loss_fake = torch.mean(D2_fake)
            C2_loss_fake = F.cross_entropy(C2_fake, y_true)
            DC2_loss = D2_loss_fake + C2_loss_fake
            DC2_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD2, text_feat.data, text_sample.data)
            grad_penalty.backward()
            Wasserstein_D = D2_loss_real - D2_loss_fake
            optimizerD2.step()
            reset_grad(nets)

        """G2"""
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']  # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()


            text_sample = netG2(z, X)
            D2_fake, C2_fake = netD2(text_sample)
            G2_loss = torch.mean(D2_fake)
            C2_loss_fake = F.cross_entropy(C2_fake, y_true)
            GC2_loss = -G2_loss + C2_loss_fake

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG2.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC2_loss + 0.1 * reg_loss
            all_loss.backward()
            optimizerG2.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C2_real.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])
            acc_fake = (np.argmax(C2_fake.data.cpu().numpy(), axis=1) == y_true.data.cpu().numpy()).sum() / float(y_true.data.size()[0])

            log_text = 'Iter-{}; Was_D: {:.3f};  reg_ls: {:.3f}; G_loss: {:.3f}; D_loss_real: {:.3f};' \
                       ' D_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%; c_rl: {:.2f}; c_fk: {:.2f}'\
                        .format(it, Wasserstein_D.item(),  reg_loss.item(),
                                G2_loss.item(), D2_loss_real.item(), D2_loss_fake.item(),
                                acc_real * 100, acc_fake * 100, C2_loss_real.item(), C2_loss_fake.item())

            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text+'\n')


        if it % opt.save_interval == 0 and it:
            save_model(it, netG2, netD2, opt.manualSeed, log_text,
                       out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it), 'red')
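
weights_init is applied with net.apply(...) in Examples 1, 4 and 5 but not listed. A common pattern for these fully connected generators and discriminators is a small-variance normal initialisation of the Linear layers; the repository's exact scheme may differ, so treat this as a sketch.

import torch.nn as nn


def weights_init(m):
    """Assumed initialisation: N(0, 0.02) weights and zero biases for Linear layers."""
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0.0, 0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)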
Example #6
def train():
    dataset = DATA_LOADER(opt)
    opt.C_dim = dataset.att_dim
    opt.X_dim = dataset.feature_dim
    opt.Z_dim = opt.latent_dim
    opt.y_dim = dataset.ntrain_class
    opt.niter = int(dataset.ntrain / opt.batchsize) * opt.nepoch  #309000

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result_zsl_knn = Result()
    result_gzsl_soft = Result()
    netG = Conditional_Generator(opt)
    print('Conditional_Generator:', netG)

    train_z = tf.random.normal(mean=0,
                               stddev=opt.latent_var,
                               shape=(len(dataset.train_feature), opt.Z_dim))
    out_dir = 'out/{}/nSample-{}_nZ-{}_sigma-{}_langevin_s-{}_step-{}'.format(
        opt.dataset, opt.nSample, opt.Z_dim, opt.sigma, opt.langevin_s,
        opt.langevin_step)
    os.makedirs(out_dir, exist_ok=True)
    print("The output dictionary is {}".format(out_dir))
    log_dir = out_dir + '/log_{}.txt'.format(opt.dataset)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    # resume reuses the same pattern as above; checkpoint loading is not written out here
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    optimizerG = keras.optimizers.Adam(lr=opt.lr, decay=opt.weight_decay)

    # range(start_step, opt.niter+1)
    for it in range(start_step, opt.niter + 1):
        blobs = data_layer.forward()
        feat_data = blobs['data']  # image data (64, 2048)
        labels = blobs['labels'].astype(int)  # class labels (64,)
        idx = blobs['idx'].astype(int)  # (64,)

        C = np.array([dataset.train_att[i, :] for i in labels])
        C = tf.convert_to_tensor(C, dtype=tf.float32)  # (64, 85)
        X = tf.convert_to_tensor(feat_data)  # (64, 2048)
        # the latent batch must be a tf.Variable so optimizer_z can update it
        Z = tf.Variable(train_z.numpy()[idx])  # (64, 10)
        optimizer_z = keras.optimizers.Adam(lr=opt.lr, decay=opt.weight_decay)

        # Alternatingly update weights w and infer latent_batch z
        for em_step in range(2):  # EM_STEP
            # update w
            for _ in range(1):
                with tf.GradientTape() as tape:
                    pred = netG(Z, C)
                    loss = getloss(pred, X, Z, opt)
                grads = tape.gradient(loss, netG.trainable_variables)
                # clip gradients to keep them from exploding
                for i, grad in enumerate(grads):
                    grads[i] = tf.clip_by_norm(grad, 1)
                optimizerG.apply_gradients(zip(grads,
                                               netG.trainable_variables))
            # infer z: Langevin-style updates on the latent codes
            for _ in range(opt.langevin_step):
                U_tau = tf.random.normal(mean=0,
                                         stddev=opt.sigma_U,
                                         shape=Z.shape)
                with tf.GradientTape() as tape:
                    pred = netG(Z, C)
                    loss = getloss(pred, X, Z, opt)
                    loss = opt.langevin_s * 2 / 2 * loss
                # take the gradient w.r.t. the latent codes, not the generator weights
                grads_z = tape.gradient(loss, [Z])
                grads_z = [tf.clip_by_norm(g, 1) for g in grads_z]
                optimizer_z.apply_gradients(zip(grads_z, [Z]))
                if it < opt.niter / 3:
                    Z.assign_add(opt.langevin_s * U_tau)
        # update Z: write the refined latent codes back into the latent bank
        train_z = train_z.numpy()
        train_z[idx, ] = Z.numpy()
        # print(train_z[idx,].shape)
        train_z = tf.convert_to_tensor(train_z)
        if it % opt.disp_interval == 0 and it:
            log_text = 'Iter-[{}/{}]; loss: {:.3f}'.format(
                it, opt.niter, float(loss))
            log_print(log_text, log_dir)

        if it % opt.evl_interval == 0 and it:
            gen_feat, gen_label = synthesize_feature_test(
                netG, dataset, opt)  #(3000,2048)(3000,)
            """ZSL"""
            acc = eval_zsl_knn(gen_feat.numpy(), gen_label.numpy(), dataset)
            result_zsl_knn.update(it, acc)
            log_print("{}nn Classifer: ".format(opt.Knn), log_dir)
            log_print(
                "Accuracy is {:.2f}%, Best_acc [{:.2f}% | Iter-{}]".format(
                    acc, result_zsl_knn.best_acc, result_zsl_knn.best_iter),
                log_dir)
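
All six examples rely on a Result tracker (update, update_gzsl, best_acc, acc_list, save_model, ...) that is not listed. The sketch below is a minimal version consistent with how those attributes are read above; the real class may track more state.

class Result(object):
    """Keeps the accuracy history and flags when a new best model should be saved."""

    def __init__(self):
        self.best_acc = 0.0
        self.best_iter = 0
        self.best_acc_S_T = 0.0  # seen -> all classes (GZSL)
        self.best_acc_U_T = 0.0  # unseen -> all classes (GZSL)
        self.acc_list = []
        self.iter_list = []
        self.save_model = False

    def update(self, it, acc):
        self.acc_list.append(acc)
        self.iter_list.append(it)
        self.save_model = acc > self.best_acc
        if self.save_model:
            self.best_acc, self.best_iter = acc, it

    def update_gzsl(self, it, acc_unseen, acc_seen, H):
        self.acc_list.append(H)
        self.iter_list.append(it)
        self.save_model = H > self.best_acc
        if self.save_model:
            self.best_acc, self.best_iter = H, it
            self.best_acc_U_T, self.best_acc_S_T = acc_unseen, acc_seen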