def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    exp_info = 'GBU_{}'.format(opt.dataset)
    exp_params = 'Eu{}_Rls{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA)

    out_dir = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint("The output directory is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}_{}.txt'.format(exp_info, opt.exp_idx)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            netD.load_state_dict(checkpoint['state_dict_D'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()

    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # GAN's D loss on real features
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # GAN's D loss on generated features
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # train with gradient penalty (WGAN-GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1) ==
                        y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1) ==
                        y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            log_text = 'Iter-{}; Was_D: {:.3f}; Euc_ls: {:.3f}; reg_ls: {:.3f}; G_loss: {:.3f}; D_loss_real: {:.3f};' \
                       ' D_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%' \
                .format(it, Wasserstein_D.data[0], Euclidean_loss.data[0],
                        reg_loss.data[0], G_loss.data[0], D_loss_real.data[0],
                        D_loss_fake.data[0], acc_real * 100, acc_fake * 100)
            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model_ZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                # best_acc = result.acc_list[-1]
                save_model(it, netG, netD, opt.manualSeed, log_text,
                           out_subdir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                               result.acc_list[-1]))

            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)
            if result.save_model:
                files2remove = glob.glob(out_subdir + '/Best_model_GZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                # best_acc_gzsl = result.acc_list[-1]
                save_model(it, netG, netD, opt.manualSeed, log_text,
                           out_subdir +
                           '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                               result_gzsl.best_acc, result_gzsl.best_acc_S_T,
                               result_gzsl.best_acc_U_T))
            netG.train()

        if it % opt.save_interval == 0 and it:
            save_model(it, netG, netD, opt.manualSeed, log_text,
                       out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it),
                   'red')
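
# NOTE: `calc_gradient_penalty` is called in the discriminator updates above but
# is defined elsewhere in the repo. For reference, the sketch below is a
# standard WGAN-GP penalty (interpolate between real and generated features,
# then push the critic's gradient norm towards 1). It assumes netD returns
# (critic_score, class_logits) as it does above; the function name and the
# default gp_lambda=10.0 are assumptions, not the repo's actual implementation.
def calc_gradient_penalty_sketch(netD, real_data, fake_data, gp_lambda=10.0):
    batch_size = real_data.size(0)
    # one random interpolation coefficient per sample, broadcast over features
    alpha = torch.rand(batch_size, 1).expand_as(real_data).cuda()
    interpolates = (alpha * real_data +
                    (1 - alpha) * fake_data).requires_grad_(True)
    disc_interpolates, _ = netD(interpolates)
    gradients = torch.autograd.grad(
        outputs=disc_interpolates, inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True, retain_graph=True)[0]
    # penalize the deviation of the per-sample gradient norm from 1
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * gp_lambda
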
def train():
    dataset = DATA_LOADER(opt)
    opt.C_dim = dataset.att_dim
    opt.X_dim = dataset.feature_dim
    opt.y_dim = dataset.ntrain_class

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result_zsl_knn = Result()
    result_gzsl_soft = Result()

    netG = Glow(classes=opt.y_dim, condition_dim=opt.C_dim).cuda()

    out_dir = 'out/{}/shuffle'.format(opt.dataset)
    os.makedirs(out_dir, exist_ok=True)
    print("The output directory is {}".format(out_dir))

    log_dir = out_dir + '/log_{}.txt'.format(opt.dataset)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            train_z = checkpoint['latent_z'].cuda()
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    initial = True
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr)

    for it in range(start_step, opt.niter + 1):
        blobs = data_layer.forward()
        feat_data = blobs['data']             # image data
        labels = blobs['labels'].astype(int)  # class labels
        idx = blobs['idx'].astype(int)

        C = np.array([dataset.train_att[i, :] for i in labels])
        L = torch.from_numpy(labels).cuda()
        C = torch.from_numpy(C.astype('float32')).cuda()
        X = torch.from_numpy(feat_data).cuda()
        X = X.view(*X.shape, 1, 1)

        # run one forward pass on the first batch for data-dependent
        # initialization of the flow
        if initial is True:
            netG(x=X, y_onehot=C, reverse=False)
            initial = False

        z, nll, vaeloss, y_logit = netG(x=X, y_onehot=C, reverse=False)
        loss_generative = Glow.loss_generative(nll)
        loss_classes = Glow.loss_class(y_logit, L)
        loss = loss_generative + vaeloss + loss_classes * 0.01

        netG.zero_grad()
        optimizerG.zero_grad()
        loss.backward()
        optimizerG.step()

        if it % opt.disp_interval == 0 and it:
            log_text = 'Iter-[{}/{}]; epoch: {} Gloss: {:.3f} vaeloss: {:.3f} clsloss: {:.3f}'.format(
                it, opt.niter, it // opt.evl_interval, float(loss_generative),
                float(vaeloss), float(loss_classes))
            log_print(log_text, log_dir)

        if it % opt.evl_interval == 0 and it:
            netG.eval()
            gen_feat, gen_label = synthesize_feature_test(
                netG, dataset, 300, 0.5, opt)

            """ ZSL """
            acc = eval_zsl_knn(gen_feat.numpy(), gen_label.numpy(), dataset)
            result_zsl_knn.update(it, acc)
            log_print("{}nn Classifier: ".format(opt.Knn), log_dir)
            log_print(
                "Accuracy is {:.2f}%, Best_acc [{:.2f}% | Iter-{}]".format(
                    acc, result_zsl_knn.best_acc, result_zsl_knn.best_iter),
                log_dir)

            gen_feat, gen_label = synthesize_feature_test(
                netG, dataset, opt.nSample, 1.0, opt)

            """ GZSL """
            # note: test labels need to be shifted by the offset ntrain_class
            train_X = torch.cat((dataset.train_feature, gen_feat), 0)
            train_Y = torch.cat(
                (dataset.train_label, gen_label + dataset.ntrain_class), 0)

            cls = classifier.CLASSIFIER(
                train_X, train_Y, dataset,
                dataset.ntrain_class + dataset.ntest_class, True,
                opt.classifier_lr, 0.5, 25, opt.nSample, True)
            result_gzsl_soft.update_gzsl(it, cls.acc_unseen, cls.acc_seen,
                                         cls.H)

            log_print("GZSL Softmax:", log_dir)
            log_print(
                "U->T {:.2f}% S->T {:.2f}% H {:.2f}% Best_H [{:.2f}% {:.2f}% {:.2f}% | Iter-{}]"
                .format(cls.acc_unseen, cls.acc_seen, cls.H,
                        result_gzsl_soft.best_acc_U_T,
                        result_gzsl_soft.best_acc_S_T,
                        result_gzsl_soft.best_acc,
                        result_gzsl_soft.best_iter), log_dir)

            if result_zsl_knn.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_ZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(it, netG, opt.manualSeed, log_text,
                           out_dir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                               result_zsl_knn.acc_list[-1]))

            if result_gzsl_soft.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_GZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(it, netG, opt.manualSeed, log_text,
                           out_dir +
                           '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                               result_gzsl_soft.best_acc,
                               result_gzsl_soft.best_acc_S_T,
                               result_gzsl_soft.best_acc_U_T))
            netG.train()

        if it % opt.save_interval == 0 and it:
            save_model(it, netG, opt.manualSeed, log_text,
                       out_dir + '/Iter_{:d}.tar'.format(it))
            print('Save model to ' + out_dir + '/Iter_{:d}.tar'.format(it))
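
# NOTE: `Result()` is defined elsewhere in the repo; the training loops above
# only rely on a handful of its attributes (best_acc, best_iter, best_acc_S_T,
# best_acc_U_T, acc_list, save_model) and on update()/update_gzsl(). The class
# below is a minimal sketch of that contract, assuming `save_model` simply
# flags whenever the tracked metric improves; it is an assumption, not the
# repo's actual code.
class ResultSketch(object):
    def __init__(self):
        self.best_acc = 0.0
        self.best_iter = 0
        self.best_acc_S_T = 0.0   # best seen-class accuracy (GZSL)
        self.best_acc_U_T = 0.0   # best unseen-class accuracy (GZSL)
        self.acc_list = []
        self.save_model = False

    def update(self, it, acc):
        # ZSL: track top-1 accuracy of the synthesized-feature classifier
        self.acc_list.append(acc)
        self.save_model = acc > self.best_acc
        if self.save_model:
            self.best_acc, self.best_iter = acc, it

    def update_gzsl(self, it, acc_unseen, acc_seen, H):
        # GZSL: track the harmonic mean H of seen/unseen accuracies
        self.acc_list.append(H)
        self.save_model = H > self.best_acc
        if self.save_model:
            self.best_acc, self.best_iter = H, it
            self.best_acc_U_T, self.best_acc_S_T = acc_unseen, acc_seen
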
def train():
    dataset = DATA_LOADER(opt)
    opt.C_dim = dataset.att_dim
    opt.X_dim = dataset.feature_dim
    opt.Z_dim = opt.latent_dim
    opt.y_dim = dataset.ntrain_class
    opt.niter = int(dataset.ntrain / opt.batchsize) * opt.nepoch

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result_zsl_knn = Result()
    result_gzsl_soft = Result()

    netG = Conditional_Generator(opt).cuda()
    netG.apply(weights_init)
    print(netG)

    train_z = torch.FloatTensor(len(dataset.train_feature),
                                opt.Z_dim).normal_(0, opt.latent_var).cuda()

    out_dir = 'out/{}/nSample-{}_nZ-{}_sigma-{}_langevin_s-{}_step-{}'.format(
        opt.dataset, opt.nSample, opt.Z_dim, opt.sigma, opt.langevin_s,
        opt.langevin_step)
    os.makedirs(out_dir, exist_ok=True)
    print("The output directory is {}".format(out_dir))

    log_dir = out_dir + '/log_{}.txt'.format(opt.dataset)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG.load_state_dict(checkpoint['state_dict_G'])
            train_z = checkpoint['latent_z'].cuda()
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr,
                            weight_decay=opt.weight_decay)

    # range(start_step, opt.niter+1)
    for it in range(start_step, opt.niter + 1):
        blobs = data_layer.forward()
        feat_data = blobs['data']             # image data
        labels = blobs['labels'].astype(int)  # class labels
        idx = blobs['idx'].astype(int)

        C = np.array([dataset.train_att[i, :] for i in labels])
        C = torch.from_numpy(C.astype('float32')).cuda()
        X = torch.from_numpy(feat_data).cuda()
        Z = train_z[idx].cuda()
        # the latent batch must require grad so the inference steps below
        # actually receive gradients
        Z.requires_grad_()
        optimizer_z = torch.optim.Adam([Z], lr=opt.lr,
                                       weight_decay=opt.weight_decay)

        # Alternately update the generator weights w and infer the latent batch z
        for em_step in range(2):  # EM_STEP
            # update w
            for _ in range(1):
                pred = netG(Z, C)
                loss = getloss(pred, X, Z, opt)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(netG.parameters(), 1)
                optimizerG.step()
                optimizerG.zero_grad()
            # infer z
            for _ in range(opt.langevin_step):
                U_tau = torch.FloatTensor(Z.shape).normal_(
                    0, opt.sigma_U).cuda()
                pred = netG(Z, C)
                loss = getloss(pred, X, Z, opt)
                loss = opt.langevin_s * 2 / 2 * loss
                loss.backward()
                torch.nn.utils.clip_grad_norm_([Z], 1)
                optimizer_z.step()
                optimizer_z.zero_grad()
                if it < opt.niter / 3:
                    Z.data += opt.langevin_s * U_tau

        # update Z
        train_z[idx, ] = Z.data

        if it % opt.disp_interval == 0 and it:
            log_text = 'Iter-[{}/{}]; loss: {:.3f}'.format(
                it, opt.niter, loss.item())
            log_print(log_text, log_dir)

        if it % opt.evl_interval == 0 and it:
            netG.eval()
            gen_feat, gen_label = synthesize_feature_test(netG, dataset, opt)

            """ ZSL """
            acc = eval_zsl_knn(gen_feat.numpy(), gen_label.numpy(), dataset)
            result_zsl_knn.update(it, acc)
            log_print("{}nn Classifier: ".format(opt.Knn), log_dir)
            log_print(
                "Accuracy is {:.2f}%, Best_acc [{:.2f}% | Iter-{}]".format(
                    acc, result_zsl_knn.best_acc, result_zsl_knn.best_iter),
                log_dir)

            """ GZSL """
            # note: test labels need to be shifted by the offset ntrain_class
            train_X = torch.cat((dataset.train_feature, gen_feat), 0)
            train_Y = torch.cat(
                (dataset.train_label, gen_label + dataset.ntrain_class), 0)
            cls = classifier.CLASSIFIER(
                train_X, train_Y, dataset,
                dataset.ntrain_class + dataset.ntest_class, True,
                opt.classifier_lr, 0.5, 25, opt.nSample, True)
            result_gzsl_soft.update_gzsl(it, cls.acc_unseen, cls.acc_seen,
                                         cls.H)

            log_print("GZSL Softmax:", log_dir)
            log_print(
                "U->T {:.2f}% S->T {:.2f}% H {:.2f}% Best_H [{:.2f}% {:.2f}% {:.2f}% | Iter-{}]"
                .format(cls.acc_unseen, cls.acc_seen, cls.H,
                        result_gzsl_soft.best_acc_U_T,
                        result_gzsl_soft.best_acc_S_T,
                        result_gzsl_soft.best_acc,
                        result_gzsl_soft.best_iter), log_dir)

            if result_zsl_knn.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_ZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(it, netG, train_z, opt.manualSeed, log_text,
                           out_dir + '/Best_model_ZSL_Acc_{:.2f}.tar'.format(
                               result_zsl_knn.acc_list[-1]))

            if result_gzsl_soft.save_model:
                files2remove = glob.glob(out_dir + '/Best_model_GZSL_*')
                for _i in files2remove:
                    os.remove(_i)
                save_model(it, netG, train_z, opt.manualSeed, log_text,
                           out_dir +
                           '/Best_model_GZSL_H_{:.2f}_S_{:.2f}_U_{:.2f}.tar'.format(
                               result_gzsl_soft.best_acc,
                               result_gzsl_soft.best_acc_S_T,
                               result_gzsl_soft.best_acc_U_T))
            netG.train()

        if it % opt.save_interval == 0 and it:
            save_model(it, netG, train_z, opt.manualSeed, log_text,
                       out_dir + '/Iter_{:d}.tar'.format(it))
            print('Save model to ' + out_dir + '/Iter_{:d}.tar'.format(it))
def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result = Result()
    result_gzsl = Result()

    netG = _netG_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG.apply(weights_init)
    print(netG)
    netD = _netD(dataset.train_cls_num, dataset.feature_dim).cuda()
    netD.apply(weights_init)
    print(netD)

    start_step = 0
    nets = [netG, netD]

    tr_cls_centroid = Variable(
        torch.from_numpy(dataset.tr_cls_centroid.astype('float32'))).cuda()

    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000 + 1):
        """ Discriminator """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_sem[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            # GAN's D loss
            D_real, C_real = netD(X)
            D_loss_real = torch.mean(D_real)
            C_loss_real = F.cross_entropy(C_real, y_true)
            DC_loss = opt.Adv_LAMBDA * (-D_loss_real + C_loss_real)
            DC_loss.backward()

            # GAN's D loss
            G_sample = netG(z, text_feat).detach()
            D_fake, C_fake = netD(G_sample)
            D_loss_fake = torch.mean(D_fake)
            C_loss_fake = F.cross_entropy(C_fake, y_true)
            DC_loss = opt.Adv_LAMBDA * (D_loss_fake + C_loss_fake)
            DC_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = opt.Adv_LAMBDA * calc_gradient_penalty(
                netD, X.data, G_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D_loss_real - D_loss_fake
            optimizerD.step()
            reset_grad(nets)

        """ Generator """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_sem[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            G_sample = netG(z, text_feat)
            D_fake, C_fake = netD(G_sample)
            _, C_real = netD(X)

            # GAN's G loss
            G_loss = torch.mean(D_fake)
            # Auxiliary classification loss
            C_loss = (F.cross_entropy(C_real, y_true) +
                      F.cross_entropy(C_fake, y_true)) / 2
            GC_loss = opt.Adv_LAMBDA * (-G_loss + C_loss)

            # Centroid loss
            Euclidean_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for i in range(dataset.train_cls_num):
                    sample_idx = (y_true == i).data.nonzero().squeeze()
                    if sample_idx.numel() == 0:
                        Euclidean_loss += 0.0
                    else:
                        G_sample_cls = G_sample[sample_idx, :]
                        Euclidean_loss += (
                            G_sample_cls.mean(dim=0) -
                            tr_cls_centroid[i]).pow(2).sum().sqrt()
                Euclidean_loss *= 1.0 / dataset.train_cls_num * opt.CENT_LAMBDA

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC_loss + Euclidean_loss + reg_loss
            all_loss.backward()
            optimizerG.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C_real.data.cpu().numpy(), axis=1) ==
                        y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C_fake.data.cpu().numpy(), axis=1) ==
                        y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            # log_text = 'Iter-{}; Was_D: {:.3f}; Euc_ls: {:.3f}; reg_ls: {:.3f}; G_loss: {:.3f}; D_loss_real: {:.3f};' \
            #            ' D_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%'\
            #     .format(it, Wasserstein_D.data[0], Euclidean_loss.data[0], reg_loss.data[0],
            #             G_loss.data[0], D_loss_real.data[0], D_loss_fake.data[0], acc_real * 100, acc_fake * 100)
            log_text = 'Iter-{} *********************'.format(it)
            print(log_text)
            # with open(log_dir, 'a') as f:
            #     f.write(log_text + '\n')

        if it % opt.evl_interval == 0 and it >= 100:
            netG.eval()
            eval_fakefeat_test(it, netG, dataset, param, result)
            # eval_fakefeat_test_Hit(it, netG, dataset, param)
            eval_fakefeat_test_gzsl(it, netG, dataset, param, result_gzsl)
            netG.train()
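
# NOTE: `reset_grad` is called after every optimizer step above but defined
# elsewhere. Its call sites imply it just clears the accumulated gradients of
# every network in the list; a minimal sketch under that assumption:
def reset_grad_sketch(nets):
    for net in nets:
        net.zero_grad()  # clear .grad on all parameters of this network
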
def train():
    param = _param()
    dataset = DATA_LOADER(opt)
    param.X_dim = dataset.feature_dim

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)

    netG2 = _netG2_att(opt, dataset.text_dim, dataset.feature_dim).cuda()
    netG2.apply(weights_init)
    print(netG2)
    netD2 = _netD2_att(dataset.text_dim, dataset.train_cls_num).cuda()
    netD2.apply(weights_init)
    print(netD2)

    exp_info = 'GBU_{}_PretrainG2D2'.format(opt.dataset)
    exp_params = 'Eu{}_Rls{}'.format(opt.CENT_LAMBDA, opt.REG_W_LAMBDA)

    out_dir = 'out/{:s}'.format(exp_info)
    out_subdir = 'out/{:s}/{:s}'.format(exp_info, exp_params)
    if not os.path.exists('out'):
        os.mkdir('out')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if not os.path.exists(out_subdir):
        os.mkdir(out_subdir)

    cprint("The output directory is {}".format(out_subdir), 'red')
    log_dir = out_subdir + '/log_{:s}_{}.txt'.format(exp_info, opt.exp_idx)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            netG2.load_state_dict(checkpoint['state_dict_G2'])
            netD2.load_state_dict(checkpoint['state_dict_D2'])
            start_step = checkpoint['it']
            print(checkpoint['log'])
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    nets = [netG2, netD2]

    optimizerD2 = optim.Adam(netD2.parameters(), lr=opt.lr, betas=(0.5, 0.9))
    optimizerG2 = optim.Adam(netG2.parameters(), lr=opt.lr, betas=(0.5, 0.9))

    for it in range(start_step, 3000 + 1):
        """ D2 """
        for _ in range(5):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()
            z2 = Variable(torch.randn(opt.batchsize, param.z_dim)).cuda()

            # real loss
            D2_real, C2_real = netD2(text_feat)
            D2_loss_real = torch.mean(D2_real)
            C2_loss_real = F.cross_entropy(C2_real, y_true)
            DC2_loss = -D2_loss_real + C2_loss_real
            DC2_loss.backward()

            # fake loss
            text_sample = netG2(z, X).detach()
            D2_fake, C2_fake = netD2(text_sample)
            D2_loss_fake = torch.mean(D2_fake)
            C2_loss_fake = F.cross_entropy(C2_fake, y_true)
            DC2_loss = D2_loss_fake + C2_loss_fake
            DC2_loss.backward()

            # train with gradient penalty (WGAN_GP)
            grad_penalty = calc_gradient_penalty(netD2, text_feat.data,
                                                 text_sample.data)
            grad_penalty.backward()

            Wasserstein_D = D2_loss_real - D2_loss_fake
            optimizerD2.step()
            reset_grad(nets)

        """ G2 """
        for _ in range(1):
            blobs = data_layer.forward()
            feat_data = blobs['data']             # image data
            labels = blobs['labels'].astype(int)  # class labels
            text_feat = np.array([dataset.train_att[i, :] for i in labels])
            text_feat = Variable(torch.from_numpy(
                text_feat.astype('float32'))).cuda()
            X = Variable(torch.from_numpy(feat_data)).cuda()
            y_true = Variable(torch.from_numpy(labels.astype('int'))).cuda()
            z = Variable(torch.randn(opt.batchsize, opt.z_dim)).cuda()

            text_sample = netG2(z, X)
            D2_fake, C2_fake = netD2(text_sample)

            G2_loss = torch.mean(D2_fake)
            C2_loss_fake = F.cross_entropy(C2_fake, y_true)
            GC2_loss = -G2_loss + C2_loss_fake

            # ||W||_2 regularization
            reg_loss = Variable(torch.Tensor([0.0])).cuda()
            if opt.REG_W_LAMBDA != 0:
                for name, p in netG2.named_parameters():
                    if 'weight' in name:
                        reg_loss += p.pow(2).sum()
                reg_loss.mul_(opt.REG_W_LAMBDA)

            all_loss = GC2_loss + 0.1 * reg_loss
            all_loss.backward()
            optimizerG2.step()
            reset_grad(nets)

        if it % opt.disp_interval == 0 and it:
            acc_real = (np.argmax(C2_real.data.cpu().numpy(), axis=1) ==
                        y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            acc_fake = (np.argmax(C2_fake.data.cpu().numpy(), axis=1) ==
                        y_true.data.cpu().numpy()).sum() / float(
                            y_true.data.size()[0])
            log_text = 'Iter-{}; Was_D: {:.3f}; reg_ls: {:.3f}; G_loss: {:.3f}; D_loss_real: {:.3f};' \
                       ' D_loss_fake: {:.3f}; rl: {:.2f}%; fk: {:.2f}%; c_rl: {:.2f}; c_fk: {:.2f}'\
                .format(it, Wasserstein_D.item(), reg_loss.item(),
                        G2_loss.item(), D2_loss_real.item(),
                        D2_loss_fake.item(), acc_real * 100, acc_fake * 100,
                        C2_loss_real.item(), C2_loss_fake.item())
            print(log_text)
            with open(log_dir, 'a') as f:
                f.write(log_text + '\n')

        if it % opt.save_interval == 0 and it:
            save_model(it, netG2, netD2, opt.manualSeed, log_text,
                       out_subdir + '/Iter_{:d}.tar'.format(it))
            cprint('Save model to ' + out_subdir + '/Iter_{:d}.tar'.format(it),
                   'red')
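
# NOTE: `weights_init` is applied to every generator/discriminator above
# (net.apply(weights_init)) but defined elsewhere. A common choice for these
# fully-connected ZSL models, and only an assumption here, is a small Gaussian
# init for linear layers with zero bias:
import torch.nn as nn  # assumed; the excerpt above does not show its imports

def weights_init_sketch(m):
    classname = m.__class__.__name__
    if 'Linear' in classname:
        nn.init.normal_(m.weight, 0.0, 0.02)  # small Gaussian weights
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)    # zero biases
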
def train():
    dataset = DATA_LOADER(opt)
    opt.C_dim = dataset.att_dim
    opt.X_dim = dataset.feature_dim
    opt.Z_dim = opt.latent_dim
    opt.y_dim = dataset.ntrain_class
    opt.niter = int(dataset.ntrain / opt.batchsize) * opt.nepoch  # 309000

    data_layer = FeatDataLayer(dataset.train_label.numpy(),
                               dataset.train_feature.numpy(), opt)
    result_zsl_knn = Result()
    result_gzsl_soft = Result()

    netG = Conditional_Generator(opt)
    print('Conditional_Generator:', netG)

    train_z = tf.random.normal(mean=0, stddev=opt.latent_var,
                               shape=(len(dataset.train_feature), opt.Z_dim))

    out_dir = 'out/{}/nSample-{}_nZ-{}_sigma-{}_langevin_s-{}_step-{}'.format(
        opt.dataset, opt.nSample, opt.Z_dim, opt.sigma, opt.langevin_s,
        opt.langevin_step)
    os.makedirs(out_dir, exist_ok=True)
    print("The output directory is {}".format(out_dir))

    log_dir = out_dir + '/log_{}.txt'.format(opt.dataset)
    with open(log_dir, 'w') as f:
        f.write('Training Start:')
        f.write(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + '\n')

    start_step = 0
    # reused from elsewhere; checkpoint loading is not rewritten here
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    optimizerG = keras.optimizers.Adam(lr=opt.lr, decay=opt.weight_decay)

    # range(start_step, opt.niter+1)
    for it in range(start_step, opt.niter + 1):
        blobs = data_layer.forward()
        feat_data = blobs['data']             # image data (64, 2048)
        labels = blobs['labels'].astype(int)  # class labels (64,)
        idx = blobs['idx'].astype(int)        # (64,)

        C = np.array([dataset.train_att[i, :] for i in labels])
        C = tf.convert_to_tensor(C, dtype=tf.float32)  # (64, 85)
        X = tf.convert_to_tensor(feat_data)            # (64, 2048)
        # the latent batch is a tf.Variable so the inference steps below can
        # update it in place
        Z = tf.Variable(train_z.numpy()[idx])          # (64, 10)
        optimizer_z = keras.optimizers.Adam(lr=opt.lr, decay=opt.weight_decay)

        # Alternately update the generator weights w and infer the latent batch z
        for em_step in range(2):  # EM_STEP
            # update w
            for _ in range(1):
                with tf.GradientTape() as tape:
                    pred = netG(Z, C)
                    loss = getloss(pred, X, Z, opt)
                grads = tape.gradient(loss, netG.trainable_variables)
                # clip the gradients to prevent gradient explosion
                for i, grad in enumerate(grads):
                    grads[i] = tf.clip_by_norm(grad, 1)
                optimizerG.apply_gradients(zip(grads,
                                               netG.trainable_variables))
            # infer z
            for _ in range(opt.langevin_step):
                U_tau = tf.random.normal(mean=0, stddev=opt.sigma_U,
                                         shape=Z.shape)
                with tf.GradientTape() as tape:
                    pred = netG(Z, C)
                    loss = getloss(pred, X, Z, opt)
                    loss = opt.langevin_s * 2 / 2 * loss
                # gradients w.r.t. the latent batch only
                grad_z = tape.gradient(loss, [Z])
                grad_z = [tf.clip_by_norm(g, 1) for g in grad_z]
                optimizer_z.apply_gradients(zip(grad_z, [Z]))
                if it < opt.niter / 3:
                    Z.assign_add(opt.langevin_s * U_tau)

        # update Z
        train_z = train_z.numpy()
        train_z[idx, ] = Z.numpy()
        # print(train_z[idx, ].shape)
        train_z = tf.convert_to_tensor(train_z)

        if it % opt.disp_interval == 0 and it:
            log_text = 'Iter-[{}/{}]; loss: {:.3f}'.format(it, opt.niter,
                                                           float(loss))
            log_print(log_text, log_dir)

        if it % opt.evl_interval == 0 and it:
            gen_feat, gen_label = synthesize_feature_test(
                netG, dataset, opt)  # (3000, 2048), (3000,)

            """ ZSL """
            acc = eval_zsl_knn(gen_feat.numpy(), gen_label.numpy(), dataset)
            result_zsl_knn.update(it, acc)
            log_print("{}nn Classifier: ".format(opt.Knn), log_dir)
            log_print(
                "Accuracy is {:.2f}%, Best_acc [{:.2f}% | Iter-{}]".format(
                    acc, result_zsl_knn.best_acc, result_zsl_knn.best_iter),
                log_dir)