def train_src_rec(encoder, classifier, generator, data_loader):
    """Train the generator to reconstruct source images from encoder features.

    Only the generator's parameters are optimized (the encoder is used as a
    feature extractor; its weights are not in the optimizer). Reconstruction
    quality is measured with pixel-wise MSE.

    Args:
        encoder: feature-extractor network; called on each image batch.
        classifier: only switched to train mode here — kept in the signature
            for symmetry with the other train_* helpers.
        generator: decoder trained to reconstruct images from features.
        data_loader: iterable yielding (images, labels) batches.

    Returns:
        The trained generator.
    """
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()
    generator.train()

    # setup criterion and optimizer (generator parameters only)
    optimizer = optim.Adam(
        generator.parameters(),
        lr=cfg.learning_rate_apt,
        betas=(cfg.beta1, cfg.beta2))
    criterionRec = torch.nn.MSELoss()

    ####################
    # 2. train network #
    ####################
    for epoch in range(cfg.num_epochs_pre_rec):
        for step, (images, labels) in enumerate(data_loader):
            # make images and labels variable
            images = make_variable(images)
            # labels[labels == 10] = 0
            labels = make_variable(labels).long().squeeze()

            # zero gradients for optimizer
            optimizer.zero_grad()

            # reconstruct images from encoder features; the feature vector is
            # expanded to N x C x 1 x 1 so the deconvolutional generator
            # accepts it as a spatial input
            feat = encoder(images)
            feat_reshape = (feat.unsqueeze(2)).unsqueeze(2)
            reconst = generator(feat_reshape)
            loss_rec = criterionRec(reconst, images)

            loss_rec.backward()
            optimizer.step()

            # print step info
            # BUGFIX: Tensor.data[0] was removed in PyTorch >= 0.5; use .item()
            if (step + 1) % cfg.log_step_pre == 0:
                print('Epoch [%d] loss[%.2f] ' % (epoch, loss_rec.item()))

    # save final model
    save_model(generator, "ADDA-source-generator-final.pt")

    return generator
# ---- Per-epoch validation pass ----
# NOTE(review): this is the tail of a larger training loop; `net`,
# `loader_valid`, `ith_epoch`, `args`, `tb_writer`, `feed_forward`,
# `save_model` and `trange` are defined outside this excerpt.
net.eval()
iterator_valid = iter(loader_valid)
valid_loss = {}   # running sum of each loss term, weighted by batch size
valid_num = 0     # total number of validation samples accumulated
for _ in trange(len(loader_valid), desc='Valid ep%d' % ith_epoch, position=2):
    x, y_bon, y_cor = next(iterator_valid)
    with torch.no_grad():
        # feed_forward returns a dict of named loss tensors
        losses = feed_forward(net, x, y_bon, y_cor)
    for k, v in losses.items():
        # weight by batch size so the division below yields a
        # per-sample mean even when the last batch is short
        valid_loss[k] = valid_loss.get(k, 0) + v.item() * x.size(0)
    valid_num += x.size(0)

# log per-sample mean of each validation loss term to TensorBoard
for k, v in valid_loss.items():
    k = 'valid/%s' % k
    tb_writer.add_scalar(k, v / valid_num, ith_epoch)

# Periodically save model
if ith_epoch % args.save_every == 0:
    save_model(
        net,
        os.path.join(args.ckpt, args.id, 'epoch_%d.pth' % ith_epoch),
        ith_epoch)

# Save best validation loss model
# NOTE(review): this compares the batch-size-weighted *sum* of losses, not
# the mean — assumes the validation set size is fixed across epochs; confirm.
if valid_loss['total'] < args.best_valid_loss:
    args.best_valid_loss = valid_loss['total']
    save_model(
        net,
        os.path.join(args.ckpt, args.id, 'best_valid.pth'),
        ith_epoch)
def train(self, data_loader, stage=1):
    """Run the GAN training loop for the chosen stage.

    Loads the stage-I or stage-II generator/discriminator pair, then for
    each epoch alternates one discriminator update and one generator update
    per batch, logging scalar summaries every 100 steps and saving image
    samples produced from a fixed noise vector.

    Args:
        self: trainer object providing batch_size, max_epoch, gpus,
            image_dir, model_dir, snapshot_interval and summary_writer.
        data_loader: iterable yielding (real_images, text_embedding) pairs.
        stage: 1 or 2; selects which network pair to load/train.
    """
    if stage == 1:
        netG, netD = self.load_network_stageI()
    else:
        netG, netD = self.load_network_stageII()

    nz = cfg.Z_DIM
    batch_size = self.batch_size
    # reusable noise buffer, refilled in-place each batch
    noise = Variable(torch.FloatTensor(batch_size, nz))
    # fixed noise used only for the periodic sample images
    # NOTE(review): volatile=True is the pre-0.4 PyTorch no-grad flag; it is
    # a no-op (with a warning) or an error on modern PyTorch — confirm the
    # pinned torch version before upgrading.
    fixed_noise = \
        Variable(torch.FloatTensor(batch_size, nz).normal_(0, 1),
                 volatile=True)
    real_labels = Variable(torch.FloatTensor(batch_size).fill_(1))
    fake_labels = Variable(torch.FloatTensor(batch_size).fill_(0))
    if cfg.CUDA:
        noise, fixed_noise = noise.cuda(), fixed_noise.cuda()
        real_labels, fake_labels = real_labels.cuda(), fake_labels.cuda()

    generator_lr = cfg.TRAIN.GENERATOR_LR
    discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
    lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
    optimizerD = \
        optim.Adam(netD.parameters(),
                   lr=cfg.TRAIN.DISCRIMINATOR_LR, betas=(0.5, 0.999))
    # only optimize generator parameters that require gradients
    # (frozen submodules are excluded)
    netG_para = []
    for p in netG.parameters():
        if p.requires_grad:
            netG_para.append(p)
    optimizerG = optim.Adam(netG_para,
                            lr=cfg.TRAIN.GENERATOR_LR,
                            betas=(0.5, 0.999))
    count = 0  # global step counter used as the summary x-axis
    for epoch in range(self.max_epoch):
        start_t = time.time()
        # halve both learning rates every lr_decay_step epochs
        if epoch % lr_decay_step == 0 and epoch > 0:
            generator_lr *= 0.5
            for param_group in optimizerG.param_groups:
                param_group['lr'] = generator_lr
            discriminator_lr *= 0.5
            for param_group in optimizerD.param_groups:
                param_group['lr'] = discriminator_lr

        for i, data in enumerate(data_loader, 0):
            ######################################################
            # (1) Prepare training data
            ######################################################
            real_img_cpu, txt_embedding = data
            real_imgs = Variable(real_img_cpu)
            txt_embedding = Variable(txt_embedding)
            if cfg.CUDA:
                real_imgs = real_imgs.cuda()
                txt_embedding = txt_embedding.cuda()

            #######################################################
            # (2) Generate fake images
            ######################################################
            noise.data.normal_(0, 1)
            inputs = (txt_embedding, noise)
            _, fake_imgs, mu, logvar = \
                nn.parallel.data_parallel(netG, inputs, self.gpus)

            ############################
            # (3) Update D network
            ###########################
            netD.zero_grad()
            errD, errD_real, errD_wrong, errD_fake = \
                compute_discriminator_loss(netD, real_imgs, fake_imgs,
                                           real_labels, fake_labels,
                                           mu, self.gpus)
            errD.backward()
            optimizerD.step()

            ############################
            # (2) Update G network
            ###########################
            netG.zero_grad()
            errG = compute_generator_loss(netD, fake_imgs,
                                          real_labels, mu, self.gpus)
            # KL term regularizes the conditioning-augmentation distribution
            kl_loss = KL_loss(mu, logvar)
            errG_total = errG + kl_loss * cfg.TRAIN.COEFF.KL
            errG_total.backward()
            optimizerG.step()

            count = count + 1
            if i % 100 == 0:
                summary_D = summary.scalar('D_loss', errD.item())
                summary_D_r = summary.scalar('D_loss_real', errD_real)
                summary_D_w = summary.scalar('D_loss_wrong', errD_wrong)
                summary_D_f = summary.scalar('D_loss_fake', errD_fake)
                summary_G = summary.scalar('G_loss', errG.item())
                summary_KL = summary.scalar('KL_loss', kl_loss.item())
                self.summary_writer.add_summary(summary_D, count)
                self.summary_writer.add_summary(summary_D_r, count)
                self.summary_writer.add_summary(summary_D_w, count)
                self.summary_writer.add_summary(summary_D_f, count)
                self.summary_writer.add_summary(summary_G, count)
                self.summary_writer.add_summary(summary_KL, count)

                # save the image result for each epoch
                # (generated from the fixed noise so samples are comparable)
                inputs = (txt_embedding, fixed_noise)
                lr_fake, fake, _, _ = \
                    nn.parallel.data_parallel(netG, inputs, self.gpus)
                save_img_results(real_img_cpu, fake, epoch, self.image_dir)
                if lr_fake is not None:
                    # stage-II also yields a low-resolution intermediate image
                    save_img_results(None, lr_fake, epoch, self.image_dir)

        end_t = time.time()
        print('''[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f Loss_KL: %.4f
                 Loss_real: %.4f Loss_wrong:%.4f Loss_fake %.4f
                 Total Time: %.2fsec
              '''
              % (epoch, self.max_epoch, i, len(data_loader),
                 errD.item(), errG.item(), kl_loss.item(),
                 errD_real, errD_wrong, errD_fake, (end_t - start_t)))
        if epoch % self.snapshot_interval == 0:
            save_model(netG, netD, epoch, self.model_dir)
    #
    # save_model(netG, netD, self.max_epoch, self.model_dir)
    #
    # self.summary_writer.close()
# NOTE(review): tail of a training loop; `optimizer_g_l`, `params`, the loss
# tensors, the *_durations lists, `plot_durations`, `save_model`, `plt` and
# the models come from the enclosing function.
optimizer_g_l.step()

g_step += 1
# break

# print info
# BUGFIX: Tensor.data[0] was removed in PyTorch >= 0.5; use .item()
if ((epoch + 1) % params.log_step == 0):
    print("Epoch ", epoch + 1, "in ", params.num_epochs,
          " d_loss :", d_loss.item(),
          " g_loss: ", g_loss.item(),
          " c_loss:", c_loss.item())
    d_loss_durations.append(d_loss.item())
    g_loss_durations.append(g_loss.item())
    c_loss_durations.append(c_loss.item())
    plot_durations()

# save model
if ((epoch + 1) % params.save_step == 0):
    save_model(critic, "V2_WGAN-GP_critic-{}.pt".format(epoch + 1))
    save_model(classifier, "V2_WGAN-GP_classifier-{}.pt".format(epoch + 1))
    save_model(generator, "V2_WGAN-GP_generator-{}.pt".format(epoch + 1))
    save_model(generator_larger, "V2_WGAN-GP_generator_larger-{}.pt".format(epoch + 1))

# leave interactive-plot mode and keep the final figure on screen
plt.ioff()
plt.show()
def train_src(encoder, classifier, data_loader, tgt_data_loader_eval):
    """Train encoder and classifier on the source domain.

    Both networks are optimized jointly with cross-entropy; accuracy on each
    training batch is reported, and the target eval loader is used only for
    periodic "source only" evaluation.

    Args:
        encoder: feature-extractor network (trained).
        classifier: classification head on top of the encoder (trained).
        data_loader: source-domain (images, labels) batches.
        tgt_data_loader_eval: target-domain loader for periodic evaluation.

    Returns:
        Tuple (encoder, classifier) after training.
    """
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    # setup criterion and optimizer (joint over encoder + classifier)
    optimizer = optim.Adam(list(encoder.parameters()) + list(classifier.parameters()),
                           lr=cfg.learning_rate_pre,
                           betas=(cfg.beta1, cfg.beta2))
    criterion = nn.CrossEntropyLoss()

    ####################
    # 2. train network #
    ####################
    for epoch in range(cfg.num_epochs_pre):
        for step, (images, labels) in enumerate(data_loader):
            # make images and labels variable
            images = make_variable(images)
            # labels[labels == 10] = 0
            labels = make_variable(labels).long().squeeze()

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute classification loss
            preds = classifier(encoder(images))
            loss = criterion(preds, labels)

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # per-batch training accuracy
            acc = evaluate.evaluate_step(encoder, classifier, images, labels)

            # print step info
            # BUGFIX: Tensor.data[0] was removed in PyTorch >= 0.5; use .item()
            if (step + 1) % cfg.log_step_pre == 0:
                print('Epoch [%d] loss[%.2f] Source_Accuracy[%.2f] '
                      % (epoch, loss.item(), acc))

        # eval model on test set
        if ((epoch + 1) % cfg.eval_step_pre == 0):
            print(">>> source only <<<")
            evaluate.eval_func(encoder, classifier, tgt_data_loader_eval)

        # save model parameters
        if ((epoch + 1) % cfg.save_step_pre == 0):
            save_model(encoder, "ADDA-source-encoder-{}.pt".format(epoch + 1))
            save_model(classifier,
                       "ADDA-source-classifier-{}.pt".format(epoch + 1))

    # save final model
    save_model(encoder, "ADDA-source-encoder-final.pt")
    save_model(classifier, "ADDA-source-classifier-final.pt")

    return encoder, classifier
def train(classifier, generator, critic, src_data_loader, tgt_data_loader):
    """Train generator, classifier and critic jointly (WGAN-GP style).

    Per epoch: several critic updates (more in the first iterations so the
    critic starts near optimum), then one classifier update on detached
    source features, then one generator update combining classification and
    adversarial losses.

    Args:
        classifier: classification head on top of generator features.
        generator: feature extractor shared by source and target images.
        critic: WGAN-GP discriminator over feature space.
        src_data_loader: source-domain (images, labels) batches.
        tgt_data_loader: target-domain (images, _) batches.

    Returns:
        Tuple (classifier, generator) after training.
    """
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    classifier.train()
    generator.train()
    critic.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_c = get_optimizer(classifier, "Adam")
    optimizer_g = get_optimizer(generator, "Adam")
    optimizer_d = get_optimizer(critic, "Adam")

    # infinite iterators over source and target data
    data_iter_src = get_inf_iterator(src_data_loader)
    data_iter_tgt = get_inf_iterator(tgt_data_loader)

    # counter
    g_step = 0

    # gradient directions for backward() on scalar critic outputs
    pos_labels = make_variable(torch.FloatTensor([1]))
    neg_labels = make_variable(torch.FloatTensor([-1]))

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs):
        ###########################
        # 2.1 train discriminator #
        ###########################
        # requires to compute gradients for D
        for p in critic.parameters():
            p.requires_grad = True

        # set steps for discriminator: extra iterations early on (and
        # periodically) help keep the critic near optimum
        if g_step < 25 or g_step % 500 == 0:
            critic_iters = 100
        else:
            critic_iters = params.d_steps

        # loop for optimizing discriminator
        for d_step in range(critic_iters):
            # convert images into torch.Variable
            images_src, labels_src = next(data_iter_src)
            images_tgt, _ = next(data_iter_tgt)
            images_src = make_variable(images_src)
            labels_src = make_variable(labels_src.squeeze_())
            images_tgt = make_variable(images_tgt)
            # skip short batches: the gradient penalty assumes a fixed size
            if images_src.size(0) != params.batch_size or \
               images_tgt.size(0) != params.batch_size:
                continue

            # zero gradients for optimizer
            optimizer_d.zero_grad()

            # source feature loss (features detached: critic-only update)
            feat_src = generator(images_src)
            d_loss_src = critic(feat_src.detach())
            d_loss_src = d_loss_src.mean()
            d_loss_src.backward(neg_labels)

            # target feature loss
            feat_tgt = generator(images_tgt)
            d_loss_tgt = critic(feat_tgt.detach())
            d_loss_tgt = d_loss_tgt.mean()
            d_loss_tgt.backward(pos_labels)

            # compute gradient penalty
            gradient_penalty = calc_gradient_penalty(critic,
                                                     feat_src.data,
                                                     feat_tgt.data)
            gradient_penalty.backward()

            # optimize weights of discriminator
            # (d_loss is assembled only for logging; gradients were already
            # accumulated by the three backward() calls above)
            d_loss = -d_loss_src + d_loss_tgt + gradient_penalty
            optimizer_d.step()

        ########################
        # 2.2 train classifier #
        ########################

        # zero gradients for optimizer
        optimizer_c.zero_grad()

        # classification loss on detached features (classifier-only update)
        preds_c = classifier(generator(images_src).detach())
        c_loss = criterion(preds_c, labels_src)

        # optimize source classifier
        c_loss.backward()
        optimizer_c.step()

        #######################
        # 2.3 train generator #
        #######################
        # avoid to compute gradients for D
        for p in critic.parameters():
            p.requires_grad = False

        # zero grad for optimizer of generator
        optimizer_g.zero_grad()

        # source classification loss for generator
        feat_src = generator(images_src)
        preds_c = classifier(feat_src)
        g_loss_cls = criterion(preds_c, labels_src)
        g_loss_cls.backward()

        # source discrimination loss for generator
        feat_src = generator(images_src)
        g_loss_src = critic(feat_src).mean()
        g_loss_src.backward(pos_labels)

        # target discrimination loss for generator
        feat_tgt = generator(images_tgt)
        g_loss_tgt = critic(feat_tgt).mean()
        g_loss_tgt.backward(neg_labels)

        # compute loss for generator (logging only; see backward calls above)
        g_loss = g_loss_src - g_loss_tgt + g_loss_cls

        # optimize weights of generator
        optimizer_g.step()
        g_step += 1

        ##################
        # 2.4 print info #
        ##################
        # BUGFIX: Tensor.data[0] was removed in PyTorch >= 0.5; use .item()
        if ((epoch + 1) % params.log_step == 0):
            print("Epoch [{}/{}]:"
                  "d_loss={:.5f} c_loss={:.5f} g_loss={:.5f} "
                  "D(x)={:.5f} D(G(z))={:.5f} GP={:.5f}".format(
                      epoch + 1,
                      params.num_epochs,
                      d_loss.item(),
                      c_loss.item(),
                      g_loss.item(),
                      d_loss_src.item(),
                      d_loss_tgt.item(),
                      gradient_penalty.item()))

        #############################
        # 2.5 save model parameters #
        #############################
        if ((epoch + 1) % params.save_step == 0):
            save_model(critic, "WGAN-GP_critic-{}.pt".format(epoch + 1))
            save_model(classifier, "WGAN-GP_classifier-{}.pt".format(epoch + 1))
            save_model(generator, "WGAN-GP_generator-{}.pt".format(epoch + 1))

    return classifier, generator
def domain_adapt(F, F_1, F_2, F_t, source_dataset, target_dataset, excerpt,
                 pseudo_labels, plot):
    """Perform Domain Adaptation between source and target domains.

    Runs cfg.num_epochs_k rounds. Each round trains F/F_1/F_2 on the merged
    source + pseudo-labelled target data (plus a dissimilarity penalty
    between F_1 and F_2) and F_t on pseudo-labelled target batches, then
    enlarges the pseudo-labelled target subset and regenerates its labels.

    Args:
        F: shared feature extractor.
        F_1, F_2: the two source classifiers.
        F_t: target-specific classifier.
        source_dataset, target_dataset: datasets to adapt between.
        excerpt: indices of the initially pseudo-labelled target samples.
        pseudo_labels: their initial pseudo labels.
        plot: when True, plot the running F_t loss each epoch.
    """
    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()

    # NOTE(review): a dead `if 0:` branch selecting Adam @ 1e-4 was removed;
    # SGD with per-epoch decay (adjustLearningRate below) is what runs.
    optimType = "sgd"
    cfg.learning_rate = 1.0E-4

    optimizer_F = get_optimizer(F, optimType)
    optimizer_F_1 = get_optimizer(F_1, optimType)
    optimizer_F_2 = get_optimizer(F_2, optimType)
    optimizer_F_t = get_optimizer(F_t, optimType)

    # get labelled target dataset
    print('pseudo_labels = %s' % str(pseudo_labels))
    target_dataset_labelled = get_dummy(target_dataset,
                                        excerpt,
                                        pseudo_labels,
                                        get_dataset=True)

    # merge source data and target data
    merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])
    print('target_dataset_labelled = %d' % len(target_dataset_labelled))

    # start training
    plt.figure()
    for k in range(cfg.num_epochs_k):
        # set train state for Dropout and BN layers
        F.train()
        F_1.train()
        F_2.train()
        F_t.train()

        losses = []

        merged_dataloader = make_data_loader(merged_dataset)
        target_dataloader_labelled = make_data_loader(target_dataset_labelled)
        target_dataloader_labelled_iter = get_inf_iterator(
            target_dataloader_labelled)

        for epoch in range(cfg.num_epochs_adapt):
            if optimType == 'sgd':
                adjustLearningRate(optimizer_F, cfg.learning_rate,
                                   epoch, cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_1, cfg.learning_rate,
                                   epoch, cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_2, cfg.learning_rate,
                                   epoch, cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_t, cfg.learning_rate,
                                   epoch, cfg.num_epochs_adapt)

            for step, rez in enumerate(merged_dataloader):
                images, labels = rez
                # short batches would break fixed-batch-size assumptions
                if images.shape[0] < cfg.batch_size:
                    print('WARNING: batch of size %d smaller than desired %d: skipping' % \
                          (images.shape[0], cfg.batch_size))
                    continue

                # sample from T_l (pseudo-labelled target batch)
                images_tgt, labels_tgt = next(target_dataloader_labelled_iter)
                while images_tgt.shape[0] < cfg.batch_size:
                    print('WARNING: target batch of size %d smaller than desired %d' % \
                          (images_tgt.shape[0], cfg.batch_size))
                    images_tgt, labels_tgt = next(
                        target_dataloader_labelled_iter)

                # convert into torch.autograd.Variable
                images = make_variable(images)
                labels = make_variable(labels)
                images_tgt = make_variable(images_tgt)
                labels_tgt = make_variable(labels_tgt)

                # zero-grad optimizer
                optimizer_F.zero_grad()
                optimizer_F_1.zero_grad()
                optimizer_F_2.zero_grad()
                optimizer_F_t.zero_grad()

                # forward networks
                out_F = F(images)
                out_F_1 = F_1(out_F)
                out_F_2 = F_2(out_F)
                out_F_t = F_t(F(images_tgt))

                # labelling loss: both source classifiers + weighted
                # dissimilarity penalty between F_1 and F_2
                loss_similiar = calc_similiar_penalty(F_1, F_2)
                loss_F_1 = criterion(out_F_1, labels)
                loss_F_2 = criterion(out_F_2, labels)
                loss_labelling = loss_F_1 + loss_F_2 + 0.03 * loss_similiar
                loss_labelling.backward()

                # compute target specific loss
                loss_F_t = criterion(out_F_t, labels_tgt)
                loss_F_t.backward()

                # optimize
                optimizer_F.step()
                optimizer_F_1.step()
                optimizer_F_2.step()
                optimizer_F_t.step()

                losses.append(loss_F_t.item())

                # print step info
                if ((step + 1) % cfg.log_step == 0):
                    print("K[{}/{}] Epoch [{}/{}] Step[{}/{}] Loss("
                          "labelling={:.5f} target={:.5f})".format(
                              k + 1,
                              cfg.num_epochs_k,
                              epoch + 1,
                              cfg.num_epochs_adapt,
                              step + 1,
                              len(merged_dataloader),
                              loss_labelling.item(),
                              loss_F_t.item(),
                          ))

            if plot:
                plt.clf()
                plt.plot(losses)
                plt.grid(1)
                plt.title(
                    'Loss for domain adaptation, k = {}/{}, epoch = {}/{}'
                    .format(k, cfg.num_epochs_k, epoch, cfg.num_epochs_adapt))
                plt.waitforbuttonpress(0.0001)

        # re-compute the number of selected target data
        num_target = (k + 2) * len(source_dataset) // 20
        num_target = min(num_target, cfg.num_target_max)
        print(">>> Set num of sampled target data: {}".format(num_target))

        # re-generate pseudo labels
        excerpt, pseudo_labels = generate_labels(F, F_1, F_2,
                                                 target_dataset,
                                                 num_target,
                                                 useWeightedSampling=True)
        print(">>> Genrate pseudo labels [{}] numtarget = {}".format(
            len(target_dataset_labelled), num_target))
        print('sizes = {}, {}, excerpt = {}, \npseudo_labels = {}'.format(
            len(excerpt), len(pseudo_labels), excerpt, pseudo_labels))

        # get labelled target dataset
        target_dataset_labelled = get_dummy(target_dataset,
                                            excerpt,
                                            pseudo_labels,
                                            get_dataset=True)

        # re-merge source data and target data
        merged_dataset = ConcatDataset(
            [source_dataset, target_dataset_labelled])

        # save model
        if ((k + 1) % cfg.save_step == 0):
            save_model(F, "adapt-F-{}.pt".format(k + 1))
            save_model(F_1, "adapt-F_1-{}.pt".format(k + 1))
            save_model(F_2, "adapt-F_2-{}.pt".format(k + 1))
            save_model(F_t, "adapt-F_t-{}.pt".format(k + 1))

    # save final model
    save_model(F, "adapt-F-final.pt")
    save_model(F_1, "adapt-F_1-final.pt")
    save_model(F_2, "adapt-F_2-final.pt")
    save_model(F_t, "adapt-F_t-final.pt")
def pre_train(F, F_1, F_2, F_t, source_data, plot):
    """Pre-train models on source domain dataset.

    Jointly trains the shared extractor F, the two source classifiers
    F_1/F_2 (with a dissimilarity penalty between them) and the target
    classifier F_t on labelled source data.

    Args:
        F: shared feature extractor.
        F_1, F_2: the two source classifiers.
        F_t: target-specific classifier (pre-trained on source here).
        source_data: iterable of (images, labels) source batches.
        plot: when True, plot the running total loss each epoch.
    """
    # set train state for Dropout and BN layers
    F.train()
    F_1.train()
    F_2.train()
    F_t.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()

    # NOTE(review): a dead `if 0:` branch selecting Adam @ 1e-4 was removed;
    # SGD @ 1e-3 with per-epoch decay (adjustLearningRate below) is what runs.
    optimType = "sgd"
    cfg.learning_rate = 1.0E-3

    optimizer_F = get_optimizer(F, optimType)
    optimizer_F_1 = get_optimizer(F_1, optimType)
    optimizer_F_2 = get_optimizer(F_2, optimType)
    optimizer_F_t = get_optimizer(F_t, optimType)

    losses = []
    if plot:
        plt.figure()

    # start training
    for epoch in range(cfg.num_epochs_pre):
        if optimType == 'sgd':
            adjustLearningRate(optimizer_F, cfg.learning_rate,
                               epoch, cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_1, cfg.learning_rate,
                               epoch, cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_2, cfg.learning_rate,
                               epoch, cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_t, cfg.learning_rate,
                               epoch, cfg.num_epochs_pre)

        for step, (images, labels) in enumerate(source_data):
            # convert into torch.autograd.Variable
            images = make_variable(images)
            labels = make_variable(labels)

            # zero-grad optimizer
            optimizer_F.zero_grad()
            optimizer_F_1.zero_grad()
            optimizer_F_2.zero_grad()
            optimizer_F_t.zero_grad()

            # forward networks
            out_F = F(images)
            out_F_1 = F_1(out_F)
            out_F_2 = F_2(out_F)
            out_F_t = F_t(out_F)

            # compute loss (classification + weighted dissimilarity penalty)
            loss_similiar = calc_similiar_penalty(F_1, F_2)
            loss_F_1 = criterion(out_F_1, labels)
            loss_F_2 = criterion(out_F_2, labels)
            loss_F_t = criterion(out_F_t, labels)
            loss_F = loss_F_1 + loss_F_2 + loss_F_t + 0.03 * loss_similiar
            loss_F.backward()

            # optimize
            optimizer_F.step()
            optimizer_F_1.step()
            optimizer_F_2.step()
            optimizer_F_t.step()

            losses.append(loss_F.item())

            # print step info
            # BUGFIX: the pre-`sim` format string "F_t={:.5f})" was left
            # concatenated after the new one, giving 10 placeholders for
            # 9 arguments -> IndexError on every log step. Removed it.
            if ((step + 1) % cfg.log_step == 0):
                print("Epoch [{}/{}] Step[{}/{}] Loss("
                      "Total={:.5f} F_1={:.5f} F_2={:.5f} "
                      "F_t={:.5f} sim={:.5f})"
                      .format(epoch + 1,
                              cfg.num_epochs_pre,
                              step + 1,
                              len(source_data),
                              loss_F.item(),
                              loss_F_1.item(),
                              loss_F_2.item(),
                              loss_F_t.item(),
                              loss_similiar.item(),
                              ))

        if plot:
            plt.clf()
            plt.plot(losses)
            plt.grid(1)
            plt.title('Loss for pre-training')
            plt.waitforbuttonpress(0.0001)

        # save model
        if ((epoch + 1) % cfg.save_step == 0):
            save_model(F, "pretrain-F-{}.pt".format(epoch + 1))
            save_model(F_1, "pretrain-F_1-{}.pt".format(epoch + 1))
            save_model(F_2, "pretrain-F_2-{}.pt".format(epoch + 1))
            save_model(F_t, "pretrain-F_t-{}.pt".format(epoch + 1))

    # save final model
    save_model(F, "pretrain-F-final.pt")
    save_model(F_1, "pretrain-F_1-final.pt")
    save_model(F_2, "pretrain-F_2-final.pt")
    save_model(F_t, "pretrain-F_t-final.pt")
def domain_adapt(F, F_1, F_2, F_t, source_dataset, target_dataset, excerpt,
                 pseudo_labels):
    """Perform Domain Adaptation between source and target domains.

    Runs cfg.num_epochs_k rounds: trains F/F_1/F_2 on merged source +
    pseudo-labelled target data and F_t on pseudo-labelled target batches,
    then enlarges the pseudo-labelled subset and regenerates its labels.

    Args:
        F: shared feature extractor.
        F_1, F_2: the two source classifiers.
        F_t: target-specific classifier.
        source_dataset, target_dataset: datasets to adapt between.
        excerpt: indices of the initially pseudo-labelled target samples.
        pseudo_labels: their initial pseudo labels.
    """
    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_F = get_optimizer(F, "Adam")
    optimizer_F_1 = get_optimizer(F_1, "Adam")
    optimizer_F_2 = get_optimizer(F_2, "Adam")
    optimizer_F_t = get_optimizer(F_t, "Adam")

    # get labelled target dataset
    target_dataset_labelled = get_dummy(target_dataset,
                                        excerpt,
                                        pseudo_labels,
                                        get_dataset=True)

    # merge source data and target data
    merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])

    # start training
    for k in range(cfg.num_epochs_k):
        # set train state for Dropout and BN layers
        F.train()
        F_1.train()
        F_2.train()
        F_t.train()

        merged_dataloader = make_data_loader(merged_dataset)
        target_dataloader_labelled = get_inf_iterator(
            make_data_loader(target_dataset_labelled))

        for epoch in range(cfg.num_epochs_adapt):
            for step, (images, labels) in enumerate(merged_dataloader):
                # sample from T_l (pseudo-labelled target batch)
                images_tgt, labels_tgt = next(target_dataloader_labelled)

                # convert into torch.autograd.Variable
                images = make_variable(images)
                labels = make_variable(labels)
                images_tgt = make_variable(images_tgt)
                labels_tgt = make_variable(labels_tgt)

                # zero-grad optimizer
                optimizer_F.zero_grad()
                optimizer_F_1.zero_grad()
                optimizer_F_2.zero_grad()
                optimizer_F_t.zero_grad()

                # forward networks
                out_F = F(images)
                out_F_1 = F_1(out_F)
                out_F_2 = F_2(out_F)
                out_F_t = F_t(F(images_tgt))

                # labelling loss: both source classifiers + dissimilarity
                # penalty between F_1 and F_2
                loss_similiar = calc_similiar_penalty(F_1, F_2)
                loss_F_1 = criterion(out_F_1, labels)
                loss_F_2 = criterion(out_F_2, labels)
                loss_labelling = loss_F_1 + loss_F_2 + loss_similiar
                loss_labelling.backward()

                # compute target specific loss
                loss_F_t = criterion(out_F_t, labels_tgt)
                loss_F_t.backward()

                # optimize
                optimizer_F.step()
                optimizer_F_1.step()
                optimizer_F_2.step()
                optimizer_F_t.step()

                # print step info
                # BUGFIX: Tensor.data[0] was removed in PyTorch >= 0.5;
                # use .item()
                if ((step + 1) % cfg.log_step == 0):
                    print("K[{}/{}] Epoch [{}/{}] Step[{}/{}] Loss("
                          "labelling={:.5f} target={:.5f})".format(
                              k + 1,
                              cfg.num_epochs_k,
                              epoch + 1,
                              cfg.num_epochs_adapt,
                              step + 1,
                              len(merged_dataloader),
                              loss_labelling.item(),
                              loss_F_t.item(),
                          ))

        # re-compute the number of selected target data
        num_target = (k + 2) * len(source_dataset) // 20
        num_target = min(num_target, cfg.num_target_max)
        print(">>> Set num of sampled target data: {}".format(num_target))

        # re-generate pseudo labels
        # NOTE(review): `genarate_labels` spelling matches the helper defined
        # elsewhere in this project — do not "fix" without renaming both.
        excerpt, pseudo_labels = genarate_labels(F, F_1, F_2,
                                                 target_dataset,
                                                 num_target)
        print(">>> Genrate pseudo labels [{}]".format(
            len(target_dataset_labelled)))

        # get labelled target dataset
        target_dataset_labelled = get_dummy(target_dataset,
                                            excerpt,
                                            pseudo_labels,
                                            get_dataset=True)

        # re-merge source data and target data
        merged_dataset = ConcatDataset(
            [source_dataset, target_dataset_labelled])

        # save model
        if ((k + 1) % cfg.save_step == 0):
            save_model(F, "adapt-F-{}.pt".format(k + 1))
            save_model(F_1, "adapt-F_1-{}.pt".format(k + 1))
            save_model(F_2, "adapt-F_2-{}.pt".format(k + 1))
            save_model(F_t, "adapt-F_t-{}.pt".format(k + 1))

    # save final model
    save_model(F, "adapt-F-final.pt")
    save_model(F_1, "adapt-F_1-final.pt")
    save_model(F_2, "adapt-F_2-final.pt")
    save_model(F_t, "adapt-F_t-final.pt")
def pre_train(F, F_1, F_2, F_t, source_data):
    """Pre-train models on source domain dataset.

    Jointly trains the shared extractor F, the two source classifiers
    F_1/F_2 (with a dissimilarity penalty between them) and the target
    classifier F_t on labelled source data.

    Args:
        F: shared feature extractor.
        F_1, F_2: the two source classifiers.
        F_t: target-specific classifier (pre-trained on source here).
        source_data: iterable of (images, labels) source batches.
    """
    # set train state for Dropout and BN layers
    F.train()
    F_1.train()
    F_2.train()
    F_t.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_F = get_optimizer(F, "Adam")
    optimizer_F_1 = get_optimizer(F_1, "Adam")
    optimizer_F_2 = get_optimizer(F_2, "Adam")
    optimizer_F_t = get_optimizer(F_t, "Adam")

    # start training
    for epoch in range(cfg.num_epochs_pre):
        for step, (images, labels) in enumerate(source_data):
            # convert into torch.autograd.Variable
            images = make_variable(images)
            labels = make_variable(labels)

            # zero-grad optimizer
            optimizer_F.zero_grad()
            optimizer_F_1.zero_grad()
            optimizer_F_2.zero_grad()
            optimizer_F_t.zero_grad()

            # forward networks
            out_F = F(images)
            out_F_1 = F_1(out_F)
            out_F_2 = F_2(out_F)
            out_F_t = F_t(out_F)

            # compute loss (classification + dissimilarity penalty)
            loss_similiar = calc_similiar_penalty(F_1, F_2)
            loss_F_1 = criterion(out_F_1, labels)
            loss_F_2 = criterion(out_F_2, labels)
            loss_F_t = criterion(out_F_t, labels)
            loss_F = loss_F_1 + loss_F_2 + loss_F_t + loss_similiar
            loss_F.backward()

            # optimize
            optimizer_F.step()
            optimizer_F_1.step()
            optimizer_F_2.step()
            optimizer_F_t.step()

            # print step info
            # BUGFIX: Tensor.data[0] was removed in PyTorch >= 0.5; use .item()
            if ((step + 1) % cfg.log_step == 0):
                print("Epoch [{}/{}] Step[{}/{}] Loss("
                      "Total={:.5f} F_1={:.5f} F_2={:.5f} "
                      "F_t={:.5f} sim={:.5f})".format(
                          epoch + 1,
                          cfg.num_epochs_pre,
                          step + 1,
                          len(source_data),
                          loss_F.item(),
                          loss_F_1.item(),
                          loss_F_2.item(),
                          loss_F_t.item(),
                          loss_similiar.item(),
                      ))

        # save model
        if ((epoch + 1) % cfg.save_step == 0):
            save_model(F, "pretrain-F-{}.pt".format(epoch + 1))
            save_model(F_1, "pretrain-F_1-{}.pt".format(epoch + 1))
            save_model(F_2, "pretrain-F_2-{}.pt".format(epoch + 1))
            save_model(F_t, "pretrain-F_t-{}.pt".format(epoch + 1))

    # save final model
    save_model(F, "pretrain-F-final.pt")
    save_model(F_1, "pretrain-F_1-final.pt")
    save_model(F_2, "pretrain-F_2-final.pt")
    save_model(F_t, "pretrain-F_t-final.pt")
def train(classifier, generator, critic, src_data_loader, tgt_data_loader):
    """Train the classifier (on top of the generator) on source data only.

    NOTE(review): despite the name and original docstring, the adversarial
    parts of this variant are disabled — the critic is never optimized and
    the target loader is unused. Dead code (a `critic_iters` computation
    that was always overridden to 0 and a commented-out discriminator loop)
    has been removed; runtime behavior is unchanged.

    Args:
        classifier: classification head, trained here.
        generator: feature extractor; receives gradients through the
            classifier loss but has no optimizer of its own in this variant.
        critic: unused except for enabling grads on its parameters.
        src_data_loader: source (images, labels) batches.
        tgt_data_loader: unused in this variant.
    """
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    classifier.train()
    generator.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_c = get_optimizer(classifier, "Adam")

    # infinite iterator over source data
    data_iter_src = get_inf_iterator(src_data_loader)

    # counter
    g_step = 0

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs):
        # kept for parity with the full adversarial version; the critic is
        # never optimized here
        for p in critic.parameters():
            p.requires_grad = True

        # convert images into torch.Variable
        images_src, labels_src = next(data_iter_src)
        images_src = make_variable(images_src).cuda()
        labels_src = make_variable(labels_src.squeeze_()).cuda()

        ########################
        # 2.2 train classifier #
        ########################

        # zero gradients for optimizer
        optimizer_c.zero_grad()

        # compute classification loss
        preds_c = classifier(generator(images_src))
        c_loss = criterion(preds_c, labels_src)

        # optimize source classifier
        c_loss.backward()
        optimizer_c.step()

        g_step += 1

        ##################
        # 2.4 print info #
        ##################
        if ((epoch + 1) % 500 == 0):
            test(classifier, generator, src_data_loader, params.src_dataset)

        if ((epoch + 1) % 500 == 0):
            save_model(generator, "Mnist-generator-{}.pt".format(epoch + 1))
            # NOTE(review): "classifer" typo kept deliberately — existing
            # checkpoints are saved/loaded under this exact filename.
            save_model(classifier, "Mnist-classifer{}.pt".format(epoch + 1))