def evaluate(encoder, classifier, data_loader):
    """Evaluation for target encoder by source classifier on target dataset."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (reviews, mask, labels) in data_loader:
        reviews = make_cuda(reviews)
        mask = make_cuda(mask)
        labels = make_cuda(labels)

        with torch.no_grad():
            feat = encoder(reviews, mask)
            preds = classifier(feat)
        loss += criterion(preds, labels).item()

        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum().item()

    loss /= len(data_loader)
    acc /= len(data_loader.dataset)

    print("Avg Loss = %.4f, Avg Accuracy = %.4f" % (loss, acc))

    return acc
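# Every routine in this file relies on a `make_cuda` helper defined elsewhere
# in the repo. A minimal sketch of what it is assumed to do (move a tensor or
# module onto the GPU when one is available); this is an assumption, not the
# repo's definition:
def make_cuda(x):
    """Move a tensor or module to the GPU if CUDA is available (assumed helper)."""
    if torch.cuda.is_available():
        x = x.cuda()
    return x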
def eval_tgt(encoder, classifier, data_loader):
    """Evaluation for target encoder by source classifier on target dataset."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (images, labels) in data_loader:
        images = make_cuda(images)
        labels = make_cuda(labels).squeeze_()

        with torch.no_grad():
            preds = classifier(encoder(images))
        loss += criterion(preds, labels).item()

        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum().item()

    loss /= len(data_loader)
    acc /= len(data_loader.dataset)

    print("Avg Loss = {:.4f}, Avg Accuracy = {:.2%}".format(loss, acc))
def save_features(args, encoder, data_loader):
    """Save inferred features."""
    # set eval state for Dropout and BN layers
    encoder.eval()

    x = []
    y = []
    if args.adapt_method == 'shot':
        # SHOT loaders additionally yield the sample index; the original
        # never appended in this branch, so x and y came back empty
        for (reviews, masks, labels, _) in data_loader:
            reviews = make_cuda(reviews)
            masks = make_cuda(masks)
            with torch.no_grad():
                feat = torch.squeeze(encoder(reviews, masks)).cpu().numpy()
            x.append(feat)
            y.append(labels.cpu().numpy())
    else:
        for (reviews, masks, labels) in data_loader:
            reviews = make_cuda(reviews)
            masks = make_cuda(masks)
            with torch.no_grad():
                feat = torch.squeeze(encoder(reviews, masks)).cpu().numpy()
            x.append(feat)
            y.append(labels.cpu().numpy())

    x = np.asarray(x)
    y = np.asarray(y)
    return x, y
def eval_(model, data_loader, mode):
    """Evaluate classifier for source domain."""
    # set eval state for Dropout and BN layers
    model.eval()
    alpha = 0

    # init accuracy
    acc = 0

    # evaluate network (direct iteration replaces the Python-2-style
    # data_iter.next() loop, which breaks on Python 3)
    with torch.no_grad():
        for (s_img, s_label) in data_loader:
            s_image = make_cuda(s_img)
            s_label = make_cuda(s_label)

            preds, _ = model(s_image, alpha)
            pred_cls = preds.data.max(1)[1]
            acc += pred_cls.eq(s_label).cpu().sum().item()

    acc /= len(data_loader.dataset)

    print("{}, Avg Accuracy = {:.2%}".format(mode, acc))
def train(args, model, data_loader, initial=False):
    MSELoss = nn.MSELoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    model.train()

    num_epochs = args.initial_epochs if initial else args.num_epochs
    for epoch in range(num_epochs):
        loss = 0
        for step, (features, targets) in enumerate(data_loader):
            features = make_cuda(features)
            targets = make_cuda(targets)

            optimizer.zero_grad()
            preds = model(features)
            mse_loss = MSELoss(preds, targets)
            loss += mse_loss.item()
            mse_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.max_grad_norm)
            optimizer.step()

            # print step info
            if (step + 1) % args.log_step == 0:
                print("Epoch [%.3d/%.3d] Step [%.3d/%.3d]: MSE_loss=%.4f, RMSE_loss=%.4f"
                      % (epoch + 1, num_epochs, step + 1, len(data_loader),
                         loss / args.log_step, math.sqrt(loss / args.log_step)))
                loss = 0

    return model
def dann_adapt_data_free(args, encoder, tgt_encoder, discriminator, classifier,
                         src_data_loader, tgt_train_loader, tgt_all_loader):
    """src & tgt data free version of DANN."""
    # set train state for Dropout and BN layers
    classifier.train()
    discriminator.train()

    # setup criterion and optimizers; both loaders yield pre-extracted
    # features, so the encoder itself is only needed for evaluation
    # (the original also built an encoder optimizer that was never stepped)
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.CrossEntropyLoss()
    optimizer_c = optim.Adam(classifier.parameters(), lr=param.c_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_dataloader = min(len(src_data_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_train_loader))
        for i, ((src_feat, src_label), (tgt_feat, _)) in enumerate(pbar):
            p = float(i + epoch * len_dataloader) / args.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_feat = make_cuda(tgt_feat)

            # zero gradients for optimizers
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # source: class loss + domain loss through the reversal layer
            s_class_output = classifier(src_feat)
            s_reverse_feat = ReverseLayerF.apply(src_feat, alpha)
            s_domain_output = discriminator(s_reverse_feat)
            loss_s_label = loss_class(s_class_output, src_label)
            s_domain_label = make_cuda(torch.zeros(src_feat.size(0)).long())
            loss_s_domain = loss_domain(s_domain_output, s_domain_label)

            # target: domain loss only
            t_reverse_feat = ReverseLayerF.apply(tgt_feat, alpha)
            t_domain_output = discriminator(t_reverse_feat)
            t_domain_label = make_cuda(torch.ones(tgt_feat.size(0)).long())
            loss_t_domain = loss_domain(t_domain_output, t_domain_label)

            loss = loss_s_label + loss_s_domain + loss_t_domain
            loss.backward()
            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_s_dom={loss_s_domain.item():.4f} " \
                       f"l_t_dom={loss_t_domain.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_all_loader)

    return encoder, classifier
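# The DANN routines apply `ReverseLayerF`, imported from elsewhere in the
# repo. If it is not available, the standard gradient-reversal layer (as in
# the common DANN reference implementation) is a drop-in sketch:
class ReverseLayerF(torch.autograd.Function):
    """Identity in the forward pass; negates and scales the gradient by
    alpha in the backward pass."""

    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reversed, scaled gradient for the feature extractor; None for alpha
        return grad_output.neg() * ctx.alpha, None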
def train_src(encoder, classifier, data_loader):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) + list(classifier.parameters()),
                           lr=params.c_learning_rate,
                           betas=(params.beta1, params.beta2))
    criterion = nn.CrossEntropyLoss()

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs_pre):
        for step, (images, labels) in enumerate(data_loader):
            # make images and labels variable
            images = make_cuda(images)
            labels = make_cuda(labels.squeeze_())

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute loss for critic
            preds = classifier(encoder(images))
            loss = criterion(preds, labels)

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % params.log_step_pre == 0:
                print("Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs_pre, step + 1,
                    len(data_loader), loss.item()))

        # eval model on test set
        if (epoch + 1) % params.eval_step_pre == 0:
            eval_src(encoder, classifier, data_loader)

        # save model parameters
        if (epoch + 1) % params.save_step_pre == 0:
            save_model(encoder, "ADDA-source-encoder-{}.pt".format(epoch + 1))
            save_model(classifier, "ADDA-source-classifier-{}.pt".format(epoch + 1))

    # save final model
    save_model(encoder, "ADDA-source-encoder-final.pt")
    save_model(classifier, "ADDA-source-classifier-final.pt")

    return encoder, classifier
def pretrain(args, encoder, classifier, data_loader):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) + list(classifier.parameters()),
                           lr=param.c_learning_rate)
    CELoss = nn.CrossEntropyLoss()

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    ####################
    # 2. train network #
    ####################
    for epoch in range(args.pre_epochs):
        for step, (reviews, mask, labels) in enumerate(data_loader):
            reviews = make_cuda(reviews)
            mask = make_cuda(mask)
            labels = make_cuda(labels)

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute loss for classifier
            feat = encoder(reviews, mask)
            preds = classifier(feat)
            cls_loss = CELoss(preds, labels)

            # optimize source classifier
            cls_loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % args.pre_log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f"
                      % (epoch + 1, args.pre_epochs, step + 1,
                         len(data_loader), cls_loss.item()))

    # save final model
    save_model(args, encoder, param.src_encoder_path)
    save_model(args, classifier, param.src_classifier_path)

    return encoder, classifier
def src_gmm(args, src_encoder, src_data_loader):
    """From src features to GMM, then resample new features.
    Based on source_target_free.py."""
    src_encoder.eval()

    # store resampled features (num_classes, num_samples, feature_dim)
    s_res_features = np.zeros(
        [param.num_labels, param.num_samples, param.input_dim])
    for i in range(param.num_labels):
        x = []  # 1 class's features: 1 Gaussian
        pbar = tqdm(src_data_loader)
        with torch.no_grad():
            for j, (reviews, masks, labels) in enumerate(pbar):
                reviews = make_cuda(reviews)
                masks = make_cuda(masks)
                for review, mask, label in zip(reviews, masks, labels):
                    if label == i:
                        review = torch.unsqueeze(review, 0)
                        mask = torch.unsqueeze(mask, 0)
                        s_feature = torch.squeeze(
                            src_encoder(review, mask)).cpu().numpy()
                        x.append(s_feature)
                pbar.set_description('src_gmm')
        x = np.asarray(x)  # 1 class's features

        gmm = GaussianMixture(n_components=6).fit(x)
        if args.dp:
            dp_model = DPModel(x, total_eps=args.total_eps, k=1)
            noise_for_mu = dp_model.add_noise_for_mu()
            z = dp_model.add_noise_for_sigma()
            gmm.means_ += noise_for_mu
            gmm.covariances_ += z
            # visual shows: don't divide by the norm of new_mu & new_sigma

        # resample
        s_res_features[i, :, :] = gmm.sample(param.num_samples)[0]

    return s_res_features  # source resampled features [2, 2000, 768]
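# The data-free adaptation routines in this file iterate loaders that yield
# (feature, label) pairs. A sketch (batch size is an assumption, and this
# helper is illustrative, not part of the repo) of how the resampled array
# from `src_gmm` could be wrapped into such a loader:
def build_feature_loader(s_res_features, batch_size=32):
    """Wrap resampled features of shape [num_labels, num_samples, dim]
    into a (feature, label) DataLoader."""
    from torch.utils.data import DataLoader, TensorDataset
    num_labels, num_samples, dim = s_res_features.shape
    feats = torch.from_numpy(s_res_features.reshape(-1, dim)).float()
    # label i for every sample resampled from class i's GMM
    labels = torch.arange(num_labels).repeat_interleave(num_samples)
    return DataLoader(TensorDataset(feats, labels),
                      batch_size=batch_size, shuffle=True)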
def obtain_label(args, tgt_te, src_encoder, classifier):
    """SHOT-style pseudo-labeling: cluster target features around class
    centroids initialized from the classifier's soft outputs."""
    start_test = True
    with torch.no_grad():
        for step, (reviews, masks, labels, tgt_idx) in enumerate(tgt_te):
            reviews = make_cuda(reviews)
            masks = make_cuda(masks)
            feas = src_encoder(reviews, masks)
            outputs = classifier(feas)
            if start_test:
                all_fea = feas.float().cpu()
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_fea = torch.cat((all_fea, feas.float().cpu()), 0)
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)

    all_output = nn.Softmax(dim=1)(all_output)
    _, predict = torch.max(all_output, 1)
    prev_acc = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size(0))

    # append a bias feature and l2-normalize
    all_fea = torch.cat((all_fea, torch.ones(all_fea.size(0), 1)), 1)
    all_fea = (all_fea.t() / torch.norm(all_fea, p=2, dim=1)).t()
    all_fea = all_fea.float().cpu().numpy()

    # initial centroids from soft predictions
    K = all_output.size(1)
    aff = all_output.float().cpu().numpy()
    initc = aff.transpose().dot(all_fea)
    initc = initc / (1e-8 + aff.sum(axis=0)[:, None])
    dd = cdist(all_fea, initc, 'cosine')
    pred_label = dd.argmin(axis=1)
    acc = np.sum(pred_label == all_label.float().numpy()) / len(all_fea)

    # one k-means-style refinement round
    for _ in range(1):
        aff = np.eye(K)[pred_label]
        initc = aff.transpose().dot(all_fea)
        initc = initc / (1e-8 + aff.sum(axis=0)[:, None])
        dd = cdist(all_fea, initc, 'cosine')
        pred_label = dd.argmin(axis=1)
        acc = np.sum(pred_label == all_label.float().numpy()) / len(all_fea)

    log_str = 'Accuracy = {:.2f}% -> {:.2f}%'.format(prev_acc * 100, acc * 100)
    print(log_str + '\n')
    return pred_label  # .astype('int')
def shot_adapt(args, encoder, classifier, tgt_train_loader, tgt_all_loader, tgt_te):
    """SHOT adaptation: fine-tune the encoder on target data with pseudo-label,
    entropy and diversity losses; only the encoder's optimizer is stepped."""
    classifier.train()
    optimizer = optim.SGD(encoder.parameters(), lr=param.d_lr)
    interval_iter = len(tgt_train_loader)

    for epoch in range(args.num_epochs):
        pbar = tqdm(tgt_train_loader)
        for step, (tgt_reviews, tgt_masks, _, tgt_idx) in enumerate(pbar):
            if len(tgt_reviews) == 1:
                continue

            # refresh pseudo-labels at the start of each epoch
            if step % interval_iter == 0 and args.cls_par > 0:
                encoder.eval()
                mem_label = obtain_label(args, tgt_te, encoder, classifier)
                encoder.train()

            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)
            features_test = encoder(tgt_reviews, tgt_masks)
            outputs_test = classifier(features_test)

            if args.cls_par > 0:
                pred = mem_label[tgt_idx]
                pred = make_cuda(torch.tensor(pred))
                classifier_loss = args.cls_par * nn.CrossEntropyLoss()(
                    outputs_test, pred)
            else:
                classifier_loss = torch.tensor(0.0).cuda()

            if args.ent:
                softmax_out = nn.Softmax(dim=1)(outputs_test)  # outputs_test is C(E(xt))
                entropy_loss = torch.mean(entropy(softmax_out))  # loss_ent
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    entropy_loss -= torch.sum(
                        -msoftmax * torch.log(msoftmax + 1e-6))  # loss_ent + loss_div
                im_loss = entropy_loss * args.ent_par
                classifier_loss += im_loss

            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

    return encoder, classifier
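# `shot_adapt` and the src-free routines below call an `entropy` helper that
# is not shown here. A minimal sketch matching the SHOT reference
# implementation (per-sample Shannon entropy of the softmax outputs; the
# epsilon value is an assumption):
def entropy(softmax_out, eps=1e-6):
    """Per-sample entropy; callers take torch.mean over the returned vector."""
    return -torch.sum(softmax_out * torch.log(softmax_out + eps), dim=1)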
def __init__(self, n_features, n_hidden, n_layers, n_output, weight,
             bidirectional):
    super(weightedLSTM, self).__init__()
    self.n_hidden = n_hidden
    self.n_layers = n_layers
    self.weight = make_cuda(torch.FloatTensor(weight))
    self.bidirectional = bidirectional
    self.lstm = nn.LSTM(input_size=n_features,
                        hidden_size=n_hidden,
                        num_layers=n_layers,
                        bidirectional=bidirectional,
                        batch_first=True)
    self.regr = nn.Linear(2 * n_hidden if bidirectional else n_hidden,
                          n_output)
def tgt_gmm(encoder, tgt_data_all_loader, num_cluster):
    """Build target GMM and resample from it; used in adapt.
    Based on source_target_free.py."""
    encoder.eval()
    t_features = []
    pbar = tqdm(tgt_data_all_loader)
    for j, (reviews, masks, _) in enumerate(pbar):
        reviews = make_cuda(reviews)
        masks = make_cuda(masks)
        with torch.no_grad():
            for review, mask in zip(reviews, masks):
                review = torch.unsqueeze(review, 0)
                mask = torch.unsqueeze(mask, 0)
                feature = torch.squeeze(encoder(review, mask))
                feature = feature.cpu().numpy()
                t_features.append(feature)
        pbar.set_description('tgt_gmm')
    t_features = np.asarray(t_features)

    # num_labels * num_cluster components (not sure why num_cluster is not 1)
    n_components = param.num_labels * num_cluster
    gmm = GaussianMixture(n_components=n_components).fit(t_features)
    tgt_mean = gmm.means_
    tgt_var = gmm.covariances_
    print(gmm.converged_)

    # param.num_samples resampled rows, feature_dim columns
    t_res_features = np.zeros([param.num_samples, param.input_dim])
    p = gmm.weights_
    p[-1] += 1 - np.sum(p)  # make the weights sum exactly to 1
    counts = np.random.multinomial(param.num_samples, p, 1)
    k = 0
    # draw each component's multinomial share of samples (the original
    # hard-coded `2 * num_cluster`, which assumes num_labels == 2)
    for i in range(n_components):
        t_res_features[k:k + counts[0, i], :] = np.random.multivariate_normal(
            tgt_mean[i, :], tgt_var[i, :, :], counts[0, i])
        k += counts[0, i]
    return t_res_features  # [2000, 768]
def pretrain(args, encoder, classifier, data_loader):
    """Train classifier for source domain."""
    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) + list(classifier.parameters()),
                           lr=param.c_lr)
    ce_loss = nn.CrossEntropyLoss()

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    for epoch in range(args.pre_epochs):
        pbar = tqdm(data_loader)
        for step, (reviews, mask, labels) in enumerate(pbar):
            reviews = make_cuda(reviews)
            mask = make_cuda(mask)
            labels = make_cuda(labels)

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute loss for classifier
            feat = encoder(reviews, mask)
            preds = classifier(feat)
            cls_loss = ce_loss(preds, labels)

            # optimize source classifier
            cls_loss.backward()
            optimizer.step()

            # print step info
            if step % args.pre_log_step == 0:
                desc = f"Epoch [{epoch}/{args.pre_epochs}] Step [{step}/{len(data_loader)}]: " \
                       f"c_loss={cls_loss.item():.4f} "
                pbar.set_description(desc=desc)

    return encoder, classifier
def train_model(epoch):
    i = 0
    hidden_init = model.state0(batch_size)
    if options.cuda:
        embedding.cuda()
        model.cuda()
        hidden_init = utils.make_cuda(hidden_init)
    loss_avg = 0

    for s in range(num_batches - 1):
        embed_optimizer.zero_grad()
        model_optimizer.zero_grad()
        # the deprecated Variable wrapper is no longer needed
        batch = train_x.narrow(0, s * seq_length, seq_length + 1).long()
        start = time.time()
        hidden = hidden_init
        if options.cuda:
            batch = batch.cuda()
        loss = 0
        for t in range(seq_length):
            emb = embedding(batch[t])
            hidden, output = model(emb, hidden)
            loss_step = loss_fn(output, batch[t + 1])
            loss += loss_step
            writer.add_scalar('loss per step', loss_step, i)
            i += 1
        writer.add_scalar('loss per batch', loss, s)
        loss.backward()
        hidden_init = utils.copy_state(hidden)
        gn = utils.calc_grad_norm(model)
        utils.clip_gradient(model, model_settings['clip_gradient'])
        utils.clip_gradient(embedding, model_settings['clip_gradient'])
        embed_optimizer.step()
        model_optimizer.step()
        # .item() replaces the deprecated loss.data[0] indexing
        loss_avg = .99 * loss_avg + .01 * loss.item() / seq_length
        if s % 10 == 0:
            print(f'epoch: {epoch} | batch: {s}/{num_batches} | '
                  f'step loss: {loss.item() / seq_length} | '
                  f'batch loss: {loss.item()} | avg loss: {loss_avg} | '
                  f'time: {time.time() - start}s')
def evaluate(args, model, scaler, data_loader):
    model.eval()
    model.lstm.flatten_parameters()
    all_preds = []
    all_targets = []
    for features, targets in data_loader:
        features = make_cuda(features)
        with torch.no_grad():
            preds = model(features)
        all_preds.append(preds)
        all_targets.append(targets)

    all_preds = scaler.inverse_transform(
        torch.cat(all_preds, dim=0).cpu().numpy().reshape(-1, 1))
    all_targets = scaler.inverse_transform(
        torch.cat(all_targets, dim=0).cpu().numpy().reshape(-1, 1))

    mse = mean_squared_error(all_targets, all_preds)
    rmse = math.sqrt(mse)
    mae = mean_absolute_error(all_targets, all_preds)
    print("RMSE = %.4f, MAE = %.4f\n" % (rmse, mae))
    return rmse, mae
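# `evaluate` expects a fitted scaler exposing `inverse_transform`, e.g. an
# sklearn MinMaxScaler fitted on the training targets. A hedged usage sketch;
# the shapes, batch size, and loader construction are assumptions:
#
#     from sklearn.preprocessing import MinMaxScaler
#     from torch.utils.data import DataLoader, TensorDataset
#
#     targets = np.random.rand(100, 1).astype(np.float32)   # toy targets
#     scaler = MinMaxScaler().fit(targets)
#     scaled = torch.from_numpy(scaler.transform(targets)).float().squeeze(1)
#     features = torch.randn(100, 10, 8)                    # (N, seq_len, n_features)
#     loader = DataLoader(TensorDataset(features, scaled), batch_size=16)
#     rmse, mae = evaluate(args, model, scaler, loader)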
        # .cuda() is not in-place for tensors, so the result must be assigned
        next_input = indices[0]
        if options.cuda:
            next_input = next_input.cuda()
    return ''.join(chr(i) for i in sample)


if __name__ == '__main__':
    if options.test:
        checkpoint = torch.load(options.load_model)
        embedding = checkpoint['embedding']
        model = checkpoint['model']
        state = model.state0(batch_size)
        if options.cuda:
            state = utils.make_cuda(state)
            embedding.cuda()
            model.cuda()
        gen_text = generation(embedding, model, state, options.n, options.primer)
        print(gen_text)
    else:
        lr = model_settings['learning_rate']
        layers = model_settings['layers']
        batch_size = model_settings['batch_size']
        rnn_size = model_settings['rnn_size']
        embed_size = model_settings['embed_size']
        seq_length = model_settings['seq_length']
        dropout = model_settings['dropout']
        data_size = 256  # ???
def adda_adapt(args, src_encoder, tgt_encoder, discriminator,
               src_data_loader, tgt_data_loader):
    """Adapt tgt encoder by ADDA.
    Can run, but tgt acc is bad, only 0.5."""
    # set train state for Dropout and BN layers
    src_encoder.eval()
    tgt_encoder.train()
    discriminator.train()

    # setup criterion and optimizer
    bce_loss = nn.BCELoss()
    optimizer_tgt = optim.Adam(tgt_encoder.parameters(), lr=param.d_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_data_loader = min(len(src_data_loader), len(tgt_data_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_masks, _),
                   (tgt_reviews, tgt_masks, _)) in enumerate(pbar):
            # move inputs to GPU (missing in the original)
            src_reviews = make_cuda(src_reviews)
            src_masks = make_cuda(src_masks)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizer
            optimizer_d.zero_grad()

            # extract and concat features
            feat_src = src_encoder(src_reviews, src_masks)
            feat_tgt = tgt_encoder(tgt_reviews, tgt_masks)
            feat_concat = torch.cat((feat_src, feat_tgt), 0)

            # predict on discriminator
            pred_concat = discriminator(feat_concat.detach())

            # prepare real and fake label
            label_src = make_cuda(torch.ones(feat_src.size(0))).unsqueeze(1)
            label_tgt = make_cuda(torch.zeros(feat_tgt.size(0))).unsqueeze(1)
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for critic
            d_loss = bce_loss(pred_concat, label_concat)
            d_loss.backward()

            # optimize critic
            optimizer_d.step()

            # zero gradients for optimizer
            optimizer_tgt.zero_grad()

            # predict on discriminator; the target encoder is trained to make
            # target features look like source ones, hence label_src
            pred_tgt = discriminator(feat_tgt)

            # compute loss for target encoder
            loss_tgt = bce_loss(pred_tgt, label_src)
            loss_tgt.backward()

            # optimize target encoder
            optimizer_tgt.step()

            if (step + 1) % args.log_step == 0:
                desc = "Epoch [{}/{}] Step [{}/{}]: t_loss={:.4f} c_loss={:.4f} ".format(
                    epoch, args.num_epochs, step, len_data_loader,
                    loss_tgt.item(), d_loss.item())
                pbar.set_description(desc=desc)

    return tgt_encoder
def train(args, encoder, classifier, src_data_loader, src_data_loader_eval,
          tgt_data_loader, tgt_data_loader_all):
    """Train encoder for target domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    # setup criterion and optimizer
    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(encoder.parameters()) + list(classifier.parameters()),
                           lr=param.c_learning_rate)

    ####################
    # 2. train network #
    ####################
    for epoch in range(args.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_mask, src_labels),
                   (tgt_reviews, tgt_mask, _)) in data_zip:
            src_reviews = make_cuda(src_reviews)
            src_mask = make_cuda(src_mask)
            src_labels = make_cuda(src_labels)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_mask = make_cuda(tgt_mask)

            # extract features and classify the source batch
            src_feat = encoder(src_reviews, src_mask)
            tgt_feat = encoder(tgt_reviews, tgt_mask)
            src_preds = classifier(src_feat)

            optimizer.zero_grad()
            cls_loss = CELoss(src_preds, src_labels)
            if args.method == 'coral':
                adapt_loss = CORAL(src_feat, tgt_feat)
            else:  # args.method == 'mmd'
                adapt_loss = MMD(src_feat, tgt_feat)
            loss = cls_loss + args.alpha * adapt_loss

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info (the loss label covers both CORAL and MMD)
            if (step + 1) % args.log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f adapt_loss=%.4f"
                      % (epoch + 1, args.num_epochs, step + 1,
                         len(src_data_loader), cls_loss.item(), adapt_loss.item()))

        evaluate(encoder, classifier, src_data_loader)
        evaluate(encoder, classifier, src_data_loader_eval)
        evaluate(encoder, classifier, tgt_data_loader_all)

    save_model(encoder, param.encoder_path)
    save_model(classifier, param.classifier_path)

    return encoder, classifier
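# `CORAL` and `MMD` are imported from elsewhere in the repo. For reference, a
# minimal sketch of the CORAL loss (squared Frobenius distance between source
# and target feature covariances, per Sun & Saenko's Deep CORAL); this is an
# assumption about the helper used here, not the repo's definition:
def CORAL(source, target):
    d = source.size(1)

    def cov(m):
        # covariance of a (batch, dim) feature matrix
        m = m - m.mean(dim=0, keepdim=True)
        return m.t() @ m / (m.size(0) - 1)

    return (cov(source) - cov(target)).pow(2).sum() / (4. * d * d)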
def train_tgt(src_encoder, tgt_encoder, critic,
              src_data_loader, tgt_data_loader):
    """Train encoder for target domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    tgt_encoder.train()
    critic.train()

    # setup criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer_tgt = optim.Adam(tgt_encoder.parameters(),
                               lr=params.c_learning_rate,
                               betas=(params.beta1, params.beta2))
    optimizer_critic = optim.Adam(critic.parameters(),
                                  lr=params.d_learning_rate,
                                  betas=(params.beta1, params.beta2))
    len_data_loader = min(len(src_data_loader), len(tgt_data_loader))

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((images_src, _), (images_tgt, _)) in data_zip:
            ###########################
            # 2.1 train discriminator #
            ###########################

            # make images variable
            images_src = make_cuda(images_src)
            images_tgt = make_cuda(images_tgt)

            # zero gradients for optimizer
            optimizer_critic.zero_grad()

            # extract and concat features
            feat_src = src_encoder(images_src)
            feat_tgt = tgt_encoder(images_tgt)
            feat_concat = torch.cat((feat_src, feat_tgt), 0)

            # predict on discriminator
            pred_concat = critic(feat_concat.detach())

            # prepare real and fake label
            label_src = make_cuda(torch.ones(feat_src.size(0)).long())
            label_tgt = make_cuda(torch.zeros(feat_tgt.size(0)).long())
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for critic
            loss_critic = criterion(pred_concat, label_concat)
            loss_critic.backward()

            # optimize critic
            optimizer_critic.step()

            pred_cls = torch.squeeze(pred_concat.max(1)[1])
            acc = (pred_cls == label_concat).float().mean()

            ############################
            # 2.2 train target encoder #
            ############################

            # zero gradients for optimizer
            optimizer_critic.zero_grad()
            optimizer_tgt.zero_grad()

            # extract target features
            feat_tgt = tgt_encoder(images_tgt)

            # predict on discriminator
            pred_tgt = critic(feat_tgt)

            # prepare fake labels
            label_tgt = make_cuda(torch.ones(feat_tgt.size(0)).long())

            # compute loss for target encoder
            loss_tgt = criterion(pred_tgt, label_tgt)
            loss_tgt.backward()

            # optimize target encoder
            optimizer_tgt.step()

            #######################
            # 2.3 print step info #
            #######################
            if (step + 1) % params.log_step == 0:
                print("Epoch [{}/{}] Step [{}/{}]:"
                      "d_loss={:.5f} g_loss={:.5f} acc={:.5f}"
                      .format(epoch + 1, params.num_epochs,
                              step + 1, len_data_loader,
                              loss_critic.item(), loss_tgt.item(), acc.item()))

        #############################
        # 2.4 save model parameters #
        #############################
        if (epoch + 1) % params.save_step == 0:
            torch.save(critic.state_dict(), os.path.join(
                params.model_root, "ADDA-critic-{}.pt".format(epoch + 1)))
            torch.save(tgt_encoder.state_dict(), os.path.join(
                params.model_root, "ADDA-target-encoder-{}.pt".format(epoch + 1)))

    torch.save(critic.state_dict(), os.path.join(
        params.model_root, "ADDA-critic-final.pt"))
    torch.save(tgt_encoder.state_dict(), os.path.join(
        params.model_root, "ADDA-target-encoder-final.pt"))

    return tgt_encoder
def train(args, encoder, cls_classifier, dom_classifier, src_data_loader,
          src_data_loader_eval, tgt_data_loader, tgt_data_loader_all):
    """Train encoder for target domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    cls_classifier.train()
    dom_classifier.train()

    # setup criterion and optimizer
    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(cls_classifier.parameters()) +
                           list(dom_classifier.parameters()),
                           lr=param.c_learning_rate)

    ####################
    # 2. train network #
    ####################
    for epoch in range(args.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_mask, src_labels),
                   (tgt_reviews, tgt_mask, _)) in data_zip:
            src_reviews = make_cuda(src_reviews)
            src_mask = make_cuda(src_mask)
            src_labels = make_cuda(src_labels)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_mask = make_cuda(tgt_mask)

            # extract and concat features
            src_feat = encoder(src_reviews, src_mask)
            tgt_feat = encoder(tgt_reviews, tgt_mask)
            feat_concat = torch.cat((src_feat, tgt_feat), 0)
            src_preds = cls_classifier(src_feat)
            dom_preds = dom_classifier(feat_concat, alpha=args.alpha)

            # prepare real and fake label
            optimizer.zero_grad()
            label_src = make_cuda(torch.ones(src_feat.size(0)))
            label_tgt = make_cuda(torch.zeros(tgt_feat.size(0)))
            label_concat = torch.cat((label_src, label_tgt), 0).long()

            loss_cls = CELoss(src_preds, src_labels)
            loss_dom = CELoss(dom_preds, label_concat)
            loss = loss_cls + loss_dom

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % args.log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f dom_loss=%.4f"
                      % (epoch + 1, args.num_epochs, step + 1,
                         len(src_data_loader), loss_cls.item(), loss_dom.item()))

        evaluate(encoder, cls_classifier, src_data_loader)
        evaluate(encoder, cls_classifier, src_data_loader_eval)
        evaluate(encoder, cls_classifier, tgt_data_loader_all)

    save_model(encoder, param.encoder_path)
    save_model(cls_classifier, param.cls_classifier_path)
    save_model(dom_classifier, param.dom_classifier_path)

    return encoder, cls_classifier, dom_classifier
def dann_adapt(args, encoder, src_encoder, discriminator, classifier,
               src_data_loader, tgt_train_loader, tgt_all_loader):
    """DANN with KD: `encoder` is adapted; `src_encoder` is used only for the
    KD loss."""
    # set train state for Dropout and BN layers
    encoder.train()      # works as tgt encoder, for adapting
    src_encoder.eval()   # for KD loss
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.CrossEntropyLoss()  # maybe needs changing
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_e = optim.Adam(encoder.parameters(), lr=param.c_lr)
    optimizer_c = optim.Adam(classifier.parameters(), lr=param.c_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_dataloader = min(len(src_data_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_train_loader))
        for i, ((src_reviews, src_masks, src_labels),
                (tgt_reviews, tgt_masks, _)) in enumerate(pbar):
            p = float(i + epoch * len_dataloader) / args.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            src_reviews = make_cuda(src_reviews)
            src_masks = make_cuda(src_masks)
            src_labels = make_cuda(src_labels)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizers
            optimizer_e.zero_grad()
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # source: class loss + domain loss through the reversal layer
            src_feat = encoder(src_reviews, src_masks)
            s_class_output = classifier(src_feat)
            s_reverse_feat = ReverseLayerF.apply(src_feat, alpha)
            s_domain_output = discriminator(s_reverse_feat)
            loss_s_label = loss_class(s_class_output, src_labels)
            s_domain_label = make_cuda(
                torch.zeros(s_domain_output.size(0)).long())
            loss_s_domain = loss_domain(s_domain_output, s_domain_label)

            # target: domain loss only
            tgt_feat = encoder(tgt_reviews, tgt_masks)
            t_reverse_feat = ReverseLayerF.apply(tgt_feat, alpha)
            t_domain_output = discriminator(t_reverse_feat)
            t_domain_label = make_cuda(
                torch.ones(t_domain_output.size(0)).long())
            loss_t_domain = loss_domain(t_domain_output, t_domain_label)

            loss = loss_s_label + loss_s_domain + loss_t_domain

            if args.kd:
                t = args.temperature
                # teacher probabilities need no gradient; the student
                # distribution over tgt_feat does
                with torch.no_grad():
                    src_tgt_feat = src_encoder(tgt_reviews, tgt_masks)
                    src_prob = F.softmax(classifier(src_tgt_feat) / t, dim=-1)
                tgt_prob = F.log_softmax(classifier(tgt_feat) / t, dim=-1)
                kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t
                loss += kd_loss

            loss.backward()
            optimizer_e.step()
            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_s_dom={loss_s_domain.item():.4f} " \
                       f"l_t_dom={loss_t_domain.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_all_loader)

    return encoder, classifier
def cdan_adapt_data_free(args, encoder, discriminator, classifier,
                         src_data_loader, tgt_data_train_loader,
                         tgt_data_all_loader):
    """CDAN, src & tgt data free."""
    # set train state for Dropout and BN layers
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.BCELoss()
    optimizer_c = optim.SGD(classifier.parameters(), lr=param.c_lr,
                            weight_decay=5e-3, momentum=0.9)
    optimizer_d = optim.SGD(discriminator.parameters(), lr=param.d_lr,
                            weight_decay=5e-3, momentum=0.9)
    len_dataloader = min(len(src_data_loader), len(tgt_data_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_data_train_loader))
        for i, ((src_feat, src_label), (tgt_feat, _)) in enumerate(pbar):
            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_feat = make_cuda(tgt_feat)

            # zero gradients for optimizers
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # concat features
            feat = torch.cat((src_feat, tgt_feat), 0)
            s_class_output = classifier(src_feat)
            loss_s_label = loss_class(
                s_class_output, src_label)  # maybe change s_class_output to before softmax

            # CDAN conditioning: outer product of predictions and features
            class_output = classifier(feat)
            op_out = torch.bmm(class_output.unsqueeze(2), feat.unsqueeze(1))
            ad_out = discriminator(
                op_out.view(-1, class_output.size(1) * feat.size(1)))
            dc_target = make_cuda(torch.from_numpy(
                np.array([[1]] * src_feat.size(0) +
                         [[0]] * tgt_feat.size(0))).float())
            loss_d = loss_domain(ad_out, dc_target)

            loss = loss_s_label + loss_d
            loss.backward()
            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_d={loss_d.item():.4f} "
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_data_all_loader)

    return encoder, classifier
def aad_adapt(args, src_encoder, tgt_encoder, discriminator, src_classifier,
              src_loader, tgt_train_loader, tgt_data_all_loader):
    """Train tgt_encoder using bert-AAD;
    swapped src data to tgt data for KD."""
    # set train state for Dropout and BN layers
    src_encoder.eval()
    src_classifier.eval()
    tgt_encoder.train()
    discriminator.train()

    # setup criterion and optimizer
    bce_loss = nn.BCELoss()
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_g = optim.Adam(tgt_encoder.parameters(), lr=param.d_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_data_loader = min(len(src_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_loader, tgt_train_loader))
        for step, ((src_reviews, src_masks, _),
                   (tgt_reviews, tgt_masks, _)) in enumerate(pbar):
            src_reviews = make_cuda(src_reviews)
            src_masks = make_cuda(src_masks)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizer
            optimizer_d.zero_grad()

            # extract and concat features; tgt_feat stays outside no_grad so
            # the generator step can backpropagate through it
            with torch.no_grad():
                src_feat = src_encoder(src_reviews, src_masks)
                src_tgt_feat = src_encoder(
                    tgt_reviews, tgt_masks)  # was tgt_encoder(src_reviews, src_masks)
            tgt_feat = tgt_encoder(tgt_reviews, tgt_masks)
            feat_concat = torch.cat(
                (src_feat, tgt_feat), 0)  # different from original code, is correct

            # predict on discriminator
            pred_concat = discriminator(feat_concat.detach())

            # prepare real and fake label
            src_label = make_cuda(torch.ones(src_feat.size(0))).unsqueeze(1)
            tgt_label = make_cuda(torch.zeros(tgt_feat.size(0))).unsqueeze(1)
            label_concat = torch.cat((src_label, tgt_label), 0)

            # domain discriminator loss of discriminator
            d_loss = bce_loss(pred_concat, label_concat)
            d_loss.backward()
            # increasing the clip_value from 0.01 to 0.1 is bad
            for p in discriminator.parameters():
                p.data.clamp_(-args.clip_value, args.clip_value)
            # optimize discriminator
            optimizer_d.step()

            # zero gradients for optimizer
            optimizer_g.zero_grad()
            t = args.temperature

            # predict on discriminator
            pred_tgt = discriminator(tgt_feat)

            # logits for KL-divergence
            with torch.no_grad():
                src_prob = F.softmax(src_classifier(src_tgt_feat) / t, dim=-1)
            tgt_prob = F.log_softmax(src_classifier(tgt_feat) / t, dim=-1)  # changed direction
            kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t

            # compute loss for target encoder
            gen_loss = bce_loss(pred_tgt, src_label)  # correct
            loss_tgt = args.alpha * gen_loss + args.beta * kd_loss
            loss_tgt.backward()
            torch.nn.utils.clip_grad_norm_(tgt_encoder.parameters(),
                                           args.max_grad_norm)
            # optimize target encoder
            optimizer_g.step()

            if step % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{step}/{len_data_loader}]: " \
                       f"g_loss={gen_loss.item():.4f} d_loss={d_loss.item():.4f} kd_loss={kd_loss.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, tgt_encoder, src_classifier, tgt_data_all_loader)

    return tgt_encoder
def adapt(args, src_encoder, tgt_encoder, discriminator, src_classifier,
          src_data_loader, tgt_data_train_loader, tgt_data_all_loader):
    """Train encoder for target domain."""
    # set train state for Dropout and BN layers
    src_encoder.eval()
    src_classifier.eval()
    tgt_encoder.train()
    discriminator.train()

    # setup criterion and optimizer
    BCELoss = nn.BCEWithLogitsLoss()
    KLDivLoss = nn.KLDivLoss(reduction='batchmean')
    optimizer_G = optim.Adam(tgt_encoder.parameters(), lr=param.d_learning_rate)
    optimizer_D = optim.Adam(discriminator.parameters(), lr=param.d_learning_rate)
    len_data_loader = min(len(src_data_loader), len(tgt_data_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_train_loader))
        for step, ((reviews_src, src_mask, _),
                   (reviews_tgt, tgt_mask, _)) in data_zip:
            reviews_src = make_cuda(reviews_src)
            src_mask = make_cuda(src_mask)
            reviews_tgt = make_cuda(reviews_tgt)
            tgt_mask = make_cuda(tgt_mask)

            # zero gradients for optimizer
            optimizer_D.zero_grad()

            # extract and concat features; only the frozen source features
            # go inside no_grad, the target encoder needs gradients
            with torch.no_grad():
                feat_src = src_encoder(reviews_src, src_mask)
            feat_src_tgt = tgt_encoder(reviews_src, src_mask)
            feat_tgt = tgt_encoder(reviews_tgt, tgt_mask)
            feat_concat = torch.cat((feat_src_tgt, feat_tgt), 0)

            # predict on discriminator
            pred_concat = discriminator(feat_concat.detach())

            # prepare real and fake label
            label_src = make_cuda(torch.ones(feat_src_tgt.size(0))).unsqueeze(1)
            label_tgt = make_cuda(torch.zeros(feat_tgt.size(0))).unsqueeze(1)
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for discriminator
            dis_loss = BCELoss(pred_concat, label_concat)
            dis_loss.backward()

            for p in discriminator.parameters():
                p.data.clamp_(-args.clip_value, args.clip_value)
            # optimize discriminator
            optimizer_D.step()

            # discriminator accuracy: threshold the single logit at 0
            # (the original `pred_concat.max(1)[1]` is always 0 for a
            # one-logit output and broadcast against label_concat)
            pred_cls = (pred_concat > 0).float()
            acc = (pred_cls == label_concat).float().mean()

            # zero gradients for optimizer
            optimizer_G.zero_grad()
            T = args.temperature

            # predict on discriminator
            pred_tgt = discriminator(feat_tgt)

            # logits for KL-divergence
            with torch.no_grad():
                src_prob = F.softmax(src_classifier(feat_src) / T, dim=-1)
            tgt_prob = F.log_softmax(src_classifier(feat_src_tgt) / T, dim=-1)
            kd_loss = KLDivLoss(tgt_prob, src_prob.detach()) * T * T

            # compute loss for target encoder
            gen_loss = BCELoss(pred_tgt, label_src)
            loss_tgt = args.alpha * gen_loss + args.beta * kd_loss
            loss_tgt.backward()
            torch.nn.utils.clip_grad_norm_(tgt_encoder.parameters(),
                                           args.max_grad_norm)
            # optimize target encoder
            optimizer_G.step()

            if (step + 1) % args.log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: "
                      "acc=%.4f g_loss=%.4f d_loss=%.4f kd_loss=%.4f"
                      % (epoch + 1, args.num_epochs, step + 1, len_data_loader,
                         acc.item(), gen_loss.item(), dis_loss.item(),
                         kd_loss.item()))

        evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)

    return tgt_encoder
def train_src(args, encoder, class_classifier, domain_classifier,
              src_data_loader, tgt_data_loader, data_loader_eval):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(class_classifier.parameters()) +
                           list(domain_classifier.parameters()),
                           lr=param.c_learning_rate,
                           betas=(param.beta1, param.beta2))
    criterion = nn.CrossEntropyLoss()

    # set train state for Dropout and BN layers
    encoder.train()
    class_classifier.train()
    domain_classifier.train()

    ####################
    # 2. train network #
    ####################
    for epoch in range(args.num_epochs):
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_labels), (tgt_reviews, _)) in data_zip:
            # move inputs to GPU (missing in the original)
            src_reviews = make_cuda(src_reviews)
            src_labels = make_cuda(src_labels)
            tgt_reviews = make_cuda(tgt_reviews)

            # zero gradients for optimizer
            optimizer.zero_grad()

            # derive attention masks from non-zero token ids
            src_mask = (src_reviews != 0).long()
            tgt_mask = (tgt_reviews != 0).long()
            src_feat = encoder(src_reviews, src_mask)
            tgt_feat = encoder(tgt_reviews, tgt_mask)
            feat_concat = torch.cat((src_feat, tgt_feat), 0)
            src_preds = class_classifier(src_feat)
            domain_preds = domain_classifier(feat_concat, alpha=args.dom_weight)

            # prepare real and fake label
            label_src = make_cuda(torch.ones(src_feat.size(0)))
            label_tgt = make_cuda(torch.zeros(tgt_feat.size(0)))
            label_concat = torch.cat((label_src, label_tgt), 0).long()

            loss_cls = criterion(src_preds, src_labels)
            loss_dom = criterion(domain_preds, label_concat)
            loss = loss_cls + loss_dom

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % args.log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f dom_loss=%.4f"
                      % (epoch + 1, args.num_epochs, step + 1,
                         len(src_data_loader), loss_cls.item(), loss_dom.item()))

        # eval model on eval set
        if (epoch + 1) % args.eval_step == 0:
            eval_src(encoder, class_classifier, src_data_loader)
            eval_src(encoder, class_classifier, data_loader_eval)
            print()

        # save model parameters
        if (epoch + 1) % args.save_step == 0:
            save_model(encoder, "DANN-encoder-{}.pt".format(epoch + 1))
            save_model(class_classifier, "DANN-cls-classifier-{}.pt".format(epoch + 1))
            save_model(domain_classifier, "DANN-dom-classifier-{}.pt".format(epoch + 1))

    # save final model
    save_model(encoder, "DANN-encoder-final.pt")
    save_model(class_classifier, "DANN-cls-classifier-final.pt")
    save_model(domain_classifier, "DANN-dom-classifier-final.pt")

    return encoder, class_classifier, domain_classifier
def dann_adapt_src_free(args, encoder, src_encoder, discriminator, classifier,
                        src_data_loader, tgt_train_loader, tgt_all_loader):
    """src data free version of DANN, with original tgt data."""
    # set train state for Dropout and BN layers
    src_encoder.eval()
    encoder.train()
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.CrossEntropyLoss()
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_e = optim.Adam(encoder.parameters(), lr=param.c_lr)
    optimizer_c = optim.Adam(classifier.parameters(), lr=param.c_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_dataloader = min(len(src_data_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_train_loader))
        for i, ((src_feat, src_label),
                (tgt_reviews, tgt_masks, _)) in enumerate(pbar):
            p = float(i + epoch * len_dataloader) / args.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizers
            optimizer_e.zero_grad()
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # source features come pre-extracted from the loader
            s_class_output = classifier(src_feat)
            s_reverse_feat = ReverseLayerF.apply(src_feat, alpha)
            s_domain_output = discriminator(s_reverse_feat)
            loss_s_label = loss_class(s_class_output, src_label)
            s_domain_label = make_cuda(torch.zeros(src_feat.size(0)).long())
            loss_s_domain = loss_domain(s_domain_output, s_domain_label)

            tgt_feat = encoder(tgt_reviews, tgt_masks)
            t_reverse_feat = ReverseLayerF.apply(tgt_feat, alpha)
            t_domain_output = discriminator(t_reverse_feat)
            t_domain_label = make_cuda(torch.ones(tgt_feat.size(0)).long())
            loss_t_domain = loss_domain(t_domain_output, t_domain_label)

            loss = loss_s_label + loss_s_domain + loss_t_domain

            tgt_outputs = classifier(tgt_feat)
            if args.kd:
                t = args.temperature
                # the frozen src_encoder pass needs no gradient
                with torch.no_grad():
                    src_tgt_feat = src_encoder(tgt_reviews, tgt_masks)
                    src_prob = F.softmax(classifier(src_tgt_feat) / t, dim=-1)
                tgt_prob = F.log_softmax(tgt_outputs / t, dim=-1)
                kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t
                loss += kd_loss
            if args.ent:
                softmax_out = nn.Softmax(dim=1)(tgt_outputs)
                entropy_loss = torch.mean(entropy(softmax_out))  # loss_ent
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    entropy_loss -= torch.sum(
                        -msoftmax * torch.log(msoftmax + 1e-6))  # loss_ent + loss_div
                im_loss = entropy_loss * args.ent_par
                loss += im_loss

            loss.backward()
            optimizer_e.step()
            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_s_dom={loss_s_domain.item():.4f} " \
                       f"l_t_dom={loss_t_domain.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_all_loader)

    return encoder, classifier
def train(model, dataloader_source, dataloader_target,
          source_data_loader_eval, target_data_loader_eval):
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    loss_class = torch.nn.NLLLoss()
    loss_domain = torch.nn.NLLLoss()

    model = model.cuda()
    loss_class = loss_class.cuda()
    loss_domain = loss_domain.cuda()

    for epoch in range(params.num_epochs):
        model.train()
        len_dataloader = min(len(dataloader_source), len(dataloader_target))
        data_source_iter = iter(dataloader_source)
        data_target_iter = iter(dataloader_target)

        i = 0
        while i < len_dataloader:
            p = float(i + epoch * len_dataloader) / params.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            # training model using source data
            # (next(...) replaces the Python-2-only .next() method)
            s_img, s_label = next(data_source_iter)
            s_image = make_cuda(s_img)
            s_label = make_cuda(s_label)

            model.zero_grad()
            batch_size = len(s_label)

            domain_label = torch.zeros(batch_size).long().cuda()

            class_output, domain_output = model(s_image, alpha=alpha)
            err_s_label = loss_class(class_output, s_label)
            err_s_domain = loss_domain(domain_output, domain_label)

            # training model using target data
            t_img, _ = next(data_target_iter)
            t_img = make_cuda(t_img)
            batch_size = len(t_img)

            domain_label = torch.ones(batch_size).long().cuda()

            _, domain_output = model(t_img, alpha=alpha)
            err_t_domain = loss_domain(domain_output, domain_label)

            err = err_t_domain + err_s_domain + err_s_label
            err.backward()
            optimizer.step()

            if (i + 1) % params.log_step == 0:
                print("Epoch [{}/{}] Step [{}/{}]: d_loss_t={} / d_loss_s={} / c_loss_s={}"
                      .format(epoch + 1, params.num_epochs, i + 1, len_dataloader,
                              err_t_domain.item(), err_s_domain.item(),
                              err_s_label.item()))
            i += 1

        eval_(model, source_data_loader_eval, 'src')
        eval_(model, target_data_loader_eval, 'tgt')

        if (epoch + 1) % params.save_step == 0:
            save_model(model, "DANN-{}.pt".format(epoch + 1))

    return model
def cdan_adapt_src_free(args, encoder, src_encoder, discriminator, classifier,
                        src_data_loader, tgt_data_train_loader,
                        tgt_data_all_loader):
    """CDAN, src data free, with original tgt data."""
    # set train state for Dropout and BN layers
    src_encoder.eval()
    encoder.train()
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.BCELoss()
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_e = optim.SGD(encoder.parameters(), lr=param.c_lr,
                            weight_decay=5e-3, momentum=0.9)
    optimizer_c = optim.SGD(classifier.parameters(), lr=param.c_lr,
                            weight_decay=5e-3, momentum=0.9)
    optimizer_d = optim.SGD(discriminator.parameters(), lr=param.d_lr,
                            weight_decay=5e-3, momentum=0.9)
    len_dataloader = min(len(src_data_loader), len(tgt_data_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_data_train_loader))
        for i, ((src_feat, src_label),
                (tgt_reviews, tgt_masks, _)) in enumerate(pbar):
            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizers
            optimizer_e.zero_grad()
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # extract and concat features
            tgt_feat = encoder(tgt_reviews, tgt_masks)
            feat = torch.cat((src_feat, tgt_feat), 0)
            s_class_output = classifier(src_feat)
            loss_s_label = loss_class(
                s_class_output, src_label)  # maybe change s_class_output to before softmax

            # CDAN conditioning: outer product of predictions and features
            class_output = classifier(feat)
            op_out = torch.bmm(class_output.unsqueeze(2), feat.unsqueeze(1))
            ad_out = discriminator(
                op_out.view(-1, class_output.size(1) * feat.size(1)))
            dc_target = make_cuda(torch.from_numpy(
                np.array([[1]] * src_feat.size(0) +
                         [[0]] * tgt_feat.size(0))).float())
            loss_d = loss_domain(ad_out, dc_target)

            loss = loss_s_label + loss_d

            tgt_outputs = classifier(tgt_feat)
            if args.kd:
                t = args.temperature
                # the frozen src_encoder pass needs no gradient
                with torch.no_grad():
                    src_tgt_feat = src_encoder(tgt_reviews, tgt_masks)
                    src_prob = F.softmax(classifier(src_tgt_feat) / t, dim=-1)
                tgt_prob = F.log_softmax(tgt_outputs / t, dim=-1)
                kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t
                loss += kd_loss
            if args.ent:
                softmax_out = nn.Softmax(dim=1)(tgt_outputs)
                entropy_loss = torch.mean(entropy(softmax_out))  # loss_ent
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    entropy_loss -= torch.sum(
                        -msoftmax * torch.log(msoftmax + 1e-6))  # loss_ent + loss_div
                im_loss = entropy_loss * args.ent_par
                loss += im_loss

            loss.backward()
            optimizer_e.step()
            optimizer_c.step()
            if epoch > 0:  # warm up: skip discriminator updates in the first epoch
                optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_d={loss_d.item():.4f} "
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_data_all_loader)

    return encoder, classifier
def train_src(model, source_data_loader, target_data_loader, data_loader_eval):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    model.train()
    if params.usemixup:
        target_data_loader = list(target_data_loader)

    # setup criterion and optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=params.pre_c_learning_rate,
                           betas=(params.beta1, params.beta2),
                           weight_decay=params.weight_decay)
    if params.labelsmoothing:
        criterion = LabelSmoothingCrossEntropy(smoothing=params.smoothing)
    else:
        criterion = nn.CrossEntropyLoss()

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs_pre):
        for step, (images, labels) in enumerate(source_data_loader):
            # make images and labels variable
            images = make_cuda(images)
            labels = make_cuda(labels.squeeze_())

            # zero gradients for optimizer
            optimizer.zero_grad()

            # source / target mixup: blend each source batch with a random
            # target batch (lam is returned but unused; labels stay source)
            if params.usemixup:
                images, lam = mixup_data(
                    images,
                    target_data_loader[randint(0, len(target_data_loader) - 1)][0])

            # compute loss for critic
            preds = model(images)
            loss = criterion(preds, labels)

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % params.log_step_pre == 0:
                print("Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs_pre, step + 1,
                    len(source_data_loader), loss.item()))

        # eval model on test set
        if (epoch + 1) % params.eval_step_pre == 0:
            print("eval", end='')
            eval_src(model, data_loader_eval)

        # save model parameters
        if (epoch + 1) % params.save_step_pre == 0:
            save_model(model, "ADDA-source_cnn-{}.pt".format(epoch + 1))

    # save final model
    save_model(model, "ADDA-source_cnn-final.pt")

    return model