Example No. 1
def evaluate(encoder, classifier, data_loader):
    """Evaluation for target encoder by source classifier on target dataset."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (reviews, mask, labels) in data_loader:
        reviews = make_cuda(reviews)
        mask = make_cuda(mask)
        labels = make_cuda(labels)

        with torch.no_grad():
            feat = encoder(reviews, mask)
            preds = classifier(feat)
        loss += criterion(preds, labels).item()
        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum().item()

    loss /= len(data_loader)
    acc /= len(data_loader.dataset)

    print("Avg Loss = %.4f, Avg Accuracy = %.4f" % (loss, acc))

    return acc
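
Nearly every example on this page relies on a make_cuda helper that is not shown. A minimal sketch, assuming it simply moves a tensor or module to the GPU when CUDA is available:

import torch

def make_cuda(x):
    # move a tensor or nn.Module to the GPU when one is available (assumed behaviour)
    return x.cuda() if torch.cuda.is_available() else x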
Example No. 2
def eval_tgt(encoder, classifier, data_loader):
    """Evaluation for target encoder by source classifier on target dataset."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (images, labels) in data_loader:
        images = make_cuda(images)
        labels = make_cuda(labels).squeeze_()

        with torch.no_grad():
            preds = classifier(encoder(images))
        loss += criterion(preds, labels).item()

        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(labels.data).cpu().sum().item()

    loss /= len(data_loader)
    acc /= len(data_loader.dataset)

    print("Avg Loss = {}, Avg Accuracy = {:2%}".format(loss, acc))
Example No. 3
def save_features(args, encoder, data_loader):
    """save inferred features."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    # classifier.eval()
    x = []
    y = []

    if args.adapt_method == 'shot':
        for (reviews, masks, labels, _) in data_loader:
            reviews = make_cuda(reviews)
            masks = make_cuda(masks)
            # labels = make_cuda(labels)

            with torch.no_grad():
                feat = torch.squeeze(encoder(reviews, masks)).cpu().numpy()
            # collect features and labels (the original snippet dropped them in this branch)
            x.append(feat)
            y.append(labels.cpu().numpy())
    else:
        for (reviews, masks, labels) in data_loader:
            reviews = make_cuda(reviews)
            masks = make_cuda(masks)
            # labels = make_cuda(labels)

            with torch.no_grad():
                feat = torch.squeeze(encoder(reviews, masks)).cpu().numpy()
                # preds = classifier(feat)
            x.append(feat)
            y.append(labels.cpu().numpy())
    x = np.asarray(x)
    y = np.asarray(y)
    return x, y
Example No. 4
def eval_(model, data_loader, mode):
    """Evaluate classifier for source domain."""
    # set eval state for Dropout and BN layers
    model.eval()
    alpha = 0
    # init accuracy (this function tracks accuracy only, no loss)
    acc = 0

    len_dataloader = len(data_loader)
    data_iter = iter(data_loader)

    i = 0
    # evaluate network
    while i < len_dataloader:

        data_source = next(data_iter)
        s_img, s_label = data_source

        s_image = make_cuda(s_img)
        s_label = make_cuda(s_label)
        with torch.no_grad():
            preds, _ = model(s_image, alpha)

        pred_cls = preds.data.max(1)[1]
        acc += pred_cls.eq(s_label).cpu().sum().item()

        i += 1

    acc /= len(data_loader.dataset)

    print("{}, Avg Accuracy = {:.2%}".format(mode, acc))
Example No. 5
def train(args, model, data_loader, initial=False):
    MSELoss = nn.MSELoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    model.train()
    num_epochs = args.initial_epochs if initial else args.num_epochs

    for epoch in range(num_epochs):
        loss = 0
        for step, (features, targets) in enumerate(data_loader):
            features = make_cuda(features)
            targets = make_cuda(targets)

            optimizer.zero_grad()

            preds = model(features)
            mse_loss = MSELoss(preds, targets)
            loss += mse_loss.item()
            mse_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.max_grad_norm)
            optimizer.step()

            # print step info
            if (step + 1) % args.log_step == 0:
                print(
                    "Epoch [%.3d/%.3d] Step [%.3d/%.3d]: MSE_loss=%.4f, RMSE_loss=%.4f"
                    % (epoch + 1, num_epochs, step + 1, len(data_loader),
                       loss / args.log_step, math.sqrt(loss / args.log_step)))
                loss = 0
    return model
Example No. 6
def dann_adapt_data_free(args, encoder, tgt_encoder, discriminator, classifier,
                         src_data_loader, tgt_train_loader, tgt_all_loader):
    """
    src tgt data free version of DANN
    """
    # set train state for Dropout and BN layers
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.CrossEntropyLoss()
    optimizer_e = optim.Adam(encoder.parameters(), lr=param.c_lr)  # unused here: the data-free loaders already yield extracted features
    optimizer_c = optim.Adam(classifier.parameters(), lr=param.c_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_dataloader = min(len(src_data_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_train_loader))
        for i, ((src_feat, src_label), (tgt_feat, _)) in enumerate(pbar):
            p = float(i + epoch *
                      len_dataloader) / args.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1
            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_feat = make_cuda(tgt_feat)

            # zero gradients for optimizers
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # extract and concat features
            s_class_output = classifier(src_feat)
            s_reverse_feat = ReverseLayerF.apply(src_feat, alpha)
            s_domain_output = discriminator(s_reverse_feat)
            loss_s_label = loss_class(s_class_output, src_label)
            s_domain_label = make_cuda(torch.zeros(src_feat.size()[0]).long())
            loss_s_domain = loss_domain(s_domain_output, s_domain_label)

            t_reverse_feat = ReverseLayerF.apply(tgt_feat, alpha)
            t_domain_output = discriminator(t_reverse_feat)
            t_domain_label = make_cuda(torch.ones(tgt_feat.size()[0]).long())
            loss_t_domain = loss_domain(t_domain_output, t_domain_label)
            loss = loss_s_label + loss_s_domain + loss_t_domain

            loss.backward()

            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_s_dom={loss_s_domain.item():.4f} " \
                       f"l_t_dom={loss_t_domain.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_all_loader)

    return encoder, classifier
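
ReverseLayerF, used here and in the other DANN variants below, is not defined in these snippets. A minimal sketch of the standard gradient reversal layer (identity in the forward pass, negated and scaled gradient in the backward pass):

from torch.autograd import Function

class ReverseLayerF(Function):
    @staticmethod
    def forward(ctx, x, alpha):
        # remember the scaling factor; the forward pass is the identity
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reverse and scale the gradient; alpha itself receives no gradient
        return grad_output.neg() * ctx.alpha, None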
Example No. 7
def train_src(encoder, classifier, data_loader):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(classifier.parameters()),
                           lr=params.c_learning_rate,
                           betas=(params.beta1, params.beta2))
    criterion = nn.CrossEntropyLoss()

    ####################
    # 2. train network #
    ####################

    for epoch in range(params.num_epochs_pre):
        for step, (images, labels) in enumerate(data_loader):
            # make images and labels variable
            images = make_cuda(images)
            labels = make_cuda(labels.squeeze_())

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute loss for critic
            preds = classifier(encoder(images))
            loss = criterion(preds, labels)

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if ((step + 1) % params.log_step_pre == 0):
                print("Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs_pre, step + 1,
                    len(data_loader), loss.item()))

        # eval model on test set
        if ((epoch + 1) % params.eval_step_pre == 0):
            eval_src(encoder, classifier, data_loader)

        # save model parameters
        if ((epoch + 1) % params.save_step_pre == 0):
            save_model(encoder, "ADDA-source-encoder-{}.pt".format(epoch + 1))
            save_model(classifier,
                       "ADDA-source-classifier-{}.pt".format(epoch + 1))

    # save final model
    save_model(encoder, "ADDA-source-encoder-final.pt")
    save_model(classifier, "ADDA-source-classifier-final.pt")

    return encoder, classifier
Example No. 8
def pretrain(args, encoder, classifier, data_loader):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(classifier.parameters()),
                           lr=param.c_learning_rate)
    CELoss = nn.CrossEntropyLoss()

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    ####################
    # 2. train network #
    ####################

    for epoch in range(args.pre_epochs):
        for step, (reviews, mask, labels) in enumerate(data_loader):
            reviews = make_cuda(reviews)
            mask = make_cuda(mask)
            labels = make_cuda(labels)

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute loss for source classifier
            feat = encoder(reviews, mask)
            preds = classifier(feat)
            cls_loss = CELoss(preds, labels)

            # optimize source classifier
            cls_loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % args.pre_log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f" %
                      (epoch + 1, args.pre_epochs, step + 1, len(data_loader),
                       cls_loss.item()))

    # save final model
    save_model(args, encoder, param.src_encoder_path)
    save_model(args, classifier, param.src_classifier_path)

    return encoder, classifier
Example No. 9
def src_gmm(args, src_encoder, src_data_loader):
    """
    from src features to gmm, then resample new features
    based on source_target_free.py
    """
    src_encoder.eval()
    # cov = {}  # num_classes, feature_dim
    # mean = {}
    # store original features
    # s_ori_features = []
    # store resampled features (num_classes, num_samples, feature_dim)
    s_res_features = np.zeros(
        [param.num_labels, param.num_samples, param.input_dim])
    for i in range(param.num_labels):
        x = []  # 1 class's features: 1 Gaussian
        pbar = tqdm(src_data_loader)
        with torch.no_grad():
            for j, (reviews, masks, labels) in enumerate(pbar):
                reviews = make_cuda(reviews)
                masks = make_cuda(masks)
                for review, mask, label in zip(reviews, masks, labels):
                    if label == i:
                        review = torch.unsqueeze(review, 0)
                        mask = torch.unsqueeze(mask, 0)
                        s_feature = torch.squeeze(src_encoder(
                            review, mask)).cpu().numpy()
                        x.append(s_feature)
                pbar.set_description('src_gmm')

        x = np.asarray(x)  # 1 class features
        # s_ori_features.append(x)
        gmm = GaussianMixture(n_components=6).fit(x)
        if args.dp:
            dp_model = DPModel(x, total_eps=args.total_eps, k=1)
            noise_for_mu = dp_model.add_noise_for_mu()
            z = dp_model.add_noise_for_sigma()
            gmm.means_ += noise_for_mu
            gmm.covariances_ += z  # visualization suggests not dividing by the norm of the new mu & sigma

        # resample
        s_res_features[i, :, :] = gmm.sample(param.num_samples)[0]

    # if args.dp:
    #     np.savez(os.path.join(param.model_root, f's_res_features_dp_{args.total_eps:.2f}'), s_res_features)
    # else:
    #     np.savez(os.path.join(param.model_root, 's_res_features_nodp'), s_res_features)
    #
    # np.savez(os.path.join(param.model_root, 's_ori_features'), s_ori_features)
    return s_res_features  # source resampled features [2, 2000, 768]
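
The resampled features returned by src_gmm are later consumed as (feature, label) batches by the data-free adaptation routines (e.g. dann_adapt_data_free above). One possible way to wrap them in a loader; build_src_feature_loader and its batch size are illustrative names, not taken from the source:

import torch
from torch.utils.data import DataLoader, TensorDataset

def build_src_feature_loader(s_res_features, batch_size=32):
    # s_res_features: [num_labels, num_samples, feature_dim] resampled source features
    num_labels, num_samples, _ = s_res_features.shape
    feats = torch.tensor(s_res_features.reshape(num_labels * num_samples, -1),
                         dtype=torch.float32)
    # class i occupies the i-th block of num_samples rows
    labels = torch.arange(num_labels).repeat_interleave(num_samples)
    return DataLoader(TensorDataset(feats, labels), batch_size=batch_size, shuffle=True)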
Example No. 10
def obtain_label(args, tgt_te, src_encoder, classifier):
    start_test = True
    with torch.no_grad():
        for step, (reviews, masks, labels, tgt_idx) in enumerate(tgt_te):
            reviews = make_cuda(reviews)
            masks = make_cuda(masks)
            feas = src_encoder(reviews, masks)
            outputs = classifier(feas)
            if start_test:
                all_fea = feas.float().cpu()
                all_output = outputs.float().cpu()
                all_label = labels.float()
                start_test = False
            else:
                all_fea = torch.cat((all_fea, feas.float().cpu()), 0)
                all_output = torch.cat((all_output, outputs.float().cpu()), 0)
                all_label = torch.cat((all_label, labels.float()), 0)

    all_output = nn.Softmax(dim=1)(all_output)
    _, predict = torch.max(all_output, 1)
    prev_acc = torch.sum(
        torch.squeeze(predict).float() == all_label).item() / float(
            all_label.size()[0])

    all_fea = torch.cat((all_fea, torch.ones(all_fea.size(0), 1)), 1)
    all_fea = (all_fea.t() / torch.norm(all_fea, p=2, dim=1)).t()
    all_fea = all_fea.float().cpu().numpy()

    K = all_output.size(1)
    aff = all_output.float().cpu().numpy()
    initc = aff.transpose().dot(all_fea)
    initc = initc / (1e-8 + aff.sum(axis=0)[:, None])
    dd = cdist(all_fea, initc, 'cosine')
    pred_label = dd.argmin(axis=1)
    acc = np.sum(pred_label == all_label.float().numpy()) / len(all_fea)

    for _ in range(1):  # a single refinement round of the cluster centers
        aff = np.eye(K)[pred_label]
        initc = aff.transpose().dot(all_fea)
        initc = initc / (1e-8 + aff.sum(axis=0)[:, None])
        dd = cdist(all_fea, initc, 'cosine')
        pred_label = dd.argmin(axis=1)
        acc = np.sum(pred_label == all_label.float().numpy()) / len(all_fea)

    log_str = 'Accuracy = {:.2f}% -> {:.2f}%'.format(prev_acc * 100, acc * 100)
    # args.out_file.write(log_str + '\n')
    # args.out_file.flush()
    print(log_str + '\n')
    return pred_label  # .astype('int')
Example No. 11
def shot_adapt(args, encoder, classifier, tgt_train_loader, tgt_all_loader,
               tgt_te):
    classifier.train()
    optimizer = optim.SGD(encoder.parameters(), lr=param.d_lr)
    interval_iter = len(tgt_train_loader)

    for epoch in range(args.num_epochs):
        pbar = tqdm(tgt_train_loader)
        for step, (tgt_reviews, tgt_masks, _, tgt_idx) in enumerate(pbar):
            if len(tgt_reviews) == 1:
                continue
            if step % interval_iter == 0 and args.cls_par > 0:
                encoder.eval()
                mem_label = obtain_label(args, tgt_te, encoder, classifier)
                encoder.train()

            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)
            features_test = encoder(tgt_reviews, tgt_masks)
            outputs_test = classifier(features_test)

            if args.cls_par > 0:
                pred = mem_label[tgt_idx]
                pred = make_cuda(torch.tensor(pred))
                classifier_loss = args.cls_par * nn.CrossEntropyLoss()(
                    outputs_test, pred)
            else:
                classifier_loss = torch.tensor(0.0).cuda()

            if args.ent:
                softmax_out = nn.Softmax(dim=1)(
                    outputs_test)  # outputs_test is C(E(xt))
                entropy_loss = torch.mean(entropy(softmax_out))  # loss_ent
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    entropy_loss -= torch.sum(
                        -msoftmax *
                        torch.log(msoftmax + 1e-6))  # loss_ent + loss_div

                im_loss = entropy_loss * args.ent_par
                classifier_loss += im_loss

            optimizer.zero_grad()
            classifier_loss.backward()
            optimizer.step()

    return encoder, classifier
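
shot_adapt above (and dann_adapt_src_free further down) call an entropy helper that is not shown. A minimal sketch consistent with its use on softmax outputs, returning the per-sample Shannon entropy:

import torch

def entropy(softmax_out, eps=1e-6):
    # per-sample entropy of already-softmaxed predictions
    return -torch.sum(softmax_out * torch.log(softmax_out + eps), dim=1)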
Example No. 12
def __init__(self, n_features, n_hidden, n_layers, n_output, weight, bidirectional):
    super(weightedLSTM, self).__init__()
    self.n_hidden = n_hidden
    self.n_layers = n_layers
    self.weight = make_cuda(torch.FloatTensor(weight))
    self.bidirectional = bidirectional
    self.lstm = nn.LSTM(input_size=n_features, hidden_size=n_hidden,
                        num_layers=n_layers, bidirectional=bidirectional,
                        batch_first=True)
    self.regr = nn.Linear(2 * n_hidden if bidirectional else n_hidden, n_output)
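
Only the constructor of weightedLSTM is shown. A possible forward pass consistent with those attributes is sketched below (the weight tensor presumably feeds a weighted loss elsewhere and is not used here); this is an assumption, not code from the source:

def forward(self, x):
    # run the (bi)LSTM over a batch-first sequence: out is (batch, seq_len, hidden * num_directions)
    out, _ = self.lstm(x)
    # regress from the final time step
    return self.regr(out[:, -1, :])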
Example No. 13
def tgt_gmm(encoder, tgt_data_all_loader, num_cluster):
    """
    build target GMM and resample from it, used in adapt
    based on source_target_free.py
    """
    encoder.eval()
    t_features = []
    pbar = tqdm(tgt_data_all_loader)
    for j, (reviews, masks, _) in enumerate(pbar):
        reviews = make_cuda(reviews)
        masks = make_cuda(masks)
        with torch.no_grad():
            for review, mask in zip(reviews, masks):
                review = torch.unsqueeze(review, 0)
                mask = torch.unsqueeze(mask, 0)
                feature = torch.squeeze(encoder(review, mask))
                feature = feature.cpu().numpy()
                t_features.append(feature)
        pbar.set_description('tgt_gmm')

    t_features = np.asarray(t_features)

    # num_class * num_cluster, not sure why num_cluster is not 1
    gmm = GaussianMixture(n_components=param.num_labels *
                          num_cluster, ).fit(t_features)

    tgt_mean = gmm.means_
    tgt_var = gmm.covariances_
    print(gmm.converged_)
    t_feature_dict = np.zeros([param.num_samples, param.input_dim
                               ])  # param.num_resample samples, feature_dim
    p = gmm.weights_
    p[-1] += 1 - np.sum(p)
    # number of samples to draw from each GMM component
    counts = np.random.multinomial(param.num_samples, p, 1)
    k = 0
    for i in range(param.num_labels * num_cluster):
        t_feature_dict[k:k + counts[0, i], :] = np.random.multivariate_normal(
            tgt_mean[i, :], tgt_var[i, :, :], counts[0, i])
        k += counts[0, i]

    return t_feature_dict  # [2000, 768]
Example No. 14
def pretrain(args, encoder, classifier, data_loader):
    """Train classifier for source domain."""

    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(classifier.parameters()),
                           lr=param.c_lr)
    ce_loss = nn.CrossEntropyLoss()

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    for epoch in range(args.pre_epochs):
        pbar = tqdm(data_loader)
        for step, (reviews, mask, labels) in enumerate(pbar):
            reviews = make_cuda(reviews)
            mask = make_cuda(mask)
            labels = make_cuda(labels)

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute loss for source classifier
            feat = encoder(reviews, mask)
            preds = classifier(feat)
            cls_loss = ce_loss(preds, labels)

            # optimize source classifier
            cls_loss.backward()
            optimizer.step()

            # print step info
            if step % args.pre_log_step == 0:
                desc = f"Epoch [{epoch}/{args.pre_epochs}] Step [{step}/{len(data_loader)}]: " \
                       f"c_loss={cls_loss.item():.4f} "
                pbar.set_description(desc=desc)

    return encoder, classifier
Example No. 15
def train_model(epoch):
    i = 0
    hidden_init = model.state0(batch_size)
    if options.cuda:
        embedding.cuda()
        model.cuda()
        hidden_init = utils.make_cuda(hidden_init)

    loss_avg = 0

    for s in range(num_batches - 1):
        embed_optimizer.zero_grad()
        model_optimizer.zero_grad()
        batch = Variable(
            train_x.narrow(0, s * seq_length, seq_length + 1).long())
        start = time.time()
        hidden = hidden_init
        if options.cuda:
            batch = batch.cuda()
        loss = 0
        for t in range(seq_length):
            emb = embedding(batch[t])
            hidden, output = model(emb, hidden)
            loss_step = loss_fn(output, batch[t + 1])
            loss += loss_step
            writer.add_scalar('loss per step', loss_step, i)
            i += 1

        writer.add_scalar('loss per batch ', loss, s)

        loss.backward()

        hidden_init = utils.copy_state(hidden)
        gn = utils.calc_grad_norm(model)
        utils.clip_gradient(model, model_settings['clip_gradient'])
        utils.clip_gradient(embedding, model_settings['clip_gradient'])
        embed_optimizer.step()
        model_optimizer.step()
        loss_avg = .99 * loss_avg + .01 * loss.item() / seq_length

        if s % 10 == 0:
            print(
                f'epoch: {epoch} | batch: {s}/{num_batches} | step loss: {loss.item() / seq_length} | batch loss: {loss.item()} | avg loss: {loss_avg} | time: {time.time() - start}s'
            )
Example No. 16
def evaluate(args, model, scaler, data_loader):
    model.eval()
    model.lstm.flatten_parameters()
    all_preds = []
    all_targets = []

    for features, targets in data_loader:
        features = make_cuda(features)

        with torch.no_grad():
            preds = model(features)
        all_preds.append(preds)
        all_targets.append(targets)

    all_preds = scaler.inverse_transform(
        torch.cat(all_preds, dim=0).cpu().numpy().reshape(-1, 1))
    all_targets = scaler.inverse_transform(
        torch.cat(all_targets, dim=0).cpu().numpy().reshape(-1, 1))
    mse = mean_squared_error(all_targets, all_preds)
    rmse = math.sqrt(mse)
    mae = mean_absolute_error(all_targets, all_preds)
    print("RMSE = %.4f, MAE = %.4f\n" % (rmse, mae))
    return rmse, mae
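
This evaluate variant expects scaler to be an already-fitted scaler exposing inverse_transform (e.g. a scikit-learn MinMaxScaler fitted on the raw training targets). A minimal sketch, where train_targets is a hypothetical stand-in for the real targets:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

train_targets = np.random.rand(100)  # stand-in for the real 1-D training targets
scaler = MinMaxScaler()
scaled_targets = scaler.fit_transform(train_targets.reshape(-1, 1))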
Example No. 17
        next_input = Variable(indices[0])
        if options.cuda:
            next_input = next_input.cuda()

    return ''.join(chr(i) for i in sample)


if __name__ == '__main__':
    if options.test:
        checkpoint = torch.load(options.load_model)
        embedding = checkpoint['embedding']
        model = checkpoint['model']

        state = model.state0(batch_size)
        if options.cuda:
            state = utils.make_cuda(state)
            embedding.cuda()
            model.cuda()

        gen_text = generation(embedding, model, state, options.n,
                              options.primer)
        print(gen_text)
    else:
        lr = model_settings['learning_rate']
        layers = model_settings['layers']
        batch_size = model_settings['batch_size']
        rnn_size = model_settings['rnn_size']
        embed_size = model_settings['embed_size']
        seq_length = model_settings['seq_length']
        dropout = model_settings['dropout']
        data_size = 256  # ???
Example No. 18
def adda_adapt(args, src_encoder, tgt_encoder, discriminator, src_data_loader,
               tgt_data_loader):
    """
    Adapt tgt encoder by ADDA
    can run, but tgt acc is bad, only 0.5
    """
    # set train state for Dropout and BN layers
    src_encoder.eval()
    tgt_encoder.train()
    discriminator.train()

    # setup criterion and optimizer
    bce_loss = nn.BCELoss()
    optimizer_tgt = optim.Adam(tgt_encoder.parameters(), lr=param.d_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)

    len_data_loader = min(len(src_data_loader), len(tgt_data_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_masks, _), (tgt_reviews, tgt_masks,
                                                 _)) in enumerate(pbar):

            # move inputs to the GPU (the original snippet skipped this step)
            src_reviews = make_cuda(src_reviews)
            src_masks = make_cuda(src_masks)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizer
            optimizer_d.zero_grad()

            # extract and concat features
            feat_src = src_encoder(src_reviews, src_masks)
            feat_tgt = tgt_encoder(tgt_reviews, tgt_masks)
            feat_concat = torch.cat((feat_src, feat_tgt), 0)

            # predict on discriminator
            pred_concat = discriminator(feat_concat.detach())

            # prepare real and fake label
            label_src = make_cuda(torch.ones(feat_src.size(0))).unsqueeze(1)
            label_tgt = make_cuda(torch.zeros(feat_tgt.size(0))).unsqueeze(1)
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for critic
            d_loss = bce_loss(pred_concat, label_concat)
            d_loss.backward()

            # optimize critic
            optimizer_d.step()
            # pred_cls = pred_concat.max(1)[1]
            # acc = (pred_cls == label_concat).float().mean()

            # zero gradients for optimizer
            optimizer_tgt.zero_grad()

            # extract target features
            # feat_tgt = tgt_encoder(tgt_reviews, tgt_masks)

            # predict on discriminator
            pred_tgt = discriminator(feat_tgt)

            # prepare fake labels
            # label_tgt = make_cuda(torch.ones(feat_tgt.size(0)).long())

            # compute loss for target encoder
            loss_tgt = bce_loss(pred_tgt, label_src)
            loss_tgt.backward()

            # optimize target encoder
            optimizer_tgt.step()

            if (step + 1) % args.log_step == 0:
                desc = "Epoch [{}/{}] Step [{}/{}]: t_loss={:.4f} c_loss={:.4f} ".format(
                    epoch,
                    args.num_epochs,
                    step,
                    len_data_loader,
                    loss_tgt.item(),
                    d_loss.item(),
                )
                pbar.set_description(desc=desc)

    # torch.save(critic.state_dict(), os.path.join(
    #     args.model_root, "ADDA-critic.pt"))
    # torch.save(tgt_encoder.state_dict(), os.path.join(
    #     args.model_root, "ADDA-target-encoder.pt"))
    return tgt_encoder
Example No. 19
def train(args, encoder, classifier, src_data_loader, src_data_loader_eval,
          tgt_data_loader, tgt_data_loader_all):
    """Train encoder for target domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    classifier.train()

    # setup criterion and optimizer
    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(classifier.parameters()),
                           lr=param.c_learning_rate)

    ####################
    # 2. train network #
    ####################

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_mask, src_labels), (tgt_reviews, tgt_mask,
                                                         _)) in data_zip:
            src_reviews = make_cuda(src_reviews)
            src_mask = make_cuda(src_mask)
            src_labels = make_cuda(src_labels)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_mask = make_cuda(tgt_mask)

            # extract and concat features
            src_feat = encoder(src_reviews, src_mask)
            tgt_feat = encoder(tgt_reviews, tgt_mask)
            src_preds = classifier(src_feat)

            # prepare real and fake label
            optimizer.zero_grad()
            cls_loss = CELoss(src_preds, src_labels)
            if args.method == 'coral':
                adapt_loss = CORAL(src_feat, tgt_feat)
            else:  # args.method == 'mmd'
                adapt_loss = MMD(src_feat, tgt_feat)
            loss = cls_loss + args.alpha * adapt_loss

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % args.log_step == 0:
                print(
                    "Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f coral_loss=%.4f"
                    %
                    (epoch + 1, args.num_epochs, step + 1,
                     len(src_data_loader), cls_loss.item(), adapt_loss.item()))

        evaluate(encoder, classifier, src_data_loader)
        evaluate(encoder, classifier, src_data_loader_eval)
        evaluate(encoder, classifier, tgt_data_loader_all)

    save_model(encoder, param.encoder_path)
    save_model(classifier, param.classifier_path)

    return encoder, classifier
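
CORAL and MMD are imported from elsewhere in the repository and are not shown. A minimal CORAL sketch following the usual Deep CORAL formulation (squared Frobenius distance between source and target feature covariances); the MMD counterpart would compare kernel mean embeddings instead:

import torch

def CORAL(source, target):
    # source, target: (batch, d) feature matrices
    d = source.size(1)
    ns, nt = source.size(0), target.size(0)

    # center the features and form the covariance matrices
    source = source - source.mean(dim=0, keepdim=True)
    target = target - target.mean(dim=0, keepdim=True)
    cov_s = source.t() @ source / (ns - 1)
    cov_t = target.t() @ target / (nt - 1)

    # squared Frobenius distance, normalised by 4 d^2
    return torch.sum((cov_s - cov_t) ** 2) / (4 * d * d)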
Example No. 20
def train_tgt(src_encoder, tgt_encoder, critic,
              src_data_loader, tgt_data_loader):
    """Train encoder for target domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    tgt_encoder.train()
    critic.train()

    # setup criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer_tgt = optim.Adam(tgt_encoder.parameters(),
                               lr=params.c_learning_rate,
                               betas=(params.beta1, params.beta2))
    optimizer_critic = optim.Adam(critic.parameters(),
                                  lr=params.d_learning_rate,
                                  betas=(params.beta1, params.beta2))
    len_data_loader = min(len(src_data_loader), len(tgt_data_loader))

    ####################
    # 2. train network #
    ####################

    for epoch in range(params.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((images_src, _), (images_tgt, _)) in data_zip:
            ###########################
            # 2.1 train discriminator #
            ###########################

            # make images variable
            images_src = make_cuda(images_src)
            images_tgt = make_cuda(images_tgt)

            # zero gradients for optimizer
            optimizer_critic.zero_grad()

            # extract and concat features
            feat_src = src_encoder(images_src)
            feat_tgt = tgt_encoder(images_tgt)
            feat_concat = torch.cat((feat_src, feat_tgt), 0)

            # predict on discriminator
            pred_concat = critic(feat_concat.detach())

            # prepare real and fake label
            label_src = make_cuda(torch.ones(feat_src.size(0)).long())
            label_tgt = make_cuda(torch.zeros(feat_tgt.size(0)).long())
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for critic
            loss_critic = criterion(pred_concat, label_concat)
            loss_critic.backward()

            # optimize critic
            optimizer_critic.step()

            pred_cls = torch.squeeze(pred_concat.max(1)[1])
            acc = (pred_cls == label_concat).float().mean()

            ############################
            # 2.2 train target encoder #
            ############################

            # zero gradients for optimizer
            optimizer_critic.zero_grad()
            optimizer_tgt.zero_grad()

            # extract target features
            feat_tgt = tgt_encoder(images_tgt)

            # predict on discriminator
            pred_tgt = critic(feat_tgt)

            # prepare fake labels
            label_tgt = make_cuda(torch.ones(feat_tgt.size(0)).long())

            # compute loss for target encoder
            loss_tgt = criterion(pred_tgt, label_tgt)
            loss_tgt.backward()

            # optimize target encoder
            optimizer_tgt.step()

            #######################
            # 2.3 print step info #
            #######################
            if ((step + 1) % params.log_step == 0):
                print("Epoch [{}/{}] Step [{}/{}]:"
                      "d_loss={:.5f} g_loss={:.5f} acc={:.5f}"
                      .format(epoch + 1,
                              params.num_epochs,
                              step + 1,
                              len_data_loader,
                              loss_critic.item(),
                              loss_tgt.item(),
                              acc.item()))

        #############################
        # 2.4 save model parameters #
        #############################
        if ((epoch + 1) % params.save_step == 0):
            torch.save(critic.state_dict(), os.path.join(
                params.model_root,
                "ADDA-critic-{}.pt".format(epoch + 1)))
            torch.save(tgt_encoder.state_dict(), os.path.join(
                params.model_root,
                "ADDA-target-encoder-{}.pt".format(epoch + 1)))

    torch.save(critic.state_dict(), os.path.join(
        params.model_root,
        "ADDA-critic-final.pt"))
    torch.save(tgt_encoder.state_dict(), os.path.join(
        params.model_root,
        "ADDA-target-encoder-final.pt"))
    return tgt_encoder
Example No. 21
def train(args, encoder, cls_classifier, dom_classifier,
          src_data_loader, src_data_loader_eval,
          tgt_data_loader, tgt_data_loader_all):
    """Train encoder for target domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    encoder.train()
    cls_classifier.train()
    dom_classifier.train()

    # setup criterion and optimizer
    CELoss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(cls_classifier.parameters()) +
                           list(dom_classifier.parameters()),
                           lr=param.c_learning_rate)

    ####################
    # 2. train network #
    ####################

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_mask, src_labels), (tgt_reviews, tgt_mask, _)) in data_zip:
            src_reviews = make_cuda(src_reviews)
            src_mask = make_cuda(src_mask)
            src_labels = make_cuda(src_labels)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_mask = make_cuda(tgt_mask)

            # extract and concat features
            src_feat = encoder(src_reviews, src_mask)
            tgt_feat = encoder(tgt_reviews, tgt_mask)
            feat_concat = torch.cat((src_feat, tgt_feat), 0)
            src_preds = cls_classifier(src_feat)
            dom_preds = dom_classifier(feat_concat, alpha=args.alpha)

            # prepare real and fake label
            optimizer.zero_grad()
            label_src = make_cuda(torch.ones(src_feat.size(0)))
            label_tgt = make_cuda(torch.zeros(tgt_feat.size(0)))
            label_concat = torch.cat((label_src, label_tgt), 0).long()
            loss_cls = CELoss(src_preds, src_labels)
            loss_dom = CELoss(dom_preds, label_concat)
            loss = loss_cls + loss_dom

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % args.log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f dom_loss=%.4f"
                      % (epoch + 1,
                         args.num_epochs,
                         step + 1,
                         len(src_data_loader),
                         loss_cls.item(),
                         loss_dom.item()))

        evaluate(encoder, cls_classifier, src_data_loader)
        evaluate(encoder, cls_classifier, src_data_loader_eval)
        evaluate(encoder, cls_classifier, tgt_data_loader_all)

    save_model(encoder, param.encoder_path)
    save_model(cls_classifier, param.cls_classifier_path)
    save_model(dom_classifier, param.dom_classifier_path)

    return encoder, cls_classifier, dom_classifier
Example No. 22
def dann_adapt(args, encoder, src_encoder, discriminator, classifier,
               src_data_loader, tgt_train_loader, tgt_all_loader):
    """
    adding KD, encoder for adapting, src encoder only for KD
    """
    # set train state for Dropout and BN layers
    encoder.train()  # works as tgt encoder, for adapting
    src_encoder.eval()  # for KD loss
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.CrossEntropyLoss()  # maybe need changing
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_e = optim.Adam(encoder.parameters(), lr=param.c_lr)
    optimizer_c = optim.Adam(classifier.parameters(), lr=param.c_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_dataloader = min(len(src_data_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_train_loader))
        for i, ((src_reviews, src_masks, src_labels), (tgt_reviews, tgt_masks,
                                                       _)) in enumerate(pbar):
            p = float(i + epoch *
                      len_dataloader) / args.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1
            src_reviews = make_cuda(src_reviews)
            src_masks = make_cuda(src_masks)
            src_labels = make_cuda(src_labels)

            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizers
            optimizer_e.zero_grad()
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # extract and concat features
            src_feat = encoder(src_reviews, src_masks)
            s_class_output = classifier(src_feat)
            s_reverse_feat = ReverseLayerF.apply(src_feat, alpha)
            s_domain_output = discriminator(s_reverse_feat)
            loss_s_label = loss_class(s_class_output, src_labels)
            s_domain_label = make_cuda(
                torch.zeros(s_domain_output.size()[0]).long())
            loss_s_domain = loss_domain(s_domain_output, s_domain_label)

            tgt_feat = encoder(tgt_reviews, tgt_masks)
            t_reverse_feat = ReverseLayerF.apply(tgt_feat, alpha)
            t_domain_output = discriminator(t_reverse_feat)
            t_domain_label = make_cuda(
                torch.ones(t_domain_output.size()[0]).long())
            loss_t_domain = loss_domain(t_domain_output, t_domain_label)
            loss = loss_s_label + loss_s_domain + loss_t_domain

            if args.kd:
                t = args.temperature
                with torch.no_grad():
                    src_tgt_feat = src_encoder(tgt_reviews, tgt_masks)
                    src_prob = F.softmax(classifier(src_tgt_feat) / t, dim=-1)
                tgt_prob = F.log_softmax(classifier(tgt_feat) / t, dim=-1)
                kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t
                loss += kd_loss

            loss.backward()

            optimizer_e.step()
            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_s_dom={loss_s_domain.item():.4f} " \
                       f"l_t_dom={loss_t_domain.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_all_loader)

    return encoder, classifier
Example No. 23
def cdan_adapt_data_free(args, encoder, discriminator, classifier,
                         src_data_loader, tgt_data_train_loader,
                         tgt_data_all_loader):
    """
    cdan src tgt data free
    """
    # set train state for Dropout and BN layers
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.BCELoss()
    optimizer_c = optim.SGD(classifier.parameters(),
                            lr=param.c_lr,
                            weight_decay=5e-3,
                            momentum=0.9)
    optimizer_d = optim.SGD(discriminator.parameters(),
                            lr=param.d_lr,
                            weight_decay=5e-3,
                            momentum=0.9)
    len_dataloader = min(len(src_data_loader), len(tgt_data_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_data_train_loader))
        for i, ((src_feat, src_label), (tgt_feat, _)) in enumerate(pbar):
            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_feat = make_cuda(tgt_feat)

            # zero gradients for optimizers
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # extract and concat features
            feat = torch.cat((src_feat, tgt_feat), 0)
            s_class_output = classifier(src_feat)
            loss_s_label = loss_class(
                s_class_output,
                src_label)  # maybe change s_class_output to before softmax
            class_output = classifier(feat)
            op_out = torch.bmm(class_output.unsqueeze(2), feat.unsqueeze(1))
            ad_out = discriminator(
                op_out.view(-1,
                            class_output.size(1) * feat.size(1)))
            dc_target = torch.from_numpy(
                np.array([[1]] * src_feat.size()[0] +
                         [[0]] * tgt_feat.size()[0])).float().cuda()
            loss_d = loss_domain(ad_out, dc_target)

            loss = loss_s_label + loss_d
            loss.backward()

            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_d={loss_d.item():.4f} "
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_data_all_loader)

    return encoder, classifier
Example No. 24
def aad_adapt(args, src_encoder, tgt_encoder, discriminator, src_classifier,
              src_loader, tgt_train_loader, tgt_data_all_loader):
    """
    Train tgt_encoder using bert-AAD
    swapped src data to tgt data for KD
    """

    # set train state for Dropout and BN layers
    src_encoder.eval()
    src_classifier.eval()
    tgt_encoder.train()
    discriminator.train()

    # setup criterion and optimizer
    bce_loss = nn.BCELoss()
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_g = optim.Adam(tgt_encoder.parameters(), lr=param.d_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_data_loader = min(len(src_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_loader, tgt_train_loader))
        for step, ((src_reviews, src_masks, _), (tgt_reviews, tgt_masks,
                                                 _)) in enumerate(pbar):
            src_reviews = make_cuda(src_reviews)
            src_masks = make_cuda(src_masks)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizer
            optimizer_d.zero_grad()

            # extract and concat features
            with torch.no_grad():
                src_feat = src_encoder(src_reviews, src_masks)
            src_tgt_feat = src_encoder(
                tgt_reviews,
                tgt_masks)  # was tgt_encoder(src_reviews, src_masks)
            tgt_feat = tgt_encoder(tgt_reviews, tgt_masks)
            feat_concat = torch.cat(
                (src_feat, tgt_feat),
                0)  # different from original code, is correct

            # predict on discriminator
            pred_concat = discriminator(feat_concat.detach())

            # prepare real and fake label
            src_label = make_cuda(torch.ones(src_feat.size(0))).unsqueeze(1)
            tgt_label = make_cuda(torch.zeros(tgt_feat.size(0))).unsqueeze(1)
            label_concat = torch.cat((src_label, tgt_label), 0)

            # domain discriminator loss of discriminator
            d_loss = bce_loss(pred_concat, label_concat)
            d_loss.backward()
            # increase the clip_value from 0.01 to 0.1 is bad
            for p in discriminator.parameters():
                p.data.clamp_(-args.clip_value, args.clip_value)
            # optimize discriminator
            optimizer_d.step()

            # zero gradients for optimizer
            optimizer_g.zero_grad()
            t = args.temperature

            # predict on discriminator
            pred_tgt = discriminator(tgt_feat)

            # logits for KL-divergence
            with torch.no_grad():
                src_prob = F.softmax(src_classifier(src_tgt_feat) / t, dim=-1)
            tgt_prob = F.log_softmax(src_classifier(tgt_feat) / t,
                                     dim=-1)  # changed direction
            kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t

            # compute loss for target encoder
            gen_loss = bce_loss(pred_tgt, src_label)  # correct
            loss_tgt = args.alpha * gen_loss + args.beta * kd_loss
            loss_tgt.backward()
            torch.nn.utils.clip_grad_norm_(tgt_encoder.parameters(),
                                           args.max_grad_norm)
            # optimize target encoder
            optimizer_g.step()

            if step % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{step}/{len_data_loader}]: " \
                       f"g_loss={gen_loss.item():.4f} d_loss={d_loss.item():.4f} kd_loss={kd_loss.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, tgt_encoder, src_classifier, tgt_data_all_loader)

    return tgt_encoder
Example No. 25
def adapt(args, src_encoder, tgt_encoder, discriminator, src_classifier,
          src_data_loader, tgt_data_train_loader, tgt_data_all_loader):
    """Train encoder for target domain."""

    # set train state for Dropout and BN layers
    src_encoder.eval()
    src_classifier.eval()
    tgt_encoder.train()
    discriminator.train()

    # setup criterion and optimizer
    BCELoss = nn.BCEWithLogitsLoss()
    KLDivLoss = nn.KLDivLoss(reduction='batchmean')
    optimizer_G = optim.Adam(tgt_encoder.parameters(),
                             lr=param.d_learning_rate)
    optimizer_D = optim.Adam(discriminator.parameters(),
                             lr=param.d_learning_rate)
    len_data_loader = min(len(src_data_loader), len(tgt_data_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_train_loader))
        for step, ((reviews_src, src_mask, _), (reviews_tgt, tgt_mask,
                                                _)) in data_zip:
            reviews_src = make_cuda(reviews_src)
            src_mask = make_cuda(src_mask)

            reviews_tgt = make_cuda(reviews_tgt)
            tgt_mask = make_cuda(tgt_mask)

            # zero gradients for optimizer
            optimizer_D.zero_grad()

            # extract and concat features
            with torch.no_grad():
                feat_src = src_encoder(reviews_src, src_mask)
            feat_src_tgt = tgt_encoder(reviews_src, src_mask)
            feat_tgt = tgt_encoder(reviews_tgt, tgt_mask)
            feat_concat = torch.cat((feat_src_tgt, feat_tgt), 0)

            # predict on discriminator
            pred_concat = discriminator(feat_concat.detach())

            # prepare real and fake label
            label_src = make_cuda(torch.ones(
                feat_src_tgt.size(0))).unsqueeze(1)
            label_tgt = make_cuda(torch.zeros(feat_tgt.size(0))).unsqueeze(1)
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for discriminator
            dis_loss = BCELoss(pred_concat, label_concat)
            dis_loss.backward()

            for p in discriminator.parameters():
                p.data.clamp_(-args.clip_value, args.clip_value)
            # optimize discriminator
            optimizer_D.step()

            # discriminator accuracy: threshold the logits at 0 (sigmoid 0.5)
            pred_cls = (pred_concat > 0).float()
            acc = (pred_cls == label_concat).float().mean()

            # zero gradients for optimizer
            optimizer_G.zero_grad()
            T = args.temperature

            # predict on discriminator
            pred_tgt = discriminator(feat_tgt)

            # logits for KL-divergence
            with torch.no_grad():
                src_prob = F.softmax(src_classifier(feat_src) / T, dim=-1)
            tgt_prob = F.log_softmax(src_classifier(feat_src_tgt) / T, dim=-1)
            kd_loss = KLDivLoss(tgt_prob, src_prob.detach()) * T * T

            # compute loss for target encoder
            gen_loss = BCELoss(pred_tgt, label_src)
            loss_tgt = args.alpha * gen_loss + args.beta * kd_loss
            loss_tgt.backward()
            torch.nn.utils.clip_grad_norm_(tgt_encoder.parameters(),
                                           args.max_grad_norm)
            # optimize target encoder
            optimizer_G.step()

            if (step + 1) % args.log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: "
                      "acc=%.4f g_loss=%.4f d_loss=%.4f kd_loss=%.4f" %
                      (epoch + 1, args.num_epochs, step + 1, len_data_loader,
                       acc.item(), gen_loss.item(), dis_loss.item(),
                       kd_loss.item()))

        evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)

    return tgt_encoder
Example No. 26
def train_src(args, encoder, class_classifier, domain_classifier, src_data_loader, tgt_data_loader, data_loader_eval):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # setup criterion and optimizer
    optimizer = optim.Adam(list(encoder.parameters()) +
                           list(class_classifier.parameters()) +
                           list(domain_classifier.parameters()),
                           lr=param.c_learning_rate,
                           betas=(param.beta1, param.beta2))
    criterion = nn.CrossEntropyLoss()

    # set train state for Dropout and BN layers
    encoder.train()
    class_classifier.train()
    domain_classifier.train()

    ####################
    # 2. train network #
    ####################

    for epoch in range(args.num_epochs):
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((src_reviews, src_labels), (tgt_reviews, _)) in data_zip:

            # move inputs to the GPU (the original snippet skipped this step)
            src_reviews = make_cuda(src_reviews)
            src_labels = make_cuda(src_labels)
            tgt_reviews = make_cuda(tgt_reviews)

            # zero gradients for optimizer
            optimizer.zero_grad()

            # build attention masks and extract features
            src_mask = (src_reviews != 0).long()
            tgt_mask = (tgt_reviews != 0).long()
            src_feat = encoder(src_reviews, src_mask)
            tgt_feat = encoder(tgt_reviews, tgt_mask)
            feat_concat = torch.cat((src_feat, tgt_feat), 0)
            src_preds = class_classifier(src_feat)
            domain_preds = domain_classifier(feat_concat, alpha=args.dom_weight)

            # prepare real and fake label
            label_src = make_cuda(torch.ones(src_feat.size(0)))
            label_tgt = make_cuda(torch.zeros(tgt_feat.size(0)))
            label_concat = torch.cat((label_src, label_tgt), 0).long()
            loss_cls = criterion(src_preds, src_labels)
            loss_dom = criterion(domain_preds, label_concat)
            loss = loss_cls + loss_dom

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if (step + 1) % args.log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f dom_loss=%.4f"
                      % (epoch + 1,
                         args.num_epochs,
                         step + 1,
                         len(src_data_loader),
                         loss_cls.item(),
                         loss_dom.item()))

        # eval model on source train and eval sets
        if (epoch + 1) % args.eval_step == 0:
            eval_src(encoder, class_classifier, src_data_loader)
            eval_src(encoder, class_classifier, data_loader_eval)
            print()

        # save model parameters
        if (epoch + 1) % args.save_step == 0:
            save_model(encoder, "DANN-encoder-{}.pt".format(epoch + 1))
            save_model(class_classifier, "DANN-cls-classifier-{}.pt".format(epoch + 1))
            save_model(domain_classifier, "DANN-dom-classifier-{}.pt".format(epoch + 1))

    # # save final model
    save_model(encoder, "DANN-encoder-final.pt")
    save_model(class_classifier, "DANN-cls-classifier-final.pt")
    save_model(domain_classifier, "DANN-dom-classifier-final.pt")

    return encoder, class_classifier, domain_classifier
Example No. 27
def dann_adapt_src_free(args, encoder, src_encoder, discriminator, classifier,
                        src_data_loader, tgt_train_loader, tgt_all_loader):
    """
    src data free version of DANN, w original tgt data
    """
    # set train state for Dropout and BN layers
    src_encoder.eval()
    encoder.train()
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.CrossEntropyLoss()
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_e = optim.Adam(encoder.parameters(), lr=param.c_lr)
    optimizer_c = optim.Adam(classifier.parameters(), lr=param.c_lr)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=param.d_lr)
    len_dataloader = min(len(src_data_loader), len(tgt_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_train_loader))
        for i, ((src_feat, src_label), (tgt_reviews, tgt_masks,
                                        _)) in enumerate(pbar):
            p = float(i + epoch *
                      len_dataloader) / args.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1
            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizers
            optimizer_e.zero_grad()
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # extract and concat features
            s_class_output = classifier(src_feat)
            s_reverse_feat = ReverseLayerF.apply(src_feat, alpha)
            s_domain_output = discriminator(s_reverse_feat)
            loss_s_label = loss_class(s_class_output, src_label)
            s_domain_label = make_cuda(torch.zeros(src_feat.size()[0]).long())
            loss_s_domain = loss_domain(s_domain_output, s_domain_label)

            tgt_feat = encoder(tgt_reviews, tgt_masks)
            t_reverse_feat = ReverseLayerF.apply(tgt_feat, alpha)
            t_domain_output = discriminator(t_reverse_feat)
            t_domain_label = make_cuda(torch.ones(tgt_feat.size()[0]).long())
            loss_t_domain = loss_domain(t_domain_output, t_domain_label)
            loss = loss_s_label + loss_s_domain + loss_t_domain
            tgt_outputs = classifier(tgt_feat)
            if args.kd:
                t = args.temperature
                with torch.no_grad():
                    src_tgt_feat = src_encoder(tgt_reviews, tgt_masks)
                    src_prob = F.softmax(classifier(src_tgt_feat) / t, dim=-1)
                tgt_prob = F.log_softmax(tgt_outputs / t, dim=-1)
                kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t
                loss += kd_loss

            if args.ent:
                softmax_out = nn.Softmax(dim=1)(tgt_outputs)
                entropy_loss = torch.mean(entropy(softmax_out))  # loss_ent
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    entropy_loss -= torch.sum(
                        -msoftmax *
                        torch.log(msoftmax + 1e-6))  # loss_ent + loss_div
                im_loss = entropy_loss * args.ent_par
                loss += im_loss

            loss.backward()
            optimizer_e.step()
            optimizer_c.step()
            optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_s_dom={loss_s_domain.item():.4f} " \
                       f"l_t_dom={loss_t_domain.item():.4f}"
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_all_loader)

    return encoder, classifier
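# The adaptation loop above relies on a gradient-reversal function, ReverseLayerF.apply,
# that is not defined in this snippet. A minimal sketch, assuming the standard DANN
# formulation (the project's own definition may differ):
from torch.autograd import Function


class ReverseLayerF(Function):
    """Identity in the forward pass; negates and scales the gradient on the way back."""

    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reverse the gradient flowing into the feature encoder and scale it by alpha
        return grad_output.neg() * ctx.alpha, None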
Exemplo n.º 28
0
def train(model, dataloader_source, dataloader_target, source_data_loader_eval,
          target_data_loader_eval):
    """Jointly train a DANN model on labeled source and unlabeled target data."""

    optimizer = optim.Adam(
        model.parameters(),
        lr=params.learning_rate,
    )
    loss_class = torch.nn.NLLLoss()
    loss_domain = torch.nn.NLLLoss()

    model = model.cuda()
    loss_class = loss_class.cuda()
    loss_domain = loss_domain.cuda()

    for epoch in range(params.num_epochs):
        model.train()
        len_dataloader = min(len(dataloader_source), len(dataloader_target))
        data_source_iter = iter(dataloader_source)
        data_target_iter = iter(dataloader_target)

        i = 0
        while i < len_dataloader:

            p = float(i + epoch *
                      len_dataloader) / params.num_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            # training model using source data
            data_source = next(data_source_iter)
            s_img, s_label = data_source

            s_image = make_cuda(s_img)
            s_label = make_cuda(s_label)

            model.zero_grad()
            batch_size = len(s_label)

            domain_label = torch.zeros(batch_size)
            domain_label = domain_label.long().cuda()

            class_output, domain_output = model(s_image, alpha=alpha)

            err_s_label = loss_class(class_output, s_label)
            err_s_domain = loss_domain(domain_output, domain_label)

            # training model using target data
            data_target = next(data_target_iter)
            t_img, _ = data_target
            t_img = make_cuda(t_img)

            batch_size = len(t_img)

            domain_label = torch.ones(batch_size)
            domain_label = domain_label.long().cuda()

            _, domain_output = model(t_img, alpha=alpha)

            err_t_domain = loss_domain(domain_output, domain_label)
            err = err_t_domain + err_s_domain + err_s_label
            err.backward()
            optimizer.step()

            if ((i + 1) % params.log_step == 0):
                print(
                    "Epoch [{}/{}] Step [{}/{}]: d_loss_t={:.4f} / d_loss_s={:.4f} / c_loss_s={:.4f}"
                    .format(epoch + 1, params.num_epochs, i + 1,
                            len_dataloader, err_t_domain.item(),
                            err_s_domain.item(), err_s_label.item()))
            i += 1
        eval_(model, source_data_loader_eval, 'src')
        eval_(model, target_data_loader_eval, 'tgt')

        if ((epoch + 1) % params.save_step == 0):
            save_model(model, "DANN-{}.pt".format(epoch + 1))

    return model
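# Both DANN loops above ramp the gradient-reversal coefficient with
# alpha = 2 / (1 + exp(-10 * p)) - 1, where p is the fraction of training completed,
# following the schedule from the original DANN paper. A small sketch of that schedule,
# factored into a hypothetical helper for clarity:
import numpy as np


def grl_alpha(step, epoch, steps_per_epoch, num_epochs):
    # p runs from 0 to 1 over the whole run; alpha ramps smoothly from 0 toward 1
    p = float(step + epoch * steps_per_epoch) / (num_epochs * steps_per_epoch)
    return 2. / (1. + np.exp(-10 * p)) - 1.


# e.g. grl_alpha(0, 0, 100, 10) == 0.0, while near the end of training alpha ≈ 0.9999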
Exemplo n.º 29
0
def cdan_adapt_src_free(args, encoder, src_encoder, discriminator, classifier,
                        src_data_loader, tgt_data_train_loader,
                        tgt_data_all_loader):
    """
    cdan src data free, w original tgt data
    """
    # set eval state for the frozen source encoder; train state (Dropout/BN) for the rest
    src_encoder.eval()
    encoder.train()
    classifier.train()
    discriminator.train()

    # setup criterion and optimizer
    loss_class = nn.CrossEntropyLoss()
    loss_domain = nn.BCELoss()
    kl_div_loss = nn.KLDivLoss(reduction='batchmean')
    optimizer_e = optim.SGD(encoder.parameters(),
                            lr=param.c_lr,
                            weight_decay=5e-3,
                            momentum=0.9)
    optimizer_c = optim.SGD(classifier.parameters(),
                            lr=param.c_lr,
                            weight_decay=5e-3,
                            momentum=0.9)
    optimizer_d = optim.SGD(discriminator.parameters(),
                            lr=param.d_lr,
                            weight_decay=5e-3,
                            momentum=0.9)
    len_dataloader = min(len(src_data_loader), len(tgt_data_train_loader))

    for epoch in range(args.num_epochs):
        # zip source and target data pair
        pbar = tqdm(zip(src_data_loader, tgt_data_train_loader))
        for i, ((src_feat, src_label), (tgt_reviews, tgt_masks,
                                        _)) in enumerate(pbar):
            src_feat = make_cuda(src_feat)
            src_label = make_cuda(src_label)
            tgt_reviews = make_cuda(tgt_reviews)
            tgt_masks = make_cuda(tgt_masks)

            # zero gradients for optimizers
            optimizer_e.zero_grad()
            optimizer_c.zero_grad()
            optimizer_d.zero_grad()

            # extract and concat features
            tgt_feat = encoder(tgt_reviews, tgt_masks)
            feat = torch.cat((src_feat, tgt_feat), 0)
            s_class_output = classifier(src_feat)
            # TODO: s_class_output may need to be the pre-softmax output,
            # since CrossEntropyLoss expects raw logits
            loss_s_label = loss_class(s_class_output, src_label)
            class_output = classifier(feat)
            # CDAN conditioning: outer product of predictions and features,
            # flattened to (N, num_classes * feat_dim) for the domain discriminator
            op_out = torch.bmm(class_output.unsqueeze(2), feat.unsqueeze(1))
            ad_out = discriminator(
                op_out.view(-1,
                            class_output.size(1) * feat.size(1)))
            # domain labels: 1 for source rows, 0 for target rows
            dc_target = torch.from_numpy(
                np.array([[1]] * src_feat.size()[0] +
                         [[0]] * tgt_feat.size()[0])).float().cuda()
            loss_d = loss_domain(ad_out, dc_target)
            loss = loss_s_label + loss_d
            tgt_outputs = classifier(tgt_feat)
            if args.kd:
                t = args.temperature
                with torch.no_grad():
                    src_tgt_feat = src_encoder(tgt_reviews, tgt_masks)
                    src_prob = F.softmax(classifier(src_tgt_feat) / t, dim=-1)
                tgt_prob = F.log_softmax(tgt_outputs / t, dim=-1)
                kd_loss = kl_div_loss(tgt_prob, src_prob.detach()) * t * t
                loss += kd_loss
            if args.ent:
                softmax_out = nn.Softmax(dim=1)(tgt_outputs)
                entropy_loss = torch.mean(entropy(softmax_out))  # loss_ent
                if args.gent:
                    msoftmax = softmax_out.mean(dim=0)
                    entropy_loss -= torch.sum(
                        -msoftmax *
                        torch.log(msoftmax + 1e-6))  # loss_ent + loss_div
                im_loss = entropy_loss * args.ent_par
                loss += im_loss

            loss.backward()

            optimizer_e.step()
            optimizer_c.step()
            # warm-up: skip discriminator updates during the first epoch
            if epoch > 0:
                optimizer_d.step()

            if i % args.log_step == 0:
                desc = f"Epoch [{epoch}/{args.num_epochs}] Step [{i}/{len_dataloader}] " \
                       f"l_s_label={loss_s_label.item():.4f} l_d={loss_d.item():.4f} "
                pbar.set_description(desc=desc)

        evaluate(args, encoder, classifier, tgt_data_all_loader)

    return encoder, classifier
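# The args.ent / args.gent branch above is a SHOT-style information-maximization loss,
# where entropy() is assumed to return the per-sample Shannon entropy of the softmax
# outputs. A minimal sketch (the project's own helper may differ):
import torch


def entropy(probs, eps=1e-6):
    # per-sample Shannon entropy of a batch of class probabilities, shape (N, C)
    return -torch.sum(probs * torch.log(probs + eps), dim=1)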
Exemplo n.º 30
0
def train_src(model, source_data_loader, target_data_loader, data_loader_eval):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    model.train()

    if params.usemixup:
        # materialize target batches so random ones can be drawn for mixup
        target_data_loader = list(target_data_loader)

    # setup criterion and optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=params.pre_c_learning_rate,
                           betas=(params.beta1, params.beta2),
                           weight_decay=params.weight_decay)

    if params.labelsmoothing:
        criterion = LabelSmoothingCrossEntropy(smoothing=params.smoothing)
    else:
        criterion = nn.CrossEntropyLoss()

    ####################
    # 2. train network #
    ####################

    for epoch in range(params.num_epochs_pre):

        for step, (images, labels) in enumerate(source_data_loader):

            # make images and labels variable
            images = make_cuda(images)
            labels = make_cuda(labels.squeeze_())
            # zero gradients for optimizer
            optimizer.zero_grad()

            # mixup: blend source images with a randomly drawn target batch
            if params.usemixup:
                rand_idx = randint(0, len(target_data_loader) - 1)
                images, lam = mixup_data(images, target_data_loader[rand_idx][0])

            # compute classification loss for the source classifier
            preds = model(images)
            loss = criterion(preds, labels)

            # optimize source classifier
            loss.backward()
            optimizer.step()

            # print step info
            if ((step + 1) % params.log_step_pre == 0):
                print("Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs_pre, step + 1,
                    len(source_data_loader), loss.item()))

        # eval model on test set
        if ((epoch + 1) % params.eval_step_pre == 0):
            print("eval", end='')
            eval_src(model, data_loader_eval)

        # save model parameters
        if ((epoch + 1) % params.save_step_pre == 0):
            save_model(model, "ADDA-source_cnn-{}.pt".format(epoch + 1))

    # save final model
    save_model(model, "ADDA-source_cnn-final.pt")

    return model
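# The pre-training loop above uses two helpers that are not shown here: mixup_data and
# LabelSmoothingCrossEntropy. Minimal sketches consistent with how they are called above;
# the project's actual implementations may differ (e.g. mixup_data might also mix labels):
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


def mixup_data(x_src, x_tgt, alpha=1.0):
    # input-only mixup between a source batch and a same-sized target batch
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    x_tgt = x_tgt.to(x_src.device)
    return lam * x_src + (1. - lam) * x_tgt, lam


class LabelSmoothingCrossEntropy(nn.Module):
    """Cross entropy with uniform label smoothing."""

    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, target):
        logp = F.log_softmax(logits, dim=-1)
        nll = -logp.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)
        smooth = -logp.mean(dim=-1)
        return ((1. - self.smoothing) * nll + self.smoothing * smooth).mean()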