Example #1
def test(model, tgt_loader, epoch, device, args):

    loss = 0
    correct = 0
    result = []
    gt_label = []

    model.eval()
    criterion_cel = nn.CrossEntropyLoss()

    for batch_idx, (data_t, label) in enumerate(tgt_loader):
        data_t = data_t.to(device)
        label = label.to(device)

        feat, output = model(data_t)
        pred = output.max(1, keepdim=True)[1]
        loss += criterion_cel(output, label).item()

        for i in range(len(pred)):
            result.append(pred[i].item())
            gt_label.append(label[i].item())

        correct += pred.eq(label.view_as(pred)).sum().item()

    loss /= len(tgt_loader.dataset)

    utils.cal_acc(gt_label, result, args.class_num)
    acc = 100. * correct / len(tgt_loader.dataset)

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            loss, correct, len(tgt_loader.dataset), acc))

    return result, gt_label, acc
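
The test() above relies on utils.cal_acc(gt_label, result, class_num), whose body is not shown. A minimal sketch of such a helper, assuming it reports per-class accuracy and returns the overall accuracy from two label lists (names and behaviour are guesses, not the repository's code):

import numpy as np

def cal_acc(gt_list, pred_list, num_classes):
    # assumed helper: report per-class accuracy and return overall accuracy
    gt = np.array(gt_list)
    pred = np.array(pred_list)
    for c in range(num_classes):
        mask = gt == c
        if mask.sum() > 0:
            print('class {}: {:.2f}%'.format(c, 100.0 * (pred[mask] == c).mean()))
    return (pred == gt).mean()
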
Example #2
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    net = Question_Classifier(args.bert_mode,
                              args.bert_pretrain,
                              num_classes=3)
    net.load_state_dict(
        torch.load(args.load_path, map_location=lambda storage, loc: storage))

    torch.cuda.set_device(device=0)
    net.cuda()

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    valset = Question_Dataset('val',
                              dictionary,
                              args.data_root,
                              question_len=12)
    testset = Question_Dataset('test',
                               dictionary,
                               args.data_root,
                               question_len=12)

    valloader = DataLoader(valset,
                           batch_size=args.batch_size,
                           shuffle=False,
                           num_workers=2)
    testloader = DataLoader(testset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=2)

    net.eval()
    val_acc = 0.0
    test_acc = 0.0

    with torch.no_grad():
        for ii, sample_batched in enumerate(valloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            val_acc += (tmp_acc * question.shape[0])
        val_acc /= len(valset)

        for ii, sample_batched in enumerate(testloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            test_acc += (tmp_acc * question.shape[0])
        test_acc /= len(testset)

        print('valset || questions: %d acc: %.4f' % (len(valset), val_acc))
        print('testset || questions: %d acc: %.4f' % (len(testset), test_acc))
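
main() uses utils.cal_acc(out, label) with a different signature, returning a scalar batch accuracy that is then weighted by the batch size. A hedged sketch of that variant (assumed, not taken from the source):

def cal_acc(logits, labels):
    # assumed variant: fraction of samples whose argmax prediction matches the label
    pred = logits.argmax(dim=1)
    return (pred == labels).float().mean().item()
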
Example #3
def test(epoch):
    global best_prec1
    model.eval()
    loss = 0
    pred_y = []
    true_y = []

    correct = 0
    ema_correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(target_loader):
            data, target = data.cuda(), target.cuda(non_blocking=True)
            data = data.unsqueeze(1)
            output = model(data)

            target = target.long()
            loss += criterion_cel(output, target).item()  # accumulate the mean batch loss

            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max log-probability

            for i in range(len(pred)):
                pred_y.append(pred[i].item())
                true_y.append(target[i].item())

            correct += pred.eq(target.view_as(pred)).sum().item()

    loss /= len(target_loader.dataset)

    utils.cal_acc(true_y, pred_y, NUM_CLASSES)

    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            loss, correct, len(target_loader.dataset),
            100. * correct / len(target_loader.dataset)))

    prec1 = 100. * correct / len(target_loader.dataset)
    if epoch % 1 == 0:
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        utils.save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
        if is_best:
            global best_gt_y
            global best_pred_y
            best_gt_y = true_y
            best_pred_y = pred_y
Example #4
def train(model, adj, features, labels, idx_train, idx_val, idx_test):
    """Train gnn model."""
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    best_val_acc = 0.0

    if FLAGS.optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.lr)
    elif FLAGS.optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD(learning_rate=FLAGS.lr)

    inputs = (features, adj)
    for epoch in range(FLAGS.epochs):
        epoch_start_time = time.time()

        with tf.GradientTape() as tape:
            output = model(inputs, training=True)
            train_loss = loss_fn(labels[idx_train], output[idx_train])
            # L2 regularization
            for weight in model.trainable_weights:
                train_loss += FLAGS.weight_decay * tf.nn.l2_loss(weight)

        gradients = tape.gradient(train_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_acc = cal_acc(labels[idx_train], output[idx_train])

        # Evaluate
        output = model(inputs, training=False)
        val_loss = loss_fn(labels[idx_val], output[idx_val])
        val_acc = cal_acc(labels[idx_val], output[idx_val])

        if FLAGS.save_best_val:
            if val_acc >= best_val_acc:
                best_val_acc = val_acc
                model.save(FLAGS.save_dir)

        print('[%03d/%03d] %.2f sec(s) Train Acc: %.3f Loss: %.6f | Val Acc: %.3f loss: %.6f' % \
             (epoch + 1, FLAGS.epochs, time.time()-epoch_start_time, \
              train_acc, train_loss, val_acc, val_loss))

    if not FLAGS.save_best_val:
        model.save(FLAGS.save_dir)
    print('Start Predicting...')
    model = tf.keras.models.load_model(FLAGS.save_dir)
    output = model(inputs, training=False)
    test_acc = cal_acc(labels[idx_test], output[idx_test])
    print('***Test Accuracy: %.3f***' % test_acc)
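
This TensorFlow example also leaves cal_acc undefined. Assuming sparse integer labels and raw logits, consistent with the SparseCategoricalCrossentropy(from_logits=True) loss above, a plausible helper is:

import tensorflow as tf

def cal_acc(labels, logits):
    # assumed helper: accuracy of argmax predictions against sparse integer labels
    preds = tf.argmax(logits, axis=1, output_type=tf.int64)
    labels = tf.cast(labels, tf.int64)
    return tf.reduce_mean(tf.cast(tf.equal(preds, labels), tf.float32)).numpy()
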
Example #5
    def test(batch_set):
        data = torch.tensor(batch_set, device=opts['device'], dtype=torch.long)
        model.forward(data)

        preds = model.preds_hard.detach().cpu().numpy()
        lang = model.argmax_messages.detach().cpu().numpy()

        target = batch_set
        test_acc, test_ind_acc = utils.cal_acc(target, preds)

        return test_acc, test_ind_acc, preds, target, lang
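
Here utils.cal_acc(target, preds) returns two values. A plausible reading, purely a guess at the intended semantics, is an exact-sequence accuracy plus a per-symbol ("individual") accuracy:

import numpy as np

def cal_acc(target, preds):
    # guessed semantics: exact-match accuracy over whole sequences
    # plus element-wise ("individual") accuracy
    target = np.asarray(target)
    preds = np.asarray(preds)
    seq_acc = float(np.mean(np.all(preds == target, axis=1)))
    ind_acc = float(np.mean(preds == target))
    return seq_acc, ind_acc
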
Example #6
 def valid(self, n_batches=10):
     # input: valid data, output: (loss, acc)
     total_loss, total_acc = 0., 0.
     self.set_eval()
     for i in range(n_batches):
         data = next(self.valid_data_loader)
         y, x = self.permute_data(data)
         enc = self.Encoder(x)
         logits = self.SpeakerClassifier(enc)
         loss = self.cal_loss(logits, y)
         acc = cal_acc(logits, y)
         total_loss += loss.item()
         total_acc += acc
     self.set_train()
     return total_loss / n_batches, total_acc / n_batches
Example #7
 def train(self, model_path, flag='train'):
     # load hyperparams
     hps = self.hps
     for iteration in range(hps.iters):
         data = next(self.data_loader)
         y, x = self.permute_data(data)
         # encode
         enc = self.encode_step(x)
         # forward to classifier
         logits = self.forward_step(enc)
         # calculate loss
         loss = self.cal_loss(logits, y)
         # optimize
         reset_grad([self.SpeakerClassifier])
         loss.backward()
         grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
         self.opt.step()
         # calculate acc
         acc = cal_acc(logits, y)
         # print info
         info = {
              f'{flag}/loss': loss.item(),
             f'{flag}/acc': acc,
         }
         slot_value = (iteration + 1, hps.iters) + tuple([value for value in info.values()])
         log = 'iter:[%06d/%06d], loss=%.3f, acc=%.3f'
         print(log % slot_value, end='\r')
         for tag, value in info.items():
             self.logger.scalar_summary(tag, value, iteration)
         if iteration % 1000 == 0 or iteration + 1 == hps.iters:
             valid_loss, valid_acc = self.valid(n_batches=10)
             # print info
             info = {
                 f'{flag}/valid_loss': valid_loss, 
                 f'{flag}/valid_acc': valid_acc,
             }
             slot_value = (iteration + 1, hps.iters) + \
                     tuple([value for value in info.values()])
             log = 'iter:[%06d/%06d], valid_loss=%.3f, valid_acc=%.3f'
             print(log % slot_value)
             for tag, value in info.items():
                 self.logger.scalar_summary(tag, value, iteration)
             self.save_model(model_path, iteration)
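
The loop above calls reset_grad and grad_clip, which are not defined in the excerpt. Minimal sketches consistent with how they are used (assumptions, not the original helpers):

import torch

def reset_grad(net_list):
    # zero the gradients of every module before the next backward()
    for net in net_list:
        net.zero_grad()

def grad_clip(net_list, max_grad_norm):
    # clip the gradient norm of each module's parameters
    for net in net_list:
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_grad_norm)
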
Example #8
 def train(self, model_path, flag='train', mode='train'):
     # load hyperparams
     hps = self.hps
     if mode == 'pretrain_G':
         for iteration in range(hps.enc_pretrain_iters):
             data = next(self.data_loader)
             c, x = self.permute_data(data)
             # encode
             enc = self.encode_step(x)
             x_tilde = self.decode_step(enc, c)
             loss_rec = torch.mean(torch.abs(x_tilde - x))
             reset_grad([self.Encoder, self.Decoder])
             loss_rec.backward()
             grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
             self.ae_opt.step()
             # tb info
             info = {
                 f'{flag}/pre_loss_rec': loss_rec.item(),
             }
             slot_value = (iteration + 1, hps.enc_pretrain_iters) + tuple(
                 [value for value in info.values()])
             log = 'pre_G:[%06d/%06d], loss_rec=%.3f'
             print(log % slot_value)
             if iteration % 100 == 0:
                 for tag, value in info.items():
                     self.logger.scalar_summary(tag, value, iteration + 1)
     elif mode == 'pretrain_D':
         for iteration in range(hps.dis_pretrain_iters):
             data = next(self.data_loader)
             c, x = self.permute_data(data)
             # encode
             enc = self.encode_step(x)
             # classify speaker
             logits = self.clf_step(enc)
             loss_clf = self.cal_loss(logits, c)
             # update
             reset_grad([self.SpeakerClassifier])
             loss_clf.backward()
             grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
             self.clf_opt.step()
             # calculate acc
             acc = cal_acc(logits, c)
             info = {
                 f'{flag}/pre_loss_clf': loss_clf.item(),
                 f'{flag}/pre_acc': acc,
             }
             slot_value = (iteration + 1, hps.dis_pretrain_iters) + tuple(
                 [value for value in info.values()])
             log = 'pre_D:[%06d/%06d], loss_clf=%.2f, acc=%.2f'
             print(log % slot_value)
             if iteration % 100 == 0:
                 for tag, value in info.items():
                     self.logger.scalar_summary(tag, value, iteration + 1)
     elif mode == 'patchGAN':
         for iteration in range(hps.patch_iters):
             #=======train D=========#
             for step in range(hps.n_patch_steps):
                 data = next(self.data_loader)
                 c, x = self.permute_data(data)
                 ## encode
                 enc = self.encode_step(x)
                 # sample c
                 c_prime = self.sample_c(x.size(0))
                 # generator
                 x_tilde = self.gen_step(enc, c_prime)
                  # discriminator
                 w_dis, real_logits, gp = self.patch_step(x,
                                                          x_tilde,
                                                          is_dis=True)
                 # aux classification loss
                 loss_clf = self.cal_loss(real_logits, c)
                 loss = -hps.beta_dis * w_dis + hps.beta_clf * loss_clf + hps.lambda_ * gp
                 reset_grad([self.PatchDiscriminator])
                 loss.backward()
                 grad_clip([self.PatchDiscriminator],
                           self.hps.max_grad_norm)
                 self.patch_opt.step()
                 # calculate acc
                 acc = cal_acc(real_logits, c)
                 info = {
                     f'{flag}/w_dis': w_dis.item(),
                     f'{flag}/gp': gp.item(),
                     f'{flag}/real_loss_clf': loss_clf.item(),
                     f'{flag}/real_acc': acc,
                 }
                 slot_value = (step, iteration + 1,
                               hps.patch_iters) + tuple(
                                   [value for value in info.values()])
                 log = 'patch_D-%d:[%06d/%06d], w_dis=%.2f, gp=%.2f, loss_clf=%.2f, acc=%.2f'
                 print(log % slot_value)
                 if iteration % 100 == 0:
                     for tag, value in info.items():
                         self.logger.scalar_summary(tag, value,
                                                    iteration + 1)
             #=======train G=========#
             data = next(self.data_loader)
             c, x = self.permute_data(data)
             # encode
             enc = self.encode_step(x)
             # sample c
             c_prime = self.sample_c(x.size(0))
             # generator
             x_tilde = self.gen_step(enc, c_prime)
              # discriminator
             loss_adv, fake_logits = self.patch_step(x,
                                                     x_tilde,
                                                     is_dis=False)
             # aux classification loss
             loss_clf = self.cal_loss(fake_logits, c_prime)
             loss = hps.beta_clf * loss_clf + hps.beta_gen * loss_adv
             reset_grad([self.Generator])
             loss.backward()
             grad_clip([self.Generator], self.hps.max_grad_norm)
             self.gen_opt.step()
             # calculate acc
             acc = cal_acc(fake_logits, c_prime)
             info = {
                 f'{flag}/loss_adv': loss_adv.item(),
                 f'{flag}/fake_loss_clf': loss_clf.item(),
                 f'{flag}/fake_acc': acc,
             }
             slot_value = (iteration + 1, hps.patch_iters) + tuple(
                 [value for value in info.values()])
             log = 'patch_G:[%06d/%06d], loss_adv=%.2f, loss_clf=%.2f, acc=%.2f'
             print(log % slot_value)
             if iteration % 100 == 0:
                 for tag, value in info.items():
                     self.logger.scalar_summary(tag, value, iteration)
         #===================== Train G =====================#
         data = next(self.data_loader)
         (c_i, c_j), (x_i_t, x_i_tk, x_i_prime,
                      x_j) = self.permute_data(data)
         # encode
         enc_i_t, enc_i_tk, enc_i_prime, enc_j = self.encode_step(
             x_i_t, x_i_tk, x_i_prime, x_j)
         # decode
         x_tilde = self.decode_step(enc_i_t, c_i)
         loss_rec = torch.mean(torch.abs(x_tilde - x_i_t))
         # latent discriminate
         loss_adv = self.latent_discriminate_step(enc_i_t,
                                                  enc_i_tk,
                                                  enc_i_prime,
                                                  enc_j,
                                                  is_dis=False)
         ae_loss = loss_rec + current_alpha * loss_adv
         reset_grad([self.Encoder, self.Decoder])
         retain_graph = True if hps.n_patch_steps > 0 else False
         ae_loss.backward(retain_graph=retain_graph)
         grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
         self.ae_opt.step()
         info = {
              f'{flag}/loss_rec': loss_rec.item(),
              f'{flag}/loss_adv': loss_adv.item(),
             f'{flag}/alpha': current_alpha,
         }
         slot_value = (iteration + 1, hps.iters) + tuple(
             [value for value in info.values()])
         log = 'G:[%06d/%06d], loss_rec=%.2f, loss_adv=%.2f, alpha=%.2e'
         print(log % slot_value)
         for tag, value in info.items():
             self.logger.scalar_summary(tag, value, iteration + 1)
         # patch discriminate
         if hps.n_patch_steps > 0 and iteration >= hps.patch_start_iter:
             c_sample = self.sample_c(x_i_t.size(0))
             x_tilde = self.decode_step(enc_i_t, c_sample)
             patch_w_dis, real_logits, fake_logits = \
                     self.patch_discriminate_step(x_i_t, x_tilde, cal_gp=False)
             patch_loss = hps.beta_dec * patch_w_dis + hps.beta_clf * c_loss
             reset_grad([self.Decoder])
             patch_loss.backward()
             grad_clip([self.Decoder], self.hps.max_grad_norm)
             self.decoder_opt.step()
             info = {
                 f'{flag}/loss_rec': loss_rec.item(),
                 f'{flag}/G_loss_clf': loss_clf.item(),
                 f'{flag}/alpha': current_alpha,
                 f'{flag}/G_acc': acc,
             }
             slot_value = (iteration + 1, hps.iters) + tuple(
                 [value for value in info.values()])
             log = 'G:[%06d/%06d], loss_rec=%.3f, loss_clf=%.2f, alpha=%.2e, acc=%.2f'
             print(log % slot_value)
             if iteration % 100 == 0:
                 for tag, value in info.items():
                     self.logger.scalar_summary(tag, value, iteration + 1)
             if iteration % 1000 == 0 or iteration + 1 == hps.iters:
                 self.save_model(model_path, iteration)
Example #9
        err = 0
        n = 0
        for i, x in enumerate(dataloader):
            img = x.cuda()
            img_encoded, img_decoded = model(img)
            loss = criterion(img_decoded, img)

            # Reconstruction error (MSE)
            err += loss.item()
            n += 1
        print('Reconstruction error (MSE):', err/n)

        # evaluate the checkpoint on valX to get accuracy
        latents = inference(X=valX, model=model)
        pred, X_embedded = predict(latents)
        acc = cal_acc(valY, pred)
        print('Accuracy:', acc)
        
        # insert points
        epochs.append(epoch)
        losss.append(err/n)
        accs.append(acc)

# sorting
sort_idx = np.argsort(np.array(epochs))
losss = np.array(losss)[sort_idx].tolist()
accs = np.array(accs)[sort_idx].tolist()
epochs = np.array(epochs)[sort_idx].tolist()

# plot result
plt.figure(figsize=(6,6))
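
The snippet stops right after plt.figure. A plausible continuation that plots the lists collected above (the original plotting code is cut off; matplotlib.pyplot is assumed to be imported as plt, as the call above implies):

plt.plot(epochs, losss, label='reconstruction error (MSE)')
plt.plot(epochs, accs, label='accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()
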
Example #10
def train(args):
    ## set pre-process
    dset_loaders = data_load(args)
    class_num = args.class_num
    class_weight_src = torch.ones(class_num, ).cuda()
    ##################################################################################################

    ## set base network
    if args.net == 'resnet34':
        netG = utils.ResBase34().cuda()
    elif args.net == 'vgg16':
        netG = utils.VGG16Base().cuda()

    netF = utils.ResClassifier(class_num=class_num,
                               feature_dim=netG.in_features,
                               bottleneck_dim=args.bottleneck_dim).cuda()

    max_len = max(len(dset_loaders["source"]), len(dset_loaders["target"]))
    args.max_iter = args.max_epoch * max_len

    ad_flag = False
    if args.method == 'DANN':
        ad_net = utils.AdversarialNetwork(args.bottleneck_dim,
                                          1024,
                                          max_iter=args.max_iter).cuda()
        ad_flag = True
    if args.method == 'CDANE':
        ad_net = utils.AdversarialNetwork(args.bottleneck_dim * class_num,
                                          1024,
                                          max_iter=args.max_iter).cuda()
        random_layer = None
        ad_flag = True

    optimizer_g = optim.SGD(netG.parameters(), lr=args.lr * 0.1)
    optimizer_f = optim.SGD(netF.parameters(), lr=args.lr)
    if ad_flag:
        optimizer_d = optim.SGD(ad_net.parameters(), lr=args.lr)

    base_network = nn.Sequential(netG, netF)

    if args.pl.startswith('atdoc_na'):
        mem_fea = torch.rand(
            len(dset_loaders["target"].dataset) +
            len(dset_loaders["ltarget"].dataset), args.bottleneck_dim).cuda()
        mem_fea = mem_fea / torch.norm(mem_fea, p=2, dim=1, keepdim=True)
        mem_cls = torch.ones(
            len(dset_loaders["target"].dataset) +
            len(dset_loaders["ltarget"].dataset), class_num).cuda() / class_num

    if args.pl == 'atdoc_nc':
        mem_fea = torch.rand(args.class_num, args.bottleneck_dim).cuda()
        mem_fea = mem_fea / torch.norm(mem_fea, p=2, dim=1, keepdim=True)

    source_loader_iter = iter(dset_loaders["source"])
    target_loader_iter = iter(dset_loaders["target"])
    ltarget_loader_iter = iter(dset_loaders["ltarget"])

    # ###
    list_acc = []
    best_val_acc = 0

    for iter_num in range(1, args.max_iter + 1):
        # print(iter_num)
        base_network.train()
        lr_scheduler(optimizer_g,
                     init_lr=args.lr * 0.1,
                     iter_num=iter_num,
                     max_iter=args.max_iter)
        lr_scheduler(optimizer_f,
                     init_lr=args.lr,
                     iter_num=iter_num,
                     max_iter=args.max_iter)
        if ad_flag:
            lr_scheduler(optimizer_d,
                         init_lr=args.lr,
                         iter_num=iter_num,
                         max_iter=args.max_iter)

        try:
            inputs_source, labels_source = next(source_loader_iter)
        except StopIteration:
            source_loader_iter = iter(dset_loaders["source"])
            inputs_source, labels_source = next(source_loader_iter)
        try:
            inputs_target, _, idx = next(target_loader_iter)
        except StopIteration:
            target_loader_iter = iter(dset_loaders["target"])
            inputs_target, _, idx = next(target_loader_iter)

        try:
            inputs_ltarget, labels_ltarget, lidx = next(ltarget_loader_iter)
        except StopIteration:
            ltarget_loader_iter = iter(dset_loaders["ltarget"])
            inputs_ltarget, labels_ltarget, lidx = next(ltarget_loader_iter)

        inputs_ltarget, labels_ltarget = inputs_ltarget.cuda(
        ), labels_ltarget.cuda()

        inputs_source, inputs_target, labels_source = inputs_source.cuda(
        ), inputs_target.cuda(), labels_source.cuda()

        if args.method == 'srconly' and args.pl == 'none':
            features_source, outputs_source = base_network(inputs_source)
            features_ltarget, outputs_ltarget = base_network(inputs_ltarget)
        else:
            features_ltarget, outputs_ltarget = base_network(inputs_ltarget)
            features_source, outputs_source = base_network(inputs_source)
            features_target, outputs_target = base_network(inputs_target)

            features_target = torch.cat((features_ltarget, features_target),
                                        dim=0)
            outputs_target = torch.cat((outputs_ltarget, outputs_target),
                                       dim=0)

            features = torch.cat((features_source, features_target), dim=0)
            outputs = torch.cat((outputs_source, outputs_target), dim=0)
            softmax_out = nn.Softmax(dim=1)(outputs)

        eff = utils.calc_coeff(iter_num, max_iter=args.max_iter)

        if args.method[-1] == 'E':
            entropy = loss.Entropy(softmax_out)
        else:
            entropy = None

        if args.method == 'CDANE':
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy,
                                      eff, random_layer)

        elif args.method == 'DANN':
            transfer_loss = loss.DANN(features, ad_net, entropy, eff)

        elif args.method == 'srconly':
            transfer_loss = torch.tensor(0.0).cuda()
        else:
            raise ValueError('Method cannot be recognized.')

        src_ = loss.CrossEntropyLabelSmooth(reduction='none',
                                            num_classes=class_num,
                                            epsilon=args.smooth)(
                                                outputs_source, labels_source)
        weight_src = class_weight_src[labels_source].unsqueeze(0)
        classifier_loss = torch.sum(
            weight_src * src_) / (torch.sum(weight_src).item())
        total_loss = transfer_loss + classifier_loss

        ltar_ = loss.CrossEntropyLabelSmooth(reduction='none',
                                             num_classes=class_num,
                                             epsilon=args.smooth)(
                                                 outputs_ltarget,
                                                 labels_ltarget)
        weight_src = class_weight_src[labels_ltarget].unsqueeze(0)
        ltar_classifier_loss = torch.sum(
            weight_src * ltar_) / (torch.sum(weight_src).item())
        total_loss += ltar_classifier_loss

        eff = iter_num / args.max_iter

        if not args.pl == 'none':
            outputs_target = outputs_target[-args.batch_size // 3:, :]
            features_target = features_target[-args.batch_size // 3:, :]

        if args.pl == 'none':
            pass

        elif args.pl == 'square':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            square_loss = -torch.sqrt((softmax_out**2).sum(dim=1)).mean()
            total_loss += args.tar_par * eff * square_loss

        elif args.pl == 'bsp':
            sigma_loss = bsp_loss(features)
            total_loss += args.tar_par * sigma_loss

        elif args.pl == 'ent':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            ent_loss = torch.mean(loss.Entropy(softmax_out))
            ent_loss /= torch.log(torch.tensor(class_num + 0.0))
            total_loss += args.tar_par * eff * ent_loss

        elif args.pl == 'bnm':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            bnm_loss = -torch.norm(softmax_out, 'nuc')
            cof = torch.tensor(
                np.sqrt(np.min(softmax_out.size())) / softmax_out.size(0))
            bnm_loss *= cof
            total_loss += args.tar_par * eff * bnm_loss

        elif args.pl == 'mcc':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            ent_weight = 1 + torch.exp(-loss.Entropy(softmax_out)).detach()
            ent_weight /= ent_weight.sum()
            cov_tar = softmax_out.t().mm(
                torch.diag(softmax_out.size(0) * ent_weight)).mm(softmax_out)
            mcc_loss = (torch.diag(cov_tar) / cov_tar.sum(dim=1)).mean()
            total_loss -= args.tar_par * eff * mcc_loss

        elif args.pl == 'npl':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            softmax_out = softmax_out**2 / ((softmax_out**2).sum(dim=0))

            weight_, pred = torch.max(softmax_out, 1)
            loss_ = nn.CrossEntropyLoss(reduction='none')(outputs_target, pred)
            classifier_loss = torch.sum(
                weight_ * loss_) / (torch.sum(weight_).item())
            total_loss += args.tar_par * eff * classifier_loss

        elif args.pl == 'atdoc_nc':
            mem_fea_norm = mem_fea / torch.norm(
                mem_fea, p=2, dim=1, keepdim=True)
            dis = torch.mm(features_target.detach(), mem_fea_norm.t())
            _, pred = torch.max(dis, dim=1)
            classifier_loss = nn.CrossEntropyLoss()(outputs_target, pred)
            total_loss += args.tar_par * eff * classifier_loss

        elif args.pl.startswith('atdoc_na'):

            dis = -torch.mm(features_target.detach(), mem_fea.t())
            for di in range(dis.size(0)):
                dis[di, idx[di]] = torch.max(dis)
            _, p1 = torch.sort(dis, dim=1)

            w = torch.zeros(features_target.size(0), mem_fea.size(0)).cuda()
            for wi in range(w.size(0)):
                for wj in range(args.K):
                    w[wi][p1[wi, wj]] = 1 / args.K

            weight_, pred = torch.max(w.mm(mem_cls), 1)

            if args.pl.startswith('atdoc_na_now'):
                classifier_loss = nn.CrossEntropyLoss()(outputs_target, pred)
            else:
                loss_ = nn.CrossEntropyLoss(reduction='none')(outputs_target,
                                                              pred)
                classifier_loss = torch.sum(
                    weight_ * loss_) / (torch.sum(weight_).item())
            total_loss += args.tar_par * eff * classifier_loss

        optimizer_g.zero_grad()
        optimizer_f.zero_grad()
        if ad_flag:
            optimizer_d.zero_grad()
        total_loss.backward()
        optimizer_g.step()
        optimizer_f.step()
        if ad_flag:
            optimizer_d.step()

        if args.pl.startswith('atdoc_na'):
            base_network.eval()
            with torch.no_grad():
                features_target, outputs_target = base_network(inputs_target)
                features_target = features_target / torch.norm(
                    features_target, p=2, dim=1, keepdim=True)
                softmax_out = nn.Softmax(dim=1)(outputs_target)
                if args.pl.startswith('atdoc_na_nos'):
                    outputs_target = softmax_out
                else:
                    outputs_target = softmax_out**2 / (
                        (softmax_out**2).sum(dim=0))

            mem_fea[idx] = (1.0 - args.momentum) * mem_fea[
                idx] + args.momentum * features_target.clone()
            mem_cls[idx] = (1.0 - args.momentum) * mem_cls[
                idx] + args.momentum * outputs_target.clone()

            with torch.no_grad():
                features_ltarget, outputs_ltarget = base_network(
                    inputs_ltarget)
                features_ltarget = features_ltarget / torch.norm(
                    features_ltarget, p=2, dim=1, keepdim=True)
                softmax_out = nn.Softmax(dim=1)(outputs_ltarget)
                if args.pl.startswith('atdoc_na_nos'):
                    outputs_ltarget = softmax_out
                else:
                    outputs_ltarget = softmax_out**2 / (
                        (softmax_out**2).sum(dim=0))

            mem_fea[lidx + len(dset_loaders["target"].dataset)] = (1.0 - args.momentum) * \
                mem_fea[lidx + len(dset_loaders["target"].dataset)] + args.momentum * features_ltarget.clone()
            mem_cls[lidx + len(dset_loaders["target"].dataset)] = (1.0 - args.momentum) * \
                mem_cls[lidx + len(dset_loaders["target"].dataset)] + args.momentum * outputs_ltarget.clone()

        if args.pl == 'atdoc_nc':
            base_network.eval()
            with torch.no_grad():
                feat_u, outputs_target = base_network(inputs_target)
                softmax_t = nn.Softmax(dim=1)(outputs_target)
                _, pred_t = torch.max(softmax_t, 1)
                onehot_tu = torch.eye(args.class_num)[pred_t].cuda()

                feat_l, outputs_target = base_network(inputs_ltarget)
                softmax_t = nn.Softmax(dim=1)(outputs_target)
                _, pred_t = torch.max(softmax_t, 1)
                onehot_tl = torch.eye(args.class_num)[pred_t].cuda()

            center_t = ((torch.mm(feat_u.t(), onehot_tu) + torch.mm(
                feat_l.t(), onehot_tl))) / (onehot_tu.sum(dim=0) +
                                            onehot_tl.sum(dim=0) + 1e-8)
            mem_fea = (1.0 - args.momentum
                       ) * mem_fea + args.momentum * center_t.t().clone()

        if iter_num % int(args.eval_epoch * max_len) == 0:
            base_network.eval()
            acc, py, score, y = utils.cal_acc(dset_loaders["test"],
                                              base_network)
            val_acc, _, _, _ = utils.cal_acc(dset_loaders["val"], base_network)

            list_acc.append(acc * 100)
            if best_val_acc <= val_acc:
                best_val_acc = val_acc
                best_acc = acc
                best_y = y
                best_py = py
                best_score = score

            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%; Val Acc = {:.2f}%'.format(
                args.name, iter_num, args.max_iter, acc * 100, val_acc * 100)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')

    val_acc = best_acc * 100
    idx = np.argmax(np.array(list_acc))
    max_acc = list_acc[idx]
    final_acc = list_acc[-1]

    log_str = '\n==========================================\n'
    log_str += '\nVal Acc = {:.2f}\nMax Acc = {:.2f}\nFin Acc = {:.2f}\n'.format(
        val_acc, max_acc, final_acc)
    args.out_file.write(log_str + '\n')
    args.out_file.flush()
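
In this example utils.cal_acc takes a DataLoader and the network and returns (accuracy, predicted labels, softmax scores, true labels). A hedged sketch matching that interface, assuming the test loader yields (inputs, labels) pairs:

import torch
import torch.nn as nn

def cal_acc(loader, network):
    # assumed helper: run the network over a loader and return
    # (accuracy, predicted labels, softmax scores, true labels)
    network.eval()
    all_out, all_label = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            _, outputs = network(inputs.cuda())
            all_out.append(outputs.cpu())
            all_label.append(labels)
    score = nn.Softmax(dim=1)(torch.cat(all_out, dim=0))
    y = torch.cat(all_label, dim=0)
    _, py = torch.max(score, dim=1)
    acc = (py == y).float().mean().item()
    return acc, py, score, y
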
Example #11
    def validate(self, epoch):
        """ Validate with validation dataset """
        self.model.eval()

        self.val_time.reset()
        self.val_loss.reset()
        self.val_cls_acc.reset()
        self.val_mIoU.reset()
        self.val_pix_acc.reset()

        iter_per_epoch = len(
            self.val_loader.dataset) // self.cfg.val_batch_size
        if len(self.val_loader.dataset) % self.cfg.val_batch_size != 0:
            iter_per_epoch += 1

        for i, (image, label) in enumerate(self.val_loader):

            start_time = time.time()
            image_var = image.to(self.device)
            label_var = label.to(self.device)

            output = self.model(image_var)
            loss = self.criterion(output, label_var)

            end_time = time.time()

            self.val_time.update(end_time - start_time)
            self.val_loss.update(loss.item())

            if self.cfg.task == 'cls':
                # Record classification accuracy
                cls_acc = cal_acc(output, label_var)

                # Update recorder
                self.val_cls_acc.update(cls_acc.item())

                if (i + 1) % self.cfg.log_step == 0:
                    print(
                        'Epoch[{0}][{1}/{2}]\t'
                        'Time {val_time.val:.3f} ({val_time.avg:.3f})\t'
                        'Loss {val_loss.val:.4f} ({val_loss.avg:.4f})\t'
                        'Accuracy {val_cls_acc.val:.4f} ({val_cls_acc.avg:.4f})'
                        .format(epoch + 1,
                                i + 1,
                                iter_per_epoch,
                                val_time=self.val_time,
                                val_loss=self.val_loss,
                                val_cls_acc=self.val_cls_acc))

                if self.cfg.use_tensorboard:
                    self.writer.add_scalar('val/loss', loss.item(),
                                           epoch * iter_per_epoch + i)
                    self.writer.add_scalar('val/accuracy', cls_acc.item(),
                                           epoch * iter_per_epoch + i)

            elif self.cfg.task == 'seg':
                # Record mIoU and pixel-wise accuracy
                pix_acc = cal_pixel_acc(output, label_var)
                mIoU = cal_mIoU(output, label_var)[-1]
                mIoU = torch.mean(mIoU)

                # Update recorders
                self.val_pix_acc.update(pix_acc.item())
                self.val_mIoU.update(mIoU.item())

                if (i + 1) % self.cfg.log_step == 0:
                    print(
                        ' ##### Validation\t'
                        'Epoch[{0}][{1}/{2}]\t'
                        'Time {val_time.val:.3f} ({val_time.avg:.3f})\t'
                        'Loss {val_loss.val:.4f} ({val_loss.avg:.4f})\t'
                        'Pixel-Acc {val_pix_acc.val:.4f} ({val_pix_acc.avg:.4f})\t'
                        'mIoU {val_mIoU.val:.4f} ({val_mIoU.avg:.4f})'.format(
                            epoch + 1,
                            i + 1,
                            iter_per_epoch,
                            val_time=self.val_time,
                            val_loss=self.val_loss,
                            val_pix_acc=self.val_pix_acc,
                            val_mIoU=self.val_mIoU))

                if self.cfg.use_tensorboard:
                    self.writer.add_scalar('val/loss', loss.item(),
                                           epoch * iter_per_epoch + i)
                    self.writer.add_scalar('val/pix_acc', pix_acc.item(),
                                           epoch * iter_per_epoch + i)
                    self.writer.add_scalar('val/mIoU', mIoU.item(),
                                           epoch * iter_per_epoch + i)

        if self.cfg.task == 'cls':
            if (epoch + 1) % self.cfg.model_save_epoch == 0:
                state = {
                    'epoch': epoch + 1,
                    'state_dict': self.model.state_dict(),
                    'optim': self.optim.state_dict()
                }
                if self.best_cls < self.val_cls_acc.avg:
                    self.best_cls = self.val_cls_acc.avg
                    torch.save(
                        state, './model/cls_model_' + str(epoch + 1) + '_' +
                        str(self.val_cls_acc.avg)[0:5] + '.pth')

        elif self.cfg.task == 'seg':
            # Save segmentation samples and model
            if (epoch + 1) % self.cfg.sample_save_epoch == 0:
                pred = torch.argmax(output, dim=1)
                save_image(image, './sample/ori_' + str(epoch + 1) + '.png')
                save_image(label.unsqueeze(1),
                           './sample/true_' + str(epoch + 1) + '.png')
                save_image(pred.cpu().unsqueeze(1),
                           './sample/pred_' + str(epoch + 1) + '.png')

            if (epoch + 1) % self.cfg.model_save_epoch == 0:
                state = {
                    'epoch': epoch + 1,
                    'state_dict': self.model.state_dict(),
                    'optim': self.optim.state_dict()
                }
                if self.best_seg < self.val_pix_acc.avg:
                    self.best_seg = self.val_pix_acc.avg
                    torch.save(
                        state, './model/seg_model_' + str(epoch + 1) + '_' +
                        str(self.val_pix_acc.avg)[0:5] + '.pth')

            if self.cfg.use_tensorboard:
                image = make_grid(image)
                label = make_grid(label.unsqueeze(1))
                pred = make_grid(pred.cpu().unsqueeze(1))
                self.writer.add_image('Original', image, epoch + 1)
                self.writer.add_image('Labels', label, epoch + 1)
                self.writer.add_image('Predictions', pred, epoch + 1)
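
cal_pixel_acc (like cal_acc in the classification branch) is not shown; since its result is consumed with .item(), it presumably returns a tensor. A minimal sketch under that assumption:

import torch

def cal_pixel_acc(output, label):
    # assumed helper: fraction of pixels whose argmax class matches the label map,
    # returned as a tensor so that .item() works as in the loop above
    pred = torch.argmax(output, dim=1)
    return (pred == label).float().mean()
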
Example #12
 def train(self, model_path, flag='train', mode='train'):
     # load hyperparams
     hps = self.hps
     if mode == 'pretrain_G':
         for iteration in range(hps.enc_pretrain_iters):
             data = next(self.data_loader)
             c, x = self.permute_data(data)
             # encode
             enc = self.encode_step(x)
             x_tilde = self.decode_step(enc, c)
             loss_rec = torch.mean(torch.abs(x_tilde - x))
             reset_grad([self.Encoder, self.Decoder])
             loss_rec.backward()
             grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
             self.ae_opt.step()
             # tb info
             info = {
                 f'{flag}/pre_loss_rec': loss_rec.item(),
             }
             slot_value = (iteration + 1, hps.enc_pretrain_iters) + tuple([value for value in info.values()])
             log = 'pre_G:[%06d/%06d], loss_rec=%.3f'
             print(log % slot_value)
             if iteration % 100 == 0:
                 for tag, value in info.items():
                     self.logger.scalar_summary(tag, value, iteration + 1)
     elif mode == 'pretrain_D':
         for iteration in range(hps.dis_pretrain_iters):
             data = next(self.data_loader)
             c, x = self.permute_data(data)
             # encode
             enc = self.encode_step(x)
             # classify speaker
             logits = self.clf_step(enc)
             loss_clf = self.cal_loss(logits, c)
             # update 
             reset_grad([self.SpeakerClassifier])
             loss_clf.backward()
             grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
             self.clf_opt.step()
             # calculate acc
             acc = cal_acc(logits, c)
             info = {
                 f'{flag}/pre_loss_clf': loss_clf.item(),
                 f'{flag}/pre_acc': acc,
             }
             slot_value = (iteration + 1, hps.dis_pretrain_iters) + tuple([value for value in info.values()])
             log = 'pre_D:[%06d/%06d], loss_clf=%.2f, acc=%.2f'
             print(log % slot_value)
             if iteration % 100 == 0:
                 for tag, value in info.items():
                     self.logger.scalar_summary(tag, value, iteration + 1)
     elif mode == 'patchGAN':
         for iteration in range(hps.patch_iters):
             #=======train D=========#
             for step in range(hps.n_patch_steps):
                 data = next(self.data_loader)
                 c, x = self.permute_data(data)
                 ## encode
                 enc = self.encode_step(x)
                 # sample c
                 c_prime = self.sample_c(x.size(0))
                 # generator
                 x_tilde = self.gen_step(enc, c_prime)
                  # discriminator
                 w_dis, real_logits, gp = self.patch_step(x, x_tilde, is_dis=True)
                 # aux classification loss 
                 loss_clf = self.cal_loss(real_logits, c)
                 loss = -hps.beta_dis * w_dis + hps.beta_clf * loss_clf + hps.lambda_ * gp
                 reset_grad([self.PatchDiscriminator])
                 loss.backward()
                 grad_clip([self.PatchDiscriminator], self.hps.max_grad_norm)
                 self.patch_opt.step()
                 # calculate acc
                 acc = cal_acc(real_logits, c)
                 info = {
                     f'{flag}/w_dis': w_dis.item(),
                     f'{flag}/gp': gp.item(), 
                     f'{flag}/real_loss_clf': loss_clf.item(),
                     f'{flag}/real_acc': acc, 
                 }
                 slot_value = (step, iteration+1, hps.patch_iters) + tuple([value for value in info.values()])
                 log = 'patch_D-%d:[%06d/%06d], w_dis=%.2f, gp=%.2f, loss_clf=%.2f, acc=%.2f'
                 print(log % slot_value)
                 if iteration % 100 == 0:
                     for tag, value in info.items():
                         self.logger.scalar_summary(tag, value, iteration + 1)
             #=======train G=========#
             data = next(self.data_loader)
             c, x = self.permute_data(data)
             # encode
             enc = self.encode_step(x)
             # sample c
             c_prime = self.sample_c(x.size(0))
             # generator
             x_tilde = self.gen_step(enc, c_prime)
              # discriminator
             loss_adv, fake_logits = self.patch_step(x, x_tilde, is_dis=False)
             # aux classification loss 
             loss_clf = self.cal_loss(fake_logits, c_prime)
             loss = hps.beta_clf * loss_clf + hps.beta_gen * loss_adv
             reset_grad([self.Generator])
             loss.backward()
             grad_clip([self.Generator], self.hps.max_grad_norm)
             self.gen_opt.step()
             # calculate acc
             acc = cal_acc(fake_logits, c_prime)
             info = {
                 f'{flag}/loss_adv': loss_adv.item(),
                 f'{flag}/fake_loss_clf': loss_clf.item(),
                 f'{flag}/fake_acc': acc, 
             }
             slot_value = (iteration+1, hps.patch_iters) + tuple([value for value in info.values()])
             log = 'patch_G:[%06d/%06d], loss_adv=%.2f, loss_clf=%.2f, acc=%.2f'
             print(log % slot_value)
             if iteration % 100 == 0:
                  for tag, value in info.items():
                      self.logger.scalar_summary(tag, value, iteration + 1)
Example #13
def train(args):
    ## set pre-process
    dset_loaders = data_load(args)

    max_len = max(len(dset_loaders["source"]), len(dset_loaders["target"]))
    args.max_iter = args.max_epoch * max_len

    ## set base network
    if args.net == 'resnet34':
        netG = utils.ResBase34().cuda()
    elif args.net == 'vgg16':
        netG = utils.VGG16Base().cuda()

    netF = utils.ResClassifier(class_num=args.class_num,
                               feature_dim=netG.in_features,
                               bottleneck_dim=args.bottleneck_dim).cuda()

    if len(args.gpu_id.split(',')) > 1:
        netG = nn.DataParallel(netG)

    optimizer_g = optim.SGD(netG.parameters(), lr=args.lr * 0.1)
    optimizer_f = optim.SGD(netF.parameters(), lr=args.lr)

    base_network = nn.Sequential(netG, netF)
    source_loader_iter = iter(dset_loaders["source"])
    target_loader_iter = iter(dset_loaders["target"])
    ltarget_loader_iter = iter(dset_loaders["ltarget"])

    if args.pl.startswith('atdoc_na'):
        mem_fea = torch.rand(
            len(dset_loaders["target"].dataset) +
            len(dset_loaders["ltarget"].dataset), args.bottleneck_dim).cuda()
        mem_fea = mem_fea / torch.norm(mem_fea, p=2, dim=1, keepdim=True)
        mem_cls = torch.ones(
            len(dset_loaders["target"].dataset) +
            len(dset_loaders["ltarget"].dataset),
            args.class_num).cuda() / args.class_num

    if args.pl == 'atdoc_nc':
        mem_fea = torch.rand(args.class_num, args.bottleneck_dim).cuda()
        mem_fea = mem_fea / torch.norm(mem_fea, p=2, dim=1, keepdim=True)

    list_acc = []
    best_val_acc = 0

    for iter_num in range(1, args.max_iter + 1):
        base_network.train()
        lr_scheduler(optimizer_g,
                     init_lr=args.lr * 0.1,
                     iter_num=iter_num,
                     max_iter=args.max_iter)
        lr_scheduler(optimizer_f,
                     init_lr=args.lr,
                     iter_num=iter_num,
                     max_iter=args.max_iter)

        try:
            inputs_source, labels_source = next(source_loader_iter)
        except StopIteration:
            source_loader_iter = iter(dset_loaders["source"])
            inputs_source, labels_source = next(source_loader_iter)
        try:
            inputs_target, _, target_idx = next(target_loader_iter)
        except StopIteration:
            target_loader_iter = iter(dset_loaders["target"])
            inputs_target, _, target_idx = next(target_loader_iter)

        try:
            inputs_ltarget, labels_ltarget, lidx = next(ltarget_loader_iter)
        except StopIteration:
            ltarget_loader_iter = iter(dset_loaders["ltarget"])
            inputs_ltarget, labels_ltarget, lidx = next(ltarget_loader_iter)

        inputs_lt = inputs_ltarget[0].cuda()
        inputs_lt2 = inputs_ltarget[1].cuda()
        targets_lt = torch.zeros(args.batch_size // 3,
                                 args.class_num).scatter_(
                                     1, labels_ltarget.view(-1, 1), 1)
        targets_lt = targets_lt.cuda()

        targets_s = torch.zeros(args.batch_size, args.class_num).scatter_(
            1, labels_source.view(-1, 1), 1)
        inputs_s = inputs_source.cuda()
        targets_s = targets_s.cuda()
        inputs_t = inputs_target[0].cuda()
        inputs_t2 = inputs_target[1].cuda()

        if args.pl.startswith('atdoc_na'):

            targets_u = 0
            for inp in [inputs_t, inputs_t2]:
                with torch.no_grad():
                    features_target, outputs_u = base_network(inp)

                dis = -torch.mm(features_target.detach(), mem_fea.t())
                for di in range(dis.size(0)):
                    dis[di, target_idx[di]] = torch.max(dis)
                    # dis[di, target_idx[di]+len(dset_loaders["target"].dataset)] = torch.max(dis)

                _, p1 = torch.sort(dis, dim=1)
                w = torch.zeros(features_target.size(0),
                                mem_fea.size(0)).cuda()
                for wi in range(w.size(0)):
                    for wj in range(args.K):
                        w[wi][p1[wi, wj]] = 1 / args.K

                _, pred = torch.max(w.mm(mem_cls), 1)

                targets_u += 0.5 * torch.eye(outputs_u.size(1))[pred].cuda()

        elif args.pl == 'atdoc_nc':

            targets_u = 0
            mem_fea_norm = mem_fea / torch.norm(
                mem_fea, p=2, dim=1, keepdim=True)
            for inp in [inputs_t, inputs_t2]:
                with torch.no_grad():
                    features_target, outputs_u = base_network(inp)
                dis = torch.mm(features_target.detach(), mem_fea_norm.t())
                _, pred = torch.max(dis, dim=1)
                targets_u += 0.5 * torch.eye(outputs_u.size(1))[pred].cuda()

        elif args.pl == 'npl':

            targets_u = 0
            for inp in [inputs_t, inputs_t2]:
                with torch.no_grad():
                    _, outputs_u = base_network(inp)
                _, pred = torch.max(outputs_u.detach(), 1)
                targets_u += 0.5 * torch.eye(outputs_u.size(1))[pred].cuda()

        else:
            with torch.no_grad():
                # compute guessed labels of unlabel samples
                _, outputs_u = base_network(inputs_t)
                _, outputs_u2 = base_network(inputs_t2)
                p = (torch.softmax(outputs_u, dim=1) +
                     torch.softmax(outputs_u2, dim=1)) / 2
                pt = p**(1 / args.T)
                targets_u = pt / pt.sum(dim=1, keepdim=True)
                targets_u = targets_u.detach()

        ####################################################################
        all_inputs = torch.cat(
            [inputs_s, inputs_lt, inputs_t, inputs_lt2, inputs_t2], dim=0)
        all_targets = torch.cat(
            [targets_s, targets_lt, targets_u, targets_lt, targets_u], dim=0)
        if args.alpha > 0:
            l = np.random.beta(args.alpha, args.alpha)
            l = max(l, 1 - l)
        else:
            l = 1
        idx = torch.randperm(all_inputs.size(0))

        input_a, input_b = all_inputs, all_inputs[idx]
        target_a, target_b = all_targets, all_targets[idx]
        mixed_input = l * input_a + (1 - l) * input_b
        mixed_target = l * target_a + (1 - l) * target_b

        # interleave labeled and unlabeled samples between batches to get a correct batchnorm calculation
        mixed_input = list(torch.split(mixed_input, args.batch_size))
        mixed_input = utils.interleave(mixed_input, args.batch_size)
        # s = [sa, sb, sc]
        # t1 = [t1a, t1b, t1c]
        # t2 = [t2a, t2b, t2c]
        # => s' = [sa, t1b, t2c]   t1' = [t1a, sb, t1c]   t2' = [t2a, t2b, sc]

        # _, logits = base_network(mixed_input[0])
        features, logits = base_network(mixed_input[0])
        logits = [logits]
        for input in mixed_input[1:]:
            _, temp = base_network(input)
            logits.append(temp)

        # put interleaved samples back
        # [i[:,0] for i in aa]
        logits = utils.interleave(logits, args.batch_size)
        logits_x = logits[0]
        logits_u = torch.cat(logits[1:], dim=0)

        train_criterion = utils.SemiLoss()

        Lx, Lu, w = train_criterion(logits_x, mixed_target[:args.batch_size],
                                    logits_u, mixed_target[args.batch_size:],
                                    iter_num, args.max_iter, args.lambda_u)
        loss = Lx + w * Lu

        optimizer_g.zero_grad()
        optimizer_f.zero_grad()
        loss.backward()
        optimizer_g.step()
        optimizer_f.step()

        if args.pl.startswith('atdoc_na'):
            base_network.eval()
            with torch.no_grad():
                fea1, outputs1 = base_network(inputs_t)
                fea2, outputs2 = base_network(inputs_t2)
                feat = 0.5 * (fea1 + fea2)
                feat = feat / torch.norm(feat, p=2, dim=1, keepdim=True)
                softmax_out = 0.5 * (nn.Softmax(dim=1)(outputs1) +
                                     nn.Softmax(dim=1)(outputs2))
                softmax_out = softmax_out**2 / ((softmax_out**2).sum(dim=0))

            mem_fea[target_idx] = (
                1.0 -
                args.momentum) * mem_fea[target_idx] + args.momentum * feat
            mem_cls[target_idx] = (1.0 - args.momentum) * mem_cls[
                target_idx] + args.momentum * softmax_out

            with torch.no_grad():
                fea1, outputs1 = base_network(inputs_lt)
                fea2, outputs2 = base_network(inputs_lt2)
                feat = 0.5 * (fea1 + fea2)
                feat = feat / torch.norm(feat, p=2, dim=1, keepdim=True)
                softmax_out = 0.5 * (nn.Softmax(dim=1)(outputs1) +
                                     nn.Softmax(dim=1)(outputs2))
                softmax_out = softmax_out**2 / ((softmax_out**2).sum(dim=0))

            mem_fea[lidx + len(dset_loaders["target"].dataset)] = (1.0 - args.momentum) * \
                mem_fea[lidx + len(dset_loaders["target"].dataset)] + args.momentum*feat
            mem_cls[lidx + len(dset_loaders["target"].dataset)] = (1.0 - args.momentum) * \
                mem_cls[lidx + len(dset_loaders["target"].dataset)] + args.momentum*softmax_out

        if args.pl == 'atdoc_nc':
            base_network.eval()
            with torch.no_grad():
                fea1, outputs1 = base_network(inputs_t)
                fea2, outputs2 = base_network(inputs_t2)
                feat_u = 0.5 * (fea1 + fea2)
                softmax_t = 0.5 * (nn.Softmax(dim=1)(outputs1) +
                                   nn.Softmax(dim=1)(outputs2))
                _, pred_t = torch.max(softmax_t, 1)
                onehot_tu = torch.eye(args.class_num)[pred_t].cuda()

            with torch.no_grad():
                fea1, outputs1 = base_network(inputs_lt)
                fea2, outputs2 = base_network(inputs_lt2)
                feat_l = 0.5 * (fea1 + fea2)
                softmax_t = 0.5 * (nn.Softmax(dim=1)(outputs1) +
                                   nn.Softmax(dim=1)(outputs2))
                _, pred_t = torch.max(softmax_t, 1)
                onehot_tl = torch.eye(args.class_num)[pred_t].cuda()
                # onehot_tl = torch.eye(args.class_num)[labels_ltarget].cuda()

            center_t = ((torch.mm(feat_u.t(), onehot_tu) + torch.mm(
                feat_l.t(), onehot_tl))) / (onehot_tu.sum(dim=0) +
                                            onehot_tl.sum(dim=0) + 1e-8)
            mem_fea = (1.0 - args.momentum
                       ) * mem_fea + args.momentum * center_t.t().clone()

        if iter_num % int(args.eval_epoch * max_len) == 0:
            base_network.eval()
            if args.dset == 'VISDA-C':
                acc, py, score, y, tacc = utils.cal_acc_visda(
                    dset_loaders["test"], base_network)
                args.out_file.write(tacc + '\n')
                args.out_file.flush()
            else:
                acc, py, score, y = utils.cal_acc(dset_loaders["test"],
                                                  base_network)
                val_acc, _, _, _ = utils.cal_acc(dset_loaders["val"],
                                                 base_network)

            list_acc.append(acc * 100)
            if best_val_acc <= val_acc:
                best_val_acc = val_acc
                best_acc = acc
                best_y = y
                best_py = py
                best_score = score

            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%; Val Acc = {:.2f}%'.format(
                args.name, iter_num, args.max_iter, acc * 100, val_acc * 100)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')

    val_acc = best_acc * 100
    idx = np.argmax(np.array(list_acc))
    max_acc = list_acc[idx]
    final_acc = list_acc[-1]

    log_str = '\n==========================================\n'
    log_str += '\nVal Acc = {:.2f}\nMax Acc = {:.2f}\nFin Acc = {:.2f}\n'.format(
        val_acc, max_acc, final_acc)
    args.out_file.write(log_str + '\n')
    args.out_file.flush()

    # torch.save(base_network.state_dict(), osp.join(args.output_dir, args.log + ".pt"))
    # sio.savemat(osp.join(args.output_dir, args.log + ".mat"), {'y':best_y.cpu().numpy(),
    #     'py':best_py.cpu().numpy(), 'score':best_score.cpu().numpy()})

    return base_network, py
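The `atdoc` branches above maintain a memory bank of L2-normalised features and sharpened class probabilities that is refreshed with an exponential moving average. A minimal, self-contained sketch of that per-sample update (the function and argument names here are illustrative, not the repository's API):

import torch
import torch.nn.functional as F

def ema_memory_update(mem_fea, mem_cls, idx, feat, logits, momentum=0.1):
    # L2-normalise the incoming features, as in the snippet above
    feat = F.normalize(feat, p=2, dim=1)
    # sharpen predictions: square the probabilities and renormalise over the batch dimension
    prob = F.softmax(logits, dim=1)
    prob = prob ** 2 / (prob ** 2).sum(dim=0)
    # exponential moving average of both memories at the given sample indices
    mem_fea[idx] = (1.0 - momentum) * mem_fea[idx] + momentum * feat
    mem_cls[idx] = (1.0 - momentum) * mem_cls[idx] + momentum * prob
    return mem_fea, mem_cls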
Exemplo n.º 14
0
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
        if is_best:
            global best_gt_y
            global best_pred_y
            best_gt_y = true_y
            best_pred_y = pred_y


def adjust_learning_rate(optimizer, epoch, step_in_epoch,
                         total_steps_in_epoch):
    lr = args.lr
    epoch = epoch + step_in_epoch / total_steps_in_epoch

    lr *= utils.cosine_rampdown(epoch, args.lr_rampdown_epochs)

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


try:
    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test(epoch)
    print("------Best Result-------")
    utils.cal_acc(best_gt_y, best_pred_y, NUM_CLASSES)
except KeyboardInterrupt:
    print("------Best Result-------")
    utils.cal_acc(best_gt_y, best_pred_y, NUM_CLASSES)
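`adjust_learning_rate` above multiplies the base learning rate by `utils.cosine_rampdown`, whose body is not shown here. A plausible implementation following the usual cosine ramp-down from 1 to 0 (an assumption, not the repository's verified helper):

import numpy as np

def cosine_rampdown(current, rampdown_length):
    # decays multiplicatively from 1.0 at epoch 0 down to 0.0 at rampdown_length
    current = np.clip(current, 0.0, rampdown_length)
    return float(0.5 * (np.cos(np.pi * current / rampdown_length) + 1.0))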
Exemplo n.º 15
0
def main(argv=None):
    opt = TestOptions().parse()
    test_results = os.path.join(opt.saveroot, 'test_results')
    utils.check_dir_exist(test_results)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
    DATA_SIZE = opt.data_size.split(',')
    DATA_SIZE = [
        int(DATA_SIZE[0][1:]),
        int(DATA_SIZE[1]),
        int(DATA_SIZE[2][:-1])
    ]
    BLOCK_SIZE = opt.block_size.split(',')
    BLOCK_SIZE = [
        int(BLOCK_SIZE[0][1:]),
        int(BLOCK_SIZE[1]),
        int(BLOCK_SIZE[2][:-1])
    ]
    label_path = os.path.join(opt.dataroot, opt.mode, 'label')
    label_names = natsort.natsorted(os.listdir(label_path))

    x = tf.placeholder(tf.float32,
                       shape=[None] + BLOCK_SIZE + [opt.input_nc],
                       name="input_image")
    y = tf.placeholder(tf.int32,
                       shape=[None, 1, BLOCK_SIZE[1], BLOCK_SIZE[2], 1],
                       name="annotation")
    y_, pred_, variables, sf = model.IPN(x=x,
                                         PLM_NUM=opt.PLM_num,
                                         LAYER_NUM=opt.layer_num,
                                         NUM_OF_CLASS=opt.NUM_OF_CLASS)
    model_loss = lossfunc.cross_entropy(y_, y)

    sess = tf.Session()
    print("Setting up Saver...")
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    restore_path = os.path.join(
        opt.saveroot, 'best_model',
        natsort.natsorted(os.listdir(os.path.join(opt.saveroot,
                                                  'best_model')))[-1])
    o_itr = natsort.natsorted(os.listdir(restore_path))[-1][11:-5]
    saver.restore(sess, os.path.join(restore_path, 'model.ckpt-' + o_itr))
    print("Model restored...")

    test_images = np.zeros(
        (1, BLOCK_SIZE[0], BLOCK_SIZE[1], BLOCK_SIZE[2], opt.input_nc))
    cube_images = np.zeros(
        (1, BLOCK_SIZE[0], DATA_SIZE[1], DATA_SIZE[2], opt.input_nc))
    test_annotations = np.zeros((1, 1, BLOCK_SIZE[1], BLOCK_SIZE[2], 1))

    modalitylist = os.listdir(os.path.join(opt.dataroot, opt.mode))
    modalitylist = natsort.natsorted(modalitylist)
    print(modalitylist)

    result = np.zeros((DATA_SIZE[1], DATA_SIZE[2]))
    result_pre = np.zeros((DATA_SIZE[1], DATA_SIZE[2]))

    cubelist = os.listdir(os.path.join(opt.dataroot, opt.mode,
                                       modalitylist[0]))
    cubelist = natsort.natsorted(cubelist)
    loss_sum = 0
    acc_sum = 0
    dice_sum = 0
    for kk, cube in enumerate(cubelist):
        loss2 = 0
        bscanlist = os.listdir(
            os.path.join(opt.dataroot, opt.mode, modalitylist[0], cube))
        bscanlist = natsort.natsorted(bscanlist)
        for i, bscan in enumerate(bscanlist):
            for j, modal in enumerate(modalitylist):
                if modal != "label":
                    cube_images[0, :, :, i, j] = np.array(
                        misc.imresize(misc.imread(
                            os.path.join(opt.dataroot, opt.mode, modal, cube,
                                         bscan)),
                                      [BLOCK_SIZE[0], DATA_SIZE[1]],
                                      interp='nearest'))

        for i in range(DATA_SIZE[1] // BLOCK_SIZE[1]):
            for j in range(0, DATA_SIZE[2] // BLOCK_SIZE[2]):
                test_images[0, 0:BLOCK_SIZE[0], 0:BLOCK_SIZE[1],
                            0:BLOCK_SIZE[2], :] = cube_images[0, :,
                                                              BLOCK_SIZE[1] *
                                                              i:BLOCK_SIZE[1] *
                                                              (i + 1),
                                                              BLOCK_SIZE[2] *
                                                              j:BLOCK_SIZE[2] *
                                                              (j + 1), :]
                score, result0, piece_loss, sf0 = sess.run(
                    [y_, pred_, model_loss, sf],
                    feed_dict={
                        x: test_images,
                        y: test_annotations
                    })
                result[BLOCK_SIZE[1] * i:BLOCK_SIZE[1] * (i + 1),
                       BLOCK_SIZE[2] * j:BLOCK_SIZE[2] *
                       (j + 1)] = result0[0, 0, :, :] * 255
                result_pre[BLOCK_SIZE[1] * i:BLOCK_SIZE[1] * (i + 1),
                           BLOCK_SIZE[2] * j:BLOCK_SIZE[2] *
                           (j + 1)] = sf0[0, 0, :, :, 1] * 255
                loss2 += piece_loss
        loss2 = loss2 / (DATA_SIZE[1] // BLOCK_SIZE[1]) / (DATA_SIZE[2] //
                                                           BLOCK_SIZE[2])
        label = misc.imread(os.path.join(label_path, label_names[kk])) * 255
        acc = utils.cal_acc(result, label)
        dice = utils.cal_Dice(result, label)
        print(
            cube, 'loss -> {:.3f}, acc -> {:.3f}, dice -> {:.3f}'.format(
                loss2, acc, dice))
        loss_sum += loss2
        acc_sum += acc
        dice_sum += dice

        misc.imsave(os.path.join(test_results, cube + ".bmp"),
                    result.astype(np.uint8))
        misc.imsave(os.path.join(test_results, cube + "_pre.bmp"),
                    result_pre.astype(np.uint8))
    print('')
    print('mean: loss -> {:.3f}, acc -> {:.3f}, dice -> {:.3f}'.format(
        loss_sum / len(label_names), acc_sum / len(label_names),
        dice_sum / len(label_names)))
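The per-cube metrics above come from `utils.cal_acc` and `utils.cal_Dice`, each called on a predicted mask and a 0/255 label image. Hedged re-implementations consistent with that usage (sketches only, not the project's own helpers):

import numpy as np

def cal_acc(pred, label):
    # pixel accuracy between two binary masks (any non-zero pixel counts as foreground)
    pred, label = np.asarray(pred) > 0, np.asarray(label) > 0
    return float((pred == label).mean())

def cal_Dice(pred, label):
    # Dice coefficient of the foreground class
    pred, label = np.asarray(pred) > 0, np.asarray(label) > 0
    inter = np.logical_and(pred, label).sum()
    return float(2.0 * inter / (pred.sum() + label.sum() + 1e-8))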
Exemplo n.º 16
0
    def train():

        data = data_loader.get_batch('train')
        train_acc = 0
        trainval_acc = 0
        val_acc = 0
        best_train_acc = 0
        best_trainval_acc = 0
        best_val_acc = 0
        balance_fac = math.factorial(opts['num_digits'])

        for iters in range(1, opts['max_iters']):

            data = torch.tensor(data, device=opts['device'], dtype=torch.long)

            optimizer.zero_grad()
            model.forward(data)

            elbo = -model.xent - model.kl / balance_fac
            output_loss = -elbo.mean()

            elbo_loss = output_loss * opts['output_loss_penalty']
            total_loss = elbo_loss

            weight_norm = sum([tensor.norm() for tensor in no_temp_params])
            weight_norm_loss = weight_norm * opts['weight_norm_penalty']

            total_loss += weight_norm_loss

            elbo_grads = torch.autograd.grad(elbo_loss,
                                             model.parameters(),
                                             retain_graph=True)
            torch.autograd.backward(model.parameters(), elbo_grads)

            wn_grads = torch.autograd.grad(weight_norm_loss, no_temp_params)
            torch.autograd.backward(no_temp_params, wn_grads)

            optimizer.step()

            pred_outs = model.preds_hard.detach().cpu().numpy()
            data = data.detach().cpu().numpy()

            train_acc, train_ind_acc = utils.cal_acc(data, pred_outs)
            print("Iters: ", iters, " Acc: ", train_acc, "total loss: ",
                  total_loss.detach().cpu().numpy())

            if train_acc > best_train_acc:
                best_train_acc = train_acc
                print("Best Train acc: " + str(best_train_acc) +
                      " at iters: " + str(iters))

            if iters % opts['trainval_interval'] == 0:
                trainval_acc, trainval_ind_acc, trainval_pred, trainval_target, trainval_lang = test(
                    trainval_data)

                precision, log_recall = get_prec_rec(eval_data)
                res_entropy = utils.get_residual_entropy(
                    trainval_lang, trainval_target)
                print(precision, log_recall, res_entropy)

                if trainval_acc > best_trainval_acc:
                    best_trainval_acc = trainval_acc
                    print("Best Trainval acc: " + str(best_trainval_acc) +
                          " at iters: " + str(iters))

            if best_train_acc >= opts[
                    'train_acc'] and best_trainval_acc >= opts[
                        'trainval_acc'] and iters % opts[
                            'trainval_interval'] == 0:

                val_acc, val_ind_acc, val_pred, val_target, val_lang = test(
                    eval_data)

                if best_val_acc < val_acc:
                    best_val_acc = val_acc

                    print("Val Acc: ", val_acc)

                    if best_val_acc >= val_acc:
                        some_train_data = np.array(
                            data_loader.collect_train_data)
                        train_acc, train_ind_acc, train_pred, train_target, train_lang = test(
                            some_train_data)

                        precision, log_recall = get_prec_rec(eval_data)
                        res_entropy = utils.get_residual_entropy(
                            train_lang, train_target)

                        np.save(os.path.join(save_path, 'train_preds.npy'),
                                train_pred)
                        np.save(os.path.join(save_path, 'train_targets.npy'),
                                train_target)
                        np.save(os.path.join(save_path, 'trainval_preds.npy'),
                                trainval_pred)
                        np.save(
                            os.path.join(save_path, 'trainval_targets.npy'),
                            trainval_target)
                        np.save(os.path.join(save_path, 'val_preds.npy'),
                                val_pred)
                        np.save(os.path.join(save_path, 'val_targets.npy'),
                                val_target)
                        np.save(os.path.join(save_path, 'language_train.npy'),
                                train_lang)
                        np.save(
                            os.path.join(save_path, 'language_trainval.npy'),
                            trainval_lang)
                        np.save(os.path.join(save_path, 'language_val.npy'),
                                val_lang)

                        model_dict = {
                            'model': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'iters': iters,
                            'train_acc': train_acc,
                            'trainval_acc': trainval_acc,
                            'val_acc': val_acc,
                            'precision': precision,
                            'log_recall': log_recall,
                            'res_entropy': res_entropy,
                            'total_params': num_all_params,
                            'spk_params': num_all_sender_params,
                            'rec_params': num_all_receiver_params
                        }
                        torch.save(model_dict,
                                   os.path.join(save_path, 'model.pt'))

            data = data_loader.get_batch('train')
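`utils.cal_acc(data, pred_outs)` above returns an overall accuracy together with a per-digit accuracy for the reconstructed sequences. One way such a helper could look, inferred only from the call sites (an assumption, not the repository's definition):

import numpy as np

def cal_acc(target, pred):
    # target, pred: integer arrays of shape (batch, num_digits)
    target, pred = np.asarray(target), np.asarray(pred)
    per_digit_acc = (target == pred).mean(axis=0)         # accuracy at each position
    seq_acc = float((target == pred).all(axis=1).mean())  # whole-sequence accuracy
    return seq_acc, per_digit_acc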
Exemplo n.º 17
0
        # training
        train_metrics = model_epoch(loss_name="trainval",
                                    mode="train",
                                    epoch=epoch,
                                    model=model,
                                    k=CONFIG['k'],
                                    d=CONFIG['d'],
                                    data_loader=train_loader,
                                    concepts=concepts,
                                    optimizer=optimizer,
                                    writer=writer)

        torch.save(model.state_dict(),
                   PJ(SAVE_PATH, 'epoch' + str(epoch) + '.pkl'))

        train_class, train_acc = cal_acc(train_metrics)
        writer.add_scalar('trainval_acc', train_acc * 100, epoch)

        ######################################################################################

        # test
        record = {
            tn: {
                'acc': 0.0,
                'class': None
            }
            for tn in STATE['split_list'][1:]
        }
        record.update({
            tn + '_g': {
                'acc': 0.0,
Exemplo n.º 18
0
        for epoch in range(CONFIG['start_epoch'], CONFIG['end_epoch']):

            writer = SummaryWriter(PJ(SAVE_PATH, 'val' + str(val_times)))

            scheduler.step()

            # training
            train_metrics = model_epoch(loss_name="train", mode="train", epoch=epoch,
                                        model=model, k=CONFIG['k'], d=CONFIG['d'],
                                        data_loader=train_loader, concepts=concepts,
                                        optimizer=optimizer, writer=writer)

            torch.save(model.state_dict(), PJ(SAVE_PATH, str(val_times) + '_epoch' + str(epoch) + '.pkl'))

            train_class, train_acc = cal_acc(train_metrics, concepts['train']['concept_label'])
            writer.add_scalar('train_acc', train_acc * 100, epoch)

            ######################################################################################

            # val
            val_metric = model_epoch(mode="test", epoch=epoch, loss_name="val",
                                     model=model, k=CONFIG['k'], d=CONFIG['d'],
                                     data_loader=val_loader, concepts=concepts,
                                     optimizer=optimizer, writer=writer)

            val_class, val_acc = cal_acc(val_metric)
            val_g_class, val_g_acc = cal_acc(val_metric, general=True)

            writer.add_scalar('val_acc', val_acc * 100, epoch)
            writer.add_scalar('val_g_acc', val_g_acc * 100, epoch)
Exemplo n.º 19
0
def train(args):
    if "bert" in args.model_type:
        set_gelu("tanh")  # switch GELU variant

        # Step1: Load Data
        data_generator = None
        if "siamese" in args.model_type:
            data_generator = SiameseDataGenerator
        elif "albert" in args.model_type:
            data_generator = BertDataGenerator

        train_ds = data_generator(data_path=args.train_data_path,
                                  batch_size=args.batch_size,
                                  dict_path=args.bert_dict_path,
                                  maxlen=args.query_len)
        dev_ds = data_generator(data_path=args.dev_data_path,
                                batch_size=args.batch_size,
                                maxlen=args.query_len,
                                dict_path=args.bert_dict_path)
        test_ds = data_generator(data_path=args.test_data_path,
                                 batch_size=args.batch_size,
                                 maxlen=args.query_len,
                                 dict_path=args.bert_dict_path)

        # Step2: Load Model
        model = None
        if "siamese" in args.model_type:
            model = SiameseBertModel(config_path=args.bert_config_path,
                                     checkpoint_path=args.bert_checkpoint_path,
                                     dense_units=args.dense_units)
        elif "albert" in args.model_type:
            model = BertModel(config_path=args.bert_config_path,
                              checkpoint_path=args.bert_checkpoint_path)

        model_name = model.__class__.__name__
        model = model.get_model()

        from bert4keras.optimizers import Adam
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=Adam(2e-5),  # use a sufficiently small learning rate
            # optimizer=PiecewiseLinearLearningRate(Adam(5e-5), {10000: 1, 30000: 0.1}),
            metrics=['accuracy'],
        )

        evaluator = Evaluator(dev_ds=dev_ds,
                              model_name=model_name,
                              is_bert_model=True,
                              test_ds=test_ds)
        logger.info("***** Running training *****")
        logger.info("  Model Class Name = %s", model_name)
        logger.info("  Num Epochs = %d", args.epoch)
        model.fit_generator(train_ds.forfit(),
                            steps_per_epoch=len(train_ds),
                            epochs=args.epoch,
                            callbacks=[evaluator],
                            verbose=2)

        model.load_weights('./checkpoints/best_{}.weight'.format(model_name))
        logger.info("***** Test Reslt *****")
        logger.info("  Model = %s", model_name)
        logger.info("  Batch Size = %d", args.batch_size)
        logger.info("  Final Test Acc:%05f",
                    cal_acc(data=test_ds, model=model, is_bert_model=True))

    elif "NN" in args.model_type:
        # Step 1: Load Data
        train_data = pd.read_csv(args.train_data_path)
        dev_data = pd.read_csv(args.dev_data_path)
        test_data = pd.read_csv(args.test_data_path)

        category_count = len(train_data["category"].value_counts())
        category_encoder = category_OneHotEncoder(data_df=train_data)

        loader = LoadData(w2v_path=args.w2v_path, query_len=args.query_len)
        word2idx = loader.word2idx
        emd_matrix = loader.emb_matrix
        """
        注意:
        shuffle的顺序很重要:一般建议是先执行shuffle方法,接着采用batch方法。
        这样是为了保证在整体数据打乱之后再取出batch_size大小的数据。
        如果先采取batch方法再采用shuffle方法,那么此时就只是对batch进行shuffle,
        而batch里面的数据顺序依旧是有序的,那么随机程度会减弱。
        """
        train_ds = loader.dataset(encoder=category_encoder, data_df=train_data)
        train_ds = train_ds.shuffle(buffer_size=len(train_data)).batch(
            batch_size=args.batch_size).repeat()

        dev_ds = loader.dataset(encoder=category_encoder, data_df=dev_data)
        dev_ds = dev_ds.batch(batch_size=args.batch_size)
        test_ds = loader.dataset(encoder=category_encoder, data_df=test_data)
        test_ds = test_ds.batch(batch_size=args.batch_size)

        # Step2: Load Model
        model = None
        if "siamese_CNN" in args.model_type:
            model = SiameseCnnModel(emb_matrix=emd_matrix,
                                    word2idx=word2idx,
                                    filters_nums=args.filters_nums,
                                    kernel_sizes=args.kernel_sizes,
                                    dense_units=args.dense_units,
                                    label_count=args.label_count,
                                    category_count=category_count,
                                    query_len=args.query_len,
                                    shared=args.feature_shared,
                                    add_feature=args.add_features)
        elif "siamese_RNN" in args.model_type:
            model = SiameseRnnModel(emb_matrix=emd_matrix,
                                    word2idx=word2idx,
                                    hidden_units=args.hidden_units,
                                    dense_units=args.dense_units,
                                    label_count=args.label_count,
                                    category_count=category_count,
                                    query_len=args.query_len,
                                    mask_zero=args.mask_zero,
                                    bidirection=args.bi_direction,
                                    shared=args.feature_shared,
                                    add_feature=args.add_features)
        model_name = model.__class__.__name__
        model = model.get_model()

        logger.info("***** Running training *****")
        logger.info("  Model Class Name = %s", model_name)
        logger.info("  Num examples = %d", len(train_data))
        logger.info("  Num Epochs = %d", args.epoch)

        model.compile(optimizer='adam',
                      loss="binary_crossentropy",
                      metrics=["acc"])
        early_stopping = EarlyStopping(monitor="val_acc",
                                       patience=3,
                                       mode="max")
        evaluator = Evaluator(dev_ds=dev_ds,
                              model_name=model_name,
                              is_bert_model=False,
                              dev_label=dev_data['label'])

        # Step3: Train Model
        history = model.fit(train_ds,
                            callbacks=[early_stopping, evaluator],
                            epochs=args.epoch,
                            steps_per_epoch=len(train_data) // args.batch_size,
                            validation_data=dev_ds,
                            validation_steps=len(dev_data) // args.batch_size)

        # Step4 : Save model and trainLogs
        logger.info("***** Training Logs *****")

        for epoch in history.epoch:
            logger.info("Epoch %d", epoch)
            logger.info("train_loss:%f train_acc:%f val_loss:%f val_acc:%f",
                        history.history.get("loss")[epoch],
                        history.history.get("acc")[epoch],
                        history.history.get("val_loss")[epoch],
                        history.history.get("val_acc")[epoch])
        #
        # time_stamp = datetime.datetime.now().strftime('%m-%d_%H-%M-%S')
        # path = './checkpoints/{}_{}.h5'.format(model_name, time_stamp)
        # model.save(path)

        model = load_model('./checkpoints/best_{}.h5'.format(model_name))
        y_pred = model.predict(test_ds)
        y_true = test_data["label"].values.reshape((-1, 1))

        y_pred[y_pred >= 0.5] = 1
        y_pred[y_pred < 0.5] = 0

        acc = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)

        logger.info("***** Pramaters *****")
        logger.info("  ModelName = %s", args.model_type)
        logger.info("  Add Features = %s", args.add_features)
        logger.info("  Embedding dims = %d", len(emd_matrix[0]))
        logger.info("  BatchSize = %d", args.batch_size)

        if "CNN" in args.model_type:
            logger.info("  kernel_sizes = %s", args.kernel_sizes)
            logger.info("  filters_nums = %s", args.filters_nums)
        elif "RNN" in args.model_type:
            logger.info("  hidden_units = %s", args.hidden_units)
            logger.info("  bi_direction = %s", args.bi_direction)

        logger.info("  dense_units = %s", args.dense_units)
        logger.info("  feature_shared = %s", args.feature_shared)
        logger.info("***** Testing Results *****")
        logger.info("  Acc = %f", acc)
        logger.info("  Precision = %f", precision)
        logger.info("  Recall = %f", recall)
        logger.info("  F1-score = %f", f1)
Exemplo n.º 20
0
def main():
    # load config file
    config = load_config(config_path)

    # build dict for token (vocab_dict) and char (vocab_c_dict)
    vocab_dict, vocab_c_dict = build_dict(vocab_path, vocab_char_path)

    # load pre-trained embedding
    # W_init: token index * token embeding
    # embed_dim: embedding dimension
    W_init, embed_dim = load_word2vec_embedding(word_embedding_path, vocab_dict)
    
    K = 3

    # generate train/valid examples
    train_data, sen_cut_train = generate_examples(train_path, vocab_dict, vocab_c_dict, config, "train")
    dev_data, sen_cut_dev = generate_examples(valid_path, vocab_dict, vocab_c_dict, config, "dev")

    #------------------------------------------------------------------------
    # training process begins
    hidden_size = config['nhidden']
    batch_size = config['batch_size']

    coref_model = model.CorefQA(hidden_size, batch_size, K, W_init, config).to(device)

    if len(sys.argv) > 4 and str(sys.argv[4]) == "load":
        try:
            coref_model.load_state_dict(torch.load(torch_model_p))
            print("saved model loaded")
        except Exception:
            print("no saved model")

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(coref_model.parameters(), lr=config['learning_rate']) # TODO: use hyper-params in paper

    iter_index = 0
    batch_acc_list = []
    batch_loss_list = []
    dev_acc_list = []

    max_iter = int(config['num_epochs'] * len(train_data) / batch_size)
    print("max iteration number: " + str(max_iter))

    while True:
        # building batch data
        # batch_xxx_data is a list of batch data (len 15)
        # [dw, m_dw, qw, m_qw, dc, m_dc, qc, m_qc, cd, m_cd, a, dei, deo, dri, dro]
        batch_train_data, sen_cut_batch = generate_batch_data(train_data, config, "train", -1, sen_cut_train)  # -1 means random sampling
        # dw, m_dw, qw, m_qw, dc, m_dc, qc, m_qc, cd, m_cd, a, dei, deo, dri, dro = batch_train_data

        print(len(sen_cut_batch))

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        dw, dc, qw, qc, cd, cd_m = extract_data(batch_train_data)
        cand_probs = coref_model(dw, dc, qw, qc, cd, cd_m, sen_cut_batch) # B x Cmax

        answer = torch.tensor(batch_train_data[10]).type(torch.LongTensor) # B x 1
        loss = criterion(cand_probs, answer)

        # evaluation process
        acc_batch = cal_acc(cand_probs, answer, batch_size)
        batch_acc_list.append(acc_batch)
        batch_loss_list.append(loss)
        dev_acc_list = evaluate_result(iter_index, config, dev_data, batch_acc_list, batch_loss_list, dev_acc_list, coref_model, sen_cut_dev)

        # save model
        if iter_index % config['model_save_frequency'] == 0 and len(sys.argv) > 4:
            torch.save(coref_model.state_dict(), torch_model_p)

        # back-prop
        loss.backward()
        optimizer.step()

        # check stopping criteria
        iter_index += 1
        if iter_index > max_iter: break
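`cal_acc(cand_probs, answer, batch_size)` above scores the batch by comparing the highest-probability candidate with the gold answer index. A minimal sketch consistent with that call (assumed, not the repository's definition):

import torch

def cal_acc(cand_probs, answer, batch_size):
    # fraction of the batch whose argmax candidate equals the answer index
    pred = torch.argmax(cand_probs, dim=1)
    return (pred == answer.view(-1)).float().sum().item() / batch_size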
Exemplo n.º 21
0
    def train(self, model_path, flag='train', mode='train'):
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
            os.chmod(model_path, 0o755)
        model_path = os.path.join(model_path, 'model.pkl')

        # load hyperparams
        hps = self.hps
        if mode == 'pretrain_G':
            for iteration in range(2200):
                data = next(self.data_loader)
                c, x = self.permute_data(data)
                # encode
                enc = self.encode_step(x)
                x_tilde = self.decode_step(enc, c)
                loss_rec = torch.mean(torch.abs(x_tilde - x))
                reset_grad([self.Encoder, self.Decoder])
                loss_rec.backward()
                grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
                self.ae_opt.step()
                # tb info
                info = {
                    f'{flag}/pre_loss_rec': loss_rec.item(),
                }
                slot_value = (iteration + 1, 2200) + tuple(
                    [value for value in info.values()])
                log = 'pre_G:[%06d/%06d], loss_rec=%.3f'
                print(log % slot_value)
                if iteration % 100 == 0:
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration + 1)
        elif mode == 'pretrain_D':
            for iteration in range(2200):
                data = next(self.data_loader)
                c, x = self.permute_data(data)
                # encode
                enc = self.encode_step(x)
                # classify speaker
                logits = self.clf_step(enc)
                loss_clf = self.cal_loss(logits, c)
                # update
                reset_grad([self.SpeakerClassifier])
                loss_clf.backward()
                grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
                self.clf_opt.step()
                # calculate acc
                acc = cal_acc(logits, c)
                info = {
                    f'{flag}/pre_loss_clf': loss_clf.item(),
                    f'{flag}/pre_acc': acc,
                }
                slot_value = (iteration + 1, 2200) + tuple(
                    [value for value in info.values()])
                log = 'pre_D:[%06d/%06d], loss_clf=%.2f, acc=%.2f'
                print(log % slot_value)
                if iteration % 100 == 0:
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration + 1)
        elif mode == 'patchGAN':
            for iteration in range(1100):
                #=======train D=========#
                for step in range(hps.n_patch_steps):
                    data = next(self.data_loader)
                    c, x = self.permute_data(data)
                    ## encode
                    enc = self.encode_step(x)
                    # sample c
                    c_prime = self.sample_c(x.size(0))
                    # generator
                    x_tilde = self.gen_step(enc, c_prime)
                    # discriminator
                    w_dis, real_logits, gp = self.patch_step(x,
                                                             x_tilde,
                                                             is_dis=True)
                    # aux classification loss
                    loss_clf = self.cal_loss(real_logits, c)
                    loss = -hps.beta_dis * w_dis + hps.beta_clf * loss_clf + hps.lambda_ * gp
                    reset_grad([self.PatchDiscriminator])
                    loss.backward()
                    grad_clip([self.PatchDiscriminator],
                              self.hps.max_grad_norm)
                    self.patch_opt.step()
                    # calculate acc
                    acc = cal_acc(real_logits, c)
                    info = {
                        f'{flag}/w_dis': w_dis.item(),
                        f'{flag}/gp': gp.item(),
                        f'{flag}/real_loss_clf': loss_clf.item(),
                        f'{flag}/real_acc': acc,
                    }
                    slot_value = (step, iteration + 1, 1100) + tuple(
                        [value for value in info.values()])
                    log = 'patch_D-%d:[%06d/%06d], w_dis=%.2f, gp=%.2f, loss_clf=%.2f, acc=%.2f'
                    print(log % slot_value)
                    if iteration % 100 == 0:
                        for tag, value in info.items():
                            self.logger.scalar_summary(tag, value,
                                                       iteration + 1)
                #=======train G=========#
                data = next(self.data_loader)
                c, x = self.permute_data(data)
                # encode
                enc = self.encode_step(x)
                # sample c
                c_prime = self.sample_c(x.size(0))
                # generator
                x_tilde = self.gen_step(enc, c_prime)
                # discriminator
                loss_adv, fake_logits = self.patch_step(x,
                                                        x_tilde,
                                                        is_dis=False)
                # aux classification loss
                loss_clf = self.cal_loss(fake_logits, c_prime)
                loss = hps.beta_clf * loss_clf + hps.beta_gen * loss_adv
                reset_grad([self.Generator])
                loss.backward()
                grad_clip([self.Generator], self.hps.max_grad_norm)
                self.gen_opt.step()
                # calculate acc
                acc = cal_acc(fake_logits, c_prime)
                info = {
                    f'{flag}/loss_adv': loss_adv.item(),
                    f'{flag}/fake_loss_clf': loss_clf.item(),
                    f'{flag}/fake_acc': acc,
                }
                slot_value = (iteration + 1, 1100) + tuple(
                    [value for value in info.values()])
                log = 'patch_G:[%06d/%06d], loss_adv=%.2f, loss_clf=%.2f, acc=%.2f'
                print(log % slot_value)
                if iteration % 100 == 0:
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration + 1)
                if iteration % 1000 == 0 or iteration + 1 == hps.patch_iters:
                    self.save_model(model_path, iteration + hps.iters)
        elif mode == 'train':
            for iteration in range(1100):
                # calculate current alpha
                if iteration < hps.lat_sched_iters:
                    current_alpha = hps.alpha_enc * (iteration /
                                                     hps.lat_sched_iters)
                else:
                    current_alpha = hps.alpha_enc
                #==================train D==================#
                for step in range(hps.n_latent_steps):
                    data = next(self.data_loader)
                    c, x = self.permute_data(data)
                    # encode
                    enc = self.encode_step(x)
                    # classify speaker
                    logits = self.clf_step(enc)
                    loss_clf = self.cal_loss(logits, c)
                    loss = hps.alpha_dis * loss_clf
                    # update
                    reset_grad([self.SpeakerClassifier])
                    loss.backward()
                    grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
                    self.clf_opt.step()
                    # calculate acc
                    acc = cal_acc(logits, c)
                    info = {
                        f'{flag}/D_loss_clf': loss_clf.item(),
                        f'{flag}/D_acc': acc,
                    }
                    slot_value = (step, iteration + 1, 1100) + tuple(
                        [value for value in info.values()])
                    log = 'D-%d:[%06d/%06d], loss_clf=%.2f, acc=%.2f'
                    print(log % slot_value)
                    if iteration % 100 == 0:
                        for tag, value in info.items():
                            self.logger.scalar_summary(tag, value,
                                                       iteration + 1)
                #==================train G==================#
                data = next(self.data_loader)
                c, x = self.permute_data(data)
                # encode
                enc = self.encode_step(x)
                # decode
                x_tilde = self.decode_step(enc, c)
                loss_rec = torch.mean(torch.abs(x_tilde - x))
                # classify speaker
                logits = self.clf_step(enc)
                acc = cal_acc(logits, c)
                loss_clf = self.cal_loss(logits, c)
                # maximize classification loss
                loss = loss_rec - current_alpha * loss_clf
                reset_grad([self.Encoder, self.Decoder])
                loss.backward()
                grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
                self.ae_opt.step()
                info = {
                    f'{flag}/loss_rec': loss_rec.item(),
                    f'{flag}/G_loss_clf': loss_clf.item(),
                    f'{flag}/alpha': current_alpha,
                    f'{flag}/G_acc': acc,
                }
                slot_value = (iteration + 1, 1100) + tuple(
                    [value for value in info.values()])
                log = 'G:[%06d/%06d], loss_rec=%.3f, loss_clf=%.2f, alpha=%.2e, acc=%.2f'
                print(log % slot_value)
                if iteration % 100 == 0:
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration + 1)
                if iteration % 1000 == 0 or iteration + 1 == hps.iters:
                    self.save_model(model_path, iteration)
Exemplo n.º 22
0
def main(args):
    if torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.gpu))
        is_cuda = True
    else:
        device = torch.device("cpu")
        is_cuda = False

    src_loader, tgt_loader = get_data(args)

    model = Net(task=args.task).to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    if args.resume:
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        model.load_state_dict(checkpoint['state_dict'])

        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))

    best_acc = 0
    best_label = []
    best_result = []

    # create centroids for known classes
    all_centroids = Centroids(args.class_num - 1, 100, use_cuda=is_cuda)

    try:
        # start training
        for epoch in range(args.epochs):
            data = (src_loader, tgt_loader, all_centroids)

            all_centroids = train(model, optimizer, data, epoch, device, args)

            result, gt_label, acc = test(model, tgt_loader, epoch, device,
                                         args)

            is_best = acc > best_acc
            if is_best:
                best_acc = acc
                best_label = gt_label
                best_result = result

            utils.save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc
                }, is_best, args.check_dir)

        print("------Best-------")
        utils.cal_acc(best_label, best_result, args.class_num)

    except KeyboardInterrupt:
        print("------Best-------")
        utils.cal_acc(best_label, best_result, args.class_num)
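`utils.save_checkpoint(state, is_best, args.check_dir)` above is not shown; a conventional implementation writes the latest state and copies it to a separate "best" file whenever the accuracy improves (a sketch of the common pattern, not necessarily this repository's code):

import os
import shutil
import torch

def save_checkpoint(state, is_best, save_dir, filename='checkpoint.pth.tar'):
    # always save the most recent state, and keep a copy of the best one so far
    path = os.path.join(save_dir, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(save_dir, 'model_best.pth.tar'))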
Exemplo n.º 23
0
            label_placeholder: training_label}

        batch_loss, batch_output, _, summary = sess.run([loss, network_output, train_op, summary_op], feed_dict=train_feed_dict)

        if epoch == (para.total_epochs * 0.5) or epoch == (para.total_epochs * 0.7):
            epoch_learning_rate = epoch_learning_rate / 10

        if step % 2000 == 0:
            print('Validating--------------------------------')
            for _ in range(5):
                validing_array, validing_label = sess.run([valid_feature, valid_label])
                valid_feed_dict = {
                    inputs_placeholder: validing_array,
                    label_placeholder: validing_label}
                valid_batch_loss, valid_batch_output = sess.run([loss, network_output], feed_dict=valid_feed_dict)
                test_accuracy = cal_acc(validing_label, valid_batch_output, para.batch_size)
                print('batch_accuracy: %f' % test_accuracy)
                print('batch_loss: %f' % valid_batch_loss)
            print('Finish Validating-------------------------')

        if step % 20 == 0:
            batch_accuracy = cal_acc(training_label, batch_output, para.batch_size)
            print('steps: %d / %d' % (step, total_steps))
            print('accuracy: %f' % batch_accuracy)
            print('loss: %f' % batch_loss)
            print('cur_lr: %f' % epoch_learning_rate)
            print('------------------------------------------')
            
        if step % 2000 == 0:
            #store model
            print('storing model')
Exemplo n.º 24
0
        # record best result
        BEST_RESULT = {
            "h_acc": 0,
            "epoch": 0
        }

        for epoch in range(CONFIG['start_epoch'], CONFIG['end_epoch']):

            # train
            train_metrics = model_epoch(loss_name="trainval", epoch=epoch, model=model, neg_sample=CONFIG['neg_sample'],
                                        data_loader=train_loader, concepts=concepts,
                                        optimizer=optimizer, writer=writer, debug=CONFIG['debug'])

            for g in [False, True]:
                record_name = 'train_g' if g else 'train'
                train_class, train_acc = utils.cal_acc(train_metrics, g)
                writer.add_scalar(record_name + '_acc', train_acc * 100, epoch)

                if CONFIG['skewness']:
                    train_skew = utils.skewness(train_metrics, g)
                    writer.add_scalar(record_name + '_skewness', train_skew, epoch)

            ######################################################################################
            # test
            record = {tn: {'acc': 0.0, 'class': None} for tn in STATE['split_list'][1:]}
            record.update({tn + '_g': {'acc': 0.0, 'class': None} for tn in STATE['split_list'][1:]})

            for tn in STATE['split_list'][1:]:

                test_metric = model_epoch(loss_name=tn, epoch=epoch, model=model,
                                          data_loader=test_loaders[tn], concepts=concepts,
Exemplo n.º 25
0
 for step in range(hps.n_latent_steps):
     data = next(self.data_loader)
     c, x = self.permute_data(data)
     # encode
     enc = self.encode_step(x)
     # classify speaker
     logits = self.clf_step(enc)
     loss_clf = self.cal_loss(logits, c)
     loss = hps.alpha_dis * loss_clf
     # update 
     reset_grad([self.SpeakerClassifier])
     loss.backward()
     grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
     self.clf_opt.step()
     # calculate acc
     acc = cal_acc(logits, c)
     info = {
         f'{flag}/D_loss_clf': loss_clf.item(),
         f'{flag}/D_acc': acc,
     }
     slot_value = (step, iteration + 1, hps.iters) + tuple([value for value in info.values()])
     log = 'D-%d:[%06d/%06d], loss_clf=%.2f, acc=%.2f'
     print(log % slot_value)
     if iteration % 100 == 0:
         for tag, value in info.items():
             self.logger.scalar_summary(tag, value, iteration + 1)
 #==================train G==================#
 data = next(self.data_loader)
 c, x = self.permute_data(data)
 # encode
 enc = self.encode_step(x)
Exemplo n.º 26
0
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    net = Question_Classifier(args.bert_mode,
                              args.bert_pretrain,
                              num_classes=3)

    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    if args.resume_epoch != 0:
        runs = sorted(
            glob.glob(
                os.path.join(save_dir_root, 'run', args.train_fold, 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) if runs else 0
    else:
        runs = sorted(
            glob.glob(
                os.path.join(save_dir_root, 'run', args.train_fold, 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0

    if args.run_id >= 0:
        run_id = args.run_id

    save_dir = os.path.join(save_dir_root, 'run', args.train_fold,
                            'run_' + str(run_id))
    log_dir = os.path.join(
        save_dir,
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' +
        socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    logger = open(os.path.join(save_dir, 'log.txt'), 'w')
    logger.write(
        'optim: SGD \nlr=%.4f\nweight_decay=%.4f\nmomentum=%.4f\nupdate_lr_every=%d\nseed=%d\n'
        % (args.lr, args.weight_decay, args.momentum, args.update_lr_every,
           args.seed))

    if not os.path.exists(os.path.join(save_dir, 'models')):
        os.makedirs(os.path.join(save_dir, 'models'))

    if args.resume_epoch == 0:
        print('Training from scratch...')
    else:
        net_resume_path = os.path.join(
            save_dir, 'models',
            'mcnet_epoch-' + str(args.resume_epoch - 1) + '.pth')
        print('Initializing weights from: {}, epoch: {}...'.format(
            save_dir, args.resume_epoch - 1))
        net.load_state_dict(
            torch.load(net_resume_path,
                       map_location=lambda storage, loc: storage))

    torch.cuda.set_device(device=0)
    net.cuda()

    net_optim = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    trainset0 = Question_Dataset('train0',
                                 dictionary,
                                 args.data_root,
                                 question_len=12)
    trainset1 = Question_Dataset('train1',
                                 dictionary,
                                 args.data_root,
                                 question_len=12)
    trainset2 = Question_Dataset('train2',
                                 dictionary,
                                 args.data_root,
                                 question_len=12)
    valset = Question_Dataset('val',
                              dictionary,
                              args.data_root,
                              question_len=12)
    testset = Question_Dataset('test',
                               dictionary,
                               args.data_root,
                               question_len=12)

    trainloader0 = DataLoader(trainset0,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2)
    trainloader1 = DataLoader(trainset1,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2)
    trainloader2 = DataLoader(trainset2,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2)
    valloader = DataLoader(valset,
                           batch_size=args.batch_size,
                           shuffle=False,
                           num_workers=2)
    testloader = DataLoader(testset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=2)

    num_iter_tr = len(trainloader0)
    nitrs = args.resume_epoch * num_iter_tr
    nsamples = args.batch_size * nitrs
    print('each_epoch_num_iter: %d' % (num_iter_tr))

    global_step = 0

    epoch_losses = []
    recent_losses = []
    start_t = time.time()
    print('Training Network')

    for epoch in range(args.resume_epoch, args.nepochs):

        net.train()

        epoch_losses = []

        for ii, (sample_batched0, sample_batched1,
                 sample_batched2) in enumerate(
                     zip(trainloader0, trainloader1, trainloader2)):
            question0, label0 = sample_batched0['question'], sample_batched0[
                'label']
            question0, label0 = question0.cuda(), label0.cuda()
            question1, label1 = sample_batched1['question'], sample_batched1[
                'label']
            question1, label1 = question1.cuda(), label1.cuda()
            question2, label2 = sample_batched2['question'], sample_batched2[
                'label']
            question2, label2 = question2.cuda(), label2.cuda()

            global_step += args.batch_size

            out0 = net.forward(question0)
            out1 = net.forward(question1)
            out2 = net.forward(question2)

            loss0 = utils.CELoss(logit=out0, target=label0, reduction='mean')
            loss1 = utils.CELoss(logit=out1, target=label1, reduction='mean')
            loss2 = utils.CELoss(logit=out2, target=label2, reduction='mean')
            loss = (loss0 + loss1 + loss2) / 3

            trainloss = loss.item()
            epoch_losses.append(trainloss)
            if len(recent_losses) < args.log_every:
                recent_losses.append(trainloss)
            else:
                recent_losses[nitrs % len(recent_losses)] = trainloss
            net_optim.zero_grad()
            loss.backward()
            net_optim.step()

            nitrs += 1
            nsamples += args.batch_size

            if nitrs % args.log_every == 0:
                meanloss = sum(recent_losses) / len(recent_losses)

                print('epoch: %d ii: %d trainloss: %.2f timecost:%.2f secs' %
                      (epoch, ii, meanloss, time.time() - start_t))
                writer.add_scalar('data/trainloss', meanloss, nsamples)

        # validation
        net.eval()
        val_acc = 0.0
        test_acc = 0.0

        for ii, sample_batched in enumerate(valloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            val_acc += (tmp_acc * question.shape[0])
        val_acc /= len(valset)

        for ii, sample_batched in enumerate(testloader):
            question, label = sample_batched['question'], sample_batched[
                'label']
            question, label = question.cuda(), label.cuda()

            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            test_acc += (tmp_acc * question.shape[0])
        test_acc /= len(testset)

        print('Validation:')
        print('epoch: %d, val_questions: %d val_acc: %.4f' %
              (epoch, len(valset), val_acc))
        print('epoch: %d, test_questions: %d test_acc: %.4f' %
              (epoch, len(testset), test_acc))
        writer.add_scalar('data/valid_acc', val_acc, nsamples)

        if epoch % args.save_every == args.save_every - 1:
            net_save_path = os.path.join(
                save_dir, 'models',
                'question_classifier_epoch-' + str(epoch) + '.pth')
            torch.save(net.state_dict(), net_save_path)
            print("Save net at {}\n".format(net_save_path))

        if epoch % args.update_lr_every == args.update_lr_every - 1:
            lr_ = utils.lr_poly(args.lr, epoch, args.nepochs, 0.9)
            print('(poly lr policy) learning rate: ', lr_)
            net_optim = optim.SGD(net.parameters(),
                                  lr=lr_,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
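`utils.lr_poly` above implements the familiar polynomial ("poly") learning-rate decay; a plausible version matching the call `lr_poly(args.lr, epoch, args.nepochs, 0.9)` (assumed, not the repository's code):

def lr_poly(base_lr, iter_, max_iter, power=0.9):
    # polynomial decay from base_lr down to 0 over max_iter steps
    return base_lr * ((1 - float(iter_) / max_iter) ** power)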
Exemplo n.º 27
0
import sys

import numpy as np
import torch

from utils import same_seeds  # assumed to be provided by utils, as used below
from utils import plot_scatter
from utils import cal_acc

from model_baseline import AE
from dataset_baseline import Image_Dataset
from clustering_baseline import predict
from clustering_baseline import inference

# NOTE: these "strong" imports shadow the "baseline" imports above; keep only the
# set that matches the checkpoint being evaluated
from model_strong import AE
from dataset_strong import Image_Dataset
from clustering_strong import predict
from clustering_strong import inference

same_seeds(0)

model_filename = sys.argv[1]  # ~/checkpoints/baseline.pth
input_filename2 = sys.argv[2]  # ~/Downloads/dataset/valX.npy
input_filename3 = sys.argv[3]  # ~/Downloads/dataset/valY.npy
valX = np.load(input_filename2)
valY = np.load(input_filename3)

model = AE().cuda()
model.load_state_dict(torch.load(model_filename))
model.eval()

latents = inference(valX, model)
pred_from_latent, emb_from_latent = predict(latents)
acc_latent = cal_acc(valY, pred_from_latent)
print('The clustering accuracy is:', acc_latent)
print('The clustering result:')
plot_scatter(emb_from_latent, valY, savefig='p1_baseline.png')
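`cal_acc(valY, pred_from_latent)` above measures binary clustering accuracy; since cluster ids are arbitrary, a common definition tries both label assignments and keeps the better one (a sketch consistent with the usage here, not necessarily the original course code):

import numpy as np

def cal_acc(gt, pred):
    # clustering accuracy that is invariant to flipping the two cluster labels
    gt, pred = np.asarray(gt), np.asarray(pred)
    acc = np.mean(gt == pred)
    return max(acc, 1 - acc)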
Exemplo n.º 28
0
def train(args, validate=False, label=None):
    ## set pre-process
    if validate:
        dset_loaders = data_load_y(args, label)
    else:
        dset_loaders = data_load(args)
    class_num = args.class_num
    class_weight_src = torch.ones(class_num, ).cuda()
    ##################################################################################################

    ## set base network
    if args.net == 'resnet101':
        netG = utils.ResBase101().cuda()
    elif args.net == 'resnet50':
        netG = utils.ResBase50().cuda()

    netF = utils.ResClassifier(class_num=class_num,
                               feature_dim=netG.in_features,
                               bottleneck_dim=args.bottleneck_dim).cuda()

    max_len = max(len(dset_loaders["source"]), len(dset_loaders["target"]))
    args.max_iter = args.max_epoch * max_len

    ad_flag = False
    if args.method in {'DANN', 'DANNE'}:
        ad_net = utils.AdversarialNetwork(args.bottleneck_dim,
                                          1024,
                                          max_iter=args.max_iter).cuda()
        ad_flag = True
    if args.method in {'CDAN', 'CDANE'}:
        ad_net = utils.AdversarialNetwork(args.bottleneck_dim * class_num,
                                          1024,
                                          max_iter=args.max_iter).cuda()
        random_layer = None
        ad_flag = True

    optimizer_g = optim.SGD(netG.parameters(), lr=args.lr * 0.1)
    optimizer_f = optim.SGD(netF.parameters(), lr=args.lr)
    if ad_flag:
        optimizer_d = optim.SGD(ad_net.parameters(), lr=args.lr)

    base_network = nn.Sequential(netG, netF)

    if args.pl.startswith('atdoc_na'):
        mem_fea = torch.rand(len(dset_loaders["target"].dataset),
                             args.bottleneck_dim).cuda()
        mem_fea = mem_fea / torch.norm(mem_fea, p=2, dim=1, keepdim=True)
        mem_cls = torch.ones(len(dset_loaders["target"].dataset),
                             class_num).cuda() / class_num

    if args.pl == 'atdoc_nc':
        mem_fea = torch.rand(args.class_num, args.bottleneck_dim).cuda()
        mem_fea = mem_fea / torch.norm(mem_fea, p=2, dim=1, keepdim=True)

    source_loader_iter = iter(dset_loaders["source"])
    target_loader_iter = iter(dset_loaders["target"])

    ####
    list_acc = []
    best_ent = 100

    for iter_num in range(1, args.max_iter + 1):
        base_network.train()
        lr_scheduler(optimizer_g,
                     init_lr=args.lr * 0.1,
                     iter_num=iter_num,
                     max_iter=args.max_iter)
        lr_scheduler(optimizer_f,
                     init_lr=args.lr,
                     iter_num=iter_num,
                     max_iter=args.max_iter)
        if ad_flag:
            lr_scheduler(optimizer_d,
                         init_lr=args.lr,
                         iter_num=iter_num,
                         max_iter=args.max_iter)

        try:
            inputs_source, labels_source = next(source_loader_iter)
        except StopIteration:
            source_loader_iter = iter(dset_loaders["source"])
            inputs_source, labels_source = next(source_loader_iter)
        try:
            inputs_target, _, idx = next(target_loader_iter)
        except StopIteration:
            target_loader_iter = iter(dset_loaders["target"])
            inputs_target, _, idx = next(target_loader_iter)

        inputs_source, inputs_target, labels_source = inputs_source.cuda(
        ), inputs_target.cuda(), labels_source.cuda()

        if args.method == 'srconly' and args.pl == 'none':
            features_source, outputs_source = base_network(inputs_source)
        else:
            features_source, outputs_source = base_network(inputs_source)
            features_target, outputs_target = base_network(inputs_target)
            features = torch.cat((features_source, features_target), dim=0)
            outputs = torch.cat((outputs_source, outputs_target), dim=0)
            softmax_out = nn.Softmax(dim=1)(outputs)

        eff = utils.calc_coeff(iter_num, max_iter=args.max_iter)
        if args.method[-1] == 'E':
            entropy = loss.Entropy(softmax_out)
        else:
            entropy = None

        if args.method in {'CDAN', 'CDANE'}:
            transfer_loss = loss.CDAN([features, softmax_out], ad_net, entropy,
                                      eff, random_layer)

        elif args.method in {'DANN', 'DANNE'}:
            transfer_loss = loss.DANN(features, ad_net, entropy, eff)

        elif args.method == 'DAN':
            transfer_loss = eff * loss.DAN(features_source, features_target)
        elif args.method == 'DAN_Linear':
            transfer_loss = eff * loss.DAN_Linear(features_source,
                                                  features_target)

        elif args.method == 'JAN':
            transfer_loss = eff * loss.JAN(
                [features_source, softmax_out[:args.batch_size, :]],
                [features_target, softmax_out[args.batch_size:, :]])
        elif args.method == 'JAN_Linear':
            transfer_loss = eff * loss.JAN_Linear(
                [features_source, softmax_out[:args.batch_size, :]],
                [features_target, softmax_out[args.batch_size:, :]])

        elif args.method == 'CORAL':
            transfer_loss = eff * loss.CORAL(features_source, features_target)
        elif args.method == 'DDC':
            transfer_loss = loss.MMD_loss()(features_source, features_target)

        elif args.method == 'srconly':
            transfer_loss = torch.tensor(0.0).cuda()
        else:
            raise ValueError('Method cannot be recognized.')

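        # Label-smoothed cross-entropy on the source batch, re-weighted per
        # class by class_weight_src.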
        src_ = loss.CrossEntropyLabelSmooth(reduction='none',
                                            num_classes=class_num,
                                            epsilon=args.smooth)(
                                                outputs_source, labels_source)
        weight_src = class_weight_src[labels_source].unsqueeze(0)
        classifier_loss = torch.sum(
            weight_src * src_) / (torch.sum(weight_src).item())
        total_loss = transfer_loss + classifier_loss

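        # Target-side regularizer selected by args.pl, scaled by args.tar_par
        # and (for most options) the linear ramp eff = iter_num / max_iter.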
        eff = iter_num / args.max_iter

        if args.pl == 'none':
            pass

        elif args.pl == 'square':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            square_loss = -torch.sqrt((softmax_out**2).sum(dim=1)).mean()
            total_loss += args.tar_par * eff * square_loss

        elif args.pl == 'bsp':
            sigma_loss = bsp_loss(features)
            total_loss += args.tar_par * sigma_loss

        elif args.pl == 'bnm':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            bnm_loss = -torch.norm(softmax_out, 'nuc')
            cof = torch.tensor(
                np.sqrt(np.min(softmax_out.size())) / softmax_out.size(0))
            bnm_loss *= cof
            total_loss += args.tar_par * eff * bnm_loss

        elif args.pl == "mcc":
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            ent_weight = 1 + torch.exp(-loss.Entropy(softmax_out)).detach()
            ent_weight /= ent_weight.sum()
            cov_tar = softmax_out.t().mm(
                torch.diag(softmax_out.size(0) * ent_weight)).mm(softmax_out)
            mcc_loss = (torch.diag(cov_tar) / cov_tar.sum(dim=1)).mean()
            total_loss -= args.tar_par * eff * mcc_loss

        elif args.pl == 'ent':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            ent_loss = torch.mean(loss.Entropy(softmax_out))
            ent_loss /= torch.log(torch.tensor(class_num + 0.0))
            total_loss += args.tar_par * eff * ent_loss

        elif args.pl[0:3] == 'npl':
            softmax_out = nn.Softmax(dim=1)(outputs_target)
            softmax_out = softmax_out**2 / ((softmax_out**2).sum(dim=0))

            weight_, pred = torch.max(softmax_out, 1)
            loss_ = nn.CrossEntropyLoss(reduction='none')(outputs_target, pred)
            classifier_loss = torch.sum(
                weight_ * loss_) / (torch.sum(weight_).item())
            total_loss += args.tar_par * eff * classifier_loss

        elif args.pl == 'atdoc_nc':
            mem_fea_norm = mem_fea / torch.norm(
                mem_fea, p=2, dim=1, keepdim=True)
            dis = torch.mm(features_target.detach(), mem_fea_norm.t())
            _, pred = torch.max(dis, dim=1)
            classifier_loss = nn.CrossEntropyLoss()(outputs_target, pred)
            total_loss += args.tar_par * eff * classifier_loss

        elif args.pl.startswith('atdoc_na'):

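            # Build soft pseudo-labels from the K nearest memory-bank features
            # (each sample's own slot is excluded); the averaged stored label
            # gives both the predicted class and its confidence weight.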
            dis = -torch.mm(features_target.detach(), mem_fea.t())
            for di in range(dis.size(0)):
                dis[di, idx[di]] = torch.max(dis)
            _, p1 = torch.sort(dis, dim=1)

            w = torch.zeros(features_target.size(0), mem_fea.size(0)).cuda()
            for wi in range(w.size(0)):
                for wj in range(args.K):
                    w[wi][p1[wi, wj]] = 1 / args.K

            weight_, pred = torch.max(w.mm(mem_cls), 1)

            if args.pl == 'atdoc_na_now':
                classifier_loss = nn.CrossEntropyLoss()(outputs_target, pred)
            else:
                loss_ = nn.CrossEntropyLoss(reduction='none')(outputs_target,
                                                              pred)
                classifier_loss = torch.sum(
                    weight_ * loss_) / (torch.sum(weight_).item())
            total_loss += args.tar_par * eff * classifier_loss

        optimizer_g.zero_grad()
        optimizer_f.zero_grad()
        if ad_flag:
            optimizer_d.zero_grad()
        total_loss.backward()
        optimizer_g.step()
        optimizer_f.step()
        if ad_flag:
            optimizer_d.step()

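        # Momentum update of the 'atdoc_na' memory bank with freshly computed,
        # L2-normalized target features and (optionally sharpened) predictions.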
        if args.pl.startswith('atdoc_na'):
            base_network.eval()
            with torch.no_grad():
                features_target, outputs_target = base_network(inputs_target)
                features_target = features_target / torch.norm(
                    features_target, p=2, dim=1, keepdim=True)
                softmax_out = nn.Softmax(dim=1)(outputs_target)
                if args.pl == 'atdoc_na_nos':
                    outputs_target = softmax_out
                else:
                    outputs_target = softmax_out**2 / (
                        (softmax_out**2).sum(dim=0))

            mem_fea[idx] = (1.0 - args.momentum) * mem_fea[
                idx] + args.momentum * features_target.clone()
            mem_cls[idx] = (1.0 - args.momentum) * mem_cls[
                idx] + args.momentum * outputs_target.clone()

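        # Momentum update of the 'atdoc_nc' class prototypes with the mean
        # feature of each predicted class in the current target batch.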
        if args.pl == 'atdoc_nc':
            base_network.eval()
            with torch.no_grad():
                features_target, outputs_target = base_network(inputs_target)
                softmax_t = nn.Softmax(dim=1)(outputs_target)
                _, pred_t = torch.max(softmax_t, 1)
                onehot_t = torch.eye(args.class_num)[pred_t].cuda()
                center_t = torch.mm(features_target.t(),
                                    onehot_t) / (onehot_t.sum(dim=0) + 1e-8)

            mem_fea = (1.0 - args.momentum
                       ) * mem_fea + args.momentum * center_t.t().clone()

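        # Periodic evaluation on the test loader: log accuracy and mean
        # prediction entropy, and keep the lowest-entropy snapshot's
        # predictions as the reported validation result.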
        if iter_num % int(args.eval_epoch * max_len) == 0:
            base_network.eval()
            if args.dset == 'VISDA-C':
                acc, py, score, y, tacc = utils.cal_acc_visda(
                    dset_loaders["test"], base_network)
                args.out_file.write(tacc + '\n')
                args.out_file.flush()

                _ent = loss.Entropy(score)
                mean_ent = 0
                for ci in range(args.class_num):
                    mean_ent += _ent[py == ci].mean()
                mean_ent /= args.class_num

            else:
                acc, py, score, y = utils.cal_acc(dset_loaders["test"],
                                                  base_network)
                mean_ent = torch.mean(loss.Entropy(score))

            list_acc.append(acc * 100)
            if best_ent > mean_ent:
                best_ent = mean_ent
                val_acc = acc * 100
                best_y = y
                best_py = py
                best_score = score

            log_str = 'Task: {}, Iter:{}/{}; Accuracy = {:.2f}%; Mean Ent = {:.4f}'.format(
                args.name, iter_num, args.max_iter, acc * 100, mean_ent)
            args.out_file.write(log_str + '\n')
            args.out_file.flush()
            print(log_str + '\n')

    idx = np.argmax(np.array(list_acc))
    max_acc = list_acc[idx]
    final_acc = list_acc[-1]

    log_str = '\n==========================================\n'
    log_str += '\nVal Acc = {:.2f}\nMax Acc = {:.2f}\nFin Acc = {:.2f}\n'.format(
        val_acc, max_acc, final_acc)
    args.out_file.write(log_str + '\n')
    args.out_file.flush()

    # torch.save(base_network.state_dict(), osp.join(args.output_dir, args.log + ".pt"))
    # sio.savemat(osp.join(args.output_dir, args.log + ".mat"), {'y':best_y.cpu().numpy(),
    #     'py':best_py.cpu().numpy(), 'score':best_score.cpu().numpy()})

    return best_y.cpu().numpy().astype(np.int64)
Exemplo n.º 29

    def train_val(self):
        # Build record objs
        self.build_recorder()

        iter_per_epoch = len(
            self.train_loader.dataset) // self.cfg.train_batch_size
        if len(self.train_loader.dataset) % self.cfg.train_batch_size != 0:
            iter_per_epoch += 1

        for epoch in range(self.start_epoch,
                           self.start_epoch + self.cfg.n_epochs):

            self.model.train()

            self.train_time.reset()
            self.train_loss.reset()
            self.train_cls_acc.reset()
            self.train_pix_acc.reset()
            self.train_mIoU.reset()

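            # One training epoch: forward pass, loss, backward/step, then
            # timing and metric updates (classification or segmentation) with
            # optional TensorBoard logging.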
            for i, (image, label) in enumerate(self.train_loader):
                start_time = time.time()
                image_var = image.to(self.device)
                label_var = label.to(self.device)

                output = self.model(image_var)
                loss = self.criterion(output, label_var)

                self.optim.zero_grad()
                loss.backward()
                self.optim.step()

                end_time = time.time()

                self.train_time.update(end_time - start_time)
                self.train_loss.update(loss.item())

                if self.cfg.task == 'cls':
                    # Record classification accuracy
                    cls_acc = cal_acc(output, label_var)

                    # Update recorder
                    self.train_cls_acc.update(cls_acc.item())

                    if (i + 1) % self.cfg.log_step == 0:
                        print(
                            'Epoch[{0}][{1}/{2}]\t'
                            'Time {train_time.val:.3f} ({train_time.avg:.3f})\t'
                            'Loss {train_loss.val:.4f} ({train_loss.avg:.4f})\t'
                            'Accuracy {train_cls_acc.val:.4f} ({train_cls_acc.avg:.4f})'
                            .format(epoch + 1,
                                    i + 1,
                                    iter_per_epoch,
                                    train_time=self.train_time,
                                    train_loss=self.train_loss,
                                    train_cls_acc=self.train_cls_acc))

                    if self.cfg.use_tensorboard:
                        self.writer.add_scalar('train/loss', loss.item(),
                                               epoch * iter_per_epoch + i)
                        self.writer.add_scalar('train/accuracy',
                                               cls_acc.item(),
                                               epoch * iter_per_epoch + i)

                elif self.cfg.task == 'seg':
                    # Record mIoU and pixel-wise accuracy
                    pix_acc = cal_pixel_acc(output, label_var)
                    mIoU = cal_mIoU(output, label_var)[-1]
                    mIoU = torch.mean(mIoU)

                    # Update recorders
                    self.train_pix_acc.update(pix_acc.item())
                    self.train_mIoU.update(mIoU.item())

                    if (i + 1) % self.cfg.log_step == 0:
                        print(
                            'Epoch[{0}][{1}/{2}]\t'
                            'Time {train_time.val:.3f} ({train_time.avg:.3f})\t'
                            'Loss {train_loss.val:.4f} ({train_loss.avg:.4f})\t'
                            'Pixel-Acc {train_pix_acc.val:.4f} ({train_pix_acc.avg:.4f})\t'
                            'mIoU {train_mIoU.val:.4f} ({train_mIoU.avg:.4f})'.
                            format(epoch + 1,
                                   i + 1,
                                   iter_per_epoch,
                                   train_time=self.train_time,
                                   train_loss=self.train_loss,
                                   train_pix_acc=self.train_pix_acc,
                                   train_mIoU=self.train_mIoU))

                    if self.cfg.use_tensorboard:
                        self.writer.add_scalar('train/loss', loss.item(),
                                               epoch * iter_per_epoch + i)
                        self.writer.add_scalar('train/pix_acc', pix_acc.item(),
                                               epoch * iter_per_epoch + i)
                        self.writer.add_scalar('train/mIoU', mIoU.item(),
                                               epoch * iter_per_epoch + i)

                # FIXME: per-batch validation (every 100 iterations) is disabled for now;
                # validation runs once per val_step epochs below.
                # if (i + 1) % 100 == 0:
            if (epoch + 1) % self.cfg.val_step == 0:
                self.validate(epoch)

        # Close logging
        self.writer.close()
Exemplo n.º 30
        elif CONFIG['optim'] == 'Adam':
            optimizer = optim.Adam(params, np.float64(CONFIG['l_rate']))

        for epoch in range(1, CONFIG['end_epoch']):

            writer = SummaryWriter(PJ(SAVE_PATH, 'val' + str(val_times)))

            # training
            train_metrics = model_epoch(loss_name="train", epoch=epoch, model=model,
                                        data_loader=train_loader, concepts=concepts,
                                        optimizer=optimizer, writer=writer)

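            # utils.cal_acc is evaluated twice per epoch (g=False and g=True)
            # and both training accuracies are written to TensorBoard.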
            for g in [False, True]:
                record_name = 'train_g' if g else 'train'
                train_class, train_acc = utils.cal_acc(train_metrics, g)
                writer.add_scalar(record_name + '_acc', train_acc * 100, epoch)

            ######################################################################################

            # val
            record = {'val': {'acc': 0.0, 'class': None}}
            record.update({'val_g': {'acc': 0.0, 'class': None}})

            val_metric = model_epoch(loss_name="val", epoch=epoch, model=model,
                                     data_loader=val_loader, concepts=concepts,
                                     optimizer=optimizer, writer=writer)

            for g in [False, True]:
                record_name = 'val_g' if g else 'val'
                val_class, val_acc = utils.cal_acc(val_metric, g)