# Example 1
# 0
def evaluate(args, model, graph, labels, train_idx, val_idx, test_idx, use_labels, loss_fcn, evaluator):
    """Full-graph forward pass in eval mode; report accuracy and loss per split.

    Returns a 7-tuple:
        (train_acc, val_acc, test_acc, train_loss, val_loss, test_loss, pred).
    """
    model.eval()

    feat = graph.ndata["feat"]
    if use_labels:
        # Append label information for the training nodes to the features.
        feat = add_labels(feat, labels, train_idx, n_classes, device)

    pred = model(graph, feat)

    # Optional label-reuse refinement: write the model's own softmax output
    # into the label channel of val/test nodes, then predict again.
    if args.n_label_iters > 0 and args.use_labels:
        unlabel_idx = torch.cat([val_idx, test_idx])
        for _ in range(args.n_label_iters):
            feat[unlabel_idx, -n_classes:] = F.softmax(pred[unlabel_idx], dim=-1)
            pred = model(graph, feat)

    splits = (train_idx, val_idx, test_idx)
    train_loss, val_loss, test_loss = (
        loss_fcn(pred[idx], labels[idx]) for idx in splits
    )
    train_acc, val_acc, test_acc = (
        compute_acc(pred[idx], labels[idx], evaluator) for idx in splits
    )

    return (
        train_acc,
        val_acc,
        test_acc,
        train_loss,
        val_loss,
        test_loss,
        pred,
    )
def validate(epoch, model, device, dataloader, criterion, args, writer):
    """Validation loop: average loss/accuracy over `dataloader`, log to TensorBoard.

    Runs `model` in eval mode under `torch.no_grad()`, updates running
    averages of loss and accuracy, writes per-epoch scalars to `writer`,
    and returns (mean_loss, mean_acc).
    """
    progbar = tqdm(total=len(dataloader), desc='Val')

    loss_record = utils.RunningAverage()
    acc_record = utils.RunningAverage()
    model.eval()
    with torch.no_grad():
        # BUG FIX: the original also wrapped `dataloader` in a second tqdm,
        # producing two nested progress bars; `progbar` alone is enough.
        for batch_idx, (data, label, _, _) in enumerate(dataloader):
            data, label = data.to(device), label.to(device)
            output = model(data)
            loss = criterion(output, label)

            # measure accuracy and record loss
            # NOTE(review): acc[0] is divided by the batch size here —
            # presumably utils.compute_acc returns a correct-count; verify.
            acc = utils.compute_acc(output, label)
            acc_record.update(100 * acc[0].item() / data.size(0))
            loss_record.update(loss.item())
            progbar.set_description('Val (loss=%.4f)' % (loss_record()))
            progbar.update(1)
    # BUG FIX: close the progress bar so it does not corrupt later output.
    progbar.close()

    writer.add_scalar('validation/Loss_epoch', loss_record(), epoch)
    writer.add_scalar('validation/Acc_epoch', acc_record(), epoch)

    return loss_record(), acc_record()
# Example 3
# 0
    def train(self,
              sess,
              dataset,
              generate_session=None,
              is_train=True,
              ftest_name=FLAGS['agn_output_file'].value):
        """Run one pass over `dataset` in mini-batches.

        When `generate_session` is given it is appended to the dataset.
        In test mode (`is_train=False`) the output file is truncated first
        and per-batch results are written by `compute_acc`.

        Returns:
            (mean loss, mean acc, mean acc@1) over all batches.
        """
        st, ed = 0, 0
        loss, acc, acc_1 = [], [], []
        if generate_session:
            dataset = dataset + generate_session
        print("Get %s data:len(dataset) is %d " %
              ("training" if is_train else "testing", len(dataset)))
        if not is_train:
            # Truncate the output file so batch results can be appended later.
            fout = open(ftest_name, "w")
            fout.close()
        batch_size = FLAGS['batch_size'].value
        while ed < len(dataset):
            st = ed
            ed = min(ed + batch_size, len(dataset))
            batch_data = gen_batched_data(dataset[st:ed])
            outputs = self.step_decoder(
                sess, batch_data, forward_only=not is_train)
            loss.append(outputs[0])
            predict_id = outputs[1]  # [batch_size, length, 10]

            tmp_acc, tmp_acc_1 = compute_acc(batch_data["aims"],
                                             predict_id,
                                             batch_data["rec_lists"],
                                             batch_data["rec_mask"],
                                             batch_data["purchase"],
                                             ftest_name=ftest_name,
                                             output=(not is_train))
            acc.append(tmp_acc)
            acc_1.append(tmp_acc_1)
        if is_train:
            # Bump the persistent epoch counter stored in the TF graph.
            sess.run(self.epoch_add_op)
        return np.mean(loss), np.mean(acc), np.mean(acc_1)
# Example 4
# 0
    def test(self):
        """Evaluate the test network on one generated batch.

        Copies the current training weights into `test_net`, then runs a
        forward pass only (no optimizer step).

        Returns:
            (pointer, loss, acc, label).
        """
        # Mirror the latest training weights into the evaluation network.
        self.test_net.load_state_dict(self.train_net.state_dict())

        batch = self.gen_batch()
        seq = Variable(Tensor(batch[0]))
        seq_mask = Variable(torch.LongTensor(batch[1].astype('int64')))
        target = Variable(Tensor(batch[2]))
        label = Variable(torch.LongTensor(batch[3].astype('int64')))

        pointer = self.test_net(seq, seq_mask, target)
        loss = utils.compute_loss(pointer, label, target)
        acc = utils.compute_acc(pointer, label)
        return pointer, loss, acc, label
# Example 5
# 0
def test(val_dir=data_dir, val_text_dir=text_dir):
    """Restore the latest checkpoint and report average accuracy on val data.

    Args:
        val_dir: directory with validation images (defaults to data_dir).
        val_text_dir: directory with validation labels (defaults to text_dir).
    """
    # NOTE(review): is_training=True in a test routine — confirm intended.
    g = model.Graph(is_training=True)
    # BUG FIX: `'end= '` was previously passed as a second positional string
    # (a botched py2->py3 print conversion); pass it as the keyword argument.
    print('loading validation data, please wait---------------------', end=' ')
    val_feeder = utils.DataIterator2(data_dir=val_dir, text_dir=val_text_dir)
    print('***************get image: ', val_feeder.size)

    num_val_samples = val_feeder.size
    num_val_per_epoch = int(num_val_samples / FLAGS.batch_size)

    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(graph=g.graph, config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from ckpt{}'.format(ckpt))
        else:
            print('cannot restore')

        print(
            '=============================begin testing============================='
        )
        # BUG FIX: removed three nested `if True:` blocks (dead scaffolding,
        # likely left over from a stripped training loop).
        acc_avg = 0.0
        for cur_batch_cv in range(num_val_per_epoch):
            print(num_val_per_epoch)
            # Indices of the samples making up this validation batch.
            index_cv = [cur_batch_cv * FLAGS.batch_size + i
                        for i in range(FLAGS.batch_size)]
            val_inputs, val_seq_len, val_labels = val_feeder.input_index_generate_batch(
                index_cv)
            val_feed = {
                g.inputs: val_inputs,
                g.labels: val_labels,
                g.keep_prob_cv: 1  # no dropout at test time
            }
            predict_word_index, lr = sess.run(
                [g.logits, g.learning_rate], val_feed)
            print(val_labels[0], predict_word_index[0])
            acc = utils.compute_acc(val_labels, predict_word_index)
            acc_avg += acc
        acc_avg = acc_avg / num_val_per_epoch
        print("acc", acc_avg)
# Example 6
# 0
    def train(self):
        """One optimization step on a freshly generated batch.

        Returns:
            (pointer, loss, acc, label).
        """
        batch = self.gen_batch()
        seq = Variable(Tensor(batch[0]))
        seq_mask = Variable(torch.LongTensor(batch[1].astype('int64')))
        target = Variable(Tensor(batch[2]))
        label = Variable(torch.LongTensor(batch[3].astype('int64')))

        pointer = self.train_net(seq, seq_mask, target)
        loss = utils.compute_loss(pointer, label, target)
        acc = utils.compute_acc(pointer, label)

        # Standard backprop step on the training network.
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return pointer, loss, acc, label
# Example 7
# 0
def eval(model, criterion, optimizer, loader, batch_size, epoch_it, rep):
    """Evaluate `model` over `loader` without gradient updates.

    Returns:
        (mean_loss, mean_metric): for rep == 'occ' the metric is a
        one-element list [mean IoU]; for rep == 'sdf' it is the per-column
        mean of [acc_sign, acc_thres, iou].
    """
    model.eval()
    loss_collect = []
    metric_collect = []
    if rep == 'occ':
        sigmoid = torch.nn.Sigmoid()

    with tqdm(total=int(len(loader)), ascii=True) as pbar:
        with torch.no_grad():
            for mbatch in loader:
                img_input, points_input, values = mbatch
                img_input = Variable(img_input).cuda()
                points_input = Variable(points_input).cuda()
                values = Variable(values).cuda()

                # NOTE(review): zeroing grads inside a no_grad eval loop has
                # no effect on this function's results — presumably copied
                # from the train loop; kept to preserve behavior exactly.
                optimizer.zero_grad()

                logits = model(points_input, img_input)
                loss = criterion(logits, values)
                loss_collect.append(loss.data.cpu().item())

                if rep == 'occ':
                    probs = sigmoid(logits)
                    iou = utils.compute_iou(probs.detach().cpu().numpy(),
                                            values.detach().cpu().numpy())
                    metric_collect.append(iou)
                elif rep == 'sdf':
                    # acc_sign is sign IoU; acc_thres is accuracy within a
                    # threshold. More detailed explanation in utils.py.
                    acc_sign, acc_thres, iou = utils.compute_acc(
                        logits.detach().cpu().numpy(),
                        values.detach().cpu().numpy())
                    metric_collect.append([acc_sign, acc_thres, iou])
                pbar.update(1)

    mean_loss = np.mean(np.array(loss_collect))
    if rep == 'occ':
        mean_metric = [np.mean(np.concatenate(metric_collect))]
    else:
        mean_metric = np.mean(np.array(metric_collect), axis=0)

    return mean_loss, mean_metric
def test(model, device, dataloader, criterion, args):
    """Test loop: average loss and accuracy over `dataloader` (no gradients)."""
    loss_record = utils.RunningAverage()
    acc_record = utils.RunningAverage()
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, label, _, _) in enumerate(tqdm(dataloader)):
            data = data.to(device)
            label = label.to(device)
            output = model(data)
            loss = criterion(output, label)

            # Fold this batch's accuracy (percentage per sample) and loss
            # into the running averages.
            # NOTE(review): acc[0] / batch size — presumably compute_acc
            # returns a correct-count; verify against its definition.
            acc = utils.compute_acc(output, label)
            acc_record.update(100 * acc[0].item() / data.size(0))
            loss_record.update(loss.item())

    return loss_record(), acc_record()
# Example 9
# 0
def train_disc(
    disc, gen, data, device, batch_size, real_label, nz,
    fake_label, criterion_src, criterion_cls, optimD,
    num_classes=2,
    ):
    """One discriminator update for an AC-GAN-style setup.

    The discriminator returns (src, cls): a real/fake source score and a
    class prediction. It is trained on one real batch and one generated
    fake batch, summing the source loss and the classification loss.

    Args:
        disc, gen: discriminator and generator networks.
        data: (images, class_labels) real batch.
        device: torch device for all tensors.
        batch_size: number of samples per batch.
        real_label, fake_label: target values for `criterion_src`.
        nz: generator latent dimensionality.
        criterion_src, criterion_cls: source and class losses.
        optimD: discriminator optimizer.
        num_classes: number of classes to draw fake labels from
            (generalizes the previously hard-coded 2; default preserves
            the original behavior).

    Returns:
        (accuracy on the real batch, total loss errD.item(), D(x),
        D(G(z)), the generated fake batch).
    """
    # ---- Real images ----
    disc.zero_grad()
    real_image, real_class = data
    real_image = real_image.to(device)
    real_class = real_class.to(device)
    src_label = torch.full((batch_size,), real_label, device=device)
    cls_label = real_class.view(batch_size,)

    src, cls = disc(real_image)
    errD_src_real = criterion_src(src, src_label)
    errD_cls_real = criterion_cls(cls, cls_label)
    errD_real = errD_src_real + errD_cls_real
    errD_real.backward()
    D_x = src.mean().item()
    accuracy = compute_acc(cls, cls_label)

    # ---- Fake images ----
    noise = torch.randn(batch_size, nz, 1, 1, device=device)
    # Fake class labels drawn uniformly from [0, num_classes).
    cls_label = torch.randint(0, num_classes, (batch_size,), device=device)
    src_label.fill_(fake_label)

    fake = gen(noise)
    # detach() keeps generator weights out of this backward pass.
    src, cls = disc(fake.detach())
    errD_src_fake = criterion_src(src, src_label)
    errD_cls_fake = criterion_cls(cls, cls_label)
    errD_fake = errD_src_fake + errD_cls_fake
    errD_fake.backward()
    D_G_z1 = src.mean().item()
    errD = errD_real + errD_fake

    optimD.step()

    return accuracy, errD.item(), D_x, D_G_z1, fake
# Example 10
# 0
def train(args, model, graph, labels, train_idx, val_idx, test_idx, optimizer, teacher_output, loss_fcn, evaluator, epoch=1):
    """One optimization step on the full graph, with optional label reuse
    and optional knowledge distillation from `teacher_output`.

    Returns:
        (train accuracy, loss tensor).
    """
    model.train()

    feat = graph.ndata["feat"]

    # Randomly hide a fraction (mask_rate) of the training labels; the
    # hidden nodes are the ones the loss is computed on. The mask is drawn
    # identically in both branches, so the computation is shared here.
    mask = torch.rand(train_idx.shape) < args.mask_rate
    train_pred_idx = train_idx[~mask]
    if args.use_labels:
        train_labels_idx = train_idx[mask]
        feat = add_labels(feat, labels, train_labels_idx, n_classes, device)

    optimizer.zero_grad()
    pred = model(graph, feat)
    if args.n_label_iters > 0 and args.use_labels:
        # Label reuse: write detached softmax predictions back into the
        # label channel of every node without a visible ground-truth label,
        # then predict again.
        unlabel_idx = torch.cat([train_pred_idx, val_idx, test_idx])
        for _ in range(args.n_label_iters):
            pred = pred.detach()
            torch.cuda.empty_cache()
            feat[unlabel_idx, -n_classes:] = F.softmax(pred[unlabel_idx], dim=-1)
            pred = model(graph, feat)

    loss = loss_fcn(pred[train_pred_idx], labels[train_pred_idx])
    if args.mode == "student":
        # Knowledge distillation: blend hard-label loss with KD loss.
        loss_kd = loss_kd_only(pred, teacher_output, args.temp)
        loss = loss * (1 - args.alpha) + loss_kd * args.alpha
    loss.backward()
    optimizer.step()

    return compute_acc(pred[train_idx], labels[train_idx], evaluator), loss
# Example 11
# 0
    def forward(self, x_shot, y_shot, x_query, y_query):
        """Prototype-based few-shot forward pass.

        Encodes support and query examples together, builds one prototype
        per class as the mean support embedding, scores queries against the
        prototypes with cosine logits, and returns (loss, acc).
        """
        x_all = torch.cat([x_shot, x_query], dim=0)
        x_all = self.encoder(x_all)
        x_shot, x_query = x_all[:len(x_shot)], x_all[-len(x_query):]

        n_way = int(y_shot.max()) + 1
        # Class prototype = mean embedding of that class's support examples.
        proto = []
        for c in range(n_way):
            members = [i for i, y in enumerate(y_shot) if int(y) == c]
            proto.append(x_shot[members].mean(dim=0))
        proto = torch.stack(proto)

        logits = utils.compute_logits(x_query,
                                      proto,
                                      metric='cos',
                                      temp=self.temp)
        loss = F.cross_entropy(logits, y_query)
        acc = utils.compute_acc(logits, y_query)

        return loss, acc
# Example 12
# 0
def train():
    """Top-level training driver for a stateful sequence model.

    Trains for `args.num_epochs` epochs, evaluating on `test_dataloader`
    after each one, logging metrics, and saving periodic checkpoints plus
    the best model by test accuracy.

    Relies on module-level state: `args`, `start_epoch`, `model`,
    `optimizer`, `device`, `train_dataloader`, `test_dataloader`,
    `vocab_manager`, `log`, `fmt_list`, `adjust_learning_rate`,
    `compute_acc`.
    """
    best_acc = 0
    for epoch in range(start_epoch, args.num_epochs + 1):
        # Exponential learning-rate decay, applied only when lr_decay < 1.
        if args.lr_decay < 1:
            lr = args.lr * args.lr_decay**max(epoch - 1, 0)
            adjust_learning_rate(optimizer, lr)
        else:
            lr = args.lr

        loss_avg, acc_avg = 0, 0
        total = len(train_dataloader)

        # The model carries state across batches. Initialize the
        # "previous batch" inputs with EOF indices and zero hidden states.
        batch_N_prev = torch.zeros((args.batch_size, 1)).long().to(device) + \
                 train_dataloader.eof_idxN
        batch_T_voc_prev = torch.zeros((args.batch_size, 1)).long().to(device) +\
                         vocab_manager.eof_idxT_in
        batch_P_prev = torch.zeros((args.batch_size, 1)).long().to(device)
        hs_prev = torch.zeros(
            (args.batch_size, 1, model.hidden_size)).to(device)
        # NOTE(review): hc_prev starts with shape (1, 1, 1) — presumably
        # broadcast or replaced by the model on the first step; confirm.
        hc_prev = [torch.zeros((1, 1, 1)).to(device),\
                   torch.zeros((1, 1, 1)).to(device)]
        if args.anonym is not None:
            # Dynamic-embedding state used only in anonymized mode; the
            # first entry starts at -1 (no embedding assigned yet).
            dyn_embs = [torch.zeros((args.batch_size, \
                       model.decoder.vocab_size_dyn)).long().to(device)-1,\
            torch.zeros((0, 1, \
                         model.decoder.embedding_sizeT_dyn)).float().to(device),\
            torch.zeros((0, 1, \
                         model.decoder.embedding_sizeT_dyn)).float().to(device)]
        batch_T_raw_prev = batch_T_voc_prev

        # ---- Training pass over the epoch ----
        model.train()
        vocab_manager.reset()
        for i, batch in enumerate(tqdm(train_dataloader)):
            batch_N, batch_T_raw, batch_T_attn, batch_P = batch
            batch_N, batch_T_raw, batch_T_attn, batch_P = \
                                                     batch_N.to(device), \
                                                     batch_T_raw.to(device), \
                                                     batch_T_attn.to(device), \
                                                     batch_P.to(device)
            # Map raw token ids onto the (possibly dynamic) output vocab.
            batch_T_voc_in, batch_T_voc_out = vocab_manager.get_batch_T_voc(
                batch_T_raw)
            optimizer.zero_grad()

            forward_result = model(batch_N, batch_T_voc_in, batch_T_voc_out, \
                                   batch_T_attn, batch_P, \
                                   batch_N_prev, batch_T_voc_prev, batch_P_prev,\
                                   hs_prev, hc_prev, \
                                   dyn_embs if args.anonym else None)
            # In anonymized mode the model additionally returns updated
            # dynamic embeddings.
            if args.anonym is None:
                loss, ans, hs_, hc_ = forward_result
            else:
                loss, ans, hs_, hc_, dyn_embs = forward_result
            loss_avg += loss.item()

            acc = compute_acc(ans, batch_T_raw, batch_T_raw_prev, \
                                    vocab_manager.vocab_size_out,
                                    vocab_manager.vocab_size_out_cl,\
                                    vocab_manager.unk_idx_out,\
                                    vocab_manager.eof_idxT_out)
            acc_avg += acc.item()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), \
                                           args.grad_clip)
            optimizer.step()

            # Carry this batch's inputs/hidden state into the next step.
            batch_N_prev = batch_N
            batch_T_voc_prev = batch_T_voc_in
            batch_P_prev = batch_P
            batch_T_raw_prev = batch_T_raw
            hs_prev = hs_
            hc_prev = hc_

            # Periodic progress report (running averages so far).
            if (i + 1) % 100 == 0 and not args.no_verbose:
                prints = 'temp_loss: %f, temp_acc: %f' % (loss_avg /
                                                          (i + 1), acc_avg /
                                                          (i + 1))
                if args.save_verbose:
                    log.print(prints)
                else:
                    print(prints, flush=True)

            # Smoke-test mode: bail out after a few batches.
            if args.test_code and i > 3:
                break

        train_loss = loss_avg / total
        train_acc = acc_avg / total

        # ---- Evaluation pass: re-initialize carry-over state exactly as
        # for training, then run the test set without gradients. ----
        batch_N_prev = torch.zeros((args.batch_size, 1)).long().to(device) + \
                 train_dataloader.eof_idxN
        batch_T_voc_prev = torch.zeros((args.batch_size, 1)).long().to(device) +\
                         vocab_manager.eof_idxT_in
        batch_P_prev = torch.zeros((args.batch_size, 1)).long().to(device)
        hs_prev = torch.zeros(
            (args.batch_size, 1, model.hidden_size)).to(device)
        hc_prev = [torch.zeros((1, 1, 1)).to(device),\
                   torch.zeros((1, 1, 1)).to(device)]
        if args.anonym is not None:
            dyn_embs = [torch.zeros((args.batch_size, \
                        model.decoder.vocab_size_dyn)).long().to(device)-1,\
            torch.zeros((0, 1, \
                        model.decoder.embedding_sizeT_dyn)).float().to(device),\
            torch.zeros((0, 1, \
                        model.decoder.embedding_sizeT_dyn)).float().to(device)]
        batch_T_raw_prev = batch_T_voc_prev
        with torch.no_grad():
            model.eval()
            total_acc = 0.
            loss_eval = 0.
            vocab_manager.reset()
            for i, batch in enumerate(tqdm(test_dataloader)):
                batch_N, batch_T_raw, batch_T_attn, batch_P = batch
                batch_N, batch_T_raw, batch_T_attn, batch_P = \
                                                     batch_N.to(device), \
                                                     batch_T_raw.to(device), \
                                                     batch_T_attn.to(device), \
                                                     batch_P.to(device)
                batch_T_voc_in, batch_T_voc_out = vocab_manager.get_batch_T_voc(
                    batch_T_raw)
                forward_result = model(batch_N, batch_T_voc_in, batch_T_voc_out, \
                                   batch_T_attn, batch_P, \
                                   batch_N_prev, batch_T_voc_prev, batch_P_prev,\
                                   hs_prev, hc_prev,\
                                   dyn_embs if args.anonym else None)
                if args.anonym is None:
                    loss, ans, hs_, hc_ = forward_result
                else:
                    loss, ans, hs_, hc_, dyn_embs = forward_result
                loss_eval += loss.item()
                acc = compute_acc(ans, batch_T_raw, batch_T_raw_prev, \
                                    vocab_manager.vocab_size_out,
                                    vocab_manager.vocab_size_out_cl,\
                                    vocab_manager.unk_idx_out,\
                                    vocab_manager.eof_idxT_out)
                total_acc += acc.item()
                batch_N_prev = batch_N
                batch_T_voc_prev = batch_T_voc_in
                batch_P_prev = batch_P
                batch_T_raw_prev = batch_T_raw
                hs_prev = hs_
                hc_prev = hc_

                if args.test_code:
                    break

            total_acc /= len(test_dataloader)
            loss_eval /= len(test_dataloader)
        # Log the epoch's metrics in the order declared by fmt_list.
        values = [lr, train_loss, train_acc, \
                  loss_eval, total_acc]
        for (k, _), v in zip(fmt_list, values):
            log.add(epoch, **{k: v})

        log.iter_info()
        log.save(silent=True)

        # Checkpointing: a periodic snapshot every save_fq epochs;
        # otherwise save best_model.cpt when test accuracy improves.
        # NOTE(review): because of the elif, a best-model save is skipped
        # on epochs that also hit the periodic save — confirm intended.
        if not args.not_save_weights:
            if (epoch + 1) % args.save_fq == 0:
                torch.save(
                    {
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'epoch': epoch
                    }, log.path + '/model_epoch_%04d.cpt' % (epoch))
            elif total_acc > best_acc:
                torch.save(
                    {
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'epoch': epoch
                    }, log.path + '/best_model.cpt')
        if total_acc > best_acc:
            best_acc = total_acc
        if args.test_code:
            break
def main(config):
    svname = args.name
    if svname is None:
        svname = 'classifier_{}'.format(config['train_dataset'])
        svname += '_' + config['model_args']['encoder']
        clsfr = config['model_args']['classifier']
        if clsfr != 'linear-classifier':
            svname += '-' + clsfr
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    augmentations = [
        transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomResizedCrop(size=(80, 80),
                                         scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomRotation(35),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomResizedCrop(size=(80, 80),
                                         scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.RandomRotation(35),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomRotation(35),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomResizedCrop(size=(80, 80),
                                         scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        transforms.Compose([
            transforms.RandomRotation(35),
            transforms.RandomResizedCrop(size=(80, 80),
                                         scale=(0.08, 1.0),
                                         ratio=(0.75, 1.3333)),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    ]
    train_dataset.transform = augmentations[int(config['_a'])]
    print(train_dataset.transform)
    print("_a", config['_a'])
    input("Continue with these augmentations?")

    train_loader = DataLoader(train_dataset,
                              config['batch_size'],
                              shuffle=True,
                              num_workers=0,
                              pin_memory=True)
    utils.log('train dataset: {} (x{}), {}'.format(train_dataset[0][0].shape,
                                                   len(train_dataset),
                                                   train_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)

    # val
    if config.get('val_dataset'):
        eval_val = True
        val_dataset = datasets.make(config['val_dataset'],
                                    **config['val_dataset_args'])
        val_loader = DataLoader(val_dataset,
                                config['batch_size'],
                                num_workers=0,
                                pin_memory=True)
        utils.log('val dataset: {} (x{}), {}'.format(val_dataset[0][0].shape,
                                                     len(val_dataset),
                                                     val_dataset.n_classes))
        if config.get('visualize_datasets'):
            utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    else:
        eval_val = False

    # few-shot eval
    if config.get('fs_dataset'):
        ef_epoch = config.get('eval_fs_epoch')
        if ef_epoch is None:
            ef_epoch = 5
        eval_fs = True

        fs_dataset = datasets.make(config['fs_dataset'],
                                   **config['fs_dataset_args'])
        utils.log('fs dataset: {} (x{}), {}'.format(fs_dataset[0][0].shape,
                                                    len(fs_dataset),
                                                    fs_dataset.n_classes))
        if config.get('visualize_datasets'):
            utils.visualize_dataset(fs_dataset, 'fs_dataset', writer)

        n_way = 5
        n_query = 15
        n_shots = [1, 5]
        fs_loaders = []
        for n_shot in n_shots:
            fs_sampler = CategoriesSampler(fs_dataset.label,
                                           200,
                                           n_way,
                                           n_shot + n_query,
                                           ep_per_batch=4)
            fs_loader = DataLoader(fs_dataset,
                                   batch_sampler=fs_sampler,
                                   num_workers=0,
                                   pin_memory=True)
            fs_loaders.append(fs_loader)
    else:
        eval_fs = False

    ########

    #### Model and Optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

    if eval_fs:
        fs_model = models.make('meta-baseline', encoder=None)
        fs_model.encoder = model.encoder

    if config.get('_parallel'):
        model = nn.DataParallel(model)
        if eval_fs:
            fs_model = nn.DataParallel(fs_model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(model.parameters(),
                                                   config['optimizer'],
                                                   **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    for epoch in range(1, max_epoch + 1 + 1):
        if epoch == max_epoch + 1:
            if not config.get('epoch_ex'):
                break
            train_dataset.transform = train_dataset.default_transform
            print(train_dataset.transform)
            train_loader = DataLoader(train_dataset,
                                      config['batch_size'],
                                      shuffle=True,
                                      num_workers=0,
                                      pin_memory=True)

        timer_epoch.s()
        aves_keys = ['tl', 'ta', 'vl', 'va']
        if eval_fs:
            for n_shot in n_shots:
                aves_keys += ['fsa-' + str(n_shot)]
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        for data, label in tqdm(train_loader, desc='train', leave=False):
            # for data, label in train_loader:
            data, label = data.cuda(), label.cuda()
            logits = model(data)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            logits = None
            loss = None

        # eval
        if eval_val:
            model.eval()
            for data, label in tqdm(val_loader, desc='val', leave=False):
                data, label = data.cuda(), label.cuda()
                with torch.no_grad():
                    logits = model(data)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                aves['vl'].add(loss.item())
                aves['va'].add(acc)

        if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1):
            fs_model.eval()
            for i, n_shot in enumerate(n_shots):
                np.random.seed(0)
                for data, _ in tqdm(fs_loaders[i],
                                    desc='fs-' + str(n_shot),
                                    leave=False):
                    x_shot, x_query = fs.split_shot_query(data.cuda(),
                                                          n_way,
                                                          n_shot,
                                                          n_query,
                                                          ep_per_batch=4)
                    label = fs.make_nk_label(n_way, n_query,
                                             ep_per_batch=4).cuda()
                    with torch.no_grad():
                        logits = fs_model(x_shot, x_query).view(-1, n_way)
                        acc = utils.compute_acc(logits, label)
                    aves['fsa-' + str(n_shot)].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        for k, v in aves.items():
            aves[k] = v.item()

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)

        if epoch <= max_epoch:
            epoch_str = str(epoch)
        else:
            epoch_str = 'ex'
        log_str = 'epoch {}, train {:.4f}|{:.4f}'.format(
            epoch_str, aves['tl'], aves['ta'])
        writer.add_scalars('loss', {'train': aves['tl']}, epoch)
        writer.add_scalars('acc', {'train': aves['ta']}, epoch)

        if eval_val:
            log_str += ', val {:.4f}|{:.4f}'.format(aves['vl'], aves['va'])
            writer.add_scalars('loss', {'val': aves['vl']}, epoch)
            writer.add_scalars('acc', {'val': aves['va']}, epoch)

        if eval_fs and (epoch % ef_epoch == 0 or epoch == max_epoch + 1):
            log_str += ', fs'
            for n_shot in n_shots:
                key = 'fsa-' + str(n_shot)
                log_str += ' {}: {:.4f}'.format(n_shot, aves[key])
                writer.add_scalars('acc', {key: aves[key]}, epoch)

        if epoch <= max_epoch:
            log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        else:
            log_str += ', {}'.format(t_epoch)
        utils.log(log_str)

        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        if epoch <= max_epoch:
            torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))

            if (save_epoch is not None) and epoch % save_epoch == 0:
                torch.save(
                    save_obj,
                    os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

            if aves['va'] > max_va:
                max_va = aves['va']
                torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))
        else:
            torch.save(save_obj, os.path.join(save_path, 'epoch-ex.pth'))

        writer.flush()
    # Grid-search over training-subset sizes (n) and retrieval cutoffs (k),
    # printing segment-retrieval accuracy for every (n, k) combination.
    top_k = [1, 3, 5]
    n_samples = [200, 500, 1000, 2000]  # TODO: 5,000 in sleep

    # Topics and targets do not depend on n or k, so extract them once
    # instead of re-extracting on every loop iteration.
    # (Assumes extract_topics/extract_targets are pure readers — confirm.)
    topics = extract_topics(topic_file)
    targets = extract_targets(target_file)

    for n in n_samples:
        # The training subset depends only on n, so load it once per n
        # (the original re-read it for every k and assigned the path
        # three times in a row).
        datafile = '../data/training_sub_' + str(n) + '.json'
        with open(datafile, 'r') as f:
            data = json.load(f)

        for k in top_k:
            # find best matching segments for all queries
            best_segments = find_top_k_segments(topics, data, embedder, k)

            # print accuracy
            acc = compute_acc(best_segments, targets)
            print('n_samples:', n, 'top_k:', k, 'acc:', acc)

예제 #15
0
    def train_batch(self, images, labels):
        """Run one AC-GAN training step on a single batch.

        Updates the discriminator on real then generated images, then the
        generator, and returns the step's losses and diagnostics.

        Args:
            images: batch of real images (moved to GPU when params.cuda).
            labels: integer class labels for `images`.

        Returns:
            (errD, errG, D_x, D_G_z1, D_G_z2, accuracy): discriminator and
            generator losses, the mean real/fake-head outputs on the real
            batch, on the fake batch during the D step and during the G
            step, and the auxiliary classifier's accuracy on real images.
        """
        # optimize D
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        for _ in range(1):  # single D step per batch; loop kept for easy tuning
            # train with real
            self.netD.zero_grad()
            batch_size = images.size(0)
            if params.cuda:
                images = images.cuda()
                labels = labels.cuda()
            # NOTE: .data.resize_/copy_ is the legacy (pre-0.4 PyTorch)
            # buffer-reuse pattern; the persistent buffers are reshaped
            # in place to fit the current batch.
            self.input.data.resize_as_(images).copy_(images)
            self.dis_label.data.resize_(batch_size).fill_(self.real_label)
            self.aux_label.data.resize_(batch_size).copy_(labels)
            dis_output, aux_output = self.netD(self.input)

            # D loss = real/fake head loss + auxiliary class-head loss
            dis_errD_real = self.dis_criterion(dis_output, self.dis_label)
            aux_errD_real = self.aux_criterion(aux_output, self.aux_label)
            errD_real = dis_errD_real + aux_errD_real
            errD_real.backward()
            D_x = dis_output.data.mean()

            # compute the current classification accuracy
            accuracy = compute_acc(aux_output, self.aux_label)

            # train with fake: draw random class labels and embed them
            # one-hot into the first num_classes noise dimensions
            # (AC-GAN style class-conditional input).
            self.noise.data.resize_(batch_size, self.nz, 1, 1).normal_(0, 1)
            labels = np.random.randint(0, self.num_classes, batch_size)
            noise_ = np.random.normal(0, 1, (batch_size, self.nz))
            class_onehot = np.zeros((batch_size, self.num_classes))
            class_onehot[np.arange(batch_size), labels] = 1
            noise_[np.arange(batch_size), :self.num_classes] = class_onehot[
                np.arange(batch_size)]
            noise_ = (torch.from_numpy(noise_))
            self.noise.data.copy_(noise_.view(batch_size, self.nz, 1, 1))
            self.aux_label.data.resize_(batch_size).copy_(
                torch.from_numpy(labels))

            fake = self.netG(self.noise)
            self.dis_label.data.fill_(self.fake_label)
            # detach() keeps gradients from flowing into G during the D update
            dis_output, aux_output = self.netD(fake.detach())

            dis_errD_fake = self.dis_criterion(dis_output, self.dis_label)
            aux_errD_fake = self.aux_criterion(aux_output, self.aux_label)
            errD_fake = dis_errD_fake + aux_errD_fake
            errD_fake.backward()
            D_G_z1 = dis_output.data.mean()
            errD = errD_real + errD_fake
            self.optimizerD.step()
        # optimize G
        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        for _ in range(1):  # single G step per batch
            self.netG.zero_grad()
            # fake labels are real for generator cost
            self.dis_label.data.fill_(self.real_label)
            # reuses `fake` from the D step (no detach: G receives gradients)
            dis_output, aux_output = self.netD(fake)
            dis_errG = self.dis_criterion(dis_output, self.dis_label)
            aux_errG = self.aux_criterion(aux_output, self.aux_label)
            errG = dis_errG + aux_errG
            errG.backward()
            D_G_z2 = dis_output.data.mean()
            # todo
            #dav_loss and pert_loss
            # C = 0.1
            # loss_perturb = torch.mean(torch.norm(fake.view(fake.shape[0],-1),2,dim=1))
            # loss_G = adv_lambda * loss_adv + pert_lambda * loss_perturb
            self.optimizerG.step()
        return errD, errG, D_x, D_G_z1, D_G_z2, accuracy
예제 #16
0
def main(config):
    """Pre-train a shared encoder on a mix of eight Meta-Dataset sources.

    Each step samples one dataset (ILSVRC with probability 1/2, otherwise
    one of the other seven uniformly), pulls a batch from its TF one-shot
    iterator, re-augments it on the CPU with torchvision, and trains the
    torch model with plain cross-entropy. Checkpoints go to ./save/<name>.

    Args:
        config: dict parsed from the YAML config; the save name/tag are
            read from the module-level `args`.
    """
    svname = args.name  # NOTE(review): `args` is a module-level global
    if svname is None:
        svname = 'pretrain-multi'
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    def make_dataset(name):
        # Meta-Dataset batch reader: 'train' split, 126px images, batch 256.
        dataset = make_md([name],
            'batch', split='train', image_size=126, batch_size=256)
        return dataset

    ds_names = ['ilsvrc_2012', 'omniglot', 'aircraft', 'cu_birds', 'dtd', \
            'quickdraw', 'fungi', 'vgg_flower']
    datasets = []
    for name in ds_names:
        datasets.append(make_dataset(name))
    iters = []
    for d in datasets:
        # TF1-style one-shot iterators; each get_next() is a graph op that
        # is evaluated with sess.run() inside the training loop.
        iters.append(d.make_one_shot_iterator().get_next())

    to_torch_labels = lambda a: torch.from_numpy(a).long()

    # TF images arrive as floats in [-1, 1]; they are converted to uint8
    # PIL images below and re-augmented with this torchvision pipeline.
    to_pil = transforms.ToPILImage()
    augmentation = transforms.Compose([
        transforms.Resize(146),
        transforms.RandomResizedCrop(128),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])
    ])
    ########

    #### Model and Optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
            model.parameters(),
            config['optimizer'], **config['optimizer_args'])

    ########
    
    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        # NOTE(review): 'vl'/'va' are never fed in this variant (no val
        # loop), so the max-va checkpoint below appears never to update —
        # confirm whether that is intended.
        aves_keys = ['tl', 'ta', 'vl', 'va']
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        # presumably 915547 is the combined number of training images
        # across the eight sources — TODO confirm
        n_batch = 915547 // 256
        with tf.Session() as sess:
            for i_batch in tqdm(range(n_batch)):
                # Sample ILSVRC half of the time, else one of the others.
                if random.randint(0, 1) == 0:
                    ds_id = 0
                else:
                    ds_id = random.randint(1, len(datasets) - 1)

                next_element = iters[ds_id]
                e, cfr_id = sess.run(next_element)

                data_, label = e[0], to_torch_labels(e[1])
                # [-1, 1] float -> [0, 255] uint8 so ToPILImage accepts it.
                data_ = ((data_ + 1.0) * 0.5 * 255).astype('uint8')
                data = torch.zeros(256, 3, 128, 128).float()
                for i in range(len(data_)):
                    x = data_[i]
                    x = to_pil(x)
                    x = augmentation(x)
                    data[i] = x

                data = data.cuda()
                label = label.cuda()

                # NOTE(review): the classifier head is selected by ds_id;
                # the cfr_id returned by the TF pipeline is unused — confirm.
                logits = model(data, cfr_id=ds_id)
                loss = F.cross_entropy(logits, label)
                acc = utils.compute_acc(logits, label)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                aves['tl'].add(loss.item())
                aves['ta'].add(acc)

                # drop references so the autograd graph is freed promptly
                logits = None; loss = None

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        # collapse Averagers into plain floats for logging
        for k, v in aves.items():
            aves[k] = v.item()

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)

        # epochs beyond max_epoch are labeled 'ex' (extra) in the logs
        if epoch <= max_epoch:
            epoch_str = str(epoch)
        else:
            epoch_str = 'ex'
        log_str = 'epoch {}, train {:.4f}|{:.4f}'.format(
                epoch_str, aves['tl'], aves['ta'])
        writer.add_scalars('loss', {'train': aves['tl']}, epoch)
        writer.add_scalars('acc', {'train': aves['ta']}, epoch)

        if epoch <= max_epoch:
            log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        else:
            log_str += ', {}'.format(t_epoch)
        utils.log(log_str)

        if config.get('_parallel'):
            model_ = model.module  # unwrap DataParallel before state_dict()
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,

            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),

            'training': training,
        }
        if epoch <= max_epoch:
            torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))

            if (save_epoch is not None) and epoch % save_epoch == 0:
                torch.save(save_obj, os.path.join(
                    save_path, 'epoch-{}.pth'.format(epoch)))

            if aves['va'] > max_va:
                max_va = aves['va']
                torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))
        else:
            torch.save(save_obj, os.path.join(save_path, 'epoch-ex.pth'))

        writer.flush()
예제 #17
0
def main(unused_argv):
    """Fine-tune and evaluate a TF1 ResNet18 box regressor on CUB.

    Builds the graph with per-layer gradient scaling (fine-tune or freeze
    modes), restores weights from a previous run or from the ImageNet init
    checkpoint, runs train/test epochs, and saves the final model.
    """
    loader = Loader(base_path=None, path="/data")
    datasets = loader.CUB(ratio=0.2, total_ratio=total_ratio)
    model = Resnet18(batch_size=FLAGS.batch_size)
    with model.graph.as_default():
        model.preload()

        # Convolutional backbone variables — these are the ones restored
        # from the init checkpoint when no trained model exists yet.
        vars = [
            var for var in tf.global_variables() if var.name.startswith("conv")
        ]

        global_step = tf.Variable(0, name='global_step', trainable=False)
        # Staircase step decay: lr x0.1 every 5 epochs' worth of steps.
        learning_rate = tf.train.exponential_decay(
            1e-3,
            global_step=global_step,
            decay_steps=5 * int(len(datasets["train"]) / FLAGS.batch_size),
            decay_rate=0.1,
            staircase=True)

        # Ensure batch-norm moving statistics update before each train step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grad_and_vars = opt.compute_gradients(loss=model.loss)

            # fine_tune: boost head/top-block gradients x10;
            # freeze: damp early-layer gradients to ~0 (x1e-3).
            for index, (grad, var) in enumerate(grad_and_vars):
                if FLAGS.fine_tune:
                    if var.op.name.startswith(
                            "dense") or var.op.name.startswith("conv5"):
                        grad_and_vars[index] = (grad * 10.0, var)
                elif FLAGS.freeze:
                    if var.op.name.startswith(
                            "conv1") or var.op.name.startswith("conv2"):
                        grad_and_vars[index] = (grad * 1e-3, var)

            train_op = opt.apply_gradients(grad_and_vars,
                                           global_step=global_step)
            # train_op = tf.train.AdamOptimizer(learning_rate=learning_rate)\
            #     .minimize(loss=model.loss, global_step=global_step)

        # Everything not restored from the backbone checkpoint must be
        # initialized explicitly.
        rest_vars = list(
            set([var for var in tf.global_variables()]) - set(vars))
        init_rest_vars = tf.variables_initializer(rest_vars)

    # writer = tf.summary.FileWriter("logs/", model.graph)
    # writer.flush()
    # writer.close()

    # vars = [var.name for var in vars]
    # print('\n'.join(vars))
    # import sys
    # sys.exit(0)

    with tf.Session(graph=model.graph) as sess:
        # Resume from a previous run if one exists; otherwise initialize
        # the new variables and restore the backbone from the init ckpt.
        if os.path.exists(utils.path("models/trained")):
            tf.train.Saver().restore(
                sess,
                tf.train.latest_checkpoint(utils.path("models/trained/")))
        else:
            init_rest_vars.run()
            tf.train.Saver(vars).restore(sess,
                                         utils.path("models/init/models.ckpt"))

        from BatchLoader import BatchLoader
        LOG = utils.Log()

        for epoch in range(FLAGS.num_epochs):
            # One training pass and one test pass per epoch.
            for phase in ('train', 'test'):
                dataset = datasets[phase]

                accs = utils.AverageMeter()
                losses = utils.AverageMeter()
                start_time = time.time()
                bar = progressbar.ProgressBar()

                for features, boxes, im_sizes in bar(
                        BatchLoader(dataset,
                                    batch_size=FLAGS.batch_size,
                                    pre_fetch=FLAGS.pre_fetch,
                                    shuffle=(phase == 'train'),
                                    op_fn=CUB_Dataset.list_to_tuple)):
                    # Clip boxes to image bounds, then transform coordinates.
                    boxes = utils.crop_boxes(boxes, im_sizes)
                    boxes = utils.box_transform(boxes, im_sizes)

                    if phase == 'train':
                        _, loss, outputs = sess.run(
                            [train_op, model.loss, model.fc],
                            feed_dict={
                                'features:0': features,
                                'boxes:0': boxes,
                                'training:0': phase == 'train',
                            })
                    else:
                        # Evaluation: same fetches minus the train op.
                        loss, outputs = sess.run(
                            [model.loss, model.fc],
                            feed_dict={
                                'features:0': features,
                                'boxes:0': boxes,
                                'training:0': phase == 'train',
                            })

                    acc = utils.compute_acc(outputs, boxes, im_sizes)

                    nsample = model.batch_size
                    accs.update(acc, nsample)
                    losses.update(loss, nsample)

                    LOG.add(phase, {"accu": float(acc), "loss": float(loss)})

                elapsed_time = time.time() - start_time
                print(
                    '[{}]\tEpoch: {}/{}\tLoss: {:.4f}\tAcc: {:.2%}\tTime: {:.3f}'
                    .format(phase, epoch, FLAGS.num_epochs, losses.avg,
                            accs.avg, elapsed_time))

        tf.train.Saver().save(sess,
                              utils.path("models/trained/resnet18.ckpt"),
                              global_step=global_step)
        if FLAGS.log_path is not None:
            LOG.dump(FLAGS.log_path)
#Loss_oflstm = EMDLoss()

# Script-level training loop: a CNN (`model`) turns each image into a
# feature sequence that is fed into an LSTM (`rnn`); only the LSTM output
# at the final time step (FEATURE_ON_X_AXIS * FEATURE_ON_Y_AXIS - 1) is
# trained against the image annotations.
for i in range(EPOCH2):
    TRAIN_ACC = []
    VAL_ACC = []
    for step, image_DATA1 in enumerate(image_loader1):
        # CNN output reshaped to (seq_len, batch=1, input_size=1) for the LSTM.
        input_oflstm = model(image_DATA1['image'].to(device)).view(-1, 1,
                                                                   1).float()
        labels_oflstm = image_DATA1['annotations']
        optimizer_oflstm.zero_grad()
        output_oflstm, (hn, cn) = rnn(input_oflstm)
        # NOTE(review): 16 looks like the expected sequence length; shorter
        # outputs are skipped entirely — confirm against the data loader.
        if output_oflstm.shape[0] != 16:
            continue
        # Accuracy on the final time step only (note the (labels, output)
        # argument order of compute_acc — verify it matches its signature).
        TRAIN_ACC.append(
            compute_acc(
                labels_oflstm,
                output_oflstm[FEATURE_ON_X_AXIS * FEATURE_ON_Y_AXIS -
                              1]).item())
        # Loss on the final time step only.
        loss_oflstm = Loss_oflstm(
            output_oflstm[FEATURE_ON_X_AXIS * FEATURE_ON_Y_AXIS - 1].squeeze(),
            labels_oflstm.float().squeeze())
        #        loss_oflstm = Loss_oflstm(
        #                output_oflstm[FEATURE_ON_X_AXIS*FEATURE_ON_Y_AXIS-1].squeeze(),
        #                labels_oflstm.float().squeeze(),
        #                Mark = 'IN')
        #        print(loss_oflstm)
        # Running mean of accuracy so far this epoch.
        print('Latest ACC: %.4f' % (sum(TRAIN_ACC) / len(TRAIN_ACC)))
        loss_oflstm.backward()
        optimizer_oflstm.step()
    # Second loop over image_loader2 (validation pass; body continues
    # beyond this excerpt).
    for step, image_DATA2 in enumerate(image_loader2):
        input_oflstm = model(image_DATA2['image'].to(device)).view(-1, 1,
                                                                   1).float()
예제 #19
0
def main(extractor, unique_combos_analysis, 
        input_data_analysis, input_gold_analysis, 
        input_system_gold_analysis):
    """
    Main analysis function - obtain results on the input data only, input in combination with gold output, and in combination with the system answers.

    Args:
        extractor: 'gold' or the name of an automatic extractor; selects the
            input directory and the set of property keys analyzed.
        unique_combos_analysis: enable analysis 1 (unique property combos).
        input_data_analysis: enable analysis 2 (distinguishability set sizes).
        input_gold_analysis: enable analysis 3 (same/different pairs vs gold).
        input_system_gold_analysis: enable analysis 4 (per-system accuracy
            split by gold same/different).

    Accumulates results in `table_data` and prints them via print_tables.
    Relies on module-level `partitions` and `property_set`.
    """
    table_data={'input': defaultdict(dict), 
                'input_gold': defaultdict(dict),
                'system_gold': defaultdict(dict)}

    for partition in partitions:
#    for partition in ['partial']:

        # Setup directories
        #input files
        gold_prop_dir='../data/input/%s/annotation' % partition
        auto_prop_dir='../systems/extracted_data/%s' % partition

        # load gold file
        gold_file='../data/gold/%s/participants.json' % partition
        with open (gold_file, 'rb') as gf:
            gold_data=json.load(gf)

        # system files
        baseline_dir='../data/system/%s/%s' % (extractor, partition)
        profiler_dir='../data/system/%s_profiling/%s' % (extractor, partition)

        # load system data: one entry per baseline directory + the profiler
        sys_data={}
        for bdir in glob.glob('%s/*' % baseline_dir):
            b=utils.get_filename(bdir)
            ext_baseline_dir='%s/%s' % (bdir, property_set)
            sys_data[b]=utils.load_system_datasets(ext_baseline_dir)
        sys_data['profiler']=utils.load_system_datasets(profiler_dir)

        # define input dir and property set
        # (gold annotations expose fewer properties than the auto extractor)
        if extractor=='gold':
            data_dir=gold_prop_dir
            keys=['Ethnicity', 'Gender', 'Age', 'Religion', 'CauseOfDeath',
              'Occupation']
        else:
            data_dir=auto_prop_dir
            keys=['Residence', 'Ethnicity', 'EducationLevel', 'MedicalCondition', 
              'BirthPlace', 'Gender', 'Age', 'Religion', 'PastConviction',
              'CauseOfDeath', 'DeathPlace', 'DeathDate', 'Name']



        # 1. Analyze numbers of local contexts with unique properties
        if unique_combos_analysis:
            print_stars()
            get_num_unique_combos(data_dir, keys)
            print_stars()


        input_sets=analyze_distinguishability_of_props(data_dir, keys)
        # 2. Analyze distinguishability of properties
        if input_data_analysis:
            for filename, sets in input_sets.items():
                dataset=utils.map_filename(filename, partition)
                if not dataset: continue
                for k, v in sets.items():
                    data_part=k.replace('set_', '')
                    table_data['input'][dataset][data_part]=len(v)
                # share (in percent) of all pairs covered by the profiler set
                prof_perc=round(len(sets['set_profiler'])*100/len(sets['set_total']))
                table_data['input'][dataset]['profiler %']=prof_perc

        # 3. Analyze distribution of sameness in each of the distinguishability sets
        if input_gold_analysis:
            for filename, sets in input_sets.items():
                dataset=utils.map_filename(filename, partition)
                if not dataset: continue
                for set_name, the_set in sets.items():
                    same, different=utils.analyze_performance_on_pairs(the_set, 
                                                                       gold_data)

                    # table keys are (set name, sameness) tuples
                    k1=(set_name.replace('set_', ''), 'same')
                    table_data['input_gold'][dataset][k1]=same
                    k2=(set_name.replace('set_', ''), 'different')
                    table_data['input_gold'][dataset][k2]=different

        # 4. Analyze system performance on each of the sets
        if input_system_gold_analysis:
            print_stars()
            for s, sdata in sys_data.items():
                for filename, sets in input_sets.items():
                    if filename not in sdata.keys(): 
                        continue
                    dataset=utils.map_filename(filename, partition)
                    if not dataset: continue

                    sys_predictions=sdata[filename]
                    for set_name, the_set in sets.items():
                        # accuracy split by whether gold marks the pair as the
                        # same participant or as different participants
                        acc_counts, total_counts=utils.scores_vs_identity(gold_data,
                                                            sys_predictions, the_set)
                        gold_same_acc=utils.compute_acc(acc_counts, total_counts,
                                                            gold_same=True)
                        gold_diff_acc=utils.compute_acc(acc_counts, total_counts, 
                                                            gold_same=False)

                        k1=(set_name.replace('set_', ''), s, 'same')
                        table_data['system_gold'][dataset][k1]=round(gold_same_acc, 2)
                        k2=(set_name.replace('set_', ''), s, 'different')
                        table_data['system_gold'][dataset][k2]=round(gold_diff_acc, 2)
                print_stars()    

    print_tables(table_data)
예제 #20
0
def main(config):
    """Meta-train an episodic few-shot model (e.g. meta-baseline).

    Builds episodic train / tval / val loaders, optionally warm-starts the
    encoder from a pre-trained checkpoint, then alternates episodic
    training and evaluation, checkpointing the best validation accuracy.

    Args:
        config: dict parsed from the YAML config; the save name/tag come
            from the module-level `args`.
    """
    svname = args.name
    if svname is None:
        svname = 'meta_{}-{}shot'.format(
                config['train_dataset'], config['n_shot'])
        svname += '_' + config['model'] + '-' + config['model_args']['encoder']
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']

    # Training episodes may use a different way/shot than evaluation.
    if config.get('n_train_way') is not None:
        n_train_way = config['n_train_way']
    else:
        n_train_way = n_way
    if config.get('n_train_shot') is not None:
        n_train_shot = config['n_train_shot']
    else:
        n_train_shot = n_shot
    if config.get('ep_per_batch') is not None:
        ep_per_batch = config['ep_per_batch']
    else:
        ep_per_batch = 1

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    utils.log('train dataset: {} (x{}), {}'.format(
            train_dataset[0][0].shape, len(train_dataset),
            train_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)
    # Episodic sampler: n_way classes x (n_shot + n_query) images/episode.
    train_sampler = CategoriesSampler(
            train_dataset.label, config['train_batches'],
            n_train_way, n_train_shot + n_query,
            ep_per_batch=ep_per_batch)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                              num_workers=8, pin_memory=True)

    # tval: optional extra validation on a test-distribution dataset
    if config.get('tval_dataset'):
        tval_dataset = datasets.make(config['tval_dataset'],
                                     **config['tval_dataset_args'])
        utils.log('tval dataset: {} (x{}), {}'.format(
                tval_dataset[0][0].shape, len(tval_dataset),
                tval_dataset.n_classes))
        if config.get('visualize_datasets'):
            utils.visualize_dataset(tval_dataset, 'tval_dataset', writer)
        tval_sampler = CategoriesSampler(
                tval_dataset.label, 200,
                n_way, n_shot + n_query,
                ep_per_batch=4)
        tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler,
                                 num_workers=8, pin_memory=True)
    else:
        tval_loader = None

    # val
    val_dataset = datasets.make(config['val_dataset'],
                                **config['val_dataset_args'])
    utils.log('val dataset: {} (x{}), {}'.format(
            val_dataset[0][0].shape, len(val_dataset),
            val_dataset.n_classes))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    val_sampler = CategoriesSampler(
            val_dataset.label, 200,
            n_way, n_shot + n_query,
            ep_per_batch=4)
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=8, pin_memory=True)

    ########

    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])

        # Warm-start only the encoder from a pre-trained classifier.
        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
            model.parameters(),
            config['optimizer'], **config['optimizer_args'])

    ########
    
    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    # trlog keeps the full per-epoch history of every averaged metric.
    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            # keep batch-norm statistics fixed during meta-training
            utils.freeze_bn(model) 
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        # per-epoch seed so episode composition is reproducible
        np.random.seed(epoch)
        for data, _ in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query = fs.split_shot_query(
                    data.cuda(), n_train_way, n_train_shot, n_query,
                    ep_per_batch=ep_per_batch)
            label = fs.make_nk_label(n_train_way, n_query,
                    ep_per_batch=ep_per_batch).cuda()

            logits = model(x_shot, x_query).view(-1, n_train_way)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            # drop references so the autograd graph can be freed
            logits = None; loss = None 

        # eval
        model.eval()

        for name, loader, name_l, name_a in [
                ('tval', tval_loader, 'tvl', 'tva'),
                ('val', val_loader, 'vl', 'va')]:

            if (config.get('tval_dataset') is None) and name == 'tval':
                continue

            # fixed seed: identical evaluation episodes every epoch
            np.random.seed(0)
            for data, _ in tqdm(loader, desc=name, leave=False):
                x_shot, x_query = fs.split_shot_query(
                        data.cuda(), n_way, n_shot, n_query,
                        ep_per_batch=4)
                label = fs.make_nk_label(n_way, n_query,
                        ep_per_batch=4).cuda()

                with torch.no_grad():
                    logits = model(x_shot, x_query).view(-1, n_way)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # NOTE(review): `_` is the label tensor left over from the last val
        # batch; its final element is logged as a seed-sanity signature in
        # the epoch line below — fragile, confirm before relying on it.
        _sig = int(_[-1])

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        # collapse Averagers to floats and append to the history log
        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                'val {:.4f}|{:.4f}, {} {}/{} (@{})'.format(
                epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'],
                aves['vl'], aves['va'], t_epoch, t_used, t_estimate, _sig))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        if config.get('_parallel'):
            model_ = model.module  # unwrap DataParallel before state_dict()
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,

            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),

            'training': training,
        }
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                    os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        # keep a separate checkpoint of the best validation accuracy
        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()
예제 #21
0
    def train(self,
              sess,
              dataset,
              is_train=True,
              ftest_name=FLAGS['env_output_file'].value):
        """Run one pass over `dataset` (training or evaluation).

        Batches the data, runs the decoder step per batch, accumulates
        recommendation accuracy and (on real data) a confusion matrix for
        the purchase head, prints a summary, and returns the mean metrics:
        (loss, pred-loss, purchase-loss, p@metric, p@1).
        """
        n_total = len(dataset)
        losses, accs, accs_at_1 = [], [], []
        pred_losses, purchase_losses = [], []
        tp_rates, tn_rates, fp_rates, fn_rates = [], [], [], []
        print("Get %s data:len(dataset) is %d " %
              ("training" if is_train else "testing", n_total))
        if not is_train:
            # Truncate the per-example output file that compute_acc appends to.
            open(ftest_name, "w").close()
        start = end = 0
        while end < n_total:
            start = end
            step = FLAGS['batch_size'].value
            end = end + step if end + step < n_total else n_total
            batch_data = gen_batched_data(dataset[start:end])
            outputs = self.step_decoder(sess, batch_data,
                                        forward_only=not is_train)
            losses.append(outputs[0])
            predict_index = outputs[1]  # [batch_size, length, 10]
            pred_losses.append(outputs[2])
            purchase_losses.append(outputs[3])
            purchase_prob = outputs[4][:, :, 1]
            tmp_acc, tmp_acc_1 = compute_acc(batch_data["aims"],
                                             predict_index,
                                             batch_data["rec_lists"],
                                             batch_data["rec_mask"],
                                             batch_data["purchase"],
                                             ftest_name=ftest_name,
                                             output=(not is_train))
            accs.append(tmp_acc)
            accs_at_1.append(tmp_acc_1)

            if not FLAGS['use_simulated_data'].value:
                # Per-batch confusion-matrix rates for the purchase head at
                # threshold 0.5; labels of -1 mark positions to ignore.
                seen = 1e-6  # guards against division by zero
                n_tp, n_tn, n_fp, n_fn = 0., 0., 0., 0.
                for row_labels, row_probs in zip(batch_data["purchase"],
                                                 purchase_prob):
                    for truth, prob in zip(row_labels, row_probs):
                        if truth == -1.:
                            continue
                        seen += 1
                        if truth == 1. and prob > 0.5:
                            n_tp += 1
                        if truth == 1. and prob <= 0.5:
                            n_fn += 1
                        if truth == 0. and prob > 0.5:
                            n_fp += 1
                        if truth == 0. and prob <= 0.5:
                            n_tn += 1
                tp_rates.append(n_tp / seen)
                tn_rates.append(n_tn / seen)
                fp_rates.append(n_fp / seen)
                fn_rates.append(n_fn / seen)
        if not FLAGS['use_simulated_data'].value:
            print("Confusion matrix for purchase prediction:")
            print("true positive:%.4f" % np.mean(tp_rates),
                  "true negative:%.4f" % np.mean(tn_rates))
            print("false positive:%.4f" % np.mean(fp_rates),
                  "false negative:%.4f" % np.mean(fn_rates))
        print("predict:p@1:%.4f%%" % (np.mean(accs_at_1) * 100),
              "p@%d:%.4f%%" % (FLAGS['metric'].value, np.mean(accs) * 100))

        if is_train:
            # advance the persistent epoch counter in the graph
            sess.run(self.epoch_add_op)
        return np.mean(losses), np.mean(pred_losses), np.mean(
            purchase_losses), np.mean(accs), np.mean(accs_at_1)
예제 #22
0
def main(config):
    """Meta-test a semi-supervised few-shot model.

    Builds an episodic loader over the configured dataset, loads (or
    assembles) a meta-baseline model, then runs ``args.test_epochs``
    evaluation passes, printing accuracy with a confidence interval.

    NOTE(review): relies on module-level globals defined elsewhere in the
    file (`args`, `datasets`, `models`, `utils`, `fs`, `nn`, `np`, `torch`,
    `F`, `tqdm`, `DataLoader`, `CategoriesSampler_Semi`,
    `mean_confidence_interval`, `roc_auc_score`).
    """
    # dataset
    dataset = datasets.make(config['dataset'], **config['dataset_args'])
    utils.log('dataset: {} (x{}), {}'.format(dataset[0][0].shape, len(dataset),
                                             dataset.n_classes))
    # sauc mode evaluates binary AUC episodes; otherwise standard 5-way.
    if not args.sauc:
        n_way = 5
    else:
        n_way = 2
    n_shot, n_unlabel, n_query = args.shot, 30, 15
    n_batch = 200
    ep_per_batch = 4
    batch_sampler = CategoriesSampler_Semi(dataset.label,
                                           n_batch,
                                           n_way,
                                           n_shot,
                                           n_unlabel,
                                           n_query,
                                           ep_per_batch=ep_per_batch)
    loader = DataLoader(dataset,
                        batch_sampler=batch_sampler,
                        num_workers=8,
                        pin_memory=True)

    # model: either a fresh meta-baseline shell or a full saved checkpoint;
    # an optionally loaded encoder overrides the model's encoder.
    if config.get('load') is None:
        model = models.make('meta-baseline', encoder=None)
    else:
        model = models.load(torch.load(config['load']))

    if config.get('load_encoder') is not None:
        encoder = models.load(torch.load(config['load_encoder'])).encoder
        model.encoder = encoder

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    model.eval()
    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    # testing
    aves_keys = ['vl', 'va']  # validation loss / validation accuracy
    aves = {k: utils.Averager() for k in aves_keys}

    test_epochs = args.test_epochs
    np.random.seed(0)  # fixed seed so episodes are reproducible across runs
    va_lst = []
    for epoch in range(1, test_epochs + 1):
        for data, _ in tqdm(loader, leave=False):
            x_shot, x_unlabel, x_query = fs.split_shot_query_semi(
                data.cuda(),
                n_way,
                n_shot,
                n_unlabel,
                n_query,
                ep_per_batch=ep_per_batch)

            with torch.no_grad():
                if not args.sauc:
                    # Standard episodic classification: cross-entropy + acc.
                    logits = model(x_shot, x_unlabel, x_query).view(-1, n_way)
                    label = fs.make_nk_label(n_way,
                                             n_query,
                                             ep_per_batch=ep_per_batch).cuda()
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                    aves['vl'].add(loss.item(), len(data))
                    aves['va'].add(acc, len(data))
                    va_lst.append(acc)
                else:
                    # sauc: score queries by cosine similarity to the mean
                    # prototype of class 0, then report ROC-AUC per episode.
                    x_shot = x_shot[:, 0, :, :, :, :].contiguous()
                    shot_shape = x_shot.shape[:-3]
                    img_shape = x_shot.shape[-3:]
                    bs = shot_shape[0]
                    p = model.encoder(x_shot.view(-1, *img_shape)).reshape(
                        *shot_shape, -1).mean(dim=1, keepdim=True)
                    q = model.encoder(x_query.view(-1, *img_shape)).view(
                        bs, -1, p.shape[-1])
                    p = F.normalize(p, dim=-1)
                    q = F.normalize(q, dim=-1)
                    s = torch.bmm(q, p.transpose(2, 1)).view(bs, -1).cpu()
                    for i in range(bs):
                        # first half of the queries are positives by
                        # construction, second half negatives
                        k = s.shape[1] // 2
                        y_true = [1] * k + [0] * k
                        acc = roc_auc_score(y_true, s[i])
                        aves['va'].add(acc, len(data))
                        va_lst.append(acc)

        # NOTE(review): `_[-1]` prints the last element of the *discarded*
        # label batch from the loader loop — looks accidental; verify intent.
        print('test epoch {}: acc={:.2f} +- {:.2f} (%), loss={:.4f} (@{})'.
              format(epoch, aves['va'].item() * 100,
                     mean_confidence_interval(va_lst) * 100, aves['vl'].item(),
                     _[-1]))
    print('-' * 5 + '>' + 'Epoch {}/{}'.format(epoch, EPOCH - 1))
    print('=' * 40)

    # One training pass over train_loader; collects per-batch loss/accuracy
    # and reports a running summary every 1000 batches.
    # NOTE(review): relies on surrounding scope for `epoch`, `EPOCH`, `model`,
    # `optimizer`, `train_loader`, `device`, `edwklLoss`, `compute_acc`,
    # `trainset` and `TRAIN_BATCH_SIZE` — none are defined in this fragment.
    train_batch_losses = []
    val_batch_losses = []
    step = 0
    TRAIN_ACC = []
    VAL_ACC = []
    for i, DATA in enumerate(train_loader):
        images = DATA['image'].to(device)
        labels = DATA['annotations'].to(device).float()
        outputs = model(images)
        step += 1
        optimizer.zero_grad()
        # Scores are reshaped to (batch, 10, 1) before loss/accuracy —
        # presumably a 10-bin score distribution; TODO confirm upstream.
        outputs = outputs.view(-1, 10, 1)
        TRAIN_ACC.append(compute_acc(labels, outputs).item())
        # Criteria previously tried here: corLoss, mseLoss, emdLoss, cepLoss.
        train_losses = edwklLoss(labels, outputs)
        train_batch_losses.append(train_losses.item())
        train_losses.backward()
        optimizer.step()
        if (i + 1) % 1000 == 0:
            print('--->')
            # BUG FIX: the original formatted the whole list with %.4f
            # ("%.4f" % train_batch_losses raises TypeError); report the
            # most recent batch loss instead, matching the "Latest" label.
            print('Latest loss: %.4f' % (train_batch_losses[-1]))
            print('Latest ACC: %.4f' % (sum(TRAIN_ACC) / len(TRAIN_ACC)))

    avg_loss = sum(train_batch_losses) / (len(trainset) // TRAIN_BATCH_SIZE +
                                          1)
예제 #24
0
def main(config):
    """Train an episodic few-shot model with pseudo-shots.

    Sets up save paths and TensorBoard logging, builds train/val (and
    optionally tval) episodic loaders, trains for ``config['max_epoch']``
    epochs, evaluates after each epoch, and checkpoints both the latest
    and the best-validation-accuracy models.

    NOTE(review): relies on module-level globals defined elsewhere in the
    file (`args`, `utils`, `models`, `fs`, `nn`, `np`, `torch`, `F`, `os`,
    `pkl`, `yaml`, `tqdm`, `DataLoader`, `SummaryWriter`, `CustomDataset`,
    `EpisodicSampler`).
    """
    svname = config.get('sv_name')
    if args.tag is not None:
        svname += '_' + args.tag
    config['sv_name'] = svname
    save_path = os.path.join('./save', svname)
    utils.ensure_path(save_path)
    utils.set_log_path(save_path)
    utils.log(svname)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))
    # Persist the effective config next to the checkpoints.
    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']
    n_pseudo = config['n_pseudo']
    ep_per_batch = config['ep_per_batch']

    # Evaluation loaders fall back to the training batch count.
    if config.get('test_batches') is not None:
        test_batches = config['test_batches']
    else:
        test_batches = config['train_batches']

    # Point every split's dataset at ../data_root relative to the CWD.
    for s in ['train', 'val', 'tval']:
        if config.get(f"{s}_dataset_args") is not None:
            config[f"{s}_dataset_args"]['data_dir'] = os.path.join(os.getcwd(), os.pardir, 'data_root')

    # train
    train_dataset = CustomDataset(config['train_dataset'], save_dir=config.get('load_encoder'),
                                  **config['train_dataset_args'])

    # The 'helper' split records which classes were used, for later reuse.
    if config['train_dataset_args']['split'] == 'helper':
        with open(os.path.join(save_path, 'train_helper_cls.pkl'), 'wb') as f:
            pkl.dump(train_dataset.dataset_classes, f)

    train_sampler = EpisodicSampler(train_dataset, config['train_batches'], n_way, n_shot, n_query,
                                    n_pseudo, episodes_per_batch=ep_per_batch)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler,
                                  num_workers=4, pin_memory=True)

    # tval (optional second evaluation split)
    if config.get('tval_dataset'):
        tval_dataset = CustomDataset(config['tval_dataset'],
                                     **config['tval_dataset_args'])

        tval_sampler = EpisodicSampler(tval_dataset, test_batches, n_way, n_shot, n_query,
                                       n_pseudo, episodes_per_batch=ep_per_batch)
        tval_loader = DataLoader(tval_dataset, batch_sampler=tval_sampler,
                                 num_workers=4, pin_memory=True)
    else:
        tval_loader = None

    # val
    val_dataset = CustomDataset(config['val_dataset'],
                                **config['val_dataset_args'])
    val_sampler = EpisodicSampler(val_dataset, test_batches, n_way, n_shot, n_query,
                                  n_pseudo, episodes_per_batch=ep_per_batch)
    val_loader = DataLoader(val_dataset, batch_sampler=val_sampler,
                            num_workers=4, pin_memory=True)


    #### Model and optimizer ####

    if config.get('load'):
        model_sv = torch.load(config['load'])
        model = models.load(model_sv)
    else:
        model = models.make(config['model'], **config['model_args'])
        # Optionally warm-start the encoder (and optionally freeze it).
        if config.get('load_encoder'):
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())
            if config.get('freeze_encoder'):
                for param in model.encoder.parameters():
                    param.requires_grad = False

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(
        model.parameters(),
        config['optimizer'], **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.  # best validation accuracy seen so far
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    # tl/ta: train loss/acc, tvl/tva: tval, vl/va: val.
    aves_keys = ['tl', 'ta', 'tvl', 'tva', 'vl', 'va']
    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
        # Seed per epoch so each epoch's episodes differ but runs reproduce.
        np.random.seed(epoch)

        for data in tqdm(train_loader, desc='train', leave=False):
            x_shot, x_query, x_pseudo = fs.split_shot_query(
                data.cuda(), n_way, n_shot, n_query, n_pseudo,
                ep_per_batch=ep_per_batch)
            label = fs.make_nk_label(n_way, n_query,
                                     ep_per_batch=ep_per_batch).cuda()

            logits = model(x_shot, x_query, x_pseudo)
            logits = logits.view(-1, n_way)
            loss = F.cross_entropy(logits, label)
            acc = utils.compute_acc(logits, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            # Drop references so the autograd graph is freed promptly.
            logits = None; loss = None

            # eval
        model.eval()
        for name, loader, name_l, name_a in [
            ('tval', tval_loader, 'tvl', 'tva'),
            ('val', val_loader, 'vl', 'va')]:

            if (config.get('tval_dataset') is None) and name == 'tval':
                continue

            # Fixed seed: identical eval episodes every epoch.
            np.random.seed(0)
            for data in tqdm(loader, desc=name, leave=False):
                x_shot, x_query, x_pseudo = fs.split_shot_query(
                    data.cuda(), n_way, n_shot, n_query, n_pseudo,
                    ep_per_batch=ep_per_batch)
                label = fs.make_nk_label(n_way, n_query,
                                         ep_per_batch=ep_per_batch).cuda()

                with torch.no_grad():
                    logits = model(x_shot, x_query, x_pseudo)
                    logits = logits.view(-1, n_way)
                    loss = F.cross_entropy(logits, label)
                    acc = utils.compute_acc(logits, label)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        # Collapse the Averagers to floats and append to the history log.
        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        utils.log('epoch {}, train {:.4f}|{:.4f}, tval {:.4f}|{:.4f}, '
                  'val {:.4f}|{:.4f}, {} {}/{}'.format(
            epoch, aves['tl'], aves['ta'], aves['tvl'], aves['tva'],
            aves['vl'], aves['va'], t_epoch, t_used, t_estimate))

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'tval': aves['tvl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'tval': aves['tva'],
            'val': aves['va'],
        }, epoch)

        # Unwrap DataParallel before saving so the state dict is portable.
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,

            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),

            'training': training,
        }
        # Always keep the latest checkpoint and the training log ...
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        # ... plus periodic snapshots ...
        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        # ... and the best-so-far by validation accuracy.
        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()
예제 #25
0
def main(config):
    """Meta-test a (MAML-style) model with inner-loop adaptation.

    Seeds all RNGs, builds the test-episode loader, loads the checkpoint
    named in ``config['load']``, then evaluates for ``config['epoch']``
    passes, printing mean accuracy with a confidence interval.

    NOTE(review): relies on module-level globals defined elsewhere in the
    file (`args`, `datasets`, `models`, `utils`, `nn`, `np`, `torch`,
    `random`, `tqdm`, `DataLoader`).
    """
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    ##### Dataset #####

    dataset = datasets.make(config['dataset'], **config['test'])
    utils.log('meta-test set: {} (x{}), {}'.format(dataset[0][0].shape,
                                                   len(dataset),
                                                   dataset.n_classes))
    # NOTE(review): the second positional DataLoader argument is
    # batch_size, so n_episode episodes are collated per batch — confirm
    # that is the intent (no sampler is used here).
    loader = DataLoader(dataset,
                        config['test']['n_episode'],
                        collate_fn=datasets.collate_fn,
                        num_workers=1,
                        pin_memory=True)

    ##### Model #####

    ckpt = torch.load(config['load'])
    inner_args = utils.config_inner_args(config.get('inner_args'))
    # Skip loading the classifier head when it will be reset per episode.
    model = models.load(ckpt, load_clf=(not inner_args['reset_classifier']))

    if args.efficient:
        model.go_efficient()

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    ##### Evaluation #####

    model.eval()
    aves_va = utils.AverageMeter()
    va_lst = []

    for epoch in range(1, config['epoch'] + 1):
        for data in tqdm(loader, leave=False):
            x_shot, x_query, y_shot, y_query = data
            x_shot, y_shot = x_shot.cuda(), y_shot.cuda()
            x_query, y_query = x_query.cuda(), y_query.cuda()

            # Fresh classifier per episode when configured.
            if inner_args['reset_classifier']:
                if config.get('_parallel'):
                    model.module.reset_classifier()
                else:
                    model.reset_classifier()

            logits = model(x_shot,
                           x_query,
                           y_shot,
                           inner_args,
                           meta_train=False)
            logits = logits.view(-1, config['test']['n_way'])
            labels = y_query.view(-1)

            # NOTE(review): compute_acc receives hard argmax predictions
            # here (elsewhere in this file it receives logits) — verify
            # this utils.compute_acc accepts class indices.
            pred = torch.argmax(logits, dim=1)
            acc = utils.compute_acc(pred, labels)
            aves_va.update(acc, 1)
            va_lst.append(acc)

        print('test epoch {}: acc={:.2f} +- {:.2f} (%)'.format(
            epoch,
            aves_va.item() * 100,
            utils.mean_confidence_interval(va_lst) * 100))
예제 #26
0
            weight_tac = 1.

        # Discriminator step fragment (TAC-GAN-style): the discriminator
        # returns projection terms (vy, psi), a real/fake score (dis), an
        # auxiliary classifier output (ac) and a twin auxiliary classifier
        # output (tac). Enclosing loop/def is not visible here.
        vy, psi, dis, ac, tac = netD(input,
                                     label,
                                     detach=weight_tac < opt.detach_below)

        # Real-sample losses: projection loss plus (dis + aux) weighted by
        # weight_proj / weight_tac respectively.
        proj_errD_real = dis_criterion(vy + psi, dis_label)
        dis_errD_real = dis_criterion(dis, dis_label)
        aux_errD_real = aux_criterion(ac, label)
        errD_real = proj_errD_real * weight_proj + \
                    (dis_errD_real + aux_errD_real) * weight_tac
        errD_real.backward()
        # NOTE(review): sigmoid is applied to the *loss* value here, not to
        # the discriminator output — looks suspicious; verify D_x intent.
        D_x = torch.sigmoid(dis_errD_real).data.mean()

        # compute the current classification accuracy
        accuracy = compute_acc(ac, label)

        # get fake: sample random class labels and latent noise, generate.
        fake_label.resize_(batch_size).random_(0, num_classes)
        noise.resize_(batch_size, nz).normal_(0, 1)
        fake = netG(noise, fake_label)

        # train with fake (generator output detached for the D step)
        dis_label.resize_(batch_size).fill_(fake_label_const)
        vy, psi, dis, ac, tac = netD(fake.detach(),
                                     fake_label,
                                     detach=weight_tac < opt.detach_below)

        proj_errD_fake = dis_criterion(vy + psi, dis_label)
        dis_errD_fake = dis_criterion(dis, dis_label)
        # The twin classifier (tac) is trained on fake samples' labels.
        aux_errD_fake = aux_criterion(tac, fake_label)
예제 #27
0
        # ACGAN discriminator step fragment: train on a real batch, then
        # build class-conditional noise for the fake batch. Enclosing
        # loop/def is not visible here.
        batch_size = real_cpu.size(0)
        if opt.cuda:
            real_cpu = real_cpu.cuda()
        input.data.resize_as_(real_cpu).copy_(real_cpu)
        dis_label.data.resize_(batch_size).fill_(real_label)
        aux_label.data.resize_(batch_size).copy_(label)
        # netD returns a real/fake score and an auxiliary class prediction.
        dis_output, aux_output = netD(input)

        dis_errD_real = dis_criterion(dis_output, dis_label)
        aux_errD_real = aux_criterion(aux_output, aux_label)
        errD_real = dis_errD_real + aux_errD_real
        errD_real.backward()
        D_x = dis_output.data.mean()

        # compute the current classification accuracy
        accuracy = compute_acc(aux_output, aux_label)

        # train with fake: embed a one-hot class code into the first
        # num_classes entries of the noise vector (ACGAN conditioning).
        noise.data.resize_(batch_size, nz, 1, 1).normal_(0, 1)
        label = np.random.randint(0, num_classes, batch_size)
        noise_ = np.random.normal(0, 1, (batch_size, nz))
        class_onehot = np.zeros((batch_size, num_classes))
        class_onehot[np.arange(batch_size), label] = 1
        noise_[np.arange(batch_size), :num_classes] = class_onehot[np.arange(
            batch_size)]
        noise_ = (torch.from_numpy(noise_))
        noise.data.copy_(noise_.view(batch_size, nz, 1, 1))
        aux_label.data.resize_(batch_size).copy_(torch.from_numpy(label))

        fake = netG(noise)
        dis_label.data.fill_(fake_label)
예제 #28
0
def main(config):
    """Train an episodic few-shot model (Bongard-style benchmark).

    Builds the save directory and loggers, constructs train / val and up
    to five themed tval episodic loaders, trains for
    ``config['max_epoch']`` epochs with per-epoch evaluation, and saves
    latest / periodic / best-validation checkpoints.

    NOTE(review): relies on module-level globals defined elsewhere in the
    file (`args`, `datasets`, `models`, `utils`, `fs`, `nn`, `np`,
    `torch`, `F`, `os`, `yaml`, `tqdm`, `DataLoader`, `SummaryWriter`,
    `BongardSampler`).
    """
    svname = args.name
    if svname is None:
        # Derive a descriptive save name from the configuration.
        svname = 'meta_{}-{}shot'.format(config['train_dataset'],
                                         config['n_shot'])
        svname += '_' + config['model']
        if config['model_args'].get('encoder'):
            svname += '-' + config['model_args']['encoder']
        if config['model_args'].get('prog_synthesis'):
            svname += '-' + config['model_args']['prog_synthesis']
    svname += '-seed' + str(args.seed)
    if args.tag is not None:
        svname += '_' + args.tag

    save_path = os.path.join(args.save_dir, svname)
    utils.ensure_path(save_path, remove=False)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))

    # Persist the effective config next to the checkpoints.
    yaml.dump(config, open(os.path.join(save_path, 'config.yaml'), 'w'))

    logger = utils.Logger(file_name=os.path.join(save_path, "log_sdout.txt"),
                          file_mode="a+",
                          should_flush=True)

    #### Dataset ####

    n_way, n_shot = config['n_way'], config['n_shot']
    n_query = config['n_query']

    # Training episodes may use a different way/shot than evaluation.
    if config.get('n_train_way') is not None:
        n_train_way = config['n_train_way']
    else:
        n_train_way = n_way
    if config.get('n_train_shot') is not None:
        n_train_shot = config['n_train_shot']
    else:
        n_train_shot = n_shot
    if config.get('ep_per_batch') is not None:
        ep_per_batch = config['ep_per_batch']
    else:
        ep_per_batch = 1

    # Dedicated RandomState so sampler seeds are reproducible per args.seed.
    random_state = np.random.RandomState(args.seed)
    print('seed:', args.seed)

    # train
    train_dataset = datasets.make(config['train_dataset'],
                                  **config['train_dataset_args'])
    utils.log('train dataset: {} (x{})'.format(train_dataset[0][0].shape,
                                               len(train_dataset)))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(train_dataset, 'train_dataset', writer)
    train_sampler = BongardSampler(train_dataset.n_tasks,
                                   config['train_batches'], ep_per_batch,
                                   random_state.randint(2**31))
    train_loader = DataLoader(train_dataset,
                              batch_sampler=train_sampler,
                              num_workers=8,
                              pin_memory=True)

    # tvals: optional themed test-validation splits, each with a fixed
    # task budget (numbers depend on dataset).
    tval_loaders = {}
    tval_name_ntasks_dict = {
        'tval': 2000,
        'tval_ff': 600,
        'tval_bd': 480,
        'tval_hd_comb': 400,
        'tval_hd_novel': 320
    }  # numbers depend on dataset
    for tval_type in tval_name_ntasks_dict.keys():
        if config.get('{}_dataset'.format(tval_type)):
            tval_dataset = datasets.make(
                config['{}_dataset'.format(tval_type)],
                **config['{}_dataset_args'.format(tval_type)])
            utils.log('{} dataset: {} (x{})'.format(tval_type,
                                                    tval_dataset[0][0].shape,
                                                    len(tval_dataset)))
            if config.get('visualize_datasets'):
                utils.visualize_dataset(tval_dataset, 'tval_ff_dataset',
                                        writer)
            tval_sampler = BongardSampler(
                tval_dataset.n_tasks,
                n_batch=tval_name_ntasks_dict[tval_type] // ep_per_batch,
                ep_per_batch=ep_per_batch,
                seed=random_state.randint(2**31))
            tval_loader = DataLoader(tval_dataset,
                                     batch_sampler=tval_sampler,
                                     num_workers=8,
                                     pin_memory=True)
            tval_loaders.update({tval_type: tval_loader})
        else:
            tval_loaders.update({tval_type: None})

    # val
    val_dataset = datasets.make(config['val_dataset'],
                                **config['val_dataset_args'])
    utils.log('val dataset: {} (x{})'.format(val_dataset[0][0].shape,
                                             len(val_dataset)))
    if config.get('visualize_datasets'):
        utils.visualize_dataset(val_dataset, 'val_dataset', writer)
    val_sampler = BongardSampler(val_dataset.n_tasks,
                                 n_batch=900 // ep_per_batch,
                                 ep_per_batch=ep_per_batch,
                                 seed=random_state.randint(2**31))
    val_loader = DataLoader(val_dataset,
                            batch_sampler=val_sampler,
                            num_workers=8,
                            pin_memory=True)

    ########

    #### Model and optimizer ####

    if config.get('load'):
        print('loading pretrained model: ', config['load'])
        model = models.load(torch.load(config['load']))
    else:
        model = models.make(config['model'], **config['model_args'])

        # Optionally warm-start sub-modules from separate checkpoints.
        if config.get('load_encoder'):
            print('loading pretrained encoder: ', config['load_encoder'])
            encoder = models.load(torch.load(config['load_encoder'])).encoder
            model.encoder.load_state_dict(encoder.state_dict())

        if config.get('load_prog_synthesis'):
            print('loading pretrained program synthesis model: ',
                  config['load_prog_synthesis'])
            prog_synthesis = models.load(
                torch.load(config['load_prog_synthesis']))
            model.prog_synthesis.load_state_dict(prog_synthesis.state_dict())

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))

    optimizer, lr_scheduler = utils.make_optimizer(model.parameters(),
                                                   config['optimizer'],
                                                   **config['optimizer_args'])

    ########

    max_epoch = config['max_epoch']
    save_epoch = config.get('save_epoch')
    max_va = 0.  # best validation accuracy seen so far
    timer_used = utils.Timer()
    timer_epoch = utils.Timer()

    # tl/ta: train loss/acc, vl/va: val; one extra loss/acc key pair is
    # appended per configured tval split.
    aves_keys = ['tl', 'ta', 'vl', 'va']
    tval_tuple_lst = []
    for k, v in tval_loaders.items():
        if v is not None:
            loss_key = 'tvl' + k.split('tval')[-1]
            # NOTE(review): the leading space in ' tva...' looks like a
            # typo; it is used consistently below so behavior is intact,
            # but the logged key name carries the space.
            acc_key = ' tva' + k.split('tval')[-1]
            aves_keys.append(loss_key)
            aves_keys.append(acc_key)
            tval_tuple_lst.append((k, v, loss_key, acc_key))

    trlog = dict()
    for k in aves_keys:
        trlog[k] = []

    for epoch in range(1, max_epoch + 1):
        timer_epoch.s()
        aves = {k: utils.Averager() for k in aves_keys}

        # train
        model.train()
        if config.get('freeze_bn'):
            utils.freeze_bn(model)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        for data, label in tqdm(train_loader, desc='train', leave=False):

            x_shot, x_query = fs.split_shot_query(data.cuda(),
                                                  n_train_way,
                                                  n_train_shot,
                                                  n_query,
                                                  ep_per_batch=ep_per_batch)
            label_query = fs.make_nk_label(n_train_way,
                                           n_query,
                                           ep_per_batch=ep_per_batch).cuda()

            if config['model'] == 'snail':  # only use one selected label_query
                query_dix = random_state.randint(n_train_way * n_query)
                label_query = label_query.view(ep_per_batch, -1)[:, query_dix]
                x_query = x_query[:, query_dix:query_dix + 1]

            if config['model'] == 'maml':  # need grad in maml
                model.zero_grad()

            logits = model(x_shot, x_query).view(-1, n_train_way)
            loss = F.cross_entropy(logits, label_query)
            acc = utils.compute_acc(logits, label_query)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            aves['tl'].add(loss.item())
            aves['ta'].add(acc)

            # Drop references so the autograd graph is freed promptly.
            logits = None
            loss = None

        # eval
        model.eval()

        for name, loader, name_l, name_a in [('val', val_loader, 'vl', 'va')
                                             ] + tval_tuple_lst:

            if config.get('{}_dataset'.format(name)) is None:
                continue

            # Fixed seed: identical eval episodes every epoch.
            np.random.seed(0)
            for data, _ in tqdm(loader, desc=name, leave=False):
                x_shot, x_query = fs.split_shot_query(
                    data.cuda(),
                    n_way,
                    n_shot,
                    n_query,
                    ep_per_batch=ep_per_batch)
                label_query = fs.make_nk_label(
                    n_way, n_query, ep_per_batch=ep_per_batch).cuda()

                if config[
                        'model'] == 'snail':  # only use one randomly selected label_query
                    query_dix = random_state.randint(n_train_way)
                    label_query = label_query.view(ep_per_batch, -1)[:,
                                                                     query_dix]
                    x_query = x_query[:, query_dix:query_dix + 1]

                if config['model'] == 'maml':  # need grad in maml
                    model.zero_grad()
                    logits = model(x_shot, x_query, eval=True).view(-1, n_way)
                    loss = F.cross_entropy(logits, label_query)
                    acc = utils.compute_acc(logits, label_query)
                else:
                    with torch.no_grad():
                        logits = model(x_shot, x_query,
                                       eval=True).view(-1, n_way)
                        loss = F.cross_entropy(logits, label_query)
                        acc = utils.compute_acc(logits, label_query)

                aves[name_l].add(loss.item())
                aves[name_a].add(acc)

        # post
        if lr_scheduler is not None:
            lr_scheduler.step()

        # Collapse the Averagers to floats and append to the history log.
        for k, v in aves.items():
            aves[k] = v.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.t())
        t_used = utils.time_str(timer_used.t())
        t_estimate = utils.time_str(timer_used.t() / epoch * max_epoch)
        log_str = 'epoch {}, train {:.4f}|{:.4f}, val {:.4f}|{:.4f}'.format(
            epoch, aves['tl'], aves['ta'], aves['vl'], aves['va'])
        for tval_name, _, loss_key, acc_key in tval_tuple_lst:
            log_str += ', {} {:.4f}|{:.4f}'.format(tval_name, aves[loss_key],
                                                   aves[acc_key])
            writer.add_scalars('loss', {tval_name: aves[loss_key]}, epoch)
            writer.add_scalars('acc', {tval_name: aves[acc_key]}, epoch)
        log_str += ', {} {}/{}'.format(t_epoch, t_used, t_estimate)
        utils.log(log_str)

        writer.add_scalars('loss', {
            'train': aves['tl'],
            'val': aves['vl'],
        }, epoch)
        writer.add_scalars('acc', {
            'train': aves['ta'],
            'val': aves['va'],
        }, epoch)

        # Unwrap DataParallel before saving so the state dict is portable.
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch': epoch,
            'optimizer': config['optimizer'],
            'optimizer_args': config['optimizer_args'],
            'optimizer_sd': optimizer.state_dict(),
        }
        save_obj = {
            'file': __file__,
            'config': config,
            'model': config['model'],
            'model_args': config['model_args'],
            'model_sd': model_.state_dict(),
            'training': training,
        }
        # Always keep the latest checkpoint and the training log ...
        torch.save(save_obj, os.path.join(save_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(save_path, 'trlog.pth'))

        # ... plus periodic snapshots ...
        if (save_epoch is not None) and epoch % save_epoch == 0:
            torch.save(save_obj,
                       os.path.join(save_path, 'epoch-{}.pth'.format(epoch)))

        # ... and the best-so-far by validation accuracy.
        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(save_obj, os.path.join(save_path, 'max-va.pth'))

        writer.flush()

    print('finished training!')
    logger.close()
예제 #29
0
            # ACGAN discriminator step fragment (modern-PyTorch variant of
            # the snippet above: in-place buffer setup wrapped in no_grad).
            # Enclosing loop/def is not visible here.
            real_cpu, label = data
            batch_size = real_cpu.size(0)
            if opt.cuda:
                real_cpu = real_cpu.cuda()
            # Resize/copy the reusable input and label buffers in place.
            with torch.no_grad():
                input.resize_(real_cpu.size()).copy_(real_cpu)
                disr_label.resize_(batch_size).fill_(real_label)
                aux_label.resize_(batch_size).copy_(label)
            dis_output, aux_output = netD(input)
            dis_errD_real = van_loss(dis_output, disr_label)
            aux_errD_real = aux_criterion(aux_output, aux_label)
            errD_real = dis_errD_real + aux_errD_real
            errD_real.backward()
            D_x = dis_output.data.mean()
            # compute the current classification accuracy
            accuracy = compute_acc(aux_output, aux_label)

            # train with fake: embed a one-hot class code into the first
            # num_classes entries of the noise vector (ACGAN conditioning).
            with torch.no_grad():
                noise.resize_(batch_size, nz, 1, 1).normal_(0, 1)
            label = np.random.randint(0, num_classes, batch_size)
            noise_ = np.random.normal(0, 1, (batch_size, nz))
            class_onehot = np.zeros((batch_size, num_classes))
            class_onehot[np.arange(batch_size), label] = 1
            noise_[np.arange(batch_size), :num_classes] = class_onehot[
                np.arange(batch_size)]
            noise_ = (torch.from_numpy(noise_))
            noise.data.copy_(noise_.view(batch_size, nz, 1, 1))
            aux_label.resize_(batch_size).copy_(torch.from_numpy(label))

            fake = netG(noise)
예제 #30
0
def main(config, args):
    """Meta-train a few-shot learner and (optionally) meta-validate each epoch.

    Builds episodic train/val datasets from `config`, creates the model and
    optimizer (or restores them from a checkpoint), runs the epoch loop with
    inner-loop adaptation performed inside `model(...)`, and logs metrics to
    TensorBoard and wandb.  Saves 'epoch-last.pth' every epoch and
    'max-va.pth' whenever meta-val accuracy improves.
    """
    # Seed every RNG source so runs are repeatable.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    wandb_auth()
    # `__IPYTHON__` only exists inside an IPython session; in that case init
    # wandb without the config.  NOTE(review): the bare `except:` also
    # swallows unrelated wandb/init failures — consider `except NameError:`.
    try:
        __IPYTHON__
        wandb.init(project="NAS", group=f"maml")
    except:
        wandb.init(project="NAS", group=f"maml", config=config)

    # Derive a checkpoint directory name from the config unless one was given.
    ckpt_name = args.name
    if ckpt_name is None:
        ckpt_name = config['encoder']
        ckpt_name += '_' + config['dataset'].replace('meta-', '')
        ckpt_name += '_{}_way_{}_shot'.format(config['train']['n_way'],
                                              config['train']['n_shot'])
    if args.tag is not None:
        ckpt_name += '_' + args.tag

    ckpt_path = os.path.join('./save', ckpt_name)
    utils.ensure_path(ckpt_path)
    utils.set_log_path(ckpt_path)
    writer = SummaryWriter(os.path.join(ckpt_path, 'tensorboard'))
    # Persist the resolved config next to the checkpoints for provenance.
    yaml.dump(config, open(os.path.join(ckpt_path, 'config.yaml'), 'w'))

    ##### Dataset #####
    # meta-train
    train_set = datasets.make(config['dataset'], **config['train'])
    utils.log('meta-train set: {} (x{}), {}'.format(train_set[0][0].shape,
                                                    len(train_set),
                                                    train_set.n_classes))

    # meta-val (only built when a 'val' section exists in the config)
    eval_val = False
    if config.get('val'):
        eval_val = True
        val_set = datasets.make(config['dataset'], **config['val'])
        utils.log('meta-val set: {} (x{}), {}'.format(val_set[0][0].shape,
                                                      len(val_set),
                                                      val_set.n_classes))
        val_loader = DataLoader(val_set,
                                config['val']['n_episode'],
                                collate_fn=datasets.collate_fn,
                                num_workers=1,
                                pin_memory=True)

    # if args.split == "traintrain" and config.get('val'): # TODO I dont think this is what they meant by train-train :D
    #   train_set = torch.utils.data.ConcatDataset([train_set, val_set])
    # NOTE(review): the second positional DataLoader arg is batch_size, so
    # `n_episode` episodes are batched per iteration — confirm intended.
    train_loader = DataLoader(train_set,
                              config['train']['n_episode'],
                              collate_fn=datasets.collate_fn,
                              num_workers=1,
                              pin_memory=True)

    ##### Model and Optimizer #####

    inner_args = utils.config_inner_args(config.get('inner_args'))
    # Resume when the config asks for it, or when --load is set and a
    # previous 'epoch-last.pth' exists in the checkpoint directory.
    if config.get('load') or (args.load is True and
                              os.path.exists(ckpt_path + '/epoch-last.pth')):
        if config.get('load') is None:
            config['load'] = ckpt_path + '/epoch-last.pth'
        ckpt = torch.load(config['load'])
        # Restore the architecture description from the checkpoint so the
        # rebuilt model matches the saved weights.
        config['encoder'] = ckpt['encoder']
        config['encoder_args'] = ckpt['encoder_args']
        config['classifier'] = ckpt['classifier']
        config['classifier_args'] = ckpt['classifier_args']
        model = models.load(ckpt,
                            load_clf=(not inner_args['reset_classifier']))
        optimizer, lr_scheduler = optimizers.load(ckpt, model.parameters())
        start_epoch = ckpt['training']['epoch'] + 1
        max_va = ckpt['training']['max_va']
    else:
        # Fresh start: fill in defaults and build model/optimizer from config.
        config['encoder_args'] = config.get('encoder_args') or dict()
        config['classifier_args'] = config.get('classifier_args') or dict()
        config['encoder_args']['bn_args']['n_episode'] = config['train'][
            'n_episode']
        config['classifier_args']['n_way'] = config['train']['n_way']
        model = models.make(config['encoder'], config['encoder_args'],
                            config['classifier'], config['classifier_args'])
        optimizer, lr_scheduler = optimizers.make(config['optimizer'],
                                                  model.parameters(),
                                                  **config['optimizer_args'])
        start_epoch = 1
        max_va = 0.

    if args.efficient:
        # presumably enables a memory-efficient mode — TODO confirm in models
        model.go_efficient()

    if config.get('_parallel'):
        model = nn.DataParallel(model)

    utils.log('num params: {}'.format(utils.compute_n_params(model)))
    timer_elapsed, timer_epoch = utils.Timer(), utils.Timer()

    ##### Training and evaluation #####

    # 'tl': meta-train loss
    # 'ta': meta-train accuracy
    # 'vl': meta-val loss
    # 'va': meta-val accuracy
    aves_keys = ['tl', 'ta', 'vl', 'va']
    trlog = dict()  # per-epoch history of each averaged metric
    for k in aves_keys:
        trlog[k] = []

    for epoch in tqdm(range(start_epoch, config['epoch'] + 1),
                      desc="Iterating over epochs"):
        timer_epoch.start()
        aves = {k: utils.AverageMeter() for k in aves_keys}

        # meta-train
        model.train()
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
        # Re-seed per epoch so episode sampling is reproducible per epoch.
        np.random.seed(epoch)

        # Running sums of inner-loop/outer losses used by 'sotl'/'sovl' splits.
        all_sotls = 0
        all_sovls = 0
        for data_idx, data in enumerate(
                tqdm(train_loader, desc='meta-train', leave=False)):
            x_shot, x_query, y_shot, y_query = data
            x_shot, y_shot = x_shot.cuda(), y_shot.cuda()
            x_query, y_query = x_query.cuda(), y_query.cuda()

            if inner_args['reset_classifier']:
                if config.get('_parallel'):
                    model.module.reset_classifier()
                else:
                    model.reset_classifier()

            # 'traintrain' evaluates the outer loss on the support set itself.
            if args.split == "traintrain":
                x_query = x_shot
                y_query = y_shot

            logits, sotl, all_losses = model(x_shot,
                                             x_query,
                                             y_shot,
                                             inner_args,
                                             meta_train=True)
            # print("HAHHA", data_idx, all_losses)
            # sotl = sum([l[-1] for l in all_losses])
            # for l in all_losses[:-1]:
            #   for i in range(len(l)-1):
            #     l[i] = l[i].detach()

            # Flatten (episode, query) dims so cross-entropy sees a flat batch.
            logits = logits.flatten(0, 1)
            labels = y_query.flatten()

            all_sotls += sotl

            pred = torch.argmax(logits, dim=-1)
            acc = utils.compute_acc(pred, labels)
            loss = F.cross_entropy(logits, labels)

            # all_sovls += loss # TODO I think this causes blowup because it creates new tensors that never get discarded and it maintains the computational graph after?
            # Outer-loop update strategy is selected by args.split:
            #   'trainval'   - query loss every step
            #   'traintrain' - query==support loss every step
            #   'sotl'/'sovl'- accumulated losses applied every sotl_freq steps
            if args.split == "trainval" or (
                    args.split == "sovl"
                    and not data_idx % args.sotl_freq == 0):

                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)

                optimizer.zero_grad()
                loss.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()
            elif args.split == "traintrain":

                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)

                # sotl = sum(sotl) + loss
                optimizer.zero_grad()
                # sotl.backward()
                loss.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()

            elif args.split == "sotl" and data_idx % args.sotl_freq == 0:
                # TODO doesnt work whatsoever

                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)
                optimizer.zero_grad()
                all_sotls.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()
                all_sotls = 0  # detach
            elif args.split == "sovl" and data_idx % args.sotl_freq == 0:
                # TODO doesnt work whatsoever

                aves['tl'].update(loss.item(), 1)
                aves['ta'].update(acc, 1)
                optimizer.zero_grad()
                all_sovls.backward()
                for param in optimizer.param_groups[0]['params']:
                    nn.utils.clip_grad_value_(param, 10)
                optimizer.step()
                all_sovls = 0  # detach

        # meta-val
        if eval_val:
            model.eval()
            # Fixed seed so every epoch evaluates the same validation episodes.
            np.random.seed(0)

            for data in tqdm(val_loader, desc='meta-val', leave=False):
                x_shot, x_query, y_shot, y_query = data
                x_shot, y_shot = x_shot.cuda(), y_shot.cuda()
                x_query, y_query = x_query.cuda(), y_query.cuda()

                if inner_args['reset_classifier']:
                    if config.get('_parallel'):
                        model.module.reset_classifier()
                    else:
                        model.reset_classifier()

                logits, sotl, all_losses = model(x_shot,
                                                 x_query,
                                                 y_shot,
                                                 inner_args,
                                                 meta_train=False)
                logits = logits.flatten(0, 1)
                labels = y_query.flatten()

                pred = torch.argmax(logits, dim=-1)
                acc = utils.compute_acc(pred, labels)
                loss = F.cross_entropy(logits, labels)
                aves['vl'].update(loss.item(), 1)
                aves['va'].update(acc, 1)

        if lr_scheduler is not None:
            lr_scheduler.step()

        # Collapse running meters into scalars and append to the history log.
        for k, avg in aves.items():
            aves[k] = avg.item()
            trlog[k].append(aves[k])

        t_epoch = utils.time_str(timer_epoch.end())
        t_elapsed = utils.time_str(timer_elapsed.end())
        t_estimate = utils.time_str(timer_elapsed.end() /
                                    (epoch - start_epoch + 1) *
                                    (config['epoch'] - start_epoch + 1))

        # formats output
        log_str = 'epoch {}, meta-train {:.4f}|{:.4f}'.format(
            str(epoch), aves['tl'], aves['ta'])
        writer.add_scalars('loss', {'meta-train': aves['tl']}, epoch)
        writer.add_scalars('acc', {'meta-train': aves['ta']}, epoch)

        if eval_val:
            log_str += ', meta-val {:.4f}|{:.4f}'.format(
                aves['vl'], aves['va'])
            writer.add_scalars('loss', {'meta-val': aves['vl']}, epoch)
            writer.add_scalars('acc', {'meta-val': aves['va']}, epoch)

        # NOTE(review): val_loss/val_acc are logged even when eval_val is
        # False (they stay at their meter defaults) — harmless but can be
        # misleading on dashboards.
        wandb.log({
            "train_loss": aves['tl'],
            "train_acc": aves['ta'],
            "val_loss": aves['vl'],
            "val_acc": aves['va']
        })
        log_str += ', {} {}/{}'.format(t_epoch, t_elapsed, t_estimate)
        utils.log(log_str)

        # saves model and meta-data
        if config.get('_parallel'):
            model_ = model.module
        else:
            model_ = model

        training = {
            'epoch':
            epoch,
            'max_va':
            max(max_va, aves['va']),
            'optimizer':
            config['optimizer'],
            'optimizer_args':
            config['optimizer_args'],
            'optimizer_state_dict':
            optimizer.state_dict(),
            'lr_scheduler_state_dict':
            lr_scheduler.state_dict() if lr_scheduler is not None else None,
        }
        ckpt = {
            'file': __file__,
            'config': config,
            'encoder': config['encoder'],
            'encoder_args': config['encoder_args'],
            'encoder_state_dict': model_.encoder.state_dict(),
            'classifier': config['classifier'],
            'classifier_args': config['classifier_args'],
            'classifier_state_dict': model_.classifier.state_dict(),
            'training': training,
        }

        # 'epoch-last.pth': saved at the latest epoch
        # 'max-va.pth': saved when validation accuracy is at its maximum
        torch.save(ckpt, os.path.join(ckpt_path, 'epoch-last.pth'))
        torch.save(trlog, os.path.join(ckpt_path, 'trlog.pth'))

        if aves['va'] > max_va:
            max_va = aves['va']
            torch.save(ckpt, os.path.join(ckpt_path, 'max-va.pth'))

        writer.flush()