Example #1
    def test(self, dataloader):
        device = self.args.device
        pred = []
        self.classifier.eval()
        labels = []
        scores_all = []
        with torch.no_grad():
            for data in dataloader:
                _input, _label, _name = data
                _input = _input.to(device)
                _label = _label.long()
                out = self.classifier(_input)
                pred.append(out.argmax(1).data.cpu().numpy())
                labels.append(_label.data.numpy())
                scores_all.append(out.data.cpu().numpy())

        pred_labels = np.hstack(pred)
        labels = np.hstack(labels)
        # Binary scores arrive as a flat vector; two-class outputs keep only
        # the class-1 column as the positive score.
        if scores_all[0].ndim == 1:
            scores_all = np.hstack(scores_all)
        else:
            scores_all = np.vstack(scores_all)[:, 1]
        # Accuracy, ROC AUC, and cross-entropy are computed by compute_metrics
        auc, acc, ce = compute_metrics(scores_all, labels)
        return acc, auc, ce, (scores_all, labels)
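compute_metrics is defined elsewhere in the project. A minimal sketch of what it plausibly computes for binary scores (assuming scikit-learn and SciPy; the real implementation may differ):

    import numpy as np
    from scipy.special import expit
    from sklearn import metrics

    def compute_metrics(scores, labels):
        # ROC AUC directly from the raw scores
        fpr, tpr, _ = metrics.roc_curve(labels, scores)
        auc = metrics.auc(fpr, tpr)
        # Accuracy and binary cross-entropy from sigmoid probabilities
        prob = expit(scores)
        acc = np.mean((prob > 0.5) == labels) * 100
        ce = metrics.log_loss(labels, prob)
        return auc, acc, ce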
Example #2
    def test(self, val_models, val_labels):
        with torch.no_grad():
            pred = []
            device = self.args.device
            for model in tqdm(val_models):
                # Load each candidate model and score it with the meta-classifier
                cnn = self.load_model(model)
                cnn.eval()
                cnn.to(device)
                logit = self.compute_logit(
                    cnn, self.classifier.X, self.classifier.W, self.classifier.b
                )
                pred.append(logit.data.cpu().numpy())
            # Keep the class-1 column as the positive score for each model
            scores_all = np.vstack(pred)[:, 1]
            auc, acc, ce = compute_metrics(scores_all, val_labels)
            return acc, auc, ce, (scores_all, val_labels)
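compute_logit is not shown in this example. One plausible reading, given the classifier's X, W, and b attributes, is that each candidate network is queried on a fixed probe set X and its concatenated outputs are scored with a linear head; a hypothetical sketch:

    def compute_logit(self, cnn, X, W, b):
        # Query the candidate network on the probe inputs X, flatten its
        # outputs into a single feature vector, and apply the linear head
        # (W, b), yielding one 2-class logit per candidate model.
        feats = cnn(X).flatten().unsqueeze(0)  # (1, num_probes * num_outputs)
        return feats @ W.t() + b               # (1, 2)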
Example #3
    def test(self, dataloader):
        device = self.args.device
        pred = []
        self.classifier.eval()
        labels = []
        scores_all = []
        with torch.no_grad():
            for data in dataloader:
                _input, _valid, _label, _arch, _name = data
                _input = _input.to(device)
                _valid = _valid.to(device)
                if self.args.stocastic:  # (sic) arg name as defined upstream
                    # Average T stochastic forward passes (MC-style sampling)
                    out_lst = []
                    pred_lst = []
                    for _ in range(self.args.T):
                        _out = self.classifier((_input, _valid, _arch))
                        out_lst.append(_out)
                        pred_lst.append(_out.argmax(1))
                    if self.args.hard:
                        # Majority vote over the hard predictions
                        out = torch.stack(pred_lst).float().mean(0)
                        pred.append((out > 0.5).long().data.cpu().numpy())
                    else:
                        # Average the raw outputs, then take the argmax
                        out = torch.stack(out_lst).mean(0)
                        pred.append(out.argmax(1).data.cpu().numpy())
                else:
                    out = self.classifier((_input, _valid, _arch))
                    pred.append(out.argmax(1).data.cpu().numpy())
                labels.append(_label.data.numpy())
                scores_all.append(out.data.cpu().numpy())

        pred_labels = np.hstack(pred)
        labels = np.hstack(labels)
        # Binary scores arrive as a flat vector; two-class outputs keep only
        # the class-1 column as the positive score.
        if scores_all[0].ndim == 1:
            scores_all = np.hstack(scores_all)
        else:
            scores_all = np.vstack(scores_all)[:, 1]
        # Accuracy, ROC AUC, and cross-entropy are computed by compute_metrics
        auc, acc, ce = compute_metrics(scores_all, labels)
        return acc, auc, ce, (scores_all, labels)
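The stochastic branch only varies across the T passes if some source of randomness, typically dropout, stays active after classifier.eval(). A common helper for that (hypothetical here; the project may handle it elsewhere):

    def enable_dropout(model):
        # Re-enable dropout layers after model.eval() so that repeated
        # forward passes differ (MC-dropout style sampling).
        for m in model.modules():
            if isinstance(m, torch.nn.Dropout):
                m.train()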
Example #4
        # Building ensemble (average)
        test_info = []
        for num_models in range(args.num_ensemble):
            print(
                f"Training ensemble model {num_models + 1} / {args.num_ensemble}")
            valid_acc, valid_auc, valid_ce, valid_info = model.train(
                dl_train, dl_val)
            acc, auc, ce, _test_info = model.test(dl_test)
            test_info.append(_test_info)
            print(f"validation auc = {valid_auc}\ntest auc = {auc}")
        # Average the ensemble members in probability space
        prob = np.asarray([sigmoid(it[0]) for it in test_info]).mean(0)
        # Convert back to logits (the epsilon avoids division by zero)
        scores = np.log(prob / (1 - prob + 1e-10))
        labels = test_info[0][1]
        auc, acc, ce = compute_metrics(scores, labels)
        print(f"Fold {i}\nAcc: {acc:.2f}\nAuc: {auc:.2f}\nCE: {ce:.2f}")

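sigmoid is not defined in this snippet; a minimal NumPy version, together with the probability-space averaging step in isolation (member_scores is hypothetical illustration data):

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    # Three hypothetical ensemble members, four test samples each
    member_scores = [np.array([2.0, -1.0, 0.5, -0.2]) for _ in range(3)]
    prob = np.asarray([sigmoid(s) for s in member_scores]).mean(0)
    # Inverse sigmoid (logit); the epsilon avoids division by zero
    scores = np.log(prob / (1 - prob + 1e-10))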
Example #5
def main(args):

    where_to_save = os.path.join(args.save_dir, args.project_name,
                                 args.model_name)
    checkpoints_dir = os.path.join(where_to_save, 'checkpoints')
    logs_dir = os.path.join(where_to_save, 'logs')

    if not args.log_term:
        # Redirect stdout/stderr to log files (make sure the directory exists)
        os.makedirs(logs_dir, exist_ok=True)
        sys.stdout = open(os.path.join(logs_dir, 'eval.log'), 'w')
        sys.stderr = open(os.path.join(logs_dir, 'eval.err'), 'w')

    # Image preprocessing
    transforms_list = []
    transforms_list.append(transforms.Resize(args.crop_size))
    transforms_list.append(transforms.CenterCrop(args.crop_size))
    transforms_list.append(transforms.ToTensor())
    transforms_list.append(
        transforms.Normalize((0.485, 0.456, 0.406),  # ImageNet mean
                             (0.229, 0.224, 0.225)))  # ImageNet std
    transform = transforms.Compose(transforms_list)

    # Data loader
    data_loader, dataset = get_loader(args.data_dir,
                                      'test',
                                      args.maxnumlabels,
                                      batch_size=args.batch_size,
                                      transform=transform,
                                      shuffle=False,
                                      num_workers=args.num_workers,
                                      drop_last=False,
                                      max_num_samples=-1)

    ingr_vocab_size = dataset.get_ingrs_vocab_size()

    args.numgens = 1  # one generation pass per image

    # Build the model and restore the best checkpoint
    model = get_model(args, ingr_vocab_size)
    model_path = os.path.join(checkpoints_dir, 'modelbest.ckpt')
    model.load_state_dict(torch.load(model_path, map_location=map_loc))

    model.eval()
    model = model.to(device)
    error_types = {
        'tp_i': 0,
        'fp_i': 0,
        'fn_i': 0,
        'tn_i': 0,
        'tp_all': 0,
        'fp_all': 0,
        'fn_all': 0
    }

    for i, (img_inputs, ingr_gt, img_id, path) in enumerate(tqdm(data_loader)):

        ingr_gt = ingr_gt.to(device)
        img_inputs = img_inputs.to(device)

        for gens in range(args.numgens):
            with torch.no_grad():

                outputs = model.sample(img_inputs)

                fake_ingrs = outputs['ingr_ids']
                pred_one_hot = label2onehot(fake_ingrs, ingr_vocab_size - 1)
                target_one_hot = label2onehot(ingr_gt, ingr_vocab_size - 1)

                update_error_types(error_types, pred_one_hot, target_one_hot)

    ret_metrics = {'accuracy': [], 'f1': []}
    compute_metrics(ret_metrics,
                    error_types, ['accuracy', 'f1'],
                    eps=1e-10,
                    weights=None)

    for k, v in ret_metrics.items():
        print(k, np.mean(v))
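label2onehot and update_error_types come from the project's utilities and are not shown. A minimal sketch of the one-hot conversion, assuming the second argument is the padding index (ingr_vocab_size - 1 above) and that the pad column is dropped from the multi-hot vector:

    import torch

    def label2onehot(labels, pad_value):
        # labels: (batch, max_labels) integer ids, padded with pad_value.
        # Scatter each id into a multi-hot row, then drop the pad column.
        one_hot = torch.zeros(labels.size(0), pad_value + 1,
                              device=labels.device)
        one_hot.scatter_(1, labels, 1)
        return one_hot[:, :-1]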