Пример #1
0
    def evaluate_alt(self, conf, mode='test'):
        """Evaluate each model, plus their mean ensemble, on ``self.eval_loader``.

        When more than one model is held, an additional entry keyed ``-1`` is
        evaluated first; its scores are the element-wise mean of the outputs
        of all models.

        Args:
            conf: Run configuration; ``no_bkg`` and ``device`` are read here.
            mode: Forwarded to the evaluation dataset's ``set_mode``, used as
                the progress-bar description and as the suffix of the
                confusion-matrix tensor name. ``'train'`` places the progress
                bar at position 2, anything else at position 1.

        Returns:
            A list of ``(accuracy, roc_curve_tensor, confusion_matrix)``
            tuples, one per evaluated entry (ensemble mean first when
            present). ``confusion_matrix`` is whatever
            ``plot_confusion_matrix`` returns.
        """
        class_names = self.loader.dataset.label_names
        if not conf.no_bkg:
            # A leading 'null' class name is added unless backgrounds are disabled.
            class_names = ['null'] + class_names

        for net in self.models:
            net.eval()

        n_models = len(self.models)
        first_key = -1 if n_models > 1 else 0  # -1 is the ensemble-mean slot
        keys = range(first_key, n_models)
        pred_chunks = {key: [] for key in keys}
        prob_chunks = {key: [] for key in keys}
        label_chunks = []
        bar_position = 2 if mode == 'train' else 1
        # TODO: verify that switching the dataset mode here works as intended.
        self.eval_loader.dataset.set_mode(mode)

        with torch.no_grad():
            progress = tqdm(self.eval_loader,
                            total=len(self.eval_loader),
                            desc=mode,
                            position=bar_position)
            for imgs, label in progress:
                imgs = imgs.to(conf.device)

                self.optimizer.zero_grad()
                outputs = [net(imgs).detach() for net in self.models]
                if n_models > 1:
                    # Prepend the ensemble mean so it lines up with key -1.
                    ensemble = torch.mean(torch.stack(outputs), 0)
                    outputs = [ensemble] + outputs
                for key, theta in zip(keys, outputs):
                    _, winners = torch.max(theta, dim=1)
                    pred_chunks[key].append(winners.cpu().numpy())
                    prob_chunks[key].append(theta.cpu().numpy())
                label_chunks.append(label.detach().cpu().numpy())

        labels = np.hstack(label_chunks)
        results = []
        for key in keys:
            key_predictions = np.hstack(pred_chunks[key])
            key_prob = np.vstack(prob_chunks[key])

            cm_figure = plot_confusion_matrix(labels,
                                              key_predictions,
                                              class_names,
                                              tensor_name='dev/cm_' + mode)
            hits = (key_predictions == labels)
            acc = sum(hits) / len(hits)
            # ROC over the flattened score matrix: each sample's correctness
            # flag is repeated once per class to match the raveled scores.
            fpr, tpr, _ = roc_curve(np.repeat(hits, self.n_classes),
                                    key_prob.ravel())
            roc_image = Image.open(gen_plot(fpr, tpr))
            roc_tensor = trans.ToTensor()(roc_image)
            results.append((acc, roc_tensor, cm_figure))
        return results
Пример #2
0
    def evaluate(self, conf, model_num, mode='test'):
        """Evaluate a single model, or the mean of all models, on one split.

        Args:
            conf: Run configuration; ``type_only``, ``cancer_only``,
                ``single_type``, ``no_bkg`` and ``device`` are read here.
            model_num: Index into ``self.models``. The value ``-1`` selects
                the ensemble whose output is the mean over all models.
            mode: ``'train'`` evaluates on ``self.eval_train``; any other
                value evaluates on ``self.eval_test``. Also used in the
                progress-bar description and the confusion-matrix tensor name.

        Returns:
            ``(accuracy, roc_curve_tensor, confusion_matrix)`` where
            ``confusion_matrix`` is whatever ``plot_confusion_matrix`` returns.
        """
        use_ensemble = model_num == -1
        if use_ensemble:
            for net in self.models:
                net.eval()
        else:
            model = self.models[model_num]
            model.eval()
        # TODO look into this https://github.com/pytorch/pytorch/issues/11476
        # batching is unstable... limit to less gpus or use sync

        # Class names depend on which labelling scheme the config selects.
        if conf.type_only:
            label_names = ['calc', 'mass']
        elif conf.cancer_only or conf.single_type:
            label_names = ['mal', 'ben']
        else:
            label_names = ['calc_mal', 'calc_ben', 'mass_mal', 'mass_ben']
        if not conf.no_bkg:
            label_names.insert(0, 'bkg')

        if mode == 'train':
            loader, bar_position = self.eval_train, 2
        else:
            loader, bar_position = self.eval_test, 1
        model_tag = str(model_num) if model_num > -1 else 'mean'

        pred_chunks, prob_chunks, label_chunks = [], [], []
        with torch.no_grad():
            progress = tqdm(loader,
                            total=len(loader),
                            desc=mode + '_' + model_tag,
                            position=bar_position)
            for imgs, label in progress:
                # Each batch arrives as a sequence of tensors that is
                # concatenated before being moved to the device.
                imgs = torch.cat(imgs).to(conf.device)

                self.optimizer.zero_grad()
                if use_ensemble:
                    per_model = [net(imgs).detach() for net in self.models]
                    theta = torch.mean(torch.stack(per_model), 0)
                else:
                    theta = model(imgs).detach()

                _, winners = torch.max(theta, dim=1)
                pred_chunks.append(winners.cpu().numpy())
                prob_chunks.append(theta.cpu().numpy())
                label_chunks.append(torch.cat(label).detach().cpu().numpy())

        predictions = np.hstack(pred_chunks)
        prob = np.vstack(prob_chunks)
        labels = np.hstack(label_chunks)

        cm_figure = plot_confusion_matrix(labels, predictions, label_names,
                                          tensor_name='dev/cm_' + mode)
        hits = (predictions == labels)
        acc = sum(hits) / len(hits)
        # ROC over the flattened score matrix: each sample's correctness flag
        # is repeated once per class to match the raveled scores.
        fpr, tpr, _ = roc_curve(np.repeat(hits, self.n_classes), prob.ravel())
        roc_image = Image.open(gen_plot(fpr, tpr))
        roc_tensor = trans.ToTensor()(roc_image)
        return acc, roc_tensor, cm_figure
Пример #3
0
    def evaluate_alt(self, conf, mode='test'):
        """Evaluate each model, plus their mean ensemble, on one split.

        When more than one model is held, an additional entry keyed ``-1`` is
        evaluated first; its scores are the element-wise mean of the outputs
        of all models.

        Args:
            conf: Run configuration; ``type_only``, ``cancer_only``,
                ``single_type``, ``no_bkg`` and ``device`` are read here.
            mode: ``'train'`` evaluates on ``self.eval_train``; any other
                value evaluates on ``self.eval_test``. Also used as the
                progress-bar description and the confusion-matrix tag suffix.

        Returns:
            A list of ``(accuracy, roc_curve_tensor, confusion_matrix)``
            tuples, one per evaluated entry (ensemble mean first when
            present). ``confusion_matrix`` is whatever
            ``plot_confusion_matrix`` returns.
        """
        # Class names derived from the config. They are replaced by the
        # hard-coded experimental list just below, so only the attribute
        # reads on ``conf`` have any lasting effect.
        if conf.type_only:
            label_names = ['calc', 'mass']
        elif conf.cancer_only or conf.single_type:
            label_names = ['mal', 'ben']
        else:
            label_names = ['calc_mal', 'mass_mal', 'calc_ben', 'mass_ben']
        if not conf.no_bkg:
            label_names.insert(0, 'bkg')

        # TODO: experiment -- fixed class order that overrides the names above.
        label_names = ['mass_mal', 'mass_ben', 'bkg', 'calc_ben', 'calc_mal']

        for net in self.models:
            net.eval()

        n_models = len(self.models)
        first_key = -1 if n_models > 1 else 0  # -1 is the ensemble-mean slot
        keys = range(first_key, n_models)
        pred_chunks = {key: [] for key in keys}
        prob_chunks = {key: [] for key in keys}
        label_chunks = []
        if mode == 'train':
            loader, bar_position = self.eval_train, 2
        else:
            loader, bar_position = self.eval_test, 1

        with torch.no_grad():
            progress = tqdm(loader, total=len(loader), desc=mode,
                            position=bar_position)
            for imgs, label in progress:
                # Each batch arrives as a sequence of tensors that is
                # concatenated before being moved to the device.
                imgs = torch.cat(imgs).to(conf.device)

                self.optimizer.zero_grad()
                outputs = [net(imgs).detach() for net in self.models]
                if n_models > 1:
                    # Prepend the ensemble mean so it lines up with key -1.
                    ensemble = torch.mean(torch.stack(outputs), 0)
                    outputs = [ensemble] + outputs
                for key, theta in zip(keys, outputs):
                    _, winners = torch.max(theta, dim=1)
                    pred_chunks[key].append(winners.cpu().numpy())
                    prob_chunks[key].append(theta.cpu().numpy())
                label_chunks.append(torch.cat(label).detach().cpu().numpy())

        labels = np.hstack(label_chunks)
        results = []
        for key in keys:
            key_predictions = np.hstack(pred_chunks[key])
            key_prob = np.vstack(prob_chunks[key])

            cm_figure = plot_confusion_matrix(labels, key_predictions,
                                              label_names,
                                              tensor_name='dev/cm_' + mode)
            hits = (key_predictions == labels)
            acc = sum(hits) / len(hits)
            # ROC over the flattened score matrix: each sample's correctness
            # flag is repeated once per class to match the raveled scores.
            fpr, tpr, _ = roc_curve(np.repeat(hits, self.n_classes),
                                    key_prob.ravel())
            roc_image = Image.open(gen_plot(fpr, tpr))
            roc_tensor = trans.ToTensor()(roc_image)
            results.append((acc, roc_tensor, cm_figure))
        return results
Пример #4
0
    def evaluate(self, conf, mode='test'):
        """Evaluate each model, plus their mean ensemble, on ``self.X_test``.

        The whole test set is pushed through every model in a single forward
        pass. When more than one model is held, an additional entry keyed
        ``-1`` is evaluated first; its scores are the element-wise mean of
        the outputs of all models.

        Args:
            conf: Accepted for interface parity; not read by this method.
            mode: Only used as the suffix of the confusion-matrix tensor name.

        Returns:
            A list of ``(accuracy, roc_curve_tensor, confusion_matrix)``
            tuples, one per evaluated entry (ensemble mean first when
            present). ``confusion_matrix`` is whatever
            ``plot_confusion_matrix`` returns.
        """
        for net in self.models:
            net.eval()

        n_models = len(self.models)
        first_key = -1 if n_models > 1 else 0  # -1 is the ensemble-mean slot
        keys = range(first_key, n_models)
        pred_chunks = {key: [] for key in keys}
        prob_chunks = {key: [] for key in keys}

        with torch.no_grad():
            self.optimizer.zero_grad()
            outputs = [net(self.X_test).detach() for net in self.models]
            if n_models > 1:
                # Prepend the ensemble mean so it lines up with key -1.
                ensemble = torch.mean(torch.stack(outputs), 0)
                outputs = [ensemble] + outputs
            for key, theta in zip(keys, outputs):
                _, winners = torch.max(theta, dim=1)
                pred_chunks[key].append(winners.cpu().numpy())
                prob_chunks[key].append(theta.cpu().numpy())

        labels = self.y_test
        results = []
        for key in keys:
            key_predictions = np.hstack(pred_chunks[key])
            key_prob = np.vstack(prob_chunks[key])

            # NOTE(review): ``label_names`` is never assigned inside this
            # method; it has to resolve from an enclosing (e.g. module) scope,
            # otherwise this call raises NameError -- confirm where it lives.
            cm_figure = plot_confusion_matrix(labels,
                                              key_predictions,
                                              label_names,
                                              tensor_name='dev/cm_' + mode)
            hits = (key_predictions == labels)
            acc = sum(hits) / len(hits)
            # ROC over the flattened score matrix: each sample's correctness
            # flag is repeated once per class to match the raveled scores.
            fpr, tpr, _ = roc_curve(np.repeat(hits, self.n_classes),
                                    key_prob.ravel())
            roc_image = Image.open(gen_plot(fpr, tpr))
            roc_tensor = trans.ToTensor()(roc_image)
            results.append((acc, roc_tensor, cm_figure))
        return results
Пример #5
0
    def evaluate(self, conf, mode='test'):
        """Evaluate each model, plus their mean ensemble, on ``self.eval_loader``.

        Every batch is a 5-D tensor ``(batch, crops, c, h, w)`` that is
        flattened to ``(batch * crops, c, h, w)`` before the forward pass.
        Outside ``'train'`` mode the model outputs are averaged over the crop
        axis again. When more than one model is held, an additional entry
        keyed ``-1`` is evaluated first; its scores are the element-wise mean
        of the outputs of all models.

        With ``conf.rank`` set, each model returns a second "rank" output and
        each batch label is a ``(label, rank_label)`` pair. Samples whose
        rank label equals ``-1`` are excluded from the rank metrics.

        Args:
            conf: Run configuration; ``device``, ``rank`` and ``n_models``
                are read here.
            mode: Progress-bar description and confusion-matrix tag suffix.
                ``'train'`` disables the averaging over crops and moves the
                progress bar to position 2.

        Returns:
            A list with one inner list per evaluated entry (ensemble mean
            first when present). Each inner list holds
            ``(mean_auroc, auc_figure)`` and, when ``conf.rank`` is set, also
            ``(rank_accuracy, roc_curve_tensor, confusion_matrix)``.
        """
        for model in self.models:
            model.eval()

        n_models = len(self.models)
        do_mean = -1 if n_models > 1 else 0  # -1 is the ensemble-mean slot
        ind_iter = range(do_mean, n_models)
        prob = {ind: [] for ind in ind_iter}
        rank_prob = {ind: [] for ind in ind_iter}
        rank_predictions = {ind: [] for ind in ind_iter}
        labels = []
        rank_labels = []
        pos = 2 if mode == 'train' else 1
        # NOTE(review): the flag is set on ``self.loader``'s dataset while the
        # loop below iterates ``self.eval_loader`` -- confirm they share a dataset.
        self.loader.dataset.train = False
        with torch.no_grad():
            for imgs, label in tqdm(self.eval_loader,
                                    total=len(self.eval_loader),
                                    desc=mode,
                                    position=pos):
                imgs = imgs.to(conf.device)
                if conf.rank:
                    label, rank_label = label
                    rank_labels.append(rank_label.detach().cpu().numpy())
                labels.append(label.detach().cpu().numpy())

                # Fold the crop axis into the batch axis for the forward pass.
                bs, n_crops, c, h, w = imgs.size()
                imgs = imgs.view(-1, c, h, w).cuda()

                self.optimizer.zero_grad()
                thetas = []
                rank_thetas = []
                for model_num in range(conf.n_models):
                    if conf.rank:
                        theta, rank_theta = self.models[model_num](imgs)
                        if mode != 'train':
                            # Average the rank scores over the crops.
                            rank_theta = rank_theta.view(bs, n_crops,
                                                         -1).mean(1)
                        rank_thetas.append(rank_theta.detach())
                    else:
                        theta = self.models[model_num](imgs)
                    if mode != 'train':
                        # Average the class scores over the crops.
                        theta = theta.view(bs, n_crops, -1).mean(1)
                    thetas.append(theta.detach())

                if n_models > 1:
                    # Prepend the ensemble mean so it lines up with key -1.
                    thetas = [torch.mean(torch.stack(thetas), 0)] + thetas
                for ind, theta in zip(ind_iter, thetas):
                    prob[ind].append(theta.cpu().numpy())

                if conf.rank:
                    if n_models > 1:
                        rank_thetas = [
                            torch.mean(torch.stack(rank_thetas), 0)
                        ] + rank_thetas
                    for ind, theta in zip(ind_iter, rank_thetas):
                        _, arg = torch.max(theta, dim=1)
                        rank_predictions[ind].append(arg.cpu().numpy())
                        rank_prob[ind].append(theta.cpu().numpy())

        labels = np.vstack(labels)
        if conf.rank:
            rank_labels = np.hstack(rank_labels)
            # Rank label -1 is excluded from the rank metrics (presumably it
            # marks samples without a rank annotation -- confirm).
            mask = (rank_labels != -1)
            # Filter the labels with the same mask as the predictions so the
            # arrays compared below always have matching lengths.
            valid_rank_labels = rank_labels[mask]

        results = []
        for ind in ind_iter:
            cur_res = []
            curr_prob = np.vstack(prob[ind])

            # Per-class AUROC: column i of the label matrix against column i
            # of the score matrix.
            AUROCs = [
                roc_auc_score(labels[:, i], curr_prob[:, i])
                for i in range(self.n_classes)
            ]
            AUROC_avg = np.array(AUROCs).mean()
            img_d_fig = plot_auc_vector(AUROCs, self.ds_test.label_names)
            cur_res.append((AUROC_avg, img_d_fig))

            if conf.rank:
                curr_predictions = np.hstack(rank_predictions[ind])[mask]
                curr_prob = np.vstack(rank_prob[ind])[mask]

                img_d_fig = plot_confusion_matrix(
                    valid_rank_labels,
                    curr_predictions,
                    self.ds_test.rank_label_names,
                    tensor_name='dev/cm_' + mode)
                res = (curr_predictions == valid_rank_labels)
                acc = sum(res) / len(res)
                # ROC over the flattened rank scores: each sample's
                # correctness flag is repeated once per rank class.
                fpr, tpr, _ = roc_curve(
                    np.repeat(res, self.ds_test.n_rank_labels),
                    curr_prob.ravel())
                buf = gen_plot(fpr, tpr)
                roc_curve_im = Image.open(buf)
                roc_curve_tensor = trans.ToTensor()(roc_curve_im)
                cur_res.append((acc, roc_curve_tensor, img_d_fig))

            results.append(cur_res)

        return results
Пример #6
0
    def evaluate(self, conf, mode='test'):
        """Evaluate each model, plus their mean ensemble, with optional OOD scoring.

        Every batch is a 5-D tensor ``(batch, crops, c, h, w)`` that is
        flattened to ``(batch * crops, c, h, w)`` for the forward pass; the
        outputs are then averaged over the crop axis. When more than one
        model is held, an additional entry keyed ``-1`` is evaluated first;
        its scores are the element-wise mean of the outputs of all models.

        Args:
            conf: Run configuration; ``device`` and ``n_models`` are read here.
            mode: ``'train'`` evaluates on ``self.train_eval_loader``; any
                other value evaluates on ``self.eval_loader``. Also used as
                the progress-bar description and confusion-matrix tag suffix.

        Returns:
            A list of ``(weighted_recall, roc_curve_tensor, confusion_matrix)``
            tuples, one per evaluated entry. When the dataset's ``ood``
            attribute is truthy, there is more than one model and ``mode`` is
            not ``'train'``, the return value is ``(results, ood_auc)`` instead.
        """

        for i in range(len(self.models)):
            self.models[i].eval()

        # -1 is the ensemble-mean slot; it only exists with several models.
        do_mean = -1 if len(self.models) > 1 else 0
        ind_iter = range(do_mean, len(self.models))
        predictions = dict(zip(ind_iter, [[] for i in ind_iter]))
        prob = dict(zip(ind_iter, [[] for i in ind_iter]))
        labels = []
        pos = 2 if mode == 'train' else 1

        eval_loader = self.train_eval_loader if mode == 'train' else self.eval_loader
        # ``dataset.ood`` is used both as a truthiness flag and, further down,
        # as the label value that marks OOD samples.
        # NOTE(review): an OOD label of 0 would be falsy and disable all OOD
        # handling -- confirm which values the dataset can expose here.
        has_ood = eval_loader.dataset.ood
        report_ood = has_ood and (do_mean == -1) and mode != 'train'

        eval_loader.dataset.evaluate()
        with torch.no_grad():
            for imgs, label in tqdm(eval_loader, total=len(eval_loader), desc=mode, position=pos):
                imgs = imgs.to(conf.device)
                # Fold the crop axis into the batch axis for the forward pass.
                bs, n_crops, c, h, w = imgs.size()
                imgs = imgs.view(-1, c, h, w).cuda()

                self.optimizer.zero_grad()
                thetas = []
                for model_num in range(conf.n_models):
                    theta = self.models[model_num](imgs)
                    # Average the scores over the crops of each sample.
                    theta = theta.view(bs, n_crops, -1).mean(1).detach()
                    thetas.append(theta.detach())

                # Prepend the ensemble mean so it lines up with key -1.
                if len(self.models) > 1: thetas = [torch.mean(torch.stack(thetas), 0)] + thetas
                for ind, theta in zip(range(do_mean, len(self.models)), thetas):
                    val, arg = torch.max(theta, dim=1)
                    predictions[ind].append(arg.cpu().numpy())
                    prob[ind].append(theta.cpu().numpy())
                labels.append(label.detach().cpu().numpy())

        # Concatenate the per-batch chunks into one array per entry.
        labels = np.hstack(labels)
        predictions = {key: np.hstack(predictions[key]) for key in predictions}
        prob = {key: np.vstack(prob[key]) for key in prob}
        results = []
        label_names = eval_loader.dataset.classes
        for ind in range(do_mean, len(self.models)):
            curr_predictions = predictions[ind]
            curr_prob = prob[ind]
            eval_labels = labels
            if has_ood:
                # OOD filtering required: class metrics only use samples whose
                # label differs from the OOD label.
                ood_ind = has_ood
                # NOTE(review): despite its name, ``is_ood`` is True for the
                # samples that are NOT labelled OOD. The same mask is later
                # passed as y_true to the OOD ROC/AUC below -- confirm the
                # intended polarity.
                is_ood = labels != ood_ind
                eval_labels = labels[is_ood]
                curr_predictions = curr_predictions[labels != ood_ind]
                curr_prob = curr_prob[labels != ood_ind]

            img_d_fig = plot_confusion_matrix(eval_labels, curr_predictions, label_names,
                                              tensor_name='dev/cm_' + mode)

            # sample_w = compute_sample_weight(class_weight, eval_labels)
            recall = recall_score(eval_labels, curr_predictions, average='weighted')
            # res = (curr_predictions == eval_labels)
            # acc = sum(res) / len(res)

            # One-hot encode the labels, then compute one ROC curve per class.
            dummies = np.eye(self.n_classes)[eval_labels]
            fpr = dict()
            tpr = dict()
            for i in range(self.n_classes):
                fpr[i], tpr[i], _ = roc_curve(dummies[:, i], curr_prob[:, i])

            # OOD metrics are computed once, on the ensemble-mean entry only.
            if report_ood and ind == do_mean:
                # ood eval
                ood_ind = has_ood
                # Mean over the individual models' (unfiltered) score arrays.
                ensemble_prob = np.stack([prob[ind] for ind in range(len(self.models))]).mean(0)
                #ensemble_pred = np.stack([predictions[ind] for ind in range(len(self.models))]).T
                # Per-sample entropy of the ensemble scores, with the number
                # of score columns as the logarithm base.
                ood_confidance = entropy(ensemble_prob, axis=1, base=ensemble_prob.shape[1])
                # Replaces the per-class curve stored under the OOD index.
                fpr[ood_ind], tpr[ood_ind], _ = roc_curve(is_ood, ood_confidance)
                ood_auc = roc_auc_score(is_ood, ood_confidance)
                roc_labels = label_names
            else:
                # Outside 'train' mode the last class name is dropped
                # (presumably the OOD class is listed last -- TODO confirm).
                roc_labels = label_names if mode == 'train' else label_names[:-1]

            buf = gen_plot_mult(fpr, tpr, roc_labels)
            roc_curve_im = Image.open(buf)
            roc_curve_tensor = trans.ToTensor()(roc_curve_im)
            results.append((recall, roc_curve_tensor, img_d_fig))

        return results if (not report_ood) else (results, ood_auc)