Example no. 1
    def evaluate(self, loader, log_to_mlflow=False):
        """
        Evaluates the model on the given validation/test subset
        :param loader: DataLoader of validation/test data
        :param log_to_mlflow: log metrics to MLflow
        :return: Dict of calculated metrics
        """

        # Evaluation mode
        self.eval()
        scores_mse = []
        scores_mse_top_k = []
        true_labels = []

        with torch.no_grad():
            for batch_data in tqdm(loader,
                                   desc='Validation',
                                   total=len(loader)):
                # Format input batch
                inp = batch_data['image'].to(self.device)
                mask = batch_data['mask'].to(self.device)
                labels = batch_data['label']

                # Forward pass
                output = self(inp)
                loss = (self.outer_loss(output, inp, mask)
                        if self.masked_loss_on_val
                        else self.outer_loss(output, inp))

                score_mse = Mean.calculate(loss,
                                           masked_loss=self.masked_loss_on_val,
                                           mask=mask)
                score_top_k = TopK.calculate(loss, TOP_K, reduce_to_mean=True)

                scores_mse_top_k.extend(score_top_k)
                scores_mse.extend(score_mse)
                true_labels.extend(labels.numpy())

        metrics_mse = calculate_metrics(np.array(scores_mse),
                                        np.array(true_labels), 'mse')
        metrics_top_k = calculate_metrics(np.array(scores_mse_top_k),
                                          np.array(true_labels), 'mse_top_k')
        metrics = {**metrics_mse, **metrics_top_k}

        if log_to_mlflow:
            for (metric, value) in metrics.items():
                mlflow.log_metric(metric, value)

        return metrics
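
Mean.calculate and TopK.calculate are project-local score reducers used throughout these examples; they are not part of a public library. Below is a minimal sketch of what the call sites above imply, assuming the elementwise loss and the mask share the same shape and that TOP_K is a project constant:

import torch

TOP_K = 100  # hypothetical value; the real constant comes from the project's config


class Mean:
    @staticmethod
    def calculate(loss, masked_loss=False, mask=None):
        # Per-sample mean of the elementwise reconstruction error.
        flat = loss.detach().flatten(start_dim=1)
        if masked_loss and mask is not None:
            m = mask.detach().flatten(start_dim=1)
            score = (flat * m).sum(dim=1) / m.sum(dim=1).clamp(min=1)
        else:
            score = flat.mean(dim=1)
        return score.cpu().numpy()


class TopK:
    @staticmethod
    def calculate(loss, k, reduce_to_mean=True):
        # Per-sample mean of the k largest elementwise errors.
        flat = loss.detach().flatten(start_dim=1)
        top_k = flat.topk(k, dim=1).values
        out = top_k.mean(dim=1) if reduce_to_mean else top_k
        return out.cpu().numpy()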
Example no. 2
def test_results(model, test_loader, icdtype, device):
    """Runs the model over the test loader and prints metrics for the given ICD code type."""
    model.eval()
    with torch.no_grad():
        model_result = []
        targets = []
        for x_test, batch_targets in test_loader:
            x_test = x_test.to(device)
            model_batch_result = model(x_test)
            model_result.extend(model_batch_result.cpu().numpy())
            targets.extend(batch_targets[icdtype].cpu().numpy())
    result = calculate_metrics(np.array(model_result), np.array(targets))
    print('-' * 10 + icdtype + '-' * 10)
    print(result)
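
A hypothetical invocation of test_results, assuming a trained model and a DataLoader whose batches yield (inputs, targets) pairs where targets is a dict keyed by ICD code type; the key names below are illustrative only:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# 'icd9' / 'icd10' are placeholder keys; use whatever keys the dataset's
# target dict actually provides.
for icdtype in ('icd9', 'icd10'):
    test_results(model, test_loader, icdtype, device)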
Example no. 3
def hybrid_test_results(model, hybrid_test_loader, icdtype, device):
    """Runs a hybrid RNN/CNN model over the test loader and prints metrics for the given ICD code type."""
    model.eval()
    with torch.no_grad():
        model_result = []
        targets = []
        for rnn_x, cnn_x, batch_targets in hybrid_test_loader:
            rnn_x = rnn_x.to(device)
            cnn_x = cnn_x.to(device)

            model_batch_result = model(rnn_x, cnn_x)
            model_result.extend(model_batch_result.cpu().numpy())
            targets.extend(batch_targets[icdtype].cpu().numpy())

    result = calculate_metrics(np.array(model_result), np.array(targets))
    print('-' * 10 + icdtype + '-' * 10)
    print(result)
Example no. 4
def bert_test_results(model, test_loader, icdtype, device):
    """Runs a BERT-based model over the test loader and prints metrics for the given ICD code type."""
    model.eval()
    with torch.no_grad():
        model_result = []
        targets = []
        for resps, batch_targets in test_loader:
            model_batch_result = model(resps["ids"].to(device),
                                       resps["mask"].to(device),
                                       resps["token_type_ids"].to(device))
            model_result.extend(model_batch_result.cpu().numpy())
            targets.extend(batch_targets[icdtype].cpu().numpy())
    result = calculate_metrics(np.array(model_result), np.array(targets))
    print('-' * 20 + icdtype + '-' * 20)
    print(result)
    print('-' * len('-' * 20 + icdtype + '-' * 20))
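
The batches consumed by bert_test_results carry a resps dict with 'ids', 'mask' and 'token_type_ids' keys. Here is a minimal sketch of a Dataset that could produce such batches with a Hugging Face tokenizer; the tokenizer name, max length and target layout are assumptions, only the key names come from the call site above:

import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer


class BertTextDataset(Dataset):
    def __init__(self, texts, targets, max_len=512):
        self.texts = texts
        self.targets = targets  # e.g. per-sample dicts of label tensors keyed by icdtype
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        enc = self.tokenizer.encode_plus(self.texts[idx],
                                         max_length=self.max_len,
                                         padding='max_length',
                                         truncation=True,
                                         return_token_type_ids=True)
        resps = {
            'ids': torch.tensor(enc['input_ids'], dtype=torch.long),
            'mask': torch.tensor(enc['attention_mask'], dtype=torch.long),
            'token_type_ids': torch.tensor(enc['token_type_ids'], dtype=torch.long)
        }
        return resps, self.targets[idx]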


########################################################################
Example no. 5
    def evaluate(self, loader, log_to_mlflow=False):
        """
        Evaluates the discriminator on the given validation/test subset
        :param loader: DataLoader of validation/test data
        :param log_to_mlflow: log metrics to MLflow
        :return: Dict of calculated metrics
        """

        # Evaluation mode
        self.discriminator.eval()
        with torch.no_grad():
            scores = []
            true_labels = []
            for batch_data in tqdm(loader,
                                   desc='Validation',
                                   total=len(loader)):
                # Format input batch
                inp = batch_data['image'].to(self.device)

                # Forward pass
                output = self.discriminator(inp).to('cpu').numpy().reshape(-1)

                # Scores based on the discriminator output: a higher score must correspond to positively labeled images
                score = output if bool(self.fake_label) else 1 - output

                scores.extend(score)
                true_labels.extend(batch_data['label'].numpy())

            metrics = calculate_metrics(np.array(scores),
                                        np.array(true_labels), 'proba')

            if log_to_mlflow:
                for (metric, value) in metrics.items():
                    mlflow.log_metric(metric, value)

            return metrics
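
When log_to_mlflow=True, mlflow.log_metric records each metric on the active run, so the call is typically wrapped in mlflow.start_run(). A hypothetical usage sketch, assuming `gan` is an instance of the class this method belongs to and `val_loader` is the validation DataLoader:

import mlflow

with mlflow.start_run(run_name='discriminator-eval'):
    metrics = gan.evaluate(val_loader, log_to_mlflow=True)
print(metrics)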
Example no. 6
    def evaluate(self, loader, log_to_mlflow=False):
        """
        Evaluates the model on the given validation/test subset
        :param loader: DataLoader of validation/test data
        :param log_to_mlflow: log metrics to MLflow
        :return: Dict of calculated metrics
        """

        # Evaluation mode
        self.generator.eval()
        self.encoder.eval()
        self.discriminator.eval()
        self.codescriminator.eval()
        scores_mse = []
        scores_proba = []
        scores_coproba = []
        scores_proba_coproba = []
        scores_top_k = []
        true_labels = []

        with torch.no_grad():

            for batch_data in tqdm(loader,
                                   desc='Validation',
                                   total=len(loader)):
                # Format input batch
                inp = batch_data['image'].to(self.device)
                mask = batch_data['mask'].to(self.device)

                # Forward pass
                z_mean, z_logvar, _, _ = self.encoder(inp)
                # z_hat = z_mean + z_logvar * torch.randn(z_mean.size()).to(self.device)
                if len(z_mean.size()) == 1:
                    z_mean = z_mean.view(1, z_mean.size(0))
                # Decoder (generator)
                x_rec, _, _ = self.generator(z_mean)

                loss = self.outer_loss(x_rec, inp, mask) if self.masked_loss_on_val \
                    else self.outer_loss(x_rec, inp)

                # Scores based on the discriminator output: a higher score must correspond to positively labeled images
                score_proba = 1 - self.discriminator(inp)[0].to(
                    'cpu').numpy().reshape(-1)
                score_coproba = 1 - self.codescriminator(z_mean).to(
                    'cpu').numpy().reshape(-1)
                score_proba_coproba = (score_proba + score_coproba) / 2

                score_mse = Mean.calculate(loss,
                                           masked_loss=self.masked_loss_on_val,
                                           mask=mask)
                score_top_k = TopK.calculate(loss, TOP_K, reduce_to_mean=True)

                scores_mse.extend(score_mse)
                scores_top_k.extend(score_top_k)
                scores_proba.extend(score_proba)
                scores_coproba.extend(score_coproba)
                scores_proba_coproba.extend(score_proba_coproba)
                true_labels.extend(batch_data['label'].numpy())

            metrics_mse = calculate_metrics(np.array(scores_mse),
                                            np.array(true_labels), 'mse')
            metrics_mse_top_k = calculate_metrics(np.array(scores_top_k),
                                                  np.array(true_labels),
                                                  'mse_top_k')
            metrics_proba = calculate_metrics(np.array(scores_proba),
                                              np.array(true_labels), 'proba')
            metrics_coproba = calculate_metrics(np.array(scores_coproba),
                                                np.array(true_labels),
                                                'coproba')
            metrics_proba_coproba = calculate_metrics(
                np.array(scores_proba_coproba), np.array(true_labels),
                'proba_coproba')

            metrics = {
                **metrics_mse,
                **metrics_mse_top_k,
                **metrics_proba,
                **metrics_coproba,
                **metrics_proba_coproba
            }

            if log_to_mlflow:
                for (metric, value) in metrics.items():
                    mlflow.log_metric(metric, value)

            return metrics
Example no. 7
    def evaluate(self, loader, log_to_mlflow=False):
        """
        Computes ROC-AUC and APS for each anomaly score
        :param loader: DataLoader of validation/test data
        :param log_to_mlflow: boolean flag to enable MLflow logging
        :return: calculated metrics
        """

        self.eval()
        scores_L1 = []
        scores_L2 = []
        scores_L1_top_k = []
        scores_L2_top_k = []
        scores_kld = []
        scores_L1_kld = []
        scores_L2_kld = []
        true_labels = []

        with torch.no_grad():

            for batch_data in tqdm(loader,
                                   desc='Validation',
                                   total=len(loader)):
                inp = batch_data['image'].to(self.device)
                mask = batch_data['mask'].to(
                    self.device) if self.masked_loss_on_val else None
                labels = batch_data['label']

                # forward pass
                output, mu, var = self(inp)
                L1, KLD = self.loss_L1(output,
                                       inp,
                                       mu,
                                       var,
                                       reduction='none',
                                       mask=mask)
                L2 = self.outer_loss(output, inp, mask) if self.masked_loss_on_val \
                    else self.outer_loss(output, inp)

                score_L1 = Mean.calculate(L1,
                                          masked_loss=self.masked_loss_on_val,
                                          mask=mask)
                score_L2 = Mean.calculate(L2,
                                          masked_loss=self.masked_loss_on_val,
                                          mask=mask)
                score_L1_top_k = TopK.calculate(L1, TOP_K, reduce_to_mean=True)
                score_L2_top_k = TopK.calculate(L2, TOP_K, reduce_to_mean=True)
                score_kld = KLD.to('cpu').numpy().sum(axis=1)
                score_L1_kld = score_L1 + score_kld
                score_L2_kld = score_L2 + score_kld

                scores_L1.extend(score_L1)
                scores_L2.extend(score_L2)
                scores_L1_top_k.extend(score_L1_top_k)
                scores_L2_top_k.extend(score_L2_top_k)
                scores_kld.extend(score_kld)
                scores_L1_kld.extend(score_L1_kld)
                scores_L2_kld.extend(score_L2_kld)
                true_labels.extend(labels.numpy())

            metrics_L1 = calculate_metrics(np.array(scores_L1),
                                           np.array(true_labels), 'l1')
            metrics_L2 = calculate_metrics(np.array(scores_L2),
                                           np.array(true_labels), 'mse')
            metrics_L1_top_k = calculate_metrics(np.array(scores_L1_top_k),
                                                 np.array(true_labels),
                                                 'l1_top_k')
            metrics_L2_top_k = calculate_metrics(np.array(scores_L2_top_k),
                                                 np.array(true_labels),
                                                 'mse_top_k')
            metrics_kld = calculate_metrics(np.array(scores_kld),
                                            np.array(true_labels), 'kld')
            metrics_l1_kld = calculate_metrics(np.array(scores_L1_kld),
                                               np.array(true_labels), 'l1_kld')
            metrics_l2_kld = calculate_metrics(np.array(scores_L2_kld),
                                               np.array(true_labels),
                                               'mse_kld')
            metrics = {
                **metrics_L1,
                **metrics_L2,
                **metrics_L1_top_k,
                **metrics_L2_top_k,
                **metrics_kld,
                **metrics_l1_kld,
                **metrics_l2_kld
            }

            if log_to_mlflow:
                for (metric, value) in metrics.items():
                    mlflow.log_metric(metric, value)

            return metrics
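
calculate_metrics is a project-local helper; the Example no. 7 docstring names ROC-AUC and APS as the metrics it produces. A minimal sketch of the three-argument variant used by the evaluate methods, where the score prefix keys the resulting dict; the exact metric set and key format are assumptions:

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score


def calculate_metrics(scores, true_labels, prefix):
    # Higher scores are expected to correspond to the positive (anomalous) class.
    return {
        'roc_auc_' + prefix: roc_auc_score(true_labels, scores),
        'aps_' + prefix: average_precision_score(true_labels, scores)
    }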