Пример #1
0
def _stable_sigmoid(x):
    """Return the logistic sigmoid of ``x`` without overflowing.

    The textbook form ``1 / (1 + exp(-x))`` raises ``OverflowError`` in
    ``math.exp`` for large negative ``x``. For negative inputs the
    algebraically equivalent ``exp(x) / (1 + exp(x))`` is used instead.
    """
    import math

    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


def test(args):
    """Run model testing.

    Obtains predictions either from an ensemble (when
    ``args.model_args.config_path`` is set) or from the single checkpoint at
    ``args.model_args.ckpt_path``, logs predictions and groundtruth, and,
    unless ``args.inference_only`` is set, evaluates and logs metrics.

    Args:
        args: Namespace providing ``model_args``, ``data_args``,
            ``logger_args``, ``gpu_ids``, ``device`` and ``inference_only``.

    Notes:
        * Calibration (``model_args.calibrate``) is only applied on the
          single-checkpoint path. Parameters are read from
          ``CALIBRATION_FILE``; entry ``i`` is applied to the ``i``-th
          prediction column as ``sigmoid(x * p[i][0][0][0] + p[i][1][0])``.
        * CAMs need a single model and its loader. When predictions come
          from an ensemble, the request is logged and skipped.
    """
    model_args = args.model_args
    data_args = args.data_args
    logger_args = args.logger_args

    # Get logger.
    logger = Logger(logger_args.log_path, logger_args.save_dir,
                    logger_args.results_dir)

    # Image paths corresponding to predictions, for logging.
    paths = None
    # Only the single-checkpoint branch binds these; they stay None for an
    # ensemble so the CAM step below can detect that case.
    model = None
    loader = None

    if model_args.config_path is not None:
        # Instantiate the EnsemblePredictor class for obtaining
        # model predictions.
        predictor = EnsemblePredictor(config_path=model_args.config_path,
                                      model_args=model_args,
                                      data_args=data_args,
                                      gpu_ids=args.gpu_ids,
                                      device=args.device,
                                      logger=logger)
        # Obtain ensemble predictions.
        # We always turn off caching to ensure that we write the Path column.
        predictions, groundtruth, paths = predictor.predict(cache=False,
                                                            return_paths=True,
                                                            all_gt_tasks=True)
    else:
        # Load the model at ckpt_path.
        ckpt_path = model_args.ckpt_path
        ckpt_save_dir = Path(ckpt_path).parent
        model_uncertainty = model_args.model_uncertainty
        # Get model args from checkpoint and add them to
        # command-line specified model args.
        model_args, transform_args = ModelSaver.get_args(
            cl_model_args=model_args,
            dataset=data_args.dataset,
            ckpt_save_dir=ckpt_save_dir,
            model_uncertainty=model_uncertainty)

        # TODO JBY: in test moco should never be true.
        # The command-line value overrides whatever the checkpoint stored.
        model_args.moco = args.model_args.moco
        model, ckpt_info = ModelSaver.load_model(ckpt_path=ckpt_path,
                                                 gpu_ids=args.gpu_ids,
                                                 model_args=model_args,
                                                 is_training=False)

        # Instantiate the Predictor class for obtaining model predictions.
        predictor = Predictor(model=model, device=args.device)
        # Get phase loader object. The info dict is requested so that
        # predict() also returns the image paths.
        loader = get_loader(phase=data_args.phase,
                            data_args=data_args,
                            transform_args=transform_args,
                            is_training=False,
                            return_info_dict=True,
                            logger=logger)
        # Obtain model predictions.
        predictions, groundtruth, paths = predictor.predict(loader)

        if model_args.calibrate:
            import json

            # Open the json file which has the saved parameters.
            with open(CALIBRATION_FILE) as f:
                calibration_params = json.load(f)

            for i, column in enumerate(predictions):
                scale = calibration_params[i][0][0][0]
                offset = calibration_params[i][1][0]
                # Bind the per-column parameters as defaults so the lambda
                # does not depend on the loop variables.
                predictions[column] = predictions[column].apply(
                    lambda x, w=scale, b=offset: _stable_sigmoid(x * w + b))

    # Log predictions and groundtruth to file in CSV format.
    logger.log_predictions_groundtruth(predictions, groundtruth, paths)

    if not args.inference_only:
        # Instantiate the evaluator class for evaluating models.
        evaluator = Evaluator(logger, operating_points_path=CHEXPERT_RAD_PATH)
        # Get model metrics and curves on the phase dataset.
        metrics, curves = evaluator.evaluate_tasks(groundtruth, predictions)
        # Log metrics to stdout and file.
        logger.log_stdout(f"Writing metrics to {logger.metrics_path}.")
        logger.log_metrics(metrics, save_csv=True)

    # TODO: make this work with ensemble
    # TODO: investigate if the eval_loader can just be the normal loader here
    if logger_args.save_cams:
        if model is None or loader is None:
            # Previously this path failed with a NameError after all
            # predictions had already been computed.
            logger.log("Skipping CAMs: saving CAMs is not supported "
                       "for ensemble predictions.")
        else:
            cams_dir = logger_args.save_dir / 'cams'
            print(f'Save cams to {cams_dir}')
            save_grad_cams(args,
                           loader,
                           model,
                           cams_dir,
                           only_competition=logger_args.only_competition_cams,
                           only_top_task=False)

    logger.log("=== Testing Complete ===")
Пример #2
0
def test(args):
    """Evaluate the checkpoint at ``args.test_args.ckpt_path`` and log metrics.

    The requested phase is scored with threshold tuning enabled, followed by
    a dense evaluation on the same phase. When the requested phase is
    ``'test'``, the ``'valid'`` phase is scored first to obtain thresholds,
    which are then reused for the ``'test'`` and ``'dense_test'`` phases.

    Args:
        args: Namespace providing ``test_args``, ``logger_args``,
            ``gpu_ids`` and ``device``.
    """
    test_args = args.test_args
    cl_logger_args = args.logger_args

    # The remaining argument groups are restored from the directory that
    # holds the checkpoint.
    ckpt_path = test_args.ckpt_path
    model_args, data_args, optim_args, logger_args = ModelSaver.get_args(
        cl_logger_args=cl_logger_args,
        ckpt_save_dir=Path(ckpt_path).parent)

    model, _ = ModelSaver.load_model(ckpt_path=ckpt_path,
                                     gpu_ids=args.gpu_ids,
                                     model_args=model_args,
                                     is_training=False)

    logger = Logger(logger_args=logger_args,
                    data_args=data_args,
                    optim_args=optim_args,
                    test_args=test_args)

    predictor = Predictor(model=model, device=args.device)

    def report(phase_metrics, phase_name):
        """Announce the metrics file and write ``phase_metrics`` to it."""
        logger.log_stdout(f"Writing metrics to {logger.metrics_path}.")
        logger.log_metrics(phase_metrics, phase=phase_name)

    requested_phase = test_args.phase
    run_test = requested_phase == 'test'
    # Run valid first to get the thresholds when the test phase is requested.
    phase = 'valid' if run_test else requested_phase

    print(f"======================{phase}=======================")
    tuning_loader = get_loader(phase=phase,
                               data_args=data_args,
                               is_training=False,
                               logger=logger)
    predictions, groundtruth = predictor.predict(tuning_loader)

    # Thresholds are tuned on this first pass.
    evaluator = Evaluator(logger=logger, tune_threshold=True)
    metrics = evaluator.evaluate(groundtruth, predictions)
    report(metrics, phase)

    # Dense evaluation on the same phase yields the dense thresholds.
    dense_loader = get_loader(phase=phase,
                              data_args=data_args,
                              is_training=False,
                              logger=logger)
    dense_predictions, dense_groundtruth = predictor.predict(dense_loader)
    dense_metrics = evaluator.dense_evaluate(dense_groundtruth,
                                             dense_predictions)
    report(dense_metrics, phase)

    if not run_test:
        return

    phase = 'test'
    threshold = metrics['threshold']
    print(f"======================{phase}=======================")

    test_loader = get_loader(phase=phase,
                             data_args=data_args,
                             is_training=False,
                             test_args=test_args,
                             logger=logger)
    predictions, groundtruth = predictor.predict(test_loader)

    # Reuse the tuned threshold instead of tuning on the test split.
    evaluator = Evaluator(logger=logger,
                          threshold=threshold,
                          tune_threshold=False)
    metrics = evaluator.evaluate(groundtruth, predictions)
    report(metrics, phase)

    # Dense test, scored with the fixed-threshold evaluator created above.
    phase = 'dense_test'
    dense_test_loader = get_loader(phase=phase,
                                   data_args=data_args,
                                   is_training=False,
                                   test_args=test_args,
                                   logger=logger)
    threshold_dense = dense_metrics["threshold_dense"]
    threshold_tunef1_dense = dense_metrics["threshold_tunef1_dense"]
    dense_predictions, dense_groundtruth = predictor.predict(
        dense_test_loader)
    dense_metrics = evaluator.dense_evaluate(
        dense_groundtruth,
        dense_predictions,
        threshold=threshold_dense,
        threshold_tunef1=threshold_tunef1_dense)
    report(dense_metrics, phase)
Пример #3
0
def calibrate(args):
    """Fit per-task calibration parameters and save them as JSON.

    Predictions are obtained either from an ensemble (when
    ``args.model_args.config_path`` is set) or from the single checkpoint at
    ``args.model_args.ckpt_path``. For every prediction column a
    one-feature logistic regression (``C=15``) is fitted against the
    groundtruth, ignoring rows whose groundtruth is ``-1``. The resulting
    ``(coef_, intercept_)`` pairs are written, in column order, to
    ``calibration_params.json`` in the current working directory.

    Args:
        args: Namespace providing ``model_args``, ``data_args``,
            ``logger_args``, ``gpu_ids`` and ``device``.
    """
    import pandas as pd
    from sklearn.linear_model import LogisticRegression as LR

    model_args = args.model_args
    data_args = args.data_args
    logger_args = args.logger_args

    # Get logger.
    logger = Logger(logger_args.log_path, logger_args.save_dir,
                    logger_args.results_dir)

    if model_args.config_path is not None:
        # Instantiate the EnsemblePredictor class for obtaining
        # model predictions.
        predictor = EnsemblePredictor(config_path=model_args.config_path,
                                      model_args=model_args,
                                      data_args=data_args,
                                      gpu_ids=args.gpu_ids,
                                      device=args.device,
                                      logger=logger)
        # Obtain ensemble predictions. The returned paths are not needed
        # for fitting.
        predictions, groundtruth, _ = predictor.predict(cache=False,
                                                        return_paths=True,
                                                        all_gt_tasks=True)
    else:
        # Load the model at ckpt_path.
        ckpt_path = model_args.ckpt_path
        ckpt_save_dir = Path(ckpt_path).parent
        model_uncertainty = model_args.model_uncertainty
        # Get model args from checkpoint and add them to
        # command-line specified model args.
        model_args, transform_args = ModelSaver.get_args(
            cl_model_args=model_args,
            dataset=data_args.dataset,
            ckpt_save_dir=ckpt_save_dir,
            model_uncertainty=model_uncertainty)
        model, ckpt_info = ModelSaver.load_model(ckpt_path=ckpt_path,
                                                 gpu_ids=args.gpu_ids,
                                                 model_args=model_args,
                                                 is_training=False)
        # Instantiate the Predictor class for obtaining model predictions.
        predictor = Predictor(model=model, device=args.device)
        # Get phase loader object.
        loader = get_loader(phase=data_args.phase,
                            data_args=data_args,
                            transform_args=transform_args,
                            is_training=False,
                            return_info_dict=True,
                            logger=logger)
        # With the info dict enabled, predict() also returns image paths,
        # which are not needed for fitting.
        predictions, groundtruth, _ = predictor.predict(loader)

    params = []
    for column in predictions:
        # Drop the rows where the groundtruth is -1 before fitting.
        to_drop = groundtruth.index[groundtruth[column] == -1].tolist()
        # LR needs X to be 2-dimensional.
        features = predictions[column].drop(to_drop).values.reshape(-1, 1)
        labels = groundtruth[column].drop(to_drop).values

        lr = LR(C=15)
        lr.fit(features, labels)
        print("num_rows_used", features.size)
        print("coeffs", lr.coef_, lr.intercept_)
        params.append((lr.coef_, lr.intercept_))

    with open('calibration_params.json', 'w') as f:
        pd.Series(params).to_json(f, orient='values')
Пример #4
0
    def predict(self, cache=False):
        """Get ensemble predictions on the evaluation set.

        Every checkpoint referenced in ``self.task2models`` is evaluated at
        most once on the ``self.data_args.phase`` split. For each task, the
        probabilities of that task's checkpoints are combined with
        ``self.aggregation_fn(..., axis=0)``, and the groundtruth of the
        task's first checkpoint is used after checking that all of the
        task's checkpoints agree on it.

        Args:
            cache: Bool indicating whether to cache ensemble predictions.
                   If true, first tries to load already cached files,
                   then writes all predictions (and groundtruth) which
                   have not been cached.

        Return:
            ensemble probabilities Pandas DataFrame,
            ensemble groundtruth Pandas DataFrame

        Raises:
            ValueError: If ``cache`` is true but ``self.logger`` is None.
            AssertionError: If the checkpoints of a task disagree on that
                task's groundtruth.
        """
        if cache and self.logger is None:
            raise ValueError(
                "Must instantiate Predictor with logger if caching.")

        if cache:
            results_dir = self.logger.results_dir
            self.predictions_path = results_dir / "ensemble_predictions.csv"
            self.groundtruth_path = results_dir / "groundtruth.csv"
            if (self.predictions_path.exists()
                    and self.groundtruth_path.exists()):
                # Both ensemble files are cached: nothing to recompute.
                self.logger.log(f"Predictions at {self.predictions_path} " +
                                "already exist. Loading from this file.")
                ensemble_probs_df = pd.read_csv(self.predictions_path)
                ensemble_gt_df = pd.read_csv(self.groundtruth_path)
                return ensemble_probs_df, ensemble_gt_df

        # Per-checkpoint results, keyed by checkpoint path, so a checkpoint
        # shared between tasks is only evaluated once.
        model2probs = {}
        model2gt = {}
        task2ensemble_probs = {}
        task2gt = {}

        self.save_config()
        for task, model_dicts in self.task2models.items():
            for model_dict in model_dicts:
                ckpt_path = Path(model_dict[CFG_CKPT_PATH])
                is_3class = model_dict[CFG_IS_3CLASS]

                if ckpt_path in model2probs:
                    # We've already computed predictions for this model,
                    # skip it!
                    continue
                ckpt_save_dir = ckpt_path.parent

                # Per-checkpoint results live next to the checkpoint, under
                # results/<phase>/.
                results_dir = ckpt_save_dir / "results" / self.data_args.phase
                results_dir.mkdir(parents=True, exist_ok=True)
                ckpt_iter = ckpt_path.stem.split(".")[0]
                predictions_path = results_dir / f"{ckpt_iter}-predictions.csv"
                groundtruth_path = results_dir / f"{ckpt_iter}-groundtruth.csv"

                if cache and (predictions_path.exists()
                              and groundtruth_path.exists()):
                    self.logger.log(f"Predictions at {predictions_path}" +
                                    " already exist. Loading from this " +
                                    "file.")
                    probs_df = pd.read_csv(predictions_path,
                                           dtype=np.float32)
                    gt_df = pd.read_csv(groundtruth_path, dtype=np.float32)
                else:
                    # Get model args from checkpoint and add them to
                    # command-line specified model args.
                    model_args, transform_args = ModelSaver.get_args(
                        cl_model_args=self.model_args,
                        dataset=self.data_args.dataset,
                        ckpt_save_dir=ckpt_save_dir,
                        model_uncertainty=is_3class)
                    model, ckpt_info = ModelSaver.load_model(
                        ckpt_path=ckpt_path,
                        gpu_ids=self.gpu_ids,
                        model_args=model_args,
                        is_training=False)
                    predictor = Predictor(model=model, device=self.device)
                    loader = get_loader(phase=self.data_args.phase,
                                        data_args=self.data_args,
                                        transform_args=transform_args,
                                        is_training=False,
                                        return_info_dict=False,
                                        logger=self.logger)
                    probs_df, gt_df = predictor.predict(loader)

                    if cache:
                        self.logger.log("Writing predictions to " +
                                        f"{predictions_path}.")
                        probs_df.astype(np.float64).to_csv(
                            predictions_path, index=False)
                        self.logger.log("Writing groundtruth to " +
                                        f"{groundtruth_path}.")
                        gt_df.astype(np.float64).to_csv(groundtruth_path,
                                                        index=False)

                model2probs[ckpt_path] = probs_df
                model2gt[ckpt_path] = gt_df

            task_ckpt_paths = [Path(model_dict[CFG_CKPT_PATH])
                               for model_dict in model_dicts]
            task_ckpt_probs = [model2probs[path][task]
                               for path in task_ckpt_paths]
            task2ensemble_probs[task] = self.aggregation_fn(task_ckpt_probs,
                                                            axis=0)

            # All checkpoints of a task must have been scored against the
            # same groundtruth for the aggregation to be meaningful.
            first_gt = model2gt[task_ckpt_paths[0]][task]
            assert all(model2gt[path][task].equals(first_gt)
                       for path in task_ckpt_paths), \
                f"Groundtruth for task {task} differs between checkpoints."
            task2gt[task] = first_gt

        ensemble_probs_df = pd.DataFrame(
            {task: task2ensemble_probs[task]
             for task in self.task2models})
        ensemble_gt_df = pd.DataFrame(
            {task: task2gt[task]
             for task in self.task2models})
        if cache:
            self.logger.log("Writing predictions to "
                            f"{self.predictions_path}.")
            ensemble_probs_df.astype(np.float64).to_csv(
                self.predictions_path, index=False)
            self.logger.log("Writing groundtruth to "
                            f"{self.groundtruth_path}.")
            ensemble_gt_df.astype(np.float64).to_csv(self.groundtruth_path,
                                                     index=False)

        return ensemble_probs_df, ensemble_gt_df