Example #1
def test_calculate_loss():
    # For a scorer bounded in [0, 1], make sure that the loss
    # has the expected positive value
    y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
    y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy],
    )['accuracy']
    loss = 1.0 - score
    assert pytest.approx(loss) == calculate_loss(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy],
    )['accuracy']

    # Test the dictionary case
    score_dict = calculate_score(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy, balanced_accuracy],
    )
    expected_score_dict = {
        'accuracy': 0.9,
        'balanced_accuracy': 0.9285714285714286,
    }
    loss_dict = calculate_loss(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_CLASSIFICATION,
        metrics=[accuracy, balanced_accuracy],
    )
    for expected_metric, expected_score in expected_score_dict.items():
        assert pytest.approx(expected_score) == score_dict[expected_metric]
        assert pytest.approx(1 - expected_score) == loss_dict[expected_metric]

    # Lastly, make sure that metrics whose optimum is zero
    # are also handled properly
    y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
    y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
    score = sklearn.metrics.mean_squared_error(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_REGRESSION,
        metrics=[mean_squared_error],
    )['mean_squared_error']
    loss = score
    assert pytest.approx(loss) == calculate_loss(
        target=y_true,
        prediction=y_pred,
        task_type=TABULAR_REGRESSION,
        metrics=[mean_squared_error],
    )['mean_squared_error']
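
The assertions above hinge on the convention that a loss is the metric's optimum minus its sign-adjusted score, so a [0, 1]-bounded scorer such as accuracy yields 1 - score while an error metric such as MSE passes through unchanged. The sketch below illustrates that convention with a hypothetical score_to_loss helper and plain scikit-learn values; the optimum/sign framing is an assumption about how calculate_loss relates to calculate_score, not the library's own code.

# Hypothetical helper illustrating the score-to-loss convention exercised
# by test_calculate_loss above (not autoPyTorch's implementation).
import numpy as np
import sklearn.metrics


def score_to_loss(score: float, optimum: float, sign: float) -> float:
    # Higher-is-better metrics (sign=+1, optimum=1) become 1 - score;
    # error metrics (sign=-1, optimum=0) are returned unchanged.
    return optimum - sign * score


y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
acc = sklearn.metrics.accuracy_score(y_true, y_pred)            # 0.9
assert score_to_loss(acc, optimum=1.0, sign=1.0) == 1.0 - acc   # 0.1

mse = sklearn.metrics.mean_squared_error([0.1, 0.2, 0.3], [0.11, 0.22, 0.33])
assert score_to_loss(mse, optimum=0.0, sign=-1.0) == mse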
Example #2
    def score(self,
              X: np.ndarray,
              y: np.ndarray,
              batch_size: Optional[int] = None,
              metric_name: str = 'accuracy') -> float:
        """Scores the fitted estimator on (X, y)

        Args:
            X (np.ndarray):
                input to the pipeline, from which to guess targets
            batch_size (Optional[int]):
                batch_size controls whether the pipeline
                will be called on small chunks of the data.
                Useful when calling the predict method on
                the whole array X results in a MemoryError.
            y (np.ndarray):
                Ground Truth labels
            metric_name (str: default = 'accuracy'):
                 name of the metric to be calculated
        Returns:
            float: score based on the metric name
        """
        from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics, calculate_score
        metrics = get_metrics(self.dataset_properties, [metric_name])
        y_pred = self.predict(X, batch_size=batch_size)
        score = calculate_score(y,
                                y_pred,
                                task_type=STRING_TO_TASK_TYPES[str(
                                    self.dataset_properties['task_type'])],
                                metrics=metrics)[metric_name]
        return score
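
For context, score() above simply resolves the requested metric by name via get_metrics and hands the predictions to calculate_score. The standalone sketch below mimics that name-to-metric lookup with plain scikit-learn; the METRICS mapping and score_predictions helper are illustrative assumptions, not autoPyTorch's API.

# Rough standalone sketch of the lookup-and-evaluate flow used by score()
# above, with plain scikit-learn standing in for get_metrics/calculate_score.
import numpy as np
import sklearn.metrics

METRICS = {
    'accuracy': sklearn.metrics.accuracy_score,
    'balanced_accuracy': sklearn.metrics.balanced_accuracy_score,
}


def score_predictions(y_true: np.ndarray, y_pred: np.ndarray,
                      metric_name: str = 'accuracy') -> float:
    # Resolve the metric by name, then evaluate it on the predictions,
    # mirroring the get_metrics -> calculate_score -> [metric_name] chain.
    return float(METRICS[metric_name](y_true, y_pred))


y_true = np.array([0, 1, 0, 1])
y_pred = np.array([0, 0, 0, 1])
print(score_predictions(y_true, y_pred))                         # 0.75
print(score_predictions(y_true, y_pred, 'balanced_accuracy'))    # 0.75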
Example #3
    def compute_metrics(self, outputs_data: List[torch.Tensor],
                        targets_data: List[torch.Tensor]) -> Dict[str, float]:
        # TODO: change once Ravin provides the PR
        outputs_data = torch.cat(outputs_data, dim=0)
        targets_data = torch.cat(targets_data, dim=0)
        return calculate_score(targets_data, outputs_data, self.task_type,
                               self.metrics)
Example #4
def test_classification_only_metric():
    y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
    y_pred = \
        np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
    scorer = accuracy

    score = calculate_score(y_true, y_pred, TABULAR_CLASSIFICATION, [scorer])

    expected_score = scorer._optimum
    assert score['accuracy'] == pytest.approx(expected_score)
Example #5
    def test_metrics(self):
        # test of all classification metrics
        dataset_properties = {'task_type': 'tabular_classification'}
        y_target = np.array([0, 1, 0, 1])
        y_pred = np.array([0, 0, 0, 1])
        metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True)
        score_dict = calculate_score(
            y_target, y_pred,
            STRING_TO_TASK_TYPES[dataset_properties['task_type']], metrics)
        self.assertIsInstance(score_dict, dict)
        for name, score in score_dict.items():
            self.assertIsInstance(name, str)
            self.assertIsInstance(score, float)

        # test of all regression metrics
        dataset_properties = {'task_type': 'tabular_regression'}
        y_target = np.array([0.1, 0.6, 0.7, 0.4])
        y_pred = np.array([0.6, 0.7, 0.4, 1])
        metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True)
        score_dict = calculate_score(
            y_target, y_pred,
            STRING_TO_TASK_TYPES[dataset_properties['task_type']], metrics)

        self.assertIsInstance(score_dict, dict)
        for name, score in score_dict.items():
            self.assertIsInstance(name, str)
            self.assertIsInstance(score, float)
Example #6
def test_classification_metrics():
    # test of all classification metrics
    dataset_properties = {
        'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION],
        'output_type': OUTPUT_TYPES_TO_STRING[BINARY]
    }
    y_target = np.array([0, 1, 0, 1])
    y_pred = np.array([0, 0, 0, 1])
    metrics = get_metrics(dataset_properties=dataset_properties,
                          all_supported_metrics=True)
    score_dict = calculate_score(
        y_target, y_pred,
        STRING_TO_TASK_TYPES[dataset_properties['task_type']], metrics)
    assert isinstance(score_dict, dict)
    for name, score in score_dict.items():
        assert isinstance(name, str)
        assert isinstance(score, float)
Example #7
def test_regression_metrics():
    # test of all regression metrics
    dataset_properties = {
        'task_type': TASK_TYPES_TO_STRING[TABULAR_REGRESSION],
        'output_type': OUTPUT_TYPES_TO_STRING[CONTINUOUS]
    }
    y_target = np.array([0.1, 0.6, 0.7, 0.4])
    y_pred = np.array([0.6, 0.7, 0.4, 1])
    metrics = get_metrics(dataset_properties=dataset_properties,
                          all_supported_metrics=True)
    score_dict = calculate_score(
        y_target, y_pred,
        STRING_TO_TASK_TYPES[dataset_properties['task_type']], metrics)

    assert isinstance(score_dict, dict)
    for name, score in score_dict.items():
        assert isinstance(name, str)
        assert isinstance(score, float)
Example #8
    def score(self,
              X: np.ndarray,
              y: np.ndarray,
              batch_size: Optional[int] = None) -> float:
        """Scores the fitted estimator on (X, y).

        Args:
            X (np.ndarray):
                Input to the pipeline, from which to predict targets.
            y (np.ndarray):
                Ground-truth targets.
            batch_size (Optional[int]):
                Controls whether the pipeline is called on small
                chunks of the data. Useful when calling the predict
                method on the whole array X results in a MemoryError.
        Returns:
            float: coefficient of determination R^2 of the prediction
        """
        from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics, calculate_score
        metrics = get_metrics(self.dataset_properties, ['r2'])
        y_pred = self.predict(X, batch_size=batch_size)
        r2 = calculate_score(y,
                             y_pred,
                             task_type=STRING_TO_TASK_TYPES[
                                 self.dataset_properties['task_type']],
                             metrics=metrics)['r2']
        return r2

    def _fit(
        self,
        predictions: List[np.ndarray],
        labels: np.ndarray,
    ) -> None:
        """Fast version of Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []  # type: List[np.ndarray]
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        weighted_ensemble_prediction = np.zeros(
            predictions[0].shape,
            dtype=np.float64,
        )
        fant_ensemble_prediction = np.zeros(
            weighted_ensemble_prediction.shape,
            dtype=np.float64,
        )
        for i in range(ensemble_size):
            scores = np.zeros(
                (len(predictions)),
                dtype=np.float64,
            )
            s = len(ensemble)
            if s == 0:
                weighted_ensemble_prediction.fill(0.0)
            else:
                weighted_ensemble_prediction.fill(0.0)
                for pred in ensemble:
                    np.add(
                        weighted_ensemble_prediction,
                        pred,
                        out=weighted_ensemble_prediction,
                    )
                np.multiply(
                    weighted_ensemble_prediction,
                    1 / s,
                    out=weighted_ensemble_prediction,
                )
                np.multiply(
                    weighted_ensemble_prediction,
                    (s / float(s + 1)),
                    out=weighted_ensemble_prediction,
                )

            for j, pred in enumerate(predictions):
                # Memory-efficient averaging!
                fant_ensemble_prediction.fill(0.0)
                np.add(
                    fant_ensemble_prediction,
                    weighted_ensemble_prediction,
                    out=fant_ensemble_prediction
                )
                np.add(
                    fant_ensemble_prediction,
                    (1. / float(s + 1)) * pred,
                    out=fant_ensemble_prediction
                )

                # calculate_score returns a dict of scores keyed by metric
                # name; pull out the one for self.metric below and turn it
                # into a minimization loss via optimum - score
                score = calculate_score(
                    metrics=[self.metric],
                    target=labels,
                    prediction=fant_ensemble_prediction,
                    task_type=self.task_type,
                )
                scores[j] = self.metric._optimum - score[self.metric.name]

            all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
            best = self.random_state.choice(all_best)
            ensemble.append(predictions[best])
            trajectory.append(scores[best])
            order.append(best)

            # Handle special case
            if len(predictions) == 1:
                break

        self.indices_ = order
        self.trajectory_ = trajectory
        self.train_score_ = trajectory[-1]
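
A note on the incremental averaging in _fit: scaling the current ensemble mean by s / (s + 1) and then adding pred / (s + 1) reproduces the plain mean over s + 1 members without re-summing inside the candidate loop. The check below verifies that identity on random data; the variable names are local to this sketch.

# Sanity check of the incremental averaging used in _fit above: the running
# mean scaled by s / (s + 1) plus candidate / (s + 1) equals the plain mean
# over all s + 1 member predictions.
import numpy as np

rng = np.random.default_rng(0)
ensemble = [rng.random((4, 3)) for _ in range(3)]   # s = 3 member predictions
candidate = rng.random((4, 3))                      # prediction being scored

s = len(ensemble)
running = np.mean(ensemble, axis=0) * (s / float(s + 1))
fant = running + (1.0 / float(s + 1)) * candidate

assert np.allclose(fant, np.mean(ensemble + [candidate], axis=0))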