Example #1
    def _loss(
        self,
        y_true: np.ndarray,
        y_hat: np.ndarray,
        scoring_functions: Optional[List[Scorer]] = None
    ) -> Union[float, Dict[str, float]]:
        """Auto-sklearn follows a minimization goal.
        The calculate_loss internally translate a score function to
        a minimization problem.

        For a dummy prediction, the worst result is assumed.

        Parameters
        ----------
            y_true
        """
        scoring_functions = (self.scoring_functions if
                             scoring_functions is None else scoring_functions)
        if not isinstance(self.configuration, Configuration):
            if scoring_functions:
                return {self.metric.name: self.metric._worst_possible_result}
            else:
                return self.metric._worst_possible_result

        return calculate_loss(y_true,
                              y_hat,
                              self.task_type,
                              self.metric,
                              scoring_functions=scoring_functions)
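
A minimal sketch of the score-to-loss translation the docstring describes, assuming a bounded metric whose optimum is 1.0 (accuracy, for example); score_to_loss is an illustrative helper, not part of the auto-sklearn API:

    def score_to_loss(score: float, optimum: float = 1.0) -> float:
        # A higher score maps to a lower loss, so the optimizer can
        # always minimize regardless of the metric's direction.
        return optimum - score

    assert abs(score_to_loss(0.9) - 0.1) < 1e-9  # accuracy 0.9 -> loss 0.1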
Example #2
    def _slow(
        self,
        predictions: List[np.ndarray],
        labels: np.ndarray
    ) -> None:
        """Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        for i in range(ensemble_size):
            losses = np.zeros(
                [np.shape(predictions)[0]],
                dtype=np.float64,
            )
            for j, pred in enumerate(predictions):
                ensemble.append(pred)
                ensemble_prediction = np.mean(np.array(ensemble), axis=0)
                # calculate_loss can return a dict of losses, but with
                # scoring_functions=None it is guaranteed to be a float
                losses[j] = cast(
                    float,
                    calculate_loss(
                        solution=labels,
                        prediction=ensemble_prediction,
                        task_type=self.task_type,
                        metric=self.metric,
                        scoring_functions=None
                    )
                )
                ensemble.pop()
            best = np.nanargmin(losses)
            ensemble.append(predictions[best])
            trajectory.append(losses[best])
            order.append(best)

            # Special case: with a single model, further iterations
            # would just re-add it, so stop early
            if len(predictions) == 1:
                break

        self.indices_ = np.array(
            order,
            dtype=np.int64,
        )
        self.trajectory_ = np.array(
            trajectory,
            dtype=np.float64,
        )
        self.train_loss_ = trajectory[-1]
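
To make the greedy loop above easier to follow, here is a self-contained toy version that swaps calculate_loss for plain mean squared error; greedy_selection and its arguments are illustrative names, not auto-sklearn API:

    import numpy as np

    def greedy_selection(predictions, labels, ensemble_size=3):
        # At every step, append the model whose inclusion minimizes the
        # loss of the averaged ensemble (models may be picked repeatedly).
        ensemble, order = [], []
        for _ in range(ensemble_size):
            losses = [
                np.mean((np.mean(ensemble + [p], axis=0) - labels) ** 2)
                for p in predictions
            ]
            best = int(np.argmin(losses))
            ensemble.append(predictions[best])
            order.append(best)
        return order

    preds = [np.array([0.1, 0.9]), np.array([0.5, 0.5])]
    print(greedy_selection(preds, np.array([0.0, 1.0])))  # -> [0, 0, 0]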
Example #3
    def predict_and_loss(
        self,
        train: bool = False
    ) -> Tuple[Union[Dict[str, float], float], np.ndarray, Any, Any]:

        if train:
            Y_pred = self.predict_function(self.X_train, self.model,
                                           self.task_type, self.Y_train)
            err = calculate_loss(solution=self.Y_train,
                                 prediction=Y_pred,
                                 task_type=self.task_type,
                                 metric=self.metric,
                                 scoring_functions=self.scoring_functions)
        else:
            Y_pred = self.predict_function(self.X_test, self.model,
                                           self.task_type, self.Y_train)
            err = calculate_loss(solution=self.Y_test,
                                 prediction=Y_pred,
                                 task_type=self.task_type,
                                 metric=self.metric,
                                 scoring_functions=self.scoring_functions)

        return err, Y_pred, None, None
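
Since err mirrors the return type of calculate_loss, downstream code has to handle both the float and the dict case. A minimal, hypothetical helper (extract_main_loss is not part of auto-sklearn):

    from typing import Dict, Union

    def extract_main_loss(err: Union[float, Dict[str, float]],
                          metric_name: str) -> float:
        # predict_and_loss returns a dict when scoring_functions is set
        # and a bare float otherwise.
        if isinstance(err, dict):
            return err[metric_name]
        return err

    assert extract_main_loss(0.1, 'accuracy') == 0.1
    assert extract_main_loss({'accuracy': 0.1}, 'accuracy') == 0.1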
Example #4
    def _loss(
        self,
        y_true: np.ndarray,
        y_hat: np.ndarray,
        idx: np.ndarray,
        scoring_functions: Optional[List[Scorer]] = None
    ) -> Union[float, Dict[str, float]]:
        """Auto-sklearn follows a minimization goal.
        The calculate_loss internally translate a score function to
        a minimization problem.

        For a dummy prediction, the worst result is assumed.

        Parameters
        ----------
            y_true
        """
        scoring_functions = (self.scoring_functions if
                             scoring_functions is None else scoring_functions)
        if not isinstance(self.configuration, Configuration):
            if scoring_functions:
                return {self.metric.name: self.metric._worst_possible_result}
            else:
                return self.metric._worst_possible_result

        # Handle protected attributes
        if self.metric.needs_prot:
            sensitive_features = self.X_train[idx, 0]
            metric = copy.copy(self.metric)
            metric._kwargs.update({'sensitive_features': sensitive_features})
        else:
            metric = self.metric

        return calculate_loss(y_true,
                              y_hat,
                              self.task_type,
                              metric,
                              scoring_functions=scoring_functions)
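
A toy illustration of the protected-attribute lookup above; it assumes, as the method does, that column 0 of X_train holds the sensitive feature and that idx selects the rows of the current split:

    import numpy as np

    X_train = np.array([[0., 1.2],
                        [1., 0.7],
                        [0., 3.1]])       # column 0: group membership
    idx = np.array([0, 2])                # rows evaluated in this split
    sensitive_features = X_train[idx, 0]  # -> array([0., 0.])
    print(sensitive_features)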
Example #5
    def _fast(
        self,
        predictions: List[np.ndarray],
        labels: np.ndarray,
    ) -> None:
        """Fast version of Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []  # type: List[np.ndarray]
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        weighted_ensemble_prediction = np.zeros(
            predictions[0].shape,
            dtype=np.float64,
        )
        fant_ensemble_prediction = np.zeros(
            weighted_ensemble_prediction.shape,
            dtype=np.float64,
        )
        for i in range(ensemble_size):
            losses = np.zeros(
                (len(predictions),),
                dtype=np.float64,
            )
            s = len(ensemble)
            if s > 0:
                np.add(
                    weighted_ensemble_prediction,
                    ensemble[-1],
                    out=weighted_ensemble_prediction,
                )

            # Memory-efficient averaging!
            for j, pred in enumerate(predictions):
                # fant_ensemble_prediction is the prediction of the candidate
                # ensemble, i.e. (sum of the predictions selected in previous
                # iterations + predictions[j]) / (s + 1).
                # We overwrite fant_ensemble_prediction in place with
                # weighted_ensemble_prediction + pred, then scale to the average
                np.add(weighted_ensemble_prediction,
                       pred,
                       out=fant_ensemble_prediction)
                np.multiply(fant_ensemble_prediction, (1. / float(s + 1)),
                            out=fant_ensemble_prediction)

                # calculate_loss can return a dict of losses, but with
                # scoring_functions=None it is guaranteed to be a float
                losses[j] = cast(
                    float,
                    calculate_loss(solution=labels,
                                   prediction=fant_ensemble_prediction,
                                   task_type=self.task_type,
                                   metric=self.metric,
                                   scoring_functions=None))

            all_best = np.argwhere(losses == np.nanmin(losses)).flatten()
            best = self.random_state.choice(all_best)
            ensemble.append(predictions[best])
            trajectory.append(losses[best])
            order.append(best)

            # Special case: with a single model, further iterations
            # would just re-add it, so stop early
            if len(predictions) == 1:
                break

        self.indices_ = order
        self.trajectory_ = trajectory
        self.train_loss_ = trajectory[-1]
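
The identity behind the memory-efficient averaging can be checked directly: keeping a running sum and dividing by s + 1 matches recomputing the mean over the candidate ensemble. A quick standalone check:

    import numpy as np

    ensemble = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
    pred = np.array([5.0, 6.0])

    running_sum = np.sum(ensemble, axis=0)             # weighted_ensemble_prediction
    fant = (running_sum + pred) / (len(ensemble) + 1)  # fant_ensemble_prediction
    assert np.allclose(fant, np.mean(ensemble + [pred], axis=0))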
Example #6
def test_calculate_loss():
    # For a scorer bounded in [0, 1], make sure that the loss
    # has the expected positive value
    y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
    y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
    )
    loss = 1.0 - score
    assert pytest.approx(loss) == calculate_loss(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
    )

    # Test the dictionary case
    score_dict = calculate_score(solution=y_true,
                                 prediction=y_pred,
                                 task_type=BINARY_CLASSIFICATION,
                                 metric=autosklearn.metrics.accuracy,
                                 scoring_functions=[
                                     autosklearn.metrics.accuracy,
                                     autosklearn.metrics.balanced_accuracy
                                 ])
    expected_score_dict = {
        'accuracy': 0.9,
        'balanced_accuracy': 0.9285714285714286,
    }
    loss_dict = calculate_loss(solution=y_true,
                               prediction=y_pred,
                               task_type=BINARY_CLASSIFICATION,
                               metric=autosklearn.metrics.accuracy,
                               scoring_functions=[
                                   autosklearn.metrics.accuracy,
                                   autosklearn.metrics.balanced_accuracy
                               ])
    for expected_metric, expected_score in expected_score_dict.items():
        assert pytest.approx(expected_score) == score_dict[expected_metric]
        assert pytest.approx(1 - expected_score) == loss_dict[expected_metric]

    # Lastly, make sure that metrics whose optimum is zero
    # are also handled properly
    y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
    y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
    score = sklearn.metrics.mean_squared_error(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        solution=y_true,
        prediction=y_pred,
        task_type=REGRESSION,
        metric=autosklearn.metrics.mean_squared_error,
    )
    loss = score
    assert pytest.approx(loss) == calculate_loss(
        solution=y_true,
        prediction=y_pred,
        task_type=REGRESSION,
        metric=autosklearn.metrics.mean_squared_error,
    )