Example #1
 def _score_with_pred_proba(self,
                            y,
                            y_internal,
                            y_pred_proba_internal,
                            metric,
                            sample_weight=None,
                            weight_evaluation=None):
     metric = get_metric(metric, self.problem_type, 'leaderboard_metric')
     if weight_evaluation is None:
         weight_evaluation = self.weight_evaluation
     if metric.needs_pred:
         if self.problem_type == BINARY:
             # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
             y_pred = get_pred_from_proba(y_pred_proba_internal, problem_type=self.problem_type)
             y_tmp = y_internal
         else:
             y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=True)
             y_tmp = y
     elif metric.needs_quantile:
         y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=True)
         y_tmp = y
     else:
         y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=False)
         y_tmp = y_internal
     return compute_weighted_metric(y_tmp, y_pred, metric, weights=sample_weight, weight_evaluation=weight_evaluation, quantile_levels=self.quantile_levels)
 def inverse_transform_proba(self, y, as_pandas=False, as_pred=False):
     y_index = None
     if isinstance(y, DataFrame):
         y_index = y.index
         y = y.to_numpy()
     if self.invalid_class_count > 0:
         y_transformed = np.zeros(
             [len(y), len(self.ordered_class_labels)], dtype=np.float32)
         y_transformed[:, self.label_index_to_keep] = y
     else:
         y_transformed = y
     if as_pred:
         y_transformed = get_pred_from_proba(
             y_pred_proba=y_transformed,
             problem_type=self.problem_type_transform)
         y_transformed = self._convert_to_valid_series(y_transformed)
         y_transformed = y_transformed.map(
             self.cat_mappings_dependent_var_uncleaned)
         if y_index is not None:
             y_transformed.index = y_index
     if as_pandas and not as_pred:
         y_transformed = DataFrame(data=y_transformed,
                                   index=y_index,
                                   columns=self.ordered_class_labels,
                                   dtype=np.float32)
     return y_transformed
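For reference, the conversion that `get_pred_from_proba` performs in the snippets above can be exercised on its own. A minimal sketch, assuming the import paths below (they may differ between AutoGluon versions):

    import numpy as np
    from autogluon.core.constants import BINARY, MULTICLASS
    from autogluon.core.utils import get_pred_from_proba  # assumed import path

    # Binary case: a 1-D array of positive-class probabilities
    # is mapped to hard 0/1 predictions (hence the "Use 1 and 0" comment above).
    y_pred_proba_binary = np.array([0.1, 0.7, 0.4, 0.95], dtype=np.float32)
    y_pred_binary = get_pred_from_proba(y_pred_proba=y_pred_proba_binary,
                                        problem_type=BINARY)

    # Multiclass case: a 2-D array of per-class probabilities is mapped to
    # the index of the most probable class per row; inverse_transform_proba
    # then maps those internal indices back to the original labels.
    y_pred_proba_multi = np.array([[0.2, 0.5, 0.3],
                                   [0.6, 0.1, 0.3]], dtype=np.float32)
    y_pred_multi = get_pred_from_proba(y_pred_proba=y_pred_proba_multi,
                                       problem_type=MULTICLASS)
    print(y_pred_binary, y_pred_multi)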
Example #3
 def predict(self, X: DataFrame, model=None, as_pandas=True):
     if as_pandas:
         X_index = copy.deepcopy(X.index)
     else:
         X_index = None
     y_pred_proba = self.predict_proba(X=X,
                                       model=model,
                                       as_pandas=False,
                                       as_multiclass=False,
                                       inverse_transform=False)
     problem_type = self.label_cleaner.problem_type_transform or self.problem_type
     y_pred = get_pred_from_proba(y_pred_proba=y_pred_proba,
                                  problem_type=problem_type)
     if problem_type != QUANTILE:
         y_pred = self.label_cleaner.inverse_transform(pd.Series(y_pred))
         if as_pandas:
             y_pred.index = X_index
             y_pred.name = self.label
         else:
             y_pred = y_pred.values
     else:
         if as_pandas:
             y_pred = pd.DataFrame(data=y_pred,
                                   columns=self.quantile_levels,
                                   index=X_index)
     return y_pred
 def predict(self, X: DataFrame, model=None, as_pandas=False):
     if as_pandas:
         X_index = copy.deepcopy(X.index)
     else:
         X_index = None
     y_pred_proba = self.predict_proba(X=X, model=model, inverse_transform=False)
     problem_type = self.label_cleaner.problem_type_transform or self.problem_type
     y_pred = get_pred_from_proba(y_pred_proba=y_pred_proba, problem_type=problem_type)
     y_pred = self.label_cleaner.inverse_transform(pd.Series(y_pred))
     if as_pandas:
         y_pred.index = X_index
         y_pred.name = self.label
     else:
         y_pred = y_pred.values
     return y_pred
 def inverse_transform_proba(self, y, as_pandas=False, as_pred=False):
     if not as_pred:
         return y
     y_index = None
     if isinstance(y, Series):
         y_index = y.index
         y = y.to_numpy()
     if as_pred:
         y = get_pred_from_proba(y_pred_proba=y,
                                 problem_type=self.problem_type_transform)
         y = self._convert_to_valid_series(y)
         y = y.map(self.cat_mappings_dependent_var)
         y = y.to_numpy()
     if as_pandas:
         y = Series(data=y, index=y_index)
     return y
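Both `inverse_transform_proba` variants above finish by mapping the internal integer predictions back to the original labels via a stored dictionary (`cat_mappings_dependent_var`). The step can be illustrated with plain pandas; the mapping below is a hypothetical stand-in, not the label cleaner's real state:

    import numpy as np
    import pandas as pd

    # Hypothetical stand-in for the label cleaner's internal class mapping.
    cat_mappings = {0: 'no', 1: 'yes'}

    # Internal predictions as produced by get_pred_from_proba for a binary problem.
    y_internal = np.array([1, 0, 0, 1])

    # Map internal class indices back to the original labels, preserving the index.
    y_pred = pd.Series(y_internal, index=[10, 11, 12, 13]).map(cat_mappings)
    print(y_pred)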
Example #6
 def inverse_transform_proba(self, y, as_pandas=False, as_pred=False):
     if isinstance(y, DataFrame):
         y = copy.deepcopy(y)
         y.columns = copy.deepcopy(self.ordered_class_labels)
         if as_pred:
             y = get_pred_from_proba_df(y, problem_type=self.problem_type_transform)
         if not as_pandas:
             y = y.to_numpy()
     elif as_pred:
         y_index = None
         if isinstance(y, Series):
             y_index = y.index
             y = y.to_numpy()
         y = get_pred_from_proba(y_pred_proba=y, problem_type=self.problem_type_transform)
         y = self._convert_to_valid_series(y)
         y = y.map(self.cat_mappings_dependent_var)
         y = y.to_numpy()
         if as_pandas:
             y = Series(data=y, index=y_index)
     return y
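When the probabilities arrive as a DataFrame whose columns are the original class labels, `get_pred_from_proba_df` produces the predictions directly. For classification this amounts to a row-wise argmax over the label columns; a rough pandas equivalent (an illustration, not the library's implementation):

    import pandas as pd

    # Class probabilities with the original class labels as columns,
    # as constructed in the DataFrame branch above.
    y_proba = pd.DataFrame({'cat': [0.1, 0.7],
                            'dog': [0.6, 0.2],
                            'bird': [0.3, 0.1]})

    # Row-wise argmax over the label columns yields the predicted class per row.
    y_pred = y_proba.idxmax(axis=1)
    print(y_pred)  # row 0 -> 'dog', row 1 -> 'cat'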
Example #7
    def evaluate_predictions(self, y_true, y_pred, silent=False, auxiliary_metrics=False, detailed_report=True, high_always_good=False):
        """ Evaluate predictions. Does not support sample weights since this method reports a variety of metrics.
            Args:
                silent (bool): Should we print which metric is being used as well as performance.
                auxiliary_metrics (bool): Should we compute other (problem_type specific) metrics in addition to the default metric?
                detailed_report (bool): Should we compute more-detailed versions of the auxiliary_metrics? (requires auxiliary_metrics=True).
                high_always_good (bool): If True, this means higher values of returned metric are ALWAYS superior (so metrics like MSE should be returned negated)

            Returns a single performance value if auxiliary_metrics=False.
            Otherwise returns dict where keys = metrics, values = performance along each metric.
        """
        is_proba = False
        assert isinstance(y_true, (np.ndarray, pd.Series))
        assert isinstance(y_pred, (np.ndarray, pd.Series, pd.DataFrame))
        self._validate_class_labels(y_true)
        if isinstance(y_pred, np.ndarray):
            if self.problem_type == QUANTILE:
                y_pred = pd.DataFrame(data=y_pred, columns=self.quantile_levels)
            elif len(y_pred.shape) > 1:
                y_pred = pd.DataFrame(data=y_pred, columns=self.class_labels)
        if self.problem_type == BINARY:
            if isinstance(y_pred, pd.DataFrame):
                # roc_auc crashes if this isn't done
                y_pred = y_pred[self.positive_class]
                is_proba = True
            elif not self.eval_metric.needs_pred:
                raise AssertionError(f'`evaluate_predictions` requires y_pred_proba input for binary classification '
                                     f'when evaluating "{self.eval_metric.name}"... Please generate valid input via `predictor.predict_proba(data)`.\n'
                                     f'This may have occurred if you passed in predict input instead of predict_proba input, '
                                     f'or if you specified `as_multiclass=False` to `predictor.predict_proba(data, as_multiclass=False)`, '
                                     f'which is not supported by `evaluate_predictions`.')
        elif self.problem_type == MULTICLASS:
            if isinstance(y_pred, pd.DataFrame):
                is_proba = True
        if is_proba and self.eval_metric.needs_pred:
            if self.problem_type == BINARY:
                y_pred = get_pred_from_proba(y_pred_proba=y_pred, problem_type=self.problem_type)
                y_pred = self.label_cleaner.inverse_transform(y_pred)
            else:
                y_pred = get_pred_from_proba_df(y_pred_proba=y_pred, problem_type=self.problem_type)
        if not self.eval_metric.needs_pred:
            y_true = self.label_cleaner.transform(y_true)  # Get labels in numeric order
            performance = self.eval_metric(y_true, y_pred)
        elif self.problem_type == BINARY:
            # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
            y_true_internal = self.label_cleaner.transform(y_true)
            y_pred_internal = self.label_cleaner.transform(y_pred)
            performance = self.eval_metric(y_true_internal, y_pred_internal)
        else:
            performance = self.eval_metric(y_true, y_pred)

        metric = self.eval_metric.name

        if not high_always_good:
            performance = self.eval_metric.convert_score_to_sklearn_val(performance)  # flip negative once again back to positive (so higher is no longer necessarily better)

        if not silent:
            logger.log(20, f"Evaluation: {metric} on test data: {performance}")

        if not auxiliary_metrics:
            return performance

        # Otherwise compute auxiliary metrics:
        auxiliary_metrics = []
        if self.problem_type == REGRESSION:  # Adding regression metrics
            pearson_corr = lambda x, y: corrcoef(x, y)[0][1]
            pearson_corr.__name__ = 'pearson_correlation'
            auxiliary_metrics += [
                mean_absolute_error, explained_variance_score, r2_score, pearson_corr, mean_squared_error, median_absolute_error,
                # max_error
            ]
        else:  # Adding classification metrics
            auxiliary_metrics += [accuracy_score, balanced_accuracy_score, matthews_corrcoef]
            if self.problem_type == BINARY:  # binary-specific metrics
                # def auc_score(y_true, y_pred): # TODO: this requires y_pred to be probability-scores
                #     fpr, tpr, _ = roc_curve(y_true, y_pred, pos_label)
                #     return auc(fpr, tpr)
                f1micro_score = lambda y_true, y_pred: f1_score(y_true, y_pred, average='micro')
                f1micro_score.__name__ = f1_score.__name__
                auxiliary_metrics += [f1micro_score]  # TODO: add auc?
            # elif self.problem_type == MULTICLASS:  # multiclass metrics
            #     auxiliary_metrics += []  # TODO: No multi-class specific metrics for now. Include top-5, top-10 accuracy here.

        performance_dict = OrderedDict({metric: performance})
        for metric_function in auxiliary_metrics:
            if isinstance(metric_function, tuple):
                metric_function, metric_kwargs = metric_function
            else:
                metric_kwargs = None
            metric_name = metric_function.__name__
            if metric_name not in performance_dict:
                try:  # only compute auxiliary metrics which do not error (y_pred = class-probabilities may cause some metrics to error)
                    if metric_kwargs:
                        performance_dict[metric_name] = metric_function(y_true, y_pred, **metric_kwargs)
                    else:
                        performance_dict[metric_name] = metric_function(y_true, y_pred)
                except ValueError:
                    pass

        if not silent:
            logger.log(20, "Evaluations on test data:")
            logger.log(20, json.dumps(performance_dict, indent=4))

        if detailed_report and (self.problem_type != REGRESSION):
            # Construct confusion matrix
            try:
                performance_dict['confusion_matrix'] = confusion_matrix(y_true, y_pred, labels=self.label_cleaner.ordered_class_labels, output_format='pandas_dataframe')
            except ValueError:
                pass
            # One final set of metrics to report
            cl_metric = lambda y_true, y_pred: classification_report(y_true, y_pred, output_dict=True)
            metric_name = 'classification_report'
            if metric_name not in performance_dict:
                try:  # only compute auxiliary metrics which do not error (y_pred = class-probabilities may cause some metrics to error)
                    performance_dict[metric_name] = cl_metric(y_true, y_pred)
                except ValueError:
                    pass
                if not silent and metric_name in performance_dict:
                    logger.log(20, "Detailed (per-class) classification report:")
                    logger.log(20, json.dumps(performance_dict[metric_name], indent=4))
        return performance_dict
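As the error message above indicates, binary classification expects `predict_proba` output when the metric needs probabilities. A usage sketch through the public predictor interface, assuming an already-fitted `TabularPredictor` named `predictor` and a held-out DataFrame `test_data` that still contains the label column (keyword names may vary slightly across AutoGluon versions):

    # `predictor` and `test_data` are assumed to exist; they are not defined above.
    y_true = test_data[predictor.label]
    y_pred_proba = predictor.predict_proba(test_data)  # probabilities, as the method expects

    perf = predictor.evaluate_predictions(y_true=y_true,
                                          y_pred=y_pred_proba,
                                          auxiliary_metrics=True,
                                          detailed_report=True)
    # With auxiliary_metrics=True a dict is returned; detailed_report adds
    # 'confusion_matrix' and 'classification_report' entries as in the code above.
    print(perf)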
Example #8
    def score_debug(self, X: DataFrame, y=None, extra_info=False, compute_oracle=False, silent=False):
        leaderboard_df = self.leaderboard(extra_info=extra_info, silent=silent)
        if y is None:
            X, y = self.extract_label(X)
        self._validate_class_labels(y)
        w = None
        if self.weight_evaluation:
            X, w = extract_column(X, self.sample_weight)

        X = self.transform_features(X)
        y_internal = self.label_cleaner.transform(y)
        y_internal = y_internal.fillna(-1)

        trainer = self.load_trainer()
        scores = {}
        all_trained_models = trainer.get_model_names()
        all_trained_models_can_infer = trainer.get_model_names(can_infer=True)
        all_trained_models_original = all_trained_models.copy()
        model_pred_proba_dict, pred_time_test_marginal = trainer.get_model_pred_proba_dict(X=X, models=all_trained_models_can_infer, fit=False, record_pred_time=True)

        if compute_oracle:
            pred_probas = list(model_pred_proba_dict.values())
            ensemble_selection = EnsembleSelection(ensemble_size=100, problem_type=trainer.problem_type, metric=self.eval_metric, quantile_levels=self.quantile_levels)
            ensemble_selection.fit(predictions=pred_probas, labels=y_internal, identifiers=None, sample_weight=w)  # TODO: Only fit non-nan

            oracle_weights = ensemble_selection.weights_
            oracle_pred_time_start = time.time()
            oracle_pred_proba_norm = [pred * weight for pred, weight in zip(pred_probas, oracle_weights)]
            oracle_pred_proba_ensemble = np.sum(oracle_pred_proba_norm, axis=0)
            oracle_pred_time = time.time() - oracle_pred_time_start
            model_pred_proba_dict['OracleEnsemble'] = oracle_pred_proba_ensemble
            pred_time_test_marginal['OracleEnsemble'] = oracle_pred_time
            all_trained_models.append('OracleEnsemble')

        for model_name, y_pred_proba_internal in model_pred_proba_dict.items():
            if self.eval_metric.needs_pred:
                if self.problem_type == BINARY:
                    # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
                    y_pred = get_pred_from_proba(y_pred_proba_internal, problem_type=self.problem_type)
                    y_tmp = y_internal
                else:
                    y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=True)
                    y_tmp = y
            elif self.eval_metric.needs_quantile:
                y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=True)
                y_tmp = y
            else:
                y_pred = self.label_cleaner.inverse_transform_proba(y_pred_proba_internal, as_pred=False)
                y_tmp = y_internal
            scores[model_name] = compute_weighted_metric(y_tmp, y_pred, self.eval_metric, w, weight_evaluation=self.weight_evaluation, quantile_levels=self.quantile_levels)

        pred_time_test = {}
        # TODO: Add support for calculating pred_time_test_full for oracle_ensemble, need to copy graph from trainer and add oracle_ensemble to it with proper edges.
        for model in model_pred_proba_dict.keys():
            if model in all_trained_models_original:
                base_model_set = trainer.get_minimum_model_set(model)
                if len(base_model_set) == 1:
                    pred_time_test[model] = pred_time_test_marginal[base_model_set[0]]
                else:
                    pred_time_test_full_num = 0
                    for base_model in base_model_set:
                        pred_time_test_full_num += pred_time_test_marginal[base_model]
                    pred_time_test[model] = pred_time_test_full_num
            else:
                pred_time_test[model] = None

        scored_models = list(scores.keys())
        for model in all_trained_models:
            if model not in scored_models:
                scores[model] = None
                pred_time_test[model] = None
                pred_time_test_marginal[model] = None

        logger.debug('Model scores:')
        logger.debug(str(scores))
        model_names_final = list(scores.keys())
        df = pd.DataFrame(
            data={
                'model': model_names_final,
                'score_test': list(scores.values()),
                'pred_time_test': [pred_time_test[model] for model in model_names_final],
                'pred_time_test_marginal': [pred_time_test_marginal[model] for model in model_names_final],
            }
        )

        df_merged = pd.merge(df, leaderboard_df, on='model', how='left')
        df_merged = df_merged.sort_values(by=['score_test', 'pred_time_test', 'score_val', 'pred_time_val', 'model'], ascending=[False, True, False, True, False]).reset_index(drop=True)
        df_columns_lst = df_merged.columns.tolist()
        explicit_order = [
            'model',
            'score_test',
            'score_val',
            'pred_time_test',
            'pred_time_val',
            'fit_time',
            'pred_time_test_marginal',
            'pred_time_val_marginal',
            'fit_time_marginal',
            'stack_level',
            'can_infer',
        ]
        df_columns_other = [column for column in df_columns_lst if column not in explicit_order]
        df_columns_new = explicit_order + df_columns_other
        df_merged = df_merged[df_columns_new]

        return df_merged
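`score_debug` is what fills in the test-data columns of the leaderboard. From the public API the same information is usually reached as sketched below, again assuming a fitted `TabularPredictor` named `predictor` and a labeled `test_data` frame (a usage sketch, not part of the method above):

    # Passing labeled data lets the leaderboard compute score_test and pred_time_test.
    lb = predictor.leaderboard(test_data, silent=True)

    # Columns follow the explicit_order list above: score_test, score_val,
    # pred_time_test, pred_time_val, fit_time, the *_marginal variants, etc.
    print(lb[['model', 'score_test', 'score_val', 'pred_time_test', 'fit_time']])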