Example #1
    def model_support_dmd(self):
        return GeneralUtils.dmd_supported(self.model, self.test)
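Every example below calls GeneralUtils.dmd_supported(model, dmd) to decide whether the estimator can consume the DMD dataset wrapper directly or only its raw numpy values. A minimal sketch of that dispatch, assuming GeneralUtils and a DMD instance are already available; choose_model_input is an illustrative helper, not part of the library:

    def choose_model_input(model, dmd_test):
        # Pass the DMD wrapper as-is when the model understands it;
        # otherwise fall back to the underlying feature matrix.
        if GeneralUtils.dmd_supported(model, dmd_test):
            return dmd_test
        return dmd_test.values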
Example #2
    def score_value_report(
            self,
            model,
            dmd_test: DMD,
            labels=None,
            y_proba: numpy.ndarray = None,
            y_pred: numpy.ndarray = None) -> [ScoringMetricReport]:
        '''
        Score the given model on the test set and build auxiliary reports.

        :param model: model of interest
        :param dmd_test: test set
        :param labels: class labels; if None, they are inferred from y_true and y_pred
        :param y_proba: pre-calculated predicted probabilities for the test set, if available
        :param y_pred: pre-calculated model predictions for the test set, if available
        :return: tuple of (score_report, confusion_matrix, scatter, classification_report)
        '''
        score_report = []

        # pass the DMD wrapper directly when the model supports it,
        # otherwise fall back to the raw feature matrix
        model_support_dmd = GeneralUtils.dmd_supported(model, dmd_test)
        x_test = dmd_test if model_support_dmd else dmd_test.values
        y_true = dmd_test.target

        is_classification = GeneralUtils.is_classification(model)

        confusion_matrix, scatter, classification_report = None, None, None
        if is_classification:

            y_proba = y_proba if y_proba is not None else model.predict_proba(
                x_test)
            y_pred = y_pred if y_pred is not None else numpy.argmax(y_proba,
                                                                    axis=1)

            confusion_matrix = ConfusionMatrixReport(
                y_true=y_true,
                y_pred=y_pred,
                labels=labels if labels is not None else unique_labels(
                    y_true, y_pred))

            classification_report = SklearnClassificationReport(
                y_true=y_true, y_pred=y_pred, y_proba=y_proba, labels=labels)

            for metric in self.metrics:
                if metric.ptype != CLASSIFICATION:
                    continue
                # probability-based metrics score y_proba, the others score y_pred
                yp = y_proba if metric.is_proba else y_pred

                score = metric.function(y_true, yp)
                ci_low, ci_high = Metrics.confidence_interval(metric,
                                                              y_true=y_true,
                                                              y_pred=y_pred,
                                                              y_proba=y_proba)
                score_report.append(
                    ScoringMetricReport(metric_name=metric.name,
                                        value=score,
                                        ci_low=ci_low,
                                        ci_high=ci_high))

        else:
            y_pred = y_pred if y_pred is not None else model.predict(x_test)

            error_bars = self._calc_error_bars(dmd_test, model)

            scatter = ScatterReport(y_true=y_true,
                                    y_pred=y_pred,
                                    error_bars=error_bars)
            for metric in self.metrics:
                if metric.ptype != REGRESSION:
                    continue

                score = metric.function(y_true, y_pred)
                ci_low, ci_high = Metrics.confidence_interval(metric,
                                                              y_true=y_true,
                                                              y_pred=y_pred)

                ci_low = GeneralUtils.f5(ci_low)
                ci_high = GeneralUtils.f5(ci_high)
                score = GeneralUtils.f5(score)
                score_report.append(
                    ScoringMetricReport(metric_name=metric.name,
                                        value=score,
                                        ci_low=ci_low,
                                        ci_high=ci_high))

        return score_report, confusion_matrix, scatter, classification_report
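A hedged usage sketch for the method above, assuming a report object (here named scoring) exposes it and that ScoringMetricReport makes its constructor arguments available as attributes; model and dmd_test stand in for objects the caller already has:

    score_report, confusion_matrix, scatter, classification_report = \
        scoring.score_value_report(model=model, dmd_test=dmd_test)
    # one ScoringMetricReport per metric, with its confidence interval
    for metric_report in score_report:
        print(metric_report.metric_name,
              metric_report.value,
              metric_report.ci_low,
              metric_report.ci_high)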
Example #3
    def fit(self, dmd_test: DMD, **kwargs):
        self.dmd_supported = GeneralUtils.dmd_supported(model=self.model,
                                                        dmd=dmd_test)

        self.fit_uncertainty_model(dmd_test, **kwargs)
        return self
Example #4
    def sensitivity_analysis(self,
                             model,
                             dmd_test: DMD,
                             metric,
                             dmd_train=None,
                             method=SensitivityTypes.shuffled,
                             raw_scores=False,
                             y_pred=None) -> SensitivityOfFeaturesReport:

        self.model_support_dmd = GeneralUtils.dmd_supported(model, dmd_test)
        x = dmd_test if self.model_support_dmd else dmd_test.values

        # `or` would fail on a numpy array, so compare against None explicitly
        y_pred = y_pred if y_pred is not None else model.predict(x)
        ytest = dmd_test.target

        score_function = self.metrics[metric].function
        if metric in ['auc', 'logloss'] and ytest is not None:
            # probability metrics are scored against the true labels
            base_score = score_function(ytest, model.predict_proba(x))
            y_pred = ytest
        else:
            base_score = 0

        predict_function = (model.predict_proba
                            if self.metrics[metric].is_proba
                            else model.predict)

        scores = {}
        for i, name in enumerate(dmd_test.feature_names):
            if dmd_test.n_samples > self.max_samples_to_use:
                # cap the per-feature cost: take a reproducible random subset
                rs = numpy.random.RandomState(i)
                subset = rs.permutation(
                    dmd_test.n_samples)[:self.max_samples_to_use]
                dmd_test_ = dmd_test.split_by_indices(subset)
                y_pred_ = y_pred[subset]
            else:
                dmd_test_ = dmd_test
                y_pred_ = y_pred

            shuffled_x = self.get_shuffled_x(
                dmd_test_,
                i,
                dmd_train=dmd_train,
                method=method,
                model_support_dmd=self.model_support_dmd)
            shuffled_pred = predict_function(shuffled_x)

            if base_score > 0:
                # a larger deviation from the base score means more impact,
                # hence the inversion with 1 - abs(...)
                scores[name] = 1 - abs(
                    base_score - score_function(y_pred_, shuffled_pred))
            else:
                # a higher score means the shuffled feature had less impact
                scores[name] = score_function(y_pred_, shuffled_pred)

        if raw_scores:
            # description = "The raw scores of how each feature affects the model's predictions."
            return SensitivityOfFeaturesReport(
                method=method,
                sensitivities=scores,
                stats_report=self._sensitivity_stats_report(scores))

        # a higher score / lower loss means the shuffled feature had less impact
        if self.metrics[metric].is_loss:
            impact = scores
        else:
            impact = {name: 1 - score for name, score in scores.items()}

        # normalize so the impacts sum to 1
        total_impact = sum(impact.values())
        impact = {
            name: float(score / total_impact)
            for name, score in impact.items()
        }
        impact = GeneralUtils.round_values(impact)

        # description="The impact of each feature on model's predictions. "
        #             "Higher value mean larger impact (0 means no impact at all). "
        #             "Values are normalized to 1.")
        return SensitivityOfFeaturesReport(
            method=method,
            sensitivities=impact,
            stats_report=self._sensitivity_stats_report(sensitivities=impact))
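A hedged usage sketch for sensitivity_analysis, assuming an analyzer object that exposes the method, a metric name ('mae' here) registered in its metrics mapping, and that SensitivityOfFeaturesReport exposes its sensitivities argument as an attribute; all names other than SensitivityTypes are placeholders:

    report = analyzer.sensitivity_analysis(model=model,
                                           dmd_test=dmd_test,
                                           metric='mae',
                                           method=SensitivityTypes.shuffled)
    # normalized per-feature impact (values sum to 1)
    print(report.sensitivities)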