Example #1
    def test_get_callable(self):
        fn = get_callable('sklearn.metrics.accuracy_score')

        from sklearn.metrics import accuracy_score
        y_true = [1, 1, 0, 1, 0]
        y_pred = [1, 1, 1, 0, 0]

        assert accuracy_score(y_true, y_pred) == fn(y_true, y_pred)
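
The test above exercises get_callable only through its behaviour: it resolves a dotted import path such as 'sklearn.metrics.accuracy_score' to the underlying function. As a rough illustration (not the library's actual implementation, which may differ), such a resolver could be written with importlib:

    import importlib


    def get_callable(path):
        """Resolve a dotted path like 'sklearn.metrics.accuracy_score' to a callable.

        Illustrative sketch only; the real get_callable may differ.
        """
        module_path, _, attr_name = path.rpartition('.')
        module = importlib.import_module(module_path)
        fn = getattr(module, attr_name)
        if not callable(fn):
            raise TypeError(f"{path} does not point to a callable")
        return fn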
Example #2
    def display_model_performance(self):
        """
        Displays the performance of the model. The metrics are computed using the config dict.

        Metrics should be given as a list of dicts. Each dict contains the following keys:
        'path' (path to the metric function, e.g. 'sklearn.metrics.mean_absolute_error'),
        'name' (optional, name of the metric as displayed in the report),
        and 'use_proba_values' (optional, False by default, or True if the metric
        uses probability values instead of predicted values).

        For example:
        config['metrics'] = [
                {
                    'path': 'sklearn.metrics.mean_squared_error',
                    'name': 'Mean squared error',  # Optional: name displayed next to the metric
                },
                {
                    'path': 'Scoring_AP.utils.lift10',  # Custom function path
                    'name': 'Lift10',
                    'use_proba_values': True  # Use proba values instead of predicted values
                }
            ]
        """
        if self.y_test is None:
            logging.info(
                "No labels given for test set. Skipping model performance part"
            )
            return

        print_md("### Univariate analysis of target variable")
        df = pd.concat([
            pd.DataFrame({self.target_name: self.y_pred}).assign(_dataset="pred"),
            pd.DataFrame({self.target_name: self.y_test}).assign(_dataset="true"),
        ])
        self._perform_and_display_analysis_univariate(
            df=df,
            col_splitter="_dataset",
            split_values=["pred", "true"],
            names=["Prediction values", "True values"],
            group_id='target-distribution')

        if 'metrics' not in self.config:
            logging.info(
                "No 'metrics' key found in report config dict. Skipping model performance part."
            )
            return
        print_md("### Metrics")

        for metric in self.config['metrics']:
            if 'name' not in metric:
                metric['name'] = metric['path']

            if metric['path'] in ['confusion_matrix', 'sklearn.metrics.confusion_matrix'] or \
                    metric['name'] == 'confusion_matrix':
                print_md(f"**{metric['name']} :**")
                print_html(
                    convert_fig_to_html(
                        generate_confusion_matrix_plot(y_true=self.y_test,
                                                       y_pred=self.y_pred)))
            else:
                try:
                    metric_fn = get_callable(path=metric['path'])
                    # Use proba values instead of predicted values if requested
                    if metric.get('use_proba_values', False):
                        y_pred = self.explainer.proba_values
                    else:
                        y_pred = self.y_pred
                    res = metric_fn(self.y_test, y_pred)
                except Exception as e:
                    logging.info(
                        f"Could not compute the following metric: {metric['path']}. \n{e}"
                    )
                    continue
                if isinstance(res, Number):
                    res = display_value(round_to_k(res, 3))
                    print_md(f"**{metric['name']} :** {res}")
                elif isinstance(res, (list, tuple, np.ndarray)):
                    print_md(f"**{metric['name']} :**")
                    print_html(
                        pd.DataFrame(res).to_html(classes="greyGridTable"))
                elif isinstance(res, str):
                    print_md(f"**{metric['name']} :**")
                    print_html(f"<pre>{res}</pre>")
                else:
                    logging.info(
                        f"Could not display the following metric: {metric['path']}. \n"
                        f"Result of type {type(res)} cannot be displayed")
        print_md('---')
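
For context, here is a minimal sketch of a 'metrics' configuration that display_model_performance can consume, based only on the docstring and the loop above (the metric choices and names are hypothetical):

    # Hypothetical config sketch: each entry is resolved with get_callable and
    # called as metric_fn(y_test, y_pred); confusion_matrix is rendered as a plot.
    config['metrics'] = [
        {'path': 'sklearn.metrics.accuracy_score', 'name': 'Accuracy'},
        {'path': 'sklearn.metrics.f1_score'},  # 'name' falls back to the path
        {'path': 'sklearn.metrics.log_loss',
         'name': 'Log loss',
         'use_proba_values': True},  # scored on explainer.proba_values
        {'path': 'sklearn.metrics.confusion_matrix'},
    ]

Any custom function with the signature fn(y_true, y_pred), such as the Scoring_AP.utils.lift10 example in the docstring, can be plugged in the same way.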