def test_pytrust_scoring_report(self):
    pytrust = self.get_pytrust(is_classification=True)
    scoring_report = pytrust.scoring_report
    for metric in Metrics.supported_metrics().values():
        if metric.ptype == CLASSIFICATION:
            metric_report = scoring_report.metric_scores[metric.name]
            score_value = metric_report.value
            ci_low = metric_report.ci_low
            ci_high = metric_report.ci_high
            self.assertTrue(ci_low < score_value < ci_high)

    pytrust = self.get_pytrust(is_classification=False)
    scoring_report = pytrust.scoring_report
    for metric in Metrics.supported_metrics().values():
        if metric.ptype == REGRESSION:
            metric_report = scoring_report.metric_scores[metric.name]
            score_value = metric_report.value
            ci_low = metric_report.ci_low
            ci_high = metric_report.ci_high
            self.assertTrue(ci_low < score_value < ci_high)
def create_scoring_report(cls, model, test: DMD, metric: str,
                          y_pred=None, y_proba=None,
                          scoring: Scoring = None, **kwargs) -> ScoringFullReport:
    """
    Create scoring report.

    Args:
        model - model to be analyzed based on test data.
        test - test data (DMD).
        metric - name of the target metric for the report.
        y_pred - precomputed predictions on the test data (optional).
        y_proba - precomputed prediction probabilities on the test data (optional).
        scoring - Scoring instance. If None, an instance is created internally.

    Returns:
        scoring report
    """
    metrics = Metrics.supported_metrics()
    scoring = scoring or Scoring(metrics=metrics)

    score_values_report, confusion_matrix, scatter, classification_report = \
        scoring.score_value_report(model=model,
                                   dmd_test=test,
                                   labels=test.labels,
                                   y_pred=y_pred,
                                   y_proba=y_proba)

    return ScoringFullReport(target_metric=metric,
                             metric_reports=score_values_report,
                             confusion_matrix=confusion_matrix,
                             scatter=scatter,
                             classification_report=classification_report)
def create_scoring_report(cls, model, train: DMD, test: DMD, metric: str,
                          y_pred=None, y_proba=None,
                          scoring: Scoring = None, **kwargs) -> ScoringFullReport:
    metrics = Metrics.supported_metrics()
    scoring = scoring or Scoring(metrics=metrics)

    score_values_report, confusion_matrix, scatter, classification_report = \
        scoring.score_value_report(model=model,
                                   dmd_test=test,
                                   labels=test.labels,
                                   y_pred=y_pred,
                                   y_proba=y_proba)

    if train is not None and test is not None:
        separation_quality = scoring.separation_quality(dmd_train=train,
                                                        dmd_test=test)
    else:
        separation_quality = numpy.nan

    return ScoringFullReport(target_metric=metric,
                             metric_reports=score_values_report,
                             separation_quality=separation_quality,
                             confusion_matrix=confusion_matrix,
                             scatter=scatter,
                             classification_report=classification_report)
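A minimal call sketch for the variant above. The class that owns this classmethod is not shown in these snippets, so `ReportFactory` is a hypothetical placeholder, and the `DMD(x=..., y=...)` constructor arguments are likewise an assumption:

import numpy
from sklearn.ensemble import RandomForestClassifier

# Toy data and a fitted estimator (illustrative only).
x = numpy.random.rand(200, 4)
y = (x[:, 0] > 0.5).astype(int)
model = RandomForestClassifier(n_estimators=10).fit(x[:100], y[:100])

train_dmd = DMD(x=x[:100], y=y[:100])   # assumed DMD signature
test_dmd = DMD(x=x[100:], y=y[100:])

report = ReportFactory.create_scoring_report(
    model=model,
    train=train_dmd,
    test=test_dmd,
    metric=Metrics.recall.name)

# target_metric echoes the requested metric; separation_quality is numpy.nan
# when either train or test is missing.
print(report.target_metric, report.separation_quality)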
def __init__(self, metrics: list = None):
    self.supported_metric = Metrics.supported_metrics()
    self.metrics = metrics or self.supported_metric

    # Keep only recognized metric names, mapped to their Metric objects.
    self.metrics = [self.supported_metric[metric]
                    for metric in self.metrics
                    if metric in self.supported_metric]
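For illustration, a Scoring instance restricted to a subset of metrics could be built as below; the two metric names are the ones referenced elsewhere in these snippets (`Metrics.recall`, `Metrics.r2`), and anything beyond that is an assumption:

# Restrict scoring to two known metrics; __init__ above maps each name to its
# Metric object and silently drops names not present in supported_metrics().
scoring = Scoring(metrics=[Metrics.recall.name, Metrics.r2.name])

# Passing None (or omitting the argument) keeps every supported metric.
full_scoring = Scoring()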
def _create_scoring_report(self) -> ScoringFullReport:
    metrics = Metrics.supported_metrics()
    self.scoring = Scoring(metrics=metrics)

    return self.create_scoring_report(model=self.model,
                                      train=self.train,
                                      test=self.test,
                                      metric=self.metric,
                                      y_pred=self.y_pred_test,
                                      y_proba=self.y_proba_test,
                                      scoring=self.scoring)
def _get_model_normalized_loss(self):
    metric = Metrics.supported_metrics()[self.scoring_report.target_metric]
    if not metric.is_loss:
        return self._get_model_loss()

    # The target metric is a loss; fall back to a secondary score metric
    # (r2 for regression, recall for classification) converted into a loss.
    if metric.ptype == REGRESSION:
        secondary_metric = Metrics.r2.name
    else:
        secondary_metric = Metrics.recall.name

    if secondary_metric not in self.scoring_report.metric_scores:
        logging.warning(
            "secondary metric {} is not available in scoring report".format(
                secondary_metric))
        return 0
    else:
        return Metrics.metric_as_loss(
            self.scoring_report.metric_scores[secondary_metric].value,
            secondary_metric)
def plot(self, ax=None, figsize=(10, 5)):
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    ci_low = GeneralUtils.f5(self.ci_low)
    ci_high = GeneralUtils.f5(self.ci_high)
    value = GeneralUtils.f5(self.value)

    if ci_high == ci_low:
        n_digits = 5
    else:
        n_digits = -int(numpy.log10(ci_high - ci_low)) + 1  # 0.0011 --> -(-2) + 1 = 3

    # Confidence interval as a horizontal blue segment, metric value as a red dot.
    ax.plot([ci_low, ci_high], [1, 1], '-b',
            ci_low, 1, '|b',
            ci_high, 1, '|b',
            value, 1, 'or')

    delta = (ci_high - ci_low) * 1e-1 + 10 ** -n_digits / 2

    metric_obj = Metrics.supported_metrics()[self.metric_name]
    metric_full_name = metric_obj.full_name

    r_lim = 1e100 if metric_obj.is_loss else 1
    l_lim = 0 if metric_obj.is_loss else -1e100

    l_lim = max(l_lim, numpy.round(ci_low - delta, n_digits))
    r_lim = min(r_lim, numpy.round(ci_high + delta, n_digits))
    ax.set_xlim(l_lim, r_lim)

    n_points = 1 + int(numpy.round(r_lim - l_lim, n_digits) / 10 ** -n_digits) % 10
    n_points = max(n_points, 3)
    x = numpy.linspace(l_lim, r_lim, num=n_points)
    xlabels = ["%.5g" % numpy.round(k, n_digits) for k in x]

    ax.set(xticks=x.tolist(),
           xticklabels=xlabels,
           yticks=[0.5],
           yticklabels=[''],
           title='Confidence interval for metric {}'.format(metric_full_name),
           ylabel='',
           xlabel='')

    # Annotate ci_low, the metric value, and ci_high above the segment.
    for x, label in [(ci_low, 'ci_low (25%)'),
                     (value, '{} value: {:.5g}'.format(
                         self.metric_name, numpy.round(value, 1 + n_digits))),
                     (ci_high, 'ci_high (75%)')]:
        y = 1.01 + 0.01 * (x == value)
        ax.text(x, y, label, ha="center", va="center")

    plt.draw()
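A sketch of how one of these confidence-interval plots might be produced from a scoring report, reusing the `metric_scores` access pattern from the test above; the matplotlib import and the `scoring_report` variable (e.g. `pytrust.scoring_report` as in the first snippet) are assumptions:

import matplotlib.pyplot as plt

# scoring_report is assumed to be a ScoringFullReport, e.g. pytrust.scoring_report.
recall_report = scoring_report.metric_scores[Metrics.recall.name]

# Draws the confidence interval as a horizontal blue segment with the metric
# value marked in red, as implemented in plot() above.
recall_report.plot()
plt.show()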
def __init__(self):
    self.metrics = Metrics.supported_metrics()
    self.model_support_dmd = None
    self.max_samples_to_use = 20000
    self.low_sensitivity_threshold = 0.05
    self.very_low_sensitivity_threshold = 1e-4