def test_calculate_metrics(self):
    """
    Test for method calculate_metrics.

    Every implemented metric must come back as a float; metrics that are
    not implemented must come back as np.nan.
    """
    for implemented_metric in self.all_implemented_metrics:
        self.assertIsInstance(
            Scorer.calculate_metrics([1, 1, 0, 1], [0, 1, 0, 1],
                                     [implemented_metric])[implemented_metric],
            float)
    for not_implemented_metric in self.some_not_implemented_metrics:
        np.testing.assert_equal(
            Scorer.calculate_metrics([1, 1, 0, 1], [0, 1, 0, 1],
                                     [not_implemented_metric])[not_implemented_metric],
            np.nan)
def test_calculate_metric_mean_absolute_error():
    yt = [1, 1, 1, 1]
    yp = [1, 1, 1, 0]
    metrics = ["mean_absolute_error"]
    s = Scorer()
    assert s.calculate_metrics(yt, yp, metrics) == {"mean_absolute_error": 0.25}
def test_calculate_metric_silhouette_score():
    yt = [1, 1, 1, 1]
    yp = [1, 1, 1, 0]
    metrics = ["SC"]
    s = Scorer()
    # the silhouette coefficient takes a 2d feature matrix plus cluster
    # labels rather than a (y_true, y_pred) pair, hence the stacked input
    assert s.calculate_metrics(np.vstack((yt, yt, yp, yp)), yp, metrics) == {"SC": 0.0}
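# A minimal sketch (not part of the test suite) of what the "SC" assertion
# above relies on, assuming "SC" maps to sklearn's silhouette_score: the toy
# matrix and labels are the same values the test feeds to the Scorer.
import numpy as np
from sklearn.metrics import silhouette_score

X = np.vstack(([1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 0], [1, 1, 1, 0]))
labels = [1, 1, 1, 0]
print(silhouette_score(X, labels))  # 0.0 for this toy clustering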
def score(estimator, X, y_true, metrics, indices=None,
          calculate_metrics: bool = True, training: bool = False, **kwargs):
    """
    Use the pipeline to predict the given data, compare the predictions to
    the truth values and calculate metrics.

    :param estimator: the pipeline or pipeline element used for prediction
    :param X: the data to predict
    :param y_true: the truth values for the data
    :param metrics: the metrics to be calculated
    :param indices: the indices of the given data and targets that are logged into the result tree
    :param calculate_metrics: if True, calculates metrics for the given data
    :param training: if True, all training_only pipeline elements are executed; if False, they are skipped
    :return: MDBScoreInformation object
    """
    scoring_time_start = time.time()

    output_metrics = {}
    non_default_score_metrics = list(metrics)
    # a plain substring check such as `if 'score' in metrics` would also react
    # to names like f1_score if metrics ever arrives as a string, so use an
    # exact set intersection instead:
    matches = set(["score"]).intersection(set(non_default_score_metrics))
    if len(matches) > 0:
        # TODO: this extra scoring call potentially slows things down
        default_score = estimator.score(X, y_true)
        output_metrics["score"] = default_score
        non_default_score_metrics.remove("score")

    if not training:
        y_pred = estimator.predict(X, **kwargs)
    else:
        X, y_true_new, kwargs_new = estimator.transform(X, y_true, **kwargs)
        if y_true_new is not None:
            y_true = y_true_new
        if kwargs_new is not None and len(kwargs_new) > 0:
            kwargs = kwargs_new
        y_pred = estimator.predict(X, training=True, **kwargs)

    # Nice to have
    # InnerFoldManager.plot_some_data(y_true, y_pred)

    if calculate_metrics:
        score_metrics = Scorer.calculate_metrics(y_true, y_pred,
                                                 non_default_score_metrics)
        # merge with the default 'score' metric calculated above, if any
        if output_metrics:
            output_metrics = {**output_metrics, **score_metrics}
        else:
            output_metrics = score_metrics
    else:
        output_metrics = {}

    final_scoring_time = time.time() - scoring_time_start

    probabilities = []
    if hasattr(estimator, "_final_estimator"):
        if hasattr(estimator._final_estimator.base_element, "predict_proba"):
            probabilities = estimator.predict_proba(X, training=training, **kwargs)
            try:
                if probabilities is not None:
                    if not len(probabilities) == 0:
                        probabilities = probabilities.tolist()
            except AttributeError:
                warnings.warn("No probabilities available.")

    if not isinstance(y_pred, list):
        y_pred = np.asarray(y_pred).tolist()
    if not isinstance(y_true, list):
        y_true = np.asarray(y_true).tolist()

    score_result_object = MDBScoreInformation(
        metrics=output_metrics,
        score_duration=final_scoring_time,
        y_pred=y_pred,
        y_true=y_true,
        indices=np.asarray(indices if indices is not None else []).tolist(),
        probabilities=probabilities,
    )
    return score_result_object
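# A minimal usage sketch, not from the library itself: assuming Scorer and
# MDBScoreInformation are importable, any estimator that exposes predict()
# and score() and has no _final_estimator attribute (e.g. a bare sklearn
# classifier) takes the non-training path above.
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier().fit([[0], [1], [2], [3]], [0, 0, 1, 1])
info = score(clf, [[0], [3]], [0, 1], metrics=["score", "accuracy"])
print(info.metrics)  # expected along the lines of {'score': 1.0, 'accuracy': 1.0}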
def process_fit_results(config_item, calculate_metrics_across_folds,
                        calculate_metrics_per_fold, metrics):
    overall_y_pred_test = []
    overall_y_true_test = []
    overall_y_pred_train = []
    overall_y_true_train = []

    for fold in config_item.inner_folds:
        curr_test_fold = fold.validation
        curr_train_fold = fold.training

        if calculate_metrics_across_folds:
            # if we have one-hot encoded values -> concatenate horizontally
            if isinstance(curr_test_fold.y_pred, np.ndarray):
                if len(curr_test_fold.y_pred.shape) > 1:
                    axis = 1
                else:
                    axis = 0
            else:
                # plain lists are concatenated along the first axis
                axis = 0
            overall_y_true_test = np.concatenate(
                (overall_y_true_test, curr_test_fold.y_true), axis=axis)
            overall_y_pred_test = np.concatenate(
                (overall_y_pred_test, curr_test_fold.y_pred), axis=axis)

            # we assume y_pred from the training set comes in the same shape
            # as y_pred from the test set
            overall_y_true_train = np.concatenate(
                (overall_y_true_train, curr_train_fold.y_true), axis=axis)
            overall_y_pred_train = np.concatenate(
                (overall_y_pred_train, curr_train_fold.y_pred), axis=axis)

            # metrics across folds
            metrics_to_calculate = list(metrics)
            if "score" in metrics_to_calculate:
                metrics_to_calculate.remove("score")
            metrics_train = Scorer.calculate_metrics(
                overall_y_true_train, overall_y_pred_train, metrics_to_calculate)
            metrics_test = Scorer.calculate_metrics(
                overall_y_true_test, overall_y_pred_test, metrics_to_calculate)

            def metric_to_db_class(metric_list):
                db_metrics = []
                for metric_name, metric_value in metric_list.items():
                    new_metric = MDBFoldMetric(
                        operation=FoldOperations.RAW,
                        metric_name=metric_name,
                        value=metric_value,
                    )
                    db_metrics.append(new_metric)
                return db_metrics

            db_metrics_train = metric_to_db_class(metrics_train)
            db_metrics_test = metric_to_db_class(metrics_test)

            # if we want to have metrics for each fold as well, calculate mean and std.
            if calculate_metrics_per_fold:
                db_metrics_fold_train, db_metrics_fold_test = \
                    MDBHelper.aggregate_metrics_for_inner_folds(
                        config_item.inner_folds, metrics)
                config_item.metrics_train = db_metrics_train + db_metrics_fold_train
                config_item.metrics_test = db_metrics_test + db_metrics_fold_test
            else:
                config_item.metrics_train = db_metrics_train
                config_item.metrics_test = db_metrics_test

        elif calculate_metrics_per_fold:
            # calculate mean and std over all fold metrics
            config_item.metrics_train, config_item.metrics_test = \
                MDBHelper.aggregate_metrics_for_inner_folds(
                    config_item.inner_folds, metrics)
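# A small sketch of the "metrics across folds" idea implemented above:
# predictions from every inner fold are pooled first and the metric is
# computed once on the pooled vectors, instead of averaging per-fold metric
# values. The fold contents here are made up for illustration.
import numpy as np
from sklearn.metrics import accuracy_score

folds = [([1, 0, 1], [1, 1, 1]), ([0, 0, 1], [0, 0, 0])]  # (y_true, y_pred) per fold
y_true_all = np.concatenate([t for t, _ in folds])
y_pred_all = np.concatenate([p for _, p in folds])
print(accuracy_score(y_true_all, y_pred_all))  # 4 of 6 pooled predictions correct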
def test_calculate_metric_HCV():
    yt = [1, 1, 1, 1]
    yp = [1, 1, 1, 0]
    metrics = ["HCV"]
    s = Scorer()
    assert s.calculate_metrics(yt, yp, metrics) == {"HCV": 1.0}
def test_calculate_metric_accuracy():
    yt = [1, 1, 1, 1]
    yp = [1, 1, 1, 0]
    metrics = ["accuracy"]
    s = Scorer()
    assert s.calculate_metrics(yt, yp, metrics) == {"accuracy": 0.75}
def score(estimator, X, y_true, metrics, indices=None,
          calculate_metrics: bool = True, training: bool = False, **kwargs):
    """
    Use the pipeline to predict the given data, compare the predictions to
    the truth values and calculate metrics.

    :param estimator: the pipeline or pipeline element used for prediction
    :param X: the data to predict
    :param y_true: the truth values for the data
    :param metrics: the metrics to be calculated
    :param indices: the indices of the given data and targets that are logged into the result tree
    :param calculate_metrics: if True, calculates metrics for the given data
    :param training: if True, all training_only pipeline elements are executed; if False, they are skipped
    :return: MDBScoreInformation object
    """
    scoring_time_start = time.time()

    output_metrics = {}
    # initialize here so the variable is always defined, even when
    # calculate_metrics is False (the original left it unset on that path)
    y_pred_names = []

    if not training:
        y_pred = estimator.predict(X, **kwargs)
    else:
        X, y_true_new, kwargs_new = estimator.transform(X, y_true, **kwargs)
        if y_true_new is not None:
            y_true = y_true_new
        if kwargs_new is not None and len(kwargs_new) > 0:
            kwargs = kwargs_new
        y_pred = estimator.predict(X, training=True, **kwargs)

    # Nice to have
    # InnerFoldManager.plot_some_data(y_true, y_pred)

    if calculate_metrics:
        if isinstance(y_pred, np.ndarray) and y_pred.dtype.names:
            # structured array: the actual predictions must live in a field
            # called 'y_pred'
            y_pred_names = [y_pred.dtype.names]
            if "y_pred" not in y_pred_names[0]:
                msg = ("If the scorer object does not return a 1d array or "
                       "list, PHOTON expects the name 'y_pred' in the nd array.")
                logger.error(msg)
                raise KeyError(msg)
            score_metrics = Scorer.calculate_metrics(y_true, y_pred["y_pred"],
                                                     metrics)
        else:
            score_metrics = Scorer.calculate_metrics(y_true, y_pred, metrics)
        # add default metric
        if output_metrics:
            output_metrics = {**output_metrics, **score_metrics}
        else:
            output_metrics = score_metrics
    else:
        output_metrics = {}

    final_scoring_time = time.time() - scoring_time_start

    probabilities = []
    if hasattr(estimator, "_final_estimator"):
        if hasattr(estimator._final_estimator.base_element, "predict_proba"):
            probabilities = estimator.predict_proba(X, training=training, **kwargs)
            try:
                if probabilities is not None:
                    if not len(probabilities) == 0:
                        probabilities = probabilities.tolist()
            except AttributeError:
                warnings.warn("No probabilities available.")

    if not isinstance(y_pred, list):
        y_pred = y_pred_names + np.asarray(y_pred).tolist()
    if not isinstance(y_true, list):
        y_true = np.asarray(y_true).tolist()

    score_result_object = MDBScoreInformation(
        metrics=output_metrics,
        score_duration=final_scoring_time,
        y_pred=y_pred,
        y_true=y_true,
        indices=np.asarray(indices if indices is not None else []).tolist(),
        probabilities=probabilities,
    )
    return score_result_object
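# A small sketch of the structured-array branch above: a custom estimator may
# return an ndarray with named fields, in which case a field called 'y_pred'
# must hold the actual predictions. The extra 'probability' field is purely
# illustrative.
import numpy as np

y_pred = np.array([(1, 0.9), (0, 0.2)],
                  dtype=[("y_pred", int), ("probability", float)])
print(y_pred.dtype.names)   # ('y_pred', 'probability')
print(y_pred["y_pred"])     # array([1, 0]) -> what gets scored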