def test_binary_dice(truth_binary_image, prediction_binary_image, correct_value):
    cm = create_binary_confusion_matrix(truth_binary_image, prediction_binary_image)

    value = metrics.binary_dice(cm)

    assert value == correct_value
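# Illustrative sketch only (not the library code): the test above assumes a helper that
# reduces two boolean arrays to a named confusion-matrix Series, plus a Dice metric
# computed from it. The cell labels ('TP', 'TN', 'FP', 'FN') and the optional 'weights'
# parameter are assumptions inferred from how these functions are called elsewhere in
# this module.
import numpy as np
import pandas as pd


def sketch_binary_confusion_matrix(truth, prediction, weights=None, name=None) -> pd.Series:
    truth = np.asarray(truth, dtype=bool)
    prediction = np.asarray(prediction, dtype=bool)
    if weights is None:
        weights = np.ones(truth.shape[0])
    # Weighted counts of the four confusion-matrix cells
    return pd.Series(
        {
            'TP': float(np.sum(weights * (truth & prediction))),
            'TN': float(np.sum(weights * (~truth & ~prediction))),
            'FP': float(np.sum(weights * (~truth & prediction))),
            'FN': float(np.sum(weights * (truth & ~prediction))),
        },
        name=name,
    )


def sketch_binary_dice(cm: pd.Series) -> float:
    # Dice = 2*TP / (2*TP + FP + FN)
    return 2 * cm['TP'] / (2 * cm['TP'] + cm['FP'] + cm['FN'])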
def compute_metrics(truth_file_stream, prediction_file_stream) -> Dict[str, Dict[str, float]]:
    truth_probabilities = parse_csv(truth_file_stream, CATEGORIES)
    prediction_probabilities = parse_csv(prediction_file_stream, CATEGORIES)

    exclude_rows(truth_probabilities, EXCLUDE_LABELS)
    exclude_rows(prediction_probabilities, EXCLUDE_LABELS)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: Dict[str, Dict[str, float]] = {}
    for category in CATEGORIES:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[category]

        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(truth_category_probabilities, prediction_category_probabilities),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities, prediction_category_probabilities, 0.80
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities, prediction_category_probabilities
            ),
        }

    # Compute averages for all per-category metrics
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric: float(np.mean([scores[category][metric] for category in CATEGORIES]))
        for metric in per_category_metrics
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy': metrics.balanced_multiclass_accuracy(
            truth_probabilities, prediction_probabilities
        )
    }

    return scores
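# Usage sketch: compute_metrics() takes two open text streams, one for the ground-truth
# CSV and one for the submitted prediction CSV, and returns a nested dict of scores keyed
# by category and then by metric name. The file paths and the helper name below are
# hypothetical, for illustration only.
def _example_compute_metrics(truth_csv_path: str, prediction_csv_path: str) -> None:
    with open(truth_csv_path) as truth_file, open(prediction_csv_path) as prediction_file:
        all_scores = compute_metrics(truth_file, prediction_file)
    for group, group_scores in all_scores.items():
        for metric_name, metric_value in group_scores.items():
            print(f'{group}.{metric_name}: {metric_value:.4f}')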
def _category_score(
    truth_category_probabilities: pd.Series,
    prediction_category_probabilities: pd.Series,
    truth_weights: pd.DataFrame,
    category: str,
) -> pd.Series:
    truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
    prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

    category_cm = create_binary_confusion_matrix(
        truth_binary_values=truth_binary_values.to_numpy(),
        prediction_binary_values=prediction_binary_values.to_numpy(),
        weights=truth_weights.score_weight.to_numpy(),
        name=category,
    )

    return pd.Series(
        {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
                0.80,
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
        },
        index=[
            'accuracy',
            'sensitivity',
            'specificity',
            'dice',
            'ppv',
            'npv',
            'auc',
            'auc_sens_80',
            'ap',
        ],
        name=category,
    )
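# Usage sketch: because _category_score() returns one pd.Series per category (named by that
# category), the per-category results can be stacked into a DataFrame with one row per
# category and one column per metric. The argument names mirror the callers above; the
# helper name below is hypothetical.
def _example_category_scores(truth_probabilities, prediction_probabilities, truth_weights) -> pd.DataFrame:
    return pd.DataFrame(
        [
            _category_score(
                truth_probabilities[category],
                prediction_probabilities[category],
                truth_weights,
                category,
            )
            for category in truth_probabilities.columns
        ]
    )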
def score(truth_path: pathlib.Path, prediction_path: pathlib.Path) -> ScoresType:
    confusion_matrices = pd.DataFrame(
        [
            create_binary_confusion_matrix(
                truth_binary_values=image_pair.truth_image > 128,
                prediction_binary_values=image_pair.prediction_image > 128,
                name=(image_pair.attribute_id, image_pair.image_id),
            )
            for image_pair in iter_image_pairs(truth_path, prediction_path)
        ]
    )
    confusion_matrices = confusion_matrices.reindex(
        index=pd.MultiIndex.from_tuples(confusion_matrices.index, names=('attribute_id', 'image_id'))
    )

    # Normalize all values, since image sizes vary
    normalized_confusion_matrices = confusion_matrices.apply(
        normalize_confusion_matrix, axis='columns'
    )

    scores: ScoresType = {}
    for attribute in sorted(confusion_matrices.index.unique('attribute_id')):
        attribute_confusion_matrices = normalized_confusion_matrices.loc(axis=0)[attribute, :]
        sum_attribute_confusion_matrices = attribute_confusion_matrices.sum(axis='index')
        scores[attribute] = {
            'jaccard': metrics.binary_jaccard(sum_attribute_confusion_matrices),
            'dice': metrics.binary_dice(sum_attribute_confusion_matrices),
        }

    sum_confusion_matrix = normalized_confusion_matrices.sum(axis='index')
    scores['micro_average'] = {
        'jaccard': metrics.binary_jaccard(sum_confusion_matrix),
        'dice': metrics.binary_dice(sum_confusion_matrix),
    }
    scores['overall'] = scores['micro_average']['jaccard']

    return scores
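# Illustrative sketch (assumption, not the library helper): the normalization step above is
# consistent with dividing each image's confusion matrix by its total cell count, so that
# large and small images contribute equally before the per-attribute sums are taken.
def sketch_normalize_confusion_matrix(cm: pd.Series) -> pd.Series:
    # Scale TP/TN/FP/FN so the cells of each image's matrix sum to 1
    return cm / cm.sum()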
def score(truth_path: pathlib.Path, prediction_path: pathlib.Path) -> Dict[str, Dict[str, float]]:
    confusion_matrices = pd.DataFrame(
        [
            create_binary_confusion_matrix(
                truth_binary_values=image_pair.truth_image > 128,
                prediction_binary_values=image_pair.prediction_image > 128,
                name=(image_pair.attribute_id, image_pair.image_id),
            )
            for image_pair in iter_image_pairs(truth_path, prediction_path)
        ]
    )
    confusion_matrices = confusion_matrices.reindex(
        index=pd.MultiIndex.from_tuples(confusion_matrices.index, names=('attribute_id', 'image_id'))
    )

    # Normalize all values, since image sizes vary
    normalized_confusion_matrices = confusion_matrices.apply(
        normalize_confusion_matrix, axis='columns'
    )

    scores: Dict[str, Dict[str, float]] = {}
    for attribute in sorted(confusion_matrices.index.unique('attribute_id')):
        attribute_confusion_matrices = normalized_confusion_matrices.loc(axis=0)[attribute, :]
        sum_attribute_confusion_matrices = attribute_confusion_matrices.sum(axis='index')
        scores[attribute] = {
            'jaccard': metrics.binary_jaccard(sum_attribute_confusion_matrices),
            'dice': metrics.binary_dice(sum_attribute_confusion_matrices),
        }

    sum_confusion_matrix = normalized_confusion_matrices.sum(axis='index')
    scores['micro_average'] = {
        'jaccard': metrics.binary_jaccard(sum_confusion_matrix),
        'dice': metrics.binary_dice(sum_confusion_matrix),
    }

    return scores
def compute_metrics(truth_file_stream, prediction_file_stream) -> ScoresType:
    truth_probabilities, truth_weights = parse_truth_csv(truth_file_stream)
    categories = truth_probabilities.columns
    prediction_probabilities = parse_csv(prediction_file_stream, categories)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: ScoresType = {}
    for category in categories:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[category]

        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            weights=truth_weights.score_weight.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
                0.80,
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'roc': metrics.roc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
        }

    # Compute averages for all per-category metrics, except 'roc', which is not a scalar
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric: float(np.mean([scores[category][metric] for category in categories]))
        for metric in per_category_metrics
        if metric != 'roc'
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy': metrics.balanced_multiclass_accuracy(
            truth_probabilities, prediction_probabilities, truth_weights.score_weight
        )
    }
    scores['overall'] = scores['aggregate']['balanced_accuracy']
    scores['validation'] = metrics.balanced_multiclass_accuracy(
        truth_probabilities, prediction_probabilities, truth_weights.validation_weight
    )

    return scores
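# Usage sketch: unlike the scalar metrics, 'roc' is excluded from the macro average above
# because it holds curve data rather than a single float. A hypothetical caller might pull
# the headline value and the per-category ROC data apart like this; the helper name is an
# assumption for illustration only.
def _example_split_scores(all_scores: ScoresType):
    # 'overall' is the balanced multiclass accuracy, weighted by score_weight
    overall = all_scores['overall']
    roc_curves = {
        category: category_scores['roc']
        for category, category_scores in all_scores.items()
        if isinstance(category_scores, dict) and 'roc' in category_scores
    }
    return overall, roc_curves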