Example #1
from rasa.nlu.test import (
    collect_successful_entity_predictions,
    collect_incorrect_entity_predictions,
)


# The test parameters below are supplied by pytest fixtures defined elsewhere
# in the test module.
def test_collect_entity_predictions(
    entity_results, targets, predictions, successes, errors
):
    actual = collect_successful_entity_predictions(entity_results, targets, predictions)

    assert len(successes) == len(actual)
    assert successes == actual

    actual = collect_incorrect_entity_predictions(entity_results, targets, predictions)

    assert len(errors) == len(actual)
    assert errors == actual
Example #2
from typing import Any, Dict, List, Text

# EntityEvaluationResult and the two collect_* helpers are assumed to live in
# rasa.nlu.test, as in Example #3 below.
from rasa.nlu.test import (
    EntityEvaluationResult,
    collect_successful_entity_predictions,
    collect_incorrect_entity_predictions,
)


def test_collect_entity_predictions(
    entity_results: List[EntityEvaluationResult],
    targets: List[Text],
    predictions: List[Text],
    successes: List[Dict[Text, Any]],
    errors: List[Dict[Text, Any]],
):
    actual = collect_successful_entity_predictions(entity_results, targets, predictions)

    assert len(successes) == len(actual)
    assert successes == actual

    actual = collect_incorrect_entity_predictions(entity_results, targets, predictions)

    assert len(errors) == len(actual)
    assert errors == actual
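Both tests rely on fixture data defined elsewhere in the test module. As a rough illustration of how such a test could be driven, here is a minimal, hypothetical parametrization with empty inputs (the test name and values are assumptions, not part of the Rasa test suite); with no entity results, both collection helpers should return empty lists:

import pytest

from rasa.nlu.test import (
    collect_successful_entity_predictions,
    collect_incorrect_entity_predictions,
)


# Hypothetical edge case: with no entity results there is nothing to collect,
# so both helpers should return empty lists.
@pytest.mark.parametrize(
    "entity_results, targets, predictions, successes, errors",
    [([], [], [], [], [])],
)
def test_collect_entity_predictions_empty(
    entity_results, targets, predictions, successes, errors
):
    assert collect_successful_entity_predictions(entity_results, targets, predictions) == successes
    assert collect_incorrect_entity_predictions(entity_results, targets, predictions) == errors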
Example #3
    @staticmethod
    def __evaluate_entities(entity_results, extractors: Set[Text]) -> Dict:
        """Creates summary statistics for each entity extractor.

        Logs precision, recall, and F1 per entity type for each extractor.

        Args:
            entity_results: entity evaluation results
            extractors: entity extractors to consider

        Returns: dictionary with evaluation results
        """
        from rasa.model_testing import get_evaluation_metrics
        from rasa.nlu.test import (
            NO_ENTITY,
            align_all_entity_predictions,
            merge_labels,
            substitute_labels,
            collect_successful_entity_predictions,
            collect_incorrect_entity_predictions,
        )
        from rasa.shared.nlu.constants import NO_ENTITY_TAG

        # Align the predictions of all extractors with the gold labels, merge
        # them into flat label lists, and normalise the "no entity" tag.
        aligned_predictions = align_all_entity_predictions(entity_results, extractors)
        merged_targets = merge_labels(aligned_predictions)
        merged_targets = substitute_labels(merged_targets, NO_ENTITY_TAG, NO_ENTITY)

        result = {}

        for extractor in extractors:
            merged_predictions = merge_labels(aligned_predictions, extractor)
            merged_predictions = substitute_labels(
                merged_predictions, NO_ENTITY_TAG, NO_ENTITY
            )

            report, precision, f1, accuracy = get_evaluation_metrics(
                merged_targets,
                merged_predictions,
                output_dict=False,
                exclude_label=NO_ENTITY,
            )

            successes = collect_successful_entity_predictions(
                entity_results, merged_predictions, merged_targets
            )
            errors = collect_incorrect_entity_predictions(
                entity_results, merged_predictions, merged_targets
            )

            result[extractor] = {
                "total_count": len(successes) + len(errors),
                "success_count": len(successes),
                "failure_count": len(errors),
                "precision": precision,
                "f1_score": f1,
                "accuracy": accuracy,
                # "successes": successes,
                "errors": errors,
            }

        return result
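The per-extractor dictionary returned by __evaluate_entities can be consumed directly, for example to print a one-line summary per extractor. A minimal sketch (the extractor name and the numbers are made up for illustration):

# Hypothetical consumption of the summary dictionary returned above.
evaluation = {
    "DIETClassifier": {
        "total_count": 10,
        "success_count": 8,
        "failure_count": 2,
        "precision": 0.85,
        "f1_score": 0.84,
        "accuracy": 0.80,
        "errors": [],
    }
}

for extractor_name, stats in evaluation.items():
    print(
        f"{extractor_name}: {stats['success_count']}/{stats['total_count']} entities correct "
        f"(precision={stats['precision']:.2f}, F1={stats['f1_score']:.2f})"
    )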