Example #1
def test_analyzer_with_generated_text(test_input, acceptance_threshold):
    """
        Test analyzer with a generated dataset text file
        :param test_input: input text file location
        :param acceptance_threshold: minimim precision/recall
         allowed for tests to pass
    """
    # read test input from generated file

    import os

    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = read_synth_dataset(test_input.format(dir_path))

    updated_samples = Evaluator.align_entity_types(
        input_samples=input_samples, entities_mapping=PresidioAnalyzerWrapper.presidio_entities_map
    )

    analyzer = PresidioAnalyzerWrapper()
    evaluator = Evaluator(model=analyzer)
    evaluated_samples = evaluator.evaluate_all(updated_samples)
    scores = evaluator.calculate_score(evaluation_results=evaluated_samples)

    assert acceptance_threshold <= scores.pii_precision
    assert acceptance_threshold <= scores.pii_recall
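
The test_input path and acceptance_threshold values come from pytest parametrization or fixtures that are not shown here. A minimal sketch of one way to supply them (the path template and threshold below are illustrative assumptions, not the project's actual configuration):

import pytest

# Hypothetical parametrization; the real suite may load these values from
# conftest.py or a parameters file instead.
analyzer_test_cases = [("{}/data/generated_small.txt", 0.3)]

@pytest.mark.parametrize("test_input, acceptance_threshold", analyzer_test_cases)
def test_analyzer_with_generated_text(test_input, acceptance_threshold):
    ...  # body as in Example #1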
Example #2
def test_flair_simple(small_dataset):

    flair_model = FlairModel(model_path="ner", entities_to_keep=["PERSON"])
    evaluator = Evaluator(model=flair_model)
    evaluation_results = evaluator.evaluate_all(small_dataset)
    scores = evaluator.calculate_score(evaluation_results)

    assert_model_results_gt(scores, "PERSON", 0)
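
This test depends on a small_dataset fixture and an assert_model_results_gt helper defined elsewhere in the suite. A rough sketch of what they might look like, reusing the dataset and score accessors shown in the other examples (the file name and fixture scope are assumptions):

import os
import pytest
from presidio_evaluator import InputSample

@pytest.fixture(scope="session")
def small_dataset():
    # Assumed to load the same small synthetic dataset used in the other examples.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    return InputSample.read_dataset_json(
        os.path.join(dir_path, "data", "generated_small.json"), length=10
    )

def assert_model_results_gt(scores, entity, threshold):
    # Assumed helper: entity-level precision and recall must exceed the threshold.
    assert scores.entity_precision_dict[entity] > threshold
    assert scores.entity_recall_dict[entity] > threshold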
Example #3
def score_model(
    model: BaseModel,
    entities_to_keep: List[str],
    input_samples: List[InputSample],
    verbose: bool = False,
    beta: float = 2.5,
) -> EvaluationResult:
    """
    Run data through a model and gather results and stats
    """

    print("Evaluating samples")

    evaluator = Evaluator(model=model, entities_to_keep=entities_to_keep)
    evaluated_samples = evaluator.evaluate_all(input_samples)

    print("Estimating metrics")
    evaluation_result = evaluator.calculate_score(
        evaluation_results=evaluated_samples, beta=beta)
    precision = evaluation_result.pii_precision
    recall = evaluation_result.pii_recall
    entity_recall = evaluation_result.entity_recall_dict
    entity_precision = evaluation_result.entity_precision_dict
    f = evaluation_result.pii_f
    errors = evaluation_result.model_errors
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F {beta}: {f}")
    print(f"Precision per entity: {entity_precision}")
    print(f"Recall per entity: {entity_recall}")

    if verbose:

        false_negatives = [
            str(mistake) for mistake in errors if mistake.error_type == "FN"
        ]
        false_positives = [
            str(mistake) for mistake in errors if mistake.error_type == "FP"
        ]
        other_mistakes = [
            str(mistake) for mistake in errors
            if mistake.error_type not in ["FN", "FP"]
        ]

        print("False negatives: ")
        print("\n".join(false_negatives))
        print("\n******************\n")

        print("False positives: ")
        print("\n".join(false_positives))
        print("\n******************\n")

        print("Other mistakes: ")
        print("\n".join(other_mistakes))

    return evaluation_result
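
One possible way to call score_model, reusing the analyzer wrapper and dataset loader from the other examples (import statements are omitted here and module paths may differ between versions):

import os

# Load the synthetic dataset used throughout these examples.
dir_path = os.path.dirname(os.path.realpath(__file__))
samples = read_synth_dataset(os.path.join(dir_path, "data/generated_small.txt"))

# Score the Presidio analyzer on PERSON entities only, with verbose error output.
results = score_model(
    model=PresidioAnalyzerWrapper(),
    entities_to_keep=["PERSON"],
    input_samples=samples,
    verbose=True,
)
print(results.pii_f)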
Example #4
def test_crf_simple(small_dataset):
    train_test_ratios = [0.7, 0.3]

    train, test = split_dataset(small_dataset, train_test_ratios)

    crf_model = CRFModel(model_pickle_path=None, entities_to_keep=["PERSON"])
    crf_model.fit(train)
    evaluator = Evaluator(model=crf_model)
    evaluation_results = evaluator.evaluate_all(test)
    scores = evaluator.calculate_score(evaluation_results)

    assert_model_results_gt(scores, "PERSON", 0)
Example #5
def test_dataset_to_metric_identity_model():
    import os

    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = InputSample.read_dataset_json(
        "{}/data/generated_small.json".format(dir_path), length=10)

    model = IdentityTokensMockModel()
    evaluator = Evaluator(model=model)
    evaluation_results = evaluator.evaluate_all(input_samples)
    metrics = evaluator.calculate_score(evaluation_results)

    assert metrics.pii_precision == 1
    assert metrics.pii_recall == 1
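
IdentityTokensMockModel is a test double defined elsewhere in the suite. A minimal sketch of the idea, assuming models expose a predict(sample) method returning one tag per token (in the real suite this would subclass the evaluator's base model class):

from typing import List

class IdentityTokensMockModel:
    """Mock model that echoes the gold tags, so precision and recall are both 1."""

    def predict(self, sample) -> List[str]:
        # Returning the sample's own tags guarantees a perfect prediction.
        return sample.tags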
Example #6
def test_evaluate_multiple_examples_correct_statistics():
    prediction = ["U-PERSON", "O", "O", "U-PERSON", "O", "O"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=["PERSON"])
    input_sample = InputSample("My name is Raphael or David",
                               masked=None,
                               spans=None)
    input_sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    input_sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    evaluated = evaluator.evaluate_all(
        [input_sample, input_sample, input_sample, input_sample])
    scores = evaluator.calculate_score(evaluated)
    assert scores.pii_precision == 0.5
    assert scores.pii_recall == 0.5
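
The expected scores follow directly from the tags: in every copy of the sample, the prediction marks tokens 0 ("My") and 3 ("Raphael") as PERSON, while the gold tags mark tokens 3 ("Raphael") and 5 ("David"). Each sample therefore contributes one true positive, one false positive and one false negative, giving precision = recall = 1/2 no matter how many identical samples are evaluated.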
Example #7
def test_spacy_simple():
    import os
    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = read_synth_dataset(
        os.path.join(dir_path, "data/generated_small.txt"))

    spacy_model = SpacyModel(model_name="en_core_web_lg",
                             entities_to_keep=['PERSON'])
    evaluator = Evaluator(model=spacy_model)
    evaluation_results = evaluator.evaluate_all(input_samples)
    scores = evaluator.calculate_score(evaluation_results)

    np.testing.assert_almost_equal(scores.pii_precision,
                                   scores.entity_precision_dict['PERSON'])
    np.testing.assert_almost_equal(scores.pii_recall,
                                   scores.entity_recall_dict['PERSON'])
    assert scores.pii_recall > 0
    assert scores.pii_precision > 0
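
This test assumes the en_core_web_lg spaCy model is already installed; if it is not, one way to fetch it from within Python (equivalent to running spaCy's download command) is:

import spacy

try:
    spacy.load("en_core_web_lg")
except OSError:
    # Model not installed yet; download it once.
    from spacy.cli import download
    download("en_core_web_lg")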
Example #8
def test_dataset_to_metric_50_50_model():
    import os

    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = InputSample.read_dataset_json(
        "{}/data/generated_small.json".format(dir_path), length=100)

    # Replace 50% of the predictions with a list of "O"
    model = FiftyFiftyIdentityTokensMockModel()
    evaluator = Evaluator(model=model, entities_to_keep=["PERSON"])
    evaluation_results = evaluator.evaluate_all(input_samples)
    metrics = evaluator.calculate_score(evaluation_results)

    print(metrics.pii_precision)
    print(metrics.pii_recall)
    print(metrics.pii_f)

    assert metrics.pii_precision == 1
    assert metrics.pii_recall < 0.75
    assert metrics.pii_recall > 0.25
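
The asymmetry in the assertions reflects how the mock behaves: for the samples where it predicts only "O" there are no entity predictions at all, so precision is unaffected while recall drops. A rough sketch of such a mock (the real FiftyFiftyIdentityTokensMockModel may be implemented differently):

from typing import List

class FiftyFiftyIdentityTokensMockModel:
    """Mock that echoes the gold tags for every other sample and predicts all "O" otherwise."""

    def __init__(self):
        self.counter = 0

    def predict(self, sample) -> List[str]:
        self.counter += 1
        if self.counter % 2 == 0:
            # No entity predictions: lowers recall, leaves precision untouched.
            return ["O"] * len(sample.tags)
        # Perfect prediction for this sample.
        return sample.tags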
Example #9
def test_flair_simple():
    import os

    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = read_synth_dataset(
        os.path.join(dir_path, "data/generated_small.txt"))

    model = SequenceTagger.load("ner-ontonotes-fast")  # .load('ner')

    flair_model = FlairModel(model=model, entities_to_keep=["PERSON"])
    evaluator = Evaluator(model=flair_model)
    evaluation_results = evaluator.evaluate_all(input_samples)
    scores = evaluator.calculate_score(evaluation_results)

    np.testing.assert_almost_equal(scores.pii_precision,
                                   scores.entity_precision_dict["PERSON"])
    np.testing.assert_almost_equal(scores.pii_recall,
                                   scores.entity_recall_dict["PERSON"])
    assert scores.pii_recall > 0
    assert scores.pii_precision > 0
Example #10
def test_test_crf_simple():
    import os
    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = read_synth_dataset(
        os.path.join(dir_path, "data/generated_small.txt"))

    model_path = os.path.abspath(
        os.path.join(dir_path, "..", "model-outputs/crf.pickle"))

    crf_model = CRFModel(model_pickle_path=model_path,
                         entities_to_keep=['PERSON'])
    evaluator = Evaluator(model=crf_model)
    evaluation_results = evaluator.evaluate_all(input_samples)
    scores = evaluator.calculate_score(evaluation_results)

    np.testing.assert_almost_equal(scores.pii_precision,
                                   scores.entity_precision_dict['PERSON'])
    np.testing.assert_almost_equal(scores.pii_recall,
                                   scores.entity_recall_dict['PERSON'])
    assert scores.pii_recall > 0
    assert scores.pii_precision > 0