Example #1
def test_evaluate_multiple_examples_correct_statistics():
    prediction = ["U-PERSON", "O", "O", "U-PERSON", "O", "O"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=["PERSON"])
    input_sample = InputSample("My name is Raphael or David",
                               masked=None,
                               spans=None)
    input_sample.tokens = ["My", "name", "is", "Raphael", "or", "David"]
    input_sample.tags = ["O", "O", "O", "U-PERSON", "O", "U-PERSON"]

    evaluated = evaluator.evaluate_all(
        [input_sample, input_sample, input_sample, input_sample])
    scores = evaluator.calculate_score(evaluated)
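    # Per sample: "Raphael" is a true positive, "My" a false positive and
    # "David" a false negative, so both precision and recall come out at 0.5.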
    assert scores.pii_precision == 0.5
    assert scores.pii_recall == 0.5
Example #2
def test_evaluate_multiple_tokens_no_match_match_correct_statistics():
    prediction = ["O", "O", "O", "B-SPACESHIP", "L-SPACESHIP", "O"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=["ANIMAL"])
    sample = InputSample("I am the walrus amaericanus magnifico",
                         masked=None,
                         spans=None)
    sample.tokens = ["I", "am", "the", "walrus", "americanus", "magnifico"]
    sample.tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]

    evaluated = evaluator.evaluate_sample(sample, prediction)
    evaluation = evaluator.calculate_score([evaluated])
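    # All predicted entities are SPACESHIP, which entities_to_keep filters out,
    # so there are no PII predictions: precision is undefined (NaN) and the
    # ANIMAL entity is missed entirely (recall = 0).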

    assert np.isnan(evaluation.pii_precision)
    assert evaluation.pii_recall == 0
Example #3
def test_evaluate_multiple_entities_to_keep_correct_statistics():
    prediction = ["O", "U-ANIMAL", "O", "U-ANIMAL"]
    entities_to_keep = ["ANIMAL", "PLANT", "SPACESHIP"]
    model = MockTokensModel(prediction=prediction)
    evaluator = Evaluator(model=model, entities_to_keep=entities_to_keep)

    sample = InputSample(full_text="I dog the walrus",
                         masked="I [ANIMAL] the [ANIMAL]",
                         spans=None)
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    evaluation_result = evaluator.evaluate_sample(sample, prediction)
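    # results counts token-level (annotated, predicted) pairs: "dog" is a
    # false positive ANIMAL, "walrus" a true positive, and the rest are O/O.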
    assert evaluation_result.results[("O", "O")] == 2
    assert evaluation_result.results[("ANIMAL", "ANIMAL")] == 1
    assert evaluation_result.results[("O", "ANIMAL")] == 1
Example #4
def test_evaluator_simple():
    prediction = ["O", "O", "O", "U-ANIMAL"]
    model = MockTokensModel(prediction=prediction, entities_to_keep=["ANIMAL"])

    evaluator = Evaluator(model=model)
    sample = InputSample(full_text="I am the walrus",
                         masked="I am the [ANIMAL]",
                         spans=None)
    sample.tokens = ["I", "am", "the", "walrus"]
    sample.tags = ["O", "O", "O", "U-ANIMAL"]

    evaluated = evaluator.evaluate_sample(sample, prediction)
    final_evaluation = evaluator.calculate_score([evaluated])

    assert final_evaluation.pii_precision == 1
    assert final_evaluation.pii_recall == 1
Example #5
def test_analyzer_simple_input():
    model = PresidioAnalyzerWrapper(entities_to_keep=["PERSON"])
    sample = InputSample(
        full_text="My name is Mike",
        masked="My name is [PERSON]",
        spans=[Span("PERSON", "Mike", 10, 14)],
        create_tags_from_span=True,
    )

    prediction = model.predict(sample)
    evaluator = Evaluator(model=model)

    evaluated = evaluator.evaluate_sample(sample, prediction)
    metrics = evaluator.calculate_score([evaluated])

    assert metrics.pii_precision == 1
    assert metrics.pii_recall == 1
Example #6
def test_align_entity_types_wrong_mapping_exception():

    sample1 = InputSample(
        "I live in ABC",
        spans=[
            Span("A", "a", 0, 1),
            Span("A", "a", 10, 11),
            Span("B", "b", 100, 101)
        ],
        create_tags_from_span=False,
    )

    entities_mapping = {"Z": "z"}
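    # The spans use entity types "A" and "B", neither of which appears in the
    # mapping, so align_entity_types is expected to raise ValueError.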

    with pytest.raises(ValueError):
        Evaluator.align_entity_types(input_samples=[sample1],
                                     entities_mapping=entities_mapping)
Example #7
def test_spacy_simple():
    import os
    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = read_synth_dataset(
        os.path.join(dir_path, "data/generated_small.txt"))

    spacy_model = SpacyModel(model_name="en_core_web_lg",
                             entities_to_keep=['PERSON'])
    evaluator = Evaluator(model=spacy_model)
    evaluation_results = evaluator.evaluate_all(input_samples)
    scores = evaluator.calculate_score(evaluation_results)

    np.testing.assert_almost_equal(scores.pii_precision,
                                   scores.entity_precision_dict['PERSON'])
    np.testing.assert_almost_equal(scores.pii_recall,
                                   scores.entity_recall_dict['PERSON'])
    assert scores.pii_recall > 0
    assert scores.pii_precision > 0
Example #8
def test_dataset_to_metric_50_50_model():
    import os

    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = InputSample.read_dataset_json(
        "{}/data/generated_small.json".format(dir_path), length=100)

    # Replace 50% of the predictions with a list of "O"
    model = FiftyFiftyIdentityTokensMockModel()
    evaluator = Evaluator(model=model, entities_to_keep=["PERSON"])
    evaluation_results = evaluator.evaluate_all(input_samples)
    metrics = evaluator.calculate_score(evaluation_results)

    print(metrics.pii_precision)
    print(metrics.pii_recall)
    print(metrics.pii_f)
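    # The mock predicts the gold tags for roughly half the samples and all-"O"
    # for the rest, so it produces no false positives (precision = 1) but
    # misses about half of the PERSON entities (recall around 0.5).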

    assert metrics.pii_precision == 1
    assert metrics.pii_recall < 0.75
    assert metrics.pii_recall > 0.25
Example #9
def test_flair_simple():
    import os

    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = read_synth_dataset(
        os.path.join(dir_path, "data/generated_small.txt"))

    model = SequenceTagger.load("ner-ontonotes-fast")  # .load('ner')

    flair_model = FlairModel(model=model, entities_to_keep=["PERSON"])
    evaluator = Evaluator(model=flair_model)
    evaluation_results = evaluator.evaluate_all(input_samples)
    scores = evaluator.calculate_score(evaluation_results)

    np.testing.assert_almost_equal(scores.pii_precision,
                                   scores.entity_precision_dict["PERSON"])
    np.testing.assert_almost_equal(scores.pii_recall,
                                   scores.entity_recall_dict["PERSON"])
    assert scores.pii_recall > 0
    assert scores.pii_precision > 0
Example #10
def test_crf_simple():
    import os
    dir_path = os.path.dirname(os.path.realpath(__file__))
    input_samples = read_synth_dataset(
        os.path.join(dir_path, "data/generated_small.txt"))

    model_path = os.path.abspath(
        os.path.join(dir_path, "..", "model-outputs/crf.pickle"))

    crf_model = CRFModel(model_pickle_path=model_path,
                         entities_to_keep=['PERSON'])
    evaluator = Evaluator(model=crf_model)
    evaluation_results = evaluator.evaluate_all(input_samples)
    scores = evaluator.calculate_score(evaluation_results)

    np.testing.assert_almost_equal(scores.pii_precision,
                                   scores.entity_precision_dict['PERSON'])
    np.testing.assert_almost_equal(scores.pii_recall,
                                   scores.entity_recall_dict['PERSON'])
    assert scores.pii_recall > 0
    assert scores.pii_precision > 0
Example #11
def score_presidio_recognizer(
    recognizer: EntityRecognizer,
    entities_to_keep: List[str],
    input_samples: Optional[List[InputSample]] = None,
    labeling_scheme: str = "BILUO",
    with_nlp_artifacts: bool = False,
    verbose: bool = False,
) -> EvaluationResult:
    """
    Run data through one EntityRecognizer and gather results and stats
    """

    if not input_samples:
        print("Reading dataset")
        input_samples = InputSample.read_dataset_json(
            "../../data/synth_dataset_v2.json")
    else:
        input_samples = list(input_samples)

    print(
        "Preparing dataset by aligning entity names to Presidio's entity names"
    )

    updated_samples = Evaluator.align_entity_types(
        input_samples,
        entities_mapping=PresidioAnalyzerWrapper.presidio_entities_map)

    model = PresidioRecognizerWrapper(
        recognizer=recognizer,
        entities_to_keep=entities_to_keep,
        labeling_scheme=labeling_scheme,
        nlp_engine=SpacyNlpEngine(),
        with_nlp_artifacts=with_nlp_artifacts,
    )
    return score_model(
        model=model,
        entities_to_keep=entities_to_keep,
        input_samples=updated_samples,
        verbose=verbose,
    )
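A minimal usage sketch (not part of the original source; it assumes Presidio's built-in CreditCardRecognizer from presidio_analyzer.predefined_recognizers is available and that the default dataset path above exists):

from presidio_analyzer.predefined_recognizers import CreditCardRecognizer

# Score a single built-in recognizer on the default synthetic dataset
results = score_presidio_recognizer(
    recognizer=CreditCardRecognizer(),
    entities_to_keep=["CREDIT_CARD"],
)
print(results.pii_precision, results.pii_recall)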
Example #12
def score_presidio_analyzer(
    input_samples: Optional[List[InputSample]] = None,
    entities_to_keep: Optional[List[str]] = None,
    labeling_scheme: str = "BILUO",
    verbose: bool = True,
) -> EvaluationResult:
    """"""
    if not input_samples:
        print("Reading dataset")
        input_samples = read_synth_dataset("../../data/synth_dataset.txt")
    else:
        input_samples = list(input_samples)

    print(
        "Preparing dataset by aligning entity names to Presidio's entity names"
    )

    updated_samples = Evaluator.align_entity_types(input_samples)

    from collections import Counter

    count_per_entity = Counter(
        span.entity_type
        for input_sample in updated_samples
        for span in input_sample.spans
    )
    if verbose:
        print("Count per entity:")
        print(count_per_entity)
    analyzer = PresidioAnalyzerWrapper(entities_to_keep=entities_to_keep,
                                       labeling_scheme=labeling_scheme)

    return score_model(
        model=analyzer,
        entities_to_keep=list(count_per_entity.keys()),
        input_samples=updated_samples,
        verbose=verbose,
    )
Example #13
def test_align_entity_types_correct_output():

    sample1 = InputSample(
        "I live in ABC",
        spans=[
            Span("A", "a", 0, 1),
            Span("A", "a", 10, 11),
            Span("B", "b", 100, 101)
        ],
        create_tags_from_span=False,
    )
    sample2 = InputSample(
        "I live in ABC",
        spans=[
            Span("A", "a", 0, 1),
            Span("A", "a", 10, 11),
            Span("C", "c", 100, 101)
        ],
        create_tags_from_span=False,
    )
    samples = [sample1, sample2]
    mapping = {
        "A": "1",
        "B": "2",
        "C": "1",
    }

    new_samples = Evaluator.align_entity_types(samples, mapping)

    count_per_entity = Counter()
    for sample in new_samples:
        for span in sample.spans:
            count_per_entity[span.entity_type] += 1
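    # Four "A" spans and one "C" span map to "1"; the single "B" span maps to "2".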

    assert count_per_entity["1"] == 5
    assert count_per_entity["2"] == 1