def test_evaluate_sample_wrong_entities_to_keep_correct_statistics():
    """Evaluate a sample with a model that keeps only an unrelated entity.

    The mocked prediction and the gold tags both mark "walrus" as ANIMAL,
    but the model is configured with ``entities_to_keep=['SPACESHIP']``.
    The test expects all four tokens to be counted under ("O", "O").
    """
    mock_model = MockTokensModel(
        prediction=["O", "O", "O", "U-ANIMAL"],
        entities_to_keep=["SPACESHIP"],
    )

    walrus_sample = InputSample(
        full_text="I am the walrus",
        masked="I am the [ANIMAL]",
        spans=None,
    )
    # Tokens and tags are assigned by hand after construction.
    walrus_sample.tokens = ["I", "am", "the", "walrus"]
    walrus_sample.tags = ["O", "O", "O", "U-ANIMAL"]

    outcome = mock_model.evaluate_sample(walrus_sample)

    both_outside = ("O", "O")
    assert outcome.results[both_outside] == 4
def test_evaluate_same_entity_correct_statistics():
    """Check per-pair counts when the prediction adds one extra ANIMAL.

    Gold tags mark only the last token as ANIMAL; the mocked prediction
    marks the second and the last token. Expected counts in ``results``:
    two ("O", "O"), one ("ANIMAL", "ANIMAL") and one ("O", "ANIMAL").
    """
    mock_model = MockTokensModel(
        prediction=["O", "U-ANIMAL", "O", "U-ANIMAL"],
        entities_to_keep=["ANIMAL"],
    )

    # NOTE(review): full_text/masked mention "dog" as an ANIMAL, while the
    # tokens/tags assigned below use "am" tagged "O". The assertions follow
    # the explicit tokens/tags - confirm the text mismatch is intentional.
    animal_sample = InputSample(
        full_text="I dog the walrus",
        masked="I [ANIMAL] the [ANIMAL]",
        spans=None,
    )
    animal_sample.tokens = ["I", "am", "the", "walrus"]
    animal_sample.tags = ["O", "O", "O", "U-ANIMAL"]

    counts = mock_model.evaluate_sample(animal_sample).results

    assert counts[("O", "O")] == 2
    assert counts[("ANIMAL", "ANIMAL")] == 1
    assert counts[("O", "ANIMAL")] == 1
def test_evaluate_multiple_tokens_partial_match_correct_statistics():
    """Score a prediction that only partially covers a three-token entity.

    The gold tags mark the last three tokens as one ANIMAL entity
    (B-/I-/L- prefixes), while the mocked prediction tags only the first
    two of them and labels the final token "O". The test expects a PII
    precision of 1 and a PII recall of 4 / 6.
    """
    # Imported locally so the test does not depend on the file's import block.
    import math

    prediction = ["O", "O", "O", "B-ANIMAL", "L-ANIMAL", "O"]
    model = MockTokensModel(prediction=prediction, entities_to_keep=["ANIMAL"])

    # NOTE(review): the text spells "amaericanus" while the token list below
    # says "americanus"; left untouched because tokens are assigned
    # explicitly - confirm the text is not used by the evaluation.
    sample = InputSample(
        "I am the walrus amaericanus magnifico", masked=None, spans=None
    )
    sample.tokens = ["I", "am", "the", "walrus", "americanus", "magnifico"]
    sample.tags = ["O", "O", "O", "B-ANIMAL", "I-ANIMAL", "L-ANIMAL"]

    evaluated = model.evaluate_sample(sample)
    evaluation = model.calculate_score([evaluated])

    # The scores are presumably floating-point ratios; compare with a
    # tolerance so the test does not hinge on the exact rounding of the
    # division inside calculate_score.
    assert math.isclose(evaluation.pii_precision, 1)
    assert math.isclose(evaluation.pii_recall, 4 / 6)
def test_evaluator_simple():
    """Score a single sample whose prediction equals its gold tags.

    Both the mocked prediction and the sample tags are
    ["O", "O", "O", "U-ANIMAL"]; the test expects PII precision and PII
    recall to both equal 1.
    """
    gold_tags = ["O", "O", "O", "U-ANIMAL"]
    mock_model = MockTokensModel(
        # Separate list with the same contents as the gold tags.
        prediction=list(gold_tags),
        entities_to_keep=["ANIMAL"],
    )

    walrus_sample = InputSample(
        full_text="I am the walrus",
        masked="I am the [ANIMAL]",
        spans=None,
    )
    walrus_sample.tokens = ["I", "am", "the", "walrus"]
    walrus_sample.tags = gold_tags

    per_sample = [mock_model.evaluate_sample(walrus_sample)]
    scores = mock_model.calculate_score(per_sample)

    assert scores.pii_precision == 1
    assert scores.pii_recall == 1