def test_entities_that_would_overlap_keeps_longer_earlier_match(
    nlp: Language, patterns: List[Dict[str, Any]], doc: Doc
) -> None:
    """It matches the longest/earliest entities.

    A short fuzzy pattern ("Fake") is added on top of the fixture patterns,
    and the test asserts that the short pattern's label is absent from the
    entities the ruler sets on the doc.
    """
    # Keep the label in one place so the pattern and the assertion cannot
    # drift apart.
    short_label = "TEST"
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    ruler.add_patterns(
        [{"label": short_label, "pattern": "Fake", "type": "fuzzy"}]
    )
    doc = ruler(doc)
    # Assert on the label that was actually added above. Checking for a label
    # that no pattern in this test carries would pass regardless of how
    # overlaps are resolved.
    # NOTE(review): presumably the fixture patterns already match a longer
    # span containing "Fake" -- confirm against the `patterns`/`doc` fixtures.
    assert short_label not in [ent.label_ for ent in doc.ents]
def test_add_patterns_warns_if_spaczz_type_unrecognized(
    nlp: Language,
) -> None:
    """It emits a PatternTypeWarning when a pattern's type is unrecognized."""
    spaczz_ruler = SpaczzRuler(nlp)
    # "invalid" is the unrecognized type value under test.
    bad_type_patterns = [
        {"label": "GPE", "pattern": "Montana", "type": "invalid"},
    ]
    with pytest.warns(PatternTypeWarning):
        spaczz_ruler.add_patterns(bad_type_patterns)
def test_seeing_tokens_again(
    nlp: Language, patterns: list[dict[str, Any]], doc: Doc
) -> None:
    """If ruler has already seen tokens, it ignores them.

    An ADDRESS fuzzy pattern is added on top of the fixture patterns, and the
    test checks that an ADDRESS entity is present after the ruler runs.
    """
    address_pattern = {
        "label": "ADDRESS",
        "pattern": "122 Fake St, Apt 54",
        "type": "fuzzy",
    }
    spaczz_ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    spaczz_ruler.add_patterns([address_pattern])
    processed = spaczz_ruler(doc)
    found_labels = {ent.label_ for ent in processed.ents}
    assert "ADDRESS" in found_labels
def test_add_patterns_raises_error_pattern_not_iter_of_dict(
    nlp: Language,
) -> None:
    """It raises a TypeError when given a bare dict, not an iterable of dicts."""
    spaczz_ruler = SpaczzRuler(nlp)
    # Deliberately a single dict rather than a list containing one.
    bare_pattern = {"label": "GPE", "pattern": "Montana"}
    with pytest.raises(TypeError):
        spaczz_ruler.add_patterns(bare_pattern)
def test_add_patterns_raises_error_if_not_spaczz_pattern(
    nlp: Language,
) -> None:
    """It raises a ValueError for a pattern with only label and pattern keys."""
    spaczz_ruler = SpaczzRuler(nlp)
    # This pattern dict carries no "type" entry.
    untyped_patterns = [{"label": "GPE", "pattern": "Montana"}]
    with pytest.raises(ValueError):
        spaczz_ruler.add_patterns(untyped_patterns)