Exemplo n.º 1
0
def test_entities_that_would_overlap_keeps_longer_earlier_match(
        nlp: Language, patterns: List[Dict[str, Any]], doc: Doc) -> None:
    """It keeps the longer/earlier match when entities would overlap.

    A short fuzzy pattern labelled ``TEST`` is added on top of the fixture
    patterns, and the test asserts that no entity with that label ends up
    in the processed doc.
    """
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    ruler.add_patterns([{"label": "TEST", "pattern": "Fake", "type": "fuzzy"}])
    doc = ruler(doc)
    # The pattern above carries the label "TEST" (its *text* is "Fake").
    # Checking for a "FAKE" label could never fail, so assert on the label
    # that was actually registered.
    # NOTE(review): presumably a fixture pattern yields a longer match that
    # covers "Fake" -- confirm against the `patterns`/`doc` fixtures.
    assert "TEST" not in [ent.label_ for ent in doc.ents]
Exemplo n.º 2
0
def test_add_patterns_warns_if_spaczz_type_unrecognized(
    nlp: Language,
) -> None:
    """It emits a PatternTypeWarning when a pattern's type is unrecognized."""
    # A pattern whose "type" value is the literal string "invalid".
    unknown_type_pattern = {
        "label": "GPE",
        "pattern": "Montana",
        "type": "invalid",
    }
    ruler = SpaczzRuler(nlp)
    with pytest.warns(PatternTypeWarning):
        ruler.add_patterns([unknown_type_pattern])
Exemplo n.º 3
0
def test_seeing_tokens_again(
    nlp: Language, patterns: list[dict[str, Any]], doc: Doc
) -> None:
    """If ruler has already seen tokens, it ignores them.

    Adds an extra fuzzy ``ADDRESS`` pattern on top of the fixture patterns
    and checks that an ``ADDRESS`` entity is present in the processed doc.
    """
    address_pattern = {
        "label": "ADDRESS",
        "pattern": "122 Fake St, Apt 54",
        "type": "fuzzy",
    }
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    ruler.add_patterns([address_pattern])
    processed = ruler(doc)
    found_labels = [ent.label_ for ent in processed.ents]
    assert "ADDRESS" in found_labels
Exemplo n.º 4
0
def test_add_patterns_raises_error_pattern_not_iter_of_dict(
    nlp: Language,
) -> None:
    """It raises a TypeError when given a bare dict instead of a list of dicts."""
    # Deliberately NOT wrapped in a list: a single dict is passed directly.
    bare_pattern = {"label": "GPE", "pattern": "Montana"}
    ruler = SpaczzRuler(nlp)
    with pytest.raises(TypeError):
        ruler.add_patterns(bare_pattern)
Exemplo n.º 5
0
def test_add_patterns_raises_error_if_not_spaczz_pattern(
    nlp: Language,
) -> None:
    """It raises a ValueError for a pattern dict that has no "type" key."""
    # Only "label" and "pattern" are supplied; the "type" key is omitted.
    untyped_pattern = {"label": "GPE", "pattern": "Montana"}
    ruler = SpaczzRuler(nlp)
    with pytest.raises(ValueError):
        ruler.add_patterns([untyped_pattern])