Exemplo n.º 1
0
def test_entities_that_would_overlap_keeps_longer_earlier_match(
    nlp: Language, patterns: List[Dict[str, Any]], doc: Doc
) -> None:
    """It matches the longest/earliest entities.

    A short fuzzy pattern ("Fake", labelled "TEST") is registered on top of
    the fixture patterns. After running the ruler, no entity may carry the
    "TEST" label -- presumably because a longer fixture match covers the same
    tokens (the fixtures are defined elsewhere; confirm there).
    """
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    ruler.add_patterns([{"label": "TEST", "pattern": "Fake", "type": "fuzzy"}])
    doc = ruler(doc)
    # Assert on the label registered above ("TEST"). Checking a label that no
    # pattern uses would pass vacuously and verify nothing.
    assert "TEST" not in [ent.label_ for ent in doc.ents]
Exemplo n.º 2
0
def test_add_patterns_warns_if_spaczz_type_unrecognized(
    nlp: Language,
) -> None:
    """It emits a PatternTypeWarning for a pattern whose type is "invalid"."""
    ruler = SpaczzRuler(nlp)
    unknown_type_pattern = {"label": "GPE", "pattern": "Montana", "type": "invalid"}
    with pytest.warns(PatternTypeWarning):
        ruler.add_patterns([unknown_type_pattern])
Exemplo n.º 3
0
def test_seeing_tokens_again(
    nlp: Language, patterns: list[dict[str, Any]], doc: Doc
) -> None:
    """If ruler has already seen tokens, it ignores them.

    An extra fuzzy "ADDRESS" pattern is added to the fixture patterns and the
    resulting entities must include that label.
    """
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    address_pattern = {
        "label": "ADDRESS",
        "pattern": "122 Fake St, Apt 54",
        "type": "fuzzy",
    }
    ruler.add_patterns([address_pattern])
    found_labels = [ent.label_ for ent in ruler(doc).ents]
    assert "ADDRESS" in found_labels
Exemplo n.º 4
0
def test_ruler_added_ents_have_custom_attr(
    nlp: Language, patterns: List[Dict[str, Any]], doc: Doc
) -> None:
    """Every entity on the processed doc has a truthy ``spaczz_ent`` attribute."""
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    processed = ruler(doc)
    for ent in processed.ents:
        assert ent._.spaczz_ent
Exemplo n.º 5
0
def test_labels(nlp: Language, patterns: list[dict[str, Any]]) -> None:
    """The ruler exposes exactly the five expected labels."""
    expected_labels = ["GPE", "STREET", "DRUG", "NAME", "BAND"]
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    for label in expected_labels:
        assert label in ruler.labels
    # Membership alone would allow extra labels; pin the total as well.
    assert len(ruler.labels) == 5
Exemplo n.º 6
0
def test_calling_ruler_with_overwrite_ents(
    nlp: Language, patterns: List[Dict[str, Any]], doc: Doc
) -> None:
    """With ``spaczz_overwrite_ents=True`` a pre-set "WRONG" entity is gone."""
    sr = SpaczzRuler(nlp, spaczz_patterns=patterns, spaczz_overwrite_ents=True)
    # Seed the doc with an entity the ruler is expected to replace.
    wrong_ent = Span(doc, 2, 4, label="WRONG")
    doc.ents = doc.ents + (wrong_ent,)
    result = sr(doc)
    assert all(ent.label_ != "WRONG" for ent in result.ents)
Exemplo n.º 7
0
def test_ent_ids(nlp: Language, patterns: List[Dict[str, Any]]) -> None:
    """The ruler exposes exactly the three expected entity ids."""
    expected_ids = ("Antibiotic", "Developer", "USA")
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    for ent_id in expected_ids:
        assert ent_id in ruler.ent_ids
    # Membership alone would allow extra ids; pin the total as well.
    assert len(ruler.ent_ids) == 3
Exemplo n.º 8
0
def test_add_patterns_with_other_pipeline_components(
    patterns: List[Dict[str, Any]],
) -> None:
    """Patterns can be added via the pipeline when an "ner" pipe is present.

    The ruler is placed first in a blank English pipeline that also holds an
    "ner" component, fetched back by name, and given the fixture patterns;
    it must then report five patterns.
    """
    nlp = spacy.blank("en")
    ner = nlp.create_pipe("ner")
    nlp.add_pipe(ner)
    ruler = SpaczzRuler(nlp)
    nlp.add_pipe(ruler, first=True)
    piped_ruler = nlp.get_pipe("spaczz_ruler")
    piped_ruler.add_patterns(patterns)
    assert len(ruler) == 5
Exemplo n.º 9
0
def test_calling_ruler(
    nlp: Language, patterns: List[Dict[str, Any]], doc: Doc
) -> None:
    """Calling the ruler sets five entities carrying spaczz match details."""
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    doc = ruler(doc)
    ents = list(doc.ents)
    for ent in ents:
        assert ent._.spaczz_ent
    # Spot-check the custom attributes on the first two entities.
    assert ents[0]._.spaczz_ent_ratio == 86
    assert ents[1]._.spaczz_ent_counts == (0, 0, 0)
    assert len(doc.ents) == 5
Exemplo n.º 10
0
def test_calling_ruler_without_overwrite_will_keep_exisiting_ents(
    nlp: Language, patterns: List[Dict[str, Any]], doc: Doc
) -> None:
    """Without the overwrite flag, both pre-set "WRONG" entities survive."""
    sr = SpaczzRuler(nlp, spaczz_patterns=patterns)
    preexisting = (
        Span(doc, 2, 4, label="WRONG"),
        Span(doc, 15, 16, label="WRONG"),
    )
    doc.ents = doc.ents + preexisting
    doc = sr(doc)
    wrong_count = sum(1 for ent in doc.ents if ent.label_ == "WRONG")
    assert wrong_count == 2
Exemplo n.º 11
0
def test_set_entity_ids(nlp: Language, patterns: List[Dict[str, Any]]) -> None:
    """Entities found through the pipeline carry both a label and an ent id."""
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    nlp.add_pipe(ruler)
    doc = nlp("Grint Anderson was prescribed Zithroma.")
    assert len(doc.ents) == 2
    # Exactly two entities are guaranteed by the assertion above.
    name_ent, drug_ent = doc.ents
    assert name_ent.label_ == "NAME"
    assert name_ent.ent_id_ == "Developer"
    assert drug_ent.label_ == "DRUG"
    assert drug_ent.ent_id_ == "Antibiotic"
Exemplo n.º 12
0
def test_spaczz_ruler_to_from_disk(
    nlp: Language, patterns: list[dict[str, Any]]
) -> None:
    """A ruler saved to a directory is restored with patterns and overwrite.

    The ruler is written to ``<tmpdir>/ruler``, which must be a directory,
    then loaded into a fresh ruler. Pattern count, labels, patterns and the
    ``overwrite`` flag are compared against the source ruler.
    """
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns, spaczz_overwrite_ents=True)
    expected_count = len(patterns)
    assert len(ruler) == expected_count
    assert len(ruler.labels) == 5
    with tempfile.TemporaryDirectory() as tmpdir:
        ruler_path = f"{tmpdir}/ruler"
        ruler.to_disk(ruler_path)
        assert os.path.isdir(ruler_path)
        new_ruler = SpaczzRuler(nlp).from_disk(ruler_path)
    assert len(new_ruler) == expected_count
    assert len(new_ruler.labels) == 5
    assert len(new_ruler.patterns) == len(ruler.patterns)
    for original_pattern in ruler.patterns:
        assert original_pattern in new_ruler.patterns
    assert sorted(new_ruler.labels) == sorted(ruler.labels)
    assert new_ruler.overwrite is True
Exemplo n.º 13
0
def test_spaczz_patterns_to_from_disk(
    nlp: Language, patterns: list[dict[str, Any]]
) -> None:
    """It writes the patterns to disk and reads them back correctly.

    The ruler is saved to a ``.jsonl`` path inside a temporary directory and
    loaded into a fresh ruler. Pattern count, labels and patterns must match
    the source ruler, while ``overwrite`` on the restored ruler is False.
    """
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns, spaczz_overwrite_ents=True)
    assert len(ruler) == len(patterns)
    assert len(ruler.labels) == 5
    # Write inside a temporary directory so the output file is removed with it
    # and the existence check targets the file the ruler actually wrote.
    with tempfile.TemporaryDirectory() as tmpdir:
        patterns_path = os.path.join(tmpdir, "patterns.jsonl")
        ruler.to_disk(patterns_path)
        assert os.path.isfile(patterns_path)
        new_ruler = SpaczzRuler(nlp)
        new_ruler = new_ruler.from_disk(patterns_path)
    assert len(new_ruler) == len(patterns)
    assert len(new_ruler.labels) == 5
    assert len(new_ruler.patterns) == len(ruler.patterns)
    for pattern in ruler.patterns:
        assert pattern in new_ruler.patterns
    assert sorted(new_ruler.labels) == sorted(ruler.labels)
    # NOTE(review): the source ruler was built with spaczz_overwrite_ents=True;
    # presumably a .jsonl file stores patterns only, so the flag is not
    # restored -- confirm against SpaczzRuler.to_disk/from_disk.
    assert new_ruler.overwrite is False
Exemplo n.º 14
0
def test_spaczz_ruler_serialize_bytes(
    nlp: Language, patterns: List[Dict[str, Any]]
) -> None:
    """A ruler round-tripped through bytes keeps its patterns and labels."""
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    expected_count = len(patterns)
    assert len(ruler) == expected_count
    assert len(ruler.labels) == 4
    ruler_bytes = ruler.to_bytes()
    # A fresh ruler starts empty, so anything it holds later came from bytes.
    new_ruler = SpaczzRuler(nlp)
    assert len(new_ruler) == 0
    assert len(new_ruler.labels) == 0
    new_ruler = new_ruler.from_bytes(ruler_bytes)
    assert len(new_ruler) == expected_count
    assert len(new_ruler.labels) == 4
    assert len(new_ruler.patterns) == len(ruler.patterns)
    for original_pattern in ruler.patterns:
        assert original_pattern in new_ruler.patterns
    assert sorted(new_ruler.labels) == sorted(ruler.labels)
Exemplo n.º 15
0
def test_add_patterns_raises_error_pattern_not_iter_of_dict(
    nlp: Language,
) -> None:
    """Passing a single dict instead of a list of dicts raises TypeError."""
    ruler = SpaczzRuler(nlp)
    bare_pattern = {"label": "GPE", "pattern": "Montana"}
    with pytest.raises(TypeError):
        ruler.add_patterns(bare_pattern)
Exemplo n.º 16
0
def test_add_patterns_raises_error_if_not_spaczz_pattern(
    nlp: Language,
) -> None:
    """A pattern dict lacking a "type" key raises ValueError."""
    ruler = SpaczzRuler(nlp)
    untyped_pattern = {"label": "GPE", "pattern": "Montana"}
    with pytest.raises(ValueError):
        ruler.add_patterns([untyped_pattern])
Exemplo n.º 17
0
def test_add_patterns(nlp: Language, patterns: List[Dict[str, Any]]) -> None:
    """A ruler built from the fixture patterns reports a length of five."""
    pattern_count = len(SpaczzRuler(nlp, spaczz_patterns=patterns))
    assert pattern_count == 5
Exemplo n.º 18
0
def test_ruler_with_defaults_as_not_dict_raises_error(nlp: Language) -> None:
    """Passing a string as ``spaczz_fuzzy_defaults`` raises TypeError."""
    not_a_dict = "ignore_case"
    with pytest.raises(TypeError):
        SpaczzRuler(nlp, spaczz_fuzzy_defaults=not_a_dict)
Exemplo n.º 19
0
def test_ruler_with_changed_matcher_defaults(nlp: Language) -> None:
    """It initializes with changed defaults in the matchers."""
    custom_defaults = {"ignore_case": False}
    ruler = SpaczzRuler(nlp, spaczz_fuzzy_defaults=custom_defaults)
    # Compare against a fresh literal rather than the dict that was passed in.
    assert ruler.fuzzy_matcher.defaults == {"ignore_case": False}
Exemplo n.º 20
0
def test_empty_default_ruler(nlp: Language) -> None:
    """It initializes an empty ruler: no fuzzy and no regex patterns."""
    ruler = SpaczzRuler(nlp)
    for stored_patterns in (ruler.fuzzy_patterns, ruler.regex_patterns):
        assert not stored_patterns
Exemplo n.º 21
0
def test__create_label_w_no_ent_id(nlp: Language) -> None:
    """With an ent id of None, ``_create_label`` returns the label unchanged."""
    created = SpaczzRuler(nlp)._create_label("TEST", None)
    assert created == "TEST"
Exemplo n.º 22
0
def test_contains(nlp: Language, patterns: List[Dict[str, Any]]) -> None:
    """The ``in`` operator reports the "GPE" label as present in the ruler."""
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    has_gpe = "GPE" in ruler
    assert has_gpe
Exemplo n.º 23
0
def test_calling_ruler(
    nlp: Language, patterns: List[Dict[str, Any]], doc: Doc
) -> None:
    """Calling the ruler on the doc leaves it with five entities."""
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    ent_count = len(ruler(doc).ents)
    assert ent_count == 5
Exemplo n.º 24
0
def test_patterns(nlp: Language, patterns: List[Dict[str, Any]]) -> None:
    """Every fixture pattern appears in the ruler's ``patterns`` listing."""
    ruler = SpaczzRuler(nlp, spaczz_patterns=patterns)
    for pattern in patterns:
        assert pattern in ruler.patterns