Exemplo n.º 1
0
def test_matcher_warns_if_unknown_pattern_elements(nlp: Language) -> None:
    """Calling the matcher with an unknown pattern element emits a `UserWarning`."""
    matcher = TokenMatcher(nlp.vocab)
    # NOTE(review): lowercase "fuzzy" is presumably the unrecognized element;
    # the other patterns in this file spell the key "FUZZY" -- confirm.
    matcher.add("TEST", [[{"TEXT": {"fuzzy": "test"}}]])
    doc = nlp("test")
    # Only the call is wrapped, so the warning must be raised at match time,
    # not when the pattern is added or the doc is created.
    with pytest.warns(UserWarning):
        matcher(doc)
Exemplo n.º 2
0
def test_matcher_pipe_with_context(nlp: Language) -> None:
    """With `as_tuples=True`, `pipe` yields the (doc, context) pairs unchanged."""
    first_pair = (nlp("test doc 1: Corvold"), "Jund")
    second_pair = (nlp("test doc 2: Prosh"), "Jund")
    doc_stream = (first_pair, second_pair)
    token_matcher = TokenMatcher(nlp.vocab)
    # No patterns are added; the output is compared against the input pairs.
    piped = list(token_matcher.pipe(doc_stream, as_tuples=True))
    assert piped == [first_pair, second_pair]
Exemplo n.º 3
0
def test_matcher_pipe(nlp: Language) -> None:
    """`pipe` yields the same docs, in order, that were fed into it."""
    first_doc = nlp("test doc 1: Corvold")
    second_doc = nlp("test doc 2: Prosh")
    doc_stream = (first_doc, second_doc)
    token_matcher = TokenMatcher(nlp.vocab)
    # No patterns are added; the output is compared against the input docs.
    piped = list(token_matcher.pipe(doc_stream))
    assert piped == [first_doc, second_doc]
Exemplo n.º 4
0
def test_matcher_pipe_with_matches(nlp: Language) -> None:
    """With `return_matches=True`, `pipe` pairs each doc with its matches."""
    doc_stream = (
        nlp("test doc 1: Corvold"),
        nlp("test doc 2: Prosh"),
    )
    # Both patterns are deliberate misspellings of the names in the docs.
    dragon_patterns = [
        [{"TEXT": {"FUZZY": "Korvold"}}],
        [{"TEXT": {"FUZZY": "Prossh"}}],
    ]
    token_matcher = TokenMatcher(nlp.vocab)
    token_matcher.add("DRAGON", dragon_patterns)
    piped = token_matcher.pipe(doc_stream, return_matches=True)
    # Position 1 of every yielded entry is read as that doc's match list.
    matches = [doc_and_matches[1] for doc_and_matches in piped]
    expected_hit = ("DRAGON", 4, 5, None)
    assert matches == [[expected_hit], [expected_hit]]
Exemplo n.º 5
0
def matcher(model: Language) -> TokenMatcher:
    """Build a `TokenMatcher` preloaded with the `DATA` and `NAME` labels."""
    # Two alternative patterns registered under the "DATA" label.
    sql_database_access = [
        {"TEXT": "SQL"},
        {"LOWER": {"FREGEX": "(database){s<=1}"}},
        {"LOWER": {"FUZZY": "access"}, "POS": "NOUN"},
    ]
    sequel_db = [{"TEXT": {"FUZZY": "Sequel"}}, {"LOWER": "db"}]
    # Single one-token pattern registered under the "NAME" label.
    garfield = [{"TEXT": {"FUZZY": "Garfield"}}]

    token_matcher = TokenMatcher(vocab=model.vocab)
    token_matcher.add("DATA", [sql_database_access, sequel_db])
    token_matcher.add("NAME", [garfield])
    return token_matcher
Exemplo n.º 6
0
def test_add_with_zero_len_pattern(matcher: TokenMatcher) -> None:
    """Adding a pattern that contains no token specs raises a `ValueError`."""
    empty_pattern: list = []
    with pytest.raises(ValueError):
        matcher.add("TEST", [empty_pattern])
Exemplo n.º 7
0
def test_add_without_sequence_of_patterns_raises_error(
    matcher: TokenMatcher,
) -> None:
    """Passing one flat pattern instead of a list of patterns raises a `TypeError`."""
    # A single pattern (list of dicts), not wrapped in an outer list.
    flat_pattern = [{"TEXT": "error"}]
    with pytest.raises(TypeError):
        matcher.add("TEST", flat_pattern)  # type: ignore
Exemplo n.º 8
0
def test_matcher_returns_empty_list_if_no_matches(nlp: Language) -> None:
    """The matcher returns `[]` when nothing in the `Doc` matches."""
    token_matcher = TokenMatcher(nlp.vocab)
    unmatched_pattern = [{"TEXT": {"FUZZY": "blah"}}]
    token_matcher.add("TEST", [unmatched_pattern])
    doc = nlp("No matches here.")
    found = token_matcher(doc)
    # Compare against an empty list explicitly rather than relying on falsiness.
    assert found == []
Exemplo n.º 9
0
def test_remove_label_raises_error_if_label_not_in_matcher(
    matcher: TokenMatcher,
) -> None:
    """Removing a label the matcher does not hold raises a `ValueError`."""
    # This test never adds "TEST" before trying to remove it.
    missing_label = "TEST"
    with pytest.raises(ValueError):
        matcher.remove(missing_label)
Exemplo n.º 10
0
def test_remove_label(matcher: TokenMatcher) -> None:
    """A label that was added is no longer contained after `remove`."""
    label = "TEST"
    matcher.add(label, [[{"TEXT": "test"}]])
    # Confirm the label is present first so the removal check is meaningful.
    assert label in matcher
    matcher.remove(label)
    assert label not in matcher