def test_matcher_pipe_with_context(nlp: Language) -> None:
    """Piping (doc, context) tuples with ``as_tuples=True`` yields them back as-is."""
    texts = ("test doc 1: Corvold", "test doc 2: Prosh")
    # Every doc is paired with the same context value.
    doc_stream = tuple((nlp.make_doc(text), "Jund") for text in texts)
    fuzzy_matcher = FuzzyMatcher(nlp.vocab)
    piped = fuzzy_matcher.pipe(doc_stream, as_tuples=True)
    assert list(piped) == list(doc_stream)
def test_matcher_pipe(nlp: Language) -> None:
    """Piping a stream of Doc objects yields the same Doc objects back."""
    texts = ("test doc 1: Corvold", "test doc 2: Prosh")
    doc_stream = tuple(nlp.make_doc(text) for text in texts)
    fuzzy_matcher = FuzzyMatcher(nlp.vocab)
    piped = fuzzy_matcher.pipe(doc_stream)
    assert list(piped) == list(doc_stream)
def test_matcher_pipe_with_matches(nlp: Language) -> None:
    """Piping with ``return_matches=True`` yields entries whose second item is the matches."""
    texts = ("test doc 1: Corvold", "test doc 2: Prosh")
    doc_stream = tuple(nlp.make_doc(text) for text in texts)
    fuzzy_matcher = FuzzyMatcher(nlp.vocab)
    # The patterns are deliberately spelled differently from the doc texts.
    patterns = [nlp.make_doc("Korvold"), nlp.make_doc("Prossh")]
    fuzzy_matcher.add("DRAGON", patterns)
    piped = fuzzy_matcher.pipe(doc_stream, return_matches=True)
    # Keep only the matches part of each yielded entry.
    matches = [item[1] for item in piped]
    expected = [[("DRAGON", 4, 5, 86)], [("DRAGON", 4, 5, 91)]]
    assert matches == expected
def test_add_with_more_patterns_than_explicit_kwargs_warns(
    matcher: FuzzyMatcher, nlp: Language
) -> None:
    """Adding two patterns with only one kwargs dict emits a ``KwargsWarning``."""
    patterns = [nlp.make_doc("Test1"), nlp.make_doc("Test2")]
    # One kwargs dict for two patterns: fewer kwargs than patterns.
    pattern_kwargs = [{"ignore_case": False}]
    with pytest.warns(KwargsWarning):
        matcher.add("TEST", patterns, pattern_kwargs)
def test_matcher_pipe_with_matches_and_context(nlp: Language) -> None:
    """Piping with ``return_matches`` and ``as_tuples`` yields matches alongside context."""
    texts = ("test doc 1: Corvold", "test doc 2: Prosh")
    doc_stream = tuple((nlp.make_doc(text), "Jund") for text in texts)
    fuzzy_matcher = FuzzyMatcher(nlp.vocab)
    patterns = [nlp.make_doc("Korvold"), nlp.make_doc("Prossh")]
    fuzzy_matcher.add("DRAGON", patterns)
    piped = fuzzy_matcher.pipe(doc_stream, return_matches=True, as_tuples=True)
    # item[0][1] is compared against the expected matches, item[1] against the context.
    matches = [(item[0][1], item[1]) for item in piped]
    expected = [
        ([("DRAGON", 4, 5, 86)], "Jund"),
        ([("DRAGON", 4, 5, 91)], "Jund"),
    ]
    assert matches == expected
def matcher(
    nlp: Language,
) -> FuzzyMatcher:
    """Build a ``FuzzyMatcher`` preloaded with ANIMAL, SOUND and NAME patterns."""
    fuzzy_matcher = FuzzyMatcher(nlp.vocab)

    animal_patterns = [nlp.make_doc(text) for text in ("Heifer", "chicken")]
    # Two kwargs dicts are supplied for the two ANIMAL patterns
    # (presumably applied one per pattern - confirm against FuzzyMatcher.add).
    fuzzy_matcher.add(
        "ANIMAL",
        animal_patterns,
        kwargs=[{"ignore_case": False}, {}],
    )

    fuzzy_matcher.add("SOUND", [nlp.make_doc("mooo")])

    # NAME patterns register add_name_ent as the on_match callback.
    fuzzy_matcher.add("NAME", [nlp.make_doc("Steven")], on_match=add_name_ent)
    return fuzzy_matcher
def test_remove_label_raises_error_if_label_not_in_matcher(
    matcher: FuzzyMatcher,
) -> None:
    """Removing a label that was never added raises a ``ValueError``."""
    # "TEST" is not added in this test before the removal attempt.
    missing_label = "TEST"
    with pytest.raises(ValueError):
        matcher.remove(missing_label)
def test_remove_label(matcher: FuzzyMatcher, nlp: Language) -> None:
    """A label added to the matcher is no longer contained in it after removal."""
    label = "TEST"
    matcher.add(label, [nlp.make_doc("test")])
    assert label in matcher
    matcher.remove(label)
    assert label not in matcher
def test_add_where_kwargs_are_not_dicts_raises_error(
    matcher: FuzzyMatcher, nlp: Language
) -> None:
    """Passing kwargs entries that are not dicts raises a ``TypeError``."""
    # A bare string stands in where a kwargs dict would normally go.
    bad_kwargs = ["ignore_case"]
    with pytest.raises(TypeError):
        matcher.add("TEST", [nlp.make_doc("Test1")], bad_kwargs)
def test_add_without_doc_objects_raises_error(
    matcher: FuzzyMatcher,
) -> None:
    """Passing patterns that are not Doc objects raises a ``TypeError``."""
    # A plain string is used as the pattern instead of a Doc.
    bad_patterns = ["Test1"]
    with pytest.raises(TypeError):
        matcher.add("TEST", bad_patterns)