def test_add_with_more_explicit_kwargs_than_patterns_warns(
    matcher: RegexMatcher,
) -> None:
    """Adding more kwargs dicts than patterns emits a `KwargsWarning`."""
    patterns = ["Test1"]
    # Two kwargs dicts for one pattern: one more than there are patterns.
    surplus_kwargs = [{"ignore_case": True}, {"ignore_case": True}]
    with pytest.warns(KwargsWarning):
        matcher.add("TEST", patterns, surplus_kwargs)
def test_matcher_pipe_with_matches(nlp: Language) -> None:
    """With `return_matches=True`, `pipe` yields entries carrying each doc's matches."""
    texts = ("test doc 1: United States", "test doc 2: US")
    doc_stream = tuple(nlp.make_doc(text) for text in texts)

    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add("GPE", ["[Uu](nited|\\.?) ?[Ss](tates|\\.?)"])

    piped = regex_matcher.pipe(doc_stream, return_matches=True)
    # The second item of every yielded entry is that doc's match list.
    matches = [item[1] for item in piped]

    expected = [[("GPE", 4, 6)], [("GPE", 4, 5)]]
    assert matches == expected
def test_matcher_pipe_with_matches_and_context(nlp: Language) -> None:
    """With `as_tuples=True` too, `pipe` yields matches paired with the context."""
    context = "Country"
    texts = ("test doc 1: United States", "test doc 2: US")
    # Each input item is a (doc, context) pair, as `as_tuples=True` is passed.
    doc_stream = tuple((nlp.make_doc(text), context) for text in texts)

    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add("GPE", ["[Uu](nited|\\.?) ?[Ss](tates|\\.?)"])

    piped = regex_matcher.pipe(doc_stream, return_matches=True, as_tuples=True)
    # item[0] holds the doc and its matches; item[1] is the passed-through context.
    matches = [(item[0][1], item[1]) for item in piped]

    expected = [([("GPE", 4, 6)], "Country"), ([("GPE", 4, 5)], "Country")]
    assert matches == expected
def matcher(nlp: Language) -> RegexMatcher:
    """Build a `RegexMatcher` preloaded with GPE, STREET and ZIP patterns.

    NOTE(review): the tests in this module request `matcher` as an argument,
    so this is presumably registered as a pytest fixture -- confirm the
    decorator is present in the file.
    """
    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add(
        "GPE",
        ["(?i)[U](nited|\\.?) ?[S](tates|\\.?)"],
        on_match=add_gpe_ent,
    )
    # These labels are added with `predef` enabled; presumably the pattern
    # strings name predefined regexes rather than being regexes -- confirm.
    for label, pattern_name in (("STREET", "street_addresses"), ("ZIP", "zip_codes")):
        regex_matcher.add(label, [pattern_name], kwargs=[{"predef": True}])
    return regex_matcher
def test_add_where_kwargs_are_not_dicts_raises_error(
    matcher: RegexMatcher,
) -> None:
    """Supplying kwargs entries that are not dicts makes `add` raise a TypeError."""
    not_dicts = ["ignore_case"]  # a plain string where a dict is expected
    with pytest.raises(TypeError):
        matcher.add("TEST", ["Test1"], not_dicts)
def test_add_str_pattern_outside_list_raises_error(
    matcher: RegexMatcher,
) -> None:
    """A bare string given as patterns, not wrapped in an iterable, raises a TypeError."""
    bare_pattern = "Test1"
    with pytest.raises(TypeError):
        matcher.add("TEST", bare_pattern)
def test_add_without_string_pattern_raises_error(
    matcher: RegexMatcher,
    nlp: Language,
) -> None:
    """Patterns that are not strings (here a `make_doc` result) raise a TypeError."""
    with pytest.raises(TypeError):
        non_string_patterns = [nlp.make_doc("Test1")]
        matcher.add("TEST", non_string_patterns)
def test_remove_label(matcher: RegexMatcher) -> None:
    """After `remove`, a previously added label is no longer in the matcher."""
    label = "TEST"

    matcher.add(label, ["test"])
    assert label in matcher

    matcher.remove(label)
    assert label not in matcher