def test_with_fuzzy_string_transpose(self):
    """Fuzzy matching ("bar", distance 1) accepts adjacent transpositions."""
    fuzzy = TokenSet.from_fuzzy_string("bar", 1)
    cases = [
        ("abr", ["abr"]),  # 'b'/'a' transposed
        ("bra", ["bra"]),  # 'a'/'r' transposed
        ("foo", []),       # unrelated word: no match
    ]
    for word, expected in cases:
        candidate = TokenSet.from_string(word)
        assert candidate.intersect(fuzzy).to_list() == expected
def test_with_fuzzy_string_deletion(self):
    """Fuzzy matching ("bar", distance 1) accepts single-character deletions."""
    fuzzy = TokenSet.from_fuzzy_string("bar", 1)
    cases = [
        ("ar", ["ar"]),    # 'b' deleted
        ("br", ["br"]),    # 'a' deleted
        ("ba", ["ba"]),    # 'r' deleted
        ("bar", ["bar"]),  # exact word still matches
        ("foo", []),       # unrelated word: no match
    ]
    for word, expected in cases:
        candidate = TokenSet.from_string(word)
        assert candidate.intersect(fuzzy).to_list() == expected
def test_with_fuzzy_string_substitution(self):
    """Fuzzy matching ("car", distance 1) accepts single-character substitutions."""
    fuzzy = TokenSet.from_fuzzy_string("car", 1)
    cases = [
        ("bar", ["bar"]),  # 'c' -> 'b'
        ("cur", ["cur"]),  # 'a' -> 'u'
        ("cat", ["cat"]),  # 'r' -> 't'
        ("car", ["car"]),  # exact word still matches
        ("foo", []),       # more than one edit away: no match
    ]
    for word, expected in cases:
        candidate = TokenSet.from_string(word)
        assert candidate.intersect(fuzzy).to_list() == expected
def test_with_fuzzy_string_insertion(self):
    """Fuzzy matching ("bar", distance 1) accepts single-character insertions."""
    fuzzy = TokenSet.from_fuzzy_string("bar", 1)
    cases = [
        ("bbar", ["bbar"]),  # extra 'b' at the front
        ("baar", ["baar"]),  # extra 'a' in the middle
        ("barr", ["barr"]),  # extra 'r' at the end
        ("bar", ["bar"]),    # exact word still matches
        ("ba", ["ba"]),      # one deletion is also within distance 1
        ("foo", []),         # unrelated word: no match
        ("bara", ["bara"]),  # extra 'a' appended
    ]
    for word, expected in cases:
        candidate = TokenSet.from_string(word)
        assert candidate.intersect(fuzzy).to_list() == expected
def test_catastrophic_backtracking_with_leading_characters(self):
    """A long run of identical characters must not make '*'-prefix matching blow up."""
    haystack = TokenSet.from_string("f" * 100)
    pattern = TokenSet.from_string("*f")
    matches = haystack.intersect(pattern).to_list()
    # Exactly one word is stored, so exactly one should come back.
    assert len(matches) == 1
def test_contained_wildcard_backtracking_no_intersection(self):
    """An embedded '*' that requires backtracking still rejects a non-match."""
    corpus = TokenSet.from_string("ababc")
    query = TokenSet.from_string("a*ac")
    assert corpus.intersect(query).to_list() == []
def test_leading_wildcard_backtracking_no_intersection(self):
    """A leading '*' that requires backtracking still rejects a non-match."""
    corpus = TokenSet.from_string("aaacbab")
    query = TokenSet.from_string("*abc")
    assert corpus.intersect(query).to_list() == []
def test_fuzzy_string_transpose(self):
    """Distance 2 reaches 'bca' from 'abc' (two edits of transposition)."""
    candidate = TokenSet.from_string("bca")
    fuzzy = TokenSet.from_fuzzy_string("abc", 2)
    assert candidate.intersect(fuzzy).to_list() == ["bca"]
def test_leading_wildcard_intersection(self):
    """A leading '*' matches any prefix before the literal suffix."""
    corpus = TokenSet.from_string("cat")
    query = TokenSet.from_string("*t")
    result = corpus.intersect(query)
    assert set(result.to_list()) == {"cat"}
def test_no_intersection(self):
    """Two disjoint single-word sets intersect to the empty set."""
    corpus = TokenSet.from_string("cat")
    query = TokenSet.from_string("bar")
    result = corpus.intersect(query)
    assert not result.to_list()
def test_from_string_with_trailing_wildcard(self):
    """A trailing '*' is built as a final node that loops back onto itself."""
    token = TokenSet.from_string("a*")
    wildcard = token.edges["a"].edges["*"]
    # The self-loop lets the wildcard consume any number of characters.
    assert wildcard.edges["*"] == wildcard
    assert wildcard.final
def test_wildcard_zero_or_more_characters(self):
    """'*' matches zero characters, so 'foo*' accepts plain 'foo'."""
    corpus = TokenSet.from_string("foo")
    query = TokenSet.from_string("foo*")
    result = corpus.intersect(query)
    assert set(result.to_list()) == {"foo"}
def test_contained_wildcard_no_intersection(self):
    """An embedded '*' does not relax the surrounding literal characters."""
    corpus = TokenSet.from_string("foo")
    query = TokenSet.from_string("b*r")
    result = corpus.intersect(query)
    assert not result.to_list()
def test_contained_wildcard_intersection(self):
    """An embedded '*' matches the characters between the literal ends."""
    corpus = TokenSet.from_string("foo")
    query = TokenSet.from_string("f*o")
    result = corpus.intersect(query)
    assert set(result.to_list()) == {"foo"}
def test_leading_wildcard_no_intersection(self):
    """A leading '*' still requires the literal suffix to match."""
    corpus = TokenSet.from_string("cat")
    query = TokenSet.from_string("*r")
    result = corpus.intersect(query)
    assert not result.to_list()
def test_leading_atrailing_wildcard_backtracking_intersection(self):
    """Multiple embedded wildcards that need backtracking still find the match.

    NOTE(review): the method name looks like a typo for
    "...leading_and_trailing..."; kept as-is to avoid renaming a test id.
    """
    corpus = TokenSet.from_string("acbaabab")
    query = TokenSet.from_string("a*ba*b")
    assert corpus.intersect(query).to_list() == ["acbaabab"]
def test_from_string_without_wildcard(self):
    """Node ids are deterministic once the global id counter is reset."""
    TokenSet._next_id = 1  # reset so the str() output below is predictable
    token = TokenSet.from_string("a")
    assert str(token) == "0a2"
    # The node reached by consuming 'a' accepts the word.
    assert token.edges["a"].final
def test_fuzzy_string_substitution(self):
    """Distance 2 reaches 'axx' from 'abc' (two substitutions)."""
    candidate = TokenSet.from_string("axx")
    fuzzy = TokenSet.from_fuzzy_string("abc", 2)
    assert candidate.intersect(fuzzy).to_list() == ["axx"]
def test_to_list_includes_single_words(self):
    """A set built from one word lists exactly that word."""
    word = "bat"
    token_set = TokenSet.from_string(word)
    assert set(token_set.to_list()) == {word}
def test_fuzzy_string_deletion(self):
    """Distance 2 reaches 'a' from 'abc' (two deletions)."""
    candidate = TokenSet.from_string("a")
    fuzzy = TokenSet.from_fuzzy_string("abc", 2)
    assert candidate.intersect(fuzzy).to_list() == ["a"]
def test_builds_a_token_set_for_the_corpus(self):
    """The builder's token set contains the corpus word 'test'."""
    needle = TokenSet.from_string("test")
    found = self.builder.token_set.intersect(needle).to_list()
    assert "test" in found
def test_simple_intersection(self):
    """Two identical single-word sets intersect to that word."""
    corpus = TokenSet.from_string("cat")
    query = TokenSet.from_string("cat")
    result = corpus.intersect(query)
    assert set(result.to_list()) == {"cat"}