Exemplo n.º 1
0
def trie_test_nested_search():
    """
    Check that every nested match is reported, not only the longest one.

    The ordering encoded by the assertions below: of any two matches,
    the one with the smaller end position comes first; if two matches
    end at the same position, the longer one comes first. Each match is
    compared as a ``(sequence, start_offset)`` tuple.
    """

    # ``range`` and ``list(zip(...))`` replace ``xrange`` and a bare
    # ``zip`` so the comparisons behave the same on Python 2 and 3
    # (on Python 3 ``zip`` returns an iterator that never equals a list).
    nested_trie = Trie(["a" * i for i in range(1, 11)])
    matches = nested_trie.find_in_string("a" * 10)
    # Outer loop: ascending end position; inner loop: longest first.
    # ``end_position - seq_len`` is the offset at which the match starts.
    expected_matches = [
        ("a" * seq_len, end_position - seq_len)
        for end_position in range(1, 11)
        for seq_len in range(end_position, 0, -1)
    ]
    assert expected_matches == matches

    # "ba", "baa", ...: every pattern starts at offset 0 and each ends
    # one character later than the previous, so shortest comes first.
    same_start_strings = ["b" + "a" * i for i in range(1, 11)]
    same_start_trie = Trie(same_start_strings)
    expected_same_start = list(zip(same_start_strings, [0] * 10))
    assert expected_same_start == same_start_trie.find_in_string("b" + "a" * 10)

    # "ab", "aab", ...: every pattern ends on the final "b", so the
    # longest (starting at offset 0) comes first and the shortest
    # (starting at offset 9) comes last.
    same_end_strings = ["a" * i + "b" for i in range(1, 11)]
    same_end_trie = Trie(same_end_strings)
    expected_same_end = list(zip(reversed(same_end_strings), range(10)))
    assert expected_same_end == same_end_trie.find_in_string("a" * 10 + "b")
Exemplo n.º 2
0
def trie_test_empty_samples():
    """
    Nothing is to be returned if we are searching for nothing.

    A trie built from an empty list of samples must report no matches,
    whatever text is searched.
    """

    empty_trie = Trie([])
    # ``string.ascii_letters`` exists on both Python 2 and 3, whereas
    # ``string.letters`` was removed in Python 3.
    assert [] == empty_trie.find_in_string(string.ascii_letters)
    assert [] == empty_trie.find_in_string(string.digits)
    # Every character code from 1 to 255 inclusive.
    all_chars = "".join(chr(code) for code in range(1, 256))
    assert [] == empty_trie.find_in_string(all_chars)
    # A long input of one million characters.
    assert [] == empty_trie.find_in_string("a" * 1000000)
Exemplo n.º 3
0
def trie_test_empty_string():
    """
    Searching for the empty string returns every occurrence of it.

    That is one occurrence before each character of the searched text
    and one after the last character. When the searched text is itself
    empty, those two coincide and a single match at offset 0 is
    expected.
    """
    trie = Trie([""])
    assert [("", 0)] == trie.find_in_string("")
    # Ten characters give eleven positions: offsets 0 through 10.
    # ``range`` is used instead of ``xrange`` so this runs on Python 3.
    assert [("", i) for i in range(11)] == trie.find_in_string("a" * 10)