def test_find_self():
    """Check that the top suggestion for a word is never that same word.

    Builds a graph from four words in a RamStorage file, wraps it in a
    GraphCorrector, and asserts for each word that the first suggestion
    differs from the word that was asked about.
    """
    words = sorted(u("book bake bike bone").split())
    storage = RamStorage()
    graph_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, graph_file)

    reader = fst.GraphReader(storage.open_file("test"))
    corrector = spelling.GraphCorrector(reader)
    for word in ("book", "bake", "bike", "bone"):
        assert corrector.suggest(word)[0] != word
def test_insert_bytes():
    """Insert bytes keys into a graph and check flatten() returns them."""
    # This test is only meaningful on Python 3
    expected = [b("alfa"), b("bravo"), b("charlie")]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in expected:
        writer.insert(item)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    assert list(cursor.flatten()) == expected
def __init__(self, word_file, graph_file):
    """Open the word graph and load the word table.

    :param word_file: path to a UTF-8 text file. Every line that splits
        into at least two space-separated tokens adds an entry to
        ``self.dict`` mapping the stripped first token to the integer
        value of the stripped second token.
    :param graph_file: path to the graph file. It is opened through a
        ``FileStorage`` rooted at the file's directory and wrapped in an
        ``fst.GraphReader`` stored on ``self.graph``.
    :raises ValueError: if the second token of a line is not an integer.
    """
    # Resolve the path once and hand the storage only the bare file name.
    # NOTE(review): assumes FileStorage.open_file() resolves names relative
    # to the storage folder -- in that case passing the whole relative path
    # would apply the directory twice ("d/g" -> "d/d/g"), and a bare file
    # name would produce an empty storage folder.
    folder, filename = os.path.split(os.path.abspath(graph_file))
    storage = FileStorage(folder)
    self.graph = fst.GraphReader(storage.open_file(filename))

    self.dict = {}
    with codecs.open(word_file, 'r', 'utf-8') as words:
        for line in words:
            tokens = line.split(" ")
            if len(tokens) >= 2:
                self.dict[tokens[0].strip()] = int(tokens[1].strip())
def _fst_roundtrip(domain, t):
    """Write ``domain`` to a graph and assert it reads back unchanged.

    :param domain: list of ``(key, value)`` pairs, inserted in the given
        order and compared against ``list(cursor.flatten_v())``.
    :param t: value type, passed as ``vtype`` to both the GraphWriter and
        the GraphReader.
    """
    with TempStorage() as st:
        gw = fst.GraphWriter(st.create_file("test"), vtype=t)
        gw.start_field("_")
        for key, value in domain:
            gw.insert(key, value)
        gw.finish_field()
        gw.close()

        f = st.open_file("test")
        try:
            gr = fst.GraphReader(f, vtype=t)
            cur = fst.Cursor(gr)
            assert list(cur.flatten_v()) == domain
        finally:
            # Close the file even when the comparison fails, so the handle
            # is not still open when the TempStorage context exits.
            f.close()
def test_insert_unicode():
    """Insert non-ASCII keys and check flatten_strings() returns them."""
    expected = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for text in expected:
        writer.insert(text)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    assert list(cursor.flatten_strings()) == expected
def test_within_unicode():
    """Check fst.within() on a graph holding non-ASCII keys.

    Queries the graph with a three-character string whose middle character
    differs from the stored key and expects exactly that stored key back.
    """
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for text in keys:
        writer.insert(text)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    matches = list(fst.within(reader, u("\uc774.\ud76c")))
    assert matches == [u("\uc774\uc124\ud76c")]
def test_fields():
    """Write two fields into one graph file and read each back separately.

    Field "f1" is finished explicitly; field "f2" is still open when the
    writer is closed. Each field is then read through its own cursor,
    rooted at ``gr.root(<field name>)``.
    """
    f1_keys = ["a", "aa", "ab"]
    f2_keys = ["ba", "baa", "bab"]

    with TempStorage() as st:
        gw = fst.GraphWriter(st.create_file("test"))
        gw.start_field("f1")
        for key in f1_keys:
            gw.insert(key)
        gw.finish_field()

        gw.start_field("f2")
        for key in f2_keys:
            gw.insert(key)
        gw.close()

        gr = fst.GraphReader(st.open_file("test"))
        try:
            cur1 = fst.Cursor(gr, gr.root("f1"))
            cur2 = fst.Cursor(gr, gr.root("f2"))
            assert list(cur1.flatten_strings()) == f1_keys
            assert list(cur2.flatten_strings()) == f2_keys
        finally:
            # Close the reader even when an assertion fails, so it is not
            # still open when the TempStorage context exits.
            gr.close()
def words_to_corrector(words):
    """Return a GraphCorrector over a RamStorage graph built from ``words``.

    :param words: word list passed to ``spelling.wordlist_to_graph_file``.
    """
    storage = RamStorage()
    out_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, out_file)

    reader = fst.GraphReader(storage.open_file("test"))
    return spelling.GraphCorrector(reader)
def greader(st):
    """Open the file named "test" in ``st`` and wrap it in a GraphReader.

    :param st: storage object providing ``open_file``.
    """
    graph_file = st.open_file("test")
    return fst.GraphReader(graph_file)