예제 #1
0
def test_find_self():
    """Check that the top suggestion for a word is never the word itself."""
    words = sorted(u("book bake bike bone").split())
    storage = RamStorage()
    graph_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, graph_file)

    reader = fst.GraphReader(storage.open_file("test"))
    corrector = spelling.GraphCorrector(reader)
    # Each queried word was written to the graph; the first suggestion
    # returned for it must still be a different word.
    for word in ("book", "bake", "bike", "bone"):
        assert corrector.suggest(word)[0] != word
예제 #2
0
def test_insert_bytes():
    """Insert bytes keys into a graph and read them back with ``flatten``."""
    # This test is only meaningful on Python 3
    expected = [b("alfa"), b("bravo"), b("charlie")]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in expected:
        writer.insert(item)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    assert list(cursor.flatten()) == expected
예제 #3
0
    def __init__(self, word_file, graph_file):
        """Open the graph file and load the word table.

        :param word_file: path to a UTF-8 text file holding one
            ``<word> <integer>`` entry per line. Lines with fewer than two
            whitespace-separated tokens are skipped.
        :param graph_file: path to the graph file; it is opened through a
            ``FileStorage`` rooted at the file's directory and wrapped in a
            ``fst.GraphReader`` stored on ``self.graph``.
        :raises ValueError: if the second token of an entry is not an integer.
        """
        # Open the file by its base name: the storage is already rooted at
        # the directory part, so passing the full path again would repeat
        # that directory for relative paths (assumes FileStorage resolves
        # names relative to its folder -- TODO confirm against Whoosh).
        dirname, basename = os.path.split(graph_file)
        st = FileStorage(dirname)
        self.graph = fst.GraphReader(st.open_file(basename))

        self.dict = {}
        with codecs.open(word_file, 'r', 'utf-8') as word_fh:
            for line in word_fh:
                # split() with no argument collapses runs of whitespace, so
                # leading, trailing or doubled spaces no longer produce empty
                # tokens that make int() fail.
                tokens = line.split()
                if len(tokens) >= 2:
                    self.dict[tokens[0]] = int(tokens[1])
예제 #4
0
def _fst_roundtrip(domain, t):
    """Write ``domain`` to a graph with value type ``t`` and verify the read-back.

    ``domain`` is a list of ``(key, value)`` pairs; the pairs yielded by the
    cursor's ``flatten_v`` must equal that list.
    """
    with TempStorage() as storage:
        out_file = storage.create_file("test")
        writer = fst.GraphWriter(out_file, vtype=t)
        writer.start_field("_")
        for k, v in domain:
            writer.insert(k, v)
        writer.finish_field()
        writer.close()

        in_file = storage.open_file("test")
        reader = fst.GraphReader(in_file, vtype=t)
        pairs = list(fst.Cursor(reader).flatten_v())
        assert pairs == domain
        in_file.close()
예제 #5
0
def test_insert_unicode():
    """Insert non-ASCII keys and read them back with ``flatten_strings``."""
    expected = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for text in expected:
        writer.insert(text)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    assert list(cursor.flatten_strings()) == expected
예제 #6
0
def test_within_unicode():
    """Check ``fst.within`` against a graph of non-ASCII keys."""
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for text in keys:
        writer.insert(text)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    # The query differs from the third key only in its middle character.
    query = u("\uc774.\ud76c")
    matches = list(fst.within(reader, query))
    assert matches == [u("\uc774\uc124\ud76c")]
예제 #7
0
def test_fields():
    """Write two fields into one graph file and read each back from its own root."""
    with TempStorage() as storage:
        writer = fst.GraphWriter(storage.create_file("test"))

        writer.start_field("f1")
        for key in ("a", "aa", "ab"):
            writer.insert(key)
        writer.finish_field()

        # The second field is left open; only close() is called after it.
        writer.start_field("f2")
        for key in ("ba", "baa", "bab"):
            writer.insert(key)
        writer.close()

        reader = fst.GraphReader(storage.open_file("test"))
        first = fst.Cursor(reader, reader.root("f1"))
        second = fst.Cursor(reader, reader.root("f2"))
        assert list(first.flatten_strings()) == ["a", "aa", "ab"]
        assert list(second.flatten_strings()) == ["ba", "baa", "bab"]
        reader.close()
예제 #8
0
def words_to_corrector(words):
    """Write ``words`` to an in-memory graph file and return a ``GraphCorrector`` over it."""
    storage = RamStorage()
    spelling.wordlist_to_graph_file(words, storage.create_file("test"))
    reader = fst.GraphReader(storage.open_file("test"))
    return spelling.GraphCorrector(reader)
예제 #9
0
def greader(st):
    """Return a ``GraphReader`` over the file named "test" in storage ``st``."""
    test_file = st.open_file("test")
    return fst.GraphReader(test_file)