Python RakutenMA.str2csent示例

    def test_str2csent(self):
        rma = RakutenMA()
        actual = rma.str2csent("hoge")
        desired = [
            Token(l=_BEOS_LABEL),
            Token(c="h", t=rma.ctype_ja_default_func("h")),
            Token(c="o", t=rma.ctype_ja_default_func("o")),
            Token(c="g", t=rma.ctype_ja_default_func("g")),
            Token(c="e", t=rma.ctype_ja_default_func("e")),
            Token(l=_BEOS_LABEL)]

        assert_equals(len(actual), len(desired))
        for i in range(len(actual)):
            assert_equals(actual[i].c, desired[i].c)
            assert_equals(actual[i].t, desired[i].t)
            assert_equals(actual[i].f, desired[i].f)
            assert_equals(actual[i].l, desired[i].l)

示例#2

显示文件

文件： test_rakutenma.py 项目： pombredanne/rakutenma-python

    def test_add_efeats(self):
        # feature functions test
        rma = RakutenMA()
        rma.hash_func = None
        rma.featset = ["w0"]
        csent = rma.str2csent("A1-b")
        csent = rma.add_efeats(csent)
        assert_equals(csent[0].f, [["w0", ""]])
        assert_equals(csent[1].f, [["w0", "A"]])
        assert_equals(csent[2].f, [["w0", "1"]])
        assert_equals(csent[3].f, [["w0", "-"]])
        assert_equals(csent[4].f, [["w0", "b"]])
        assert_equals(csent[5].f, [["w0", ""]])

        rma.featset = ["b1"]
        csent = rma.add_efeats(csent)
        assert_equals(csent[0].f, [["b1", "", "A"]])
        assert_equals(csent[1].f, [["b1", "A", "1"]])
        assert_equals(csent[2].f, [["b1", "1", "-"]])
        assert_equals(csent[3].f, [["b1", "-", "b"]])
        assert_equals(csent[4].f, [["b1", "b", ""]])
        assert_equals(csent[5].f, [["b1", "", ""]])

        rma.featset = ["c0"]
        csent = rma.add_efeats(csent)
        assert_equals(csent[0].f, [["c0", ""]])
        assert_equals(csent[1].f, [["c0", "A"]])
        assert_equals(csent[2].f, [["c0", "N"]])
        assert_equals(csent[3].f, [["c0", "O"]])
        assert_equals(csent[4].f, [["c0", "a"]])
        assert_equals(csent[5].f, [["c0", ""]])

        rma.featset = ["d9"]
        csent = rma.add_efeats(csent)
        assert_equals(csent[0].f, [["d9", "", ""]])
        assert_equals(csent[1].f, [["d9", "", "A"]])
        assert_equals(csent[2].f, [["d9", "A", "N"]])
        assert_equals(csent[3].f, [["d9", "N", "O"]])
        assert_equals(csent[4].f, [["d9", "O", "a"]])
        assert_equals(csent[5].f, [["d9", "a", ""]])

        rma.featset = ["t0"]
        csent = rma.add_efeats(csent)
        assert_equals(csent[0].f, [["t0", "", "", "A"]])
        assert_equals(csent[1].f, [["t0", "", "A", "1"]])
        assert_equals(csent[2].f, [["t0", "A", "1", "-"]])
        assert_equals(csent[3].f, [["t0", "1", "-", "b"]])
        assert_equals(csent[4].f, [["t0", "-", "b", ""]])
        assert_equals(csent[5].f, [["t0", "b", "", ""]])

        # test a custom function for feature
        # args _t: a function which receives position i and returns the token,
        #          taking care of boundary cases
        #       i: current position
        # sample function -> returns if the character is a capitalized letter
        rma.featset = [lambda _t, i: ["CAP", "T" if _t(i).t == "A" else "F"]]
        csent = rma.add_efeats(csent)
        assert_equals(csent[0].f, [["CAP", "F"]])
        assert_equals(csent[1].f, [["CAP", "T"]])
        assert_equals(csent[2].f, [["CAP", "F"]])
        assert_equals(csent[3].f, [["CAP", "F"]])
        assert_equals(csent[4].f, [["CAP", "F"]])
        assert_equals(csent[5].f, [["CAP", "F"]])

        rma.featset = ["NONEXISTENT_FEATURE"]
        assert_raises(Exception, rma.add_efeats, csent)