def test_str2csent(self):
    """Check the token sequence ``str2csent`` builds for the string "hoge".

    The expected sequence is a boundary token labelled ``_BEOS_LABEL``,
    one token per character (typed via ``ctype_ja_default_func``), and a
    closing boundary token with the same label.
    """
    text = "hoge"
    analyzer = RakutenMA()
    produced = analyzer.str2csent(text)

    # Expected sentence: boundary token, one token per character, boundary token.
    expected = [Token(l=_BEOS_LABEL)]
    expected.extend(
        Token(c=ch, t=analyzer.ctype_ja_default_func(ch)) for ch in text
    )
    expected.append(Token(l=_BEOS_LABEL))

    # Lengths are compared first, so the pairwise walk below cannot hide
    # a missing or surplus token.
    assert_equals(len(produced), len(expected))
    for got, want in zip(produced, expected):
        for attr in ("c", "t", "f", "l"):
            assert_equals(getattr(got, attr), getattr(want, attr))
def test_add_efeats(self):
    """Feature functions test.

    Runs ``add_efeats`` on the character sentence for "A1-b" with one
    feature template at a time and checks the feature list stored on each
    of the six tokens (two boundary tokens plus four characters). Also
    covers a user-supplied feature function and an unknown template name,
    which must raise.
    """

    # Custom feature function.
    #   _t: a function which receives a position and returns the token,
    #       taking care of boundary cases
    #   i:  current position
    # Sample function: reports whether the character is a capitalized letter.
    def capital_feature(_t, i):
        if _t(i).t == "A":
            flag = "T"
        else:
            flag = "F"
        return ["CAP", flag]

    # (feature set, expected single feature per token position 0..5)
    cases = [
        (["w0"], [
            ["w0", ""],
            ["w0", "A"],
            ["w0", "1"],
            ["w0", "-"],
            ["w0", "b"],
            ["w0", ""],
        ]),
        (["b1"], [
            ["b1", "", "A"],
            ["b1", "A", "1"],
            ["b1", "1", "-"],
            ["b1", "-", "b"],
            ["b1", "b", ""],
            ["b1", "", ""],
        ]),
        (["c0"], [
            ["c0", ""],
            ["c0", "A"],
            ["c0", "N"],
            ["c0", "O"],
            ["c0", "a"],
            ["c0", ""],
        ]),
        (["d9"], [
            ["d9", "", ""],
            ["d9", "", "A"],
            ["d9", "A", "N"],
            ["d9", "N", "O"],
            ["d9", "O", "a"],
            ["d9", "a", ""],
        ]),
        (["t0"], [
            ["t0", "", "", "A"],
            ["t0", "", "A", "1"],
            ["t0", "A", "1", "-"],
            ["t0", "1", "-", "b"],
            ["t0", "-", "b", ""],
            ["t0", "b", "", ""],
        ]),
        ([capital_feature], [
            ["CAP", "F"],
            ["CAP", "T"],
            ["CAP", "F"],
            ["CAP", "F"],
            ["CAP", "F"],
            ["CAP", "F"],
        ]),
    ]

    analyzer = RakutenMA()
    analyzer.hash_func = None

    tokens = None
    for featset, expected in cases:
        analyzer.featset = featset
        if tokens is None:
            # Build the character sentence once, right after the first
            # feature set has been installed.
            tokens = analyzer.str2csent("A1-b")
        tokens = analyzer.add_efeats(tokens)
        # Index explicitly so a too-short result fails loudly.
        for pos, feature in enumerate(expected):
            assert_equals(tokens[pos].f, [feature])

    # An unknown feature template name must be rejected.
    analyzer.featset = ["NONEXISTENT_FEATURE"]
    assert_raises(Exception, analyzer.add_efeats, tokens)