def test_decode(self):
    """Check the labels produced by ``decode`` when WEIGHTS is the model.

    Two sentences are decoded: "foo"/"bar" with every label blanked out
    beforehand, and "foX"/"bar" with the labels left as produced by
    ``tokens2csent``.
    """
    rma = RakutenMA()
    rma.hash_func = None

    tagged = rma.add_efeats(
        rma.tokens2csent([["foo", "N"], ["bar", "N"]], "SBIEO"))
    # Blank every label so the assertions below see decode()'s output.
    for ctoken in tagged:
        ctoken.l = ""
    rma.model["mu"] = WEIGHTS
    tagged = rma.decode(tagged)

    first_expected = ["_", "B-N", "I-N", "E-N", "B-N", "I-N", "E-N", "_"]
    for pos, label in enumerate(first_expected):
        assert_equals(tagged[pos].l, label)

    # Second sentence: labels are NOT cleared before decoding.
    tagged = rma.add_efeats(
        rma.tokens2csent([["foX", "N"], ["bar", "N"]], "SBIEO"))
    tagged = rma.decode(tagged)

    second_expected = ["_", "B-N", "I-N", "O", "B-N", "I-N", "E-N", "_"]
    for pos, label in enumerate(second_expected):
        assert_equals(tagged[pos].l, label)
def test_csent2tokens(self):
    """Round-trip tokens through ``tokens2csent`` and ``csent2tokens``.

    Also checks that ``csent2tokens`` raises when given a scheme name
    other than the one used for the round trip ("UNKNOWN_SCHEME").
    """
    sent = [["hoge", "X"], ["fuga", "Y"], ["p", "Z"]]
    rma = RakutenMA()
    csent = rma.tokens2csent(sent, "SBIEO")
    sent = RakutenMA.csent2tokens(csent, "SBIEO")

    assert_equals(sent[0][0], "hoge")
    assert_equals(sent[0][1], "X")
    assert_equals(sent[1][0], "fuga")
    assert_equals(sent[1][1], "Y")
    assert_equals(sent[2][0], "p")
    assert_equals(sent[2][1], "Z")

    # The callee's arguments must be passed positionally to assert_raises.
    # Wrapping them in a tuple would call csent2tokens with a single
    # argument, and the resulting arity TypeError would satisfy the
    # assertion without ever exercising the scheme check.
    assert_raises(Exception, RakutenMA.csent2tokens, csent, "UNKNOWN_SCHEME")
def test_tokens2csent(self):
    """Check characters and labels produced by ``tokens2csent``.

    Verifies that the "UNKNOWN_SCHEME" scheme name raises, then spot-checks
    the character (``.c``) and label (``.l``) at selected positions of the
    "SBIEO" conversion of "hoge"/"fuga"/"p".
    """
    sent = [["hoge", "X"], ["fuga", "Y"], ["p", "Z"]]
    rma = RakutenMA()

    # The callee's arguments must be passed positionally to assert_raises.
    # Wrapping them in a tuple would call tokens2csent with a single
    # argument, and the resulting arity TypeError would satisfy the
    # assertion without ever exercising the scheme check.
    assert_raises(Exception, rma.tokens2csent, sent, "UNKNOWN_SCHEME")

    csent = rma.tokens2csent(sent, "SBIEO")
    assert_equals(csent[1].c, "h")
    assert_equals(csent[1].l, "B-X")
    assert_equals(csent[2].c, "o")
    assert_equals(csent[2].l, "I-X")
    assert_equals(csent[4].c, "e")
    assert_equals(csent[4].l, "E-X")
    assert_equals(csent[9].c, "p")
    assert_equals(csent[9].l, "S-Z")
def test_csent2feats(self):
    """Check which features ``csent2feats`` emits for "foo"/"bar".

    With ``featset`` restricted to ``["w0"]``, every listed ``w0`` and
    ``t`` feature must be present in the result, and two specific ``t``
    features must be absent.
    """
    rma = RakutenMA()
    rma.hash_func = None
    rma.featset = ["w0"]

    tagged = rma.tokens2csent([["foo", "N"], ["bar", "N"]], "SBIEO")
    tagged = rma.add_efeats(tagged)
    feats = rma.csent2feats(tagged)

    must_contain = [
        ["w0", "", "_"],
        ["w0", "f", "B-N"],
        ["w0", "o", "I-N"],
        ["w0", "o", "E-N"],
        ["w0", "b", "B-N"],
        ["w0", "a", "I-N"],
        ["w0", "r", "E-N"],
        ["t", "B-N", "_"],
        ["t", "I-N", "B-N"],
        ["t", "E-N", "I-N"],
        ["t", "B-N", "E-N"],
        ["t", "_", "E-N"],
    ]
    must_not_contain = [
        ["t", "E-N", "B-N"],
        ["t", "B-N", "I-N"],
    ]

    for feat in must_contain:
        assert_true(feat in feats)
    for feat in must_not_contain:
        assert_true(feat not in feats)
def test_calc_states0(self):
    """Check per-position label scores from ``calc_states0`` with WEIGHTS.

    Uses the ``c0`` and ``w0`` feature templates on "foo"/"bar" and
    compares the score dictionary at positions 1 through 6.
    """
    rma = RakutenMA()
    rma.hash_func = None
    rma.featset = ["c0", "w0"]

    tagged = rma.add_efeats(
        rma.tokens2csent([["foo", "N"], ["bar", "N"]], "SBIEO"))

    # Expected scores for positions 1..6, in order.
    expected_scores = [
        {"B-N": 2, "I-N": 1, "E-N": 1},
        {"B-N": 1, "I-N": 2, "E-N": 2},
        {"B-N": 1, "I-N": 2, "E-N": 2},
        {"B-N": 2, "I-N": 1, "E-N": 1},
        {"B-N": 1, "I-N": 2, "E-N": 1},
        {"B-N": 1, "I-N": 1, "E-N": 2},
    ]
    for pos, scores in enumerate(expected_scores, 1):
        assert_equals(rma.calc_states0(tagged[pos].f, WEIGHTS), scores)