def eval_sent(s1, s2):
    """Compare two tagged sentences token by token and record the outcome.

    Both arguments are whitespace-separated strings.  Each is tokenised with
    ``str.split`` and handed to ``split_pos``, whose result is unpacked into
    a (words, tags) pair.  Every position is reported to ``stat.word`` and
    the sentence as a whole to ``stat.sent``.

    NOTE(review): which of ``s1``/``s2`` is the reference and which is the
    prediction is not visible here -- confirm against ``stat.word``.

    Raises:
        AssertionError: if the word/tag sequences differ in length, or if
            the two sentences do not contain the same word at some position.
    """
    words_a, tags_a = split_pos(s1.split())
    words_b, tags_b = split_pos(s2.split())

    # All four sequences must line up before they are walked in lock-step.
    assert len(words_a) == len(tags_a)
    assert len(words_a) == len(words_b)
    assert len(tags_a) == len(tags_b)

    matches = 0
    for word_a, tag_a, word_b, tag_b in zip(words_a, tags_a, words_b, tags_b):
        # The two sentences must be the same text; only the tags may differ.
        assert word_a == word_b
        stat.word(word_a, tag_a, tag_b)
        if tag_a == tag_b:
            matches += 1

    # The sentence is reported as correct only when every tag agreed.
    stat.sent(matches == len(words_a))
def tag_file(tagger, filein, fileout, test):
    """Tag every non-blank line of ``filein`` and write the result to ``fileout``.

    For each sentence the output holds one line of ``word/tag`` pairs, each
    pair followed by a single space, and then one empty line.  When ``test``
    is true, the input line itself is echoed before the tagged line and the
    words are obtained by passing the tokens through ``split_pos``.

    Args:
        tagger: object with a ``tag_sentence(words, n)`` method returning one
            tag per word.  The second argument is always 5 here
            (presumably a beam width -- TODO confirm against the tagger).
        filein: iterable of text lines, one sentence per line.
        fileout: writable text stream; only its ``write`` method is used.
        test: truthy when each input token must be split by ``split_pos``
            before tagging (presumably because it already carries a tag).

    Raises:
        AssertionError: if the tagger returns a different number of tags
            than there are words.
    """
    for sent in filein:
        tokens = sent.split()
        if not tokens:
            # Blank / whitespace-only line: nothing to tag.
            continue

        if test:
            # Echo the input line.  Strip only a real trailing newline so a
            # final line without one does not lose its last character.
            echoed = sent[:-1] if sent.endswith('\n') else sent
            fileout.write(echoed + '\n')
            words, _ = split_pos(tokens)
        else:
            words = tokens

        tag = tagger.tag_sentence(words, 5)
        assert len(words) == len(tag)

        for word, word_tag in zip(words, tag):
            fileout.write('%s/%s ' % (word, word_tag))

        # One newline ends the tagged line, the second leaves a blank
        # separator line.  Explicit writes behave identically on Python 2
        # and Python 3, unlike the ``print >> file`` chevron form.
        fileout.write('\n\n')