Exemplo n.º 1
0
import MBSP

# Lemmatize the corpus file one line at a time, echoing each raw line followed
# by the result of MBSP.lemmatize() for that line.
#
# Cleanup notes: the unused `inpt = ""` placeholder, the redundant
# `inpt = line` alias, and the commented-out experiments (whole-file
# MBSP.parse, per-word MBSP.tag / MBSP.lemma) were removed; the executed
# statements and their output are unchanged.
with open('_raw/lemmaCase-v6.txt', 'r') as f:
    for line in f:
        # Iterating a file yields each line with its trailing newline intact,
        # so this echo is followed by an extra blank line.
        print(line)
        # The line is decoded explicitly as UTF-8 so that MBSP is handed a
        # unicode object rather than the raw string read from disk.
        ustr = unicode(line, encoding="utf-8")
        tokenized_lemmas = MBSP.lemmatize(ustr, tokenize=True)
        print(tokenized_lemmas)
Exemplo n.º 2
0
 def _lemmatize_MBSP(self, txt):
     """Run ``MBSP.lemmatize`` on *txt* and return the result as ``unicode``.

     The value returned by ``MBSP.lemmatize(txt)`` (called with the
     library's default options) is passed through the ``unicode``
     constructor before being handed back to the caller.
     """
     return unicode(MBSP.lemmatize(txt))