def sdfprocess(rvdata, partidx):
    """Dependency-parse the comment field of the leading records in ``rvdata``.

    Sets the ``MALT_PARSER`` environment variable, builds a ``MaltParser``,
    prints the parse of a fixed sample sentence, and then walks ``rvdata``.
    For each record the element at index 3 is decoded as UTF-8, split into
    sentences with ``nltk.sent_tokenize`` and every sentence is passed to
    ``parser.raw_parse``.

    Args:
        rvdata: Sequence of records; each record is sliced and concatenated
            with a list, and its element at index 3 must support
            ``.decode('utf-8')``.
        partidx: Zero-based part index, used only in the progress message
            (printed as ``partidx + 1``).

    Returns:
        A list with one entry per processed record: the record's first three
        elements followed by the list of per-sentence parse results.
    """
    malt_home = "/home/cosmo/Dropbox/Purdue/nlp/maltparser-1.8"
    os.environ["MALT_PARSER"] = malt_home
    parser = MaltParser(
        mco='engmalt.poly-1.7',
        working_dir=malt_home,
        additional_java_args=['-Xmx5000m'],
    )

    # Parse one fixed sentence up front and print the result.
    print(parser.raw_parse("I am a student."))

    parsed_records = []
    for position, record in enumerate(rvdata, 1):
        if position % 100 == 0:
            percent_done = position * 100 * 1.0 / len(rvdata)
            print("%f%% of document %d finished" % (percent_done, partidx + 1))

        comment = record[3].decode('utf-8')  # index 3 holds the comment text
        trees = [parser.raw_parse(sent) for sent in nltk.sent_tokenize(comment)]
        parsed_records.append(record[:3] + [trees])
        print(trees)

        # NOTE(review): processing stops after five records, so the progress
        # message above can never fire -- looks like a debugging limit; confirm.
        if position >= 5:
            break
    return parsed_records
def mltprocess(tp, path, filenamels, docid):
    """Dependency-parse the headline and body sentences of each listed file.

    Every file is expected to hold the headline on its first line and the
    body on its second line. Both are passed through ``preprocess``; the body
    is split into sentences with ``nltk.sent_tokenize`` and each sentence
    (headline first) is handed to ``parser.raw_parse``.

    Args:
        tp: Label interpolated into the progress message only.
        path: Prefix that is concatenated directly with each file name, so it
            must already end with a path separator if one is needed.
        filenamels: Sequence of file names to process.
        docid: Document number interpolated into the progress message only.

    Returns:
        A list with one entry per file; each entry is the list of parse
        results for that file's headline followed by its body sentences.

    Raises:
        IndexError: If a file contains fewer than two lines.
    """
    # NOTE(review): working_dir points at a .jar file rather than a directory,
    # and mco carries an explicit ".mco" suffix unlike sdfprocess -- confirm
    # both against the installed NLTK/MaltParser versions.
    parser = MaltParser(
        working_dir='/home/cosmo/Dropbox/Purdue/nlp/maltparser-1.8/maltparser-1.8.jar',
        mco='engmalt.poly-1.7.mco',
        # Must be a list: NLTK splices this value into the java argv list, so
        # a bare string is either rejected or split into single characters.
        additional_java_args=['-mx5000m'])

    sdfdata = []
    total = len(filenamels)
    for count, filename in enumerate(filenamels, 1):
        if count % 100 == 0:
            print("%f%% of document %d of %s finished"
                  % (count * 100 * 1.0 / total, docid, tp))

        # The context manager closes the handle even if reading fails.
        with open(path + filename, 'r') as handle:
            lines = handle.readlines()

        headraw, bodyraw = preprocess(lines[0]), preprocess(lines[1])
        sentences = [headraw] + nltk.sent_tokenize(bodyraw)
        sdfdata.append([parser.raw_parse(sentence) for sentence in sentences])
    return sdfdata
#!/usr/bin/env python
# Minimal MaltParser session: build a parser, train it from a treebank file,
# then parse a single sentence.
from nltk.parse.malt import MaltParser

# Sentence handed to the parser once training has finished.
txt = "This is a test sentence"

# Positional arguments are passed through exactly as given; their meaning
# depends on the installed NLTK version's MaltParser signature.
parser = MaltParser(
    'maltparser-1.8.1',
    'espmalt-1.0.mco',
)

# Train from the treebank file before parsing.
parser.train_from_file('Tibidabo_Treebank.txt')

# NOTE(review): the parse result is discarded -- nothing is printed or stored.
parser.raw_parse(txt)