示例#1
0
def sdfprocess(rvdata, partidx, limit=5):
    """Dependency-parse the comment field of review records with MaltParser.

    Args:
        rvdata: Sequence of records. Each record is sliced and concatenated
            with a list (``eg[:3] + [...]``), so it is expected to be a
            list; ``eg[3]`` is decoded as UTF-8 and treated as the comment
            text.
        partidx: Index of the document part being processed. Only used, as
            ``partidx + 1``, in the progress message.
        limit: Maximum number of records to process. Defaults to 5, the cap
            that was previously hard-coded in the loop; pass ``None`` to
            process every record.

    Returns:
        A list with one entry per processed record: the record's first three
        fields followed by a list holding the ``parser.raw_parse`` result of
        each sentence in the comment.
    """
    malt_home = "/home/cosmo/Dropbox/Purdue/nlp/maltparser-1.8"
    # NOTE(review): presumably NLTK reads MALT_PARSER to locate the
    # MaltParser installation -- confirm against the installed NLTK version.
    os.environ["MALT_PARSER"] = malt_home
    parser = MaltParser(
        mco='engmalt.poly-1.7',
        working_dir=malt_home,
        additional_java_args=['-Xmx5000m'])

    # Parse a fixed sentence once and print the result before touching the
    # real data (kept from the original as a start-up smoke test).
    print(parser.raw_parse("I am a student."))

    sdfdata = []
    for cnn, eg in enumerate(rvdata, 1):
        # Stop before processing record number ``limit + 1``.
        if limit is not None and cnn > limit:
            break
        if cnn % 100 == 0:
            print("%f%% of document %d finished" % (cnn * 100 * 1.0 /
                                                    len(rvdata), partidx + 1))
        cmt = eg[3].decode('utf-8')  # 3 is the index of the comment field
        sentences = nltk.sent_tokenize(cmt)
        sdfparsed = [parser.raw_parse(sentence) for sentence in sentences]
        sdfdata.append(eg[:3] + [sdfparsed])
        print(sdfparsed)

    return sdfdata
示例#2
0
def mltprocess(tp, path, filenamels, docid):
    """Dependency-parse the headline and body of each listed file.

    Every file is expected to hold the headline on its first line and the
    body on its second line; both are passed through ``preprocess`` before
    parsing. The body is split into sentences with ``nltk.sent_tokenize``;
    the headline is parsed as a single sentence.

    Args:
        tp: Label inserted into the progress message (formatted with ``%s``).
        path: Prefix joined to each file name by plain string concatenation,
            so it must already end with a path separator.
        filenamels: Sequence of file names to process.
        docid: Document number inserted into the progress message
            (formatted with ``%d``).

    Returns:
        A list with one entry per file; each entry is a list of
        ``parser.raw_parse`` results, headline first, then one per body
        sentence.

    Raises:
        IndexError: If a file contains fewer than two lines.
    """
    parser = MaltParser(
        # working_dir must be the installation directory, not the jar file
        # inside it: NLTK uses it as a directory for its temporary files.
        working_dir='/home/cosmo/Dropbox/Purdue/nlp/maltparser-1.8',
        mco='engmalt.poly-1.7.mco',
        # Must be a list: NLTK concatenates it onto the java command list,
        # which raises TypeError for a bare string.
        additional_java_args=['-mx5000m'])

    total = len(filenamels)
    sdfdata = []
    for done, filename in enumerate(filenamels, 1):
        if done % 100 == 0:
            print("%f%% of document %d of %s finished" % (done * 100 * 1.0 / total, docid, tp))
        # The context manager closes the handle even if reading fails.
        with open(path + filename, 'r') as h:
            lines = h.readlines()
        headraw, bodyraw = preprocess(lines[0]), preprocess(lines[1])

        sentences = [headraw] + nltk.sent_tokenize(bodyraw)
        sdfdata.append([parser.raw_parse(sentence) for sentence in sentences])
    return sdfdata
示例#3
0
#!/usr/bin/env python
from nltk.parse.malt import MaltParser
# Sentence handed to the parser once training has finished.
txt = "This is a test sentence"

# Both arguments are passed positionally, exactly as given here.
parser = MaltParser(
    'maltparser-1.8.1',
    'espmalt-1.0.mco')

# Train on the treebank file before parsing.
parser.train_from_file('Tibidabo_Treebank.txt')

# NOTE(review): the parse result is neither stored nor printed -- confirm
# whether the caller meant to inspect it.
parser.raw_parse(txt)