示例#1
0
def getDuffyScore(sent1, sent2):
    """Run ``CollinsDuffy`` on the flipped parse trees of two sentences.

    Each sentence is parsed with ``getNLPToks``; the ``'parse'`` entry of the
    result is passed to ``generateTree`` together with a fresh ``tree()``,
    and that tree is then passed to ``flipTree``.

    Args:
        sent1: First sentence, handed unchanged to ``getNLPToks``.
        sent2: Second sentence, handed unchanged to ``getNLPToks``.

    Returns:
        A 2-tuple with the two values produced by ``CollinsDuffy``, in the
        order it returns them (presumably a raw and a normalized score --
        confirm against ``CollinsDuffy``).
    """
    trees = (tree(), tree())
    parsed = [getNLPToks(sentence) for sentence in (sent1, sent2)]

    # Generate both trees first, then flip both, so the helpers are invoked
    # in a fixed order: generate, generate, flip, flip.
    for result, target in zip(parsed, trees):
        generateTree(result['parse'], target)
    for target in trees:
        flipTree(target)

    # 0.8, 1, 1 are fixed settings whose meaning is defined by CollinsDuffy.
    first_score, second_score = CollinsDuffy(trees[0], trees[1], 0.8, 1, 1)
    return first_score, second_score
示例#2
0
def getMoschittiScore(sent1, sent2):
    """Run ``MoschittiPT`` on the flipped parse trees of two sentences.

    Each sentence is parsed with ``getNLPToks``; the ``'parse'`` entry of the
    result is passed to ``generateTree`` together with a fresh ``tree()``,
    and that tree is then passed to ``flipTree``.

    Args:
        sent1: First sentence, handed unchanged to ``getNLPToks``.
        sent2: Second sentence, handed unchanged to ``getNLPToks``.

    Returns:
        Only the second of the two values produced by ``MoschittiPT``
        (presumably the normalized score -- confirm against ``MoschittiPT``).
    """
    trees = (tree(), tree())
    parsed = [getNLPToks(sentence) for sentence in (sent1, sent2)]

    # Generate both trees first, then flip both, so the helpers are invoked
    # in a fixed order: generate, generate, flip, flip.
    for result, target in zip(parsed, trees):
        generateTree(result['parse'], target)
    for target in trees:
        flipTree(target)

    # MoschittiPT yields a pair; the first element is deliberately discarded.
    # 0.8, 1, 1 are fixed settings whose meaning is defined by MoschittiPT.
    _, second_score = MoschittiPT(trees[0], trees[1], 0.8, 1, 1)
    return second_score
示例#3
0
def parseCandidateBooks(candidate):
    """Parse each sentence of ``candidate`` into flipped trees.

    For every sentence the ``'parse'`` entry returned by ``getNLPToks`` is
    passed to ``generateTree`` twice, once per fresh ``tree()``. Both trees
    go through ``flipTree``; the second one is additionally handed to
    ``removeTokens`` together with the sentence.

    Args:
        candidate: Iterable of sentences, each passed unchanged to
            ``getNLPToks``.

    Returns:
        A 3-tuple of parallel lists with one entry per sentence:
        the flipped trees, the ``'parse'`` values, and the results of
        ``removeTokens``.
    """
    full_trees = []
    raw_parses = []
    stripped_trees = []
    for sentence in candidate:
        parse_result = getNLPToks(sentence)
        with_tokens, for_stripping = tree(), tree()
        parse = parse_result['parse']

        # Two independent trees are built from the same parse so that
        # removeTokens works on its own copy.
        for target in (with_tokens, for_stripping):
            generateTree(parse, target)
        for target in (with_tokens, for_stripping):
            flipTree(target)

        raw_parses.append(parse)
        full_trees.append(with_tokens)
        stripped_trees.append(removeTokens(for_stripping, sentence))
    print('candidate')
    return (full_trees, raw_parses, stripped_trees)
示例#4
0
def parseBook(candidate):
    """Parse each sentence of ``candidate`` into a flipped tree.

    Args:
        candidate: Iterable of sentences, each passed unchanged to
            ``getNLPToks``.

    Returns:
        A 2-tuple of parallel lists with one entry per sentence: the trees
        (after ``generateTree`` and ``flipTree``) and the ``'parse'`` values
        returned by ``getNLPToks``.
    """
    flipped_trees = []
    raw_parses = []
    for sentence in candidate:
        parse_result = getNLPToks(sentence)
        sentence_tree = tree()
        parse = parse_result['parse']
        generateTree(parse, sentence_tree)
        flipTree(sentence_tree)
        raw_parses.append(parse)
        flipped_trees.append(sentence_tree)
    return (flipped_trees, raw_parses)
示例#5
0
def parseNewText(chunk):
    """Parse each sentence of ``chunk`` into flipped trees.

    For every sentence the ``'parse'`` entry returned by ``getNLPToks`` is
    passed to ``generateTree`` twice, once per fresh ``tree()``. Both trees
    go through ``flipTree``; the second one is additionally handed to
    ``removeTokens`` together with the sentence.

    NOTE(review): other ``parseNewText`` definitions appear further down; if
    they end up in the same module, the last one replaces this one.

    Args:
        chunk: Iterable of sentences, each passed unchanged to
            ``getNLPToks``.

    Returns:
        A 3-tuple of parallel lists with one entry per sentence:
        the flipped trees, the ``'parse'`` values, and the results of
        ``removeTokens``.
    """
    full_trees = []
    raw_parses = []
    stripped_trees = []
    for sentence in chunk:
        parse_result = getNLPToks(sentence)
        with_tokens, for_stripping = tree(), tree()
        parse = parse_result['parse']

        # Two independent trees are built from the same parse so that
        # removeTokens works on its own copy.
        for target in (with_tokens, for_stripping):
            generateTree(parse, target)
        for target in (with_tokens, for_stripping):
            flipTree(target)

        raw_parses.append(parse)
        full_trees.append(with_tokens)
        stripped_trees.append(removeTokens(for_stripping, sentence))
    print('over')
    return (full_trees, raw_parses, stripped_trees)
def parseCandidateBooks(candidate):
    """Parse every paragraph of ``candidate`` sentence by sentence.

    Each paragraph is split with ``sent_tokenize``. For every sentence the
    ``'parse'`` entry returned by ``getNLPToks`` is passed to
    ``generateTree`` twice, once per fresh ``tree()``. Both trees go through
    ``flipTree``; the second one is additionally handed to ``removeTokens``
    together with the sentence.

    Args:
        candidate: Iterable of paragraphs, each passed to ``sent_tokenize``.

    Returns:
        A 2-tuple of parallel lists with one inner list per paragraph:
        the flipped trees of its sentences, and the matching results of
        ``removeTokens``.
    """
    trees_by_para = []
    stripped_by_para = []
    for paragraph in candidate:
        sentences = sent_tokenize(paragraph)
        para_trees = []
        para_stripped = []
        for sentence in sentences:
            parse_result = getNLPToks(sentence)
            with_tokens, for_stripping = tree(), tree()
            parse = parse_result['parse']

            # Two independent trees are built from the same parse so that
            # removeTokens works on its own copy.
            for target in (with_tokens, for_stripping):
                generateTree(parse, target)
            for target in (with_tokens, for_stripping):
                flipTree(target)

            para_trees.append(with_tokens)
            para_stripped.append(removeTokens(for_stripping, sentence))
        trees_by_para.append(para_trees)
        stripped_by_para.append(para_stripped)
    print('candidate')
    return trees_by_para, stripped_by_para
def parseNewText(paraChunk):
    """Parse every paragraph of ``paraChunk`` sentence by sentence.

    Each paragraph is split with ``sent_tokenize``. For every sentence the
    ``'parse'`` entry returned by ``getNLPToks`` is passed to
    ``generateTree`` twice, once per fresh ``tree()``. Both trees go through
    ``flipTree``; the second one is additionally handed to ``removeTokens``
    together with the sentence.

    NOTE(review): another ``parseNewText`` is defined further down; if both
    end up in the same module, the later definition replaces this one.

    Args:
        paraChunk: Iterable of paragraphs, each passed to ``sent_tokenize``.

    Returns:
        A 2-tuple of parallel lists with one inner list per paragraph:
        the flipped trees of its sentences, and the matching results of
        ``removeTokens``.
    """
    trees_by_para = []
    stripped_by_para = []
    for paragraph in paraChunk:
        para_trees = []
        para_stripped = []
        sentences = sent_tokenize(paragraph)
        for sentence in sentences:
            parse_result = getNLPToks(sentence)
            with_tokens, for_stripping = tree(), tree()
            parse = parse_result['parse']

            # Two independent trees are built from the same parse so that
            # removeTokens works on its own copy.
            for target in (with_tokens, for_stripping):
                generateTree(parse, target)
            for target in (with_tokens, for_stripping):
                flipTree(target)

            para_trees.append(with_tokens)
            para_stripped.append(removeTokens(for_stripping, sentence))
        trees_by_para.append(para_trees)
        stripped_by_para.append(para_stripped)
    print('over')
    return trees_by_para, stripped_by_para
def parseNewText(paraChunk):
    """Parse every paragraph of ``paraChunk`` into per-sentence flipped trees.

    Each paragraph is split with ``sent_tokenize``; for every sentence the
    ``'parse'`` entry returned by ``getNLPToks`` is passed to
    ``generateTree`` with a fresh ``tree()``, which then goes through
    ``flipTree``.

    Args:
        paraChunk: Iterable of paragraphs, each passed to ``sent_tokenize``.

    Returns:
        A list with one inner list per paragraph, holding the flipped tree
        of each of its sentences in order.
    """
    print('Parsing chunk')
    trees_by_para = []
    for paragraph in paraChunk:
        para_trees = []
        sentences = sent_tokenize(paragraph)
        for sentence in sentences:
            parse_result = getNLPToks(sentence)
            sentence_tree = tree()
            generateTree(parse_result['parse'], sentence_tree)
            flipTree(sentence_tree)
            para_trees.append(sentence_tree)
        trees_by_para.append(para_trees)
    return trees_by_para
def parseBook(candidate):
    """Parse every paragraph of ``candidate`` into per-sentence flipped trees.

    Each paragraph is split with ``sent_tokenize``; for every sentence the
    ``'parse'`` entry returned by ``getNLPToks`` is passed to
    ``generateTree`` with a fresh ``tree()``, which then goes through
    ``flipTree``.

    Args:
        candidate: Iterable of paragraphs, each passed to ``sent_tokenize``.

    Returns:
        A list with one inner list per paragraph, holding the flipped tree
        of each of its sentences in order. The raw parses are not returned.
    """
    trees_by_para = []
    for paragraph in candidate:
        sentences = sent_tokenize(paragraph)
        para_trees = []
        for sentence in sentences:
            parse_result = getNLPToks(sentence)
            sentence_tree = tree()
            generateTree(parse_result['parse'], sentence_tree)
            flipTree(sentence_tree)
            para_trees.append(sentence_tree)
        trees_by_para.append(para_trees)
    return trees_by_para
示例#10
0
# For every book, collect the entries of books[book] selected by the
# indices/keys listed in reducedSentences[book].
for book in booksList:
    for sent in reducedSentences[book]:
        reducedBooks[book].append(books[book][sent])

# Persist the reduced books. The context manager guarantees the file is
# flushed and closed even if pickling fails; previously the handle was
# left open.
with open("./bible/reducedBooks.pickle", "wb") as pickling_on:
    pickle.dump(reducedBooks, pickling_on)

# Parse every sentence of `text` into a flipped tree, keeping the raw
# 'parse' value returned by getNLPToks alongside it.
i = 0  # running sentence counter, kept at module level as before
parseTrees = list()
parsedSentences = list()
for sent in text:
    # Progress indicator: print the counter on every 10th sentence.
    if i % 10 == 0:
        print(i)
    sentParse = getNLPToks(sent)
    tempTree = tree()
    generateTree(sentParse['parse'], tempTree)
    parsedSentences.append(sentParse['parse'])
    flipTree(tempTree)
    parseTrees.append(tempTree)
    i = i + 1

# Persist the trees. The context manager guarantees the file is flushed
# and closed even if pickling fails; previously the handle was left open.
with open("./bible/parseTrees.pickle", "wb") as pickling_on:
    pickle.dump(parseTrees, pickling_on)

potentialParseTrees = dict()
potentialParsedSentences = dict()

for book in booksList:
    print(book)
    candidate = reducedBooks[book]