import nltk
import json
import yaml
from random import choice
from stat_parser import Parser, display_tree
# Build the statistical parser used for the demo parse below.
parser = Parser()

# Load the word data; the context manager closes the file as soon as the
# JSON has been read instead of leaving the handle open.
with open('tree_ship_words.json') as words_file:
    d = json.load(words_file)

# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# Python objects. If ships.yml only holds plain data, yaml.safe_load is the
# safer call -- confirm before switching.
with open("ships.yml") as f:
    ships = yaml.load(f.read())

# Pick one entry at random and print it lower-cased. The parenthesised
# single-argument form prints the same text on Python 2 and Python 3.
name = choice(ships)
print(name.lower())

#tree = parser.parse("Anticipation of a new lover's arrival")
tree = parser.parse("they had good pretzels")
display_tree(tree)
Пример #2
0
def main():
    time1 = time()
    parser = Parser()

    inFile = sys.argv[1]
    outFile = sys.argv[2]
    f = open(outFile,'w+')
    
    for line in open(inFile):
        if config.print_line: 
            print line

        global code, errorCode, nodeList, nodeNum, firstVBNN, firstNNVB, found, sqFlag, gqFlag, NNNode, VBNode
        code = 0
        nodeNum = 0
        errorCode = []
        nodeList = []
        firstVBNN = []
        firstNNVB = []
        found = 0
        sqFlag = 0
        gpFlag = 0
        NNNode = []
        VBNode = []
            
        wordNum = len(line.split())
        if config.print_word_num:
            print 'Word Num: ', wordNum

        if config.ignore_long_sentence and wordNum>config.max_word_num:
            # print 'Long Sentence'
            # print '\n'*2
            # print -1, line,
            f.write(str(-1) + ' ' + line)
            continue 

        try:
            start = time()
            indentTree = parser.raw_parse(line)     # raw parse for indent tree list
            end = time()
        except:
            # print 'Parsing Error'
            # print '\n'*2
            # print -1, line,
            f.write(str(-1) + ' ' + line)
            continue

        if config.print_parse_time:
            print 'Raw Parse Time: ', end-start, 's'
        
        if config.print_indent_tree:
            pprint(indentTree)                      # unit test

        root = trans(indentTree)                    # recursively transform list to "tree of node"
        assignCode(root)
        assignDesNum(root)
        
        if config.dfs_indent:
            dfsIndent(root,0)                       # unit test

        if config.print_node_list:                  # unit test
            for node in nodeList:
                print node.getData(), '\t',
            print
        
        if config.show_nltk_tree:
            start = time()
            nlktTree = parser.parse(line)           # nlktTree, could be drawn into graph
            end = time()
            print 'NLKT Tree Parse Time: ', end-start, 's'
            display_tree(nlktTree)                  # unit test

        """ Now the check begins! """
        preCheck()
        totalCheck(root)

        """ If there is some NP+VP left """
        for i in range(len(nodeList)-1):
            node1 = nodeList[i]
            node2 = nodeList[i+1]
            if node2.getParent().getData() == u'RB' and i+2<len(nodeList):
                node2 = nodeList[i+2]
            if node1.getParent().getData() in NNList and node2.getParent().getData() in VBList:
                if (node1 in NNNode and node2 in VBNode) or (firstNNVB and (node1==firstNNVB[0] and node2==firstNNVB[1])):
                    pass
                else:
                    NNNode.append(node1)
                    VBNode.append(node2)

        """ Be Check """
        for i in range(len(nodeList)-1):
            node1 = nodeList[i]
            node2 = nodeList[i+1]
            if node2.getParent().getData() == u'RB' and i+2<len(nodeList):
                node2 = nodeList[i+2]
            if node1.getData().lower() == 'i':
                if node2.getData() in BEList and node2.getData() not in ['am', 'was',"'m"]:
                    errorCode.append(node2.getCode())
            elif node1.getData().lower() in PRPSecondList:
                if node2.getData() in BEList and node2.getData() not in ['are', 'were',"'re"]:
                    errorCode.append(node2.getCode())
            elif node1.getData().lower() in PRPThirdList:
                if node1.getData().lower()=='that':
                    if i>0 and nodeList[i-1].getParent().getData() in NNList:
                        continue
                if node2.getData() in BEList and node2.getData() not in ['is', 'was',"'s"]:
                    errorCode.append(node2.getCode())
            else:
                pass

        """ del duplicates """
        if firstNNVB:
            n = firstNNVB[0]
            v = firstNNVB[1]
            if n in NNNode and v in VBNode:
                NNNode.remove(n)
                VBNode.remove(v)
                

        """ replace RB with the word after (Maybe VB) """
        if firstNNVB:
            v = firstNNVB[1]
            if v.getParent().getData()==u'RB':
                code = v.getCode()
                new = nodeList[code+1]
                if new.getParent().getData() in VBList:
                    firstNNVB[1] = new
                else:
                    pass

        for v in VBNode:
            if v.getParent().getData()==u'RB':
                code = v.getCode()
                new = nodeList[code+1]
                if new.getParent().getData() in VBList:
                    VBNode[VBNode.index(v)] = new
                else:
                    pass

        """ print dependencies """
        if config.print_npvp:
            print 'FROM ERRORCODE:'
            for i in errorCode:
                print nodeList[i].getData(), '\t',
            print

            print 'FROM QUESTION:'
            for node in firstVBNN:
                print node.getData()
            for node in firstNNVB:
                print node.getData()
            print

            print 'FROM NPVP & OTHERS:'
            for i in range(len(NNNode)):
                print NNNode[i].getParent().getData(), NNNode[i].getData(), '\t', VBNode[i].getParent().getData(), VBNode[i].getData()
            print

        """ canonicalize """
        if firstVBNN:
            if not (firstVBNN[0].getParent().getData() in VBList and firstVBNN[1].getParent().getData() in NNList):
                firstVBNN = []

        if firstNNVB:
            if not (firstNNVB[1].getParent().getData() in VBList and firstNNVB[0].getParent().getData() in NNList):
                firstNNVB = []

        for v in VBNode:
            i = VBNode.index(v)
            if not (NNNode[i].getParent().getData() in NNList and VBNode[i].getParent().getData() in VBList):
                del NNNode[i]
                del VBNode[i]
        

        """ Finally! We add codes! """
        if config.print_standard_answer:
            """ FROM QUESTION """
            if sqFlag or gqFlag:
                if firstVBNN:
                    v = firstVBNN[0]
                    n = firstVBNN[1]
                    if n.getData().lower() in PRPThirdList or n.getParent().getData() in [u'NN', u'NNP']:       # single noun
                        if v.getParent().getData() in [u'VB', u'VBP']:
                            errorCode.append(v.getCode())
                    else:
                        if v.getParent().getData()==u'VBZ':
                            errorCode.append(v.getCode())

                    if firstNNVB:
                            v = firstNNVB[1]
                            if v.getParent().getData()==u'VBZ':
                                errorCode.append(v.getCode())

                else:
                    if firstNNVB:
                        if not (firstNNVB[0] in NNNode and firstNNVB[1] in VBNode):
                            NNNode.append(firstNNVB[0])
                            VBNode.append(firstNNVB[1])

            """ FROM NPVP & OTHERS """
            for i in range(len(NNNode)):
                n = NNNode[i]
                v = VBNode[i]
                if n.getData().lower() in PRPThirdList or n.getParent().getData() in [u'NN', u'NNP']:       # single noun
                    if v.getParent().getData() in [u'VB', u'VBP']:
                        errorCode.append(v.getCode())
                else:
                    if v.getParent().getData()==u'VBZ':
                        errorCode.append(v.getCode())

            errorCode = list(set(errorCode))
            errorCode.sort()
            if errorCode:
                for i in errorCode:
                    # print i+1,
                    f.write(str(i+1) + ' ')
            else:
                # print -1,
                f.write(str(-1) + ' ')
            # print line,
            f.write(line)


        if config.print_vb:
            printVB(root)                           # print all verb and MD in tree
            print

        if config.print_empty_line:
            print '\n'*2

    time2 = time()
    if config.print_total_time:
        print 'Total Execution Time: ', time2-time1, 's'
        
    if config.show_nltk_tree:
        Tkinter._test()                             # show the nlktTree graph
Пример #3
0
from stat_parser import Parser, display_tree

# Create the parser instance used by this example.
parser = Parser()

# Sentence taken from http://www.thrivenotes.com/the-last-question/
tree = parser.parse(
    "How can the net amount of entropy of the universe "
    "be massively decreased?"
)

# Hand the parse result to stat_parser's tree display helper.
display_tree(tree)
Пример #4
0
from stat_parser import Parser, display_tree


# Create the parser instance used by this example.
parser = Parser()

# The parse result is unpacked into exactly two items; the assignment raises
# ValueError if the result yields any other number of items.
tree1, tree2 = parser.parse("John saw Mary with the telescope")

# Show both items together.
display_tree([tree1, tree2])