def contextFreeGrammar(): print "page 298 Context-Free Grammar" print "=============== A Simple Grammar ===============" grammar1 = nltk.parse_cfg(""" S -> NP VP VP -> V NP | V NP PP PP -> P NP V -> "saw" | "ate" | "walked" NP -> "John" | "Mary" | "Bob" | Det N | Det N PP Det -> "a" | "an" | "the" | "my" N -> "man" | "dog" | "cat" | "telescope" | "park" P -> "in" | "on" | "by" | "with" """) sent = "Mary saw Bob".split() rd_parser = nltk.RecursiveDescentParser(grammar1) for tree in rd_parser.nbest_parse(sent): print tree print "=============== Writing Your Own Grammars ===============" grammar1 = nltk.data.load('file:mygrammar.cfg') sent = "Mary saw Bob".split() rd_parser = nltk.RecursiveDescentParser(grammar1) for tree in rd_parser.nbest_parse(sent): print tree print "=============== Recursion in Syntactic Structure ===============" grammar2 = nltk.parse_cfg(""" S -> NP VP NP -> Det Nom | PropN Nom -> Adj Nom | N VP -> V Adj | V NP | V S | V NP PP PP -> P NP PropN -> 'Buster' | 'Chatterer' | 'Joe' Det -> 'the' | 'a' N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log' Adj -> 'angry' | 'frightened' | 'little' | 'tall' V -> 'chased' | 'saw' | 'said' | 'thought' | 'was' | 'put' P -> 'on' """)
def generate_grammar(sentence):
    """Build an nltk CFG from the global frequent_rules plus one lexical rule
    per (word, POS) pair in `sentence`."""
    cfg_text = "\n".join(rule for rule, _freq in frequent_rules)
    # NOTE(review): no newline is added between the joined rules and the first
    # lexical rule; presumably each frequent rule already ends in one -- confirm.
    for word, pos_tag in sentence:
        cfg_text += "%s -> '%s' \n" % (pos_tag, word)
    return nltk.parse_cfg(cfg_text)
def __init__(self, blackboard, tense="present", person=1):
    # Register with the blackboard under the name "Sentence Expert",
    # importance 5, then set up verb lexicons and the sentence-level CFG.
    super(SentenceExpert, self).__init__(blackboard, "Sentence Expert", tense, person, 5)
    self.eva = ["be", "look", "feel"]   # emotional verbs, active
    self.atv = ["like", "hate", "love", "know", "need", "see"]   # attitude verbs
    """ eva - emotional verb active
        evp - emotional verb passive
        ej - emotion adjective
        en - emotional noun
        atv - attitude verb """
    # Terminals ('n', 'v', 'adj', ...) are placeholder tags, presumably
    # substituted with real words elsewhere -- confirm against the generator.
    self.grammar = nltk.parse_cfg("""
    S -> P | EP | Person ATV NP
    P -> NP VP
    EP -> Person EVA EJ | NP EVP Pron EJ | ENP VP
    ENP -> EN OF NP
    NP -> Det N | Det JJ N | Det EJ JJ N | Det EJ N | Det EN
    VP -> V | V ERB | ERB V
    Det -> 'the'
    N -> 'n'
    V -> 'v'
    EVA -> 'eva'
    EVP -> 'makes'
    EN -> 'en'
    EJ -> 'ej'
    JJ -> 'adj'
    ERB -> 'erb'
    ATV -> 'atv'
    Person -> 'person'
    Pron -> 'pron'
    OF -> 'of'
    CC -> 'and' | 'but' | 'because' | 'so'
    """)
def build_grammar(self):
    '''Use the corpus data and return a NLTK grammar.'''
    grammar_source = self.build_grammar_text().getvalue()
    return nltk.parse_cfg(grammar_source.encode('utf8'))
def demo():
    """Interactive demonstration of the CFG editor widget."""
    from nltk import Nonterminal, parse_cfg
    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in symbol_names.split()]
    grammar = parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    def cb(grammar):
        # Invoked by the editor whenever the grammar changes.
        print(grammar)

    top = Tk()
    editor = CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
def demo(): """ A demonstration of the shift-reduce parser. """ from nltk import parse, parse_cfg grammar = parse_cfg( """ S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP PP -> P NP NP -> 'I' N -> 'man' | 'park' | 'telescope' | 'dog' Det -> 'the' | 'a' P -> 'in' | 'with' V -> 'saw' """ ) sent = "I saw a man in the park".split() parser = parse.ShiftReduceParser(grammar, trace=2) for p in parser.nbest_parse(sent): print p
def test_ctor(self):
    """The Cyk constructor should accept productions and store the sentence."""
    # arrange
    grammar_text = """
    S -> NP VP
    VP -> VP PP
    VP -> V NP
    VP -> 'eats'
    PP -> P NP
    NP -> Det N
    NP -> 'she'
    V -> 'eats'
    P -> 'with'
    N -> 'fish'
    N -> 'fork'
    Det -> 'a'
    """
    grammar = nltk.parse_cfg(grammar_text)
    words = ['she', 'eats', 'a', 'fish', 'with', 'a', 'fork']
    # act
    inst = cyk.Cyk(words, grammar.productions())
    # assert
    self.assertTrue(inst != None)
    self.assertTrue(inst.sentence == words)
def __init__(self, blackboard, tense = "present", person = 1):
    # Register with the blackboard under the name "Sentence Expert",
    # importance 5, then set up verb lexicons and the sentence-level CFG.
    super(SentenceExpert, self).__init__(blackboard, "Sentence Expert", tense, person,5)
    self.eva = ["be", "look", "feel"]   # emotional verbs, active
    self.atv = ["like", "hate", "love", "know", "need", "see"]   # attitude verbs
    """ eva - emotional verb active
        evp - emotional verb passive
        ej - emotion adjective
        en - emotional noun
        atv - attitude verb """
    # Terminals ('n', 'v', 'adj', ...) are placeholder tags, presumably
    # substituted with real words elsewhere -- confirm against the generator.
    self.grammar = nltk.parse_cfg("""
    S -> P | EP | Person ATV NP
    P -> NP VP
    EP -> Person EVA EJ | NP EVP Pron EJ | ENP VP
    ENP -> EN OF NP
    NP -> Det N | Det JJ N | Det EJ JJ N | Det EJ N | Det EN
    VP -> V | V ERB | ERB V
    Det -> 'the'
    N -> 'n'
    V -> 'v'
    EVA -> 'eva'
    EVP -> 'makes'
    EN -> 'en'
    EJ -> 'ej'
    JJ -> 'adj'
    ERB -> 'erb'
    ATV -> 'atv'
    Person -> 'person'
    Pron -> 'pron'
    OF -> 'of'
    CC -> 'and' | 'but' | 'because' | 'so'
    """)
def __init__(self, cfgGrammar):
    # Compile the CFG text and prepare empty buckets for the production
    # transforms that are populated later.
    self.pb = productionBuilder.ProductionBuilder()
    self.grammar = nltk.parse_cfg(cfgGrammar)
    self.terminalTransformProductions = []
    self.nonTerminalTransformProductions = []
    self.singleNonTerminalTransformProductions = []
def Solution_parse(args):
    """Parse the first line of inputfile.txt with the parser selected by
    args.parseOption: rd (recursive descent), sr (shift-reduce),
    ec (Earley chart), td (top-down chart), bu (bottom-up chart).
    Errors are written to stderr and re-raised."""
    try:
        print "Parser option: %s " % args.parseOption
        gstring = open('solutiongrammar.cfg', 'r').read()
        grammar1 = nltk.parse_cfg(gstring)
        #print grammar1 , '\n'
        if (args.parseOption == 'rd'):
            parser = nltk.RecursiveDescentParser(grammar1)
        elif(args.parseOption == 'sr'):
            parser = nltk.ShiftReduceParser(grammar1)
        elif(args.parseOption == 'ec'):
            parser = nltk.parse.EarleyChartParser(grammar1)
        elif(args.parseOption == 'td'):
            parser = nltk.parse.TopDownChartParser(grammar1)
        elif(args.parseOption == 'bu'):
            parser = nltk.parse.BottomUpChartParser(grammar1)
        else:
            raise Exception("Unknown parseOption: %s" % args.parseOption)
        # NOTE(review): structure reconstructed from a collapsed source line --
        # it appears only the first input line is echoed and the loop variable
        # `line` (i.e. the last line read) is parsed; confirm intended nesting.
        i = 0
        for line in open('inputfile.txt','r'):
            i += 1
            pass
        if i == 1:
            print line
        sent = wordpunct_tokenize(line)
        print sent , '\n'
        pp = parser.parse(sent)
        print pp, '\n'
        pass
    except Exception, err:
        sys.stderr.write('ERROR: %s\n' % str(err))
        raise
def parse(wordlist, grammar, generator): """ Parse this thang. Call off to nltk's chart parser (which is the only one fast enough to parse the massive grammar). Only use the top best tree. If no parse tree is found, the program dies. The pass along the tree for actual symantic analysis, and then print out the parse and we're done! """ import nltk try: gr = nltk.parse_cfg(grammar) parts = [w.reduced() for w in wordlist] parser = nltk.BottomUpChartParser(gr) trees = parser.nbest_parse(parts) classifiers = ClassifierCollection(generator) ct = 0 for tree in trees: rparse(tree, classifiers, False) ct += 1 break if ct == 0: raise ParserException('No parse trees found') classifiers.finish() classifiers.pprint() except ValueError, e: raise ParserException(str(e))
def parseSimile(tokensWithIndices):
    """Scan a POS-tag sequence for simile spans ("X like Y" / "as ADJ as").

    tokensWithIndices -- list of (tag, index) pairs.
    Returns a flat list of the indices covered by detected similes.
    """
    #The grammar used to check a simile
    grammar = nltk.parse_cfg("""
    S -> NP "like" NP | "ADJ" "like" "NP" | NP V "like" NP | "EX" "like" "NP" | NP "as" "ADJ" "as" NP | V "as" "ADJ" "as" NP |OTH
    NP -> N | "ADJ" N | "DET" NP
    N -> "NP" | "PRO" | "N"
    V -> "VD" | "V" | "VG"
    OTH -> "OTH" "PUNC" "FW" "WH" "TO" "NUM" "ADV" "VD" "VG" "L" "VN" "N" "P" "S" "EX" "V" "CNJ" "UH" "PRO" "MOD"
    """)
    tokens = map(lambda i: i[0], tokensWithIndices)
    indices = map(lambda i: i[1], tokensWithIndices)
    parser = nltk.ChartParser(grammar)
    simile_indices = []
    start_token = 0
    # Slide a window over the tags, growing it until it parses as a simile
    # or runs off the end of the sentence.
    while (start_token < len(tokens) - 2):
        end_token = start_token + 2   #can't have simile smaller than 4 words
        simile = False
        while ( (not simile) and (end_token <= len(tokens))):
            if (len(parser.nbest_parse(tokens[start_token:end_token])) > 0):
                #If a parse tree was formed
                simile_indices.extend(indices[start_token:end_token])
                start_token = end_token
                simile = True
            else:
                end_token += 1
        start_token += 1
    return simile_indices
def cfgMatch ( nlQuery ):
    """Check nlQuery against a two-word command grammar and print its parse.

    Returns -1 when the query contains a word outside the lexicon.
    """
    terminalList = [ 'find','search','display','tell','faculty','student','staff','other' ]
    grammar = nltk.parse_cfg("""
    S -> A B
    A -> 'find'|'search'|'display'|'tell'
    B -> 'faculty'|'student'|'staff'|'other'
    """)
    # Since grammar crashes if a non term not in grammar is used.
    # We have to check and report error if such a word is used anywhere
    tokenizedList = list( word_tokenize( nlQuery ) )
    for word in tokenizedList:
        if word not in terminalList:
            print "ERROR"
            return -1
    parser = nltk.RecursiveDescentParser ( grammar )
    # Ask for at most one parse tree.
    parseTree = parser.nbest_parse ( tokenizedList, 1 )
    for tree in parseTree:
        print tree
        for elem in tree:
            for i in tree.node:
                print i
def demo(): from nltk import Nonterminal, parse_cfg nonterminals = 'S VP NP PP P N Name V Det' (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] grammar = parse_cfg(""" S -> NP VP PP -> P NP NP -> Det N NP -> NP PP VP -> V NP VP -> VP PP Det -> 'a' Det -> 'the' Det -> 'my' NP -> 'I' N -> 'dog' N -> 'man' N -> 'park' N -> 'statue' V -> 'saw' P -> 'in' P -> 'up' P -> 'over' P -> 'with' """) def cb(grammar): print grammar top = Tk() editor = CFGEditor(top, grammar, cb) Label(top, text='\nTesting CFG Editor\n').pack() Button(top, text='Quit', command=top.destroy).pack() top.mainloop()
def build_grammar():
    """Return the toy English CFG (flight-booking domain).

    Fix: the lexical rule for common nouns was written as `None -> ...`,
    leaving the `Noun` nonterminal (used by `NP -> Article Noun`) with no
    productions; it is now `Noun -> 'book' | 'flight' | 'meal'`.
    """
    return nltk.parse_cfg('''
    S -> NP VP
    S -> S Conj S
    NP -> Pronoun
    NP -> Name
    NP -> Article Noun
    NP -> Number
    NP -> NP PP
    NP -> NP RelClause
    VP -> Verb
    VP -> Verb NP
    VP -> Verb Adj
    VP -> VP PP
    PP -> Prep NP
    RelClause -> 'that' VP
    Article -> 'the' | 'a' | 'an' | 'this' | 'that'
    Prep -> 'to' | 'in' | 'on' | 'near'
    Conj -> 'and' | 'or' | 'but'
    Pronoun -> 'I' | 'you' | 'he' | 'me' | 'him'
    Verb -> 'book' | 'include' | 'prefer' | 'walk'
    Noun -> 'book' | 'flight' | 'meal'
    Name -> 'John' | 'Mary' | 'Boston'
    Adj -> 'first' | 'earliest' | 'cheap'
    ''')
def demo(): """ A demonstration of the recursive descent parser. """ from nltk import parse, parse_cfg grammar = parse_cfg(""" S -> NP VP NP -> Det N | Det N PP VP -> V NP | V NP PP PP -> P NP NP -> 'I' N -> 'man' | 'park' | 'telescope' | 'dog' Det -> 'the' | 'a' P -> 'in' | 'with' V -> 'saw' """) for prod in grammar.productions(): print prod sent = 'I saw a man in the park'.split() parser = parse.RecursiveDescentParser(grammar, trace=2) for p in parser.nbest_parse(sent): print p
def generate_grammar(sentence):
    """Build an nltk CFG from the global frequent_rules plus one lexical rule
    per (word, POS) pair in `sentence`."""
    rule_text = "\n".join(rule for rule, _freq in frequent_rules)
    lexical_text = "".join("%s -> '%s' \n" % (tag, word) for word, tag in sentence)
    return nltk.parse_cfg(rule_text + lexical_text)
def test_returnRelevantTuples_2(self):
    """executeAlgorithm should recognise the sample sentence as grammatical."""
    # arrange
    grammar_text = """
    S -> NP VP
    VP -> VP PP
    VP -> V NP
    VP -> 'eats'
    PP -> P NP
    NP -> Det N
    NP -> 'she'
    V -> 'eats'
    P -> 'with'
    N -> 'fish'
    N -> 'fork'
    Det -> 'a'
    """
    grammar = nltk.parse_cfg(grammar_text)
    words = ['she', 'eats', 'a', 'fish', 'with', 'a', 'fork']
    inst = cyk.Cyk(words, grammar.productions())
    # act
    inst.executeAlgorithm()
    # assert
    self.assertTrue(inst.isInGrammar())
def test():
    """Parse a sentence with a recursive-descent parser.

    Fixes: `nltk.Recursive Descent Parser` and `rd_parser.n best_parse`
    contained stray spaces inside identifiers (syntax errors).
    """
    import nltk
    # NOTE(review): the grammar string is empty; parse_cfg will reject it
    # until productions are added.
    grammar1 = nltk.parse_cfg("""
    """)
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    sent = "Lee ran away home".split()
    t = rd_parser.nbest_parse(sent)
def test():
    """Parse a sentence with a shift-reduce parser.

    Fix: `nltk.Shift Reduce Parser` contained stray spaces inside the
    identifier (a syntax error); it is now `nltk.ShiftReduceParser`.
    """
    import nltk
    # NOTE(review): the grammar string is empty; parse_cfg will reject it
    # until productions are added.
    grammar1 = nltk.parse_cfg("""
    """)
    sr_parse = nltk.ShiftReduceParser(grammar1)
    sent = "Lee ran away home".split()
    return sr_parse.parse(sent)
def __init__(self, blackboard):
    # Register with the blackboard under "Comparison Expert", importance 3,
    # and build the comparison/simile CFG.  Terminals ('adj', 'n', ...) are
    # placeholder tags, presumably substituted with real words elsewhere.
    super(ComparisonExpert, self).__init__(blackboard, "Comparison Expert", importance=3)
    self.grammar = nltk.parse_cfg("""
    S -> AS JJ AS Det N | JJ LIKE Det N
    JJ -> 'adj'
    N -> 'n'
    Det -> 'det'
    LIKE -> 'like'
    AS -> 'as'
    """)
def __init__(self, blackboard, tense = "present"):
    # Register with the blackboard under "Rhetorical Expert", importance 3,
    # and build the rhetorical-question CFG ("what/why is the ... so ...").
    # Terminals ('adj', 'n', ...) are placeholder tags, presumably
    # substituted with real words elsewhere.
    super(RhetoricalExpert, self).__init__(blackboard, "Rhetorical Expert", tense, 3)
    self.grammar = nltk.parse_cfg("""
    S -> WHAT BE Det NP | WHY BE Det N SO JJ
    NP -> JJ N | N
    JJ -> 'adj'
    N -> 'n'
    Det -> 'the'
    BE -> 'be'
    SO -> 'so'
    WHAT -> 'what'
    WHY -> 'why'
    """)
def __init__(self, blackboard, tense="present", person=3):
    # Register with the blackboard under "Metaphore Expert", importance 2,
    # and build the "person is like NP" metaphor CFG.  Terminals are
    # placeholder tags, presumably substituted with real words elsewhere.
    super(MetaphoreExpert, self).__init__(blackboard, "Metaphore Expert", tense=tense, person=person, importance=2)
    self.grammar = nltk.parse_cfg(
        """
    S -> Person BE LIKE NP
    NP -> Det JJ N | Det N
    Person -> 'person'
    JJ -> 'adj'
    N -> 'n'
    Det -> 'the'
    BE -> 'be'
    LIKE -> 'like'
    """
    )
def __init__(self, blackboard, tense="present"):
    # Register with the blackboard under "Rhetorical Expert", importance 3,
    # and build the rhetorical-question CFG ("what/why is the ... so ...").
    # Terminals ('adj', 'n', ...) are placeholder tags, presumably
    # substituted with real words elsewhere.
    super(RhetoricalExpert, self).__init__(blackboard, "Rhetorical Expert", tense, 3)
    self.grammar = nltk.parse_cfg("""
    S -> WHAT BE Det NP | WHY BE Det N SO JJ
    NP -> JJ N | N
    JJ -> 'adj'
    N -> 'n'
    Det -> 'the'
    BE -> 'be'
    SO -> 'so'
    WHAT -> 'what'
    WHY -> 'why'
    """)
def sentence_parse_example(): groucho_grammar = nltk.parse_cfg(""" S -> NP VP PP -> P NP NP -> Det N | Det N PP | 'I' VP -> V NP | VP PP Det -> 'an' | 'my' N -> 'elephant' | 'pajamas' V -> 'shot' P -> 'in' """) sent = ["I", "shot", "an", "elephant", "in", "my", "pajamas"] parser = nltk.ChartParser(groucho_grammar) trees = parser.nbest_parse(sent) for tree in trees: print tree
def chart_parsing():
    """Build, complete, and display WFST charts for the Groucho sentence."""
    groucho_grammar = nltk.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)
    tokens = "I shot an elephant in my pajamas".split()
    # Initial chart: only lexical entries.
    initial_chart = _chart_init_wfst(tokens, groucho_grammar)
    _chart_display(initial_chart, tokens)
    # Completed chart: closure under the grammar's binary rules.
    completed_chart = _chart_complete_wfst(initial_chart, tokens, groucho_grammar, trace=True)
    _chart_display(completed_chart, tokens)
def __init__(self, blackboard, tense="present", person=3):
    # Register with the blackboard under "Metaphore Expert", importance 2,
    # and build the "person is like NP" metaphor CFG.  Terminals are
    # placeholder tags, presumably substituted with real words elsewhere.
    super(MetaphoreExpert, self).__init__(blackboard, "Metaphore Expert", tense=tense, person=person, importance=2)
    self.grammar = nltk.parse_cfg("""
    S -> Person BE LIKE NP
    NP -> Det JJ N | Det N
    Person -> 'person'
    JJ -> 'adj'
    N -> 'n'
    Det -> 'the'
    BE -> 'be'
    LIKE -> 'like'
    """)
def chart_parse(in_file, grammar_file, out_file):
    """Chart-parse every sentence of in_file with the CFG in grammar_file,
    writing one pretty-printed tree per sentence to out_file.  On any
    failure the process exits with an explanatory message on stderr."""
    # Decode leniently: undecodable bytes are dropped rather than fatal.
    text = unicode(open(in_file, 'r').read(), errors='ignore')
    output = open(out_file, 'w')
    grammar_string = unicode(open(grammar_file, 'r').read(), errors='ignore')
    try:
        grammar = nltk.parse_cfg(grammar_string)
        parser = nltk.ChartParser(grammar)
        sentences = nltk.sent_tokenize(text)
        for sentence in sentences:
            words = nltk.word_tokenize(sentence)
            tree = parser.parse(words)
            output.write(tree.pprint())
            output.write('\n')
    except Exception, e:
        message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e)
        sys.stderr.write(message)
        sys.exit()
def another_test():
    """Chart-parse a POS-tag sequence with a tag-level CFG and print each tree."""
    tag_grammar = nltk.parse_cfg("""
    S -> NP VP
    NP -> 'DT' 'NN'
    VP -> 'VB' | 'VBP'
    VP -> 'VB' 'NN'
    """)
    # The "sentence" is a sequence of POS tags, not words.
    tags = "DT NN VB NN".split(" ")
    chart_parser = nltk.ChartParser(tag_grammar)
    for tree in chart_parser.nbest_parse(tags):
        print(tree)
def simpleGrammar(): grammar1 = nltk.parse_cfg(""" S -> NP VP VP -> V NP | V NP PP PP -> P NP V -> "saw" | "ate" | "walked" NP -> "John" | "Mary" | "Bob" | Det N | Det N PP Det -> "a" | "an" | "the" | "my" N -> "man" | "dog" | "cat" | "telescope" | "park" P -> "in" | "on" | "by" | "with" """) sent = "Mary saw Bob".split() rd_parser = nltk.RecursiveDescentParser(grammar1) for tree in rd_parser.nbest_parse(sent): print tree
def process2(s):
    """Tokenize `s` and return its parse trees under the Groucho grammar.

    Fix: the parser was fed the (word, tag) pairs from pos_tag, which can
    never match the grammar's word terminals ('shot', 'elephant', ...);
    it now parses the raw token list.
    """
    tokens = nltk.word_tokenize(s)
    grammar = nltk.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)
    parser = nltk.ChartParser(grammar)
    trees = parser.nbest_parse(tokens)
    return trees
def chart_parsing():
    """Build, complete, and display WFST charts for the Groucho sentence."""
    groucho_grammar = nltk.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)
    tokens = "I shot an elephant in my pajamas".split()
    # Initial chart holds only the lexical entries.
    initial_chart = _chart_init_wfst(tokens, groucho_grammar)
    _chart_display(initial_chart, tokens)
    # Complete the chart under the grammar rules, with tracing.
    completed_chart = _chart_complete_wfst(initial_chart, tokens, groucho_grammar, trace=True)
    _chart_display(completed_chart, tokens)
def ambiguity(): groucho_grammar = nltk.parse_cfg(""" S -> NP VP PP -> P NP NP -> Det N | Det N PP | 'I' VP -> V NP | VP PP Det -> 'an' | 'my' N -> 'elephant' | 'pajamas' V -> 'shot' P -> 'in' """) sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas'] parser = nltk.ChartParser(groucho_grammar) trees = parser.nbest_parse(sent) for tree in trees: print tree
def sentence_parse_example(): groucho_grammar = nltk.parse_cfg( """ S -> NP VP PP -> P NP NP -> Det N | Det N PP | 'I' VP -> V NP | VP PP Det -> 'an' | 'my' N -> 'elephant' | 'pajamas' V -> 'shot' P -> 'in' """ ) sent = ["I", "shot", "an", "elephant", "in", "my", "pajamas"] parser = nltk.ChartParser(groucho_grammar) trees = parser.nbest_parse(sent) for tree in trees: print tree
def simpleGrammar(): grammar1 = nltk.parse_cfg( """ S -> NP VP VP -> V NP | V NP PP PP -> P NP V -> "saw" | "ate" | "walked" NP -> "John" | "Mary" | "Bob" | Det N | Det N PP Det -> "a" | "an" | "the" | "my" N -> "man" | "dog" | "cat" | "telescope" | "park" P -> "in" | "on" | "by" | "with" """ ) sent = "Mary saw Bob".split() rd_parser = nltk.RecursiveDescentParser(grammar1) for tree in rd_parser.nbest_parse(sent): print tree
def recursiveSyntacticStructure(): grammar2 = nltk.parse_cfg(""" S -> NP VP NP -> Det Nom | PropN Nom -> Adj Nom | N VP -> V Adj | V NP | V S | V NP PP PP -> P NP PropN -> 'Buster' | 'Chatterer' | 'Joe' Det -> 'the' | 'a' N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log' Adj -> 'angry' | 'frightened' | 'little' | 'tall' V -> 'chased' | 'saw' | 'said' | 'thought' | 'was' | 'put' P -> 'on' """) rd_parser = nltk.RecursiveDescentParser(grammar1) sent = 'Mary saw a dog'.split() for t in rd_parser.nbest_parse(sent): print t
def parsing_types(): grammar = nltk.parse_cfg(""" S -> NP VP VP -> V NP | V NP PP PP -> P NP V -> "saw" | "ate" | "walked" NP -> "John" | "Mary" | "Bob" | Det N | Det N PP Det -> "a" | "an" | "the" | "my" N -> "man" | "dog" | "cat" | "telescope" | "park" P -> "in" | "on" | "by" | "with" """) sent = "Mary saw a dog".split() rd_parser = nltk.RecursiveDescentParser(grammar) print "==== recursive descent ====" for t in rd_parser.nbest_parse(sent): print t sr_parser = nltk.ShiftReduceParser(grammar) print "==== shift reduce ====" for t in sr_parser.nbest_parse(sent): print t
def cky(sentence, verbose):
    """
    Perform the syntactic analysis on the sentence. The function uses the
    Cocke-Kasami-Younger algorithm. If a word in the sentence is not
    recognized, the function terminate the program with an error message.
    @param sentence: any correctly formed Czech sentence.
    @type sentence: String
    @param verbose: verbose output.
    @type verbose: Bool
    """
    if verbose:
        print "Lexical analysis..."
    # Morphological lookup: every token must be known to the Ajka analyser.
    lexical = []
    for word in nltk.tokenize.WordPunctTokenizer().tokenize(utfize(sentence)):
        classes = morph(word)
        if not classes:
            print "Error: the word '%s' is not recognized by the morphological analyser Ajka." % word
            sys.exit(1)
        lexical.append((word, classes))
    # NOTE(review): nesting of the verbose block reconstructed from a
    # collapsed source line -- confirm which prints belong inside it.
    if verbose:
        for l in lexical:
            print "%s: %s" % (l[0], u", ".join(l[1]))
        print "\nLoading the grammar..."
    try:
        f = open(GRAMMAR_FILE, "r")
        grammar = nltk.parse_cfg(f.read())
        f.close()
    except IOError, err:
        print "Error: %s." % err
        sys.exit(1)
def test(grammarText, sentences):
    """Test the coverage of a CFG grammar.

    grammarText -- the grammar string
    sentences -- a list of sentences to test, with invalid ones prefixed with '*'
    """
    valid_sentences = [s for s in sentences if s[0] != '*']
    invalid_sentences = [s[1:] for s in sentences if s[0] == '*']
    parser = ChartParser(parse_cfg(grammarText))
    # Every valid sentence must produce at least one tree.
    for sentence in valid_sentences:
        parses = parser.nbest_parse(sentence.split())
        print(sentence + "\n" + "\n".join(map(str, parses)) + "\n")
        assert parses, "Valid sentence failed to parse."
    # Every invalid sentence must produce none.
    for sentence in invalid_sentences:
        print("*" + sentence)
        parses = parser.nbest_parse(sentence.split())
        assert parses == [], "Invalid sentence parsed successfully."
def ex_a(): from part2_cfg import simple_grammar grammar = nltk.parse_cfg(simple_grammar) parsers = (nltk.RecursiveDescentParser, nltk.ShiftReduceParser, nltk.EarleyChartParser, nltk.BottomUpChartParser) for parser_class in parsers: print "Testing parser:", parser_class.__name__ parser = parser_class(grammar, trace=2) i = "a man saw a man with a cat" trees = parser.nbest_parse(i.split()) print "\n%d trees matches input '%s':" % (len(trees), i) for tree in trees: print tree print ""
def simple_cfg(): # grammar = nltk.parse_cfg(""" # S -> NP VP # VP -> V NP | V NP PP # PP -> P NP # V -> "saw" | "ate" | "walked" # NP -> "John" | "Mary" | "Bob" | Det N | Det N PP # Det -> "a" | "an" | "the" | "my" # N -> "man" | "dog" | "cat" | "telescope" | "park" # P -> "in" | "on" | "by" | "with" # """) # also can load grammar from text file # grammar = nltk.data.load("file:mygrammar.cfg") grammar = nltk.parse_cfg( """ S -> NP VP NP -> Det Nom | PropN Nom -> Adj Nom | N VP -> V Adj | V NP | V S | V NP PP PP -> P NP PropN -> 'Buster' | 'Chatterer' | 'Joe' Det -> 'the' | 'a' N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log' Adj -> 'angry' | 'frightened' | 'little' | 'tall' V -> 'chased' | 'saw' | 'said' | 'thought' | 'was' | 'put' P -> 'on' """ ) # sent = "Mary saw Bob".split() # structural ambiguity - 2 parse trees for this. # prepositional phrase attach ambiguity. # sent = "the dog saw a man in a park".split() # For second grammar # sent = "the angry bear chased the frightened little squirrel".split() sent = "Chatterer said Buster thought the tree was tall".split() # rd_parser = nltk.RecursiveDescentParser(grammar, trace=2) # for debug # NOTE: production rules need to be right-recursive, ie X -> Y X rd_parser = nltk.RecursiveDescentParser(grammar) for tree in rd_parser.nbest_parse(sent): print tree
def cfg_demo(): """ A demonstration showing how C{ContextFreeGrammar}s can be created and used. """ from nltk import nonterminals, Production, parse_cfg # Create some nonterminals S, NP, VP, PP = nonterminals('S, NP, VP, PP') N, V, P, Det = nonterminals('N, V, P, Det') VP_slash_NP = VP / NP print 'Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP / NP] print ' S.symbol() =>', ` S.symbol() ` print print Production(S, [NP]) # Create some Grammar Productions grammar = parse_cfg(""" S -> NP VP PP -> P NP NP -> Det N | NP PP VP -> V NP | VP PP Det -> 'a' | 'the' N -> 'dog' | 'cat' V -> 'chased' | 'sat' P -> 'on' | 'in' """) print 'A Grammar:', ` grammar ` print ' grammar.start() =>', ` grammar.start() ` print ' grammar.productions() =>', # Use string.replace(...) is to line-wrap the output. print ` grammar.productions() `.replace(',', ',\n' + ' ' * 25) print print 'Coverage of input words by a grammar:' print grammar.covers(['a', 'dog']) print grammar.covers(['a', 'toy'])
def translate2(q, tname='T'): global T2, T3, T4, T5, T6, GR T2 = time.time() # tokenization l = tokenize(q) tokens = [a[1] for a in l] assert (tokens[0] == '//') T3 = time.time() # build grammar GR = grammar_text for typ, t in l: if typ == 's': GR += "Qname -> '" + t + "'\n" grammar = parse_cfg(GR) parser = parse.ChartParser(grammar, parse.TD_STRATEGY) T4 = time.time() # chart-parse the query trees = parser.nbest_parse(tokens) if not trees: T5 = T6 = time.time() return None, None tree = trees[0] T5 = time.time() # translate the parse tree r = Trans(tree, SerialNumber(), tname=tname).getSql() T6 = time.time() try: r1 = TransFlat(tree, SerialNumber(), tname=tname).getSql() except: r1 = None r1 = TransFlat(tree, SerialNumber(), tname=tname).getSql() return r, r1
def translate2(q,tname='T'):
    """Translate query string `q` into SQL (nested and flat forms).

    Records wall-clock checkpoints in globals T2..T6 and the generated
    grammar text in GR.  Returns (nested_sql, flat_sql); (None, None) when
    the query does not parse.
    """
    global T2, T3, T4, T5, T6, GR
    T2 = time.time()
    # tokenization
    l = tokenize(q)
    tokens = [a[1] for a in l]
    assert(tokens[0] == '//')
    T3 = time.time()
    # build grammar: extend the static grammar with one Qname rule per
    # string-typed token found in the query
    GR = grammar_text
    for typ, t in l:
        if typ == 's':
            GR += "Qname -> '" + t + "'\n"
    grammar = parse_cfg(GR)
    parser = parse.ChartParser(grammar, parse.TD_STRATEGY)
    T4 = time.time()
    # chart-parse the query
    trees = parser.nbest_parse(tokens)
    if not trees:
        T5 = T6 = time.time()
        return None, None
    tree = trees[0]
    T5 = time.time()
    # translate the parse tree
    r = Trans(tree,SerialNumber(),tname=tname).getSql()
    T6 = time.time()
    try:
        r1 = TransFlat(tree,SerialNumber(),tname=tname).getSql()
    except:
        r1 = None
    # NOTE(review): this second, unguarded TransFlat call re-runs (and can
    # re-raise) even after the guarded attempt failed -- looks like a
    # debugging leftover; confirm intent.
    r1 = TransFlat(tree,SerialNumber(),tname=tname).getSql()
    return r, r1
def parsing_types(): grammar = nltk.parse_cfg( """ S -> NP VP VP -> V NP | V NP PP PP -> P NP V -> "saw" | "ate" | "walked" NP -> "John" | "Mary" | "Bob" | Det N | Det N PP Det -> "a" | "an" | "the" | "my" N -> "man" | "dog" | "cat" | "telescope" | "park" P -> "in" | "on" | "by" | "with" """ ) sent = "Mary saw a dog".split() rd_parser = nltk.RecursiveDescentParser(grammar) print "==== recursive descent ====" for t in rd_parser.nbest_parse(sent): print t sr_parser = nltk.ShiftReduceParser(grammar) print "==== shift reduce ====" for t in sr_parser.nbest_parse(sent): print t
def test_returnRelevantTuples_1(self):
    """getAcceptablePairs(0, 1) should yield exactly (NP, VP) and (NP, V)."""
    # arrange
    grammar_text = """
    S -> NP VP
    VP -> VP PP
    VP -> V NP
    VP -> 'eats'
    PP -> P NP
    NP -> Det N
    NP -> 'she'
    V -> 'eats'
    P -> 'with'
    N -> 'fish'
    N -> 'fork'
    Det -> 'a'
    """
    grammar = nltk.parse_cfg(grammar_text)
    words = ['she', 'eats', 'a', 'fish', 'with', 'a', 'fork']
    inst = cyk.Cyk(words, grammar.productions())
    # act
    pairs = inst.getAcceptablePairs(0, 1)
    # assert
    self.assertTrue(len(pairs) == 2)
    self.assertTrue(pairs[0][0] == "NP")
    self.assertTrue(pairs[0][1] == "VP")
    self.assertTrue(pairs[1][0] == "NP")
    self.assertTrue(pairs[1][1] == "V")
def recursiveSyntacticStructure(): grammar2 = nltk.parse_cfg( """ S -> NP VP NP -> Det Nom | PropN Nom -> Adj Nom | N VP -> V Adj | V NP | V S | V NP PP PP -> P NP PropN -> 'Buster' | 'Chatterer' | 'Joe' Det -> 'the' | 'a' N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log' Adj -> 'angry' | 'frightened' | 'little' | 'tall' V -> 'chased' | 'saw' | 'said' | 'thought' | 'was' | 'put' P -> 'on' """ ) rd_parser = nltk.RecursiveDescentParser(grammar1) sent = "Mary saw a dog".split() for t in rd_parser.nbest_parse(sent): print t
def simple_cfg(): # grammar = nltk.parse_cfg(""" # S -> NP VP # VP -> V NP | V NP PP # PP -> P NP # V -> "saw" | "ate" | "walked" # NP -> "John" | "Mary" | "Bob" | Det N | Det N PP # Det -> "a" | "an" | "the" | "my" # N -> "man" | "dog" | "cat" | "telescope" | "park" # P -> "in" | "on" | "by" | "with" # """) # also can load grammar from text file # grammar = nltk.data.load("file:mygrammar.cfg") grammar = nltk.parse_cfg(""" S -> NP VP NP -> Det Nom | PropN Nom -> Adj Nom | N VP -> V Adj | V NP | V S | V NP PP PP -> P NP PropN -> 'Buster' | 'Chatterer' | 'Joe' Det -> 'the' | 'a' N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log' Adj -> 'angry' | 'frightened' | 'little' | 'tall' V -> 'chased' | 'saw' | 'said' | 'thought' | 'was' | 'put' P -> 'on' """) # sent = "Mary saw Bob".split() # structural ambiguity - 2 parse trees for this. # prepositional phrase attach ambiguity. # sent = "the dog saw a man in a park".split() # For second grammar # sent = "the angry bear chased the frightened little squirrel".split() sent = "Chatterer said Buster thought the tree was tall".split() # rd_parser = nltk.RecursiveDescentParser(grammar, trace=2) # for debug # NOTE: production rules need to be right-recursive, ie X -> Y X rd_parser = nltk.RecursiveDescentParser(grammar) for tree in rd_parser.nbest_parse(sent): print tree
def app():
    """
    Create a recursive descent parser demo, using a simple grammar and text.
    """
    from nltk import parse_cfg
    demo_grammar = parse_cfg("""
    # Grammatical productions.
    S -> NP VP
    NP -> Det N PP | Det N
    VP -> V NP PP | V NP | V
    PP -> P NP
    # Lexical productions.
    NP -> 'I'
    Det -> 'the' | 'a'
    N -> 'man' | 'park' | 'dog' | 'telescope'
    V -> 'ate' | 'saw'
    P -> 'in' | 'under' | 'with'
    """)
    tokens = 'the dog saw a man in the park'.split()
    RecursiveDescentApp(demo_grammar, tokens).mainloop()
def demo():
    """
    A demonstration of the shift-reduce parser.
    """
    from nltk import parse, parse_cfg
    toy_grammar = parse_cfg("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """)
    tokens = 'I saw a man in the park'.split()
    sr = parse.ShiftReduceParser(toy_grammar, trace=2)
    for tree in sr.nbest_parse(tokens):
        print(tree)
# to run this script, please use Python's interactive mode, i.e. # python -i nltk_chartparser_app.py import nltk words = ["I", "shot", "an", "elephant", "in", "my", "pajamas"] grammar = nltk.parse_cfg(""" S -> NP VP PP -> P NP NP -> Det N | 'I' | NP PP VP -> V NP | VP PP Det -> 'an' | 'my' N -> 'elephant' | 'pajamas' V -> 'shot' P -> 'in' """) nltk.app.chartparser_app.ChartParserApp(grammar, words)
@author: coco wang @license: Apache Licence @contact: [email protected] @site: @software: PyCharm @file: D17.py @time: 2018/1/17 0017 上午 9:37 """ # 分析句子结构 import nltk groucho_grammar = nltk.parse_cfg(""" ... S -> NP VP ... PP -> P NP ... NP -> Det N | Det N PP | 'I' ... VP -> V NP | VP PP ... Det -> 'an' | 'my' ... N -> 'elephant' | 'pajamas' ... V -> 'shot' ... P -> 'in' ... """) sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas'] parser = nltk.ChartParser(groucho_grammar) trees = parser.nbest_parse(sent) for tree in trees: print(tree) sr_parse = nltk.ShiftReduceParser(groucho_grammar) sent = 'Mary saw a dog'.split() print(sr_parse.parse(sent))
print tree
nltk.draw.tree.TreeView(tree)
raw_input()
# --------------------------- OWN CFG -------------------------------- #
#sent = word_tokenize(sent)
tags = pos_tag(sent, 1)
print tags
raw_input()
# Grammar whose terminals are the tag symbols 'NP', 'VP', 'P' themselves.
# NOTE(review): nbest_parse is fed `sent` (words), not the tags printed
# above -- confirm which sequence was meant to be parsed.
own_grammar = nltk.parse_cfg("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP
VP -> V NP | VP PP
NP -> 'NP'
VP -> 'VP'
PP -> 'P'
""")
rd_parser = nltk.RecursiveDescentParser(own_grammar)
for p in rd_parser.nbest_parse(sent):
    print p
    nltk.draw.tree.TreeView(p)
    raw_input()
element -> """ + _orjoin(elements) + """ digit -> """ + _orjoin(digits) + """ phase -> """ + _orjoin(phases) + """ number -> digit | digit number group -> suffixed | suffixed group paren_group_round -> '(' group ')' paren_group_square -> '[' group ']' plus_minus -> '+' | '-' number_suffix -> number ion_suffix -> '^' number plus_minus | '^' plus_minus suffix -> number_suffix | number_suffix ion_suffix | ion_suffix unsuffixed -> element | paren_group_round | paren_group_square suffixed -> unsuffixed | unsuffixed suffix """ parser = nltk.ChartParser(nltk.parse_cfg(grammar)) def _clean_parse_tree(tree): ''' The parse tree contains a lot of redundant nodes. E.g. paren_groups have groups as children, etc. This will clean up the tree. ''' def unparse_number(n): ''' Go from a number parse tree to a number ''' if len(n) == 1: rv = n[0][0] else: rv = n[0][0] + unparse_number(n[1]) return rv
import nltk #a simple context-free grammar grammar1 = nltk.parse_cfg(""" S -> NP VP VP -> V NP | V NP PP PP -> P NP V -> "saw" | "ate" | "walked" NP -> "John" | "Mary" | "Bob" | Det N | DEt N PP Det -> "a" | "an" | "the" | "my" N -> "man" | "dog" | "cat" | "telescope" | "park" P -> "in" | "on" | "by" | "with" """) sent = "Mary saw Bob".split() rd_parser = nltk.RecursiveDescentParser(grammar1) for tree in rd_parser.nbest_parse(sent): print tree #create my own grammar, grammar can be written on an independent file #grammar1 = nltk.data.load('file:mygrammar.cfg') #...
import nltk grammar = nltk.parse_cfg(open("grammar.cfg")) parser = nltk.ChartParser(grammar) def parse(sentence): sentence = nltk.word_tokenize(sentence) return parser.nbest_parse(sentence)[0] if __name__ == "__main__": print parse("the man saw the dog with the telescope")
from nltk import pos_tag from nltk.tokenize import wordpunct_tokenize from collections import OrderedDict import random funct_dict = OrderedDict({}) grammar1 = nltk.parse_cfg(""" Sent -> NP VP | NP VP END NP -> Det Nom | PropN | Det NP | N | PR | PR Nom Nom -> Adj Nom | N VP -> V Adj | V NP | V S | V NP PP | V Prep NP | V | V CC V PP -> Prep NP PropN -> 'NNP' | 'NNPS' Det -> 'DT' N -> 'NN' | 'NNS' Adj -> 'JJ' | 'JJR' | 'JJS' V -> 'VB' | 'VBD' | 'VBG' | 'VBN' | 'VBP' | 'VBZ' Prep -> 'TO' | 'IN' CC -> 'CC' PR -> 'PRP' | 'PRP$' RB -> 'RB' | 'RBR' | 'RBS' END -> '.' | '?' | '!' """) def add_func_to_dict(name=None): def wrapper(func): function_name = name if function_name is None: function_name = func.__name__
from nltk import parse_cfg from nltk import parse from nltk import Tree grammar = parse_cfg(''' S -> WHO QP QM | WHICH Nom QP QM QP -> VP | DO NP T VP -> I | T NP | BE A | BE NP | VP AND VP NP -> P | AR Nom | Nom Nom -> AN | AN Rel AN -> N | A AN Rel -> WHO VP | NP T N -> "Ns" | "Np" I -> "Is" | "Ip" T -> "Ts" | "Tp" A -> "A" P -> "P" BE -> "BEs" | "BEp" DO -> "DOs" | "DOp" AR -> "AR" WHO -> "WHO" WHICH -> "WHICH" AND -> "AND" QM -> "?" ''') chartpsr = parse.ChartParser(grammar) def all_parses(wlist, lx):