def _expand_node(self, production: nltk.Production):
    """Expand the node on top of the stack with ``production``.

    The top node is popped. When its label equals the production's left-hand
    side, the right-hand side is attached to it through ``self._append`` and
    the production is recorded in ``self.actions``.

    Args:
        production: The grammar rule to apply to the node on top of the stack.

    Raises:
        ValueError: If the rule's left-hand side differs from the label of
            the popped node. The node is pushed back first, so the stack is
            left as it was before the call.
    """
    node = self.stack.pop()
    applicable = production.lhs() == node.label()
    if not applicable:
        # Restore the stack before reporting the mismatch.
        self.stack.append(node)
        raise ValueError(
            f'Rule is not applicable: {production}, stack: {self.stack}.')
    self._append(node, production.rhs())
    self.actions.append(production)
def handle_singles(rule, _seen=()):
    """Eliminate the unit production ``rule`` by inlining its right-hand side.

    ``rule`` is expected to have the form ``A -> B``. Every production
    ``B -> rhs`` found in the module-level grammar ``cfg_`` is re-rooted at
    ``A``:

    * a single terminal on the right is appended to ``newrules``;
    * a single nonterminal on the right is again a unit production and is
      expanded recursively;
    * any other right-hand side is appended to ``allrules`` unless an equal
      production is already there.

    Args:
        rule: Production whose first right-hand-side symbol is expanded.
        _seen: Internal bookkeeping: the symbols currently being expanded
            further up the recursion. Callers should not pass it.
    """
    target = rule.rhs()[0]
    if target in _seen:
        # A cycle of unit productions (A -> B, B -> A). Expanding it again
        # would recurse forever without producing anything new.
        return
    chain = _seen + (target,)

    for candidate in cfg_.productions(target):
        rerooted = Production(rule.lhs(), candidate.rhs())
        if len(candidate) != 1:
            if rerooted not in allrules:
                allrules.append(rerooted)
        elif nltk.grammar.is_nonterminal(candidate.rhs()[0]):
            handle_singles(rerooted, chain)
        else:
            newrules.append(rerooted)
def test1():
    """Print a few basic properties of ``Nonterminal`` and ``Production``.

    Output, in order: the symbol of a nonterminal, the left- and right-hand
    side of a production, and the result of two production equality checks.
    """
    nt1 = Nonterminal('NP')
    nt2 = Nonterminal('VP')

    # print() calls instead of Python 2 print statements, so the module
    # parses under Python 3.
    print(nt1.symbol())

    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, DT = nonterminals('N, V, P, DT')

    prod1 = Production(S, [NP, VP])
    prod2 = Production(NP, [DT, NP])

    print(prod1.lhs())
    print(prod1.rhs())

    print(prod1 == Production(S, [NP, VP]))
    print(prod1 == prod2)
def train_grammar(unknown_words=None, nb_reduced_production=6000):
    """Induce a PCFG from the treebank files listed in the module-level ``train``.

    Every parsed sentence is transformed in place (unary collapse, then
    Chomsky normal form with ``horzMarkov=2``) and its productions are
    collected. Only the ``nb_reduced_production`` most frequent distinct
    productions are kept, each repeated as often as it occurred. All lexical
    productions are then added back, together with one extra lexical
    production per unknown word.

    Args:
        unknown_words: Words to add as extra terminals. ``None`` (the
            default) means no extra words.
        nb_reduced_production: How many distinct productions to keep.

    Returns:
        The grammar returned by ``induce_pcfg`` with start symbol ``S``.
    """
    # A fresh list per call instead of a shared mutable default argument.
    unknown_words = [] if unknown_words is None else unknown_words

    productions = []
    for fileid in train:
        for tree in treebank.parsed_sents(fileid):
            tree.collapse_unary(collapsePOS=False)  # A-B-C chains become A-B+C
            tree.chomsky_normal_form(horzMarkov=2)  # binarise long right-hand sides
            productions += tree.productions()

    # Keep the most common productions, preserving their observed counts.
    counts = collections.Counter(productions)
    kept = [
        production
        for production, count in counts.most_common(nb_reduced_production)
        for _ in range(count)
    ]

    # Add terminal rules and unknown words back into the reduced set.
    # NOTE(review): each unknown word is paired with the left-hand side of
    # every lexical production occurrence; confirm this nesting is intended.
    lexical = []
    for production in productions:
        if isinstance(production.rhs()[0], str):
            lexical.append(production)
            for word in unknown_words:
                lexical.append(Production(production.lhs(), [word]))
    kept += lexical

    return induce_pcfg(Nonterminal('S'), kept)
def update_complete_chart(chart, tokens, grammar, trace=False):
    """Updates non-diagonal elements of chart

    Cells are filled by increasing span length. For each cell
    ``chart[start][end]`` every split point ``mid`` is tried, and a rule is
    recorded whenever a pair taken from the first element of the left and
    right cells is a right-hand side of the grammar.

    Arguments:
    ----------
    chart (list):
        List of list containing chart algorithm elements. Only the first
        element of each already-filled cell (``cell[0]``) is inspected.
    tokens (list):
        List of words in input sentence
    grammar (list):
        Object whose ``productions()`` yields the grammar's production rules
    trace (bool):
        Unused; kept so existing callers keep working.

    Returns:
    --------
    The same ``chart`` object, with each non-diagonal cell replaced by a list
    of ``(category, rule_string, mid)`` tuples.
    """
    index = {p.rhs(): p.lhs() for p in grammar.productions()}
    num_tokens = len(tokens)

    for span in range(2, num_tokens + 1):
        for start in range(num_tokens + 1 - span):
            end = start + span
            entries = []
            for mid in range(start + 1, end):
                left, right = chart[start][mid], chart[mid][end]
                if len(left) == 0 or len(right) == 0:
                    continue
                for nt1 in left[0]:
                    for nt2 in right[0]:
                        if not (nt1 and nt2 and (nt1, nt2) in index):
                            continue
                        category = index[(nt1, nt2)]
                        p = Production(
                            category, (Nonterminal(nt1), Nonterminal(nt2)))
                        # Store the split point with the entry itself, so
                        # each rule keeps the `mid` it was found at rather
                        # than the last value the loop reached.
                        entries.append(
                            (category, f'{p._lhs} -> {p._rhs}', mid))
            chart[start][end] = entries
    return chart
def _generate_production(self, t):
    """Build the production that rewrites the root of ``t`` into its children.

    Args:
        t: A tree node exposing ``label()`` and iteration over its children.
            String children are used as terminals; any other child
            contributes a ``Nonterminal`` built from its own ``label()``.

    Returns:
        ``Production`` whose left-hand side is the label of ``t`` and whose
        right-hand side lists the children in order.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses as terminals.
    rhs = tuple(
        child if isinstance(child, str) else Nonterminal(child.label())
        for child in t
    )
    return Production(Nonterminal(t.label()), rhs)
def update_grammar(productions, unknown):
    """Add a lexical rule for every unknown word and re-induce the PCFG.

    The words are tagged with the module-level ``pos_tagger``. For each word
    a production ``TAG -> word`` is appended to ``productions`` and reported
    on standard output.

    Args:
        productions: List of productions; it is extended in place.
        unknown: Sequence of words to add to the grammar.

    Returns:
        The grammar returned by ``induce_pcfg`` with start symbol ``SENT``.
    """
    tagged = pos_tagger.tag(unknown)
    for word, tagged_word in zip(unknown, tagged):
        pos = nonterminals(tagged_word[1])[0]
        production_ = Production(pos, [word])
        productions.append(production_)
        # print() call instead of a Python 2 print statement.
        print(production_, "added to productions")

    return induce_pcfg(Nonterminal('SENT'), productions)
def parse_productions(self, parse_tree, parent_label='', parent_annotation_level='non-preterminal'):
    """
    Extract parent-annotated productions from ``parse_tree``, root first.

    A node with exactly one child is treated as a preterminal and yields a
    single production rewriting to that child. Any other node yields one
    production for itself followed by the productions of its children.

    :type parse_tree: nltk.Tree
    :type parent_label: str
    :param parent_label: Label of the parent node; empty for the start
        symbol, which is annotated as <original>_Parent_NULL
    :param parent_annotation_level: Should be one of 'all' and 'non-preterminal'
        if 'all' -> Parent annotate all nonterminals as <original>_Parent_<parent>
        if 'non-preterminal' -> Parent annotate only non-preterminals
    :type parent_annotation_level: str
    :rtype productions: list(nltk.Production)
    """
    if not parse_tree:
        return []

    label = parse_tree.label()

    if len(parse_tree) == 1:
        if parent_annotation_level == 'all':
            # A missing parent means this is the start symbol.
            lhs = Nonterminal(label + '_Parent_' + (parent_label or 'NULL'))
        else:
            lhs = Nonterminal(label)
        return [Production(lhs=lhs, rhs=[parse_tree[0]])]

    productions = []
    rhs = []
    for child in parse_tree:
        if parent_annotation_level == 'non-preterminal' and len(child) == 1:
            rhs.append(Nonterminal(child.label()))
        else:
            rhs.append(Nonterminal(child.label() + '_Parent_' + label))
        # Pass the annotation level down. Without it the recursion falls
        # back to the default and the children's left-hand sides no longer
        # match the symbols used on this right-hand side.
        productions += self.parse_productions(
            parse_tree=child,
            parent_label=label,
            parent_annotation_level=parent_annotation_level)

    lhs = Nonterminal(label + '_Parent_' + (parent_label or 'NULL'))
    return [Production(lhs=lhs, rhs=rhs)] + productions
def create_grammar(x_train):
    """Build a PCFG from treebank files, with an ``<UNK>`` terminal per tag.

    Every parsed sentence of every file in ``x_train`` is converted to
    Chomsky normal form in place and its productions are collected. One
    production ``TAG -> '<UNK>'`` is then added for each tag listed below.

    Args:
        x_train: Iterable of treebank file identifiers.

    Returns:
        The grammar returned by ``create_pcfg`` with start symbol ``S``.
    """
    # Penn Treebank word-level tags, plus NP. The possessive tags are
    # 'PRP$' and 'WP$'; listing 'PRP' and 'WP' twice would count their
    # <UNK> rule double and leave the possessive tags without one.
    unk_tags = (
        'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD',
        'NN', 'NNS', 'NNP', 'NNPS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR',
        'RBS', 'RP', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',
        'WDT', 'WP', 'WP$', 'WRB', 'NP',
    )

    productions = []
    for fileid in x_train:
        for tree in treebank.parsed_sents(fileid):
            tree.chomsky_normal_form()
            productions += tree.productions()

    for tag in unk_tags:
        productions.append(Production(Nonterminal(tag), ('<UNK>', )))

    return create_pcfg(Nonterminal('S'), productions)
def handle_long(rule, idx):
    """Binarise ``rule`` from the right, appending the pieces to ``newrules``.

    While the right-hand side has more than two symbols, its last two
    symbols are replaced by a single nonterminal. That nonterminal is the
    result of ``checkinrhs`` for the pair when it is not ``None`` (presumably
    an existing rule producing the same pair; verify against
    ``checkinrhs``). Otherwise a fresh ``Z<idx>`` is created and its rule is
    appended to ``newrules``. The final two-symbol rule is appended to
    ``newrules`` as well.

    Args:
        rule: Production to binarise.
        idx: Number used to name the next fresh ``Z`` nonterminal. It is
            advanced once per reduction step, whether or not a fresh
            nonterminal was needed.

    Returns:
        The last counter value consumed, i.e. ``idx - 1`` after all
        reduction steps.
    """
    while len(rule) > 2:
        *prefix, left, right = rule.rhs()
        parent = checkinrhs(left, right)
        if parent is None:
            parent = Nonterminal("Z" + str(idx))
            newrules.append(Production(parent, (left, right)))
        rule = Production(rule.lhs(), prefix + [parent])
        idx += 1

    if len(rule) == 2:
        newrules.append(rule)
    return idx - 1
def generate_impacts_grammar(attribute, phase):
    """Build the CFG for the question about the impact of the incident.

    The grammar is a single chain of rules spelling out
    "Do you think the impact of the incident was[ not] <attribute> ?".

    Args:
        attribute: Terminal inserted as the attribute being asked about.
        phase: ``1`` selects the affirmative verb ('was'); any other value
            selects the negated form ('was not').

    Returns:
        A ``CFG`` with start symbol ``S``.
    """
    nt = Nonterminal
    verb = 'was' if phase == 1 else 'was not'

    rules = [
        Production(nt('S'), (nt('AUX1'), )),
        Production(nt('AUX1'), ('Do', nt('S1'))),
        Production(nt('S1'), ('you', nt('V1'))),
        Production(nt('V1'), ('think', nt('ART'))),
        Production(nt('ART'), ('the impact of the incident', nt('V2'))),
        Production(nt('END'), ('?', )),
        Production(nt('V2'), (verb, nt('ATTR'))),
        Production(nt('ATTR'), (attribute, nt('END'))),
    ]
    return CFG(nt('S'), rules)
def add_rules(nonterminal_name: Text,
              list_terminals: Sequence[Text]) -> Sequence[Production]:
    """Create one lexical production per terminal for a given nonterminal.

    Arguments:
      nonterminal_name: The name of the nonterminal on the left-hand side.
      list_terminals: The terminals; each one becomes the sole right-hand
        side symbol of its own rule.
    Returns:
      A list of production rules, in the order of ``list_terminals``.
    """
    return [
        Production(Nonterminal(nonterminal_name), (terminal, ))
        for terminal in list_terminals
    ]
def build_tree(self, back, row, col, root):
    """Recursively rebuild the parse tree rooted at ``root`` from ``back``.

    Given a back-pointer matrix, a row/column entry point into it and the
    root label, returns the syntactic parse tree stored under that entry.

    Returns ``Tree(None, [])`` when a ``TypeError`` occurs while following
    the back-pointers, which is how input not licensed by the PCFG shows up.
    """
    label_id = self.index[root]

    # Base case: ``root`` is the left-hand side of a lexical production.
    if root in (Production.lhs(rule) for rule in self.terminals()):
        return Tree(root, [back[row + 1, row + 1, label_id]])

    # Recursive case: follow the back-pointer to the split and both children.
    try:
        split, left_label, right_label = back[row, col, label_id]
        left_subtree = self.build_tree(back, row, split, left_label)
        right_subtree = self.build_tree(back, split, col, right_label)
        return Tree(root, [left_subtree, right_subtree])
    except TypeError:
        return Tree(None, [])
def cfg_demo():
    """
    A demonstration showing how context-free grammars can be created and used.
    """
    # NLTK 3 names: CFG.fromstring replaces parse_cfg.
    from nltk import CFG, Production, nonterminals

    # Create some nonterminals
    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, Det = nonterminals('N, V, P, Det')
    VP_slash_NP = VP / NP

    print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP_slash_NP])
    print(' S.symbol() =>', repr(S.symbol()))
    print()

    print(Production(S, [NP]))

    # Create some Grammar Productions
    grammar = CFG.fromstring("""
      S -> NP VP
      PP -> P NP
      NP -> Det N | NP PP
      VP -> V NP | VP PP
      Det -> 'a' | 'the'
      N -> 'dog' | 'cat'
      V -> 'chased' | 'sat'
      P -> 'on' | 'in'
    """)

    print('A Grammar:', repr(grammar))
    print(' grammar.start() =>', repr(grammar.start()))
    print(' grammar.productions() =>', end=' ')
    # The replace() call line-wraps the output.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 25))
    print()

    print('Coverage of input words by a grammar:')
    for tokens in (['a', 'dog'], ['a', 'toy']):
        # check_coverage() raises ValueError when a token is not covered,
        # so turn that into the True/False line this demo prints.
        try:
            grammar.check_coverage(tokens)
        except ValueError:
            print(False)
        else:
            print(True)
def _gp_lexicalized_rules(self, ptree):
    """Extract production rules whose left-hand side carries the parent label.

    Parameters
    ------------
    ptree : nltk Tree object whose subtrees expose ``parent()``

    Returns
    ---------
    production_rules : list
        One production per subtree that has a parent. The left-hand side is
        ``<parent label>^<node label>`` and the right-hand side is whatever
        ``_child_names`` returns for the node.

    Raises
    ---------
    TypeError : if the label of ``ptree`` is not a string
    """
    if not isinstance(ptree._label, string_types):
        raise TypeError('Productions can only be generated from trees having node labels that are strings')

    prods = []
    for node in ptree.subtrees():
        parent = node.parent()
        if not parent:
            # Nothing to annotate the root with.
            continue
        annotated = Nonterminal(parent.label() + '^' + node._label)
        prods.append(Production(annotated, _child_names(node)))
    return prods
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG

# Three standalone nonterminals: print their symbols, then compare them
# pairwise.
nonterminal1, nonterminal2, nonterminal3 = (
    Nonterminal(symbol) for symbol in ('NP', 'VP', 'PP'))

print(nonterminal1.symbol(),
      nonterminal2.symbol(),
      nonterminal3.symbol(),
      sep='\n')
print(nonterminal1 == nonterminal2,
      nonterminal2 == nonterminal3,
      nonterminal1 == nonterminal3,
      sep='\n')

# Several nonterminals created at once from a comma-separated string.
S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')

# Productions pair a left-hand side with a list of right-hand-side symbols.
production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])

print(production1.lhs(),
      production1.rhs(),
      production3.lhs(),
      production3.rhs(),
      sep='\n')

# Equality of productions: first against a freshly built twin, then against
# a different production.
print(production3 == Production(VP, [V, NP, NP, PP]))
print(production2 == production3)
def nonterminals(self):
    """Returns the productions accepted by ``Production.is_nonlexical``.

    These are the rules of the form A -> B C. The result is a lazy iterator
    over ``self.grammar.productions()``.
    """
    return filter(Production.is_nonlexical, self.grammar.productions())
def demo2():
    """Open ``CFGDemo`` on a small toy grammar and a sample sentence.

    Blocks in the demo's ``mainloop()`` until it returns.
    """
    from nltk import Nonterminal, Production, CFG

    S, VP, NP, PP, P, N, Name, V, Det = map(
        Nonterminal, "S VP NP PP P N Name V Det".split())

    # Syntactic productions as (lhs, rhs) pairs.
    syntactic = [
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
        (PP, []),
        (PP, ["up", "over", NP]),
    ]
    # Lexical productions as (lhs, word) pairs.
    lexical = [
        (NP, "I"),
        (Det, "the"),
        (Det, "a"),
        (N, "man"),
        (V, "saw"),
        (P, "in"),
        (P, "with"),
        (N, "park"),
        (N, "dog"),
        (N, "statue"),
        (Det, "my"),
    ]
    productions = tuple(
        [Production(lhs, rhs) for lhs, rhs in syntactic]
        + [Production(lhs, [word]) for lhs, word in lexical])

    grammar = CFG(S, productions)
    text = "I saw a man in the park".split()

    d = CFGDemo(grammar, text)
    d.mainloop()
def demo3():
    """Show a ``ProductionList`` widget filled with a toy grammar.

    Pressing 'q' destroys the window. Two of the productions are marked
    once the widget has been set up.
    """
    from nltk import Production

    S, VP, NP, PP, P, N, Name, V, Det = nonterminals(
        'S, VP, NP, PP, P, N, Name, V, Det')

    # Syntactic productions as (lhs, rhs) pairs.
    syntactic = (
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
        (PP, []),
        (PP, ['up', 'over', NP]),
    )
    # Lexical productions as (lhs, word) pairs.
    lexical = (
        (NP, 'I'),
        (Det, 'the'),
        (Det, 'a'),
        (N, 'man'),
        (V, 'saw'),
        (P, 'in'),
        (P, 'with'),
        (N, 'park'),
        (N, 'dog'),
        (N, 'statue'),
        (Det, 'my'),
    )
    productions = tuple(
        [Production(lhs, rhs) for lhs, rhs in syntactic]
        + [Production(lhs, [word]) for lhs, word in lexical])

    t = Tk()
    # The window is bound as a default so the handler keeps its own reference.
    t.bind('q', lambda e, t=t: t.destroy())

    p = ProductionList(t, productions)
    p.pack(expand=1, fill='both')
    p.add_callback('select', p.markonly)
    p.add_callback('move', p.markonly)
    p.focus()
    p.mark(productions[2])
    p.mark(productions[8])
# -*- coding: utf-8 -*- """ Created on Wed Dec 9 13:04:57 2020 @author: Rahul Kothuri, Isak Nyberg """ import nltk from nltk import Nonterminal, nonterminals, Production, CFG w1 = Nonterminal("NP") w2 = Nonterminal("VP") S, NP, VP = nonterminals('S,NP,VP') NLN, LN, V, LNP, DT, VBP, Adj, VBZ, RB = nonterminals( 'NLN,LN,V,LNP, DT, VBP, Adj,VBZ,RB') prod1 = Production(S, [NP, VP]) prod2 = Production(NP, [DT, NP]) grammar = CFG.fromstring(""" S -> NP VP NP -> Det LN | Det NLN | Det LNP VP -> V NP | VBP Adj | VBZ Adj | V RB | V | VBZ NP Det -> 'The' Det -> 'A' Det -> 'the' Det -> 'that' Det -> 'Those' LN -> 'girl' | 'boy' | 'dog' LNP -> 'boys' NLN -> 'house' | 'crackers' V -> 'eats' V -> 'run' | 'runs' VBP -> 'are' VBZ -> 'is'
def parse_production(line, nonterm_parser, probabilistic=False):
    """Parse one grammar rule given as a string into a list of productions.

    The line has the form ``LHS -> rhs | rhs | ...``. Each alternative holds
    either nonterminals or a single quoted terminal and, when
    ``probabilistic`` is true, may carry a bracketed probability such as
    ``[0.5]``.

    Args:
        line: The rule text.
        nonterm_parser: Callable ``(line, pos) -> (nonterminal, new_pos)``
            used to read every nonterminal, including the left-hand side.
        probabilistic: When true, bracketed probabilities are recognised and
            ``WeightedProduction`` objects are returned.

    Returns:
        One production per alternative, in source order.

    Raises:
        ValueError: If the arrow is missing, a string is unterminated, a
            probability exceeds 1.0, an alternative contains more than one
            terminal, or it mixes terminals with nonterminals.
    """
    # Raw-string patterns, compiled once rather than on every loop pass.
    arrow_re = re.compile(r'\s*->\s*')
    probability_re = re.compile(r'(\[[\d\.]+\])\s*')
    terminal_re = re.compile(r'''("[^"]+"|'[^']+')\s*''')
    disjunction_re = re.compile(r'\|\s*')

    pos = 0

    # Parse the left-hand side.
    lhs, pos = nonterm_parser(line, pos)

    # Skip over the arrow.
    m = arrow_re.match(line, pos)
    if not m:
        raise ValueError('Expected an arrow')
    pos = m.end()

    # Parse the right-hand side, one alternative at a time.
    probabilities = [0.0]
    found_terminal = found_non_terminal = False
    rhsides = [[]]
    while pos < len(line):
        m = probability_re.match(line, pos)

        # Probability.
        if probabilistic and m:
            pos = m.end()
            probabilities[-1] = float(m.group(1)[1:-1])
            if probabilities[-1] > 1.0:
                raise ValueError('Production probability %f, '
                                 'should not be greater than 1.0' %
                                 (probabilities[-1], ))

        # String -- add terminal.
        elif line[pos] in "\'\"":
            m = terminal_re.match(line, pos)
            if not m:
                raise ValueError('Unterminated string')
            if found_terminal:
                raise ValueError('Bad right-hand-side: do not use '
                                 'a sequence of terminals')
            found_terminal = True
            rhsides[-1].append(m.group(1)[1:-1])
            pos = m.end()

        # Vertical bar -- start new rhside.
        elif line[pos] == '|':
            probabilities.append(0.0)
            found_terminal = found_non_terminal = False
            rhsides.append([])
            pos = disjunction_re.match(line, pos).end()

        # Anything else -- nonterminal.
        else:
            nonterm, pos = nonterm_parser(line, pos)
            rhsides[-1].append(nonterm)
            found_non_terminal = True

        if found_terminal and found_non_terminal:
            raise ValueError('Bad right-hand-side: do not mix '
                             'terminals and non-terminals')

    if probabilistic:
        return [
            WeightedProduction(lhs, rhs, prob=probability)
            for (rhs, probability) in zip(rhsides, probabilities)
        ]
    return [Production(lhs, rhs) for rhs in rhsides]
def demo(should_print_times=True, trace=1):
    """Parse a sample sentence with a small feature grammar and print the trees.

    Args:
        should_print_times: When true, the elapsed parsing time is printed.
        trace: Trace level handed to ``FeatureEarleyChartParser``.
    """
    import time

    S = FeatStructNonterminal('S')
    VP = FeatStructNonterminal('VP')
    NP = FeatStructNonterminal('NP')
    PP = FeatStructNonterminal('PP')
    V = FeatStructNonterminal('V')
    N = FeatStructNonterminal('N')
    P = FeatStructNonterminal('P')
    Name = FeatStructNonterminal('Name')
    Det = FeatStructNonterminal('Det')
    DetSg = FeatStructNonterminal('Det[-pl]')
    DetPl = FeatStructNonterminal('Det[+pl]')
    NSg = FeatStructNonterminal('N[-pl]')
    NPl = FeatStructNonterminal('N[+pl]')

    productions = [
        # Define some grammatical productions.
        Production(S, (NP, VP)),
        Production(PP, (P, NP)),
        Production(NP, (NP, PP)),
        Production(VP, (VP, PP)),
        Production(VP, (V, NP)),
        Production(VP, (V,)),
        Production(NP, (DetPl, NPl)),
        Production(NP, (DetSg, NSg)),
        # Define some lexical productions.
        Production(NP, ('John',)),
        Production(NP, ('I',)),
        Production(Det, ('the',)),
        Production(Det, ('my',)),
        Production(Det, ('a',)),
        Production(NSg, ('dog',)),
        Production(NSg, ('cookie',)),
        Production(V, ('ate',)),
        Production(V, ('saw',)),
        Production(P, ('with',)),
        Production(P, ('under',)),
    ]

    # NOTE(review): `ContextFreeGrammar` and `nbest_parse` are names used by
    # older NLTK releases; confirm they exist in the NLTK version this
    # module targets.
    earley_grammar = ContextFreeGrammar(S, productions)
    # print() calls instead of Python 2 print statements, so the module
    # parses under Python 3.
    print(earley_grammar)
    print()

    sent = 'I saw John with a dog with my cookie'
    print("Sentence:", sent)
    print()
    tokens = sent.split()

    t = time.time()
    cp = FeatureEarleyChartParser(earley_grammar, trace=trace)
    trees = cp.nbest_parse(tokens)
    print()
    if should_print_times:
        print("Time: %s" % (time.time() - t))
    for tree in trees:
        print(tree)
try: tup = (list1[i], list2[i]) except IndexError: if len(list1) > len(list2): list2.append('') tup = (list1[i], list2[i]) elif len(list1) < len(list2): list1.append('') tup = (list1[i], list2[i]) continue merged_list.append(tup) break return merged_list PosTuple = merge(pos, d) for item1, item2 in PosTuple: p = Production(Nonterminal(str(item1)), [str(item2)]) CFGgrammar.append(p) def sentenceparse(sent): rd_parser = nltk.RecursiveDescentParser(CFGgrammar) trees = rd_parser.parse(sent.split()) treelist = list(trees) for tree in treelist: print(tree)
def create_templates():
    """Creates the templates from the grammar.

    Builds the instruction grammar, generates every sentence it licenses,
    tidies punctuation and whitespace, writes the de-duplicated sentences to
    'templates.csv' and returns them as a DataFrame. The DataFrame has one
    boolean column per entry of ``STREET_FEATURES``, telling whether the
    upper-cased feature name occurs in the sentence.
    """

    def binary(lhs, first, second):
        """Build the rule ``lhs -> first second`` from nonterminal names."""
        return Production(
            Nonterminal(lhs), (Nonterminal(first), Nonterminal(second)))

    prods = [
        # Specific verb with goal, then the rest of the instruction body.
        binary('S', 'V2', 'V2_BODY'),
        # A verb and the rest of the body, goal already mentioned.
        binary('V2_BODY', 'V1', 'M_G_ALREADY_V'),
        # A verb and the rest of the body, goal not mentioned before.
        binary('S', 'V1', 'NO_GOAL'),
        # Goal at the beginning, then the rest of the body.
        binary('S', 'V1_GOAL', 'WITH_GOAL'),
        # A verb followed by 'to the', leading into the goal mention.
        binary('V1_GOAL', 'V1', 'V1_CON'),
        # A goal mention and the rest of the instruction body.
        binary('WITH_GOAL', 'GOAL', 'M_G'),
        # Main part without a leading verb, resuming the sentence.
        binary('M_G_ALREADY_V', 'MAIN_NO_V', 'END_NEAR_GOAL_KNOWN'),
        # Main part of the instruction, adding a new sentence.
        binary('M_G', 'MAIN', 'END_NEAR_GOAL_KNOWN'),
        # Ending: near-pivot phrase (goal already mentioned), then avoid.
        binary('END_NEAR_GOAL_KNOWN', 'NEAR_GOAL_START', 'AVOID'),
        # Ending: near-pivot phrase (goal not mentioned yet), then avoid.
        binary('END_NEAR_GOAL_KNOWN', 'NEAR_GOAL_END', 'AVOID'),
        # Main part without a leading verb, no goal mentioned before.
        binary('NO_GOAL', 'MAIN_NO_V', 'END_NEAR_GOAL_UNKNOWN'),
        # Add the goal to the main part, then an ending (near + avoid).
        binary('END_NEAR_GOAL_UNKNOWN', 'GOAL_END', 'END_NEAR_GOAL_KNOWN'),
        # Add the goal with the near phrase, then an avoid sentence.
        binary('END_NEAR_GOAL_UNKNOWN', 'NEAR_GOAL_END', 'AVOID'),
        # Terminal for IN+DT after the verb.
        Production(Nonterminal('V1_CON'), ('to the', )),
    ]

    # Lexical rules: one production per phrase of each phrase list.
    lexicon = (
        ('V2', V2),
        ('AVOID', AVOID),
        ('NEAR_GOAL_START', NEAR_GOAL_START),
        ('NEAR_GOAL_END', NEAR_GOAL_END),
        ('GOAL', GOAL),
        ('GOAL_END', GOAL_END),
        ('MAIN_NO_V', MAIN_NO_V),
        ('MAIN', MAIN),
        ('V1', V1),
    )
    for name, phrases in lexicon:
        prods += add_rules(name, phrases)

    grammar = CFG(Nonterminal('S'), prods)

    # Generate templates.
    re_space = re.compile(r'[\s]+')
    punctuation_fixes = ((" .", "."), (" ,", ","), ("..", "."))
    templates = []
    for words in nltk.parse.generate.generate(grammar):
        sentence = ' '.join(words)
        if sentence[-1] != '.':
            sentence += '.'
        for old, new in punctuation_fixes:
            sentence = sentence.replace(old, new)
        templates.append(re_space.sub(r' ', sentence))

    templates_df = pd.DataFrame(
        templates, columns=['sentence']).drop_duplicates()

    # Save templates.
    templates_df.to_csv('templates.csv', index=False, header=False)

    # Flag features.
    for feature in STREET_FEATURES:
        templates_df[feature] = templates_df['sentence'].apply(
            lambda text: feature.upper() in text)

    return templates_df
终结符的有限集合(T) 开始符号(S) 产生式的有限集合(P),形如:A->a """ # 非终结符 nonterminal1 = Nonterminal('NP') nonterminal2 = Nonterminal('VP') nonterminal3 = Nonterminal('PP') print((nonterminal1 == nonterminal2)) print((nonterminal2 == nonterminal3)) print((nonterminal1 == nonterminal3)) S, NP, VP, PP = nonterminals('S, NP, VP, PP') N, V, P, DT = nonterminals('N, V, P, DT') # 产生式 production1 = Production(S, [NP, VP]) production2 = Production(NP, [DT, NP]) production3 = Production(VP, [V, NP, NP, PP]) print(production1.lhs(), production1.rhs()) print(production2.lhs(), production2.rhs()) print(production3.lhs(), production3.rhs()) # 语法解析 gram1 = nltk.data.load('grammars/large_grammars/atis.cfg') # print(gram1) sent = nltk.data.load('grammars/large_grammars/atis_sentences.txt') sent = nltk.parse.util.extract_test_sentences(sent) testingsent = sent[25] sent = testingsent[0] """FAQ. 递归下降分析 增量式 earley算法 通过保存增量解析步骤的结果和确保每一个解析函数在同一个输入位置只被调用一次,就可以把任意解析表达文法转化成一个Packrat Parser,
def generate_events_grammar(attribute, parent, phase):
    """Build the CFG for the question about events behind the incident.

    The grammar is a single chain of rules spelling out
    "Do you think the <subject> included|did not include <attribute> ?".

    Args:
        attribute: Terminal inserted as the attribute being asked about.
        parent: Terminal used as the subject; when ``None`` the phrase
            'events that caused the incident' is used instead.
        phase: ``1`` selects 'included'; any other value selects
            'did not include'.

    Returns:
        A ``CFG`` with start symbol ``S``.
    """
    nt = Nonterminal
    subject = parent if parent is not None else 'events that caused the incident'
    verb = 'included' if phase == 1 else 'did not include'

    rules = [
        Production(nt('S'), (nt('AUX1'), )),
        Production(nt('AUX1'), ('Do', nt('S1'))),
        Production(nt('S1'), ('you', nt('V1'))),
        Production(nt('V1'), ('think', nt('ART'))),
        Production(nt('ATTR'), (attribute, nt('END'))),
        Production(nt('END'), ('?', )),
        Production(nt('ART'), ('the', nt('PAR'))),
        Production(nt('PAR'), (subject, nt('V2'))),
        Production(nt('V2'), (verb, nt('ATTR'))),
    ]
    return CFG(nt('S'), rules)
def demo2():
    """Open ``CFGDemo`` on a small toy grammar and a sample sentence.

    Blocks in the demo's ``mainloop()`` until it returns.
    """
    # NLTK 3 exposes the grammar class as CFG (formerly ContextFreeGrammar).
    from nltk import Nonterminal, Production, CFG

    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V,
     Det) = [Nonterminal(s) for s in nonterminals.split()]

    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),
        # Lexical Productions
        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )

    grammar = CFG(S, productions)
    text = 'I saw a man in the park'.split()

    d = CFGDemo(grammar, text)
    d.mainloop()
def generate_sources_grammar(attribute, parent, phase):
    """Build the CFG for the question about the sources of the incident.

    The grammar is a single chain of rules spelling out
    "Do you think the <subject> included|didn't include <attribute> ?".

    Args:
        attribute: Terminal inserted as the attribute being asked about.
        parent: Terminal used as the subject (under nonterminal ``PAR``);
            when ``None`` the word 'sources' is used (under ``CLS``).
        phase: ``1`` selects 'included'; any other value selects the
            negated form.

    Returns:
        A ``CFG`` with start symbol ``S``.
    """
    gr = [
        Production(Nonterminal('S'), (Nonterminal('AUX1'), )),
        Production(Nonterminal('AUX1'), ('Do', Nonterminal('S1'))),
        Production(Nonterminal('S1'), ('you', Nonterminal('V1'))),
        Production(Nonterminal('V1'), ('think', Nonterminal('ART'))),
        Production(Nonterminal('ATTR'), (attribute, Nonterminal('END'))),
        Production(Nonterminal('END'), ('?', ))
    ]

    # The negated verb uses a real apostrophe; the acute accent (U+00B4)
    # used before showed up as a typo in the generated question.
    verb = 'included' if phase == 1 else "didn't include"
    gr.append(Production(Nonterminal('V2'), (verb, Nonterminal('ATTR'))))

    if parent is None:
        subject_symbol, subject = Nonterminal('CLS'), 'sources'
    else:
        subject_symbol, subject = Nonterminal('PAR'), parent
    gr.append(Production(Nonterminal('ART'), ('the', subject_symbol)))
    gr.append(Production(subject_symbol, (subject, Nonterminal('V2'))))

    return CFG(Nonterminal('S'), gr)
ta /= len(list_tag_val) # armazena o resultado r = {'lp':lp, 'lr': lr, 'f1':f1, 'ta':ta} resultados.append(r) else: print("Sentença com mais de 18 palavras.") except Exception: print("Árvore mal formada.") # realiza o calculo da media para cada metrica media_lp = sum(item['lp'] for item in resultados)/len(resultados) media_lr = sum(item['lr'] for item in resultados)/len(resultados) media_f1 = sum(item['f1'] for item in resultados)/len(resultados) media_ta = sum(item['ta'] for item in resultados)/len(resultados) print("media_lp",media_lp,"media_lr",media_lr,"media_f1",media_f1,"media_ta",media_ta) # extrai as arvores da base de dados floresta, com suas respectivas tags filter_errors(floresta.parsed_sents()) roots = [] ROOT = Nonterminal('ROOT') # nao-terminal representado o simbolo inicial da gramatica initial_symbols = list(set(initial_symbols)) # remover repetidos for t in initial_symbols: roots += [Production(ROOT,[t])] # unificar a gramatica para apenas um simbolo inicial productions += roots productions += [Production(Nonterminal("n"), ["UNK"])] # regra para palavras desconhecidas (substantivo) pcfg = induce_pcfg(ROOT, productions) # cria a PCFG informando o simbolo inicial e as regras do_cky(pcfg) # aplica o algoritmo CKY (ViterbiParser)
def generate_entities_grammar(attribute, phase):
    """Build the CFG for the question about entities hit by the incident.

    The grammar spells out
    "Do you think <attribute> are[ not] impacted|affected by the incident ?".

    Args:
        attribute: Terminal inserted as the entity being asked about.
        phase: ``1`` selects the affirmative verb ('are'); any other value
            selects the negated form ('are not').

    Returns:
        A ``CFG`` with start symbol ``S``.
    """
    nt = Nonterminal
    verb = 'are' if phase == 1 else 'are not'

    rules = [
        Production(nt('S'), (nt('AUX1'), )),
        Production(nt('AUX1'), ('Do', nt('S1'))),
        Production(nt('S1'), ('you', nt('V1'))),
        Production(nt('V1'), ('think', nt('ATTR'))),
        Production(nt('V3'), ('impacted', nt('OBJ'))),
        Production(nt('V3'), ('affected', nt('OBJ'))),
        Production(nt('OBJ'), ('by the incident', nt('END'))),
        Production(nt('END'), ('?', )),
        Production(nt('V2'), (verb, nt('V3'))),
        Production(nt('ATTR'), (attribute, nt('V2'))),
    ]
    return CFG(nt('S'), rules)
def demo3():
    """Show a ``ProductionList`` widget filled with a toy grammar.

    Pressing "q" destroys the window. The productions at positions 2 and 8
    are marked once the widget has been set up.
    """
    from nltk import Production

    S, VP, NP, PP, P, N, Name, V, Det = nonterminals(
        "S, VP, NP, PP, P, N, Name, V, Det")

    # Syntactic productions as (lhs, rhs) pairs.
    grammar_rules = [
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
        (PP, []),
        (PP, ["up", "over", NP]),
    ]
    # Lexical productions, each rewriting to exactly one word.
    grammar_rules += [
        (lhs, [word]) for lhs, word in (
            (NP, "I"),
            (Det, "the"),
            (Det, "a"),
            (N, "man"),
            (V, "saw"),
            (P, "in"),
            (P, "with"),
            (N, "park"),
            (N, "dog"),
            (N, "statue"),
            (Det, "my"),
        )
    ]
    productions = tuple(Production(lhs, rhs) for lhs, rhs in grammar_rules)

    t = Tk()

    def close_window(e, t=t):
        t.destroy()

    t.bind("q", close_window)

    p = ProductionList(t, productions)
    p.pack(expand=1, fill="both")
    for event in ("select", "move"):
        p.add_callback(event, p.markonly)
    p.focus()
    for position in (2, 8):
        p.mark(productions[position])
def __init__(self, grammar):
    """Load ``grammar`` and index the left-hand sides of its productions.

    ``self.grammar`` holds whatever ``self.load_grammar`` returns, and
    ``self.index`` is a ``CodeBook`` built from the set of distinct
    left-hand-side symbols of that grammar's productions.
    """
    super(PCKYParser, self).__init__()
    self.grammar = self.load_grammar(grammar)
    lhs_symbols = set(map(Production.lhs, self.grammar.productions()))
    self.index = CodeBook(lhs_symbols)