def apply(self, chart, grammar, edge):
    """Predict zero-width edges for the symbol that `edge` expects next.

    For each production returned by ``grammar.productions(lhs=nextsym)``
    whose left-hand side unifies with ``edge.next_with_bindings()``, an edge
    built by ``FeatureTreeEdge.from_production`` at ``edge.end()`` is
    inserted into `chart`; every edge for which ``chart.insert`` returns a
    true value is yielded.

    Nothing is generated when `edge` is complete, when its next symbol is
    not a nonterminal, or when ``self._done`` records that the same
    (next symbol, end index) pair was already handled for this very chart
    and grammar object.
    """
    if edge.is_complete():
        return
    nextsym, index = edge.nextsym(), edge.end()
    if not is_nonterminal(nextsym):
        return

    # Memo lookup: skip the work if this (symbol, position) pair was already
    # processed for the identical chart and grammar objects.
    bound_next = edge.next_with_bindings()
    memo_key = (bound_next, index)
    seen_chart, seen_grammar = self._done.get(memo_key, (None, None))
    if seen_chart is chart and seen_grammar is grammar:
        return

    for prod in grammar.productions(lhs=nextsym):
        # A production whose left corner is a terminal is only worth
        # predicting if that terminal is the input leaf at `index`.
        if prod.rhs():
            corner = prod.rhs()[0]
            if is_terminal(corner) and (
                index >= chart.num_leaves() or corner != chart.leaf(index)
            ):
                continue

        # Variables are renamed during unification so that variables of the
        # two different productions cannot accidentally match.
        if not unify(prod.lhs(), bound_next, rename_vars=True):
            continue
        predicted = FeatureTreeEdge.from_production(prod, edge.end())
        if chart.insert(predicted, ()):
            yield predicted

    # Remember that this rule has been applied for this key. This line is
    # only reached once the generator has been fully consumed.
    self._done[memo_key] = (chart, grammar)
def apply(self, chart, grammar, edge):
    """Bottom-up step: start new edges whose first RHS symbol is `edge`.

    Only complete edges are processed. For each production returned by
    ``grammar.productions(rhs=edge.lhs())`` an edge is created from the
    production at ``edge.start()`` and its dot is moved forward to
    ``edge.end()``. The result is inserted into `chart` with `edge` as its
    only child and yielded when ``chart.insert`` returns a true value.

    When `edge` is a ``FeatureTreeEdge`` the production's first right-hand
    side symbol must be a nonterminal that unifies with the edge's
    left-hand side; the resulting bindings are stored on the new edge.
    """
    if edge.is_incomplete():
        return

    found = edge.lhs()
    edge_has_features = isinstance(edge, FeatureTreeEdge)

    for prod in grammar.productions(rhs=found):
        bindings = {}

        if edge_has_features:
            first_rhs = prod.rhs()[0]
            if not is_nonterminal(first_rhs):
                continue

            # Rename the variables of `found` away from those used by the
            # production, so variables from the two sources cannot clash.
            # `found` is rebound here, so later iterations start from the
            # already-renamed symbol.
            prod_vars = find_variables(
                (prod.lhs(),) + prod.rhs(), fs_class=FeatStruct
            )
            found = found.rename_variables(used_vars=prod_vars)

            if unify(first_rhs, found, bindings, rename_vars=False) is None:
                continue

        fresh = FeatureTreeEdge.from_production(prod, edge.start())
        advanced = fresh.move_dot_forward(edge.end(), bindings)
        if chart.insert(advanced, (edge,)):
            yield advanced
def apply(self, chart, grammar, left_edge, right_edge):
    """Combine an incomplete `left_edge` with an adjacent complete `right_edge`.

    The rule applies only if `left_edge` ends where `right_edge` starts,
    `left_edge` is incomplete, `right_edge` is complete and `left_edge` is a
    ``FeatureTreeEdge``. If `right_edge` is also a ``FeatureTreeEdge`` the
    next symbol of `left_edge` must be a nonterminal with the same ``TYPE``
    as the right edge's left-hand side and must unify with it; otherwise the
    two symbols must simply compare equal.

    On success the dot of `left_edge` is moved forward to
    ``right_edge.end()``, and the new edge is yielded if
    ``chart.insert_with_backpointer`` returns a true value.
    """
    adjacent = left_edge.end() == right_edge.start()
    applicable = (
        adjacent
        and left_edge.is_incomplete()
        and right_edge.is_complete()
        and isinstance(left_edge, FeatureTreeEdge)
    )
    if not applicable:
        return

    found = right_edge.lhs()
    nextsym = left_edge.nextsym()

    if isinstance(right_edge, FeatureTreeEdge):
        if not is_nonterminal(nextsym):
            return
        if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]:
            return
        # The original author notes that bindings() yields a copy.
        bindings = left_edge.bindings()
        # Rename variables of the right edge's symbol away from those used
        # by the left edge, so the two productions cannot share variables.
        found = found.rename_variables(used_vars=left_edge.variables())
        # Unify the expected symbol with the found symbol; `bindings` is
        # updated as a side effect.
        if unify(nextsym, found, bindings, rename_vars=False) is None:
            return
    elif nextsym != found:
        return
    else:
        bindings = left_edge.bindings()

    combined = left_edge.move_dot_forward(right_edge.end(), bindings)

    # Register the new edge together with the pair it was built from.
    if chart.insert_with_backpointer(combined, left_edge, right_edge):
        yield combined
def apply_iter(self, chart, grammar, left_edge, right_edge):
    """Fundamental rule for feature edges, with a ``TYPE`` pre-check.

    Applies when `left_edge` ends where `right_edge` starts, `left_edge` is
    incomplete, `right_edge` is complete, both are ``TreeEdge`` instances
    and the ``TYPE`` of the left edge's next symbol equals the ``TYPE`` of
    the right edge's left-hand side. The two symbols are then unified; if
    that succeeds a new ``FeatureTreeEdge`` with the dot advanced by one is
    inserted once per child-pointer list of `left_edge`, and yielded if any
    insertion returned a true value.
    """
    applicable = (
        left_edge.end() == right_edge.start()
        and left_edge.is_incomplete()
        and right_edge.is_complete()
        and isinstance(left_edge, TreeEdge)
        and isinstance(right_edge, TreeEdge)
        and left_edge.next()[TYPE] == right_edge.lhs()[TYPE]
    )
    if not applicable:
        return

    # The original author notes that bindings() returns a copy; unify()
    # updates it in place.
    bindings = left_edge.bindings()
    unified = unify(left_edge.next(), right_edge.lhs(),
                    bindings, rename_vars=False)
    if unified is None:
        return

    combined = FeatureTreeEdge(
        span=(left_edge.start(), right_edge.end()),
        lhs=left_edge.lhs(),
        rhs=left_edge.rhs(),
        dot=left_edge.dot() + 1,
        bindings=bindings,
    )

    # Insert once for every child-pointer list of the left edge. A list
    # (not a generator) is used so that every insertion is performed.
    outcomes = [
        chart.insert(combined, pointers + (right_edge,))
        for pointers in chart.child_pointer_lists(left_edge)
    ]

    if any(outcomes):
        yield combined
def apply(self, chart, grammar, edge):
    """Yield new edges that use the complete `edge` as their first child.

    Incomplete edges are ignored. Each production obtained from
    ``grammar.productions(rhs=edge.lhs())`` is turned into an edge starting
    at ``edge.start()`` whose dot is moved forward to ``edge.end()``. Edges
    for which ``chart.insert(new_edge, (edge,))`` returns a true value are
    yielded.

    For a ``FeatureTreeEdge`` the first right-hand side symbol of the
    production has to be a nonterminal and has to unify with the edge's
    left-hand side; the bindings produced by that unification are passed to
    ``move_dot_forward``.
    """
    if edge.is_incomplete():
        return

    found = edge.lhs()
    for production in grammar.productions(rhs=found):
        bindings = {}

        if isinstance(edge, FeatureTreeEdge):
            left_corner = production.rhs()[0]
            if not is_nonterminal(left_corner):
                continue

            # Keep the variables of the found symbol distinct from those of
            # the production. `found` is reassigned, so the renaming carries
            # over into subsequent iterations.
            taken = find_variables(
                (production.lhs(),) + production.rhs(), fs_class=FeatStruct
            )
            found = found.rename_variables(used_vars=taken)

            outcome = unify(left_corner, found, bindings, rename_vars=False)
            if outcome is None:
                continue

        seed = FeatureTreeEdge.from_production(production, edge.start())
        candidate = seed.move_dot_forward(edge.end(), bindings)
        if chart.insert(candidate, (edge,)):
            yield candidate
def apply_iter(self, chart, grammar, left_edge, right_edge):
    """Fundamental rule for feature edges.

    Applies when `left_edge` ends where `right_edge` starts, `left_edge` is
    incomplete, `right_edge` is complete and both are ``TreeEdge``
    instances. The next symbol of `left_edge` is unified with the left-hand
    side of `right_edge`; on success a ``FeatureTreeEdge`` spanning both
    edges, with the dot advanced by one, is inserted into `chart` for each
    child-pointer list of `left_edge`. The new edge is yielded once if at
    least one insertion returned a true value.
    """
    # Applicability guards, checked in the same order as a single
    # short-circuiting conjunction would check them.
    if not (left_edge.end() == right_edge.start()):
        return
    if not left_edge.is_incomplete():
        return
    if not right_edge.is_complete():
        return
    if not isinstance(left_edge, TreeEdge):
        return
    if not isinstance(right_edge, TreeEdge):
        return

    # The original author notes that bindings() returns a copy; unify()
    # records new bindings in it.
    bindings = left_edge.bindings()
    if unify(left_edge.next(), right_edge.lhs(),
             bindings, rename_vars=False) is None:
        return

    merged = FeatureTreeEdge(
        span=(left_edge.start(), right_edge.end()),
        lhs=left_edge.lhs(),
        rhs=left_edge.rhs(),
        dot=left_edge.dot() + 1,
        bindings=bindings,
    )

    # Every child-pointer list of the left edge is extended by the right
    # edge; all insertions are attempted even after the first success.
    inserted_any = False
    for pointers in chart.child_pointer_lists(left_edge):
        extended = pointers + (right_edge,)
        if chart.insert(merged, extended):
            inserted_any = True

    if inserted_any:
        yield merged
def compute_children(self) -> List["FeatureGrammarNode"]:
    """Expand each non-string symbol of this node by every matching production.

    For every symbol in ``self.symbols`` that is not a ``str`` and every
    production returned by ``self.feature_grammar.productions(lhs=symbol)``,
    the production's variables are renamed, its left-hand side is unified
    with the symbol and, if unification succeeds, a child node is created
    in which the symbol is replaced by the production's right-hand side and
    the resulting bindings are substituted into all symbols.

    Returns:
        The list of child nodes, or a single-element list containing
        ``FeatureGrammarNode("DEAD_END", None)`` when no child was produced.
    """
    # Collect the variables already used by the current derivation so that
    # renamed production variables cannot collide with them.
    used_vars: Set[Variable] = set()
    for current in self.symbols:
        if isinstance(current, str):
            continue
        used_vars |= find_variables(current)

    children: List["FeatureGrammarNode"] = []
    for position, target in enumerate(self.symbols):
        if isinstance(target, str):
            continue

        for production in self.feature_grammar.productions(lhs=target):
            # One shared renaming map keeps a variable consistent between
            # the left-hand side and all right-hand side symbols.
            # TODO: only rename once it is known that it is necessary.
            renaming = {}
            fresh_lhs = rename_variables(
                production.lhs(), used_vars=used_vars, new_vars=renaming
            )
            fresh_rhs = [
                rename_variables(part, used_vars=used_vars, new_vars=renaming)
                for part in production.rhs()
            ]

            bindings = {}
            if unify(fresh_lhs, target, bindings=bindings) is None:
                # Unification failed: this production does not apply.
                continue

            # Push the new bindings into the siblings and into the
            # right-hand side that replaces the expanded symbol.
            siblings = [
                substitute_bindings(member, bindings=bindings)
                for member in self.symbols
            ]
            expansion = [
                substitute_bindings(part, bindings=bindings)
                for part in fresh_rhs
            ]

            derived = siblings[:position] + expansion + siblings[position + 1:]
            children.append(
                FeatureGrammarNode(tuple(derived), self.feature_grammar)
            )

    if children:
        return children
    return [FeatureGrammarNode("DEAD_END", None)]
def _parses(self, chart, start, tree_class):
    """Return the list of complete parse trees found in `chart`.

    Every edge selected with the span ``(0, chart.num_leaves())`` whose
    left-hand side unifies with `start` contributes the trees returned by
    ``chart.trees(edge, complete=True, tree_class=tree_class)``.
    """
    full_span = (0, chart.num_leaves())
    parses = []
    for edge in chart.select(span=full_span):
        # Variables are renamed during unification so that the edge and the
        # start symbol cannot share variable names by accident.
        if not unify(edge.lhs(), start, rename_vars=True):
            continue
        parses.extend(chart.trees(edge, complete=True, tree_class=tree_class))
    return parses
def parses(self, start, tree_class=Tree):
    """Generate the complete parse trees whose root matches `start`.

    Edges are selected with ``start=0`` and ``end=self._num_leaves``. An
    edge qualifies if it is a ``FeatureTreeEdge``, the ``TYPE`` of its
    left-hand side equals ``start[TYPE]`` and its left-hand side unifies
    with `start`. For each qualifying edge the trees returned by
    ``self.trees(edge, complete=True, tree_class=tree_class)`` are yielded
    one by one.
    """
    for edge in self.select(start=0, end=self._num_leaves):
        if not isinstance(edge, FeatureTreeEdge):
            continue
        # Cheap TYPE comparison first; full unification only if it matches.
        same_type = edge.lhs()[TYPE] == start[TYPE]
        if not (same_type and unify(edge.lhs(), start, rename_vars=True)):
            continue
        for tree in self.trees(edge, complete=True, tree_class=tree_class):
            yield tree
def _parses(self, chart, start, tree_class):
    """Return the list of complete parse trees whose root matches `start`.

    Edges are selected with the span ``(0, chart.num_leaves())``.
    ``LeafEdge`` instances are skipped; for the rest, the ``TYPE`` of the
    left-hand side must equal ``start[TYPE]`` and the left-hand side must
    unify with `start`. Matching edges contribute the trees returned by
    ``chart.trees(edge, complete=True, tree_class=tree_class)``.
    """
    full_span = (0, chart.num_leaves())
    collected = []
    for edge in chart.select(span=full_span):
        if isinstance(edge, LeafEdge):
            continue
        # Cheap TYPE comparison first; full unification only if it matches.
        same_type = edge.lhs()[TYPE] == start[TYPE]
        if same_type and unify(edge.lhs(), start, rename_vars=True):
            collected.extend(
                chart.trees(edge, complete=True, tree_class=tree_class)
            )
    return collected
def apply_iter(self, chart, grammar, edge):
    """Predict zero-width edges for an incomplete `edge`.

    Every production of `grammar` is tried: if its left-hand side unifies
    with ``edge.next_with_bindings()``, a ``FeatureTreeEdge`` with dot 0 and
    the empty span at ``edge.end()`` is inserted into `chart`, and yielded
    when ``chart.insert`` returns a true value. Complete edges produce
    nothing.
    """
    if not edge.is_complete():
        for production in grammar.productions():
            # Variables are renamed during unification so that variables
            # from the two different productions cannot match.
            matches = unify(production.lhs(), edge.next_with_bindings(),
                            rename_vars=True)
            if not matches:
                continue
            predicted = FeatureTreeEdge(
                span=(edge.end(), edge.end()),
                lhs=production.lhs(),
                rhs=production.rhs(),
                dot=0,
            )
            if chart.insert(predicted, ()):
                yield predicted
def apply_iter(self, chart, grammar, edge):
    """Predict zero-width edges for the symbol an incomplete `edge` expects.

    Candidate productions come from ``grammar.productions(lhs=edge.next())``.
    Each one whose left-hand side unifies with
    ``edge.next_with_bindings()`` yields a ``FeatureTreeEdge`` with dot 0 and
    the empty span at ``edge.end()``, provided ``chart.insert`` returns a
    true value for it. Complete edges produce nothing.
    """
    if not edge.is_complete():
        for production in grammar.productions(lhs=edge.next()):
            # Variables are renamed during unification so that variables
            # from the two different productions cannot match.
            matches = unify(production.lhs(), edge.next_with_bindings(),
                            rename_vars=True)
            if not matches:
                continue
            predicted = FeatureTreeEdge(
                span=(edge.end(), edge.end()),
                lhs=production.lhs(),
                rhs=production.rhs(),
                dot=0,
            )
            if chart.insert(predicted, ()):
                yield predicted
def apply_iter(self, chart, gramar, edge):
    """Scan the input leaf that follows an incomplete `edge`.

    Looks up the leaf at ``edge.end()`` in ``self._word_to_pos``. For each
    listed category that unifies with ``edge.next_with_bindings()`` a
    ``LeafEdge`` for the word and a complete ``FeatureTreeEdge`` for the
    category (spanning exactly that word) are inserted into `chart`; each
    one is yielded if its ``chart.insert`` call returns a true value.

    Nothing is produced if `edge` is complete or already ends at or beyond
    the last leaf. The `gramar` argument is not used.
    """
    if edge.is_complete() or edge.end() >= chart.num_leaves():
        return

    index = edge.end()
    leaf = chart.leaf(index)
    for category in self._word_to_pos.get(leaf, []):
        if not unify(category, edge.next_with_bindings(), rename_vars=True):
            continue

        word_edge = LeafEdge(leaf, index)
        if chart.insert(word_edge, ()):
            yield word_edge

        # The category edge is attempted regardless of whether the leaf
        # edge was newly inserted.
        category_edge = FeatureTreeEdge((index, index + 1), category, [leaf], 1)
        if chart.insert(category_edge, (word_edge,)):
            yield category_edge
def apply_iter(self, chart, gramar, edge):
    """Scanner step: consume the word right after an incomplete `edge`.

    The word at position ``edge.end()`` is looked up in
    ``self._word_to_pos``. Whenever one of its categories unifies with
    ``edge.next_with_bindings()``, a ``LeafEdge`` for the word and a
    one-word ``FeatureTreeEdge`` (dot position 1) for the category are
    inserted into `chart`; edges whose insertion returns a true value are
    yielded.

    Returns without yielding if `edge` is complete or ends at or past the
    number of leaves. `gramar` is accepted but never read.
    """
    if edge.is_complete() or edge.end() >= chart.num_leaves():
        return

    position = edge.end()
    word = chart.leaf(position)
    candidates = self._word_to_pos.get(word, [])

    for tag in candidates:
        if unify(tag, edge.next_with_bindings(), rename_vars=True):
            leaf_edge = LeafEdge(word, position)
            if chart.insert(leaf_edge, ()):
                yield leaf_edge

            # Insert the category edge with the leaf edge as its child.
            tag_edge = FeatureTreeEdge((position, position + 1), tag, [word], 1)
            if chart.insert(tag_edge, (leaf_edge,)):
                yield tag_edge
def apply_iter(self, chart, gramar, edge):
    """Scanner step driven by the grammar's lexical productions.

    The categories for the word at ``edge.end()`` are the left-hand sides of
    ``gramar.productions(rhs=leaf)``. A category is accepted if its ``TYPE``
    equals the ``TYPE`` of ``edge.next()`` and it unifies with
    ``edge.next_with_bindings()``. For each accepted category a ``LeafEdge``
    and a one-word ``FeatureTreeEdge`` (dot position 1) are inserted into
    `chart`, and each is yielded if its insertion returns a true value.

    Returns without yielding if `edge` is complete or ends at or past the
    number of leaves.
    """
    if edge.is_complete() or edge.end() >= chart.num_leaves():
        return

    position = edge.end()
    word = chart.leaf(position)

    # All candidate categories are collected up front, before any edge is
    # inserted into the chart.
    categories = [prod.lhs() for prod in gramar.productions(rhs=word)]

    for category in categories:
        # Cheap TYPE comparison first; full unification only if it matches.
        same_type = category[TYPE] == edge.next()[TYPE]
        if not (same_type and unify(category, edge.next_with_bindings(),
                                    rename_vars=True)):
            continue

        leaf_edge = LeafEdge(word, position)
        if chart.insert(leaf_edge, ()):
            yield leaf_edge

        category_edge = FeatureTreeEdge(
            (position, position + 1), category, [word], 1
        )
        if chart.insert(category_edge, (leaf_edge,)):
            yield category_edge
def apply_iter(self, chart, grammar, edge):
    """Predict non-lexical productions for an incomplete `edge`.

    Productions whose right-hand side consists of a single ``str`` are
    skipped (the original comment attributes those to the ScannerRule). For
    the remaining productions, the left-hand side must have the same
    ``TYPE`` as ``edge.next()`` and must unify with
    ``edge.next_with_bindings()``. Each match becomes a ``FeatureTreeEdge``
    with dot 0 and the empty span at ``edge.end()``, yielded when
    ``chart.insert`` returns a true value.
    """
    if not edge.is_complete():
        for production in grammar.productions():
            # Do not predict lexical edges.
            is_lexical = (
                len(production.rhs()) == 1
                and isinstance(production.rhs()[0], str)
            )
            if is_lexical:
                continue

            # Cheap TYPE comparison first. Variables are renamed during
            # unification so that variables from the two different
            # productions cannot match.
            same_type = production.lhs()[TYPE] == edge.next()[TYPE]
            if not (same_type and unify(production.lhs(),
                                        edge.next_with_bindings(),
                                        rename_vars=True)):
                continue

            predicted = FeatureTreeEdge(
                span=(edge.end(), edge.end()),
                lhs=production.lhs(),
                rhs=production.rhs(),
                dot=0,
            )
            if chart.insert(predicted, ()):
                yield predicted
def apply_iter(self, chart, grammar, edge):
    """Bottom-up prediction from a complete `edge`.

    For each production returned by ``grammar.productions(rhs=edge.lhs())``
    a ``FeatureTreeEdge`` over ``edge.span()`` with dot position 1 is
    inserted into `chart` with `edge` as its only child, and yielded when
    the insertion returns a true value.

    If `edge` is a ``FeatureTreeEdge``, the production's first right-hand
    side symbol must be a ``FeatStructNonterminal`` that unifies with the
    edge's left-hand side, and the bindings from that unification are stored
    on the new edge. Any other edge (the original comment says: a
    ``LeafEdge``) is combined without unification.
    """
    if edge.is_incomplete():
        return

    if not isinstance(edge, FeatureTreeEdge):
        # Non-feature edge: every indexed production is used as is.
        for production in grammar.productions(rhs=edge.lhs()):
            started = FeatureTreeEdge(
                edge.span(), production.lhs(), production.rhs(), 1
            )
            if chart.insert(started, (edge,)):
                yield started
        return

    for production in grammar.productions(rhs=edge.lhs()):
        left_corner = production.rhs()[0]
        if not isinstance(left_corner, FeatStructNonterminal):
            continue
        bindings = {}
        if not unify(left_corner, edge.lhs(), bindings):
            continue
        started = FeatureTreeEdge(
            edge.span(), production.lhs(), production.rhs(), 1, bindings
        )
        if chart.insert(started, (edge,)):
            yield started
def generate_from(self, x):
    """Randomly generate a tree rooted in symbol `x`.

    One of the options returned by ``self.expansions(x)`` is picked with
    ``random.choice``. String symbols on the right-hand side of the chosen
    rule become leaves; every other symbol is expanded recursively after the
    current bindings have been substituted into it.

    Returns:
        ``Tree(label, children)`` where the label is the unified root symbol
        with bindings substituted and variables renamed.

    Raises:
        Failure: if ``self.expansions(x)`` is empty.
        Exception: if a generated child's label does not unify with the
            symbol it was generated from.
    """
    options = self.expansions(x)
    if not options:
        raise Failure

    rule, x, bindings = random.choice(options)

    children = []
    for symbol in rule.rhs():
        if isinstance(symbol, str):
            children.append(symbol)
            continue

        symbol = symbol.substitute_bindings(bindings)
        subtree = self.generate_from(symbol)
        children.append(subtree)
        # unify() is called for its effect on `bindings`, so that choices
        # made in this child constrain the siblings that follow.
        if not unify(symbol, subtree.label(), bindings, rename_vars=False):
            raise Exception("This can't happen")

    label = x.substitute_bindings(bindings).rename_variables()
    return Tree(label, children)
def iter_expansions(x, g):
    """Yield the ways symbol `x` can be expanded by grammar `g`.

    For each production returned by ``g.productions(lhs=x)`` whose left-hand
    side unifies with `x`, yields a ``(production, unified, bindings)``
    triple, where `bindings` is a fresh dict filled in by ``unify``.
    """
    for rule in g.productions(lhs=x):
        bindings = {}
        unified = unify(x, rule.lhs(), bindings, rename_vars=False)
        # Truthiness test: a falsy unification result is treated as failure.
        if not unified:
            continue
        yield (rule, unified, bindings)
def checkSentence(iii,s,corrlist,rec=0):
    """
    Apply the correction rules of corrlist to sentence s and return the result.

    iii: index of sentence in the sentence list (only used for the debug
        output and handed on to the recursive calls)
    s: sentence in nodedic format (indexed by token index; each token is
        itself indexable by feature name)
    corrlist: iterable of (matchdic, insdic) pairs. When a token unifies
        with matchdic, insdic decides what happens: an int triggers glue()
        (it holds the direction of the token glueing), anything else is
        passed to integrate()
    rec: recursion level; rules are only applied while rec < 3

    Uses the module-level names debug, tokenname, unify, glue, integrate
    and copy.
    """
    if debug: print "checking sentence",iii,"rec",rec,"len(s)",len(s)
    # TODO: kick out:
    # Top-level call only: relabel every governor function that ends in
    # "_invisible" by replacing that suffix text with "_inherited".
    if rec<1:
        for i in s:
            if "gov" in s[i]:
                for g in s[i]["gov"]:
                    if s[i]["gov"][g].endswith("_invisible"):
                        s[i]["gov"][g]=s[i]["gov"][g].replace("_invisible","_inherited")
    # Rules are applied at most three levels deep.
    if rec<3:
        # sorted(s) is computed once, before s is rebound below, so an
        # index may have disappeared by the time it is visited.
        for i in sorted(s):
            if debug>1:
                try:
                    print "checking:",i,s[i][tokenname]
                except:
                    # Any failure while printing is reported this way; the
                    # message suggests a token that was removed meanwhile.
                    print "index",i,"is gone"
            if i in s:
                for matchdic,insdic in corrlist:
                    if unify(s[i],matchdic):
                        if debug>1:
                            print "èèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèè"
                            print s[i]
                            print "matched oooooooooooooooooooooooooooooooo"
                            print matchdic
                            print "èèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèèè"
                        if isinstance(insdic,int):
                            # glueing. in this case insdic contains the
                            # direction of the token glueing
                            s = glue(copy.deepcopy(s),i,insdic)
                            s = checkSentence(iii,copy.deepcopy(s),corrlist,rec+1)
                        else:
                            s = integrate(copy.deepcopy(s),i,insdic)
                            # Re-check only when insdic has more than one
                            # entry.
                            if len(insdic)>1: s=checkSentence(iii,copy.deepcopy(s),corrlist,rec+1)
                # NOTE(review): a former branch that glued tokens based on
                # a separate `gluedic` lookup is disabled.
    return s
def iter_expansions(self, x):
    """Yield the ways symbol `x` can be expanded by this object's grammar.

    For each production returned by ``self.__grammar.productions(lhs=x)``
    whose left-hand side unifies with `x`, yields a
    ``(production, unified, bindings)`` triple, where `bindings` is a fresh
    dict filled in by ``unify``.
    """
    for rule in self.__grammar.productions(lhs=x):
        bindings = {}
        unified = unify(x, rule.lhs(), bindings, rename_vars=False)
        # Truthiness test: a falsy unification result is treated as failure.
        if not unified:
            continue
        yield (rule, unified, bindings)