def make_trans_tree(self, symbol, feat, fparam):
    """Build the alternative trees for a destination-only non-terminal.

    Every rule listed in ``self.ruledict[symbol]`` is tried in order. A rule
    contributes one ``Tree`` (with an empty left side) when its features
    unify with ``feat``/``fparam`` and all of its right-hand items can be
    expanded; a rule that raises ``UnifyError`` is skipped.

    Returns:
        The list of ``Tree`` alternatives that were built (never empty).

    Raises:
        ParseError: ``symbol`` has no production rules.
        UnifyError: rules exist but none could be unified; the message is
            the one recorded for the last failing rule.
    """
    alternatives = []
    for ruleno in self.ruledict[symbol]:
        rule = self.rules[ruleno]
        try:
            fdict = Parser.unify_down(rule.feat, fparam, feat, rule.checklist)
            logging.debug("make_trans_tree: %s unifyd(%s,%s,%s)->%s",
                          symbol, format_feat(feat), format_fparam(fparam),
                          format_feat(rule.feat), format_feat(fdict))
            expansion = []
            for item, param in zip(rule.right, rule.rparam):
                assert type(item) is str, "%s#%d(%s) %s:%s" % (
                    symbol, ruleno, fparam, item, param)

                if item[0] != '*':
                    if param is False:
                        # Terminal: copied through unchanged
                        expansion.append(item)
                    else:
                        # NonTerminal: expanded recursively
                        expansion.append(
                            self.make_trans_tree(item, fdict, param))
                    continue

                # Reference: "*name" is looked up in the unified features
                key = item[1:]
                try:
                    val = fdict[key]
                except KeyError:
                    raise UnifyError(
                        "Translate Ref to feature %s not found in %s"
                        % (key, rule))

                if type(val) is not str:
                    # Non-string value: handed to trans_alts
                    assert param is not False
                    expansion.append(self.trans_alts(val, fdict, param))
                elif val[0].isupper():
                    # Upper-case initial: treated as a non-terminal symbol
                    expansion.append(self.make_trans_tree(val, fdict, param))
                else:
                    expansion.append(val)

            alternatives.append(
                Tree(symbol, rule, ruleno, [], expansion, fdict, rule.cost))
            if rule.cut:
                # A rule flagged with "cut" stops the search for this symbol
                break
        except UnifyError as ue:
            last_error = "%s %s#%d" % (ue.args[0], symbol, ruleno)
            logging.debug("make_trans_tree: %s unifyd(%s,%s,%s)->Error",
                          symbol, format_feat(feat), format_fparam(fparam),
                          format_feat(rule.feat))

    if alternatives:
        return alternatives
    if not self.ruledict[symbol]:
        raise ParseError("No production rule defined for symbol: %s" % symbol)
    raise UnifyError(last_error)
def pformatr_dbg(self, level=0):
    """Return an indented, multi-line rendering of the right-hand side.

    Same layout as ``pformatr_ext`` (rule number, cost, features per
    alternative), with the value of ``uid(...)`` added in parentheses after
    every non-terminal group and every alternative.

    An empty right side renders as ``""``; a right side made only of strings
    renders as one space-joined line.
    """
    pad = self.indenter * level
    items = self.right
    if len(items) == 0:
        # empty production
        return ""
    if all(isinstance(entry, str) for entry in items):
        # terminal-only production
        return pad + " ".join(items) + "\n"

    alt_separator = pad + "|\n"
    pieces = []
    for entry in items:
        if isinstance(entry, str):
            pieces.append("{}{}\n".format(pad, entry))
            continue

        # Non-string entries are sequences of alternatives; the head is
        # taken from the first alternative.
        head = entry[0].head
        group_id = uid(entry)
        rendered_alts = []
        for alt in entry:
            ruleno = alt.ruleno
            feat_text = format_feat(alt.feat)
            alt_id = uid(alt)
            subtree_text = alt.pformatr_dbg(level + 1)
            # The cost tag is omitted when the cost is zero
            cost_text = "{%d}" % alt.cost if alt.cost != 0 else ""
            rendered_alts.append(
                "{indent}#{ruleno}{cost}{feat}({id})\n{body}".format(
                    indent=pad,
                    ruleno=ruleno,
                    feat=feat_text,
                    id=alt_id,
                    body=subtree_text,
                    cost=cost_text,
                ))
        pieces.append(
            "{indent}{head}({id})(\n{body}{indent})\n".format(
                indent=pad,
                head=head,
                id=group_id,
                body=alt_separator.join(rendered_alts),
            ))
    return "".join(pieces)
def pformatr_ext(self, level=0):
    """Return a pretty-printed (indented, multi-line) view of the right tree.

    Each alternative of a non-terminal group is introduced by a line holding
    its rule number, its cost (only when non-zero) and its formatted
    features; alternatives of one group are separated by a ``|`` line.

    An empty right side renders as ``""``; a right side made only of strings
    renders as one space-joined line.
    """
    pad = self.indenter * level
    items = self.right
    if len(items) == 0:
        # empty production
        return ""
    if all(isinstance(entry, str) for entry in items):
        # terminal-only production
        return pad + " ".join(items) + "\n"

    alt_separator = pad + "|\n"
    pieces = []
    for entry in items:
        if isinstance(entry, str):
            pieces.append("{}{}\n".format(pad, entry))
            continue

        # Non-string entries are sequences of alternatives; the head is
        # taken from the first alternative.
        head = entry[0].head
        rendered_alts = []
        for alt in entry:
            ruleno = alt.ruleno
            feat_text = format_feat(alt.feat)
            subtree_text = alt.pformatr_ext(level + 1)
            # The cost tag is omitted when the cost is zero
            cost_text = "{%d}" % alt.cost if alt.cost != 0 else ""
            rendered_alts.append(
                "{indent}#{ruleno}{cost}{feat}\n{body}".format(
                    indent=pad,
                    ruleno=ruleno,
                    feat=feat_text,
                    body=subtree_text,
                    cost=cost_text,
                ))
        pieces.append(
            "{indent}{head}(\n{body}{indent})\n".format(
                indent=pad,
                head=head,
                body=alt_separator.join(rendered_alts),
            ))
    return "".join(pieces)
def trans_tree(self, tree, feat=empty_dict, fparam=None):
    """Translate and unify the translation (right) part of a parse tree.

    The input node is not modified: a shallow copy is made, its ``right``
    attribute is replaced by the translated item list, and the copy is
    returned.

    Raises:
        UnifyError: a ``*name`` reference on the rule's right side names a
            feature missing from the unified feature dict (errors raised by
            the helpers called here propagate as well).
    """
    assert type(tree) is Tree
    node = copy.copy(tree)  # shallow copy, so the caller's node is untouched
    rule = node.rule
    logging.debug("trans_tree: %s unify(%s,%s,%s)->",
                  node.head, format_feat(node.feat), format_fparam(fparam),
                  format_feat(feat))
    fdict = Parser.unify_down(node.feat, fparam, feat, rule.checklist)
    logging.debug("trans_tree: ->%s", format_feat(fdict))

    translated = []
    for item, param in zip(rule.right, rule.rparam):
        if type(item) is int:
            # Matched (Left&Right) NT: the int indexes into the left side
            assert param is not False
            translated.append(self.trans_alts(node.left[item], fdict, param))
            continue

        if item[0] == '*':
            # Reference: "*name" is looked up in the unified features
            assert param is not False
            key = item[1:]
            try:
                val = fdict[key]
            except KeyError:
                raise UnifyError(
                    "Translate Ref to feature %s not found in %s"
                    % (key, rule))
            if type(val) is not str:
                translated.append(self.trans_alts(val, fdict, param))
            elif val[0].isupper():
                # Upper-case initial: treated as a non-terminal symbol
                translated.append(self.make_trans_tree(val, fdict, param))
            else:
                translated.append(val)
            continue

        if param is False:
            # Terminal: copied through unchanged
            translated.append(item)
            continue

        # Unmatched (Right-Only) NT
        assert item[0].isupper()
        translated.append(self.make_trans_tree(item, fdict, param))

    node.right = translated
    return node
def dot_format_int(self, left=True):
    """Return DOT node and edge statements for the children of this node.

    Walks ``self.left`` when ``left`` is true, otherwise ``self.right``.
    Nodes are named by ``id()`` of the corresponding Python object.

    * A string child yields a labelled node and an edge from this node.
    * Any other child is treated as a sequence of alternatives and yields,
      in this order: the recursive output of every alternative, one
      ``#ruleno`` node per alternative (linked from the group node), and the
      group node itself (linked from this node) labelled ``head[start:end]``.

    The chunks for all children are joined with newlines.
    """
    children = self.left if left else self.right
    own_id = id(self)
    chunks = []
    for child in children:
        child_id = id(child)
        if isinstance(child, str):
            chunks.append('{} [label="{}"]\n{} -> {}'.format(
                child_id, child, own_id, child_id))
            continue

        nested = "\n".join(alt.dot_format_int(left) for alt in child)

        alt_lines = []
        for alt in child:
            # Double quotes are stripped from the rule text because the
            # tooltip attribute is itself double-quoted.
            tooltip = (str(alt.rule).replace('"', '')
                       + "\\n" + format_feat(alt.feat))
            alt_lines.append(
                '{} [label="#{}",tooltip="{}"]\n{} -> {}'.format(
                    id(alt), alt.ruleno, tooltip, child_id, id(alt)))
        alt_nodes = "\n".join(alt_lines)

        first = child[0]
        covered = " ".join(Tree.words[first.start:first.end])
        group_node = '{} [label="{}[{}:{}]",tooltip="{}"]\n{} -> {}'.format(
            child_id, first.head, first.start, first.end, covered,
            own_id, child_id)

        chunks.append(nested + "\n" + alt_nodes + "\n" + group_node)
    return "\n".join(chunks)
def unify_tree(self, tree):
    """Unify a parse tree bottom-up and return the unified tree(s).

    The rule's features are combined with the features of every left-hand
    sub-tree via ``Parser.unify_up``. Each distinct resulting feature dict
    yields one output ``Tree`` whose left side contains only the
    sub-alternatives that produced that dict.

    Returns:
        A list of ``Tree`` objects, or the single ``Tree`` itself when
        ``tree.head`` is ``"S'"``.

    Raises:
        UnifyError: no alternative of some left-hand item could be unified;
            the message describes the last failure seen.
    """
    rule = tree.rule
    # Fallback message: without it an item with no alternatives at all
    # would reach the raise below with ``last_error`` unbound and surface
    # as UnboundLocalError instead of UnifyError.
    last_error = "No alternative to unify for %s#%s" % (
        tree.head, tree.ruleno)

    # Each stack entry is (feature dict so far, left items so far).
    stack = [(rule.feat, [])]
    for item, fparam in zip(tree.left, rule.lparam):
        if isinstance(item, str):
            # Terminal: appended to every partial result
            for _, seq in stack:
                seq.append(item)
            continue

        nstack = []
        for fdict, seq in stack:
            # nkeys[i] is a unified feature dict, nvals[i] the sub-trees
            # that produced it (parallel lists, matched by equality).
            nkeys = []
            nvals = []
            for alt in item:
                try:
                    subtrees = self.unify_tree(alt)
                except UnifyError as ue:
                    last_error = ue.args[0]
                    continue
                for subtree in subtrees:
                    logging.debug(
                        "Unify feat=%s fparam=%s subfeat=%s ",
                        format_feat(fdict), format_fparam(fparam),
                        format_feat(subtree.feat))
                    try:
                        _fdict = Parser.unify_up(
                            fdict, fparam, subtree.feat)
                        logging.debug("Unify Success=%s",
                                      format_feat(_fdict))
                        try:
                            idx = nkeys.index(_fdict)
                            nvals[idx].append(subtree)
                        except ValueError:
                            nkeys.append(_fdict)
                            nvals.append([subtree])
                    except UnifyError as ue:
                        logging.debug(
                            "Unify Failure dst=%s fparam=%s src=%s ",
                            format_feat(fdict), format_fparam(fparam),
                            format_feat(subtree.feat))
                        last_error = "%s super=%s#%s sub=%s#%s" % (
                            ue.args[0], tree.head, tree.ruleno,
                            subtree.head, subtree.ruleno)
            for key, val in zip(nkeys, nvals):
                nstack.append((key, seq + [val]))
        stack = nstack
        if not stack:
            # if unification of all alternative sub-trees fails,
            # re-raises the last error
            logging.debug("Re-raising UnifyError %s", last_error)
            raise UnifyError(last_error)

    ntree = []
    for fdict, seq in stack:
        # Substitute on a shallow copy: when no non-terminal item replaced
        # the initial entry, ``fdict`` is ``rule.feat`` itself, and writing
        # into it would alter the shared rule for every later use.
        # NOTE(review): whether Parser.unify_up always returns a fresh dict
        # is not visible here; the copy is safe either way.
        fdict = copy.copy(fdict)
        for key, val in fdict.items():
            if type(val) == int:
                # An int feature value is replaced by the left item at
                # that index.
                fdict[key] = tree.left[val]
        ntree.append(
            Tree(tree.head, tree.rule, tree.ruleno, seq, tree.right,
                 fdict, tree.cost))
    if tree.head == "S'":
        assert len(ntree) == 1
        return ntree[0]
    return ntree