def pst2dt(ptree, hpt=HPT, n=0, top=False):
    """
    Convert a phrase-structure tree into a dependency tree.

    A word is returned as the one-element list [ptree].  For any other
    tree the head daughter is picked with chooseHead (using the part of
    the tree's tag before the first "-" as the category), converted
    recursively, and every other daughter is converted and attached as a
    dependent of the head word.

    ptree: a word, or a tree whose tag is read with gettag and whose
           daughters are ptree[DTRS:]
    hpt:   table indexed by category; handed to chooseHead and quoted in
           the error message
    n:     only forwarded unchanged to the recursive calls
    top:   if true, the head word of the result gets the label "top"

    Returns a list [head, subtree, subtree, ...] in which every subtree
    has the same shape and the subtrees are ordered by the position of
    their own head word.

    Raises Exception if chooseHead returns nothing for this tree.
    """
    if isword(ptree):
        # Words are returned whole, wrapped in a list
        # (changed 10/02/2012 to return whole thing).
        return [ptree]

    # Otherwise it's a proper tree: the tag gives the category and
    # ptree[DTRS:] gives the daughters.
    cat = gettag(ptree).split("-")[0]
    dtrs = ptree[DTRS:]
    hd = chooseHead(cat, dtrs, hpt)
    if not hd:
        raise Exception('No dtr found for %s in \n%s\nLabels for possible dtrs of %s are \n%s'%(cat, showPSTree(ptree), cat, hpt[cat]))
    # NOTE(review): this filters by equality, so any daughter that compares
    # equal to the chosen head is dropped as well - confirm that is intended.
    others = [d for d in dtrs if not d == hd]
    hd = pst2dt(hd, hpt, n=n)
    # Split the converted head into the head word and the dependents it
    # already collected further down the tree.
    hd, newdtrs = hd[0], hd[1:]
    for d in others:
        # The dependency label is the second "-"-separated field of the
        # daughter's tag, or "mod" when the tag has no such field.
        l = gettag(d)
        if "-" in l:
            l = l.split("-")[1]
        else:
            l = "mod"
        # A list holding nothing but its label has nothing to attach.
        if islist(d) and len(d) == 1:
            continue
        d = pst2dt(d, hpt, n=n)
        d[0].label = l
        newdtrs.append(d)
    # Order dependents by the position of their head word.  key= is used
    # rather than cmp= because the cmp argument no longer exists in
    # Python 3; key= gives the same order for distinct positions and is
    # stable for equal ones.
    newdtrs.sort(key=lambda t: t[0].position)
    hd = [hd]+newdtrs
    if top:
        hd[0].label = "top"
    return hd
def fixleaves(tree):
    """
    Replace, in place, every leaf daughter of tree by a WORD.

    Each daughter from index 1 onwards is tested with isleaf: a leaf is
    swapped for WORD(leaf[1], leaf[0]); anything else is processed
    recursively.

    Returns tree itself when tree is already a word; otherwise nothing
    is returned and the tree is only modified in place.
    """
    if isword(tree):
        return tree
    for idx, child in enumerate(tree[1:], 1):
        if not isleaf(child):
            fixleaves(child)
            continue
        tree[idx] = WORD(child[1], child[0])
def fixpositions(tree, leaves, n=0):
    """
    Number the words of tree from left to right, starting at n.

    Each word met gets its position attribute set to the next free
    number and is appended to leaves.  Daughters are taken from
    tree[1:].

    Returns the first number that has not been used.
    """
    if not isword(tree):
        next_free = n
        for child in tree[1:]:
            next_free = fixpositions(child, leaves, next_free)
        return next_free
    tree.position = n
    leaves.append(tree)
    return n + 1
def showPSTree(tree, indent=0, initial=False):
    """
    Return a bracketed, indented string rendering of tree.

    Strings and words are rendered with '%s'.  Any other tree becomes
    '[TAG ' followed by its daughters (tree[DTRS:]) and a closing ']'.
    The first daughter stays on the same line as the tag; every later
    daughter starts on a new line, indented so that it lines up under
    the first one.

    indent:  number of spaces to put after the newline
    initial: true when this item continues the current line, in which
             case no newline or indentation is emitted

    A trailing newline is added whenever indent is 0.
    """
    pieces = []
    if not initial:
        pieces.append('\n' + ' ' * indent)
    if isstring(tree) or isword(tree):
        pieces.append('%s' % (tree,))
    else:
        tag = gettag(tree)
        pieces.append('[%s ' % (tag))
        for k, child in enumerate(tree[DTRS:]):
            # Only the first daughter continues the line opened by the tag.
            pieces.append(showPSTree(child, indent=indent + len(tag) + 2, initial=(k == 0)))
        pieces.append(']')
    if indent == 0:
        pieces.append('\n')
    return "".join(pieces)
def gettag(x):
    """
    Return the tag of x: x.tag for a word, x[LABEL] for anything else.
    """
    return x.tag if isword(x) else x[LABEL]