def _tree_helper(t, antvalues):
    """Parse the tree string ``t`` and graft antecedent values onto its leaves.

    Every frontier node whose label is read by ``rule.Nonterminal.from_str``
    as a ``rule.Nonterminal`` receives, as its first child, the entry of
    ``antvalues`` selected by that nonterminal's index.

    Args:
        t: string form of a tree, as accepted by ``tree.str_to_tree``.
        antvalues: sequence of values to insert; position ``i`` serves the
            nonterminal whose ``getindex()`` is ``i + 1``.

    Returns:
        The parsed tree, modified in place.
    """
    parsed = tree.str_to_tree(t)
    for leaf in parsed.frontier():
        symbol = rule.Nonterminal.from_str(leaf.label)
        if not isinstance(symbol, rule.Nonterminal):
            # Not a nonterminal leaf: nothing to attach here.
            continue
        # getindex() is offset by one relative to positions in antvalues.
        slot = symbol.getindex() - 1
        leaf.insert_child(0, antvalues[slot])
    return parsed
def _tree_helper(t, antvalues):
    """Parse the tree string ``t`` and graft antecedent values onto its leaves.

    Each frontier label is converted with ``sym.fromstring``; when the
    resulting symbol satisfies ``sym.isvar``, the matching entry of
    ``antvalues`` is inserted as the leaf's first child.

    Args:
        t: string form of a tree, as accepted by ``tree.str_to_tree``.
        antvalues: sequence of values to insert; position ``i`` serves the
            variable whose ``sym.getindex`` is ``i + 1``.

    Returns:
        The parsed tree, modified in place.
    """
    parsed = tree.str_to_tree(t)
    for leaf in parsed.frontier():
        label_sym = sym.fromstring(leaf.label)
        if sym.isvar(label_sym):
            # sym.getindex() is offset by one relative to antvalues positions.
            slot = sym.getindex(label_sym) - 1
            leaf.insert_child(0, antvalues[slot])
    return parsed
def _tree_helper(t, antvalues):
    """Build a tree from ``t`` and attach ``antvalues`` under its variable leaves.

    Args:
        t: string form of a tree, as accepted by ``tree.str_to_tree``.
        antvalues: sequence indexed by ``sym.getindex(x) - 1`` for each
            frontier symbol ``x`` for which ``sym.isvar(x)`` holds.

    Returns:
        The parsed tree; each variable leaf has gained the corresponding
        antecedent value as its first child.
    """
    result = tree.str_to_tree(t)
    for frontier_node in result.frontier():
        symbol = sym.fromstring(frontier_node.label)
        if not sym.isvar(symbol):
            # Leaves that are not variables are left untouched.
            continue
        antecedent = antvalues[sym.getindex(symbol) - 1]
        frontier_node.insert_child(0, antecedent)
    return result
def _tree_helper(t, antvalues):
    """Build a tree from ``t`` and attach ``antvalues`` under its nonterminal leaves.

    Args:
        t: string form of a tree, as accepted by ``tree.str_to_tree``.
        antvalues: sequence indexed by ``x.getindex() - 1`` for each frontier
            label that ``rule.Nonterminal.from_str`` turns into a
            ``rule.Nonterminal`` instance ``x``.

    Returns:
        The parsed tree; each nonterminal leaf has gained the corresponding
        antecedent value as its first child.
    """
    result = tree.str_to_tree(t)
    for frontier_node in result.frontier():
        candidate = rule.Nonterminal.from_str(frontier_node.label)
        if isinstance(candidate, rule.Nonterminal):
            antecedent = antvalues[candidate.getindex() - 1]
            frontier_node.insert_child(0, antecedent)
    return result
def __init__(self, file):
    """Index the trees read from ``file`` by their frontier symbols.

    Each line of ``file`` is parsed with ``tree.str_to_tree``. Lines that
    yield ``None`` are reported through ``log.write`` and skipped. For every
    other line, the tuple of ``sym.fromstring`` values of the frontier labels
    becomes the key under which the tree is stored in ``self.parses``; a
    later tree with the same frontier replaces an earlier one.

    Args:
        file: iterable of lines, one tree per line.
    """
    self.parses = {}
    for raw_line in file:
        parsed = tree.str_to_tree(raw_line)
        if parsed is not None:
            key = tuple(sym.fromstring(leaf.label) for leaf in parsed.frontier())
            self.parses[key] = parsed
        else:
            log.write("warning: couldn't read tree\n")
def raduparse(t, radu=True):
    """Parse the tree string ``t``, optionally converting it first.

    Args:
        t: tree string.
        radu: when true (the default), ``t`` is first passed through
            ``radu2ptb``; when false, ``t`` is parsed unchanged.

    Returns:
        Whatever ``tree.str_to_tree`` returns for the resulting string.
    """
    source = radu2ptb(t) if radu else t
    return tree.str_to_tree(source)
def raduparse(t):
    """Convert the tree string ``t`` with ``radu2ptb`` and parse the result.

    Returns:
        Whatever ``tree.str_to_tree`` returns for the converted string.
    """
    return tree.str_to_tree(radu2ptb(t))
# Excerpt of a larger routine; the original line breaks and indentation were
# lost and the excerpt is truncated, so the exact nesting of the statements
# below cannot be recovered from here -- TODO confirm against the full file.
#
# What the visible statements do:
#   * Under `opts.tags`: read one line each from `ftfile` and `etfile`, split
#     them into `a.ftags` / `a.etags`, and write a warning to stderr when the
#     tag count differs from len(a.fwords) / len(a.ewords).
#   * Set `a.espans` to None.
#   * Under `opts.trees` with `ebfile` not None: parse one line of `ebfile`
#     with tree.str_to_tree.  A None tree, or a tree whose `length` differs
#     from len(a.ewords), produces a stderr warning and `a.espans = {}`;
#     otherwise remove_req(etree) is called and `a.espans = etree.spans()`.
#   * Begin iterating over `a.espans.iteritems()`; the loop body lies outside
#     this excerpt.
#
# NOTE(review): the "null tree" warning is the only message here without a
# trailing "\n" -- confirm whether that is intended.
# NOTE(review): `.iteritems()` is the Python 2 dict API; presumably `a.espans`
# is a dict -- verify before running under Python 3.
if opts.tags: a.ftags = ftfile.readline().split() a.etags = etfile.readline().split() if len(a.ftags) != len(a.fwords): sys.stderr.write( "warning: length mismatch between French words and tags (%d != %d)\n" % (len(a.ftags), len(a.fwords))) if len(a.etags) != len(a.ewords): sys.stderr.write( "warning: length mismatch between English words and tags (%d != %d)\n" % (len(a.etags), len(a.ewords))) a.espans = None if opts.trees: if ebfile is not None: etree = tree.str_to_tree(ebfile.readline()) if etree is None: sys.stderr.write("warning, line %d: null tree" % a.lineno) a.espans = {} elif etree.length != len(a.ewords): sys.stderr.write( "warning, line %d: length mismatch between English words and trees (%d != %d)\n" % (a.lineno, len(a.ewords), etree.length)) sys.stderr.write( " start of English sentence: %s\n" % " ".join([sym.tostring(x) for x in a.ewords[:5]])) a.espans = {} else: remove_req(etree) a.espans = etree.spans() for (span, labels) in a.espans.iteritems():
# Excerpt of a larger routine; the original line breaks and indentation were
# lost and the excerpt is truncated, so the exact nesting of the statements
# below cannot be recovered from here -- TODO confirm against the full file.
#
# What the visible statements do:
#   * Write `a` to `log.file` via a.write and a.write_visual, then flush it.
#   * Under `opts.tags`: read one line each from `ftfile` and `etfile`, split
#     them into `a.ftags` / `a.etags`, and write a warning to stderr when the
#     tag count differs from len(a.fwords) / len(a.ewords).
#   * Set `a.espans` to None.
#   * Under `opts.trees` with `ebfile` not None: parse one line of `ebfile`
#     with tree.str_to_tree.  A None tree, or a tree whose `length` differs
#     from len(a.ewords), produces a stderr warning and `a.espans = {}`;
#     otherwise remove_req(etree) is called and `a.espans = etree.spans()`.
#   * For each (span, labels) pair of `a.espans.iteritems()`, replace the
#     stored labels with [sym.fromtag(x) for x in labels].
#
# NOTE(review): in this collapsed form the inline "# done reading all input
# lines" comment swallows the `if opts.discard_long_sentences ...` test that
# follows it; presumably that test started its own line originally, and its
# body lies outside this excerpt.
# NOTE(review): the "null tree" warning is the only message here without a
# trailing "\n" -- confirm whether that is intended.
# NOTE(review): `.iteritems()` is the Python 2 dict API; presumably `a.espans`
# is a dict -- verify before running under Python 3.
a.write(log.file) a.write_visual(log.file) log.file.flush() if opts.tags: a.ftags = ftfile.readline().split() a.etags = etfile.readline().split() if len(a.ftags) != len(a.fwords): sys.stderr.write("warning: length mismatch between French words and tags (%d != %d)\n" % (len(a.ftags), len(a.fwords))) if len(a.etags) != len(a.ewords): sys.stderr.write("warning: length mismatch between English words and tags (%d != %d)\n" % (len(a.etags), len(a.ewords))) a.espans = None if opts.trees: if ebfile is not None: etree = tree.str_to_tree(ebfile.readline()) if etree is None: sys.stderr.write("warning, line %d: null tree" % a.lineno) a.espans = {} elif etree.length != len(a.ewords): sys.stderr.write("warning, line %d: length mismatch between English words and trees (%d != %d)\n" % (a.lineno, len(a.ewords), etree.length)) sys.stderr.write(" start of English sentence: %s\n" % " ".join([sym.tostring(x) for x in a.ewords[:5]])) a.espans = {} else: remove_req(etree) a.espans = etree.spans() for (span, labels) in a.espans.iteritems(): a.espans[span] = [sym.fromtag(x) for x in labels] # done reading all input lines if opts.discard_long_sentences and len(a.fwords) > opts.maxabslen:
# Excerpt of a script; the original line breaks and indentation were lost and
# the excerpt is truncated at both ends.
#
# What the visible statements do:
#   * Compile `relabeler2` (matches "-<digits>-BAR" at the end of a label) and
#     `relabeler3` (matches a leading "@").  `relabeler1` is used by relabel()
#     but is defined outside this excerpt.
#   * relabel(label): rewrites a trailing "-<digits>-BAR" to "-BAR", then
#     deletes whatever `relabeler1` matches, then deletes a leading "@", and
#     returns the result.
#   * Under `__main__`: for each stdin line, split it once on a space into
#     (rid, trline), parse `trline` with tree.str_to_tree, and walk the tree
#     in preorder.  For every node that has children, build the string
#     "<relabelled label>(<relabelled child labels joined by spaces>)" in
#     `rewrite`.  What is done with `rewrite`, `begin` and `n_bad_rewrites`
#     lies outside this excerpt.
#
# NOTE(review): in this collapsed form the first inline "#" comment swallows
# the rest of the line; presumably each commented-out statement occupied its
# own line originally.
# NOTE(review): '-\d+-BAR$' is not a raw string; "\d" is an invalid string
# escape that newer Python versions warn about -- consider r'-\d+-BAR$'.
relabeler2 = re.compile('-\d+-BAR$') relabeler3 = re.compile('^@') def relabel(label): #sys.stderr.write("%sA\n" % label) label=relabeler2.sub('-BAR', label) label=relabeler1.sub('', label) label=relabeler3.sub('', label) return label if __name__ == "__main__": for l in sys.stdin: (rid, trline) = l.split(' ', 1) node=tree.str_to_tree(trline) #sys.stdout.write("%s\n" % node.__str__()) begin=0 n_bad_rewrites=0 for n in node.preorder(): if(len(n.children)): #if begin!=0: # sys.stdout.write(" ### ") begin=1 rewrite="%s(" % relabel(n.label) ch=[] for c in n.children: ch.append(relabel(c.label)) #sys.stdout.write("%s" % ' '.join(ch)) rewrite+= "%s" % ' '.join(ch) rewrite+=")"
# Excerpt of a larger loop; the original line breaks and indentation were lost
# and the loop body continues beyond this excerpt.
#
# What the visible statements do, for each line of stdin:
#   * Strip trailing whitespace and split on tabs.  The first three fields
#     are unpacked into (fstr, estr, astr); if that fails, the line is logged
#     as "bad line, skipped" and the loop moves on.
#   * When there are exactly four fields, the fourth is whitespace-split into
#     `provenance`; otherwise `provenance` is an empty list.
#   * Under `opts.french_trees`: parse `fstr` with tree.str_to_tree, pass the
#     result through prepare_tree, and skip the line (with a log message) if
#     that yields None.  Otherwise collect the frontier leaves, take their
#     labels as `fwords` and their parents' labels as `ftags`, and compute
#     `fcb = crossing_brackets(ftree)`.  Any Exception raised along the way is
#     logged together with `fstr` and the line is skipped.
#   * Without `opts.french_trees`: `fwords` is `fstr` split on whitespace.
#
# NOTE(review): the bare `except:` around the unpacking also catches
# KeyboardInterrupt/SystemExit; ValueError is presumably the intended case.
# NOTE(review): `len(fields) == 4` means a line with five or more fields gets
# an empty provenance -- confirm that is intended.
# NOTE(review): in this collapsed form the inline "#fspans = ..." comment
# swallows the rest of the line; presumably it occupied its own line
# originally.
for li, line in enumerate(sys.stdin): fields = line.rstrip().split('\t') try: fstr, estr, astr = fields[:3] except: log.write("bad line, skipped: %s\n" % line.rstrip('\r\n')) continue if len(fields) == 4: provenance = list(fields[3].split()) else: provenance = [] if opts.french_trees: try: ftree = prepare_tree(tree.str_to_tree(fstr)) if ftree is None: log.write("bad tree, skipped line\n") continue #fspans = label_spans(ftree) fleaves = list(ftree.frontier()) fwords = [leaf.label for leaf in fleaves] ftags = [leaf.parent.label for leaf in fleaves] fcb = crossing_brackets(ftree) except Exception as e: log.write("bad ftree, skipped: %s\n" % fstr) log.write("reason: %s\n" % e) continue else: fwords = fstr.split()