def create_forest(self, line):
    """Parse ``line`` into a forest and keep it on ``self.forest``.

    The text is passed to ``forest.forest_from_text`` together with
    ``delete_words=['@UNKNOWN@']``.

    Returns:
        True if the text was parsed and stored.  False if the parser
        raised ``forest.TreeFormatException``; in that case
        ``self.forest`` is reset to None.  Any other exception propagates.
    """
    try:
        self.forest = forest.forest_from_text(line, delete_words=['@UNKNOWN@'])
    except forest.TreeFormatException:
        # Malformed input: leave no stale forest behind.
        self.forest = None
        return False
    else:
        return True
def create_forest(self, line):
    """Build ``self.forest`` from the textual forest in ``line``.

    Calls ``forest.forest_from_text(line, delete_words=['@UNKNOWN@'])``
    and stores the result on the instance.

    Returns:
        True on success.  When ``forest.TreeFormatException`` is raised,
        ``self.forest`` is set to None and False is returned instead.
        Other exceptions are not handled here.
    """
    parsed_ok = True
    try:
        self.forest = forest.forest_from_text(line, delete_words=['@UNKNOWN@'])
    except forest.TreeFormatException:
        # The line is not a well-formed forest; clear the attribute.
        self.forest = None
        parsed_ok = False
    return parsed_ok
srcfilename = args[1] forestfilename = args[0] reffilenames = args[2:] srcfile = open(srcfilename) forestfile = open(forestfilename) if forestfilename != "-" else sys.stdin reffiles = [open(reffilename) for reffilename in reffilenames] def output(f): deriv = f.viterbi_deriv() hypv = deriv.vector() hyp = deriv.english() return "hyp={{{%s}}} derivation={{{%s}}} %s" % (" ".join(sym.tostring(e) for e in hyp), deriv, hypv) for srcline, forestline, reflines in itertools.izip(srcfile, forestfile, itertools.izip(*reffiles)): f = forest.forest_from_text(forestline) # the oracle needs to know how long all the French spans are for item in f.bottomup(): for ded in item.deds: # replace rule's French side with correct number of French words # we don't even bother to use the right number of variables ded.rule = rule.Rule(ded.rule.lhs, rule.Phrase([sym.fromstring('<foreign-word>')]*int(ded.dcost['foreign-length'])), ded.rule.e) f.reweight(weights) print "1-best %s" % output(f) s = sgml.Sentence(srcline.split()) s.fwords = srcline.split()