# Command-line driver: load the training corpus and the gold-standard
# sentences, run ``GizaAligner.temp_train`` on their concatenation and, when
# gold alignments were supplied, score the alignments produced for the gold
# sentences and print the scores as one CSV row.
args = p.parse_args()

# Number of gold alignments to evaluate; stays None when no gold alignment
# file was given, in which case training still runs but nothing is scored.
last_n = None
if args.gold_aln:
    gold_alns = load_aln(args.gold_aln)
    last_n = len(gold_alns)

# NOTE(review): args.limit is presumably a cap on the number of training
# sentences read -- confirm against load_snts.
e_snts = load_snts(args.e, args.limit)
f_snts = load_snts(args.f, args.limit)
assert len(e_snts) == len(f_snts), (
    "training corpora differ in length: {} vs {}".format(len(e_snts), len(f_snts))
)

gold_e_snts = load_snts(args.gold_e)
gold_f_snts = load_snts(args.gold_f)

# Build each combined corpus exactly once. The gold sentences are appended
# last so that their alignments are the final entries of the training output.
all_e_snts = e_snts + gold_e_snts
all_f_snts = f_snts + gold_f_snts

# Validate the inputs before the (expensive) training step. Assertions are
# kept, rather than raising another exception type, so failures surface the
# same way as before.
if last_n is not None:
    # A count of zero would make the ``[-last_n:]`` slice below return the
    # whole list instead of an empty one.
    assert last_n > 0, "gold alignment file contains no alignments"
    assert len(all_e_snts) >= last_n, (
        "fewer sentences ({}) than gold alignments ({})".format(
            len(all_e_snts), last_n
        )
    )

ga = GizaAligner()
test_alns = ga.temp_train(all_e_snts, all_f_snts)

if last_n is not None:
    # We assume that the test (gold) sentences are the last ones in the
    # training input, so their alignments are the last ``last_n`` results.
    alns_to_test = test_alns[-last_n:]
    assert len(alns_to_test) == last_n, (
        "expected {} alignments to evaluate, got {}".format(
            last_n, len(alns_to_test)
        )
    )

    ae = AlignEval(alns_to_test, gold_alns)
    # One CSV row: the limit used, followed by every score from ae.all()
    # formatted to five decimal places.
    row = [str(args.limit)]
    row.extend(format(score, ".5f") for score in ae.all())
    print(",".join(row))
def train_aligner(o, e, f):
    """Create a new ``GizaAligner`` and call its ``train`` method.

    The three arguments are forwarded unchanged, in order, to
    ``GizaAligner.train``. Nothing is returned.

    NOTE(review): presumably ``o`` is an output location and ``e`` / ``f`` are
    the two sides of the parallel corpus -- confirm against
    ``GizaAligner.train``.
    """
    # A fresh aligner is always trained here; the previously commented-out
    # alternative was ``GizaAligner.load(o, e, f)``.
    GizaAligner().train(o, e, f)