Пример #1
0
    args = p.parse_args()

    last_n = None
    if args.gold_aln:
        gold_alns = load_aln(args.gold_aln)
        last_n = len(gold_alns)

    e_snts = load_snts(args.e, args.limit)
    f_snts = load_snts(args.f, args.limit)
    assert len(e_snts) == len(f_snts)

    gold_e_snts = load_snts(args.gold_e)
    gold_f_snts = load_snts(args.gold_f)

    ga = GizaAligner()
    test_alns = ga.temp_train(e_snts + gold_e_snts, f_snts + gold_f_snts)

    if args.gold_aln:
        assert len(e_snts + gold_e_snts) >= len(gold_alns)

        # -------------------------------------------
        # We will assume that the test sentences are the last
        # in the file.
        # -------------------------------------------

        alns_to_test = test_alns[-len(gold_alns) :]
        assert len(alns_to_test) == len(gold_alns)

        ae = AlignEval(alns_to_test, gold_alns)
        print(",".join([str(args.limit)] + ["{:.5f}".format(f) for f in ae.all()]))
Пример #2
0
def train_aligner(o, e, f):
    """Train a freshly constructed ``GizaAligner``.

    The three arguments are forwarded unchanged, in order, to
    ``GizaAligner.train``. Nothing is returned; the aligner instance is
    not kept after training completes.

    NOTE(review): ``o``, ``e`` and ``f`` presumably name an output
    location and the two sides of the parallel text -- confirm against
    ``GizaAligner.train``.
    """
    # Alternative left for reference: restore an existing aligner with
    # GizaAligner.load(o, e, f) instead of training a new one.
    GizaAligner().train(o, e, f)