Пример #1
0
def align_file(fin, fout, em_model, hmm_model):
    """Align the pairs read from ``fin`` and write the result to ``fout``.

    Args:
        fin: Input handed to ``Aligner.readAMR`` to obtain the pairs.
        fout: Output target handed to ``Aligner.printAlignments``.
        em_model: Forwarded unchanged to ``Aligner.alignPairs``.
        hmm_model: Forwarded unchanged to ``Aligner.alignPairs``.

    Progress messages are printed to standard output before the alignment
    step, before the writing step, and once everything has finished.
    """
    amr_pairs = Aligner.readAMR(fin)

    print("Aligning")
    alignments = Aligner.alignPairs(amr_pairs, em_model, hmm_model)

    print("Writing alignments to file")
    Aligner.printAlignments(alignments, amr_pairs, fout)

    print("done")
Пример #2
0
def train_models(fnames, emiter, hmmiter, model_name):
    """Train the EM model and then the HMM model from a set of AMR files.

    Every file in ``fnames`` is read with ``Aligner.readAMR``; the first
    element of each resulting pair is tokenized and the second is wrapped in
    an ``AMRGraph``. ``EM.train`` runs first, and its result is passed to
    ``HMM.train`` together with the alignments from ``Aligner.initalign``.

    Args:
        fnames: Paths of the files to read. Must be non-empty.
        emiter: Forwarded to ``EM.train`` (presumably the number of EM
            iterations -- confirm against ``EM.train``).
        hmmiter: Forwarded to ``HMM.train`` (presumably the number of HMM
            iterations -- confirm against ``HMM.train``).
        model_name: Prefix for the model names; ``".em"`` and ``".hmm"`` are
            appended and passed to the respective trainers.

    Raises:
        SystemExit: If ``fnames`` is empty (message ``"No file provided"``).
    """
    if not fnames:
        sys.exit("No file provided")

    print("Reading AMR files")
    pairs = []
    for fname in fnames:
        # The context manager closes the file even if readAMR raises.
        with open(fname, "r") as f:
            pairs += Aligner.readAMR(f)
    sentences = [Aligner.tokenize(pair[0]) for pair in pairs]
    graphs = [AMRGraph(pair[1], False) for pair in pairs]

    # NOTE: the original code kept a commented-out alternative here that
    # loaded the EM model via EM.load_model(model_name + ".em") instead of
    # training it.
    emprobs = EM.train(sentences, graphs, model_name + ".em", emiter)

    print("Initializing rule-based alignments")
    n = len(sentences)
    # Build the list by appending rather than pre-filling with ``[{}] * n``,
    # which would make every slot refer to the same dict object.
    initalgs = []
    for done, (graph, sentence) in enumerate(zip(graphs, sentences), 1):
        initalgs.append(Aligner.initalign(graph.ref, sentence))
        if done % 1000 == 0:
            # Progress report every 1000 processed sentences.
            print(str(done) + "/" + str(n))

    HMM.train(sentences, graphs, emprobs, model_name + ".hmm", hmmiter, initalgs)
    print("Done")