示例#1
0
def symmetrization(es, fs, corpus):
    """
    Combine the alignments of two opposite-direction models for one pair.

    ``ibmmodel2._train`` is run twice with ``loop_count=10``: once on
    ``corpus`` as given and once on a copy in which the elements of every
    entry are in reverse order. The Viterbi alignment of each direction is
    then handed to ``alignment``.

    es, fs
        the two sentences to align, as accepted by
        ``ibmmodel2.viterbi_alignment``
    corpus
        for translation from fs to es
        (presumably a sequence of sentence pairs -- confirm with callers)
    return
        alignment **from fs to es**
    """
    # Direction 1: parameters learned from the corpus exactly as supplied.
    forward_params = ibmmodel2._train(corpus, loop_count=10)
    forward_links = ibmmodel2.viterbi_alignment(es, fs, *forward_params).items()

    # Direction 2: transpose the corpus into columns, flip the column order,
    # and transpose back so that each entry's elements are reversed.
    columns = list(zip(*corpus))
    columns.reverse()
    flipped_corpus = list(zip(*columns))
    backward_params = ibmmodel2._train(flipped_corpus, loop_count=10)
    backward_links = ibmmodel2.viterbi_alignment(fs, es, *backward_params).items()

    return alignment(es, fs, backward_links, forward_links)
示例#2
0
def symmetrization(es, fs, corpus):
    '''
    Symmetrize the word alignment of one sentence pair.

    ``ibmmodel2._train`` is run twice with ``loop_count=1000``: once on
    ``corpus`` as given and once on a copy in which the elements of every
    entry are in reverse order. The Viterbi alignments of both directions
    are passed to ``alignment``.

    es, fs
        the two sentences to align, as accepted by
        ``ibmmodel2.viterbi_alignment``
    corpus
        for translation from fs to es
        (presumably a sequence of sentence pairs -- confirm with callers)
    return
        alignment **from fs to es**
    '''
    f2e_train = ibmmodel2._train(corpus, loop_count=1000)
    f2e = ibmmodel2.viterbi_alignment(es, fs, *f2e_train).items()

    # On Python 3 ``zip`` returns an iterator, which ``reversed`` rejects
    # with a TypeError, so the transposed corpus is turned into a list first.
    # The outer ``list`` gives ``_train`` a real sequence rather than a
    # one-shot iterator. Both wrappers are harmless on Python 2.
    e2f_corpus = list(zip(*reversed(list(zip(*corpus)))))
    e2f_train = ibmmodel2._train(e2f_corpus, loop_count=1000)
    e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items()

    return alignment(es, fs, e2f, f2e)
示例#3
0
    # test2
    # Reads a delimiter-separated training file named by the first
    # command-line argument, then computes an alignment for every
    # sentence pair read from standard input.
    from smt.utils.utility import mkcorpus
    from word_alignment import alignment
    from smt.ibmmodel import ibmmodel2
    import sys

    delimiter = ","
    # Load the file which will be trained on; the context manager closes the
    # handle as soon as all lines have been split into fields.
    with open(sys.argv[1]) as modelfd:
        sentenses = [line.rstrip().split(delimiter) for line in modelfd]
    # make corpus
    corpus = mkcorpus(sentenses)

    # Train once per direction: on the corpus as built, and on a copy whose
    # entries have their elements in reverse order.
    f2e_train = ibmmodel2._train(corpus, loop_count=10)
    e2f_corpus = list(zip(*reversed(list(zip(*corpus)))))
    e2f_train = ibmmodel2._train(e2f_corpus, loop_count=10)

    # phrase extraction
    for line in sys.stdin:
        # Each input line must hold exactly two delimiter-separated fields;
        # any other count makes the unpacking raise ValueError.
        _es, _fs = line.rstrip().split(delimiter)
        es = _es.split()
        fs = _fs.split()

        f2e = ibmmodel2.viterbi_alignment(es, fs, *f2e_train).items()
        e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items()
        align = alignment(es, fs, e2f, f2e)  # symmetrized alignment

        # output matrix
        #from smt.utils.utility import matrix
示例#4
0
    # Reads a delimiter-separated training file named by the first
    # command-line argument, then computes an alignment for every
    # sentence pair read from standard input.
    from smt.utils.utility import mkcorpus
    from word_alignment import alignment
    from smt.ibmmodel import ibmmodel2
    import sys

    delimiter = ","
    # Load the file which will be trained on. Using ``with`` guarantees the
    # file handle is released even if splitting a line fails.
    with open(sys.argv[1]) as modelfd:
        sentenses = [
            line.rstrip().split(delimiter) for line in modelfd
        ]
    # make corpus
    corpus = mkcorpus(sentenses)

    # Train once per direction: on the corpus as built, and on a copy whose
    # entries have their elements in reverse order.
    f2e_train = ibmmodel2._train(corpus, loop_count=10)
    e2f_corpus = list(zip(*reversed(list(zip(*corpus)))))
    e2f_train = ibmmodel2._train(e2f_corpus, loop_count=10)

    # phrase extraction
    for line in sys.stdin:
        # Each input line must hold exactly two delimiter-separated fields;
        # any other count makes the unpacking raise ValueError.
        _es, _fs = line.rstrip().split(delimiter)
        es = _es.split()
        fs = _fs.split()

        f2e = ibmmodel2.viterbi_alignment(es, fs, *f2e_train).items()
        e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items()
        align = alignment(es, fs, e2f, f2e)  # symmetrized alignment

        # output matrix
        #from smt.utils.utility import matrix