def show_matrix(self, es, fs, t, a):
    '''
    Return the Viterbi alignment of ``es`` and ``fs`` rendered as a matrix.

    The alignment comes from ``self.viterbi_alignment(es, fs, t, a)``; its
    ``items()`` pairs are handed to ``utility.matrix`` together with the two
    sentence lengths and the sentences themselves.  The value produced by
    ``utility.matrix`` is returned, not printed -- the caller decides
    whether to print it.

    Intended layout of the grid (``es`` down the side, ``fs`` across the
    top):

              fs
         -------------
        e|           |
        s|           |
         |           |
         -------------

    Args:
        es: sized sequence for the vertical axis (the example below passes
            a list of tokens).
        fs: sized sequence for the horizontal axis.
        t, a: forwarded unchanged to ``viterbi_alignment``; in the example
            below they are the pair returned by ``train``.

    Returns:
        Whatever ``utility.matrix`` returns for the given sizes, alignment
        pairs and sentences.

    Illustrative session (plain text, deliberately not a doctest; the grid
    is the one recorded in the earlier version of this docstring, and
    ``model`` stands for an instance of the enclosing class -- how it and
    ``train`` are obtained is not shown here):

        sentences = [("僕 は 男 です", "I am a man"),
                     ("私 は 女 です", "I am a girl"),
                     ("私 は 先生 です", "I am a teacher"),
                     ("彼女 は 先生 です", "She is a teacher"),
                     ("彼 は 先生 です", "He is a teacher"),
                     ]
        t, a = train(sentences, loop_count=1000)
        args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a)
        print(model.show_matrix(*args))
        |x| | | |
        | | |x| |
        | | | |x|
        | | |x| |
    '''
    # items() exposes the alignment as pairs, the form utility.matrix is
    # given elsewhere in this project.
    alignment_pairs = self.viterbi_alignment(es, fs, t, a).items()
    # es and fs are passed along as well -- presumably used as axis labels;
    # confirm against utility.matrix.
    return utility.matrix(len(es), len(fs), alignment_pairs, es, fs)
def show_matrix(es, fs, t, a):
    '''
    Return the Viterbi alignment of ``es`` and ``fs`` rendered as a matrix.

    The alignment comes from ``viterbi_alignment(es, fs, t, a)``; its
    ``items()`` pairs are handed to ``utility.matrix`` together with the two
    sentence lengths.  The value produced by ``utility.matrix`` is returned,
    not printed -- the caller decides whether to print it.

    Intended layout of the grid (``es`` down the side, ``fs`` across the
    top):

              fs
         -------------
        e|           |
        s|           |
         |           |
         -------------

    Args:
        es: sized sequence for the vertical axis (the example below passes
            a list of tokens).
        fs: sized sequence for the horizontal axis.
        t, a: forwarded unchanged to ``viterbi_alignment``; in the example
            below they are the pair returned by ``train``.

    Returns:
        Whatever ``utility.matrix`` returns for the given sizes and
        alignment pairs.

    Illustrative session (plain text, deliberately not a doctest; the grid
    is the one recorded in the earlier version of this docstring):

        sentences = [("僕 は 男 です", "I am a man"),
                     ("私 は 女 です", "I am a girl"),
                     ("私 は 先生 です", "I am a teacher"),
                     ("彼女 は 先生 です", "She is a teacher"),
                     ("彼 は 先生 です", "He is a teacher"),
                     ]
        t, a = train(sentences, loop_count=1000)
        args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a)
        print(show_matrix(*args))
        |x| | | |
        | | |x| |
        | | | |x|
        | | |x| |
    '''
    # items() exposes the alignment as pairs, the form utility.matrix is
    # given elsewhere in this project.
    alignment_pairs = viterbi_alignment(es, fs, t, a).items()
    return utility.matrix(len(es), len(fs), alignment_pairs)
e2f_train = ibmmodel2._train(e2f_corpus, loop_count=10) e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items() return alignment(es, fs, e2f, f2e) if __name__ == '__main__': # test for alignment es = "michael assumes that he will stay in the house".split() fs = "michael geht davon aus , dass er im haus bleibt".split() e2f = [(1, 1), (2, 2), (2, 3), (2, 4), (3, 6), (4, 7), (7, 8), (9, 9), (6, 10)] f2e = [(1, 1), (2, 2), (3, 6), (4, 7), (7, 8), (8, 8), (9, 9), (5, 10), (6, 10)] from smt.utils.utility import matrix print(matrix(len(es), len(fs), e2f, es, fs)) print(matrix(len(es), len(fs), f2e, es, fs)) ali = _alignment(es, fs, e2f, f2e) print(matrix(len(es), len(fs), ali, es, fs)) # test for symmetrization from smt.utils.utility import mkcorpus sentenses = [("僕 は 男 です", "I am a man"), ("私 は 女 です", "I am a girl"), ("私 は 先生 です", "I am a teacher"), ("彼女 は 先生 です", "She is a teacher"), ("彼 は 先生 です", "He is a teacher"), ] corpus = mkcorpus(sentenses) es = "私 は 先生 です".split() fs = "I am a teacher".split()
] >>> t, a = train(sentences, loop_count=1000) >>> args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a) |x| | | | | | |x| | | | | |x| | | |x| | ''' max_a = viterbi_alignment(es, fs, t, a).items() m = len(es) n = len(fs) return utility.matrix(m, n, max_a) if __name__ == '__main__': #sentences = [("the house", "das Haus"), # ("the book", "das Buch"), # ("a book", "ein Buch"), # ] print(utility.matrix(2, 3, [(1, 1), (2, 3)])) sentences = [ ("僕 は 男 です", "I am a man"), ("私 は 女 です", "I am a girl"), ("私 は 先生 です", "I am a teacher"), ("彼女 は 先生 です", "She is a teacher"), ("彼 は 先生 です", "He is a teacher"), ] t, a = train(sentences, loop_count=100) args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a) print(show_matrix(*args))
("彼 は 先生 です", "He is a teacher"), ] >>> t, a = train(sentences, loop_count=1000) >>> args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a) |x| | | | | | |x| | | | | |x| | | |x| | ''' max_a = viterbi_alignment(es, fs, t, a).items() m = len(es) n = len(fs) return utility.matrix(m, n, max_a) if __name__ == '__main__': #sentences = [("the house", "das Haus"), # ("the book", "das Buch"), # ("a book", "ein Buch"), # ] print(utility.matrix(2, 3, [(1, 1), (2, 3)])) sentences = [("僕 は 男 です", "I am a man"), ("私 は 女 です", "I am a girl"), ("私 は 先生 です", "I am a teacher"), ("彼女 は 先生 です", "She is a teacher"), ("彼 は 先生 です", "He is a teacher"), ] t, a = train(sentences, loop_count=100) args = ("私 は 先生 です".split(), "I am a teacher".split(), t, a) print(show_matrix(*args))
e2f_train = ibmmodel2._train(e2f_corpus, loop_count=1000) e2f = ibmmodel2.viterbi_alignment(fs, es, *e2f_train).items() return alignment(es, fs, e2f, f2e) if __name__ == '__main__': # test for alignment es = "michael assumes that he will stay in the house".split() fs = "michael geht davon aus , dass er im haus bleibt".split() e2f = [(1, 1), (2, 2), (2, 3), (2, 4), (3, 6), (4, 7), (7, 8), (9, 9), (6, 10)] f2e = [(1, 1), (2, 2), (3, 6), (4, 7), (7, 8), (8, 8), (9, 9), (5, 10), (6, 10)] from smt.utils.utility import matrix print(matrix(len(es), len(fs), e2f)) print(matrix(len(es), len(fs), f2e)) ali = _alignment(es, fs, e2f, f2e) print(matrix(len(es), len(fs), ali)) # test for symmetrization from smt.utils.utility import mkcorpus sentenses = [("僕 は 男 です", "I am a man"), ("私 は 女 です", "I am a girl"), ("私 は 先生 です", "I am a teacher"), ("彼女 は 先生 です", "She is a teacher"), ("彼 は 先生 です", "He is a teacher"), ] corpus = mkcorpus(sentenses) es = "私 は 先生 です".split() fs = "I am a teacher".split()