예제 #1
0
 def setUp(self):
     """Build the Yalign model fixture from the bundled parallel corpus.

     Sets ``self.parallel_corpus`` (path to ``parallel-en-es.txt`` under
     ``data_path``), ``self.document_a`` / ``self.document_b`` (the first
     30 items of each document returned by
     ``parallel_corpus_to_documents``) and ``self.model`` (a
     ``YalignModel`` wrapping a ``SequenceAligner``).
     """
     scores_csv = os.path.join(data_path, "test_word_scores_big.csv")
     self.parallel_corpus = os.path.join(data_path, "parallel-en-es.txt")

     # Documents: keep only the leading 30 items of each side.
     doc_a, doc_b = parallel_corpus_to_documents(self.parallel_corpus)
     self.document_a, self.document_b = doc_a[:30], doc_b[:30]
     aligned_pairs = training_alignments_from_documents(
         self.document_a, self.document_b)

     # Word-level scorer loaded from the CSV of word scores.
     word_scorer = WordPairScore(scores_csv)

     # Sentence-level scorer trained on the alignments above.
     sentence_scorer = SentencePairScore()
     sentence_scorer.train(aligned_pairs, word_scorer)

     # Yalign model; 0.49 is the aligner's second argument
     # (presumably the gap penalty -- confirm against SequenceAligner).
     self.model = YalignModel(SequenceAligner(sentence_scorer, 0.49))
예제 #2
0
 def setUp(self):
     """Prepare scrambled documents and a trained Yalign model.

     Sets the following attributes:

     * ``self.alignments`` -- list built from
       ``training_alignments_from_documents`` over the first 25 items of
       each document in the bundled ``parallel-en-es.txt`` corpus.
     * ``self.A``, ``self.B``, ``self.correct_alignments`` -- the three
       values produced by ``training_scrambling_from_documents``.
     * ``self.min_`` / ``self.max_`` -- ``min_bound`` and ``max_bound`` of
       the trained ``SentencePairScore``.
     * ``self.model`` -- a ``YalignModel`` whose aligner uses the midpoint
       of those bounds as its gap penalty.
     """
     # Seed with the string itself instead of hash() of it: str hashes are
     # salted per process on Python 3 (unless PYTHONHASHSEED is set), so
     # the previous seed changed between runs and the fixture was not
     # reproducible. Presumably the scrambling helper draws from `random`
     # -- confirm against training_scrambling_from_documents.
     random.seed("Y U NO?")

     base_path = os.path.dirname(os.path.abspath(__file__))
     data_dir = os.path.join(base_path, "data")
     word_scores = os.path.join(data_dir, "test_word_scores_big.csv")
     parallel_corpus = os.path.join(data_dir, "parallel-en-es.txt")

     A, B = parallel_corpus_to_documents(parallel_corpus)
     A = A[:25]
     B = B[:25]
     # Materialized so the alignments can be stored on the test case and
     # still passed to train() below.
     self.alignments = list(training_alignments_from_documents(A, B))
     # Unpacking accepts any iterable, so no intermediate list is needed.
     self.A, self.B, self.correct_alignments = \
         training_scrambling_from_documents(A, B)

     # Word score
     word_pair_score = WordPairScore(word_scores)
     # Sentence score
     sentence_pair_score = SentencePairScore()
     sentence_pair_score.train(self.alignments, word_pair_score)

     # Yalign model: the gap penalty is the midpoint of the score bounds.
     self.min_ = sentence_pair_score.min_bound
     self.max_ = sentence_pair_score.max_bound
     gap_penalty = (self.min_ + self.max_) / 2.0
     document_aligner = SequenceAligner(sentence_pair_score, gap_penalty)
     self.model = YalignModel(document_aligner, 1)