def setUp(self):
    """Prepare a trained Yalign model and truncated documents for the tests.

    Sets ``self.parallel_corpus`` (path to the parallel corpus file),
    ``self.document_a`` / ``self.document_b`` (the first 30 items of each
    side of that corpus) and ``self.model`` (a ``YalignModel`` wrapping a
    ``SequenceAligner`` built on a freshly trained ``SentencePairScore``).
    """
    self.parallel_corpus = os.path.join(data_path, "parallel-en-es.txt")
    scores_csv = os.path.join(data_path, "test_word_scores_big.csv")

    # Load both sides of the corpus and keep only the first 30 items of each.
    side_a, side_b = parallel_corpus_to_documents(self.parallel_corpus)
    self.document_a = side_a[:30]
    self.document_b = side_b[:30]
    training_pairs = training_alignments_from_documents(self.document_a,
                                                        self.document_b)

    # Train the sentence-level scorer on top of the word-level scores.
    word_scorer = WordPairScore(scores_csv)
    sentence_scorer = SentencePairScore()
    sentence_scorer.train(training_pairs, word_scorer)

    # Wrap the trained scorer in an aligner and expose the resulting model.
    gap_penalty = 0.49
    self.model = YalignModel(SequenceAligner(sentence_scorer, gap_penalty))
def setUp(self):
    """Prepare scrambled documents and a trained Yalign model for the tests.

    Sets ``self.alignments`` (training alignments for the first 25 items
    of each side of the corpus), ``self.A`` / ``self.B`` /
    ``self.correct_alignments`` (the three values produced by
    ``training_scrambling_from_documents``), ``self.min_`` / ``self.max_``
    (the trained scorer's bounds) and ``self.model``.
    """
    # Seed with the string itself rather than hash() of it: on Python 3
    # str hashes are salted per process, so hash("Y U NO?") would give a
    # different seed on every run and the fixture would not be
    # reproducible. random.seed() accepts a str and is deterministic.
    random.seed("Y U NO?")

    base_path = os.path.dirname(os.path.abspath(__file__))
    word_scores = os.path.join(base_path, "data", "test_word_scores_big.csv")
    parallel_corpus = os.path.join(base_path, "data", "parallel-en-es.txt")

    # Keep only the first 25 items of each side of the corpus.
    A, B = parallel_corpus_to_documents(parallel_corpus)
    A = A[:25]
    B = B[:25]
    self.alignments = list(training_alignments_from_documents(A, B))
    # Tuple unpacking consumes any iterable, so no intermediate list is
    # needed here.
    self.A, self.B, self.correct_alignments = \
        training_scrambling_from_documents(A, B)

    # Word score
    word_pair_score = WordPairScore(word_scores)

    # Sentence score, trained on the alignments built above.
    sentence_pair_score = SentencePairScore()
    sentence_pair_score.train(self.alignments, word_pair_score)

    # Yalign model: the gap penalty is the midpoint of the scorer's bounds.
    self.min_ = sentence_pair_score.min_bound
    self.max_ = sentence_pair_score.max_bound
    gap_penalty = (self.min_ + self.max_) / 2.0
    document_aligner = SequenceAligner(sentence_pair_score, gap_penalty)
    self.model = YalignModel(document_aligner, 1)