def test_max_chunk_size_2(self):
    """Check the sentence order of a composite built with a third argument of 2.

    Two three-sentence English texts are combined via
    ``text.Text.composite_of(..., 2)``; the composite is expected to
    alternate between the sources in runs of at most two sentences.
    """
    # Fixed seed so the run is reproducible.
    # NOTE(review): presumably composite_of draws on `random` -- confirm.
    random.seed(45)

    source_a = text.Text(
        'Sentence A1. Sentence A2. Sentence A3.',
        'text_a',
        language=languages.English,
    )
    source_b = text.Text(
        'Sentence B1. Sentence B2. Sentence B3.',
        'text_b',
        language=languages.English,
    )
    composite = text.Text.composite_of(source_a, source_b, 2)

    expected_order = [
        'Sentence A1.',
        'Sentence A2.',
        'Sentence B1.',
        'Sentence B2.',
        'Sentence A3.',
        'Sentence B3.',
    ]
    # Compare position by position; indexing (rather than zip) keeps a
    # too-short composite failing loudly with an IndexError.
    for position, expected in enumerate(expected_order):
        self.assertEqual(expected, composite.sentences[position])
def setUp(self):
    """Create the per-test fixtures: the DDJ_3 text and a binary model."""
    sample_text = text.Text(DDJ_3, 'ddj_3')
    self.ddj_3 = sample_text

    binary_model = model.BinaryModel()
    self.bin_model = binary_model
def setUp(self):
    """Create the per-test fixtures: the DDJ_3 text and a bag-of-words model."""
    sample_text = text.Text(DDJ_3, 'ddj_3')
    self.ddj_3 = sample_text

    bag_of_words = model.BagOfWordsModel()
    self.bow_model = bag_of_words
def setUp(self):
    """Create a corpus with ``min_occurrences=2`` holding the DDJ_START text."""
    self.corpus = corpus.Corpus(min_occurrences=2)

    opening_text = text.Text(DDJ_START, 'ddj_start')
    self.corpus.add_text(opening_text)
def setUp(self):
    """Create a default corpus holding the DDJ_START and DDJ_END texts."""
    self.corpus = corpus.Corpus()

    # Texts are added in this order: start first, then end.
    fixtures = (
        (DDJ_START, 'ddj_start'),
        (DDJ_END, 'ddj_end'),
    )
    for raw_content, label in fixtures:
        self.corpus.add_text(text.Text(raw_content, label))
def setUp(self):
    """Create a corpus using a BagOfWordsModel and add the DDJ_START text."""
    bag_of_words = model.BagOfWordsModel()
    self.corpus = corpus.Corpus(text_model=bag_of_words)

    opening_text = text.Text(DDJ_START, 'ddj_start')
    self.corpus.add_text(opening_text)
def setUp(self):
    """Create the text fixture from DDJ_START (no explicit language given)."""
    fixture = text.Text(DDJ_START, 'ddj_start')
    self.text = fixture
def setUp(self):
    """Create the text fixture from THE_ROOM, explicitly tagged as English."""
    fixture = text.Text(
        THE_ROOM,
        'the_room',
        language=languages.English,
    )
    self.text = fixture
def test_multilingual_composite_text_raises_error(self):
    """Compositing texts tagged with different languages raises ValueError.

    Builds one Classical Chinese text and one English text, then checks
    that ``text.Text.composite_of`` rejects the pair.
    """
    lzh_text = text.Text(
        DDJ_START,
        'ddj_start',
        language=languages.ClassicalChinese,
    )
    eng_text = text.Text(
        THE_ROOM,
        'the_room',
        language=languages.English,
    )
    # The call is expected to raise, so its result is deliberately not
    # bound to a name (the old binding was an unused local).
    with self.assertRaises(ValueError):
        text.Text.composite_of(lzh_text, eng_text, 1)