Exemplo n.º 1
0
 def test_sentence_tokenizer_utils_with_punctuation(self):
     """Train with an explicit ``punctuation`` list and check the result type.

     Builds a ``BaseSentenceTokenizerTrainer`` for ``'latin'`` with
     ``punctuation=['.', '?', '!']``, trains it on ``self.latin_text`` and
     asserts that the returned object is a ``PunktSentenceTokenizer``.
     """
     sentence_enders = ['.', '?', '!']
     trainer = BaseSentenceTokenizerTrainer('latin',
                                            punctuation=sentence_enders)
     trained = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Exemplo n.º 2
0
 def test_sentence_tokenizer_utils_with_strict(self):
     """Train with ``strict=True`` and check the result type.

     The trainer is created for ``'latin'`` with ``strict=True``,
     ``punctuation=['.', '?', '!']`` and ``strict_punctuation=[',']``.
     Training on ``self.latin_text`` must return a
     ``PunktSentenceTokenizer``.
     """
     # Same keyword arguments, in the same order, as a direct call would pass.
     trainer_options = {
         'strict': True,
         'punctuation': ['.', '?', '!'],
         'strict_punctuation': [','],
     }
     trainer = BaseSentenceTokenizerTrainer('latin', **trainer_options)
     trained = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Exemplo n.º 3
0
 def test_sentence_tokenizer_trainer_pickle(self):
     """Check that ``pickle_sentence_tokenizer`` is called with given args.

     ``pickle_sentence_tokenizer`` is replaced on the class by a mock for
     the duration of the ``with`` block, so the call below is recorded by
     the mock rather than reaching the real method. The assertion then
     verifies the single recorded call and its arguments.
     """
     pickle_target = 'mock.p'
     with patch.object(
         BaseSentenceTokenizerTrainer, 'pickle_sentence_tokenizer'
     ) as pickle_mock:
         trainer = BaseSentenceTokenizerTrainer('latin')
         trainer.pickle_sentence_tokenizer(pickle_target, trainer)
     # The mock keeps its call history after the patch has been undone.
     pickle_mock.assert_called_once_with(pickle_target, trainer)
Exemplo n.º 4
0
 def test_sentence_tokenizer_utils_with_abbreviations(self):
     """Train with an ``abbreviations`` list and check the result type.

     Builds a ``BaseSentenceTokenizerTrainer`` for ``'latin'`` with
     ``abbreviations=['Kal.']``, trains it on ``self.latin_text`` and
     asserts that the returned object is a ``PunktSentenceTokenizer``.
     """
     known_abbreviations = ['Kal.']
     trainer = BaseSentenceTokenizerTrainer(
         'latin', abbreviations=known_abbreviations)
     trained = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Exemplo n.º 5
0
 def test_sentence_tokenizer_utils(self):
     """Train with only the language argument and check the result type.

     Builds a ``BaseSentenceTokenizerTrainer`` for ``'latin'`` with no
     further options, trains it on ``self.latin_text`` and asserts that the
     returned object is a ``PunktSentenceTokenizer``.
     """
     language = 'latin'
     trainer = BaseSentenceTokenizerTrainer(language)
     trained = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(trained, PunktSentenceTokenizer)
Exemplo n.º 6
0
 def test_sentence_tokenizer_trainer_pickle(self):
     """Verify the arguments passed to ``pickle_sentence_tokenizer``.

     The method is patched on ``BaseSentenceTokenizerTrainer`` with a mock,
     so the call made inside the ``with`` block is only recorded. After the
     block, the mock must show exactly one call with ``'mock.p'`` and the
     trainer instance.
     """
     method_name = 'pickle_sentence_tokenizer'
     output_name = 'mock.p'
     with patch.object(BaseSentenceTokenizerTrainer, method_name) as patched:
         trainer = BaseSentenceTokenizerTrainer('latin')
         trainer.pickle_sentence_tokenizer(output_name, trainer)
     patched.assert_called_once_with(output_name, trainer)
Exemplo n.º 7
0
 def test_sentence_tokenizer_utils_with_abbreviations(self):
     """Check the trained tokenizer type when ``abbreviations`` is supplied.

     The trainer is created for ``'latin'`` with ``abbreviations=['Kal.']``;
     the object returned by training on ``self.latin_text`` must be a
     ``PunktSentenceTokenizer``.
     """
     trainer = BaseSentenceTokenizerTrainer(
         'latin',
         abbreviations=['Kal.'],
     )
     result = trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(result, PunktSentenceTokenizer)
Exemplo n.º 8
0
 def test_sentence_tokenizer_utils(self):
     """Check the trained tokenizer type for a default ``'latin'`` trainer.

     No optional arguments are passed to the trainer; the object returned
     by training on ``self.latin_text`` must be a
     ``PunktSentenceTokenizer``.
     """
     default_trainer = BaseSentenceTokenizerTrainer('latin')
     result = default_trainer.train_sentence_tokenizer(self.latin_text)
     self.assertIsInstance(result, PunktSentenceTokenizer)