def test_load_corpus_file_non_existent(self):
    """
    Test that loading a corpus from a missing file path raises IOError.
    """
    missing_path = './test_corpus.yml'

    # Precondition: the path must not exist, otherwise the test is meaningless.
    self.assertFalse(os.path.exists(missing_path))

    self.assertRaises(IOError, corpus.load_corpus, missing_path)
def test_load_corpus(self):
    """
    Test that loading the top-level 'chatterbot.corpus' path
    returns a non-empty result.
    """
    everything = corpus.load_corpus('chatterbot.corpus')

    self.assertTrue(len(everything))
def test_load_corpus_english_greetings(self):
    """
    Test loading the English greetings file by its file system path
    under corpus.DATA_DIRECTORY; exactly one item should be returned.
    """
    greetings_path = os.path.join(
        corpus.DATA_DIRECTORY, 'english', 'greetings.yml'
    )

    loaded = corpus.load_corpus(greetings_path)

    self.assertEqual(len(loaded), 1)
def test_load_english_corpus_categories(self):
    """
    Test that the English greetings corpus, loaded by dotted path,
    yields one item and that each item lists 'greetings' in its
    categories.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.english.greetings')

    self.assertEqual(len(loaded), 1)

    # Every returned item must be labeled with the expected category.
    for entry in loaded:
        self.assertIn('greetings', entry.categories)
def test_conversation_format(self):
    """
    Test that every piece of text in every conversation of the
    full corpus is a str instance.
    """
    for group in corpus.load_corpus('chatterbot.corpus'):
        for dialog in group:
            for entry in dialog:
                if isinstance(entry, str):
                    continue

                # Report the offending value together with its actual type.
                message = '"{}" must be a string, not {}.'.format(
                    str(entry), type(entry)
                )
                self.fail(message)
def test_character_count(self):
    """
    Test that no statement in the full corpus is longer than
    DIALOG_MAXIMUM_CHARACTER_LENGTH characters.
    """
    from chatterbot_corpus.corpus import DIALOG_MAXIMUM_CHARACTER_LENGTH

    limit = DIALOG_MAXIMUM_CHARACTER_LENGTH

    for group in corpus.load_corpus('chatterbot.corpus'):
        for dialog in group:
            for line in dialog:
                if len(line) <= limit:
                    continue

                self.fail(
                    u'"{}" cannot be longer than {} characters'.format(
                        line, limit
                    )
                )
def test_load_corpus_file(self):
    """
    Test that a file path can be specified for a corpus.

    A small YAML corpus is written to a temporary file in the current
    directory, loaded by path, and then removed. The loaded data is
    expected to contain one item holding the two conversations that
    were written.
    """
    file_path = './test_corpus.yml'

    # The continuation items of each nested list are indented two spaces
    # so that they line up with the inner "- " of the line above;
    # a one-space indent is not valid YAML for this structure.
    yml_data = u'\n'.join([
        'conversations:',
        '- - Hello',
        '  - Hi',
        '- - Hi',
        '  - Hello',
    ])

    try:
        # Create a file for testing
        with io.open(file_path, 'w') as test_corpus:
            test_corpus.write(yml_data)

        # Load the content from the corpus
        corpus_data = corpus.load_corpus(file_path)
    finally:
        # Remove the test file even when writing or loading fails, so a
        # leftover file cannot break tests that expect the path to be absent.
        if os.path.exists(file_path):
            os.remove(file_path)

    self.assertEqual(len(corpus_data), 1)
    self.assertEqual(len(corpus_data[0]), 2)
def test_load_corpus_russian(self):
    """
    Test that the 'chatterbot.corpus.russian' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.russian')

    self.assertTrue(len(loaded))
def test_load_corpus_portuguese(self):
    """
    Test that the 'chatterbot.corpus.portuguese' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.portuguese')

    self.assertTrue(len(loaded))
def test_load_corpus_marathi(self):
    """
    Test that the 'chatterbot.corpus.marathi' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.marathi')

    self.assertTrue(len(loaded))
def test_load_corpus_indonesia(self):
    """
    Test that the 'chatterbot.corpus.indonesia' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.indonesia')

    self.assertTrue(len(loaded))
def test_load_corpus_hindi(self):
    """
    Test that the 'chatterbot.corpus.hindi' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.hindi')

    self.assertTrue(len(loaded))
def test_load_corpus_french(self):
    """
    Test that the 'chatterbot.corpus.french' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.french')

    self.assertTrue(len(loaded))
def test_load_corpus_english(self):
    """
    Test that the 'chatterbot.corpus.english' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.english')

    self.assertTrue(len(loaded))
def test_load_corpus_traditional_chinese(self):
    """
    Test that the 'chatterbot.corpus.tchinese' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.tchinese')

    self.assertTrue(len(loaded))
def test_load_english_corpus(self):
    """
    Test that the English greetings corpus, loaded by dotted path,
    yields one item and that this item contains the
    ['Hi', 'Hello'] conversation.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.english.greetings')

    self.assertEqual(len(loaded), 1)

    first_item = loaded[0]
    self.assertIn(['Hi', 'Hello'], first_item)
def test_load_corpus_english_trailing_slash(self):
    """
    Test that the English corpus directory can be loaded by path
    when the path ends with a trailing slash; more than one item
    should be returned.
    """
    english_dir = os.path.join(corpus.DATA_DIRECTORY, 'english')
    path_with_slash = english_dir + '/'

    loaded = corpus.load_corpus(path_with_slash)

    self.assertGreater(len(loaded), 1)
def test_load_corpus_telugu(self):
    """
    Test that the 'chatterbot.corpus.telugu' corpus loads with
    a non-empty result.
    """
    loaded = corpus.load_corpus('chatterbot.corpus.telugu')

    self.assertTrue(len(loaded))