class CorpusUtilsTestCase(TestCase):
    """
    Tests for the path, reading and loading helpers exposed by ``Corpus``.
    """

    def setUp(self):
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        A dotted corpus path should resolve to a path containing the
        english data directory.
        """
        expected_fragment = os.path.join("chatterbot", "corpus", "data", "english")
        resolved = self.corpus.get_file_path("chatterbot.corpus.english")

        self.assertIn(expected_fragment, resolved)

    def test_read_corpus(self):
        """
        Reading the english conversations file yields data that contains
        a "conversations" entry.
        """
        path_parts = (self.corpus.data_directory, "english", "conversations.json")
        contents = self.corpus.read_corpus(os.path.join(*path_parts))

        self.assertIn("conversations", contents)

    def test_load_corpus(self):
        """
        Loading the greetings corpus gives exactly one corpus, and it
        contains the "Hi" / "Hello" exchange.
        """
        loaded = self.corpus.load_corpus("chatterbot.corpus.english.greetings")

        self.assertEqual(len(loaded), 1)
        self.assertIn(["Hi", "Hello"], loaded[0])

    def test_load_corpus_general(self):
        """
        Loading the whole english corpus gives three corpora, the second
        of which contains the "Hi" / "Hello" exchange.
        """
        loaded = self.corpus.load_corpus("chatterbot.corpus.english")

        self.assertEqual(len(loaded), 3)
        self.assertIn(["Hi", "Hello"], loaded[1])
class CorpusUtilsTestCase(TestCase):
    """
    Tests for resolving, listing, reading and loading corpus data
    through ``Corpus``.
    """

    def setUp(self):
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        The dotted english corpus path maps onto the english data directory.
        """
        english_directory = os.path.join('chatterbot', 'corpus', 'data', 'english')
        resolved = self.corpus.get_file_path('chatterbot.corpus.english')

        self.assertIn(english_directory, resolved)

    def test_read_corpus(self):
        """
        The english conversations file is read into data that has a
        'conversations' entry.
        """
        conversations_file = os.path.join(
            self.corpus.data_directory,
            'english',
            'conversations.json'
        )
        contents = self.corpus.read_corpus(conversations_file)

        self.assertIn('conversations', contents)

    def test_list_english_corpus_files(self):
        """
        Listing the english corpus returns at least three files, the
        first of which has '.json' in its name.
        """
        listed = self.corpus.list_corpus_files('chatterbot.corpus.english')

        self.assertGreaterEqual(len(listed), 3)
        self.assertIn('.json', listed[0])

    def test_load_corpus(self):
        """
        The greetings corpus loads as a single corpus holding the
        'Hi' / 'Hello' exchange.
        """
        loaded = self.corpus.load_corpus('chatterbot.corpus.english.greetings')

        self.assertEqual(len(loaded), 1)
        self.assertIn(['Hi', 'Hello'], loaded[0])

    def test_load_corpus_general(self):
        """
        The full english corpus loads as three corpora; the second one
        holds the 'Hi' / 'Hello' exchange.
        """
        loaded = self.corpus.load_corpus('chatterbot.corpus.english')

        self.assertEqual(len(loaded), 3)
        self.assertIn(['Hi', 'Hello'], loaded[1])
class CorpusUtilsTestCase(TestCase):
    """
    Exercises ``Corpus`` path resolution, file reading and corpus loading.
    """

    def setUp(self):
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        Resolving the dotted english path produces a path that includes
        the english data directory.
        """
        data_fragment = os.path.join("chatterbot", "corpus", "data", "english")
        file_path = self.corpus.get_file_path("chatterbot.corpus.english")

        self.assertIn(data_fragment, file_path)

    def test_read_corpus(self):
        """
        The parsed english conversations file exposes a "conversations" entry.
        """
        location = os.path.join(
            self.corpus.data_directory, "english", "conversations.json"
        )
        parsed = self.corpus.read_corpus(location)

        self.assertIn("conversations", parsed)

    def test_load_corpus(self):
        """
        A dotted path naming the greetings corpus loads one corpus that
        includes the "Hi" / "Hello" exchange.
        """
        result = self.corpus.load_corpus("chatterbot.corpus.english.greetings")

        self.assertEqual(len(result), 1)
        self.assertIn(["Hi", "Hello"], result[0])

    def test_load_corpus_general(self):
        """
        A dotted path naming the english corpus loads two corpora, the
        second of which includes the "Hi" / "Hello" exchange.
        """
        result = self.corpus.load_corpus("chatterbot.corpus.english")

        self.assertEqual(len(result), 2)
        self.assertIn(["Hi", "Hello"], result[1])
class CorpusUtilsTestCase(TestCase):
    """
    Covers the ``Corpus`` utilities for locating, listing, reading and
    loading the english corpus.
    """

    def setUp(self):
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        The dotted english path is turned into a path that contains the
        english data directory.
        """
        wanted = os.path.join('chatterbot', 'corpus', 'data', 'english')
        actual = self.corpus.get_file_path('chatterbot.corpus.english')

        self.assertIn(wanted, actual)

    def test_read_english_corpus(self):
        """
        Reading the english conversations corpus file returns data with
        a 'conversations' entry.
        """
        segments = (
            self.corpus.data_directory,
            'english',
            'conversations.corpus.json',
        )
        contents = self.corpus.read_corpus(os.path.join(*segments))

        self.assertIn('conversations', contents)

    def test_list_english_corpus_files(self):
        """
        At least three english corpus files are listed and the first
        one has '.json' in its name.
        """
        file_names = self.corpus.list_corpus_files('chatterbot.corpus.english')

        self.assertGreaterEqual(len(file_names), 3)
        self.assertIn('.json', file_names[0])

    def test_load_corpus(self):
        """
        The greetings corpus loads as exactly one corpus that contains
        the 'Hi' / 'Hello' exchange.
        """
        loaded = self.corpus.load_corpus('chatterbot.corpus.english.greetings')

        self.assertEqual(len(loaded), 1)
        self.assertIn(['Hi', 'Hello'], loaded[0])

    def test_load_corpus_english(self):
        """
        The english corpus loads as three corpora and the second one
        contains the 'Hi' / 'Hello' exchange.
        """
        loaded = self.corpus.load_corpus('chatterbot.corpus.english')

        self.assertEqual(len(loaded), 3)
        self.assertIn(['Hi', 'Hello'], loaded[1])
class ChatterBotCorpusTrainer(Trainer):
    """
    Trainer that fills the chat bot's storage from ChatterBot corpus data.

    Every conversation of a loaded corpus is stored as a chain of
    statements, each one recorded as a response to the text before it.
    """

    def __init__(self, chatbot, **kwargs):
        super().__init__(chatbot, **kwargs)

        from chatterbot.corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Train the chat bot from one or more dotted corpus paths.

        :param corpus_paths: Dotted corpus paths, passed either as
            separate arguments or as one list.
        """
        # A lone list argument is unwrapped so both calling styles work
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        for dotted_path in corpus_paths:
            loaded_corpora = self.corpus.load_corpus(dotted_path)
            file_names = self.corpus.list_corpus_files(dotted_path)

            for file_index, corpus in enumerate(loaded_corpora):
                for position, conversation in enumerate(corpus, 1):

                    if self.show_training_progress:
                        file_label = str(
                            os.path.basename(file_names[file_index])
                        )
                        utils.print_progress_bar(
                            file_label + ' Training',
                            position,
                            len(corpus)
                        )

                    # The first statement of a conversation responds to nothing
                    prior_text = None

                    for text in conversation:
                        raw_statement = Statement(
                            text=text,
                            in_response_to=prior_text,
                            conversation='training'
                        )
                        raw_statement.add_tags(*corpus.categories)

                        processed = self.get_preprocessed_statement(
                            raw_statement
                        )

                        # The next statement responds to the preprocessed text
                        prior_text = processed.text

                        self.chatbot.storage.create(
                            text=processed.text,
                            in_response_to=processed.in_response_to,
                            conversation=processed.conversation,
                            tags=processed.tags
                        )
class newCorpusTrainer(trainers.Trainer):
    """
    Corpus trainer for conversations whose lines each hold several texts.

    Every text of a line is stored with a response entry for each text
    of the line that came before it in the same conversation.
    """

    def __init__(self, storage, **kwargs):
        super(newCorpusTrainer, self).__init__(storage, **kwargs)

        from chatterbot.corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Train from one or more dotted corpus paths.

        :param corpus_paths: Dotted corpus paths, passed either as
            separate arguments or as one list.
        """
        # A lone list argument is unwrapped so both calling styles work
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        for dotted_path in corpus_paths:
            loaded_corpora = self.corpus.load_corpus(dotted_path)
            file_names = self.corpus.list_corpus_files(dotted_path)

            for file_index, corpus in enumerate(loaded_corpora):
                for position, conversation in enumerate(corpus, 1):
                    file_label = str(os.path.basename(file_names[file_index]))
                    print_progress_bar(
                        file_label + " Training",
                        position,
                        len(corpus)
                    )

                    # Texts of the line handled just before the current one
                    earlier_texts = []

                    for line in conversation:
                        current_texts = []

                        for text in line:
                            statement = self.get_or_create(text)
                            statement.add_tags(corpus.categories)
                            current_texts.append(statement.text)

                            # Nothing is added for the first line, because
                            # there are no earlier texts to respond to
                            for earlier_text in earlier_texts:
                                statement.add_response(Response(earlier_text))

                            self.storage.update(statement)

                        earlier_texts = current_texts
class CorpusLoadingTestCase(TestCase):
    """
    Checks that the corpus for each language loads with a non-zero length.
    """

    def setUp(self):
        self.corpus = Corpus()

    def _assert_loads(self, dotted_path):
        """
        Load the corpus at ``dotted_path`` and assert it is not empty.
        """
        loaded = self.corpus.load_corpus(dotted_path)
        self.assertTrue(len(loaded))

    def test_load_corpus_chinese(self):
        self._assert_loads('chatterbot.corpus.chinese')

    def test_load_corpus_english(self):
        self._assert_loads('chatterbot.corpus.english')

    def test_load_corpus_french(self):
        self._assert_loads('chatterbot.corpus.french')

    def test_load_corpus_german(self):
        self._assert_loads('chatterbot.corpus.german')

    def test_load_corpus_hindi(self):
        self._assert_loads('chatterbot.corpus.hindi')

    def test_load_corpus_indonesia(self):
        self._assert_loads('chatterbot.corpus.indonesia')

    def test_load_corpus_italian(self):
        self._assert_loads('chatterbot.corpus.italian')

    def test_load_corpus_marathi(self):
        self._assert_loads('chatterbot.corpus.marathi')

    def test_load_corpus_portuguese(self):
        self._assert_loads('chatterbot.corpus.portuguese')

    def test_load_corpus_russian(self):
        self._assert_loads('chatterbot.corpus.russian')

    def test_load_corpus_spanish(self):
        self._assert_loads('chatterbot.corpus.spanish')

    def test_load_corpus_telugu(self):
        self._assert_loads('chatterbot.corpus.telugu')
class CorpusLoadingTestCase(TestCase):
    """
    Checks that each language corpus loads with a non-zero length, plus
    the greetings corpus contents and dotted-path resolution.
    """

    def setUp(self):
        self.corpus = Corpus()

    def _assert_loads(self, dotted_path):
        """
        Load the corpus at ``dotted_path`` and assert it is not empty.
        """
        loaded = self.corpus.load_corpus(dotted_path)
        self.assertTrue(len(loaded))

    def test_load_corpus_chinese(self):
        self._assert_loads('chatterbot.corpus.chinese')

    def test_load_corpus_english(self):
        self._assert_loads('chatterbot.corpus.english')

    def test_load_corpus_english_greetings(self):
        """
        The greetings corpus loads as exactly one corpus that contains
        the 'Hi' / 'Hello' exchange.
        """
        loaded = self.corpus.load_corpus('chatterbot.corpus.english.greetings')

        self.assertEqual(len(loaded), 1)
        self.assertIn(['Hi', 'Hello'], loaded[0])

    def test_load_corpus_french(self):
        self._assert_loads('chatterbot.corpus.french')

    def test_load_corpus_german(self):
        self._assert_loads('chatterbot.corpus.german')

    def test_load_corpus_hindi(self):
        self._assert_loads('chatterbot.corpus.hindi')

    def test_load_corpus_indonesia(self):
        self._assert_loads('chatterbot.corpus.indonesia')

    def test_load_corpus_italian(self):
        self._assert_loads('chatterbot.corpus.italian')

    def test_load_corpus_marathi(self):
        self._assert_loads('chatterbot.corpus.marathi')

    def test_load_corpus_portuguese(self):
        self._assert_loads('chatterbot.corpus.portuguese')

    def test_load_corpus_russian(self):
        self._assert_loads('chatterbot.corpus.russian')

    def test_load_corpus_spanish(self):
        self._assert_loads('chatterbot.corpus.spanish')

    def test_load_corpus_telugu(self):
        self._assert_loads('chatterbot.corpus.telugu')

    def test_get_file_path(self):
        """
        The dotted english path resolves to a path that contains the
        chatterbot_corpus english data directory.
        """
        import os

        english_directory = os.path.join('chatterbot_corpus', 'data', 'english')
        resolved = self.corpus.get_file_path('chatterbot.corpus.english')

        self.assertIn(english_directory, resolved)