示例#1
0
 def test_load_corpus(self):
     """
     Check that loading everything under ``chatterbot.corpus`` produces
     at least one corpus entry.
     """
     every_file = corpus.list_corpus_files('chatterbot.corpus')
     loaded_entries = list(corpus.load_corpus(*every_file))
     self.assertTrue(len(loaded_entries))
示例#2
0
    def test_load_new_corpus_file(self):
        """
        Test that a file path can be specified for a corpus.

        A small YAML corpus is written to ``./test_corpus.yml``, resolved
        with ``corpus.list_corpus_files`` and loaded with
        ``corpus.load_corpus``. The file is removed again even when
        writing or loading raises, so a failure here does not leave the
        file behind for later test runs.
        """
        file_path = './test_corpus.yml'
        yml_data = u'\n'.join([
            'conversations:', '- - Hello', '  - Hi', '- - Hi', '  - Hello'
        ])

        try:
            # Create a file for testing
            with io.open(file_path, 'w') as test_corpus:
                test_corpus.write(yml_data)

            data_files = corpus.list_corpus_files(file_path)
            corpus_data = list(corpus.load_corpus(*data_files))
        finally:
            # Remove the test file regardless of the outcome above
            if os.path.exists(file_path):
                os.remove(file_path)

        self.assertEqual(len(corpus_data), 1)

        # Load the content from the corpus
        conversations, _categories, _file_path = corpus_data[0]

        # The first conversation holds the two lines written above
        self.assertEqual(len(conversations[0]), 2)
示例#3
0
 def test_load_corpus_greetings(self):
     """
     Check that, for each language, the ``greetings.yml`` path loads as
     exactly one corpus entry.
     """
     for lang_name in LANGUAGES:
         greetings_path = os.path.join(
             corpus.DATA_DIRECTORY, lang_name, 'greetings.yml'
         )
         matched_files = corpus.list_corpus_files(greetings_path)
         loaded_entries = list(corpus.load_corpus(*matched_files))
         self.assertEqual(len(loaded_entries), 1)
示例#4
0
 def test_load_corpus_file_non_existent(self):
     """
     Test that loading a corpus file path that does not exist raises
     ``IOError``.
     """
     missing_path = './test_corpus.yml'

     # Precondition: nothing may exist at the path being loaded
     path_exists = os.path.exists(missing_path)
     self.assertFalse(path_exists)

     with self.assertRaises(IOError):
         # Consume the result so that lazy loading is actually triggered
         list(corpus.load_corpus(missing_path))
示例#5
0
 def test_load_corpus_language(self):
     """
     Test that each language loads more than one corpus entry, whether
     it is addressed by dotted path, by data directory path with a
     trailing slash, or by plain data directory path.
     """
     for language in LANGUAGES:
         language_directory = os.path.join(corpus.DATA_DIRECTORY, language)
         paths = [
             f'chatterbot.corpus.{language}',
             # Use the current language here as well, so the
             # trailing-slash form is checked for every language
             language_directory + '/',
             language_directory,
         ]
         for file_path in paths:
             data_files = corpus.list_corpus_files(file_path)
             corpus_data = corpus.load_corpus(*data_files)
             # Report the offending path when the assertion fails
             self.assertGreater(len(list(corpus_data)), 1, msg=file_path)
示例#6
0
    def test_load_corpus_categories(self):
        """
        Check the conversations, categories and file path reported when
        the English greetings corpus is loaded by its dotted path.
        """
        greeting_files = corpus.list_corpus_files(
            'chatterbot.corpus.english.greetings'
        )
        loaded_entries = list(corpus.load_corpus(*greeting_files))

        # The dotted path is expected to resolve to a single entry
        self.assertEqual(len(loaded_entries), 1)
        for _unused_conversations, entry_categories, _unused_path in loaded_entries:
            self.assertIn('greetings', entry_categories)

        first_conversations, first_categories, first_path = loaded_entries[0]
        self.assertIn(['Hi', 'Hello'], first_conversations)
        self.assertEqual(['greetings'], first_categories)
        self.assertIn(
            'chatterbot_corpus/data/english/greetings.yml',
            first_path
        )
示例#7
0
    def train(self, *corpus_paths):
        """
        Train the chat bot with the corpus data found at the given paths.

        Every path is expanded into data files with ``list_corpus_files``.
        For each loaded file, every conversation is converted into a
        sequence of ``Statement`` objects in which each statement is
        recorded as a response to the one before it. The statements of
        one file are then stored in a single ``create_many`` call.

        :param corpus_paths: Corpus paths to resolve and train with.
        """
        from app.chatterbot_api.chatterbot.corpus import load_corpus, list_corpus_files

        # Resolve every corpus path into the data files it refers to
        training_files = [
            data_file
            for path in corpus_paths
            for data_file in list_corpus_files(path)
        ]

        for conversations, categories, file_path in load_corpus(*training_files):

            pending_statements = []

            for position, conversation in enumerate(conversations, start=1):

                if self.show_training_progress:
                    utils.print_progress_bar(
                        'Training ' + str(os.path.basename(file_path)),
                        position,
                        len(conversations)
                    )

                # Each conversation starts without a preceding statement
                prior_text = None
                prior_search_text = ''

                for text in conversation:

                    search_text = self.chatbot.storage.tagger.get_text_index_string(
                        text
                    )

                    raw_statement = Statement(
                        text=text,
                        search_text=search_text,
                        in_response_to=prior_text,
                        search_in_response_to=prior_search_text,
                        conversation='training'
                    )
                    raw_statement.add_tags(*categories)

                    statement = self.get_preprocessed_statement(raw_statement)

                    # The next line of the conversation responds to this
                    # statement: its preprocessed text, but the search text
                    # computed from the raw input text
                    prior_text = statement.text
                    prior_search_text = search_text

                    pending_statements.append(statement)

            # Skip the storage call when the file produced no statements
            if pending_statements:
                self.chatbot.storage.create_many(pending_statements)