Python load_corpus示例

编程语言: Python

命名空间/包名称: app.chatterbot_api.chatterbot.corpus

方法/功能: load_corpus

hotexamples.com的示例: 7

Python load_corpus - 已找到7个示例。这些是从开源项目中提取的、最受好评的 app.chatterbot_api.chatterbot.corpus.load_corpus 真实 Python 示例。您可以对示例进行评价，以帮助我们提高示例质量。

示例#1

显示文件

 def test_load_corpus(self):
     """
     Check that loading every file listed for 'chatterbot.corpus'
     produces at least one corpus entry.
     """
     every_file = corpus.list_corpus_files('chatterbot.corpus')
     # Materialize the result so its length can be checked.
     loaded_entries = list(corpus.load_corpus(*every_file))
     self.assertTrue(len(loaded_entries))

示例#2

显示文件

    def test_load_new_corpus_file(self):
        """
        Test that a file path can be specified for a corpus.

        A small YAML corpus with two conversations is written to the
        working directory, loaded through its path, and removed again
        even when listing or loading the file raises.
        """
        file_path = './test_corpus.yml'
        yml_data = u'\n'.join([
            'conversations:', '- - Hello', '  - Hi', '- - Hi', '  - Hello'
        ])

        try:
            # Create a file for testing
            with io.open(file_path, 'w', encoding='utf-8') as test_corpus:
                test_corpus.write(yml_data)

            data_files = corpus.list_corpus_files(file_path)
            corpus_data = list(corpus.load_corpus(*data_files))
        finally:
            # Remove the test file no matter what happened above, so a
            # failure here cannot leave a stale file behind for other
            # tests that expect this path to be absent.
            if os.path.exists(file_path):
                os.remove(file_path)

        self.assertEqual(len(corpus_data), 1)

        # Load the content from the corpus
        conversations, _categories, _file_path = corpus_data[0]

        # The first conversation holds the two statements written above.
        self.assertEqual(len(conversations[0]), 2)

示例#3

显示文件

 def test_load_corpus_greetings(self):
     """
     For each language in LANGUAGES, loading its 'greetings.yml' file
     from the corpus data directory yields exactly one corpus entry.
     """
     for language in LANGUAGES:
         greetings_path = os.path.join(
             corpus.DATA_DIRECTORY, language, 'greetings.yml')
         greeting_files = corpus.list_corpus_files(greetings_path)
         entries = list(corpus.load_corpus(*greeting_files))
         self.assertEqual(len(entries), 1)

示例#4

显示文件

 def test_load_corpus_file_non_existent(self):
     """
     Test that loading a corpus from a path that does not exist
     raises IOError.
     """
     missing_path = './test_corpus.yml'
     # Precondition: the path must really be absent before loading.
     self.assertFalse(os.path.exists(missing_path))
     with self.assertRaises(IOError):
         # Consume the result inside the context so the error is
         # observed here even if loading is lazy.
         for _entry in corpus.load_corpus(missing_path):
             pass

示例#5

显示文件

 def test_load_corpus_language(self):
     """
     For each language in LANGUAGES, check that its corpus can be
     addressed in three ways and that each yields more than one entry:
     the dotted 'chatterbot.corpus.<language>' path, the language data
     directory with a trailing slash, and the same directory without it.
     """
     for language in LANGUAGES:
         language_dir = os.path.join(corpus.DATA_DIRECTORY, language)
         paths = [
             f'chatterbot.corpus.{language}',
             # Use the language under test here; a fixed 'english'
             # would only re-check the English directory on every pass.
             language_dir + '/',
             language_dir,
         ]
         for file_path in paths:
             # subTest keeps iterating after a failure and reports
             # which language/path combination failed.
             with self.subTest(language=language, file_path=file_path):
                 data_files = corpus.list_corpus_files(file_path)
                 corpus_data = corpus.load_corpus(*data_files)
                 self.assertGreater(len(list(corpus_data)), 1)

示例#6

显示文件

    def test_load_corpus_categories(self):
        """
        Loading 'chatterbot.corpus.english.greetings' yields one entry
        whose categories are ['greetings'], whose conversations include
        ['Hi', 'Hello'], and whose file path contains the greetings.yml
        data file location.
        """
        # english - greetings
        greeting_files = corpus.list_corpus_files(
            'chatterbot.corpus.english.greetings')
        loaded = list(corpus.load_corpus(*greeting_files))

        self.assertEqual(len(loaded), 1)
        for _dialogues, tags, _source in loaded:
            self.assertIn('greetings', tags)

        # Inspect the single entry in detail.
        dialogues, tags, source_path = loaded[0]
        self.assertIn(['Hi', 'Hello'], dialogues)
        self.assertEqual(['greetings'], tags)
        self.assertIn(
            'chatterbot_corpus/data/english/greetings.yml', source_path)

示例#7

显示文件

    def train(self, *corpus_paths):
        """
        Train the chat bot from the corpus files found at each given path.

        Every text in every conversation becomes a Statement that is
        linked to the previous text of the same conversation, tagged
        with the corpus categories, and preprocessed. Statements are
        saved through ``self.chatbot.storage.create_many`` once per
        corpus file.

        :param corpus_paths: Corpus locations passed to
            ``list_corpus_files``.
        """
        from app.chatterbot_api.chatterbot.corpus import load_corpus, list_corpus_files

        # Get the paths to each file the bot will be trained with
        training_files = [
            data_file
            for path in corpus_paths
            for data_file in list_corpus_files(path)
        ]

        for conversations, categories, file_path in load_corpus(*training_files):
            pending = []

            # Train the chat bot with each statement and response pair
            for position, conversation in enumerate(conversations, start=1):

                if self.show_training_progress:
                    label = 'Training ' + str(os.path.basename(file_path))
                    utils.print_progress_bar(
                        label, position, len(conversations))

                # Each conversation starts without a preceding statement.
                prior_text = None
                prior_search_text = ''

                for text in conversation:
                    tagger = self.chatbot.storage.tagger
                    search_text = tagger.get_text_index_string(text)

                    statement = Statement(
                        text=text,
                        search_text=search_text,
                        in_response_to=prior_text,
                        search_in_response_to=prior_search_text,
                        conversation='training')
                    statement.add_tags(*categories)
                    statement = self.get_preprocessed_statement(statement)

                    # The next statement responds to the preprocessed
                    # text; the search text is the value computed before
                    # preprocessing.
                    prior_text = statement.text
                    prior_search_text = search_text

                    pending.append(statement)

            # Nothing to store for a corpus file without statements.
            if not pending:
                continue

            self.chatbot.storage.create_many(pending)