Exemplo n.º 1
0
class CorpusUtilsTestCase(TestCase):
    """Checks for the path, reading and loading helpers of ``Corpus``."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        A dotted corpus path must resolve to a location that contains
        the matching data directory.
        """
        resolved = self.corpus.get_file_path("chatterbot.corpus.english")
        expected_fragment = os.path.join(
            "chatterbot", "corpus", "data", "english"
        )
        self.assertIn(expected_fragment, resolved)

    def test_read_corpus(self):
        """Reading the conversations file yields a "conversations" entry."""
        path_parts = (
            self.corpus.data_directory,
            "english",
            "conversations.json",
        )
        contents = self.corpus.read_corpus(os.path.join(*path_parts))
        self.assertIn("conversations", contents)

    def test_load_corpus(self):
        """The greetings corpus loads as one entry holding Hi/Hello."""
        loaded = self.corpus.load_corpus(
            "chatterbot.corpus.english.greetings"
        )

        self.assertEqual(len(loaded), 1)
        self.assertIn(["Hi", "Hello"], loaded[0])

    def test_load_corpus_general(self):
        """The whole English corpus loads as two entries."""
        loaded = self.corpus.load_corpus("chatterbot.corpus.english")

        self.assertEqual(len(loaded), 2)
        # The Hi/Hello exchange is expected in the second entry
        self.assertIn(["Hi", "Hello"], loaded[1])
Exemplo n.º 2
0
class CorpusUtilsTestCase(TestCase):
    """Checks for the path, listing, reading and loading helpers of ``Corpus``."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        A dotted corpus path must resolve to a location that contains
        the matching data directory.
        """
        resolved = self.corpus.get_file_path('chatterbot.corpus.english')
        expected_fragment = os.path.join(
            'chatterbot', 'corpus', 'data', 'english'
        )
        self.assertIn(expected_fragment, resolved)

    def test_read_corpus(self):
        """Reading the conversations file yields a 'conversations' entry."""
        path_parts = (
            self.corpus.data_directory,
            'english',
            'conversations.json',
        )
        contents = self.corpus.read_corpus(os.path.join(*path_parts))
        self.assertIn('conversations', contents)

    def test_list_english_corpus_files(self):
        """The English corpus lists at least three files, the first a JSON one."""
        listed = self.corpus.list_corpus_files('chatterbot.corpus.english')

        self.assertGreaterEqual(len(listed), 3)
        self.assertIn('.json', listed[0])

    def test_load_corpus(self):
        """The greetings corpus loads as one entry holding Hi/Hello."""
        loaded = self.corpus.load_corpus(
            'chatterbot.corpus.english.greetings'
        )

        self.assertEqual(len(loaded), 1)
        self.assertIn(['Hi', 'Hello'], loaded[0])

    def test_load_corpus_general(self):
        """The whole English corpus loads as three entries."""
        loaded = self.corpus.load_corpus("chatterbot.corpus.english")

        self.assertEqual(len(loaded), 3)
        # The Hi/Hello exchange is expected in the second entry
        self.assertIn(['Hi', 'Hello'], loaded[1])
Exemplo n.º 3
0
class CorpusUtilsTestCase(TestCase):
    """Checks for the path, reading and loading helpers of ``Corpus``."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        A dotted corpus path must resolve to a location that contains
        the matching data directory.
        """
        resolved = self.corpus.get_file_path("chatterbot.corpus.english")
        expected_fragment = os.path.join(
            "chatterbot", "corpus", "data", "english"
        )
        self.assertIn(expected_fragment, resolved)

    def test_read_corpus(self):
        """Reading the conversations file yields a "conversations" entry."""
        path_parts = (
            self.corpus.data_directory,
            "english",
            "conversations.json",
        )
        contents = self.corpus.read_corpus(os.path.join(*path_parts))
        self.assertIn("conversations", contents)

    def test_load_corpus(self):
        """The greetings corpus loads as one entry holding Hi/Hello."""
        loaded = self.corpus.load_corpus(
            "chatterbot.corpus.english.greetings"
        )

        self.assertEqual(len(loaded), 1)
        self.assertIn(["Hi", "Hello"], loaded[0])

    def test_load_corpus_general(self):
        """The whole English corpus loads as three entries."""
        loaded = self.corpus.load_corpus("chatterbot.corpus.english")

        self.assertEqual(len(loaded), 3)
        # The Hi/Hello exchange is expected in the second entry
        self.assertIn(["Hi", "Hello"], loaded[1])
Exemplo n.º 4
0
class ChatterBotCorpusTrainer(Trainer):
    """
    Trainer that feeds a chat bot with conversations taken from
    the ChatterBot dialog corpus.
    """

    def __init__(self, chatbot, **kwargs):
        """Initialise the base trainer and create the corpus loader."""
        super().__init__(chatbot, **kwargs)
        from chatterbot.corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Store every statement of the given corpora in the chat bot's storage.

        The corpus paths may be given as separate arguments or as a
        single list. Each statement is linked to the text of the
        statement that preceded it in the same conversation.
        """
        # A lone list argument stands for the whole collection of paths
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        for dotted_path in corpus_paths:
            loaded_corpora = self.corpus.load_corpus(dotted_path)
            file_names = self.corpus.list_corpus_files(dotted_path)

            for file_index, corpus in enumerate(loaded_corpora):
                for position, conversation in enumerate(corpus):

                    if self.show_training_progress:
                        # Label the progress bar with the corpus file name
                        label = str(
                            os.path.basename(file_names[file_index])
                        ) + ' Training'
                        utils.print_progress_bar(
                            label, position + 1, len(corpus)
                        )

                    # The first statement of a conversation answers nothing
                    last_text = None

                    for text in conversation:
                        raw_statement = Statement(
                            text=text,
                            in_response_to=last_text,
                            conversation='training'
                        )
                        raw_statement.add_tags(*corpus.categories)

                        processed = self.get_preprocessed_statement(
                            raw_statement
                        )
                        last_text = processed.text

                        self.chatbot.storage.create(
                            text=processed.text,
                            in_response_to=processed.in_response_to,
                            conversation=processed.conversation,
                            tags=processed.tags
                        )
	def get_language_data(self, language):
		"""
		Load language-specific data.

		Resolves the ``math_words`` JSON file for ``language`` through
		``Corpus.get_file_path`` and returns its decoded content.

		:param language: Language name inserted into the dotted corpus
			path ``chatterbot.corpus.<language>.math_words``.
		:returns: The object decoded from the JSON file.
		:raises UnrecognizedLanguageException: If the data file cannot
			be opened.
		"""
		from chatterbot.corpus import Corpus

		corpus = Corpus()

		math_words_data_file_path = corpus.get_file_path(
			'chatterbot.corpus.{}.math_words'.format(language),
			extension='json'
		)

		try:
			# Read as UTF-8 explicitly: the default encoding of open() is
			# platform dependent and may fail to decode non-ASCII words.
			with open(math_words_data_file_path, encoding='utf-8') as data:
				return json.load(data)
		except IOError as error:
			# Keep the underlying I/O error attached as the explicit cause
			raise self.UnrecognizedLanguageException(
				'A math_words data file was not found for `{}` at `{}`.'.format(
					language, math_words_data_file_path
				)
			) from error
Exemplo n.º 6
0
class newCorpusTrainer(trainers.Trainer):
    """
    Corpus trainer in which every conversation is a sequence of lines
    and every line holds one or more statement texts.
    """

    def __init__(self, storage, **kwargs):
        """Initialise the base trainer and create the corpus loader."""
        super(newCorpusTrainer, self).__init__(storage, **kwargs)
        from chatterbot.corpus import Corpus

        self.corpus = Corpus()

    def train(self, *corpus_paths):
        """
        Train on the given corpora.

        The corpus paths may be given as separate arguments or as a
        single list. Every statement of a line receives a response
        entry for each statement text of the line before it.
        """
        # A lone list argument stands for the whole collection of paths
        if len(corpus_paths) == 1 and isinstance(corpus_paths[0], list):
            corpus_paths = corpus_paths[0]

        for dotted_path in corpus_paths:
            loaded_corpora = self.corpus.load_corpus(dotted_path)
            file_names = self.corpus.list_corpus_files(dotted_path)

            for file_index, corpus in enumerate(loaded_corpora):
                for position, conversation in enumerate(corpus):
                    # Label the progress bar with the corpus file name
                    label = str(
                        os.path.basename(file_names[file_index])
                    ) + " Training"
                    print_progress_bar(label, position + 1, len(corpus))

                    # Texts of the line handled just before the current one
                    earlier_texts = []

                    for line in conversation:
                        current_texts = []

                        for text in line:
                            statement = self.get_or_create(text)
                            statement.add_tags(corpus.categories)
                            current_texts.append(statement.text)

                            # Nothing is added while earlier_texts is empty
                            for earlier_text in earlier_texts:
                                statement.add_response(Response(earlier_text))

                            self.storage.update(statement)

                        earlier_texts = current_texts
Exemplo n.º 7
0
class CorpusUtilsTestCase(TestCase):
    """Checks for the path, listing, reading and loading helpers of ``Corpus``."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def test_get_file_path(self):
        """
        A dotted corpus path must resolve to a location that contains
        the matching data directory.
        """
        resolved = self.corpus.get_file_path('chatterbot.corpus.english')
        expected_fragment = os.path.join(
            'chatterbot', 'corpus', 'data', 'english'
        )
        self.assertIn(expected_fragment, resolved)

    def test_read_english_corpus(self):
        """Reading the English conversations file yields a 'conversations' entry."""
        path_parts = (
            self.corpus.data_directory,
            'english',
            'conversations.corpus.json',
        )
        contents = self.corpus.read_corpus(os.path.join(*path_parts))
        self.assertIn('conversations', contents)

    def test_list_english_corpus_files(self):
        """The English corpus lists at least three files, the first a JSON one."""
        listed = self.corpus.list_corpus_files('chatterbot.corpus.english')

        self.assertGreaterEqual(len(listed), 3)
        self.assertIn('.json', listed[0])

    def test_load_corpus(self):
        """The greetings corpus loads as one entry holding Hi/Hello."""
        loaded = self.corpus.load_corpus(
            'chatterbot.corpus.english.greetings'
        )

        self.assertEqual(len(loaded), 1)
        self.assertIn(['Hi', 'Hello'], loaded[0])

    def test_load_corpus_english(self):
        """The whole English corpus loads as three entries."""
        loaded = self.corpus.load_corpus("chatterbot.corpus.english")

        self.assertEqual(len(loaded), 3)
        # The Hi/Hello exchange is expected in the second entry
        self.assertIn(['Hi', 'Hello'], loaded[1])
    def get_language_data(self, language):
        """
        Load language-specific data.

        Resolves the ``math_words`` JSON file for ``language`` through
        ``Corpus.get_file_path`` and returns its decoded content.

        :param language: Language name inserted into the dotted corpus
            path ``chatterbot.corpus.<language>.math_words``.
        :returns: The object decoded from the JSON file.
        :raises UnrecognizedLanguageException: If the data file cannot
            be opened.
        """
        from chatterbot.corpus import Corpus

        corpus = Corpus()

        math_words_data_file_path = corpus.get_file_path(
            'chatterbot.corpus.{}.math_words'.format(language),
            extension='json'
        )

        try:
            # Read as UTF-8 explicitly: the default encoding of open() is
            # platform dependent and may fail to decode non-ASCII words.
            with open(math_words_data_file_path, encoding='utf-8') as data:
                return json.load(data)
        except IOError as error:
            # Keep the underlying I/O error attached as the explicit cause
            raise self.UnrecognizedLanguageException(
                'A math_words data file was not found for `{}` at `{}`.'.format(
                    language, math_words_data_file_path
                )
            ) from error
Exemplo n.º 9
0
 def setUp(self):
     """Create a ``Corpus`` instance and store it on ``self.corpus``."""
     # NOTE(review): presumably a unittest fixture hook; the enclosing
     # class is not visible in this fragment.
     self.corpus = Corpus()
Exemplo n.º 10
0
    def __init__(self, chatbot, **kwargs):
        """
        Initialise the parent class and attach a corpus loader.

        ``chatbot`` and all keyword arguments are forwarded unchanged to
        the parent initialiser.
        """
        super().__init__(chatbot, **kwargs)
        # Imported inside the method rather than at module level.
        # NOTE(review): possibly to avoid an import cycle - confirm.
        from chatterbot.corpus import Corpus

        # Corpus instance kept on the object as ``self.corpus``
        self.corpus = Corpus()
Exemplo n.º 11
0
class CorpusLoadingTestCase(TestCase):
    """Checks that each bundled language corpus loads to a non-empty result."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def _assert_loads_non_empty(self, dotted_path):
        """Load ``dotted_path`` and require the result to have a non-zero length."""
        loaded = self.corpus.load_corpus(dotted_path)

        self.assertTrue(len(loaded))

    def test_load_corpus_chinese(self):
        """The Chinese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.chinese')

    def test_load_corpus_english(self):
        """The English corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.english')

    def test_load_corpus_english_greetings(self):
        """The greetings corpus loads as one entry holding Hi/Hello."""
        loaded = self.corpus.load_corpus(
            'chatterbot.corpus.english.greetings'
        )

        self.assertEqual(len(loaded), 1)
        self.assertIn(['Hi', 'Hello'], loaded[0])

    def test_load_corpus_french(self):
        """The French corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.french')

    def test_load_corpus_german(self):
        """The German corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.german')

    def test_load_corpus_hindi(self):
        """The Hindi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.hindi')

    def test_load_corpus_indonesia(self):
        """The Indonesian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.indonesia')

    def test_load_corpus_italian(self):
        """The Italian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.italian')

    def test_load_corpus_marathi(self):
        """The Marathi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.marathi')

    def test_load_corpus_portuguese(self):
        """The Portuguese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.portuguese')

    def test_load_corpus_russian(self):
        """The Russian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.russian')

    def test_load_corpus_spanish(self):
        """The Spanish corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.spanish')

    def test_load_corpus_telugu(self):
        """The Telugu corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.telugu')

    def test_get_file_path(self):
        """
        A dotted corpus path must resolve to a location that contains
        the matching ``chatterbot_corpus`` data directory.
        """
        import os

        resolved = self.corpus.get_file_path('chatterbot.corpus.english')
        expected_fragment = os.path.join(
            'chatterbot_corpus', 'data', 'english'
        )
        self.assertIn(expected_fragment, resolved)
Exemplo n.º 12
0
 def __init__(self, storage, **kwargs):
     """
     Keep the given storage and keyword arguments on the instance and
     create a ``Corpus`` instance as ``self.corpus``.
     """
     # Keyword arguments are kept as given; nothing is read from them here
     self.kwargs = kwargs
     self.storage = storage
     self.corpus = Corpus()
Exemplo n.º 13
0
class CorpusLoadingTestCase(TestCase):
    """Checks that each bundled language corpus loads to a non-empty result."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def _assert_loads_non_empty(self, dotted_path):
        """Load ``dotted_path`` and require the result to have a non-zero length."""
        loaded = self.corpus.load_corpus(dotted_path)

        self.assertTrue(len(loaded))

    def test_load_corpus_chinese(self):
        """The Chinese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.chinese')

    def test_load_corpus_english(self):
        """The English corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.english')

    def test_load_corpus_english_greetings(self):
        """The greetings corpus loads as one entry holding Hi/Hello."""
        loaded = self.corpus.load_corpus(
            'chatterbot.corpus.english.greetings'
        )

        self.assertEqual(len(loaded), 1)
        self.assertIn(['Hi', 'Hello'], loaded[0])

    def test_load_corpus_french(self):
        """The French corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.french')

    def test_load_corpus_german(self):
        """The German corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.german')

    def test_load_corpus_hindi(self):
        """The Hindi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.hindi')

    def test_load_corpus_indonesia(self):
        """The Indonesian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.indonesia')

    def test_load_corpus_italian(self):
        """The Italian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.italian')

    def test_load_corpus_marathi(self):
        """The Marathi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.marathi')

    def test_load_corpus_portuguese(self):
        """The Portuguese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.portuguese')

    def test_load_corpus_russian(self):
        """The Russian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.russian')

    def test_load_corpus_spanish(self):
        """The Spanish corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.spanish')

    def test_load_corpus_telugu(self):
        """The Telugu corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.telugu')

    def test_get_file_path(self):
        """
        A dotted corpus path must resolve to a location that contains
        the matching ``chatterbot_corpus`` data directory.
        """
        import os

        resolved = self.corpus.get_file_path('chatterbot.corpus.english')
        expected_fragment = os.path.join(
            'chatterbot_corpus', 'data', 'english'
        )
        self.assertIn(expected_fragment, resolved)
Exemplo n.º 14
0
    def __init__(self, storage, **kwargs):
        """
        Initialise the parent class and attach a corpus loader.

        ``storage`` and all keyword arguments are forwarded unchanged to
        the parent initialiser.
        """
        super(newCorpusTrainer, self).__init__(storage, **kwargs)
        # Imported inside the method rather than at module level.
        # NOTE(review): possibly to avoid an import cycle - confirm.
        from chatterbot.corpus import Corpus

        # Corpus instance kept on the object as ``self.corpus``
        self.corpus = Corpus()
Exemplo n.º 15
0
class CorpusLoadingTestCase(TestCase):
    """Checks that each bundled language corpus loads to a non-empty result."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def _assert_loads_non_empty(self, dotted_path):
        """Load ``dotted_path`` and require the result to have a non-zero length."""
        loaded = self.corpus.load_corpus(dotted_path)

        self.assertTrue(len(loaded))

    def test_load_corpus_chinese(self):
        """The Chinese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.chinese')

    def test_load_corpus_english(self):
        """The English corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.english')

    def test_load_corpus_french(self):
        """The French corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.french')

    def test_load_corpus_german(self):
        """The German corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.german')

    def test_load_corpus_hindi(self):
        """The Hindi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.hindi')

    def test_load_corpus_indonesia(self):
        """The Indonesian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.indonesia')

    def test_load_corpus_italian(self):
        """The Italian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.italian')

    def test_load_corpus_marathi(self):
        """The Marathi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.marathi')

    def test_load_corpus_portuguese(self):
        """The Portuguese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.portuguese')

    def test_load_corpus_russian(self):
        """The Russian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.russian')

    def test_load_corpus_spanish(self):
        """The Spanish corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.spanish')

    def test_load_corpus_telugu(self):
        """The Telugu corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.telugu')
Exemplo n.º 16
0
 def setUp(self):
     """Create a ``Corpus`` instance and store it on ``self.corpus``."""
     # NOTE(review): presumably a unittest fixture hook; the enclosing
     # class is not visible in this fragment.
     self.corpus = Corpus()
Exemplo n.º 17
0
class CorpusLoadingTestCase(TestCase):
    """Checks that each bundled language corpus loads to a non-empty result."""

    def setUp(self):
        """Create the ``Corpus`` instance used by the tests."""
        self.corpus = Corpus()

    def _assert_loads_non_empty(self, dotted_path):
        """Load ``dotted_path`` and require the result to have a non-zero length."""
        loaded = self.corpus.load_corpus(dotted_path)

        self.assertTrue(len(loaded))

    def test_load_corpus_chinese(self):
        """The Chinese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.chinese')

    def test_load_corpus_english(self):
        """The English corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.english')

    def test_load_corpus_french(self):
        """The French corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.french')

    def test_load_corpus_german(self):
        """The German corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.german')

    def test_load_corpus_hindi(self):
        """The Hindi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.hindi')

    def test_load_corpus_indonesia(self):
        """The Indonesian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.indonesia')

    def test_load_corpus_italian(self):
        """The Italian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.italian')

    def test_load_corpus_marathi(self):
        """The Marathi corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.marathi')

    def test_load_corpus_portuguese(self):
        """The Portuguese corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.portuguese')

    def test_load_corpus_russian(self):
        """The Russian corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.russian')

    def test_load_corpus_spanish(self):
        """The Spanish corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.spanish')

    def test_load_corpus_telugu(self):
        """The Telugu corpus loads to a non-empty result."""
        self._assert_loads_non_empty('chatterbot.corpus.telugu')