def test_tokenizer_is_template(self):
    tokenizer = Tokenizer()
    self.assertIsNotNone(tokenizer)
    self.assertFalse(tokenizer.is_template)
    tokenizer.is_template = True
    self.assertTrue(tokenizer.is_template)

def test_tokenizer_compare(self):
    tokenizer = Tokenizer()
    self.assertIsNotNone(tokenizer)
    target = "aaa bbb"
    self.assertTrue(tokenizer.compare(target, "aaa bbb"))
    self.assertFalse(tokenizer.compare(target, "aaa"))
    self.assertFalse(tokenizer.compare(target, "bbb"))

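# What compare() is asserted to do here is consistent with a simple
# word-for-word match after tokenizing both inputs. A minimal sketch
# (an assumption, not necessarily the library's implementation):
#
#   def compare(self, value1, value2):
#       return self.texts_to_words(value1) == self.texts_to_words(value2)
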
def __init__(self, split_chars=' ', punctuation_chars=None,
             before_concatenation_rule=None, after_concatenation_rule=None):
    Tokenizer.__init__(self, split_chars=split_chars,
                       punctuation_chars=punctuation_chars,
                       before_concatenation_rule=before_concatenation_rule,
                       after_concatenation_rule=after_concatenation_rule)

def test_default_tokenizer(self):
    tokenizer = Tokenizer()
    self.assertIsNotNone(tokenizer)

    self.assertEqual([], tokenizer.texts_to_words(""))
    self.assertEqual(["Hello"], tokenizer.texts_to_words("Hello"))
    self.assertEqual(["Hello", "World"], tokenizer.texts_to_words("Hello World"))
    self.assertEqual(["Hello", "World"], tokenizer.texts_to_words(" Hello World "))

    self.assertEqual("", tokenizer.words_to_texts([]))
    self.assertEqual("Hello", tokenizer.words_to_texts(["Hello"]))
    self.assertEqual("Hello World", tokenizer.words_to_texts(["Hello", "World"]))
    self.assertEqual("Hello World", tokenizer.words_to_texts(["Hello", "", "World"]))
    self.assertEqual("Hello World", tokenizer.words_to_texts([" Hello ", " World "]))

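# A hedged sketch consistent with the assertions above (an assumption,
# not necessarily the library's implementation): texts_to_words splits
# on the configured split character and drops empty fragments, while
# words_to_texts strips each word and joins the non-empty results:
#
#   def texts_to_words(self, texts):
#       return [word for word in texts.split(self._split_chars) if word]
#
#   def words_to_texts(self, words):
#       return self._split_chars.join(
#           word.strip() for word in words if word.strip())
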
def test_sentence_creation_two_words_diff_split_char(self):
    tokenizer = Tokenizer(",")
    sentence = Sentence(tokenizer, "One,Two")
    self.assertIsNotNone(sentence)
    self.assertEqual(2, sentence.num_words())
    self.assertEqual("One", sentence.word(0))
    self.assertEqual("Two", sentence.word(1))
    self.assertIsNone(sentence.word(2))
    self.assertEqual("One,Two", sentence.text())

def test_load_tokenizer_no_class(self):
    config = BrainConfiguration()
    self.assertIsNotNone(config)

    tokenizer_config = BrainTokenizerConfiguration()
    self.assertIsNotNone(tokenizer_config)
    tokenizer_config._classname = None
    tokenizer_config._punctation_chars = ''
    config._tokenizer = tokenizer_config

    tokenizer = Tokenizer.load_tokenizer(config)
    self.assertIsNotNone(tokenizer)
    self.assertIsInstance(tokenizer, Tokenizer)

def test_load_tokenizer(self):
    config = BrainConfiguration()
    self.assertIsNotNone(config)

    tokenizer_config = BrainTokenizerConfiguration()
    self.assertIsNotNone(tokenizer_config)
    tokenizer_config._classname = 'programy.dialog.tokenizer.tokenizer.Tokenizer'
    tokenizer_config._punctation_chars = ''
    config._tokenizer = tokenizer_config

    tokenizer = Tokenizer.load_tokenizer(config)
    self.assertIsNotNone(tokenizer)
    self.assertIsInstance(tokenizer, Tokenizer)

def __init__(self, bot, configuration: BrainConfiguration):
    assert bot is not None
    assert configuration is not None

    self._questions = 0
    self._bot = bot
    self._configuration = configuration
    self._pattern_factory = None
    self._template_factory = None

    self._binaries = BinariesManager(configuration.binaries)
    self._braintree = BraintreeManager(configuration.braintree)
    self._tokenizer = Tokenizer.load_tokenizer(configuration.tokenizer)

    self._denormal_collection = DenormalCollection()
    self._normal_collection = NormalCollection()
    self._gender_collection = GenderCollection()
    self._person_collection = PersonCollection()
    self._person2_collection = Person2Collection()
    self._rdf_collection = RDFCollection()
    self._sets_collection = SetCollection()
    self._maps_collection = MapCollection()
    self._properties_collection = PropertiesCollection()
    self._default_variables_collection = DefaultVariablesCollection()
    self._regex_templates = RegexTemplatesCollection()
    self._dynamics_collection = DynamicsCollection()

    self._preprocessors = PreProcessorCollection()
    self._postprocessors = PostProcessorCollection()
    self._postquestionprocessors = PostQuestionProcessorCollection()

    self._services = ServiceHandler()
    self._oobhandler = OOBHandler()
    self._security = SecurityManager(configuration.security)

    self._aiml_parser = self.load_aiml_parser()

    self.load(self.configuration)

def __init__(self, split_chars=' '):
    Tokenizer.__init__(self, split_chars)

def __init__(self, bot, configuration: BrainConfiguration):
    assert bot is not None
    assert configuration is not None

    self._bot = bot
    self._configuration = configuration

    self._binaries = BinariesManager(configuration.binaries)
    self._braintree = BraintreeManager(configuration.braintree)
    self._tokenizer = Tokenizer.load_tokenizer(configuration)

    if configuration.debugfiles.save_errors_collection is True:
        errors_dict = {}
    else:
        errors_dict = None

    self._denormal_collection = DenormalCollection(errors_dict)
    self._normal_collection = NormalCollection(errors_dict)
    self._gender_collection = GenderCollection(errors_dict)
    self._person_collection = PersonCollection(errors_dict)
    self._person2_collection = Person2Collection(errors_dict)
    self._rdf_collection = RDFCollection(errors_dict)
    self._sets_collection = SetCollection(errors_dict)
    self._maps_collection = MapCollection(errors_dict)
    self._properties_collection = PropertiesCollection(errors_dict)
    self._default_variables_collection = DefaultVariablesCollection(errors_dict)
    self._botnames_collection = BotNamesCollection(errors_dict)

    self._preprocessors = PreProcessorCollection(errors_dict)
    self._postprocessors = PostProcessorCollection(errors_dict)

    self._pattern_factory = None
    self._template_factory = None

    self._security = SecurityManager(configuration.security)
    self._oobhandler = OOBHandler(configuration.oob)

    self._regex_templates = RegexTemplatesCollection(errors_dict)
    self._dynamics_collection = DynamicsCollection()

    self._aiml_parser = self.load_aiml_parser()

    self._nlu_collection = NluCollection(bot.client, configuration.nlu, errors_dict)
    self._nlu = NluRequest.load_nlu(configuration.nlu)
    self._nlu_utterance = None

    self.load(self.configuration)

    if configuration.debugfiles.save_errors_collection is True:
        storage_factory = self.bot.client.storage_factory
        if storage_factory.entity_storage_engine_available(StorageFactory.ERRORS_COLLECTION) is True:
            errors_collection_engine = storage_factory.entity_storage_engine(StorageFactory.ERRORS_COLLECTION)
            errors_collection_store = errors_collection_engine.errors_collection_store()
            errors_collection_store.save_errors_collection(errors_dict)

def test_tokenizer_words_from_current_pos(self):
    tokenizer = Tokenizer()
    self.assertIsNotNone(tokenizer)

    self.assertEqual("", tokenizer.words_from_current_pos(None, 0))

    words = ["aaa", "bbb", "ccc"]
    self.assertEqual("aaa bbb ccc", tokenizer.words_from_current_pos(words, 0))
    self.assertEqual("bbb ccc", tokenizer.words_from_current_pos(words, 1))
    self.assertEqual("ccc", tokenizer.words_from_current_pos(words, 2))
    self.assertEqual("", tokenizer.words_from_current_pos(words, 3))
    self.assertEqual("ccc", tokenizer.words_from_current_pos(words, -1))
    self.assertEqual("bbb ccc", tokenizer.words_from_current_pos(words, -2))
    self.assertEqual("aaa bbb ccc", tokenizer.words_from_current_pos(words, -3))
    self.assertEqual("aaa bbb ccc", tokenizer.words_from_current_pos(words, -4))

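# The negative and past-the-end positions asserted above behave exactly
# like Python list slicing, which clamps out-of-range bounds. A minimal
# sketch consistent with these tests (an assumption about the
# implementation; the real code presumably joins with the configured
# split character):
#
#   def words_from_current_pos(self, words, current_pos):
#       if not words:
#           return ""
#       return " ".join(words[current_pos:])
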
def test_load_tokenizer_classname_none(self):
    config = BrainTokenizerConfiguration()
    config._classname = None
    tokenizer = Tokenizer.load_tokenizer(config)
    self.assertIsNotNone(tokenizer)
    self.assertIsInstance(tokenizer, Tokenizer)

def test_load_invalid_tokenizer(self):
    config = BrainTokenizerConfiguration()
    config._classname = "programy.dialog.tokenizer.tokenizer.TokenizerXXX"
    tokenizer = Tokenizer.load_tokenizer(config)
    self.assertIsNotNone(tokenizer)
    self.assertIsInstance(tokenizer, Tokenizer)

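# Both the classname-is-None and the invalid-classname tests still get
# a default Tokenizer back, so load_tokenizer evidently guards its
# dynamic class load and falls back. A hedged sketch of that shape
# (the exact signature and fallback logic are assumptions):
#
#   @staticmethod
#   def load_tokenizer(tokenizer_config):
#       if tokenizer_config is not None and tokenizer_config.classname is not None:
#           try:
#               tokenizer_class = ClassLoader.instantiate_class(tokenizer_config.classname)
#               return tokenizer_class(tokenizer_config.split_chars)
#           except Exception:
#               pass
#       return Tokenizer()
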
def test_words_from_current_pos_no_words(self):
    tokenizer = Tokenizer()
    self.assertIsNotNone(tokenizer)
    # assertEquals is a deprecated alias; use assertEqual
    self.assertEqual("", tokenizer.words_from_current_pos([], 5))
    self.assertEqual("", tokenizer.words_from_current_pos(None, 5))

def test_words_from_current_pos_past_end(self):
    tokenizer = Tokenizer()
    self.assertIsNotNone(tokenizer)
    self.assertEqual("", tokenizer.words_from_current_pos(["A", "B", "C"], 5))