def test_regex_matching(self):
    """Verify the mixed-alphabet pattern captures the Latin letter in each word.

    The sample text holds three Cyrillic words with one Latin letter each
    (at the end, at the start and in the middle). ``findall`` yields one
    tuple per match: index 0 is the whole word, index 1 is the letter
    captured by the first alternative and index 2 the letter captured by
    the second alternative.
    """
    chunk = Chunk([], "Онx. yно. оzо", "ADD", "filename", "1")
    # NOTE(review): the range [A-z] also spans the ASCII punctuation between
    # 'Z' and 'a', and [А-я] does not include 'ё'/'Ё' -- confirm that this
    # mirrors the production pattern on purpose.
    mixed_word = regex.compile(r'([А-я]*([A-z])[А-я]+|[А-я]+([A-z])[А-я]*)')
    first_sentence = chunk.get_mod_sents()[0]
    found = mixed_word.findall(first_sentence)

    self.assertEqual(3, len(found))

    # (match index, capture-group slot, expected Latin letter)
    expectations = (
        (0, 2, 'x'),
        (1, 1, 'y'),
        (2, 1, 'z'),
    )
    for match_idx, group_slot, letter in expectations:
        self.assertEqual(letter, found[match_idx][group_slot])
def test_fix_title_case(self):
    """Verify that the 'title_case' fix rewrites the chunk's modified text.

    Two inputs are pushed through ``checker.fix``:
    * a sentence starting with a lowercase letter, expected to come back
      with its first letter capitalised;
    * a sentence preceded by a stray ". ", expected to come back as a
      single sentence without that prefix.
    """
    checker = chks.SentCorrectnessChecker(['title_case'])

    lowercase_start = Chunk("", "маленькая буква.", "", "", 1)
    checker.fix(lowercase_start)
    fixed_text = lowercase_start.get_mod_text()
    self.assertEqual("Маленькая буква.", fixed_text)

    stray_prefix = Chunk(
        "",
        ". В основном токсин из организма выводится через почки.",
        "",
        "",
        1,
    )
    checker.fix(stray_prefix)
    sentence_count = len(stray_prefix.get_mod_sents())
    self.assertEqual(1, sentence_count)
    self.assertEqual(
        "В основном токсин из организма выводится через почки.",
        stray_prefix.get_mod_text(),
    )
def test_term_in_the_end(self):
    """Verify the 'term_in_the_end' check across several sentence endings.

    One checker instance is invoked on four chunks in turn. After each
    call the length of ``checker.get_errors()`` is compared with the
    expected count; only the last input, which has no terminal
    punctuation, is expected to produce an error.
    """
    checker = chks.SentCorrectnessChecker(['term_in_the_end'])

    # First case additionally asserts that the text stays a single sentence.
    padded = Chunk("", "Correct sent with trailing spaces!!!!! ", "", "", 1)
    checker(padded, None)
    self.assertEqual(1, len(padded.get_mod_sents()))
    self.assertEqual(0, len(checker.get_errors()))

    # (modified text, expected number of reported errors); order matters
    # because the same checker object is shared by every case.
    remaining_cases = (
        ("Boring sent.", 0),
        ("Question mark?", 0),
        ("Without term ", 1),
    )
    for text, expected_errors in remaining_cases:
        current = Chunk("", text, "", "", 1)
        checker(current, None)
        reported = checker.get_errors()
        self.assertEqual(expected_errors, len(reported))