def test_fix_term_in_the_end(self):
    """Check that ``fix`` with the 'term_in_the_end' rule terminates sentences.

    Each case passes the modified text to ``Chunk`` either as a single string
    or as a list of sentences, runs ``checker.fix`` on the chunk, and compares
    ``chunk.get_mod_text()`` with the expected text.
    """
    checker = chks.SentCorrectnessChecker(['term_in_the_end'])

    # (modified text given to Chunk, text expected after fix)
    cases = (
        ("Without term ", "Without term."),
        (["Without term ", "wo term"], "Without term. wo term."),
        (["Boring sent.", "text"], "Boring sent. text."),
    )

    for mod_text, expected_text in cases:
        chunk = Chunk("", mod_text, "", "", 1)
        checker.fix(chunk)
        self.assertEqual(expected_text, chunk.get_mod_text())
def test_fix_title_case(self):
    """Check that ``fix`` with the 'title_case' rule repairs the sentence start.

    Two scenarios are covered: a sentence starting with a lowercase letter,
    and a sentence preceded by a stray ". " prefix.
    """
    checker = chks.SentCorrectnessChecker(['title_case'])

    # Lowercase first letter: the expected text starts with a capital letter.
    lowercase_chunk = Chunk("", "маленькая буква.", "", "", 1)
    checker.fix(lowercase_chunk)
    self.assertEqual("Маленькая буква.", lowercase_chunk.get_mod_text())

    # Leading ". " before the sentence: the test expects a single sentence
    # whose text no longer carries that prefix.
    expected_sentence = "В основном токсин из организма выводится через почки."
    dotted_chunk = Chunk(
        "",
        ". В основном токсин из организма выводится через почки.",
        "",
        "",
        1,
    )
    checker.fix(dotted_chunk)
    self.assertEqual(1, len(dotted_chunk.get_mod_sents()))
    self.assertEqual(expected_sentence, dotted_chunk.get_mod_text())
def create_checkers(opts, sources_dir, spell_checker_whitelist=None):
    """Build the list of checker instances in a fixed order.

    Args:
        opts: options object handed to most checker constructors.
        sources_dir: passed to ``SourceDocsChecker`` as its second argument.
        spell_checker_whitelist: forwarded to ``SpellChecker`` as ``whitelist``.

    Returns:
        A new list of checker instances.
    """
    checkers = [
        chks.OrigSentChecker(opts),
        chks.SourceDocsChecker(opts, sources_dir),
    ]

    # These checkers are all constructed with the same fluctuation_delta.
    delta_checker_classes = (
        chks.PRChecker,
        chks.AddChecker,
        chks.DelChecker,
        chks.CPYChecker,
        chks.CctChecker,
        chks.SspChecker,
    )
    checkers.extend(
        checker_cls(opts, fluctuation_delta=5)
        for checker_cls in delta_checker_classes
    )

    checkers += [
        chks.ORIGModTypeChecker(),
        chks.SentCorrectnessChecker(),
        chks.CyrillicAlphabetChecker(opts),
        chks.SpellChecker(whitelist=spell_checker_whitelist),
    ]
    return checkers
def test_term_in_the_end(self):
    """Check the 'term_in_the_end' rule when the checker is called on chunks.

    The same checker instance is applied to every chunk in turn; after each
    call the length of ``checker.get_errors()`` is compared with the expected
    count.
    """
    checker = chks.SentCorrectnessChecker(['term_in_the_end'])

    def run_checker(mod_text):
        # Build a chunk around the given modified text and run the checker.
        chunk = Chunk("", mod_text, "", "", 1)
        checker(chunk, None)
        return chunk

    # Trailing spaces after the terminator: one sentence, no errors expected.
    padded_chunk = run_checker("Correct sent with trailing spaces!!!!! ")
    self.assertEqual(1, len(padded_chunk.get_mod_sents()))
    self.assertEqual(0, len(checker.get_errors()))

    # (modified text, expected number of errors after the call)
    remaining_cases = (
        ("Boring sent.", 0),
        ("Question mark?", 0),
        ("Without term ", 1),
    )
    for mod_text, expected_errors in remaining_cases:
        run_checker(mod_text)
        self.assertEqual(expected_errors, len(checker.get_errors()))
def test_title_case(self):
    """Check the 'title_case' rule when the checker is called on chunks.

    The same checker instance is applied to every chunk in turn; after each
    call the length of ``checker.get_errors()`` is compared with the expected
    count.
    """
    checker = chks.SentCorrectnessChecker(['title_case'])

    # (modified text, expected number of errors after the call)
    cases = (
        # Properly capitalised sentence.
        ("Корректное предложение!", 0),
        # Hyphenated first word; the literal notes that str.istitle
        # does not work because of the hyphen.
        ("Из-за дефиса не работает str.istitle.", 0),
        # Sentence that opens with a quotation mark.
        ('"Цитата: текст"', 0),
        # Sentence that opens with a number.
        ("2009 number is ok.", 0),
        # Lowercase first letter: the only case expected to yield an error.
        ("маленькая буква.", 1),
    )

    for mod_text, expected_errors in cases:
        chunk = Chunk("", mod_text, "", "", 1)
        checker(chunk, None)
        self.assertEqual(expected_errors, len(checker.get_errors()))
def create_checkers(opts, sources_dir, spell_checker_whitelist=None):
    """Build the list of checker instances in a fixed order.

    Args:
        opts: options object handed to most checker constructors.
        sources_dir: passed to ``SourceDocsChecker`` as its second argument.
        spell_checker_whitelist: forwarded to ``SpellChecker`` as ``whitelist``.

    Returns:
        A new list of checker instances.
    """
    checkers = []
    register = checkers.append

    register(chks.OriginalityChecker(opts))
    register(chks.OrigSentChecker(opts))
    register(chks.ModSentChecker(opts))
    register(chks.SourceDocsChecker(opts, sources_dir))
    register(chks.PRChecker(opts))
    # Disabled: chks.AddChecker(opts), chks.DelChecker(opts),
    # chks.CPYChecker(opts)
    register(chks.CctChecker(opts))
    # Disabled: chks.SspChecker(opts)
    register(chks.SHFChecker(opts))
    # Disabled: chks.SYNChecker(opts)
    register(chks.LexicalSimChecker(opts))
    register(trans_chks.ORIGModTypeChecker())
    register(chks.SentCorrectnessChecker())
    register(trans_chks.TranslationChecker(opts))
    register(trans_chks.ManualTranslationChecker(opts))
    register(chks.CyrillicAlphabetChecker(opts))
    register(chks.SpellChecker(whitelist=spell_checker_whitelist))

    return checkers