import tools
import sys

# Script entry point: read the text file named by the first command-line
# argument, run every line through the ``tools`` clean-up helpers, split it
# into sentences and print the sentences that pass the final filter.
if __name__ == "__main__":
    # A context manager guarantees the file handle is closed even if one of
    # the ``tools`` helpers raises; iterating the handle directly avoids
    # loading the whole file into memory the way ``readlines()`` did.
    with open(sys.argv[1], "r") as source_file:
        for line in source_file:
            # Per-line normalisation, applied in this fixed order.
            # NOTE(review): judging by its name, strQ2B presumably converts
            # full-width characters to half-width -- confirm in tools.
            line = tools.format_white_space(line)
            line = tools.strQ2B(line)
            line = tools.remove_control_character(line)

            # Split the stripped line into sentences, then let
            # gather_sentences post-process that list.
            sentences = tools.chapter_to_sentences(line.strip())
            sentences = tools.gather_sentences(sentences)

            for sentence in sentences:
                s = tools.strip_punctuation(sentence)
                # Emit only results longer than one character for which
                # tools.have_chinese() returns a truthy value.
                if len(s) > 1 and tools.have_chinese(s):
                    print(s)
def __clean_raw_file(self, raw):
    """Lower-case ``raw`` and strip its punctuation.

    Args:
        self: Unused by the body; kept so the signature stays unchanged.
        raw: Object exposing a ``lower()`` method (presumably a ``str``).

    Returns:
        Whatever ``tools.strip_punctuation`` returns for the lower-cased
        input.
    """
    return tools.strip_punctuation(raw.lower())
def __clean_raw_file__(self, raw):
    """Return ``raw`` lower-cased and passed through ``tools.strip_punctuation``.

    Args:
        self: Not referenced by the body; retained for interface
            compatibility.
        raw: Input text; only its ``lower()`` method is used here.

    Returns:
        The result of ``tools.strip_punctuation`` applied to the
        lower-cased text.
    """
    lowered = raw.lower()
    cleaned = tools.strip_punctuation(lowered)
    return cleaned