def savetexts(self, filepath, prepath):
    """
    Save the pre-processed text of every file found in a directory.

    For each regular file directly inside ``filepath`` the URL and text are
    read with ``FileUtil.get_url_text``, the text is segmented into keywords
    with ``PreDeal.seg``, and the result is written to a file of the same
    name inside ``prepath``.

    Any exception raised while listing or processing files is logged with
    its traceback and swallowed; the files that have not been processed yet
    are skipped in that case.

    :param filepath: path of the directory holding the html files
    :param prepath: path of the directory the results are saved to
    :return: None
    """
    self.logger.info('init pretreatment directory:"{0}"'.format(prepath))
    FileUtil.init_path(prepath)
    try:
        # os.listdir returns the names (strings) of all files and
        # directories directly under filepath.
        for filename in os.listdir(filepath):
            source_file = os.path.join(filepath, filename)
            if not os.path.isfile(source_file):
                # Sub-directories and other non-regular entries are ignored.
                continue
            # 1. Get the url and the text.
            url, text = FileUtil.get_url_text(source_file)
            # 2. Keyword information.
            kws = PreDeal.seg(text)
            self.logger.info(
                "Store pretreatment texts content:{0}".format(filename))
            # NOTE(review): url is concatenated directly with the first
            # keyword (no separator) -- presumably get_url_text returns the
            # url with its own trailing delimiter; confirm.
            FileUtil.writefile(url + '\t'.join(kws),
                               os.path.join(prepath, filename))
        self.logger.info('Text pretreatment End!')
    except Exception:
        # Best-effort batch job: do not propagate, but record the failure
        # (with traceback) in the log instead of printing it to stdout.
        self.logger.exception(
            'Text pretreatment failed for directory:"{0}"'.format(filepath))