示例#1
0
 def savetexts(self, filepath, prepath):
     """
     Preprocess every file in a directory and store the results.

     For each regular file directly under ``filepath`` the URL and text are
     read with ``FileUtil.get_url_text``, the text is segmented with
     ``PreDeal.seg``, and the URL followed by the tab-joined segments is
     written to a file of the same name under ``prepath``.

     :param filepath: directory containing the source (html) files
     :param prepath:  directory the preprocessed files are written to
     :return: None. Errors raised while listing or processing the files
              are logged and not re-raised.
     """
     self.logger.info('init pretreatment directory:"{0}"'.format(prepath))
     # Prepare the output directory before anything is written to it.
     # NOTE(review): presumably creates/clears ``prepath`` -- confirm in FileUtil.
     FileUtil.init_path(prepath)
     try:
         # os.listdir returns the names (str) of all entries in the directory.
         for filename in os.listdir(filepath):
             src_path = os.path.join(filepath, filename)
             if not os.path.isfile(src_path):
                 # Sub-directories and other non-file entries are skipped.
                 continue
             # 1. Fetch the URL and the text of the document.
             url, text = FileUtil.get_url_text(src_path)
             # 2. Extract the keyword/segment information.
             kws = PreDeal.seg(text)
             self.logger.info(
                 "Store pretreatment texts content:{0}".format(filename))
             FileUtil.writefile(url + '\t'.join(kws),
                                os.path.join(prepath, filename))
         self.logger.info('Text pretreatment End!')
     except Exception as e:
         # Stay best-effort (do not re-raise), but report through the logger
         # with the traceback instead of printing a bare message to stdout.
         # Assumes self.logger follows the standard logging.Logger API.
         self.logger.exception('Text pretreatment failed: %s', e)