Exemplo n.º 1
0
    def un_pack(compressed_files_path):
        """
        Extract a ``.rar`` or ``.zip`` archive into a sibling directory.

        The target directory sits next to the archive and is named after it
        (extension stripped). If that directory already exists it is removed
        and recreated, so the result only contains the freshly extracted files.
        The archive itself is deleted once extraction has succeeded.

        Args:
            compressed_files_path: path of the archive to extract. The suffix
                is matched case-insensitively (``zip``, ``Zip``, ``RAR`` ...).

        Returns:
            The extraction directory, or None when the file name has no usable
            stem/suffix, the suffix is unsupported, or extraction failed. In
            all of the None cases the archive is left on disk.
        """
        # Function-scope import: the file's import block is not visible here.
        import subprocess

        try:
            file_name = os.path.basename(compressed_files_path)
            name, _, suffix = file_name.rpartition(".")
            archive_type = suffix.lower()

            # An empty stem (e.g. ".zip" or a name without any dot) would make
            # the target directory equal to the archive's parent directory,
            # which must never be wiped.
            if not name or archive_type not in ("rar", "zip"):
                logger.warning("unsupported archive: %s", compressed_files_path)
                return None

            # Build the extraction path and start from an empty directory.
            uncompress_dir = os.path.join(
                os.path.dirname(compressed_files_path), name)
            if os.path.exists(uncompress_dir):
                shutil.rmtree(uncompress_dir)
            os.makedirs(uncompress_dir)

            # Extract according to the archive type.
            if archive_type == "rar":
                # Argument list instead of a shell string: paths containing
                # spaces or shell metacharacters are passed through verbatim.
                # check=True turns a failed extraction into an exception so
                # the archive is not deleted below.
                subprocess.run(
                    ["unrar", "x", "-o-", "-y",
                     compressed_files_path, uncompress_dir],
                    check=True)
            else:
                # shutil only knows the lowercase format name "zip".
                shutil.unpack_archive(compressed_files_path, uncompress_dir,
                                      "zip")

            os.remove(compressed_files_path)
            return uncompress_dir
        except Exception as e:
            logger.exception(e)
            return None
Exemplo n.º 2
0
 def del_file(dir_path):
     """
     Delete the hidden files located directly inside a directory.

     An entry counts as hidden when its name starts with ".". Hidden
     sub-directories are skipped, because ``os.remove`` cannot delete them
     and would otherwise abort the whole clean-up.

     Args:
         dir_path: directory whose hidden files should be removed

     Returns:
         None. Any error is logged instead of being raised.
     """
     try:
         for file in os.listdir(dir_path):
             if not file.startswith("."):
                 continue
             # os.listdir yields bare names; they must be joined with the
             # directory, otherwise the path resolves against the CWD.
             hidden_path = os.path.join(dir_path, file)
             if os.path.isdir(hidden_path) and not os.path.islink(hidden_path):
                 continue
             os.remove(hidden_path)
     except Exception as e:
         logger.exception(e)
Exemplo n.º 3
0
 def _word_to_pdf_batch(dir_path, transform=None, delete=True):
     if transform is None:
         transform = ["doc", "docx"]
     try:
         if dir_path and os.path.exists(dir_path) and transform:
             for file_type in transform:
                 if os.system(
                         f"soffice --headless -convert-to pdf {dir_path}/*.{file_type} --outdir {dir_path}"
                 ) in [0, "0"] and delete:
                     try:
                         os.system(rf"rm -f {dir_path}/*.{file_type}")
                         logger.info(f"rm -f {dir_path}/*.{file_type}")
                     except Exception as e:
                         logger.exception(e)
     except Exception as e:
         logger.exception(e)
Exemplo n.º 4
0
    def recursion_decompressing(self, files_path):
        """
        Scan a directory and hand every nested archive to ``self.uncompress``.

        Only the direct entries of ``files_path`` are examined. An entry is
        treated as an archive when the text after its last "." is ``rar`` or
        ``zip`` and its name does not start with ".".

        Args:
            files_path: directory holding the files extracted by the
                previous decompression step

        Returns:
            None. Errors are logged instead of being raised.
        """
        try:
            if not files_path or not os.path.exists(files_path):
                return
            archive_suffixes = ["rar", "zip"]
            for entry in os.listdir(files_path):
                if entry.startswith("."):
                    continue
                extension = entry.rsplit(".", 1)[-1]
                if extension not in archive_suffixes:
                    continue
                self.uncompress(os.path.join(files_path, entry))
        except Exception as e:
            logger.exception(e)
Exemplo n.º 5
0
 def get_message(self, key, func):
     """
     Poll a queue and dispatch every message to ``func`` on its own thread.

     Messages are popped with ``cache.rpop(key)``. Each non-empty message is
     evaluated and passed as the single argument of ``func`` in a new
     ``Thread``. When the queue is empty the loop sleeps one second; after
     ``self.timeout`` consecutive empty polls it stops by raising
     ``SystemExit``.

     Args:
         key: name of the queue to pop from
         func: callable invoked with the evaluated message

     Raises:
         SystemExit: once the queue has stayed empty for ``self.timeout``
             polls. It is not an ``Exception`` subclass, so the handler
             below does not swallow it.
     """
     try:
         index = 0  # number of consecutive empty polls
         while True:
             message = cache.rpop(key)
             if message:
                 logger.info(f"{key}, message{message}")
                 # SECURITY(review): eval() executes whatever expression is
                 # stored in the queue. This is only safe if every producer
                 # of `key` is trusted; consider a data-only format
                 # (e.g. json / ast.literal_eval) - confirm with producers.
                 merge_process = Thread(target=func, args=(eval(message), ))
                 merge_process.start()
                 index = 0
             else:
                 if index >= self.timeout:
                     # Same effect as the interactive helper exit(), without
                     # depending on the site module or closing sys.stdin.
                     raise SystemExit
                 index += 1
                 time.sleep(1)
     except Exception as e:
         logger.exception(e)
Exemplo n.º 6
0
    def check_libre_office_status():
        """
        Check whether LibreOffice is installed and install it when it is not.

        Runs ``libreoffice --version``; a non-zero status is treated as
        "not installed" and triggers ``yum install -y libreoffice``. Every
        step and its outcome is written to the log.

        Returns:
            None. Errors are logged instead of being raised.
        """
        try:
            version_status = os.system("libreoffice --version")
            logger.info(f"{version_status}")

            if version_status in [0, "0"]:
                logger.info(f"系统已安装")
                return

            logger.info(f"系统内部没有安装")
            logger.info(f"start install libreoffice")
            install_status = os.system("yum install -y libreoffice")
            if install_status in [0, "0"]:
                logger.info(f"安装成功")
            else:
                logger.info(f"安装失败")
        except Exception as e:
            logger.exception(e)
Exemplo n.º 7
0
    def start(self):
        """
        Seed the first queue and run the three consumer threads.

        ``self.start_item`` is pushed onto ``self.message_a``; then one
        ``get_message`` thread is started per queue: ``message_a`` ->
        ``download_page``, ``message_b`` -> ``primary`` and ``message_c`` ->
        ``merger_result``. Only the merge thread is joined before returning.

        Returns:
            ``self.result`` once the merge thread has finished, or None when
            an exception was raised (it is logged).
        """

        def consumer(queue_key, handler):
            # One polling thread per queue/handler pair.
            return Thread(target=self.get_message, args=(queue_key, handler))

        try:
            self.push_item_in_redis_list(self.message_a, self.start_item)

            download_thread = consumer(self.message_a, self.download_page)
            extract_thread = consumer(self.message_b, self.primary)
            merge_thread = consumer(self.message_c, self.merger_result)

            download_thread.start()
            logger.info("下载线程开启!")
            # Pause before starting the downstream consumers.
            time.sleep(3)

            extract_thread.start()
            logger.info("抽取线程开启!")

            merge_thread.start()
            logger.info("数据合并线程开启")

            merge_thread.join()
            return self.result
        except Exception as identifier:
            logger.exception(identifier)
Exemplo n.º 8
0
    def word_to_pdf_single(word_path: str, dir_path, delete=True):
        """
        Convert one doc/docx file to PDF with the ``soffice`` command line
        and optionally delete the source file afterwards.

        Nothing happens when ``word_path`` is empty or does not exist. The
        source is only removed when ``soffice`` exits with status 0 and
        ``delete`` is true.

        Args:
            word_path: path of the doc/docx file
            dir_path: directory the converted PDF is written to
            delete: whether to delete the source file; defaults to True

        Returns:
            None. Errors are logged instead of being raised.
        """
        # Function-scope import: the file's import block is not visible here.
        import subprocess

        try:
            if not (word_path and os.path.exists(word_path)):
                return
            # Argument list instead of a shell string: a path containing
            # spaces or shell metacharacters is passed as a single argument.
            status = subprocess.run(
                ["soffice", "--headless", "-convert-to", "pdf", word_path,
                 "--outdir", dir_path]).returncode
            if status == 0 and delete:
                try:
                    os.remove(word_path)
                    logger.info(f"rm {word_path}")
                except Exception as e:
                    logger.exception(e)
        except Exception as e:
            logger.exception(e)
Exemplo n.º 9
0
    def word_to_pdf_batch(self, dir_path):
        """
        Convert every Word file below a directory to PDF.

        The tree under ``dir_path`` is walked recursively. Each file whose
        extension is ``doc`` or ``docx`` and whose name does not start with
        "." is passed to ``self.word_to_pdf_single``, with ``dir_path`` as the
        output directory for all of them. The list of processed paths is
        logged at the end.

        Args:
            dir_path: directory to process; nothing happens when it is empty

        Returns:
            None. Errors are logged instead of being raised.
        """
        if not dir_path:
            return
        try:
            # Collect first, so the logged list reflects what was processed.
            all_word_files = []
            for root, _dirs, files in os.walk(dir_path):
                for file_name in files:
                    if file_name.startswith("."):
                        continue
                    if file_name.rsplit(".", 1)[-1] not in ("doc", "docx"):
                        continue
                    all_word_files.append(os.path.join(root, file_name))

            for word_path in all_word_files:
                self.word_to_pdf_single(word_path, dir_path)
            logger.info(all_word_files)
        except Exception as e:
            logger.exception(e)