Exemplo n.º 1
0
    def spider_book_info_by_task(self):
        """Fetch book info for every unprocessed ``BookTask``.

        For each task whose ``Process`` flag is false: if the book name is
        not already known to ``cacheContext``, look it up through
        ``qdh5.get_book``, persist the generated model and push it onto the
        book queue. The task is then flagged as processed. A task whose
        lookup returns ``None`` is skipped and stays unprocessed.

        The database session is always closed, even if a lookup or commit
        raises; the exception itself still propagates to the caller.
        """
        _name = '书籍搜索'
        self.log.info(f"[{_name}]模块开始处理")
        session = EBookSession()
        try:
            # Models are handed to the queue after commit, so they must not be
            # expired by the commit.
            session.expire_on_commit = False
            tasks = session.query(BookTask).filter_by(Process=False).all()
            self.log.info(f"[{_name}]待处理数据 {len(tasks)} 条")
            for task in tasks:
                book_name = task.Name

                if cacheContext.exists_book(book_name) is False:
                    # Book not known yet: look it up.
                    book = qdh5.get_book(book_name)
                    if book is None:
                        # Nothing found; leave the task unprocessed.
                        continue
                    model = self.generate_book_model(**book)

                    self.log.info(f"[{_name}]提取到书籍:{book}")
                    session.add(model)
                    session.commit()

                    # Hand the stored book over to the book queue.
                    self.__book_queue.put(model)

                task.Process = True
            # Persist the Process flags that were not flushed by a commit above.
            session.commit()
        finally:
            # Release the session on both the success and the failure path.
            session.close()
        self.log.info(f"[{_name}]模块处理完成...")
Exemplo n.º 2
0
    def fether_chapter(self, id):
        """Worker function: fetch the chapter list of every queued book.

        Takes books from the book queue until it is empty. For each book the
        chapter list is requested from ``xbqg.get_chapters``; every chapter
        is stored, committed and pushed onto the chapter queue, and the book
        row is then flagged ``Process = True``. Books with no chapter list
        (``None`` or empty) are dropped without being flagged.

        If anything raises while handling a book, the error is logged, the
        session is rolled back so it stays usable, and the book is put back
        at the end of the queue for another attempt.

        Args:
            id (int): Worker number, used only in the log prefix.
        """
        fix = f"[获取章节信息(线程{id})]"
        self.log.info(f"{fix} 开始执行")

        session = EBookSession()
        try:
            # Chapter models are read after commit (they go to the chapter
            # queue), so they must not be expired by the commit.
            session.expire_on_commit = False
            # Number of books taken from the queue by this worker. Kept
            # separate from the chapter counter below so the progress prefix
            # is not clobbered by the inner loop.
            processed = 0

            # NOTE(review): empty() followed by a blocking get() can block if
            # another worker takes the last item in between — confirm whether
            # several workers share this queue.
            while not self.__book_queue.empty():
                book = self.__book_queue.get()
                processed += 1
                msg = f"{fix}[{processed}/{self.__book_queue.qsize()}]处理书籍 {book.Name}"
                try:
                    chapters = xbqg.get_chapters(book.Name)
                    if chapters is None:
                        continue
                    total = 0
                    for total, chapter in enumerate(chapters, start=1):
                        model = Chapter(book.Id, total, chapter['name'],
                                        chapter['url'])
                        # Store the chapter.
                        session.add(model)
                        session.commit()

                        # Hand the stored chapter over to the chapter queue.
                        self.__chapter_queue.put(model)

                    if total == 0:
                        continue

                    # Flag the book as processed.
                    session.query(Book).filter(Book.Id == book.Id).update(
                        {"Process": True})
                    session.commit()

                    self.log.info(f"{msg} ,  提取到章节 {total} 条,已加入待下载任务")

                except Exception:
                    self.log.error(f"{msg} 异常", exc_info=True)
                    # A failed flush/commit leaves the session unusable until
                    # it is rolled back; without this every retry would fail.
                    session.rollback()
                    # NOTE(review): chapters committed before the failure stay
                    # in the database, so a retry may store them again.
                    self.__book_queue.put(book)
                    self.log.warning(f"{msg} 异常,已经放到任务末尾等待重新执行")
        finally:
            # Release the session on both the success and the failure path.
            session.close()
        self.log.info(f"{fix} 执行完成")
Exemplo n.º 3
0
    def spider_book_info_by_category(self):
        """Fetch book info for every category known to ``cacheContext``.

        For each category the book list is requested from
        ``qdh5.get_books(category.Url)``. Every book whose name is not
        already known to ``cacheContext`` is turned into a model, tagged with
        its category / sub-category ids, persisted and pushed onto the book
        queue.

        A category with ``ParentId == 0`` is treated as a top-level category:
        its own id becomes ``CategoryId`` and ``SubCategoryId`` is 0.
        Otherwise ``ParentId`` becomes ``CategoryId`` and the category's own
        id becomes ``SubCategoryId``.

        The database session is always closed, even if a lookup or commit
        raises; the exception itself still propagates to the caller.
        """

        _name = '分类书籍提取'
        self.log.info(f"[{_name}]模块开始处理")

        session = EBookSession()
        try:
            # Models are handed to the queue after commit, so they must not be
            # expired by the commit.
            session.expire_on_commit = False
            categories = cacheContext.get_all_category()
            self.log.info(f"[{_name}]分类数量:{len(categories)}")
            for category in categories:
                self.log.info(f"[{_name}]处理分类:{category.Name}")
                if category.ParentId == 0:
                    category_id, sub_category_id = category.Id, 0
                else:
                    category_id, sub_category_id = category.ParentId, category.Id

                # NOTE(review): other scraper calls in this file are checked
                # for None, so a None result is treated as "no books" here —
                # confirm get_books can return None.
                books = qdh5.get_books(category.Url)
                for book in books or ():
                    if cacheContext.exists_book(book['name']) is True:
                        continue
                    # Book not known yet: build and store it.
                    model = self.generate_book_model(**book)
                    model.CategoryId = category_id
                    model.SubCategoryId = sub_category_id

                    self.log.info(f"[{_name}]提取到书籍:{model}")
                    session.add(model)
                    session.commit()

                    # Hand the stored book over to the book queue.
                    self.__book_queue.put(model)
                # Per-category commit; nothing is pending here when every book
                # above was already committed individually.
                session.commit()
        finally:
            # Release the session on both the success and the failure path.
            session.close()
        self.log.info(f"[{_name}]模块处理完成...")