async def handler(self):
    """Crawl every site whose parser supports bulk book parsing.

    Sites lacking ``parse_all_book`` are skipped. Large sites are walked
    in batches of ``together_num`` pages so each ``do_all_book`` call
    issues a bounded number of concurrent requests.
    """
    logging.info("自动新增书籍开始执行!")
    for site in parser_selector.regular.keys():
        if not hasattr(parser_selector.get_parser(site), 'parse_all_book'):
            continue
        logging.info("自动新增书籍开始执行,{}".format(site))
        parser_cls = parser_selector.get_parser(site)
        # Number of pages handled per batch (caps simultaneous requests).
        together_num = 50
        if parser_cls.total_page > together_num:
            times = 0
            while times * together_num < parser_cls.total_page:
                # Clamp the batch end to the last available page.
                end_num = min((times + 1) * together_num,
                              parser_cls.total_page)
                await self.do_all_book(parser_cls, times * together_num,
                                       end_num)
                times += 1
        else:
            await self.do_all_book(parser_cls, 1, parser_cls.total_page)
async def handler(self):
    """Quickly add books by enumerating per-book URLs for each site.

    Only sites whose parser exposes ``all_book_url_one_by_one`` are
    handled. URLs already stored in the database are skipped; the rest
    are dispatched in batches of 20000 via ``handler_threading``.
    """
    logging.info("自动快速新增书籍开始执行!")
    for site in parser_selector.regular.keys():
        if not hasattr(parser_selector.get_parser(site),
                       'all_book_url_one_by_one'):
            continue
        parser_cls = parser_selector.get_parser(site)
        self.url_done = []
        urls = []
        exist = Book.normal.all().values_list('origin_addr')
        # Set gives O(1) membership tests against stored addresses.
        exist_urls = {row[0] for row in exist}
        for i in range(211, parser_cls.total_all_book):
            url = parser_cls.all_book_url_one_by_one.format(i)
            # BUG FIX: compare the built URL string — not the integer
            # index — against the stored origin addresses; the original
            # check (`i in exist_urls`) could never match.
            if url in exist_urls:
                continue
            urls.append(url)
            if len(urls) >= 20000:
                self.handler_threading(urls)
                urls = []
        # Flush the final partial batch for this site.
        self.handler_threading(urls)
    logging.info("自动快速新增书籍执行结束!共添加{}条数据".format(self.total_done))
def handler(self):
    """Run a full-book crawl on every site whose parser supports it."""
    logging.info("自动新增书籍开始执行!")
    for site in parser_selector.regular.keys():
        if hasattr(parser_selector.get_parser(site), "parse_all_book"):
            logging.info("自动新增书籍开始执行,{}".format(site))
            parser_cls = parser_selector.get_parser(site)
            self.do_all_book(parser_cls, 1, parser_cls.total_page)
async def handler(self):
    """Slowly add books one URL at a time, inserting full content."""
    logging.info("自动缓慢新增书籍开始执行!")
    for site in parser_selector.regular.keys():
        if not hasattr(parser_selector.get_parser(site),
                       'all_book_url_one_by_one'):
            continue
        parser_cls = parser_selector.get_parser(site)
        # NOTE(review): the range is hard-coded to a single id (284999) —
        # presumably a debug override of the full-range crawl; confirm
        # before relying on this in production.
        for book_id in range(284999, 285000):
            book_url = parser_cls.all_book_url_one_by_one.format(book_id)
            client = BookInsertClient(book_url, parser_cls.book_type,
                                      'with_content')
            await client.handler()
def __init__(self, url):
    """Prepare a bulk-book parser client for *url*.

    Resolves the site parser for the URL and caches its
    ``parse_all_book`` callable, optional request headers, and the
    response encoding (defaults to utf-8).
    """
    self.url = url
    parser = parser_selector.get_parser(url)
    self.parser = parser.parse_all_book
    # getattr with a default replaces the hasattr/ternary pattern.
    self.headers = getattr(parser, 'request_header', None)
    self.encoding = getattr(parser, 'encoding', 'utf-8')
def __init__(self, chapter: Chapter = None, book: Book = None):
    """Prepare a chapter-content parser client.

    Args:
        chapter (Chapter, optional): source of the origin address when
            no book is given.
        book (Book, optional): takes precedence over *chapter* when both
            are supplied.

    Raises:
        AttributeError: if both *chapter* and *book* are None.
    """
    self.chapter = chapter
    self.book = book
    origin_addr = book.origin_addr if book else chapter.origin_addr
    parser = parser_selector.get_parser(origin_addr)
    self.parser = parser.parse_chapter_content
    # getattr with a default replaces the hasattr/ternary pattern.
    self.headers = getattr(parser, 'request_header', None)
    self.encoding = getattr(parser, 'encoding', 'utf-8')
def __init__(self, book: Book):
    """Prepare a chapter-list parser client for *book*.

    Caches the site parser's ``parse_chapter`` callable, optional
    request headers, and the response encoding (defaults to utf-8).
    """
    self.book = book
    self.book_type = book.book_type
    parser = parser_selector.get_parser(book.origin_addr)
    self.parser = parser.parse_chapter
    # getattr with a default replaces the hasattr/ternary pattern.
    self.headers = getattr(parser, 'request_header', None)
    self.encoding = getattr(parser, 'encoding', 'utf-8')
def __init__(self, url, book_type, on_shelf=True, book=None):
    """Prepare a book-info parser client.

    Args:
        url (str): book page address.
        book_type (str): book category.
        on_shelf (bool, optional): whether the book is listed.
            Defaults to True.
        book (Book, optional): existing book record. Defaults to None.
    """
    self.url = url
    self.book_type = book_type
    self.on_shelf = on_shelf
    self.book = book
    parser = parser_selector.get_parser(url)
    self.parser = parser.parse_info
    # getattr with a default replaces the hasattr/ternary pattern.
    self.headers = getattr(parser, 'request_header', None)
    self.encoding = getattr(parser, 'encoding', 'utf-8')
def __init__(self, book: Book):
    """Update a book's chapter list; chapter contents are not fetched.

    Args:
        book (Book): the book whose chapter list will be refreshed.
    """
    self.book = book
    self.book_type = book.book_type
    parser = parser_selector.get_parser(book.origin_addr)
    self.parser = parser.parse_chapter
    # getattr with a default replaces the hasattr/ternary pattern.
    self.headers = getattr(parser, "request_header", None)
    self.encoding = getattr(parser, "encoding", "utf-8")
def book_insert(self, url):
    """Insert one book (with content), tracking progress across threads.

    Falsy or already-processed URLs are skipped. Shared state
    (``url_done``, ``total_done``, ``current_run_threading``) is guarded
    by the module-level ``lock``.
    """
    # BUG FIX: the original early-returned on the duplicate-URL branch
    # while still holding the lock, deadlocking every other worker.
    # `with lock:` guarantees release on every exit path.
    with lock:
        if not url or url in self.url_done:
            return
        self.url_done.append(url)
        self.total_done += 1
        self.current_run_threading.append(url)
    parser_cls = parser_selector.get_parser(url)
    bic = BookInsertClient(url, parser_cls.book_type, 'with_content')
    bic.run()
    time.sleep(1)  # throttle between inserts to be polite to the origin site
    with lock:
        self.current_run_threading.remove(url)
        logging.info('当前还有线程 共 {} 条等待执行结束'.format(
            len(self.current_run_threading)))
def __init__(self, url, book_type, on_shelf=True, book=None):
    """Handle book metadata only; chapters are not processed here.

    Args:
        url (str): book page address.
        book_type (str): book category.
        on_shelf (bool, optional): whether the book is listed.
            Defaults to True.
        book (Book, optional): existing book record. Defaults to None.
    """
    self.url = url
    self.book_type = book_type
    self.on_shelf = on_shelf
    self.book = book
    parser = parser_selector.get_parser(url)
    self.parser = parser.parse_info
    # getattr with a default replaces the hasattr/ternary pattern.
    self.headers = getattr(parser, "request_header", None)
    self.encoding = getattr(parser, "encoding", "utf-8")
def __init__(self, chapter: Chapter = None, book: Book = None, fast=False):
    """Update chapter content.

    Args:
        chapter (Chapter, optional): update only this chapter's content.
        book (Book, optional): update content for every chapter of this
            book. Supply exactly one of *chapter*/*book*; *book* takes
            precedence when both are given.
        fast (bool, optional): use multithreading. Defaults to False.

    Raises:
        AttributeError: if both *chapter* and *book* are None.
    """
    self.chapter = chapter
    self.book = book
    self.fast = fast
    self.wait_done = 0
    origin_addr = book.origin_addr if book else chapter.origin_addr
    parser = parser_selector.get_parser(origin_addr)
    self.parser = parser.parse_chapter_content
    # getattr with a default replaces the hasattr/ternary pattern.
    self.headers = getattr(parser, "request_header", None)
    self.encoding = getattr(parser, "encoding", "utf-8")