def _test_crawl_comicbook(site, comicid=None, chapter_number=1):
    """Crawl one comic on *site*, verify a chapter has images, and sanity-check search.

    Returns the ``(comicbook, chapter)`` pair so callers can inspect them further.
    """
    book = ComicBook.create_comicbook(site=site, comicid=comicid)
    book.start_crawler()
    chapter = book.Chapter(chapter_number=chapter_number)
    # A chapter with zero image URLs means the crawler failed to parse the page.
    assert len(chapter.image_urls) > 0
    # Log chapter first, then the book — same order as the original dump.
    for obj in (chapter, book):
        logger.info(obj.to_dict())
    result = ComicBook.search(site=site)
    assert len(result) > 0
    return book, chapter
def crawl_comicbook(site, comicid, chapter_number):
    """Build a comicbook for *site*/*comicid*, fetch one chapter, and dump both to stdout.

    Returns the ``(comicbook, chapter)`` pair.
    """
    book = ComicBook.create_comicbook(site=site, comicid=comicid)
    chapter = book.Chapter(chapter_number=chapter_number)
    # Fail loudly if the crawler extracted no image URLs for this chapter.
    assert len(chapter.image_urls) > 0
    print(chapter.to_dict())
    print(book.to_dict())
    return book, chapter
def search(site):
    """Flask view: search *site* for the ``name`` query parameter.

    Responds 400 when ``name`` is missing or empty; otherwise returns a JSON
    object with a ``search_result`` list of serialized items.
    """
    name = request.args.get('name')
    if not name:
        abort(400)
    items = ComicBook.search(site=site, name=name)
    payload = {"search_result": [item.to_dict() for item in items]}
    return jsonify(payload)
def crawl_comicbook(site, comicid, chapter_number):
    """Crawl *site*/*comicid*, fetch one chapter, and log both as dicts.

    Returns the ``(comicbook, chapter)`` pair.
    """
    book = ComicBook.create_comicbook(site=site, comicid=comicid)
    book.start_crawler()
    chapter = book.Chapter(chapter_number=chapter_number)
    # No image URLs means the chapter page failed to parse.
    assert len(chapter.image_urls) > 0
    logger.info(chapter.to_dict())
    logger.info(book.to_dict())
    return book, chapter
def test_u17():
    """Smoke test for the u17 crawler (雏蜂, http://www.u17.com/comic/195.html)."""
    site = "u17"
    # chapter_number=-1 selects the last chapter.
    crawl_comicbook(site=site, comicid="195", chapter_number=-1)
    result = ComicBook.search(site=site, name="雏蜂")
    assert len(result) > 0
def test_ishuhui():
    """Smoke test for the ishuhui crawler (海贼王, https://www.ishuhui.com/comics/anime/1)."""
    site = "ishuhui"
    # chapter_number=-1 selects the last chapter.
    crawl_comicbook(site=site, comicid="1", chapter_number=-1)
    result = ComicBook.search(site=site, name="海贼王")
    assert len(result) > 0
def test_qq():
    """Smoke test for the qq crawler (海贼王, https://ac.qq.com/Comic/ComicInfo/id/505430)."""
    site = "qq"
    # chapter_number=-1 selects the last chapter.
    crawl_comicbook(site=site, comicid="505430", chapter_number=-1)
    result = ComicBook.search(site=site, name="海贼王")
    assert len(result) > 0
def test_bilibili():
    """Smoke test for the bilibili crawler (航海王, https://manga.bilibili.com/detail/mc24742)."""
    site = "bilibili"
    # Unlike the other site tests, this one fetches the first chapter.
    crawl_comicbook(site=site, comicid="mc24742", chapter_number=1)
    result = ComicBook.search(site=site, name="航海王")
    assert len(result) > 0
def get_comicbook_from_cache(site, comicid=None):
    """Build a ComicBook for *site*, applying any configured proxy and saved cookies.

    Reads the per-site proxy from the app config and, when a cookies file
    exists on disk, loads it into the site's session.
    """
    book = ComicBook(site=site, comicid=comicid)
    site_proxy = current_app.config.get(ConfigKey.CRAWLER_PROXY, {}).get(site)
    if site_proxy:
        SessionMgr.set_proxy(site=site, proxy=site_proxy)
    cookies_path = get_cookies_path(site=site)
    if os.path.exists(cookies_path):
        SessionMgr.load_cookies(site=site, path=cookies_path)
    return book
def test_wangyi():
    """Smoke test for the wangyi crawler (海贼王, https://manhua.163.com/source/5015165829890111936)."""
    site = "wangyi"
    # chapter_number=-1 selects the last chapter.
    crawl_comicbook(site=site, comicid="5015165829890111936", chapter_number=-1)
    result = ComicBook.search(site=site, name="海贼王")
    assert len(result) > 0
def search(site):
    """Flask view: search *site* for ``name``, capped at ``limit`` results.

    ``limit`` defaults to 20 and is parsed as an int from the query string.
    Responds 400 when ``name`` is missing or empty.
    """
    name = request.args.get('name')
    limit = request.args.get('limit', default=20, type=int)
    if not name:
        abort(400)
    items = ComicBook.search(site=site, name=name, limit=limit)
    payload = {"search_result": [item.to_dict() for item in items]}
    return jsonify(payload)
def _test_crawl_comicbook(site, comicid=None, chapter_number=1, proxy=None, test_search=True):
    """Crawl a comic on *site* (optionally through *proxy*) and optionally test search.

    Returns the ``(comicbook, chapter)`` pair for further inspection.
    """
    book = ComicBook(site=site, comicid=comicid)
    if proxy:
        SessionMgr.set_proxy(site=site, proxy=proxy)
    book.start_crawler()
    chapter = book.Chapter(chapter_number=chapter_number)
    # An empty image list means the chapter page failed to parse.
    assert len(chapter.image_urls) > 0
    logger.info(chapter.to_dict())
    logger.info(book.to_dict())
    if test_search:
        # Search for the crawler's own default name and expect a non-empty result.
        found = book.search(name=book.crawler.DEFAULT_SEARCH_NAME)
        assert len(found.to_dict()) > 0
    return book, chapter
def get_comicbook_from_cache(site, comicid):
    """Return a comicbook for *site*/*comicid* via the factory."""
    return ComicBook.create_comicbook(site=site, comicid=comicid)
def get_comicbook_from_cache(site, comicid=None):
    """Return a ComicBook for *site*, rejecting sites listed as unsupported.

    Raises SiteNotSupport when *site* is in ``const.NOT_SUPPORT_SITES``.
    """
    if site in const.NOT_SUPPORT_SITES:
        raise SiteNotSupport()
    return ComicBook(site=site, comicid=comicid)
def parse_url_info(url):
    """Resolve *url* to its site and comic id.

    Returns a dict with ``site``, ``comicid``, and the original ``url``.
    """
    site = ComicBook.get_site_by_url(url)
    return {
        "site": site,
        "comicid": ComicBook.get_comicid_by_url(site=site, url=url),
        "url": url,
    }
def get_comicbook(site, comicid):
    """Thin wrapper around the ComicBook factory for *site*/*comicid*."""
    book = ComicBook.create_comicbook(site=site, comicid=comicid)
    return book
def get_comicbook_from_cache(site, comicid=None):
    """Return a freshly constructed ComicBook for *site*/*comicid*."""
    return ComicBook(site=site, comicid=comicid)