def do_request(self, url, headers=None):
    """Fetch ``url`` with a blocking GET sent through a randomly chosen proxy.

    Up to six attempts are made (the counter runs from 5 down to 0). Every
    attempt asks ``get_proxy()`` for the current proxies and picks one at
    random for the ``http`` scheme.

    Args:
        url: Address to request.
        headers: Optional request headers; a default browser User-Agent is
            used when omitted or empty.

    Returns:
        The response object of the first attempt that completes (whatever
        its status code), or ``None`` when every attempt raised.
    """
    retry = 5
    o_headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    }
    headers = headers or o_headers
    while retry >= 0:
        try:
            # Fetched inside the try so that a failing or empty proxy source
            # just consumes one retry instead of escaping the loop.
            proxies = get_proxy()
            # NOTE: verify=False disables TLS certificate verification.
            res = requests.get(url,
                               headers=headers,
                               proxies={'http': random.choice(proxies)},
                               verify=False,
                               timeout=5)
            logging.info(
                'normal requests:<<<{}>>> PROXY:{}, URL {}'.format(
                    res.status_code, len(proxies), url))
            return res
        except Exception as e:
            logging.error(
                'current normal requests error:<<<{}>>>: {}'.format(
                    e, url))
            # No proxy refresh here: the next iteration fetches a fresh list
            # anyway, and a call in the handler would not be protected by
            # the try block.
            retry -= 1
    return None
async def handler_content(self, res, chapter: Chapter):
    """Parse a fetched response and persist it as the content of ``chapter``.

    For comic chapters the parsed content is a mapping of page index to
    image source; the images are stored through ``self.save_image`` and the
    stored image objects replace the mapping only when every page was saved.

    Args:
        res: Raw response handed to ``self.parser``.
        chapter: Chapter whose content is saved; ``chapter.active`` is set
            to ``True`` on success and ``False`` when the content is empty
            or saving raises ``OSError``.
    """
    content = self.parser(res)
    # str() first: comic content is a mapping, which cannot be sliced.
    logging.info('处理<<{}>>{},正文信息:{}...'.format(chapter.book, chapter,
                                                str(content)[:15]))

    if chapter.book_type == BOOK_TYPE_DESC.Comic:
        # Order pages by numeric key. Positional insert() only yields the
        # right order when the keys happen to be iterated ascending.
        imgs = [content[key] for key in sorted(content.keys(), key=int)]
        img_objs = await self.save_image(imgs,
                                         IMAGE_TYPE_DESC.CHAPER_CONTENT,
                                         self.headers)
        # Only use the stored image objects if every page was saved.
        if len(img_objs) and None not in img_objs:
            content = img_objs

    try:
        # Treat empty content as a failed save so the chapter is not
        # flagged active without a body.
        if not content:
            raise OSError
        chapter.save_content(content)
        chapter.active = True
        chapter.save()
    except OSError:
        logging.error('处理<<{}>>单章节正文信息 失败 : {}'.format(
            chapter.book, chapter))
        chapter.active = False
        chapter.save()
def do_request(self, url, headers=None):
    """Fetch ``url`` with a blocking GET through ``get_proxy()``.

    Two attempts are made (the counter runs from 1 down to 0). Each attempt
    first sleeps a random whole number of seconds in ``[0, 14]`` and sends
    the request with ``Connection: close``. Only the final failure is logged.

    Args:
        url: Address to request.
        headers: Optional request headers; a default browser User-Agent is
            used when omitted or empty. The mapping is not modified.

    Returns:
        The response object of the first attempt that completes (whatever
        its status code), or ``None`` when both attempts raised.
    """
    retry = 1
    o_headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
    }
    # Work on a copy so adding "Connection" never leaks into the caller's
    # dict (which may be shared between requests).
    headers = dict(headers or o_headers)
    headers["Connection"] = "close"
    while retry >= 0:
        try:
            # Random delay before each attempt.
            time.sleep(int(random.random() * 15))
            # NOTE: verify=False disables TLS certificate verification.
            res = requests.get(
                url,
                headers=headers,
                proxies=get_proxy(),
                verify=False,
                timeout=3,
            )
            logging.info(
                "normal requests:<<<{}>>> PROXY:{}, URL {}".format(
                    res.status_code, 1, url))
            return res
        except Exception as e:
            # Stay quiet on the first failure; report only the last one.
            if retry == 0:
                logging.error(
                    "current normal requests error:<<<{}>>>: {}".format(
                        e, url))
            retry -= 1
    return None
def handler_content(self, content, chapter: Chapter):
    """Persist already-parsed ``content`` as the content of ``chapter``.

    For comic chapters ``content`` is a mapping of page index to image
    source; the images are stored through ``self.save_image`` and the stored
    image objects replace the mapping only when every page was saved.

    Args:
        content: Parsed chapter content (text, or a page mapping for comics).
        chapter: Chapter whose content is saved; ``chapter.active`` is set
            to ``True`` on success and ``False`` when the content is empty
            or saving raises ``OSError``.
    """
    # str() first: comic content is a mapping, which cannot be sliced.
    logging.info("处理--{}--<<{}>>{},正文信息:{}...".format(
        self.wait_done, chapter.book, chapter, str(content)[:10]))

    if chapter.book_type == BOOK_TYPE_DESC.Comic:
        # Order pages by numeric key. Positional insert() only yields the
        # right order when the keys happen to be iterated ascending.
        imgs = [content[key] for key in sorted(content.keys(), key=int)]
        img_objs = self.save_image(imgs, IMAGE_TYPE_DESC.CHAPER_CONTENT,
                                   self.headers)
        # Only use the stored image objects if every page was saved.
        if len(img_objs) and None not in img_objs:
            content = img_objs

    try:
        # Empty content is routed to the failure branch below.
        if not content:
            raise OSError
        chapter.save_content(content)
        chapter.active = True
        chapter.save()
    except OSError:
        logging.error("处理<<{}>>单章节正文信息 失败 : {}".format(
            chapter.book, chapter))
        chapter.active = False
        chapter.save()
def send_book_to_kindle():
    """Create make-book and send-email tasks for every ready subscription.

    Subscriptions with ``ready=True`` are grouped by book. For each book the
    chapters from the first subscription's current chapter up to the book's
    latest chapter must all be active; otherwise the book is skipped. For a
    sendable book, a make-book task and a send-email task are created and
    handed to ``model_task``, and each subscription is advanced to the
    chapter that was packaged, marked not ready, and has its count
    incremented. A summary line is logged at the end.
    """
    logging.info("推送订阅书本至kindle任务开始")
    start = time.time()
    total = 0
    fail = 0
    look = 0
    book_ids = (SubscribeBook.normal.filter(ready=True).values_list(
        "book_id", flat=True).distinct())
    user_id = 1
    for book_id in book_ids:
        # Count every candidate book so the summary can report how many
        # were actually pushed (total - fail).
        total += 1
        subs = SubscribeBook.normal.filter(ready=True, book_id=book_id)
        first_sub = subs[0]
        book = first_sub.book
        start_chapter, end_chapter = first_sub.chapter, book.latest_chapter()

        # Check that every chapter to be sent is available. values_list
        # yields the bare booleans that all() needs; values() would not
        # accept flat=True.
        first_number = start_chapter.number if start_chapter else 0
        send_chapters = Chapter.normal.filter(
            book_id=book_id,
            number__in=list(range(first_number, end_chapter.number + 1)),
        ).values_list("active", flat=True)
        if not all(send_chapters):
            fail += 1
            # NOTE(review): counts one affected user per skipped book,
            # whereas the failure branch below counts every recipient.
            look += 1
            logging.info("{}部分章节不可用,不予推送至kindle".format(book.title))
            continue

        to_email = [sub.user.email for sub in subs]
        try:
            # Create the tasks and advance the subscriptions in one
            # transaction.
            with transaction.atomic():
                task_makebook = Task.create_task_for_make_book(
                    user_id,
                    book_id,
                    start_chapter.id if start_chapter else 0,
                    end_chapter.id,
                )
                task_email = Task.create_task_for_send_email(
                    user_id, book_id, list(set(to_email)))
                # NOTE(review): the tasks are dispatched before the
                # transaction commits; confirm the worker cannot pick them
                # up before the rows are visible.
                model_task.delay([task_makebook.id, task_email.id])
                for sub in subs:
                    # Advance to the chapter that was packaged rather than
                    # re-querying the latest chapter, which may have moved.
                    sub.chapter_id = end_chapter.id
                    sub.ready = False
                    sub.count = sub.count + 1
                    sub.save()
        except Exception as e:
            fail += 1
            look += len(to_email)
            logging.error(f"推送订阅书本至kindle任务book_id: {book_id}, 失败。原因: {e}")

    stop = time.time()
    logging.info("推送订阅书本至kindle任务创建结束,共推送{}本, 失败{}本, 受影响用户{}位, 共耗时{}秒".format(
        total - fail if total > fail else 0, fail, look, stop - start))
def handle_single_task(task_id):
    """Run the task identified by ``task_id`` and record its outcome.

    The task is marked RUNNING, its JSON ``content`` is decoded, and the
    worker matching ``task_type`` is built and run. Any exception raised
    while decoding, building or running marks the task FAILD with the error
    text in ``markup``. An unknown task type is also marked FAILD. On
    success the task is marked FINISH and the elapsed time is logged.

    Args:
        task_id: Primary key of the ``Task`` to execute. The lookup itself
            is not guarded, so a missing task propagates its exception.
    """
    task = Task.objects.get(id=task_id)
    logging.info(f"任务{task.id}——{task.task_type}开始, 内容: {task.content}")
    start = time.time()
    task.task_status = TASK_STATUS_DESC.RUNNING
    task.markup = ""
    task.progress = 0
    task.save()

    try:
        content = json.loads(task.content)
        # Every branch compares with ==. The previous `in` test on two of
        # the branches was a substring match for string constants.
        if task.task_type == TASK_TYPE_DESC.BOOK_INSERT:
            # Insert a new book.
            s = BookInsertByUrlClient(url=content["url"],
                                      book_type=content["book_type"])
        elif task.task_type == TASK_TYPE_DESC.BOOK_UPDATE:
            # Full update of a book.
            s = BookUpdateClient(
                book_id=content["book_id"],
                chapter_id=content["chapter_id"],
                update_type=content["update_type"],
            )
        elif task.task_type == TASK_TYPE_DESC.MAKE_BOOK:
            # Package a book.
            s = MakeMyPDF(
                book_id=content["book_id"],
                start_chapter_id=content["start_chapter_id"],
                end_chapter_id=content["end_chapter_id"],
            )
        elif task.task_type == TASK_TYPE_DESC.SEND_TO_KINDLE:
            # Push to Kindle by email.
            s = SendKindleEmail(book_id=content["book_id"], to=content["to"])
        else:
            task.task_status = TASK_STATUS_DESC.FAILD
            task.markup = "任务未执行, {}不存在".format(task.task_type)
            task.save()
            return
        s.run()
    except Exception as e:
        error_info = f"执行任务{task.id}失败: {e}"
        logging.error(error_info)
        task.markup = error_info
        task.task_status = TASK_STATUS_DESC.FAILD
        task.save()
        return

    task.task_status = TASK_STATUS_DESC.FINISH
    task.save()
    stop = time.time()
    logging.info("handle_single_task任务结束, 共耗时{}秒".format(stop - start))
def get_token(self, obj):
    """Return the auth token key for ``obj`` when it is the requesting user.

    An empty string is returned when the user on the request in
    ``self.context`` has a different id than ``obj``, and also when anything
    raises along the way (the error is logged).
    """
    try:
        requester = self.context.get("request").user
        if requester.id == obj.id:
            # Fetch the existing token or create one on first access.
            return Token.objects.get_or_create(user=obj)[0].key
        logging.info("get token illegal, data not belong to current user")
        return ""
    except Exception as e:
        logging.error("get token error: {}".format(e))
        return ""
async def async_do_request(self, url, content_type='text', headers=None, **kwargs):
    """Handle a request asynchronously; ``url`` is the address to request.

    Up to six attempts are made (the counter runs from 5 down to 0), each
    through a proxy picked at random from ``get_proxy()``. A 403 response,
    a proxy error, a timeout or a connection error consumes one retry; any
    other error status or exception ends the call.

    Args:
        url: Address to request.
        content_type: ``'text'`` to return the decoded body, ``'read'`` to
            return the raw bytes.
        headers: Optional request headers; a default browser User-Agent is
            used when omitted or empty.
        **kwargs: Forwarded to ``res.text()`` / ``res.read()``.

    Returns:
        The response body in the requested form, or ``None`` on failure or
        when ``content_type`` is not supported.
    """
    # Reject unknown content types up front: otherwise a successful response
    # would fall through without consuming a retry and the loop would never
    # terminate.
    if content_type not in ('text', 'read'):
        logging.error('unsupported content_type {!r} for url: {}'.format(
            content_type, url))
        return None

    retry = 5
    o_headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    }
    headers = headers or o_headers
    conn = TCPConnector(limit=50)
    async with aiohttp.ClientSession(connector=conn) as session:
        while retry >= 0:
            try:
                proxies = get_proxy()
                async with session.get(url,
                                       proxy=random.choice(proxies),
                                       verify_ssl=False,
                                       headers=headers,
                                       timeout=5) as res:
                    logging.info(
                        'current asyncio requests is {}:proxy:{}<<<{}>>> {}'
                        .format(str(retry), len(proxies), str(res.status),
                                url))
                    if res.status == 403:
                        # Retry with another proxy. Raising a bare
                        # ClientHttpProxyError cannot work here: it has
                        # required constructor arguments, so the raise
                        # itself failed and was swallowed by the generic
                        # handler without retrying.
                        retry -= 1
                        continue
                    if res.status >= 400:
                        return None
                    if content_type == 'text':
                        return await res.text(**kwargs)
                    return await res.read(**kwargs)
            except (aiohttp.ClientHttpProxyError,
                    asyncio.exceptions.TimeoutError,
                    aiohttp.client_exceptions.ClientConnectorError):
                # Transient proxy/network failure: the next iteration
                # fetches a fresh proxy list.
                retry -= 1
            except Exception as e:
                logging.error('异步请求异常:{}, 当前url: {}'.format(e, url))
                return None
    return None