import json

from aiohttp import web
from multidict import CIMultiDict

import log_utils
from core.crawler import crawl

logger = log_utils.LogHandler('server', file=True)


def _gen_headers(r):
    """Build the headers returned to the client: copy every Set-Cookie header
    from the upstream response and record the proxy that served it."""
    res_headers = CIMultiDict()
    if 'Set-Cookie' in r.headers:
        for cookie in r.headers.getall('Set-Cookie'):
            res_headers.add('Set-Cookie', cookie)
    res_headers['Via-Proxy'] = str(r.proxy)
    return res_headers


async def forward(method, url, pam, pom, **kwargs):
    """Forward the incoming request to the target URL on behalf of the client."""
    session = kwargs.get('session')
    headers = kwargs.get('headers')
    content = kwargs.get('content')
    mode = kwargs.get('mode', 'score')

    # Upgrade the scheme when the client asked for HTTPS via the Need-Https header.
    need_https = 'Need-Https' in headers
    if need_https:
        url = url.replace('http://', 'https://', 1)

    pattern_str, check_rule_json = pam.t.closest_pattern(url)
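# Hypothetical usage sketch, not part of the original module: it only shows how
# the headers built by _gen_headers(r) might be attached to the reply returned
# to the client. The helper name `_reply` and its argument `r` (a crawled
# response exposing .status, .headers and .proxy) are assumptions for
# illustration only.
async def _reply(r):
    body = await r.read()                        # body fetched by the crawler
    return web.Response(status=r.status,         # mirror the upstream status
                        headers=_gen_headers(r),
                        body=body)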
import asyncio
import sys
import traceback

import aiohttp

import log_utils
from config import conf
from models.response import FailedResponse, Response

logger = log_utils.LogHandler(__name__, file=True)


def init_session():
    # Shared client session with a high connection limit and DNS caching.
    conn = aiohttp.TCPConnector(limit=500, ttl_dns_cache=30*60)
    return aiohttp.ClientSession(connector=conn)


async def _crawl(method, url, session, **kwargs):
    proxy = kwargs.get('proxy')
    if proxy is not None:
        kwargs['proxy'] = str(proxy)
    kwargs.update({'ssl': False, 'timeout': kwargs.get('timeout') or conf.timeout})
    try:
        async with session.request(method, url, **kwargs) as r:
            # Re-brand the aiohttp response as our Response subclass and make
            # sure the body is read before the connection is released.
            r.__class__ = Response
            r.request_data = kwargs.get('data')
            await r.read()
    except asyncio.CancelledError:
        r = FailedResponse()
        r.cancelled = True
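# Hypothetical usage sketch, not part of the original module: drives a single
# request through _crawl with a shared session. It assumes _crawl ultimately
# returns the Response/FailedResponse it builds (its body is truncated above);
# the coroutine name `demo` and the example URL are illustrative only.
async def demo():
    session = init_session()
    try:
        r = await _crawl('GET', 'http://example.com', session)
        print(getattr(r, 'status', None))
    finally:
        await session.close()

# asyncio.run(demo())   # kept commented out so importing the file stays side-effect free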