Пример #1
0
import json

from aiohttp import web
from multidict import CIMultiDict

import log_utils
from core.crawler import crawl

# Module-level logger named 'server'; file=True is forwarded to LogHandler
# (presumably turns on file output -- confirm in log_utils).
logger = log_utils.LogHandler('server', file=True)


def _gen_headers(r):
    """Build the header set derived from the response ``r``.

    Each ``Set-Cookie`` value present on ``r.headers`` is copied over
    individually, and ``str(r.proxy)`` is stored under ``Via-Proxy``.
    Returns a new ``CIMultiDict``.
    """
    cookie_key = 'Set-Cookie'
    upstream = r.headers
    # getall() is only consulted when the key is present.
    cookies = upstream.getall(cookie_key) if cookie_key in upstream else ()

    out = CIMultiDict()
    for value in cookies:
        # add() appends, so several cookies are kept as separate entries.
        out.add(cookie_key, value)
    out['Via-Proxy'] = str(r.proxy)
    return out


async def forward(method, url, pam, pom, **kwargs):
    """Prepare a request for ``url`` to be forwarded (summary inferred from the name -- confirm).

    The steps documented here are: read the optional settings from
    ``kwargs``, optionally switch the URL scheme to https, and look up the
    closest pattern for the URL via ``pam``.

    Keyword arguments read from ``kwargs``:
        session: ``None`` when absent.
        headers: ``None`` when absent; a ``Need-Https`` key requests the
            http -> https rewrite of ``url``.
        content: ``None`` when absent.
        mode: defaults to ``'score'``.

    NOTE(review): when ``headers`` is not supplied it is ``None`` and the
    ``'Need-Https' in headers`` test raises ``TypeError``; callers
    presumably always pass ``headers`` -- confirm.
    """
    session = kwargs.get('session')
    headers = kwargs.get('headers')
    content = kwargs.get('content')
    mode = kwargs.get('mode', 'score')

    # Only the presence of the key matters; its value is not inspected.
    need_https = 'Need-Https' in headers
    if need_https:
        # Rewrites just the first 'http://' occurrence; the match is not
        # anchored to the start of the string.
        url = url.replace('http://', 'https://', 1)

    # closest_pattern() is expected to yield a 2-item result for the
    # (possibly rewritten) URL; it is unpacked into the two names below.
    pattern_str, check_rule_json = pam.t.closest_pattern(url)
Пример #2
0
import asyncio
import sys
import traceback

import aiohttp

import log_utils
from config import conf
from models.response import FailedResponse, Response

# Module-level logger named after this module; file=True is forwarded to
# LogHandler (presumably turns on file output -- confirm in log_utils).
logger = log_utils.LogHandler(__name__, file=True)


def init_session():
    """Create and return a new ``aiohttp.ClientSession``.

    The session is backed by a ``TCPConnector`` configured with
    ``limit=500`` and ``ttl_dns_cache=1800`` (written as ``30 * 60``).
    """
    connector_options = {
        'limit': 500,
        'ttl_dns_cache': 30 * 60,
    }
    return aiohttp.ClientSession(
        connector=aiohttp.TCPConnector(**connector_options),
    )


async def _crawl(method, url, session, **kwargs):
    proxy = kwargs.get('proxy')
    if proxy is not None:
        kwargs['proxy'] = str(proxy)
    kwargs.update({'ssl': False, 'timeout': kwargs.get('timeout') or conf.timeout})
    try:
        async with session.request(method, url, **kwargs) as r:
            r.__class__ = Response
            r.request_data = kwargs.get('data')
            await r.read()
    except asyncio.CancelledError:
        r = FailedResponse()
        r.cancelled = True