示例#1
0
    def build_request(self,
                      obj,
                      download_size_limit=config.download_size_limit):
        """Turn a task entry into a ready-to-send ``HTTPRequest``.

        ``obj`` must provide the keys ``'env'``, ``'rule'`` and
        ``'request'``. The request template is rendered via ``self.render``
        using ``env['variables']`` and ``env['session']``.

        A fresh ``CookieSession`` is filled, in this order, from the
        request's ``Cookie`` header, the session stored in the environment
        and the request's own cookie list; it then replaces
        ``env['session']`` and provides the final ``Cookie`` header.

        Returns the tuple ``(req, rule, env)``.
        """
        env, rule = obj['env'], obj['rule']
        rendered = self.render(obj['request'], env['variables'],
                               env['session'])

        def to_mapping(entries):
            # Collapse a list of {'name': ..., 'value': ...} items into a
            # dict; a later duplicate name overwrites an earlier one.
            mapping = {}
            for entry in entries:
                name = entry['name']
                mapping[name] = entry['value']
            return mapping

        method = rendered['method']
        url = rendered['url']
        headers = to_mapping(rendered['headers'])
        cookies = to_mapping(rendered['cookies'])

        # GET never carries a body; POST falls back to an empty string when
        # the template has no 'data' key at all.
        if method == 'GET':
            body = None
        elif method == 'POST':
            body = rendered.get('data', '')
        else:
            body = rendered.get('data')

        def set_size_limit_callback(curl):
            def size_limit(download_size, downloaded, upload_size, uploaded):
                # Returning 1 signals "too large", either from the announced
                # total or from what has been received so far.
                announced_too_big = (download_size
                                     and download_size > download_size_limit)
                if announced_too_big or downloaded > download_size_limit:
                    return 1
                return 0

            # Progress reporting must be switched on for the callback to run.
            curl.setopt(pycurl.NOPROGRESS, 0)
            curl.setopt(pycurl.PROGRESSFUNCTION, size_limit)
            return curl

        options = {
            'url': url,
            'method': method,
            'headers': headers,
            'body': body,
            'follow_redirects': False,
            'max_redirects': 0,
            'decompress_response': True,
            'allow_nonstandard_methods': True,
            'allow_ipv6': True,
            'prepare_curl_callback': set_size_limit_callback,
            'validate_cert': False,
        }
        req = httpclient.HTTPRequest(**options)

        session = cookie_utils.CookieSession()

        # 1) cookies written directly into the Cookie header
        raw_cookie = req.headers.get('Cookie')
        if raw_cookie:
            from_header = {}
            for fragment in raw_cookie.split(';'):
                if '=' in fragment:
                    cookie_name, cookie_value = fragment.strip().split('=', 1)
                    from_header[cookie_name] = cookie_value
            session.update(from_header)

        # 2) cookies carried over from the environment's session
        stored = env['session']
        if isinstance(stored, cookie_utils.CookieSession):
            stored = stored.to_json()
        session.from_json(stored)

        # 3) cookies listed on the request itself
        session.update(cookies)

        cookie_header = session.get_cookie_header(req)
        if cookie_header:
            req.headers['Cookie'] = cookie_header

        env['session'] = session

        return req, rule, env
示例#2
0
文件: fetcher.py 项目: ysice/qiandao
        def build_response(response):
            """Describe ``response`` as a plain dict.

            Cookies are collected into a new ``CookieSession``. Before the
            headers are serialised, the ``content-type`` header is filled in
            (``text/plain`` when missing or empty) and a ``charset=`` suffix
            from ``utils.find_encoding`` is appended when none is present.
            Note that this modifies ``response.headers`` in place.

            The body is included twice: base64-encoded under ``text`` and
            passed through ``utils.decode`` under ``decoded``.
            """
            jar = cookie_utils.CookieSession()
            jar.extract_cookies_to_jar(response.request, response)

            headers = response.headers
            encoding = utils.find_encoding(response.body, headers)

            # Only touch the header when something actually has to change.
            if not headers.get('content-type'):
                headers['content-type'] = 'text/plain'
            if 'charset=' not in headers.get('content-type', ''):
                headers['content-type'] = (
                    headers['content-type'] + ('; charset=' + encoding))

            summary = {
                'status': response.code,
                'statusText': response.reason,
                'headers': build_headers(headers),
                'cookies': jar.to_json(),
            }
            summary['content'] = {
                'size': len(response.body),
                'mimeType': headers.get('content-type'),
                'text': base64.b64encode(response.body),
                'decoded': utils.decode(response.body, headers),
            }
            summary['redirectURL'] = headers.get('Location')
            # Sizes are not measured; -1 is stored as a placeholder.
            summary['headersSize'] = -1
            summary['bodySize'] = -1
            return summary
示例#3
0
    def render(self, request, env, session=[]):
        """Render the Jinja templates embedded in a request description.

        Args:
            request: mapping with ``'headers'`` and ``'cookies'`` lists of
                ``{'name': ..., 'value': ...}`` mappings, plus optional
                ``'method'``, ``'url'`` and ``'data'`` template strings.
            env: mapping of template variables; passed to the template as
                keyword arguments.
            session: a ``cookie_utils.CookieSession`` or data accepted by
                ``CookieSession.from_json``; exposed to templates as
                ``_cookies``.

        Returns:
            A new request dict with every non-empty template field replaced
            by its rendered text. The ``request`` argument and its header /
            cookie entries are left untouched.

        Raises:
            KeyError: if ``request`` has no ``'headers'`` or ``'cookies'``.
        """
        request = dict(request)
        # dict() is only a shallow copy: without copying the entries too,
        # rendering would overwrite the caller's templates in place and a
        # second render of the same request would see already-rendered text.
        request['headers'] = [dict(header) for header in request['headers']]
        request['cookies'] = [dict(cookie) for cookie in request['cookies']]

        if isinstance(session, cookie_utils.CookieSession):
            _cookies = session
        else:
            _cookies = cookie_utils.CookieSession()
            _cookies.from_json(session)

        def _render(obj, key):
            # Missing, None or empty fields are left as they are.
            if not obj.get(key):
                return
            template = self.jinja_env.from_string(obj[key])
            obj[key] = template.render(_cookies=_cookies, **env)

        _render(request, 'method')
        _render(request, 'url')
        for header in request['headers']:
            _render(header, 'name')
            _render(header, 'value')
        for cookie in request['cookies']:
            _render(cookie, 'name')
            _render(cookie, 'value')
        _render(request, 'data')
        return request
示例#4
0
    def render(self, request, env, session=[]):
        """Render the Jinja templates embedded in a request description.

        Args:
            request: mapping with ``'headers'`` and ``'cookies'`` lists of
                ``{'name': ..., 'value': ...}`` mappings, plus optional
                ``'method'``, ``'url'`` and ``'data'`` template strings.
            env: mapping of template variables; passed to the template as
                keyword arguments.
            session: a ``cookie_utils.CookieSession`` or data accepted by
                ``CookieSession.from_json``; exposed to templates as
                ``_cookies``.

        Returns:
            A new request dict with every non-empty template field replaced
            by its rendered text. Header and cookie values are additionally
            passed through ``utils.quote_chinese``. The ``request`` argument
            and its header / cookie entries are left untouched.

        Raises:
            KeyError: if ``request`` has no ``'headers'`` or ``'cookies'``,
                or if an entry has no ``'name'`` / ``'value'`` key.
        """
        request = dict(request)
        # dict() is only a shallow copy: without copying the entries too,
        # rendering would overwrite the caller's templates in place and a
        # second render of the same request would see already-rendered text.
        request['headers'] = [dict(header) for header in request['headers']]
        request['cookies'] = [dict(cookie) for cookie in request['cookies']]

        if isinstance(session, cookie_utils.CookieSession):
            _cookies = session
        else:
            _cookies = cookie_utils.CookieSession()
            _cookies.from_json(session)

        def _render(obj, key):
            # Missing, None or empty fields are left as they are.
            if not obj.get(key):
                return

            obj[key] = self.jinja_env.from_string(obj[key]).render(
                _cookies=_cookies, **env)
            return True

        _render(request, 'method')
        _render(request, 'url')
        for header in request['headers']:
            _render(header, 'name')
            # Drop a leading ':' from the name when pycurl is in use
            # (presumably HTTP/2 pseudo-headers such as ':authority' --
            # confirm). The truthiness check keeps an empty or None name
            # from raising on the [0] lookup.
            if pycurl and header['name'] and header['name'][0] == ":":
                header['name'] = header['name'][1:]
            _render(header, 'value')
            header['value'] = utils.quote_chinese(header['value'])
        for cookie in request['cookies']:
            _render(cookie, 'name')
            _render(cookie, 'value')
            cookie['value'] = utils.quote_chinese(cookie['value'])
        _render(request, 'data')
        return request
示例#5
0
    def render(self, request, env, session=[]):
        """Render the Jinja templates embedded in a request description.

        Args:
            request: mapping with ``'headers'`` and ``'cookies'`` lists of
                ``{'name': ..., 'value': ...}`` mappings, plus optional
                ``'method'``, ``'url'`` and ``'data'`` template strings.
            env: mapping of template variables; passed to the template as
                keyword arguments.
            session: a ``cookie_utils.CookieSession`` or data accepted by
                ``CookieSession.from_json``; exposed to templates as
                ``_cookies``.

        Returns:
            A new request dict with every non-empty template field replaced
            by its rendered text. Header and cookie values are additionally
            passed through ``utils.quote_chinese``. The ``request`` argument
            and its header / cookie entries are left untouched.

        Raises:
            httpclient.HTTPError: status 500, when compiling or rendering
                any template fails; the message names the field and the
                offending template.
            KeyError: if ``request`` has no ``'headers'`` or ``'cookies'``,
                or if an entry has no ``'name'`` / ``'value'`` key.
        """
        request = dict(request)
        # dict() is only a shallow copy: without copying the entries too,
        # rendering would overwrite the caller's templates in place and a
        # second render of the same request would see already-rendered text.
        request['headers'] = [dict(header) for header in request['headers']]
        request['cookies'] = [dict(cookie) for cookie in request['cookies']]

        if isinstance(session, cookie_utils.CookieSession):
            _cookies = session
        else:
            _cookies = cookie_utils.CookieSession()
            _cookies.from_json(session)

        def _render(obj, key):
            # Missing, None or empty fields are left as they are.
            if not obj.get(key):
                return
            try:
                obj[key] = self.jinja_env.from_string(obj[key]).render(
                    _cookies=_cookies, **env)
                return True
            except Exception as e:
                # obj[key] still holds the unrendered template at this
                # point, so the message shows what failed to render.
                log_error = 'The error occurred when rendering template {}: {} \\r\\n {}'.format(
                    key, obj[key], repr(e))
                raise httpclient.HTTPError(500, log_error)

        _render(request, 'method')
        _render(request, 'url')
        for header in request['headers']:
            _render(header, 'name')
            # Drop a leading ':' from the name when pycurl is in use
            # (presumably HTTP/2 pseudo-headers such as ':authority' --
            # confirm).
            if pycurl and header['name'] and header['name'][0] == ":":
                header['name'] = header['name'][1:]
            _render(header, 'value')
            header['value'] = utils.quote_chinese(header['value'])
        for cookie in request['cookies']:
            _render(cookie, 'name')
            _render(cookie, 'value')
            cookie['value'] = utils.quote_chinese(cookie['value'])
        _render(request, 'data')
        return request
示例#6
0
        def build_response(response):
            """Describe ``response`` as a plain dict.

            Cookies are collected into a new ``CookieSession`` and stored in
            their JSON form. The body is included twice: base64-encoded
            under ``text`` and passed through ``utils.decode`` under
            ``decoded``.
            """
            jar = cookie_utils.CookieSession()
            jar.extract_cookies_to_jar(response.request, response)

            headers = response.headers

            summary = {
                'status': response.code,
                'statusText': response.reason,
                'headers': build_headers(headers),
                'cookies': jar.to_json(),
            }
            summary['content'] = {
                'size': len(response.body),
                'mimeType': headers.get('content-type'),
                'text': base64.b64encode(response.body),
                'decoded': utils.decode(response.body, headers),
            }
            summary['redirectURL'] = headers.get('Location')
            # Sizes are not measured; -1 is stored as a placeholder.
            summary['headersSize'] = -1
            summary['bodySize'] = -1
            return summary
示例#7
0
    def build_request(self,
                      obj,
                      download_size_limit=config.download_size_limit,
                      connect_timeout=config.connect_timeout,
                      request_timeout=config.request_timeout,
                      proxy={},
                      CURL_ENCODING=True,
                      CURL_CONTENT_LENGTH=True):
        """Turn a task entry into a ready-to-send ``HTTPRequest``.

        Args:
            obj: mapping with the keys ``'env'``, ``'rule'`` and
                ``'request'``. The request template is rendered via
                ``self.render`` using ``env['variables']`` and
                ``env['session']``.
            download_size_limit: threshold compared against the sizes that
                curl reports to the progress callback (presumably bytes --
                confirm against the config).
            connect_timeout: passed to ``HTTPRequest`` and, as an int, to
                ``pycurl.CONNECTTIMEOUT``.
            request_timeout: passed to ``HTTPRequest`` and, as an int, to
                ``pycurl.TIMEOUT``.
            proxy: mapping describing the proxy. ``'scheme'`` selects the
                SOCKS5 variant for curl; every other key ``k`` is copied to
                ``req.proxy_<k>``. Only read here, never modified.
            CURL_ENCODING: when false, ``pycurl.ENCODING`` is unset on the
                curl handle (best effort).
            CURL_CONTENT_LENGTH: when false, a ``content-length`` header is
                removed and the header list is re-sent to curl (best
                effort).

        Returns:
            The tuple ``(req, rule, env)``; ``env['session']`` is replaced
            by the ``CookieSession`` built for this request.
        """
        env = obj['env']
        rule = obj['rule']
        request = self.render(obj['request'], env['variables'], env['session'])

        method = request['method']
        url = request['url']
        # 'api://path' becomes '<local_host>/path': only the 'api:/' prefix
        # is replaced, so one slash of the original URL survives.
        if str(url).startswith('api://'):
            url = str(url).replace('api:/', local_host, 1)

        headers = dict((e['name'], e['value']) for e in request['headers'])
        cookies = dict((e['name'], e['value']) for e in request['cookies'])
        data = request.get('data')
        if method == 'GET':
            data = None
        elif method == 'POST':
            data = request.get('data', '')

        def set_curl_callback(curl):
            def size_limit(download_size, downloaded, upload_size, uploaded):
                # Returning 1 signals "too large", either from the announced
                # total or from what has been received so far.
                if download_size and download_size > download_size_limit:
                    return 1
                if downloaded > download_size_limit:
                    return 1
                return 0

            if pycurl:
                if not CURL_ENCODING:
                    try:
                        curl.unsetopt(pycurl.ENCODING)
                    except Exception:
                        # Best effort: keep going if the option cannot be
                        # unset, but let KeyboardInterrupt/SystemExit pass.
                        pass
                if not CURL_CONTENT_LENGTH:
                    try:
                        if headers.get('content-length'):
                            headers.pop('content-length')
                            curl.setopt(pycurl.HTTPHEADER, [
                                "%s: %s" % (native_str(k), native_str(v))
                                for k, v in HTTPHeaders(headers).get_all()
                            ])
                    except Exception:
                        # Best effort, same reasoning as above.
                        pass
                if config.dns_server:
                    curl.setopt(pycurl.DNS_SERVERS, config.dns_server)
                # Progress reporting must be on for size_limit to be called.
                curl.setopt(pycurl.NOPROGRESS, 0)
                curl.setopt(pycurl.PROGRESSFUNCTION, size_limit)
                curl.setopt(pycurl.CONNECTTIMEOUT, int(connect_timeout))
                curl.setopt(pycurl.TIMEOUT, int(request_timeout))
                if proxy:
                    scheme = proxy.get('scheme', '')
                    if scheme == 'socks5':
                        curl.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
                    elif scheme == 'socks5h':
                        curl.setopt(pycurl.PROXYTYPE,
                                    pycurl.PROXYTYPE_SOCKS5_HOSTNAME)
            return curl

        req = httpclient.HTTPRequest(url=url,
                                     method=method,
                                     headers=headers,
                                     body=data,
                                     follow_redirects=False,
                                     max_redirects=0,
                                     decompress_response=True,
                                     allow_nonstandard_methods=True,
                                     allow_ipv6=True,
                                     prepare_curl_callback=set_curl_callback,
                                     validate_cert=False,
                                     connect_timeout=connect_timeout,
                                     request_timeout=request_timeout)

        session = cookie_utils.CookieSession()
        # Re-store a lower-case 'cookie' header under the 'Cookie' key.
        if req.headers.get('cookie'):
            req.headers['Cookie'] = req.headers.pop("cookie")
        if req.headers.get('Cookie'):
            session.update(dict(x.strip().split('=', 1)
                                for x in req.headers['Cookie'].split(';')
                                if '=' in x))
        if isinstance(env['session'], cookie_utils.CookieSession):
            session.from_json(env['session'].to_json())
        else:
            session.from_json(env['session'])
        session.update(cookies)
        cookie_header = session.get_cookie_header(req)
        if cookie_header:
            req.headers['Cookie'] = cookie_header

        if proxy and pycurl:
            # Decide once whether this URL bypasses the proxy, then apply
            # the proxy attributes in a single place.
            mode = config.proxy_direct_mode
            if not mode:
                direct = False
            elif mode == 'regexp':
                direct = bool(re.search(config.proxy_direct, req.url))
            elif mode == 'url':
                direct = (utils.urlmatch(req.url)
                          in config.proxy_direct.split('|'))
            else:
                # Any other mode never applied the proxy.
                direct = True

            if not direct:
                for key in proxy:
                    # 'scheme' is consumed by the curl callback instead.
                    if key != 'scheme':
                        setattr(req, 'proxy_%s' % key, proxy[key])

        env['session'] = session

        return req, rule, env