def __init__(
    self,
    url: URL,
    method: str,
    encoding: str = "",
    text: str = "",
    json: Dict = None,
    data: bytes = b"",
    history: List[URL] = None,
    headers=None,
    status_code: int = -1,
    cookies=None,
    xml_parser=None,
    redirect_history=None,
    content_length: int = 0,
    meta: Dict = None,
):
    """Record the pieces of a response on the instance.

    Every argument is stored under an attribute of the same name
    (``xml_parser`` goes to the private ``_xml_parser``).  ``history`` and
    ``headers`` are replaced by a fresh empty list / dict when a falsy
    value is passed.  In addition the instance gets a random UUID in ``id``
    and the origin of ``url`` in ``origin``.
    """
    # What was requested.
    self.url = url
    self.encoding = encoding
    self.method = method

    # The body, in whichever representations the caller supplied.
    self.text = text
    self.json = json
    self.data = data

    # Falsy values (None, empty containers) become new empty containers so
    # that instances never share a mutable default.
    self.history = history if history else []
    self.headers = headers if headers else {}

    self.status_code = status_code
    self.cookies = cookies

    # Random identifier generated per instance.
    self.id = uuid.uuid4()

    self._xml_parser = xml_parser
    self.redirect_history = redirect_history
    self.content_length = content_length
    self.meta = meta

    # Computed last: ``origin()`` is called on the URL exactly once.
    self.origin: URL = url.origin()
def extract(html_body: str, origin_url: str) -> Dict[str, List[str]]:
    """
    Given an html body and its origin URL, extract URLs and categorize them
    to inbound & outbound.

    Only anchor elements are inspected.  An href is *outbound* when it is
    an absolute URL whose origin differs from the page origin, or when it
    is a host-less URL with a scheme other than http/https (tel, mailto,
    ...).  Everything else is *inbound* and is reported as the page origin
    joined with the href's path (query string and fragment are dropped).

    Protocol-relative hrefs (``//host/path``) inherit the scheme of the
    page they were found on.  Anchors without an href, and hrefs that
    cannot be parsed as a URL, are skipped.

    :param html_body: HTML content of the crawled endpoint
    :type html_body: string
    :param origin_url: The URL (endpoint) from which the above body originates
    :type origin_url: string
    :return: dict with the keys ``"inbound"`` and ``"outbound"``, each
        holding a list of unique URL strings (in no particular order)
    :raises ValueError: if ``origin_url`` itself has no origin (it is
        relative or has no scheme)
    """
    inbound = set()
    outbound = set()

    origin = URL(origin_url).origin()

    soup = BeautifulSoup(html_body, "html.parser")
    for a_element in soup.find_all("a"):
        try:
            url = URL(a_element["href"])
        except KeyError:
            # Anchor without an href attribute.
            continue
        except ValueError:
            # Malformed href; one bad link must not abort the whole page.
            continue

        if url.is_absolute():
            if not url.scheme:
                # Protocol-relative link: ``origin()`` raises ValueError on
                # a URL that has a host but no scheme, so resolve the
                # scheme against the page first.
                url = url.with_scheme(origin.scheme)
            if url.origin() != origin:
                outbound.add(str(url))
                continue
        elif url.scheme not in ("http", "https", ""):
            # tel, ftp etc
            outbound.add(str(url))
            continue

        # Same-origin absolute URL or a relative reference: normalise to
        # origin + path.
        inbound.add(origin.join(URL(url.path)))

    return {
        "inbound": [str(el) for el in inbound],
        "outbound": [str(el) for el in outbound],
    }
def __init__(self, *, redirect=True, redirect_url=None,
             x_frame='DENY',
             sts='max-age=31536000; includeSubDomains',
             cto='nosniff',
             xss='1; mode=block',
             white_paths=()):
    """Validate and store the configuration.

    All arguments are keyword-only.  ``redirect_url``, when given, is
    converted to a ``URL`` and must use the ``https`` scheme and consist of
    an origin only (comparing it with its own ``origin()`` must yield
    equality).  ``white_paths`` is copied into a set.  The remaining values
    are stored unchanged; presumably they are response-header values, but
    that is decided by the code that consumes these attributes.

    Raises ``ValueError`` when ``redirect_url`` fails validation.
    """
    self._redirect = redirect

    if redirect_url is not None:
        redirect_url = URL(redirect_url)
        is_https = redirect_url.scheme == 'https'
        if not is_https:
            message = "Redirection url {} should have HTTPS scheme"
            raise ValueError(message.format(redirect_url))
        # A URL equal to its own origin carries nothing beyond
        # scheme, host and port.
        if redirect_url.origin() != redirect_url:
            message = ("Redirection url {} should have no "
                       "path, query and fragment parts")
            raise ValueError(message.format(redirect_url))
    self._redirect_url = redirect_url

    self._x_frame = x_frame
    self._sts = sts
    self._cto = cto
    self._xss = xss
    self._white_paths = set(white_paths)
def find_site_url(soup, url: URL) -> URL:
    """
    Attempts to find the canonical Url of the Site

    Two hints are consulted in order: the ``href`` of a ``link`` element
    with ``rel="canonical"``, then the ``content`` of a ``meta`` element
    with ``property="og:url"``.  The first hint holding a non-empty value
    decides the result; a hint that is missing or cannot be turned into an
    origin is ignored.

    :param soup: BeautifulSoup of site
    :param url: Current Url of site
    :return: ``url`` itself when the hint is just ``/``, the origin of the
        hinted URL otherwise, or the origin of ``url`` when no hint applies
    """
    # (tag name, attribute filter passed to ``find``, attribute to read)
    hints = (
        ("link", {"rel": "canonical"}, "href"),
        ("meta", {"property": "og:url"}, "content"),
    )
    for tag_name, attr_filter, value_attr in hints:
        try:
            element = soup.find(name=tag_name, **attr_filter)
            # ``element`` is None when nothing matched; the resulting
            # AttributeError is what moves us on to the next hint.
            candidate = element.get(value_attr)
            if not candidate:
                continue
            if candidate.strip() == "/":
                return url
            return URL(candidate).origin()
        except (AttributeError, ValueError):
            continue
    return url.origin()
def filter_cookies(
    self, request_url: URL = URL()
) -> Union["BaseCookie[str]", "SimpleCookie[str]"]:
    """Return the cookies of this jar that apply to ``request_url``.

    Expired cookies are purged first.  A non-``URL`` argument is still
    accepted but triggers a ``DeprecationWarning`` before being converted.

    A cookie without a domain is always included.  Any other cookie must
    pass, in this order: the IP-address rule (skipped unless the jar is
    "unsafe"), the domain rule (exact host for host-only cookies, domain
    match otherwise), the path rule, and the secure rule (secure cookies
    need an https/wss URL or an origin listed as secure).
    """
    self._do_expiration()

    if not isinstance(request_url, URL):
        warnings.warn(
            "The method accepts yarl.URL instances only, got {}".format(
                type(request_url)
            ),
            DeprecationWarning,
        )
        request_url = URL(request_url)

    filtered: Union["SimpleCookie[str]", "BaseCookie[str]"]
    if self._quote_cookie:
        filtered = SimpleCookie()
    else:
        filtered = BaseCookie()

    hostname = request_url.raw_host or ""

    # A URL without an origin (relative / no scheme) falls back to the
    # empty URL for the secure-origin lookup.
    try:
        request_origin = request_url.origin()
    except ValueError:
        request_origin = URL()

    secure_transport = (
        request_url.scheme in ("https", "wss")
        or request_origin in self._treat_as_secure_origin
    )

    for cookie in self:
        name = cookie.key
        domain = cookie["domain"]

        # Send shared cookies
        if not domain:
            filtered[name] = cookie.value
            continue

        if not self._unsafe and is_ip_address(hostname):
            continue

        if (domain, name) in self._host_only_cookies:
            domain_ok = domain == hostname
        else:
            domain_ok = self._is_domain_match(domain, hostname)
        if not domain_ok:
            continue

        if not self._is_path_match(request_url.path, cookie["path"]):
            continue

        if not secure_transport and cookie["secure"]:
            continue

        # It's critical we use the Morsel so the coded_value
        # (based on cookie version) is preserved
        morsel = cast("Morsel[str]", cookie.get(cookie.key, Morsel()))
        morsel.set(cookie.key, cookie.value, cookie.coded_value)
        filtered[name] = morsel

    return filtered
def find_links(self, html: str, url: URL) -> set:
    """Collect link targets from the anchors of ``html``.

    Each href is lower-cased first.  Hrefs that end with one of
    ``self.disallow_ends`` or start with ``#`` are ignored.  A link that
    passes ``self.check_domains`` and whose text starts with the scheme of
    ``url`` is added after ``self.get_normal_link``.  Otherwise, a link
    without a scheme is appended to the origin of ``url`` when it starts
    with ``/`` and to the full text of ``url`` when it does not.

    :param html: markup to scan
    :param url: address the markup was loaded from
    :return: set of collected links
    """
    collected = set()
    site_root = str(url.origin())
    document = BeautifulSoup(html, "html.parser")

    for anchor in document.find_all("a", href=True):
        href = anchor["href"].lower()
        if href.endswith(self.disallow_ends) or href.startswith("#"):
            continue

        link = URL(href)
        if self.check_domains(link) and link.human_repr().startswith(
            url.scheme
        ):
            collected.add(self.get_normal_link(link))
            continue

        if link.scheme != "":
            # Has a scheme but was not accepted above: not collected.
            continue

        target = link.human_repr()
        if target.startswith("/"):
            collected.add(site_root + target)
        else:
            collected.add(url.human_repr() + target)

    return collected
def test_origin_not_absolute_url():
    """``origin()`` must reject a URL that has no host part."""
    relative = URL("/path/to?a=1&b=2")
    with pytest.raises(ValueError):
        relative.origin()
def test_origin():
    """``origin()`` keeps scheme, host and port and drops everything else."""
    full = URL("http://*****:*****@example.com:8888/path/to?a=1&b=2")
    expected = URL("http://example.com:8888")
    assert expected == full.origin()
def test_origin_no_scheme():
    """``origin()`` must reject a URL that has a host but no scheme."""
    # Protocol-relative URL: credentials, host and port are present, only
    # the scheme is missing.  Construction happens outside the ``raises``
    # block so that only ``origin()`` itself is allowed to fail.
    url = URL("//user:password@example.com:8888/path/to?a=1&b=2")
    with pytest.raises(ValueError):
        url.origin()
def test_origin_ipv6():
    """The origin of an IPv6-literal URL keeps the bracketed host and port."""
    full = URL("http://*****:*****@[::1]:8888/path/to?a=1&b=2")
    rendered = str(full.origin())
    assert rendered == "http://[::1]:8888"
async def _request(
    self,
    method: str,
    str_or_url: StrOrURL,
    *,
    params: Optional[Mapping[str, str]] = None,
    data: Any = None,
    json: Any = None,
    cookies: Optional[LooseCookies] = None,
    headers: Optional[LooseHeaders] = None,
    skip_auto_headers: Optional[Iterable[str]] = None,
    auth: Optional[BasicAuth] = None,
    allow_redirects: bool = True,
    max_redirects: int = 10,
    compress: Optional[str] = None,
    chunked: Optional[bool] = None,
    expect100: bool = False,
    raise_for_status: Optional[bool] = None,
    read_until_eof: bool = True,
    proxy: Optional[StrOrURL] = None,
    proxy_auth: Optional[BasicAuth] = None,
    timeout: Union[ClientTimeout, object] = sentinel,
    verify_ssl: Optional[bool] = None,
    fingerprint: Optional[bytes] = None,
    ssl_context: Optional[SSLContext] = None,
    ssl: Optional[Union[SSLContext, bool, Fingerprint]] = None,
    proxy_headers: Optional[LooseHeaders] = None,
    trace_request_ctx: Optional[SimpleNamespace] = None,
    read_bufsize: Optional[int] = None,
) -> ClientResponse:
    """Send one request, follow redirects, and return the final response.

    The request is built with ``self._request_class`` and sent over a
    connection obtained from ``self._connector``.  While the response
    status is 301, 302, 303, 307 or 308 and ``allow_redirects`` is true,
    the target named by the ``Location`` (or ``URI``) header is requested
    in turn; intermediate responses are collected and attached to the
    final response as ``_history``.  Every configured trace is notified on
    start, on each redirect, on completion and on failure.

    Raises:
        RuntimeError: the session is closed.
        ValueError: ``data`` and ``json`` are both given; credentials are
            supplied in more than one way; or a redirect points to a
            scheme other than http/https.
        InvalidURL: the request, proxy or redirect URL cannot be parsed.
        ServerTimeoutError: connecting timed out.
        ClientOSError: an ``OSError`` occurred while sending the request
            or starting the response.
        TooManyRedirects: ``max_redirects`` was reached.
    """
    # NOTE: timeout clamps existing connect and read timeouts.  We cannot
    # set the default to None because we need to detect if the user wants
    # to use the existing timeouts by setting timeout to None.

    if self.closed:
        raise RuntimeError("Session is closed")

    # Fold the legacy verify_ssl / ssl_context / fingerprint arguments
    # into the single ``ssl`` value.
    ssl = _merge_ssl_params(ssl, verify_ssl, ssl_context, fingerprint)

    if data is not None and json is not None:
        raise ValueError(
            "data and json parameters can not be used at the same time"
        )
    elif json is not None:
        data = payload.JsonPayload(json, dumps=self._json_serialize)

    if not isinstance(chunked, bool) and chunked is not None:
        warnings.warn("Chunk size is deprecated #1615", DeprecationWarning)

    redirects = 0
    history = []
    version = self._version

    # Merge with default headers and transform to CIMultiDict
    headers = self._prepare_headers(headers)
    proxy_headers = self._prepare_headers(proxy_headers)

    try:
        url = URL(str_or_url)
    except ValueError as e:
        raise InvalidURL(str_or_url) from e

    skip_headers = set(self._skip_auto_headers)
    if skip_auto_headers is not None:
        for i in skip_auto_headers:
            skip_headers.add(istr(i))

    if proxy is not None:
        try:
            proxy = URL(proxy)
        except ValueError as e:
            raise InvalidURL(proxy) from e

    if timeout is sentinel:
        real_timeout = self._timeout  # type: ClientTimeout
    else:
        if not isinstance(timeout, ClientTimeout):
            # A bare value is taken as the total timeout.
            real_timeout = ClientTimeout(total=timeout)  # type: ignore
        else:
            real_timeout = timeout
    # timeout is cumulative for all request operations
    # (request, redirects, responses, data consuming)
    tm = TimeoutHandle(self._loop, real_timeout.total)
    handle = tm.start()

    if read_bufsize is None:
        read_bufsize = self._read_bufsize

    traces = [
        Trace(
            self,
            trace_config,
            trace_config.trace_config_ctx(trace_request_ctx=trace_request_ctx),
        )
        for trace_config in self._trace_configs
    ]

    for trace in traces:
        await trace.send_request_start(method, url, headers)

    timer = tm.timer()
    try:
        with timer:
            # One iteration per hop of the redirect chain.
            while True:
                url, auth_from_url = strip_auth_from_url(url)
                if auth and auth_from_url:
                    raise ValueError(
                        "Cannot combine AUTH argument with "
                        "credentials encoded in URL"
                    )

                # Precedence: explicit argument, then URL credentials,
                # then the session default.
                if auth is None:
                    auth = auth_from_url
                if auth is None:
                    auth = self._default_auth
                # It would be confusing if we support explicit
                # Authorization header with auth argument
                if (
                    headers is not None
                    and auth is not None
                    and hdrs.AUTHORIZATION in headers
                ):
                    raise ValueError(
                        "Cannot combine AUTHORIZATION header "
                        "with AUTH argument or credentials "
                        "encoded in URL"
                    )

                all_cookies = self._cookie_jar.filter_cookies(url)

                if cookies is not None:
                    # Per-request cookies go through a throw-away jar so
                    # they are filtered against the URL like session ones.
                    tmp_cookie_jar = CookieJar()
                    tmp_cookie_jar.update_cookies(cookies)
                    req_cookies = tmp_cookie_jar.filter_cookies(url)
                    if req_cookies:
                        all_cookies.load(req_cookies)

                if proxy is not None:
                    proxy = URL(proxy)
                elif self._trust_env:
                    # Use the first environment proxy registered for the
                    # scheme of the current URL.
                    for scheme, proxy_info in proxies_from_env().items():
                        if scheme == url.scheme:
                            proxy = proxy_info.proxy
                            proxy_auth = proxy_info.proxy_auth
                            break

                req = self._request_class(
                    method,
                    url,
                    params=params,
                    headers=headers,
                    skip_auto_headers=skip_headers,
                    data=data,
                    cookies=all_cookies,
                    auth=auth,
                    version=version,
                    compress=compress,
                    chunked=chunked,
                    expect100=expect100,
                    loop=self._loop,
                    response_class=self._response_class,
                    proxy=proxy,
                    proxy_auth=proxy_auth,
                    timer=timer,
                    session=self,
                    ssl=ssl,
                    proxy_headers=proxy_headers,
                    traces=traces,
                )

                # connection timeout
                try:
                    with CeilTimeout(real_timeout.connect, loop=self._loop):
                        assert self._connector is not None
                        conn = await self._connector.connect(
                            req, traces=traces, timeout=real_timeout
                        )
                except asyncio.TimeoutError as exc:
                    raise ServerTimeoutError(
                        "Connection timeout " "to host {}".format(url)
                    ) from exc

                assert conn.transport is not None

                assert conn.protocol is not None
                conn.protocol.set_response_params(
                    timer=timer,
                    skip_payload=method.upper() == "HEAD",
                    read_until_eof=read_until_eof,
                    auto_decompress=self._auto_decompress,
                    read_timeout=real_timeout.sock_read,
                    read_bufsize=read_bufsize,
                )

                try:
                    try:
                        resp = await req.send(conn)
                        try:
                            await resp.start(conn)
                        except BaseException:
                            resp.close()
                            raise
                    except BaseException:
                        conn.close()
                        raise
                except ClientError:
                    raise
                except OSError as exc:
                    raise ClientOSError(*exc.args) from exc

                self._cookie_jar.update_cookies(resp.cookies, resp.url)

                # redirects
                if resp.status in (301, 302, 303, 307, 308) and allow_redirects:

                    for trace in traces:
                        await trace.send_request_redirect(
                            method, url, headers, resp
                        )

                    redirects += 1
                    history.append(resp)
                    if max_redirects and redirects >= max_redirects:
                        resp.close()
                        raise TooManyRedirects(
                            history[0].request_info, tuple(history)
                        )

                    # For 301 and 302, mimic IE, now changed in RFC
                    # https://github.com/kennethreitz/requests/pull/269
                    if (resp.status == 303 and resp.method != hdrs.METH_HEAD) or (
                        resp.status in (301, 302) and resp.method == hdrs.METH_POST
                    ):
                        method = hdrs.METH_GET
                        data = None
                        if headers.get(hdrs.CONTENT_LENGTH):
                            headers.pop(hdrs.CONTENT_LENGTH)

                    r_url = resp.headers.get(hdrs.LOCATION) or resp.headers.get(
                        hdrs.URI
                    )
                    if r_url is None:
                        # see github.com/aio-libs/aiohttp/issues/2022
                        break
                    else:
                        # reading from correct redirection
                        # response is forbidden
                        resp.release()

                    try:
                        parsed_url = URL(
                            r_url, encoded=not self._requote_redirect_url
                        )

                    except ValueError as e:
                        raise InvalidURL(r_url) from e

                    scheme = parsed_url.scheme
                    if scheme not in ("http", "https", ""):
                        resp.close()
                        raise ValueError("Can redirect only to http or https")
                    elif not scheme:
                        # Relative redirect: resolve against the current URL.
                        parsed_url = url.join(parsed_url)

                    # Credentials are not carried over to another origin.
                    if url.origin() != parsed_url.origin():
                        auth = None
                        headers.pop(hdrs.AUTHORIZATION, None)

                    url = parsed_url
                    params = None
                    resp.release()
                    continue

                break

        # check response status
        if raise_for_status is None:
            raise_for_status = self._raise_for_status
        if raise_for_status:
            resp.raise_for_status()

        # register connection
        if handle is not None:
            if resp.connection is not None:
                resp.connection.add_callback(handle.cancel)
            else:
                handle.cancel()

        resp._history = tuple(history)

        for trace in traces:
            await trace.send_request_end(method, url, headers, resp)
        return resp

    except BaseException as e:
        # cleanup timer
        tm.close()
        if handle:
            handle.cancel()
            handle = None

        for trace in traces:
            await trace.send_request_exception(method, url, headers, e)
        raise
async def _request(
    self,
    method: str,
    str_or_url: StrOrURL,
    *,
    params: Optional[Mapping[str, str]] = None,
    data: Any = None,
    json: Any = None,
    cookies: Optional[LooseCookies] = None,
    headers: Optional[LooseHeaders] = None,
    skip_auto_headers: Optional[Iterable[str]] = None,
    auth: Optional[BasicAuth] = None,
    allow_redirects: bool = True,
    max_redirects: int = 10,
    compress: Optional[str] = None,
    chunked: Optional[bool] = None,
    expect100: bool = False,
    raise_for_status: Union[
        None, bool, Callable[[ClientResponse], Awaitable[None]]
    ] = None,
    read_until_eof: bool = True,
    proxy: Optional[StrOrURL] = None,
    proxy_auth: Optional[BasicAuth] = None,
    timeout: Union[ClientTimeout, _SENTINEL] = sentinel,
    ssl: Optional[Union[SSLContext, bool, Fingerprint]] = None,
    proxy_headers: Optional[LooseHeaders] = None,
    trace_request_ctx: Optional[SimpleNamespace] = None,
    read_bufsize: Optional[int] = None,
) -> ClientResponse:
    """Send one request, follow redirects, and return the final response.

    The URL is produced by ``self._build_url``.  The request is built with
    ``self._request_class`` and sent over a connection obtained from
    ``self._connector``.  While the response status is 301, 302, 303, 307
    or 308 and ``allow_redirects`` is true, the target named by the
    ``Location`` (or ``URI``) header is requested in turn; intermediate
    responses are attached to the final response as ``_history``.  Traces
    are notified on start, on each redirect, on completion and on failure,
    always with ``params`` merged into the reported URL.

    ``raise_for_status`` falls back to the session setting when ``None``.
    A callable is awaited with the response; any other truthy value calls
    ``resp.raise_for_status()``.

    Raises:
        RuntimeError: the session is closed.
        TypeError: ``ssl`` is not one of ``SSL_ALLOWED_TYPES``.
        ValueError: ``data`` and ``json`` are both given; credentials are
            supplied in more than one way; or a redirect points to a
            scheme other than http/https.
        InvalidURL: the request, proxy or redirect URL cannot be parsed.
        ServerTimeoutError: connecting timed out.
        ClientOSError: an ``OSError`` occurred while sending the request
            or starting the response.
        TooManyRedirects: ``max_redirects`` was reached.
    """

    # NOTE: timeout clamps existing connect and read timeouts.  We cannot
    # set the default to None because we need to detect if the user wants
    # to use the existing timeouts by setting timeout to None.

    if self.closed:
        raise RuntimeError("Session is closed")

    if not isinstance(ssl, SSL_ALLOWED_TYPES):
        raise TypeError(
            "ssl should be SSLContext, bool, Fingerprint, "
            "or None, got {!r} instead.".format(ssl)
        )

    if data is not None and json is not None:
        raise ValueError(
            "data and json parameters can not be used at the same time"
        )
    elif json is not None:
        data = payload.JsonPayload(json, dumps=self._json_serialize)

    redirects = 0
    history = []
    version = self._version

    # Merge with default headers and transform to CIMultiDict
    headers = self._prepare_headers(headers)
    proxy_headers = self._prepare_headers(proxy_headers)

    try:
        url = self._build_url(str_or_url)
    except ValueError as e:
        raise InvalidURL(str_or_url) from e

    skip_headers = set(self._skip_auto_headers)
    if skip_auto_headers is not None:
        for i in skip_auto_headers:
            skip_headers.add(istr(i))

    if proxy is not None:
        try:
            proxy = URL(proxy)
        except ValueError as e:
            raise InvalidURL(proxy) from e

    if timeout is sentinel:
        real_timeout = self._timeout  # type: ClientTimeout
    else:
        if not isinstance(timeout, ClientTimeout):
            # A bare value is taken as the total timeout.
            real_timeout = ClientTimeout(total=timeout)  # type: ignore[arg-type]
        else:
            real_timeout = timeout
    # timeout is cumulative for all request operations
    # (request, redirects, responses, data consuming)
    tm = TimeoutHandle(self._loop, real_timeout.total)
    handle = tm.start()

    if read_bufsize is None:
        read_bufsize = self._read_bufsize

    traces = [
        Trace(
            self,
            trace_config,
            trace_config.trace_config_ctx(trace_request_ctx=trace_request_ctx),
        )
        for trace_config in self._trace_configs
    ]

    for trace in traces:
        await trace.send_request_start(method, url.update_query(params), headers)

    timer = tm.timer()
    try:
        with timer:
            # One iteration per hop of the redirect chain.
            while True:
                url, auth_from_url = strip_auth_from_url(url)
                if auth and auth_from_url:
                    raise ValueError(
                        "Cannot combine AUTH argument with "
                        "credentials encoded in URL"
                    )

                # Precedence: explicit argument, then URL credentials,
                # then the session default.
                if auth is None:
                    auth = auth_from_url
                if auth is None:
                    auth = self._default_auth
                # It would be confusing if we support explicit
                # Authorization header with auth argument
                if (
                    headers is not None
                    and auth is not None
                    and hdrs.AUTHORIZATION in headers
                ):
                    raise ValueError(
                        "Cannot combine AUTHORIZATION header "
                        "with AUTH argument or credentials "
                        "encoded in URL"
                    )

                all_cookies = self._cookie_jar.filter_cookies(url)

                if cookies is not None:
                    # Per-request cookies go through a throw-away jar so
                    # they are filtered against the URL like session ones.
                    tmp_cookie_jar = CookieJar()
                    tmp_cookie_jar.update_cookies(cookies)
                    req_cookies = tmp_cookie_jar.filter_cookies(url)
                    if req_cookies:
                        all_cookies.load(req_cookies)

                if proxy is not None:
                    proxy = URL(proxy)
                elif self._trust_env:
                    # A LookupError from the helper leaves proxy and
                    # proxy_auth untouched.
                    with suppress(LookupError):
                        proxy, proxy_auth = get_env_proxy_for_url(url)

                req = self._request_class(
                    method,
                    url,
                    params=params,
                    headers=headers,
                    skip_auto_headers=skip_headers,
                    data=data,
                    cookies=all_cookies,
                    auth=auth,
                    version=version,
                    compress=compress,
                    chunked=chunked,
                    expect100=expect100,
                    loop=self._loop,
                    response_class=self._response_class,
                    proxy=proxy,
                    proxy_auth=proxy_auth,
                    timer=timer,
                    session=self,
                    ssl=ssl,
                    proxy_headers=proxy_headers,
                    traces=traces,
                )

                # connection timeout
                try:
                    async with ceil_timeout(real_timeout.connect):
                        assert self._connector is not None
                        conn = await self._connector.connect(
                            req, traces=traces, timeout=real_timeout
                        )
                except asyncio.TimeoutError as exc:
                    raise ServerTimeoutError(
                        f"Connection timeout to host {url}"
                    ) from exc

                assert conn.transport is not None

                assert conn.protocol is not None
                conn.protocol.set_response_params(
                    timer=timer,
                    skip_payload=method.upper() == "HEAD",
                    read_until_eof=read_until_eof,
                    auto_decompress=self._auto_decompress,
                    read_timeout=real_timeout.sock_read,
                    read_bufsize=read_bufsize,
                )

                # Inner handlers close the response / connection on any
                # failure; the outer ones translate OSError (but not
                # ClientError) into ClientOSError.
                try:
                    try:
                        resp = await req.send(conn)
                        try:
                            await resp.start(conn)
                        except BaseException:
                            resp.close()
                            raise
                    except BaseException:
                        conn.close()
                        raise
                except ClientError:
                    raise
                except OSError as exc:
                    raise ClientOSError(*exc.args) from exc

                self._cookie_jar.update_cookies(resp.cookies, resp.url)

                # redirects
                if resp.status in (301, 302, 303, 307, 308) and allow_redirects:

                    for trace in traces:
                        await trace.send_request_redirect(
                            method, url.update_query(params), headers, resp
                        )

                    redirects += 1
                    history.append(resp)
                    if max_redirects and redirects >= max_redirects:
                        resp.close()
                        raise TooManyRedirects(
                            history[0].request_info, tuple(history)
                        )

                    # For 301 and 302, mimic IE, now changed in RFC
                    # https://github.com/kennethreitz/requests/pull/269
                    if (resp.status == 303 and resp.method != hdrs.METH_HEAD) or (
                        resp.status in (301, 302) and resp.method == hdrs.METH_POST
                    ):
                        method = hdrs.METH_GET
                        data = None
                        if headers.get(hdrs.CONTENT_LENGTH):
                            headers.pop(hdrs.CONTENT_LENGTH)

                    r_url = resp.headers.get(hdrs.LOCATION) or resp.headers.get(
                        hdrs.URI
                    )
                    if r_url is None:
                        # see github.com/aio-libs/aiohttp/issues/2022
                        break
                    else:
                        # reading from correct redirection
                        # response is forbidden
                        resp.release()

                    try:
                        parsed_url = URL(
                            r_url, encoded=not self._requote_redirect_url
                        )

                    except ValueError as e:
                        raise InvalidURL(r_url) from e

                    scheme = parsed_url.scheme
                    if scheme not in ("http", "https", ""):
                        resp.close()
                        raise ValueError("Can redirect only to http or https")
                    elif not scheme:
                        # Relative redirect: resolve against the current URL.
                        parsed_url = url.join(parsed_url)

                    # An http -> https upgrade on the same host is the one
                    # origin change that keeps the credentials.
                    is_same_host_https_redirect = (
                        url.host == parsed_url.host
                        and parsed_url.scheme == "https"
                        and url.scheme == "http"
                    )

                    if (
                        url.origin() != parsed_url.origin()
                        and not is_same_host_https_redirect
                    ):
                        auth = None
                        headers.pop(hdrs.AUTHORIZATION, None)

                    url = parsed_url
                    params = None
                    resp.release()
                    continue

                break

        # check response status
        if raise_for_status is None:
            raise_for_status = self._raise_for_status

        if raise_for_status is None:
            pass
        elif callable(raise_for_status):
            await raise_for_status(resp)
        elif raise_for_status:
            resp.raise_for_status()

        # register connection
        if handle is not None:
            if resp.connection is not None:
                resp.connection.add_callback(handle.cancel)
            else:
                handle.cancel()

        resp._history = tuple(history)

        for trace in traces:
            await trace.send_request_end(
                method, url.update_query(params), headers, resp
            )
        return resp

    except BaseException as e:
        # cleanup timer
        tm.close()
        if handle:
            handle.cancel()
            handle = None

        for trace in traces:
            await trace.send_request_exception(
                method, url.update_query(params), headers, e
            )
        raise
def __init__(self, url: URL):
    """Remember the origin of ``url``.

    Note that ``host`` holds whatever ``url.origin()`` returns, not the
    bare host name.
    """
    origin = url.origin()
    self.host = origin
async def _request(self, method, url, *,
                   params=None,
                   data=None,
                   json=None,
                   headers=None,
                   skip_auto_headers=None,
                   auth=None,
                   allow_redirects=True,
                   max_redirects=10,
                   compress=None,
                   chunked=None,
                   expect100=False,
                   read_until_eof=True,
                   proxy=None,
                   proxy_auth=None,
                   timeout=sentinel,
                   verify_ssl=None,
                   fingerprint=None,
                   ssl_context=None,
                   ssl=None,
                   proxy_headers=None,
                   trace_request_ctx=None):
    """Send one request, follow redirects, and return the final response.

    The request is built with ``self._request_class`` and sent over a
    connection obtained from ``self._connector``.  While the response
    status is 301, 302, 303, 307 or 308 and ``allow_redirects`` is true,
    the target named by the ``Location`` (or ``URI``) header is requested
    in turn.  When ``max_redirects`` is reached the loop stops and the
    last redirect response is returned (no exception is raised here).
    Intermediate responses are attached to the result as ``_history``.
    Traces are notified on start, on each redirect, on completion and on
    failure.

    Raises:
        RuntimeError: the session is closed.
        ValueError: ``data`` and ``json`` are both given; credentials are
            supplied in more than one way; or a redirect points to a
            scheme other than http/https.
        InvalidURL: the request, proxy or redirect URL cannot be parsed.
        ServerTimeoutError: connecting timed out.
        ClientOSError: an ``OSError`` occurred while starting the response.
    """

    # NOTE: timeout clamps existing connect and read timeouts.  We cannot
    # set the default to None because we need to detect if the user wants
    # to use the existing timeouts by setting timeout to None.

    if self.closed:
        raise RuntimeError('Session is closed')

    # Fold the legacy verify_ssl / ssl_context / fingerprint arguments
    # into the single ``ssl`` value.
    ssl = _merge_ssl_params(ssl, verify_ssl, ssl_context, fingerprint)

    if data is not None and json is not None:
        raise ValueError(
            'data and json parameters can not be used at the same time')
    elif json is not None:
        data = payload.JsonPayload(json, dumps=self._json_serialize)

    if not isinstance(chunked, bool) and chunked is not None:
        warnings.warn(
            'Chunk size is deprecated #1615', DeprecationWarning)

    redirects = 0
    history = []
    version = self._version

    # Merge with default headers and transform to CIMultiDict
    headers = self._prepare_headers(headers)
    proxy_headers = self._prepare_headers(proxy_headers)

    try:
        url = URL(url)
    except ValueError:
        raise InvalidURL(url)

    skip_headers = set(self._skip_auto_headers)
    if skip_auto_headers is not None:
        for i in skip_auto_headers:
            skip_headers.add(istr(i))

    if proxy is not None:
        try:
            proxy = URL(proxy)
        except ValueError:
            raise InvalidURL(proxy)

    # timeout is cumulative for all request operations
    # (request, redirects, responses, data consuming)
    tm = TimeoutHandle(
        self._loop,
        timeout if timeout is not sentinel else self._read_timeout)
    handle = tm.start()

    traces = [
        Trace(
            self,
            trace_config,
            trace_config.trace_config_ctx(
                trace_request_ctx=trace_request_ctx)
        )
        for trace_config in self._trace_configs
    ]

    for trace in traces:
        await trace.send_request_start(
            method,
            url,
            headers
        )

    timer = tm.timer()
    try:
        with timer:
            # One iteration per hop of the redirect chain.
            while True:
                url, auth_from_url = strip_auth_from_url(url)
                if auth and auth_from_url:
                    raise ValueError("Cannot combine AUTH argument with "
                                     "credentials encoded in URL")

                # Precedence: explicit argument, then URL credentials,
                # then the session default.
                if auth is None:
                    auth = auth_from_url
                if auth is None:
                    auth = self._default_auth
                # It would be confusing if we support explicit
                # Authorization header with auth argument
                if (headers is not None and
                        auth is not None and
                        hdrs.AUTHORIZATION in headers):
                    raise ValueError("Cannot combine AUTHORIZATION header "
                                     "with AUTH argument or credentials "
                                     "encoded in URL")

                # The fragment is dropped before the URL is used for the
                # cookie lookup and the request.
                url = url.with_fragment(None)
                cookies = self._cookie_jar.filter_cookies(url)

                if proxy is not None:
                    proxy = URL(proxy)
                elif self._trust_env:
                    # Use the first environment proxy registered for the
                    # scheme of the current URL.
                    for scheme, proxy_info in proxies_from_env().items():
                        if scheme == url.scheme:
                            proxy = proxy_info.proxy
                            proxy_auth = proxy_info.proxy_auth
                            break

                req = self._request_class(
                    method, url, params=params, headers=headers,
                    skip_auto_headers=skip_headers, data=data,
                    cookies=cookies, auth=auth, version=version,
                    compress=compress, chunked=chunked,
                    expect100=expect100, loop=self._loop,
                    response_class=self._response_class,
                    proxy=proxy, proxy_auth=proxy_auth, timer=timer,
                    session=self, auto_decompress=self._auto_decompress,
                    ssl=ssl, proxy_headers=proxy_headers)

                # connection timeout
                try:
                    with CeilTimeout(self._conn_timeout, loop=self._loop):
                        conn = await self._connector.connect(
                            req,
                            traces=traces
                        )
                except asyncio.TimeoutError as exc:
                    raise ServerTimeoutError(
                        'Connection timeout '
                        'to host {0}'.format(url)) from exc

                tcp_nodelay(conn.transport, True)
                tcp_cork(conn.transport, False)
                try:
                    resp = req.send(conn)
                    try:
                        await resp.start(conn, read_until_eof)
                    except Exception:
                        # Starting the response failed: drop both the
                        # response and the connection before re-raising.
                        resp.close()
                        conn.close()
                        raise
                except ClientError:
                    raise
                except OSError as exc:
                    raise ClientOSError(*exc.args) from exc

                self._cookie_jar.update_cookies(resp.cookies, resp.url)

                # redirects
                if resp.status in (
                        301, 302, 303, 307, 308) and allow_redirects:

                    for trace in traces:
                        await trace.send_request_redirect(
                            method,
                            url,
                            headers,
                            resp
                        )

                    redirects += 1
                    history.append(resp)
                    if max_redirects and redirects >= max_redirects:
                        # Limit reached: stop following and fall through
                        # to return this redirect response.
                        resp.close()
                        break
                    else:
                        resp.release()

                    # For 301 and 302, mimic IE, now changed in RFC
                    # https://github.com/kennethreitz/requests/pull/269
                    if (resp.status == 303 and
                            resp.method != hdrs.METH_HEAD) \
                            or (resp.status in (301, 302) and
                                resp.method == hdrs.METH_POST):
                        method = hdrs.METH_GET
                        data = None
                        if headers.get(hdrs.CONTENT_LENGTH):
                            headers.pop(hdrs.CONTENT_LENGTH)

                    r_url = (resp.headers.get(hdrs.LOCATION) or
                             resp.headers.get(hdrs.URI))
                    if r_url is None:
                        # see github.com/aio-libs/aiohttp/issues/2022
                        break

                    try:
                        r_url = URL(
                            r_url, encoded=not self.requote_redirect_url)

                    except ValueError:
                        raise InvalidURL(r_url)

                    scheme = r_url.scheme
                    if scheme not in ('http', 'https', ''):
                        resp.close()
                        raise ValueError(
                            'Can redirect only to http or https')
                    elif not scheme:
                        # Relative redirect: resolve against the current URL.
                        r_url = url.join(r_url)

                    # Credentials are not carried over to another origin.
                    if url.origin() != r_url.origin():
                        auth = None
                        headers.pop(hdrs.AUTHORIZATION, None)

                    url = r_url
                    params = None
                    resp.release()
                    continue

                break

        # check response status
        if self._raise_for_status:
            resp.raise_for_status()

        # register connection
        if handle is not None:
            if resp.connection is not None:
                resp.connection.add_callback(handle.cancel)
            else:
                handle.cancel()

        resp._history = tuple(history)

        for trace in traces:
            await trace.send_request_end(
                method,
                url,
                headers,
                resp
            )
        return resp

    except Exception as e:
        # cleanup timer
        tm.close()
        if handle:
            handle.cancel()
            handle = None

        for trace in traces:
            await trace.send_request_exception(
                method,
                url,
                headers,
                e
            )
        raise
async def _request(
        self,
        method: str,
        str_or_url: StrOrURL, *,
        params: Optional[Mapping[str, str]]=None,
        data: Any=None,
        json: Any=None,
        cookies: Optional[LooseCookies]=None,
        headers: LooseHeaders=None,
        skip_auto_headers: Optional[Iterable[str]]=None,
        auth: Optional[BasicAuth]=None,
        allow_redirects: bool=True,
        max_redirects: int=10,
        compress: Optional[str]=None,
        chunked: Optional[bool]=None,
        expect100: bool=False,
        raise_for_status: Optional[bool]=None,
        read_until_eof: bool=True,
        proxy: Optional[StrOrURL]=None,
        proxy_auth: Optional[BasicAuth]=None,
        timeout: Union[ClientTimeout, object]=sentinel,
        ssl: Optional[Union[SSLContext, bool, Fingerprint]]=None,
        proxy_headers: Optional[LooseHeaders]=None,
        trace_request_ctx: Optional[SimpleNamespace]=None
) -> ClientResponse:
    """Send one request, follow redirects, and return the final response.

    The request is built with ``self._request_class`` and sent over a
    connection obtained from ``self._connector``.  While the response
    status is 301, 302, 303, 307 or 308 and ``allow_redirects`` is true,
    the target named by the ``Location`` (or ``URI``) header is requested
    in turn; intermediate responses are attached to the final response as
    ``_history``.  Traces are notified on start, on each redirect, on
    completion and on failure.

    Raises:
        RuntimeError: the session is closed.
        TypeError: ``ssl`` is not one of ``SSL_ALLOWED_TYPES``.
        ValueError: ``data`` and ``json`` are both given; credentials are
            supplied in more than one way; or a redirect points to a
            scheme other than http/https.
        InvalidURL: the request, proxy or redirect URL cannot be parsed.
        ServerTimeoutError: connecting timed out.
        ClientOSError: an ``OSError`` occurred while sending the request
            or starting the response.
        TooManyRedirects: ``max_redirects`` was reached.
    """

    # NOTE: timeout clamps existing connect and read timeouts.  We cannot
    # set the default to None because we need to detect if the user wants
    # to use the existing timeouts by setting timeout to None.

    if self.closed:
        raise RuntimeError('Session is closed')

    if not isinstance(ssl, SSL_ALLOWED_TYPES):
        raise TypeError("ssl should be SSLContext, bool, Fingerprint, "
                        "or None, got {!r} instead.".format(ssl))

    if data is not None and json is not None:
        raise ValueError(
            'data and json parameters can not be used at the same time')
    elif json is not None:
        data = payload.JsonPayload(json, dumps=self._json_serialize)

    if not isinstance(chunked, bool) and chunked is not None:
        warnings.warn(
            'Chunk size is deprecated #1615', DeprecationWarning)

    redirects = 0
    history = []
    version = self._version

    # Merge with default headers and transform to CIMultiDict
    headers = self._prepare_headers(headers)
    proxy_headers = self._prepare_headers(proxy_headers)

    try:
        url = URL(str_or_url)
    except ValueError:
        raise InvalidURL(str_or_url)

    skip_headers = set(self._skip_auto_headers)
    if skip_auto_headers is not None:
        for i in skip_auto_headers:
            skip_headers.add(istr(i))

    if proxy is not None:
        try:
            proxy = URL(proxy)
        except ValueError:
            raise InvalidURL(proxy)

    if timeout is sentinel:
        real_timeout = self._timeout  # type: ClientTimeout
    else:
        if not isinstance(timeout, ClientTimeout):
            # A bare value is taken as the total timeout.
            real_timeout = ClientTimeout(total=timeout)  # type: ignore
        else:
            real_timeout = timeout
    # timeout is cumulative for all request operations
    # (request, redirects, responses, data consuming)
    tm = TimeoutHandle(self._loop, real_timeout.total)
    handle = tm.start()

    traces = [
        Trace(
            self,
            trace_config,
            trace_config.trace_config_ctx(
                trace_request_ctx=trace_request_ctx)
        )
        for trace_config in self._trace_configs
    ]

    for trace in traces:
        await trace.send_request_start(
            method,
            url,
            headers
        )

    timer = tm.timer()
    try:
        with timer:
            # One iteration per hop of the redirect chain.
            while True:
                url, auth_from_url = strip_auth_from_url(url)
                if auth and auth_from_url:
                    raise ValueError("Cannot combine AUTH argument with "
                                     "credentials encoded in URL")

                # Precedence: explicit argument, then URL credentials,
                # then the session default.
                if auth is None:
                    auth = auth_from_url
                if auth is None:
                    auth = self._default_auth
                # It would be confusing if we support explicit
                # Authorization header with auth argument
                if (headers is not None and
                        auth is not None and
                        hdrs.AUTHORIZATION in headers):
                    raise ValueError("Cannot combine AUTHORIZATION header "
                                     "with AUTH argument or credentials "
                                     "encoded in URL")

                all_cookies = self._cookie_jar.filter_cookies(url)

                if cookies is not None:
                    # Per-request cookies go through a throw-away jar so
                    # they are filtered against the URL like session ones.
                    tmp_cookie_jar = CookieJar()
                    tmp_cookie_jar.update_cookies(cookies)
                    req_cookies = tmp_cookie_jar.filter_cookies(url)
                    if req_cookies:
                        all_cookies.load(req_cookies)

                if proxy is not None:
                    proxy = URL(proxy)
                elif self._trust_env:
                    # Use the first environment proxy registered for the
                    # scheme of the current URL.
                    for scheme, proxy_info in proxies_from_env().items():
                        if scheme == url.scheme:
                            proxy = proxy_info.proxy
                            proxy_auth = proxy_info.proxy_auth
                            break

                req = self._request_class(
                    method, url, params=params, headers=headers,
                    skip_auto_headers=skip_headers, data=data,
                    cookies=all_cookies, auth=auth, version=version,
                    compress=compress, chunked=chunked,
                    expect100=expect100, loop=self._loop,
                    response_class=self._response_class,
                    proxy=proxy, proxy_auth=proxy_auth, timer=timer,
                    session=self,
                    ssl=ssl, proxy_headers=proxy_headers, traces=traces)

                # connection timeout
                try:
                    with CeilTimeout(real_timeout.connect,
                                     loop=self._loop):
                        assert self._connector is not None
                        conn = await self._connector.connect(
                            req,
                            traces=traces,
                            timeout=real_timeout
                        )
                except asyncio.TimeoutError as exc:
                    raise ServerTimeoutError(
                        'Connection timeout '
                        'to host {0}'.format(url)) from exc

                assert conn.transport is not None

                assert conn.protocol is not None
                conn.protocol.set_response_params(
                    timer=timer,
                    skip_payload=method.upper() == 'HEAD',
                    read_until_eof=read_until_eof,
                    auto_decompress=self._auto_decompress,
                    read_timeout=real_timeout.sock_read)

                # Inner handlers close the response / connection on any
                # failure; the outer ones translate OSError (but not
                # ClientError) into ClientOSError.
                try:
                    try:
                        resp = await req.send(conn)
                        try:
                            await resp.start(conn)
                        except BaseException:
                            resp.close()
                            raise
                    except BaseException:
                        conn.close()
                        raise
                except ClientError:
                    raise
                except OSError as exc:
                    raise ClientOSError(*exc.args) from exc

                self._cookie_jar.update_cookies(resp.cookies, resp.url)

                # redirects
                if resp.status in (
                        301, 302, 303, 307, 308) and allow_redirects:

                    for trace in traces:
                        await trace.send_request_redirect(
                            method,
                            url,
                            headers,
                            resp
                        )

                    redirects += 1
                    history.append(resp)
                    if max_redirects and redirects >= max_redirects:
                        resp.close()
                        raise TooManyRedirects(
                            history[0].request_info, tuple(history))

                    # For 301 and 302, mimic IE, now changed in RFC
                    # https://github.com/kennethreitz/requests/pull/269
                    if (resp.status == 303 and
                            resp.method != hdrs.METH_HEAD) \
                            or (resp.status in (301, 302) and
                                resp.method == hdrs.METH_POST):
                        method = hdrs.METH_GET
                        data = None
                        if headers.get(hdrs.CONTENT_LENGTH):
                            headers.pop(hdrs.CONTENT_LENGTH)

                    r_url = (resp.headers.get(hdrs.LOCATION) or
                             resp.headers.get(hdrs.URI))
                    if r_url is None:
                        # see github.com/aio-libs/aiohttp/issues/2022
                        break
                    else:
                        # reading from correct redirection
                        # response is forbidden
                        resp.release()

                    try:
                        r_url = URL(
                            r_url, encoded=not self._requote_redirect_url)

                    except ValueError:
                        raise InvalidURL(r_url)

                    scheme = r_url.scheme
                    if scheme not in ('http', 'https', ''):
                        resp.close()
                        raise ValueError(
                            'Can redirect only to http or https')
                    elif not scheme:
                        # Relative redirect: resolve against the current URL.
                        r_url = url.join(r_url)

                    # Credentials are not carried over to another origin.
                    if url.origin() != r_url.origin():
                        auth = None
                        headers.pop(hdrs.AUTHORIZATION, None)

                    url = r_url
                    params = None
                    resp.release()
                    continue

                break

        # check response status
        if raise_for_status is None:
            raise_for_status = self._raise_for_status
        if raise_for_status:
            resp.raise_for_status()

        # register connection
        if handle is not None:
            if resp.connection is not None:
                resp.connection.add_callback(handle.cancel)
            else:
                handle.cancel()

        resp._history = tuple(history)

        for trace in traces:
            await trace.send_request_end(
                method,
                url,
                headers,
                resp
            )
        return resp

    except BaseException as e:
        # cleanup timer
        tm.close()
        if handle:
            handle.cancel()
            handle = None

        for trace in traces:
            await trace.send_request_exception(
                method,
                url,
                headers,
                e
            )
        raise
class ClientSession:
    """First-class interface for making HTTP requests.

    A session bundles a connector, a cookie jar and a set of per-session
    defaults (headers, auth, timeout, tracing, ...) that are applied to every
    request issued through it.  Subclassing is rejected by
    ``__init_subclass__``.
    """

    __slots__ = (
        "_base_url",
        "_source_traceback",
        "_connector",
        "_loop",
        "_cookie_jar",
        "_connector_owner",
        "_default_auth",
        "_version",
        "_json_serialize",
        "_requote_redirect_url",
        "_timeout",
        "_raise_for_status",
        "_auto_decompress",
        "_trust_env",
        "_default_headers",
        "_skip_auto_headers",
        "_request_class",
        "_response_class",
        "_ws_response_class",
        "_trace_configs",
        "_read_bufsize",
    )

    def __init__(
        self,
        base_url: Optional[StrOrURL] = None,
        *,
        connector: Optional[BaseConnector] = None,
        cookies: Optional[LooseCookies] = None,
        headers: Optional[LooseHeaders] = None,
        skip_auto_headers: Optional[Iterable[str]] = None,
        auth: Optional[BasicAuth] = None,
        json_serialize: JSONEncoder = json.dumps,
        request_class: Type[ClientRequest] = ClientRequest,
        response_class: Type[ClientResponse] = ClientResponse,
        ws_response_class: Type[ClientWebSocketResponse] = ClientWebSocketResponse,
        version: HttpVersion = http.HttpVersion11,
        cookie_jar: Optional[AbstractCookieJar] = None,
        connector_owner: bool = True,
        raise_for_status: Union[
            bool, Callable[[ClientResponse], Awaitable[None]]
        ] = False,
        timeout: Union[_SENTINEL, ClientTimeout] = sentinel,
        auto_decompress: bool = True,
        trust_env: bool = False,
        requote_redirect_url: bool = True,
        trace_configs: Optional[List[TraceConfig]] = None,
        read_bufsize: int = 2 ** 16,
    ) -> None:
        """Store the session defaults and bind the session to the running loop.

        Must be called while an event loop is running, because the loop is
        obtained with ``asyncio.get_running_loop()``.

        :param base_url: optional URL that request URLs are joined onto (see
            ``_build_url``).  A non-``URL`` value is converted with ``URL()``
            and asserted to be equal to its own origin.
        :param connector: connector used for all requests; a ``TCPConnector``
            is created when omitted.
        :param cookies: cookies loaded into the cookie jar at construction.
        :param headers: default headers, kept as a ``CIMultiDict`` and merged
            into each request by ``_prepare_headers``.
        :param skip_auto_headers: header names stored as a frozenset of
            ``istr`` and passed to every request object.
        :param auth: auth used by ``_request`` when neither the call nor the
            URL supplies credentials.
        :param json_serialize: callable used to serialize ``json=`` payloads.
        :param request_class: class instantiated for every outgoing request.
        :param response_class: class handed to the request object as
            ``response_class``.
        :param ws_response_class: class instantiated by ``_ws_connect``.
        :param version: HTTP version passed to every request object.
        :param cookie_jar: cookie storage; a ``CookieJar`` is created when
            omitted.
        :param connector_owner: when true, ``close()`` also closes the
            connector.
        :param raise_for_status: session default used by ``_request`` when the
            per-call value is ``None``; either a bool or an awaitable callable
            taking the response.
        :param timeout: session timeout; ``DEFAULT_TIMEOUT`` is used when the
            argument is left as ``sentinel``.
        :param auto_decompress: forwarded to the connection protocol via
            ``set_response_params``.
        :param trust_env: when true and no proxy is given, ``_request`` looks
            the proxy up with ``get_env_proxy_for_url``.
        :param requote_redirect_url: redirect targets are parsed with
            ``encoded=not requote_redirect_url``.
        :param trace_configs: trace configs; each one is frozen here.
        :param read_bufsize: default read buffer size used when a request does
            not pass its own.
        :raises RuntimeError: if the connector belongs to a different loop.
        """
        if base_url is None or isinstance(base_url, URL):
            self._base_url: Optional[URL] = base_url
        else:
            self._base_url = URL(base_url)
            assert (
                self._base_url.origin() == self._base_url
            ), "Only absolute URLs without path part are supported"

        loop = asyncio.get_running_loop()

        if connector is None:
            connector = TCPConnector()

        # Initialize these three attrs before raising any exception,
        # they are used in __del__
        self._connector = connector  # type: Optional[BaseConnector]
        self._loop = loop
        if loop.get_debug():
            # In debug mode remember where the session was created so that
            # __del__ can report it for an unclosed session.
            self._source_traceback = traceback.extract_stack(
                sys._getframe(1)
            )  # type: Optional[traceback.StackSummary]
        else:
            self._source_traceback = None

        if connector._loop is not loop:
            raise RuntimeError("Session and connector have to use same event loop")

        if cookie_jar is None:
            cookie_jar = CookieJar()
        self._cookie_jar = cookie_jar

        if cookies is not None:
            self._cookie_jar.update_cookies(cookies)

        self._connector_owner = connector_owner
        self._default_auth = auth
        self._version = version
        self._json_serialize = json_serialize
        if timeout is sentinel:
            self._timeout = DEFAULT_TIMEOUT
        else:
            self._timeout = timeout  # type: ignore[assignment]
        self._raise_for_status = raise_for_status
        self._auto_decompress = auto_decompress
        self._trust_env = trust_env
        self._requote_redirect_url = requote_redirect_url
        self._read_bufsize = read_bufsize

        # Convert to list of tuples
        if headers:
            real_headers = CIMultiDict(headers)  # type: CIMultiDict[str]
        else:
            real_headers = CIMultiDict()
        self._default_headers = real_headers  # type: CIMultiDict[str]
        if skip_auto_headers is not None:
            self._skip_auto_headers = frozenset(istr(i) for i in skip_auto_headers)
        else:
            self._skip_auto_headers = frozenset()

        self._request_class = request_class
        self._response_class = response_class
        self._ws_response_class = ws_response_class

        self._trace_configs = trace_configs or []
        for trace_config in self._trace_configs:
            trace_config.freeze()

    def __init_subclass__(cls: Type["ClientSession"]) -> None:
        """Reject any attempt to subclass ``ClientSession`` with ``TypeError``."""
        raise TypeError(
            "Inheritance class {} from ClientSession "
            "is forbidden".format(cls.__name__)
        )

    def __del__(self, _warnings: Any = warnings) -> None:
        """Warn when the session is garbage-collected without being closed.

        Emits a ``ResourceWarning`` and reports the situation to the loop's
        exception handler, including the creation traceback when one was
        recorded.
        """
        # NOTE(review): ``warnings`` is bound as a default argument, presumably
        # so it stays reachable during interpreter shutdown -- confirm.
        try:
            if not self.closed:
                _warnings.warn(
                    f"Unclosed client session {self!r}",
                    ResourceWarning,
                    source=self,
                )
                context = {"client_session": self, "message": "Unclosed client session"}
                if self._source_traceback is not None:
                    context["source_traceback"] = self._source_traceback
                self._loop.call_exception_handler(context)
        except AttributeError:
            # loop was not initialized yet,
            # either self._connector or self._loop doesn't exist
            pass

    def request(
        self, method: str, url: StrOrURL, **kwargs: Any
    ) -> "_RequestContextManager":
        """Perform HTTP request.

        Wraps the ``_request`` coroutine in a ``_RequestContextManager``; all
        keyword arguments are forwarded to ``_request`` unchanged.
        """
        return _RequestContextManager(self._request(method, url, **kwargs))

    def _build_url(self, str_or_url: StrOrURL) -> URL:
        """Return the URL to request for ``str_or_url``.

        Without a base URL the argument is simply parsed.  With a base URL the
        argument is asserted to be relative with a path starting with ``/`` and
        is joined onto the base URL.
        """
        url = URL(str_or_url)
        if self._base_url is None:
            return url
        else:
            assert not url.is_absolute() and url.path.startswith("/")
            return self._base_url.join(url)

    async def _request(
        self,
        method: str,
        str_or_url: StrOrURL,
        *,
        params: Optional[Mapping[str, str]] = None,
        data: Any = None,
        json: Any = None,
        cookies: Optional[LooseCookies] = None,
        headers: Optional[LooseHeaders] = None,
        skip_auto_headers: Optional[Iterable[str]] = None,
        auth: Optional[BasicAuth] = None,
        allow_redirects: bool = True,
        max_redirects: int = 10,
        compress: Optional[str] = None,
        chunked: Optional[bool] = None,
        expect100: bool = False,
        raise_for_status: Union[
            None, bool, Callable[[ClientResponse], Awaitable[None]]
        ] = None,
        read_until_eof: bool = True,
        proxy: Optional[StrOrURL] = None,
        proxy_auth: Optional[BasicAuth] = None,
        timeout: Union[ClientTimeout, _SENTINEL] = sentinel,
        ssl: Optional[Union[SSLContext, bool, Fingerprint]] = None,
        proxy_headers: Optional[LooseHeaders] = None,
        trace_request_ctx: Optional[SimpleNamespace] = None,
        read_bufsize: Optional[int] = None,
    ) -> ClientResponse:
        """Send a request, follow redirects, and return the final response.

        The method validates its arguments, merges the session defaults
        (headers, auth, cookies, timeout, read buffer size), then loops:
        build a request object, obtain a connection from the connector, send
        the request and start the response.  While the response is a
        301/302/303/307/308 and ``allow_redirects`` is true, the loop repeats
        with the redirect target.  Trace callbacks are invoked at start, on
        each redirect, at the end, and on any exception.

        :raises RuntimeError: if the session is closed.
        :raises TypeError: if ``ssl`` is not one of ``SSL_ALLOWED_TYPES``.
        :raises ValueError: if ``data`` and ``json`` are both given, if auth is
            supplied in more than one way, or if a redirect points to a
            non-http(s) scheme.
        :raises InvalidURL: if the request, proxy or redirect URL cannot be
            parsed.
        :raises ServerTimeoutError: if acquiring a connection times out.
        :raises ClientOSError: if sending/starting fails with an ``OSError``
            that is not already a ``ClientError``.
        :raises TooManyRedirects: once ``max_redirects`` redirects were seen.
        """

        # NOTE: timeout clamps existing connect and read timeouts.  We cannot
        # set the default to None because we need to detect if the user wants
        # to use the existing timeouts by setting timeout to None.

        if self.closed:
            raise RuntimeError("Session is closed")

        if not isinstance(ssl, SSL_ALLOWED_TYPES):
            raise TypeError(
                "ssl should be SSLContext, bool, Fingerprint, "
                "or None, got {!r} instead.".format(ssl)
            )

        if data is not None and json is not None:
            raise ValueError(
                "data and json parameters can not be used at the same time"
            )
        elif json is not None:
            data = payload.JsonPayload(json, dumps=self._json_serialize)

        redirects = 0
        history = []
        version = self._version

        # Merge with default headers and transform to CIMultiDict
        headers = self._prepare_headers(headers)
        proxy_headers = self._prepare_headers(proxy_headers)

        try:
            url = self._build_url(str_or_url)
        except ValueError as e:
            raise InvalidURL(str_or_url) from e

        # Per-request skip list extends (does not replace) the session's.
        skip_headers = set(self._skip_auto_headers)
        if skip_auto_headers is not None:
            for i in skip_auto_headers:
                skip_headers.add(istr(i))

        if proxy is not None:
            try:
                proxy = URL(proxy)
            except ValueError as e:
                raise InvalidURL(proxy) from e

        if timeout is sentinel:
            real_timeout = self._timeout  # type: ClientTimeout
        else:
            if not isinstance(timeout, ClientTimeout):
                # Any other value is treated as the total timeout.
                real_timeout = ClientTimeout(total=timeout)  # type: ignore[arg-type]
            else:
                real_timeout = timeout
        # timeout is cumulative for all request operations
        # (request, redirects, responses, data consuming)
        tm = TimeoutHandle(self._loop, real_timeout.total)
        handle = tm.start()

        if read_bufsize is None:
            read_bufsize = self._read_bufsize

        # One Trace per configured TraceConfig, each with its own context.
        traces = [
            Trace(
                self,
                trace_config,
                trace_config.trace_config_ctx(trace_request_ctx=trace_request_ctx),
            )
            for trace_config in self._trace_configs
        ]

        for trace in traces:
            await trace.send_request_start(method, url.update_query(params), headers)

        timer = tm.timer()
        try:
            with timer:
                while True:
                    # Credentials embedded in the URL are split off and must
                    # not be combined with an explicit ``auth`` argument.
                    url, auth_from_url = strip_auth_from_url(url)
                    if auth and auth_from_url:
                        raise ValueError(
                            "Cannot combine AUTH argument with "
                            "credentials encoded in URL"
                        )

                    # Precedence: explicit auth, then URL auth, then session
                    # default.
                    if auth is None:
                        auth = auth_from_url
                    if auth is None:
                        auth = self._default_auth
                    # It would be confusing if we support explicit
                    # Authorization header with auth argument
                    if (
                        headers is not None
                        and auth is not None
                        and hdrs.AUTHORIZATION in headers
                    ):
                        raise ValueError(
                            "Cannot combine AUTHORIZATION header "
                            "with AUTH argument or credentials "
                            "encoded in URL"
                        )

                    all_cookies = self._cookie_jar.filter_cookies(url)

                    if cookies is not None:
                        # Per-request cookies go through a temporary jar so
                        # they are filtered for this URL before being merged
                        # with the session cookies.
                        tmp_cookie_jar = CookieJar()
                        tmp_cookie_jar.update_cookies(cookies)
                        req_cookies = tmp_cookie_jar.filter_cookies(url)
                        if req_cookies:
                            all_cookies.load(req_cookies)

                    if proxy is not None:
                        proxy = URL(proxy)
                    elif self._trust_env:
                        # A failed lookup (LookupError) leaves proxy settings
                        # untouched.
                        with suppress(LookupError):
                            proxy, proxy_auth = get_env_proxy_for_url(url)

                    req = self._request_class(
                        method,
                        url,
                        params=params,
                        headers=headers,
                        skip_auto_headers=skip_headers,
                        data=data,
                        cookies=all_cookies,
                        auth=auth,
                        version=version,
                        compress=compress,
                        chunked=chunked,
                        expect100=expect100,
                        loop=self._loop,
                        response_class=self._response_class,
                        proxy=proxy,
                        proxy_auth=proxy_auth,
                        timer=timer,
                        session=self,
                        ssl=ssl,
                        proxy_headers=proxy_headers,
                        traces=traces,
                    )

                    # connection timeout
                    try:
                        async with ceil_timeout(real_timeout.connect):
                            assert self._connector is not None
                            conn = await self._connector.connect(
                                req, traces=traces, timeout=real_timeout
                            )
                    except asyncio.TimeoutError as exc:
                        raise ServerTimeoutError(
                            f"Connection timeout to host {url}"
                        ) from exc

                    assert conn.transport is not None

                    assert conn.protocol is not None
                    conn.protocol.set_response_params(
                        timer=timer,
                        skip_payload=method.upper() == "HEAD",
                        read_until_eof=read_until_eof,
                        auto_decompress=self._auto_decompress,
                        read_timeout=real_timeout.sock_read,
                        read_bufsize=read_bufsize,
                    )

                    # On any failure close the response (if one exists) and
                    # the connection; plain OSErrors are re-raised as
                    # ClientOSError, ClientErrors pass through unchanged.
                    try:
                        try:
                            resp = await req.send(conn)
                            try:
                                await resp.start(conn)
                            except BaseException:
                                resp.close()
                                raise
                        except BaseException:
                            conn.close()
                            raise
                    except ClientError:
                        raise
                    except OSError as exc:
                        raise ClientOSError(*exc.args) from exc

                    self._cookie_jar.update_cookies(resp.cookies, resp.url)

                    # redirects
                    if resp.status in (301, 302, 303, 307, 308) and allow_redirects:

                        for trace in traces:
                            await trace.send_request_redirect(
                                method, url.update_query(params), headers, resp
                            )

                        redirects += 1
                        history.append(resp)
                        # A falsy max_redirects (e.g. 0) disables the limit.
                        if max_redirects and redirects >= max_redirects:
                            resp.close()
                            raise TooManyRedirects(
                                history[0].request_info, tuple(history)
                            )

                        # For 301 and 302, mimic IE, now changed in RFC
                        # https://github.com/kennethreitz/requests/pull/269
                        if (resp.status == 303 and resp.method != hdrs.METH_HEAD) or (
                            resp.status in (301, 302) and resp.method == hdrs.METH_POST
                        ):
                            # The follow-up becomes a body-less GET.
                            method = hdrs.METH_GET
                            data = None
                            if headers.get(hdrs.CONTENT_LENGTH):
                                headers.pop(hdrs.CONTENT_LENGTH)

                        r_url = resp.headers.get(hdrs.LOCATION) or resp.headers.get(
                            hdrs.URI
                        )
                        if r_url is None:
                            # see github.com/aio-libs/aiohttp/issues/2022
                            break
                        else:
                            # reading from correct redirection
                            # response is forbidden
                            resp.release()

                        try:
                            parsed_url = URL(
                                r_url, encoded=not self._requote_redirect_url
                            )

                        except ValueError as e:
                            raise InvalidURL(r_url) from e

                        scheme = parsed_url.scheme
                        if scheme not in ("http", "https", ""):
                            resp.close()
                            raise ValueError("Can redirect only to http or https")
                        elif not scheme:
                            # Scheme-less target: resolve against the current
                            # URL.
                            parsed_url = url.join(parsed_url)

                        is_same_host_https_redirect = (
                            url.host == parsed_url.host
                            and parsed_url.scheme == "https"
                            and url.scheme == "http"
                        )

                        # Drop credentials when the redirect leaves the
                        # origin, except for an http -> https upgrade on the
                        # same host.
                        if (
                            url.origin() != parsed_url.origin()
                            and not is_same_host_https_redirect
                        ):
                            auth = None
                            headers.pop(hdrs.AUTHORIZATION, None)

                        url = parsed_url
                        # Query params are only sent with the first request.
                        params = None
                        resp.release()
                        continue

                    break

            # check response status
            if raise_for_status is None:
                raise_for_status = self._raise_for_status

            if raise_for_status is None:
                pass
            elif callable(raise_for_status):
                await raise_for_status(resp)
            elif raise_for_status:
                resp.raise_for_status()

            # register connection
            if handle is not None:
                if resp.connection is not None:
                    # Cancel the timeout handle via the connection's callback.
                    resp.connection.add_callback(handle.cancel)
                else:
                    handle.cancel()

            resp._history = tuple(history)

            for trace in traces:
                await trace.send_request_end(
                    method, url.update_query(params), headers, resp
                )
            return resp

        except BaseException as e:
            # cleanup timer
            tm.close()
            if handle:
                handle.cancel()
                handle = None

            for trace in traces:
                await trace.send_request_exception(
                    method, url.update_query(params), headers, e
                )
            raise

    def ws_connect(
        self,
        url: StrOrURL,
        *,
        method: str = hdrs.METH_GET,
        protocols: Iterable[str] = (),
        timeout: Union[ClientWSTimeout, float, _SENTINEL] = sentinel,
        receive_timeout: Optional[float] = None,
        autoclose: bool = True,
        autoping: bool = True,
        heartbeat: Optional[float] = None,
        auth: Optional[BasicAuth] = None,
        origin: Optional[str] = None,
        params: Optional[Mapping[str, str]] = None,
        headers: Optional[LooseHeaders] = None,
        proxy: Optional[StrOrURL] = None,
        proxy_auth: Optional[BasicAuth] = None,
        ssl: Union[SSLContext, bool, None, Fingerprint] = None,
        proxy_headers: Optional[LooseHeaders] = None,
        compress: int = 0,
        max_msg_size: int = 4 * 1024 * 1024,
    ) -> "_WSRequestContextManager":
        """Initiate websocket connection.

        Wraps the ``_ws_connect`` coroutine in a ``_WSRequestContextManager``;
        every argument is forwarded to ``_ws_connect`` unchanged.
        """
        return _WSRequestContextManager(
            self._ws_connect(
                url,
                method=method,
                protocols=protocols,
                timeout=timeout,
                receive_timeout=receive_timeout,
                autoclose=autoclose,
                autoping=autoping,
                heartbeat=heartbeat,
                auth=auth,
                origin=origin,
                params=params,
                headers=headers,
                proxy=proxy,
                proxy_auth=proxy_auth,
                ssl=ssl,
                proxy_headers=proxy_headers,
                compress=compress,
                max_msg_size=max_msg_size,
            )
        )

    async def _ws_connect(
        self,
        url: StrOrURL,
        *,
        method: str = hdrs.METH_GET,
        protocols: Iterable[str] = (),
        timeout: Union[ClientWSTimeout, float, _SENTINEL] = sentinel,
        receive_timeout: Optional[float] = None,
        autoclose: bool = True,
        autoping: bool = True,
        heartbeat: Optional[float] = None,
        auth: Optional[BasicAuth] = None,
        origin: Optional[str] = None,
        params: Optional[Mapping[str, str]] = None,
        headers: Optional[LooseHeaders] = None,
        proxy: Optional[StrOrURL] = None,
        proxy_auth: Optional[BasicAuth] = None,
        ssl: Union[SSLContext, bool, None, Fingerprint] = None,
        proxy_headers: Optional[LooseHeaders] = None,
        compress: int = 0,
        max_msg_size: int = 4 * 1024 * 1024,
    ) -> ClientWebSocketResponse:
        """Perform the websocket opening handshake and build the response.

        Sends an upgrade request through ``self.request`` and validates the
        status (101), the ``Upgrade`` and ``Connection`` headers and the
        ``Sec-WebSocket-Accept`` challenge.  It then negotiates the
        sub-protocol and compression, and wires a reader/writer pair onto the
        connection.  On any failure after the request the response is closed
        and the exception re-raised.

        :raises TypeError: if ``ssl`` is not one of ``SSL_ALLOWED_TYPES``.
        :raises WSServerHandshakeError: if the server's handshake response is
            not acceptable.
        """
        if timeout is not sentinel:
            if isinstance(timeout, ClientWSTimeout):
                ws_timeout = timeout
            else:
                # Legacy form: a bare number is taken as the close timeout.
                warnings.warn(
                    "parameter 'timeout' of type 'float' "
                    "is deprecated, please use "
                    "'timeout=ClientWSTimeout(ws_close=...)'",
                    DeprecationWarning,
                    stacklevel=2,
                )
                ws_timeout = ClientWSTimeout(ws_close=timeout)  # type: ignore[arg-type]
        else:
            ws_timeout = DEFAULT_WS_CLIENT_TIMEOUT
        if receive_timeout is not None:
            # Legacy argument: overrides ws_receive on whichever timeout was
            # selected above.
            warnings.warn(
                "float parameter 'receive_timeout' "
                "is deprecated, please use parameter "
                "'timeout=ClientWSTimeout(ws_receive=...)'",
                DeprecationWarning,
                stacklevel=2,
            )
            ws_timeout = dataclasses.replace(ws_timeout, ws_receive=receive_timeout)

        if headers is None:
            real_headers = CIMultiDict()  # type: CIMultiDict[str]
        else:
            real_headers = CIMultiDict(headers)

        default_headers = {
            hdrs.UPGRADE: "websocket",
            hdrs.CONNECTION: "upgrade",
            hdrs.SEC_WEBSOCKET_VERSION: "13",
        }

        # Caller-supplied values for these headers win over the defaults.
        for key, value in default_headers.items():
            real_headers.setdefault(key, value)

        # Random 16-byte key, base64-encoded; also used below to verify the
        # server's accept header.
        sec_key = base64.b64encode(os.urandom(16))
        real_headers[hdrs.SEC_WEBSOCKET_KEY] = sec_key.decode()

        if protocols:
            real_headers[hdrs.SEC_WEBSOCKET_PROTOCOL] = ",".join(protocols)
        if origin is not None:
            real_headers[hdrs.ORIGIN] = origin
        if compress:
            extstr = ws_ext_gen(compress=compress)
            real_headers[hdrs.SEC_WEBSOCKET_EXTENSIONS] = extstr

        if not isinstance(ssl, SSL_ALLOWED_TYPES):
            raise TypeError(
                "ssl should be SSLContext, bool, Fingerprint, "
                "or None, got {!r} instead.".format(ssl)
            )

        # send request
        resp = await self.request(
            method,
            url,
            params=params,
            headers=real_headers,
            read_until_eof=False,
            auth=auth,
            proxy=proxy,
            proxy_auth=proxy_auth,
            ssl=ssl,
            proxy_headers=proxy_headers,
        )

        try:
            # check handshake
            if resp.status != 101:
                raise WSServerHandshakeError(
                    resp.request_info,
                    resp.history,
                    message="Invalid response status",
                    status=resp.status,
                    headers=resp.headers,
                )

            if resp.headers.get(hdrs.UPGRADE, "").lower() != "websocket":
                raise WSServerHandshakeError(
                    resp.request_info,
                    resp.history,
                    message="Invalid upgrade header",
                    status=resp.status,
                    headers=resp.headers,
                )

            if resp.headers.get(hdrs.CONNECTION, "").lower() != "upgrade":
                raise WSServerHandshakeError(
                    resp.request_info,
                    resp.history,
                    message="Invalid connection header",
                    status=resp.status,
                    headers=resp.headers,
                )

            # key calculation
            # Expected accept value: base64(sha1(sec_key + WS_KEY)).
            r_key = resp.headers.get(hdrs.SEC_WEBSOCKET_ACCEPT, "")
            match = base64.b64encode(hashlib.sha1(sec_key + WS_KEY).digest()).decode()
            if r_key != match:
                raise WSServerHandshakeError(
                    resp.request_info,
                    resp.history,
                    message="Invalid challenge response",
                    status=resp.status,
                    headers=resp.headers,
                )

            # websocket protocol
            # Pick the first protocol in the server's list that was requested.
            protocol = None
            if protocols and hdrs.SEC_WEBSOCKET_PROTOCOL in resp.headers:
                resp_protocols = [
                    proto.strip()
                    for proto in resp.headers[hdrs.SEC_WEBSOCKET_PROTOCOL].split(",")
                ]

                for proto in resp_protocols:
                    if proto in protocols:
                        protocol = proto
                        break

            # websocket compress
            notakeover = False
            if compress:
                compress_hdrs = resp.headers.get(hdrs.SEC_WEBSOCKET_EXTENSIONS)
                if compress_hdrs:
                    try:
                        compress, notakeover = ws_ext_parse(compress_hdrs)
                    except WSHandshakeError as exc:
                        raise WSServerHandshakeError(
                            resp.request_info,
                            resp.history,
                            message=exc.args[0],
                            status=resp.status,
                            headers=resp.headers,
                        ) from exc
                else:
                    # Server sent no extensions header: disable compression.
                    compress = 0
                    notakeover = False

            conn = resp.connection
            assert conn is not None
            conn_proto = conn.protocol
            assert conn_proto is not None
            transport = conn.transport
            assert transport is not None
            reader = FlowControlDataQueue(
                conn_proto, 2 ** 16, loop=self._loop
            )  # type: FlowControlDataQueue[WSMessage]
            conn_proto.set_parser(WebSocketReader(reader, max_msg_size), reader)
            writer = WebSocketWriter(
                conn_proto,
                transport,
                use_mask=True,
                compress=compress,
                notakeover=notakeover,
            )
        except BaseException:
            resp.close()
            raise
        else:
            return self._ws_response_class(
                reader,
                writer,
                protocol,
                resp,
                ws_timeout,
                autoclose,
                autoping,
                self._loop,
                heartbeat=heartbeat,
                compress=compress,
                client_notakeover=notakeover,
            )

    def _prepare_headers(self, headers: Optional[LooseHeaders]) -> "CIMultiDict[str]":
        """Return the session default headers merged with ``headers``.

        The result is a new ``CIMultiDict``.  The first occurrence of a key in
        ``headers`` replaces any default for that key; further occurrences of
        the same key are added alongside it.
        """
        # Convert headers to MultiDict
        result = CIMultiDict(self._default_headers)
        if headers:
            if not isinstance(headers, (MultiDictProxy, MultiDict)):
                headers = CIMultiDict(headers)
            added_names = set()  # type: Set[str]
            for key, value in headers.items():
                if key in added_names:
                    result.add(key, value)
                else:
                    result[key] = value
                    added_names.add(key)
        return result

    def get(
        self, url: StrOrURL, *, allow_redirects: bool = True, **kwargs: Any
    ) -> "_RequestContextManager":
        """Perform HTTP GET request."""
        return _RequestContextManager(
            self._request(hdrs.METH_GET, url, allow_redirects=allow_redirects, **kwargs)
        )

    def options(
        self, url: StrOrURL, *, allow_redirects: bool = True, **kwargs: Any
    ) -> "_RequestContextManager":
        """Perform HTTP OPTIONS request."""
        return _RequestContextManager(
            self._request(
                hdrs.METH_OPTIONS, url, allow_redirects=allow_redirects, **kwargs
            )
        )

    def head(
        self, url: StrOrURL, *, allow_redirects: bool = False, **kwargs: Any
    ) -> "_RequestContextManager":
        """Perform HTTP HEAD request.

        Unlike the other helpers, redirects are not followed by default.
        """
        return _RequestContextManager(
            self._request(
                hdrs.METH_HEAD, url, allow_redirects=allow_redirects, **kwargs
            )
        )

    def post(
        self, url: StrOrURL, *, data: Any = None, **kwargs: Any
    ) -> "_RequestContextManager":
        """Perform HTTP POST request."""
        return _RequestContextManager(
            self._request(hdrs.METH_POST, url, data=data, **kwargs)
        )

    def put(
        self, url: StrOrURL, *, data: Any = None, **kwargs: Any
    ) -> "_RequestContextManager":
        """Perform HTTP PUT request."""
        return _RequestContextManager(
            self._request(hdrs.METH_PUT, url, data=data, **kwargs)
        )

    def patch(
        self, url: StrOrURL, *, data: Any = None, **kwargs: Any
    ) -> "_RequestContextManager":
        """Perform HTTP PATCH request."""
        return _RequestContextManager(
            self._request(hdrs.METH_PATCH, url, data=data, **kwargs)
        )

    def delete(self, url: StrOrURL, **kwargs: Any) -> "_RequestContextManager":
        """Perform HTTP DELETE request."""
        return _RequestContextManager(self._request(hdrs.METH_DELETE, url, **kwargs))

    async def close(self) -> None:
        """Close underlying connector.

        Release all acquired resources.  The connector itself is only closed
        when the session owns it; in either case the session drops its
        reference to the connector.
        """
        if not self.closed:
            if self._connector is not None and self._connector_owner:
                await self._connector.close()
            self._connector = None

    @property
    def closed(self) -> bool:
        """Is client session closed.

        A readonly property.  True when the session has no connector or the
        connector reports itself closed.
        """
        return self._connector is None or self._connector.closed

    @property
    def connector(self) -> Optional[BaseConnector]:
        """Connector instance used for the session."""
        return self._connector

    @property
    def cookie_jar(self) -> AbstractCookieJar:
        """The session cookies."""
        return self._cookie_jar

    @property
    def version(self) -> Tuple[int, int]:
        """The session HTTP protocol version."""
        return self._version

    @property
    def requote_redirect_url(self) -> bool:
        """Do URL requoting on redirection handling."""
        return self._requote_redirect_url

    @property
    def timeout(self) -> Union[object, ClientTimeout]:
        """Timeout for the session."""
        return self._timeout

    @property
    def headers(self) -> "CIMultiDict[str]":
        """The default headers of the client session."""
        return self._default_headers

    @property
    def skip_auto_headers(self) -> FrozenSet[istr]:
        """Headers for which autogeneration should be skipped"""
        return self._skip_auto_headers

    @property
    def auth(self) -> Optional[BasicAuth]:
        """An object that represents HTTP Basic Authorization"""
        return self._default_auth

    @property
    def json_serialize(self) -> JSONEncoder:
        """Json serializer callable"""
        return self._json_serialize

    @property
    def connector_owner(self) -> bool:
        """Should connector be closed on session closing"""
        return self._connector_owner

    @property
    def raise_for_status(
        self,
    ) -> Union[bool, Callable[[ClientResponse], Awaitable[None]]]:
        """
        Should `ClientResponse.raise_for_status()`
        be called for each response
        """
        return self._raise_for_status

    @property
    def auto_decompress(self) -> bool:
        """Should the body response be automatically decompressed"""
        return self._auto_decompress

    @property
    def trust_env(self) -> bool:
        """
        Should get proxies information
        from HTTP_PROXY / HTTPS_PROXY environment variables
        or ~/.netrc file if present
        """
        return self._trust_env

    @property
    def trace_configs(self) -> List[TraceConfig]:
        """A list of TraceConfig instances used for client tracing"""
        return self._trace_configs

    def detach(self) -> None:
        """Detach connector from session without closing the former.

        Session is switched to closed state anyway.
        """
        self._connector = None

    async def __aenter__(self) -> "ClientSession":
        """Enter ``async with``: return the session itself."""
        return self

    async def __aexit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """Leave ``async with``: close the session, ignoring exception details."""
        await self.close()
def __init__(self, project_url):
    """Split a project URL into API base URL and encoded project id.

    ``base_url`` is the origin of ``project_url``; ``project_id`` is the
    URL's path passed through ``url_encoded_path``.
    """
    self.api_path = "api/v4/projects"
    parsed = URL(project_url)
    origin, path = parsed.origin(), parsed.path
    self.base_url = origin
    self.project_id = url_encoded_path(path)
async def _request(self, method, url, *,
                   params=None,
                   data=None,
                   json=None,
                   headers=None,
                   skip_auto_headers=None,
                   auth=None,
                   allow_redirects=True,
                   max_redirects=10,
                   compress=None,
                   chunked=None,
                   expect100=False,
                   read_until_eof=True,
                   proxy=None,
                   proxy_auth=None,
                   timeout=sentinel,
                   verify_ssl=None,
                   fingerprint=None,
                   ssl_context=None,
                   ssl=None,
                   proxy_headers=None,
                   trace_request_ctx=None):
    """Send a request, follow redirects, and return the final response.

    Validates the arguments, merges the session's default headers, then
    loops: build a request object, obtain a connection, send the request and
    start the response.  While the response is a 301/302/303/307/308 and
    ``allow_redirects`` is true, the loop repeats with the redirect target.
    Trace callbacks are invoked at start, on each redirect, at the end, and
    on any exception.

    Raises ``RuntimeError`` if the session is closed, ``ValueError`` for
    conflicting ``data``/``json`` or auth arguments and for redirects to a
    non-http(s) scheme, ``InvalidURL`` for an unparsable request, proxy or
    redirect URL, ``ServerTimeoutError`` when acquiring a connection times
    out, ``ClientOSError`` for an ``OSError`` while sending, and
    ``TooManyRedirects`` once ``max_redirects`` redirects were seen.
    """

    # NOTE: timeout clamps existing connect and read timeouts.  We cannot
    # set the default to None because we need to detect if the user wants
    # to use the existing timeouts by setting timeout to None.

    if self.closed:
        raise RuntimeError('Session is closed')

    # NOTE(review): presumably reconciles the verify_ssl / ssl_context /
    # fingerprint arguments with ``ssl``; the helper is defined elsewhere.
    ssl = _merge_ssl_params(ssl, verify_ssl, ssl_context, fingerprint)

    if data is not None and json is not None:
        raise ValueError(
            'data and json parameters can not be used at the same time')
    elif json is not None:
        data = payload.JsonPayload(json, dumps=self._json_serialize)

    # Any ``chunked`` value that is neither a bool nor None only triggers a
    # deprecation warning; it is still passed on to the request below.
    if not isinstance(chunked, bool) and chunked is not None:
        warnings.warn(
            'Chunk size is deprecated #1615', DeprecationWarning)

    redirects = 0
    history = []
    version = self._version

    # Merge with default headers and transform to CIMultiDict
    headers = self._prepare_headers(headers)
    proxy_headers = self._prepare_headers(proxy_headers)

    try:
        url = URL(url)
    except ValueError:
        raise InvalidURL(url)

    # Per-request skip list extends (does not replace) the session's.
    skip_headers = set(self._skip_auto_headers)
    if skip_auto_headers is not None:
        for i in skip_auto_headers:
            skip_headers.add(istr(i))

    if proxy is not None:
        try:
            proxy = URL(proxy)
        except ValueError:
            raise InvalidURL(proxy)

    # timeout is cumulative for all request operations
    # (request, redirects, responses, data consuming)
    tm = TimeoutHandle(
        self._loop,
        timeout if timeout is not sentinel else self._read_timeout)
    handle = tm.start()

    # One Trace per configured TraceConfig, each with its own context.
    traces = [
        Trace(
            self,
            trace_config,
            trace_config.trace_config_ctx(
                trace_request_ctx=trace_request_ctx))
        for trace_config in self._trace_configs
    ]

    for trace in traces:
        await trace.send_request_start(method, url, headers)

    timer = tm.timer()
    try:
        with timer:
            while True:
                # Credentials embedded in the URL are split off and must
                # not be combined with an explicit ``auth`` argument.
                url, auth_from_url = strip_auth_from_url(url)
                if auth and auth_from_url:
                    raise ValueError("Cannot combine AUTH argument with "
                                     "credentials encoded in URL")

                # Precedence: explicit auth, then URL auth, then session
                # default.
                if auth is None:
                    auth = auth_from_url
                if auth is None:
                    auth = self._default_auth
                # It would be confusing if we support explicit
                # Authorization header with auth argument
                if (headers is not None and
                        auth is not None and
                        hdrs.AUTHORIZATION in headers):
                    raise ValueError("Cannot combine AUTHORIZATION header "
                                     "with AUTH argument or credentials "
                                     "encoded in URL")

                cookies = self._cookie_jar.filter_cookies(url)

                if proxy is not None:
                    proxy = URL(proxy)
                elif self._trust_env:
                    # Use the first environment proxy whose scheme matches
                    # the request URL's scheme.
                    for scheme, proxy_info in proxies_from_env().items():
                        if scheme == url.scheme:
                            proxy = proxy_info.proxy
                            proxy_auth = proxy_info.proxy_auth
                            break

                req = self._request_class(
                    method, url, params=params, headers=headers,
                    skip_auto_headers=skip_headers, data=data,
                    cookies=cookies, auth=auth, version=version,
                    compress=compress, chunked=chunked,
                    expect100=expect100, loop=self._loop,
                    response_class=self._response_class,
                    proxy=proxy, proxy_auth=proxy_auth, timer=timer,
                    session=self, auto_decompress=self._auto_decompress,
                    ssl=ssl, proxy_headers=proxy_headers, traces=traces)

                # connection timeout
                try:
                    with CeilTimeout(self._conn_timeout, loop=self._loop):
                        conn = await self._connector.connect(
                            req,
                            traces=traces
                        )
                except asyncio.TimeoutError as exc:
                    raise ServerTimeoutError(
                        'Connection timeout '
                        'to host {0}'.format(url)) from exc

                # NOTE(review): socket-option helpers defined elsewhere;
                # presumably enable TCP_NODELAY and disable TCP_CORK.
                tcp_nodelay(conn.transport, True)
                tcp_cork(conn.transport, False)
                # On any failure close the response (if one exists) and the
                # connection; plain OSErrors are re-raised as ClientOSError,
                # ClientErrors pass through unchanged.
                try:
                    try:
                        resp = await req.send(conn)
                        try:
                            await resp.start(conn, read_until_eof)
                        except BaseException:
                            resp.close()
                            raise
                    except BaseException:
                        conn.close()
                        raise
                except ClientError:
                    raise
                except OSError as exc:
                    raise ClientOSError(*exc.args) from exc

                self._cookie_jar.update_cookies(resp.cookies, resp.url)

                # redirects
                if resp.status in (
                        301, 302, 303, 307, 308) and allow_redirects:

                    for trace in traces:
                        await trace.send_request_redirect(
                            method, url, headers, resp)

                    redirects += 1
                    history.append(resp)
                    # A falsy max_redirects (e.g. 0) disables the limit.
                    if max_redirects and redirects >= max_redirects:
                        resp.close()
                        raise TooManyRedirects(
                            history[0].request_info, tuple(history))
                    else:
                        resp.release()

                    # For 301 and 302, mimic IE, now changed in RFC
                    # https://github.com/kennethreitz/requests/pull/269
                    if (resp.status == 303 and
                            resp.method != hdrs.METH_HEAD) \
                            or (resp.status in (301, 302) and
                                resp.method == hdrs.METH_POST):
                        # The follow-up becomes a body-less GET.
                        method = hdrs.METH_GET
                        data = None
                        if headers.get(hdrs.CONTENT_LENGTH):
                            headers.pop(hdrs.CONTENT_LENGTH)

                    r_url = (resp.headers.get(hdrs.LOCATION) or
                             resp.headers.get(hdrs.URI))
                    if r_url is None:
                        # see github.com/aio-libs/aiohttp/issues/2022
                        # NOTE(review): the response returned in this case
                        # has already been released above.
                        break

                    try:
                        r_url = URL(
                            r_url, encoded=not self.requote_redirect_url)

                    except ValueError:
                        raise InvalidURL(r_url)

                    scheme = r_url.scheme
                    if scheme not in ('http', 'https', ''):
                        resp.close()
                        raise ValueError(
                            'Can redirect only to http or https')
                    elif not scheme:
                        # Scheme-less target: resolve against the current
                        # URL.
                        r_url = url.join(r_url)

                    # Drop credentials when the redirect leaves the origin.
                    if url.origin() != r_url.origin():
                        auth = None
                        headers.pop(hdrs.AUTHORIZATION, None)

                    url = r_url
                    # Query params are only sent with the first request.
                    params = None
                    resp.release()
                    continue

                break

        # check response status
        if self._raise_for_status:
            resp.raise_for_status()

        # register connection
        if handle is not None:
            if resp.connection is not None:
                # Cancel the timeout handle via the connection's callback.
                resp.connection.add_callback(handle.cancel)
            else:
                handle.cancel()

        resp._history = tuple(history)

        for trace in traces:
            await trace.send_request_end(method, url, headers, resp)
        return resp

    except BaseException as e:
        # cleanup timer
        tm.close()
        if handle:
            handle.cancel()
            handle = None

        for trace in traces:
            await trace.send_request_exception(method, url, headers, e)
        raise
def test_origin_nonascii():
    """origin() of a URL with a non-ASCII host yields the punycode host and port only."""
    origin = URL("http://*****:*****@историк.рф:8888/path/to?a=1&b=2").origin()
    expected = "http://xn--h1aagokeh.xn--p1ai:8888"
    assert str(origin) == expected