def connection_from_url(self, url): """ Similar to :func:`urllib3.connectionpool.connection_from_url` but doesn't pass any additional parameters to the :class:`urllib3.connectionpool.ConnectionPool` constructor. Additional parameters are taken from the :class:`.PoolManager` constructor. """ u = parse_url(url) return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." u = parse_url(url) if u.scheme == "http": # For proxied HTTPS requests, httplib sets the necessary headers # on the CONNECT to the proxy. For HTTP, we'll definitely # need to set 'Host' at the very least. kw['headers'] = self._set_proxy_headers(url, kw.get('headers', self.headers)) return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
def urlopen(self, method, url, redirect=True, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." u = parse_url(url) if u.scheme == "http": # For proxied HTTPS requests, httplib sets the necessary headers # on the CONNECT to the proxy. For HTTP, we'll definitely # need to set 'Host' at the very least. kw['headers'] = self._set_proxy_headers( url, kw.get('headers', self.headers)) return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
def _set_proxy_headers(self, url, headers=None): """ Sets headers needed by proxies: specifically, the Accept and Host headers. Only sets headers not provided by the user. """ headers_ = {'Accept': '*/*'} netloc = parse_url(url).netloc if netloc: headers_['Host'] = netloc if headers: headers_.update(headers) return headers_
def __init__(self, proxy_url, num_pools=10, headers=None, proxy_headers=None, **connection_pool_kw): if isinstance(proxy_url, HTTPConnectionPool): proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, proxy_url.port) proxy = parse_url(proxy_url) if not proxy.port: port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) self.proxy = proxy self.proxy_headers = proxy_headers or {} assert self.proxy.scheme in ("http", "https"), \ 'Not supported proxy scheme %s' % self.proxy.scheme connection_pool_kw['_proxy'] = self.proxy connection_pool_kw['_proxy_headers'] = self.proxy_headers super(ProxyManager, self).__init__( num_pools, headers, **connection_pool_kw)
def __init__(self, proxy_url, num_pools=10, headers=None, proxy_headers=None, **connection_pool_kw): if isinstance(proxy_url, HTTPConnectionPool): proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, proxy_url.port) proxy = parse_url(proxy_url) if not proxy.port: port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) self.proxy = proxy self.proxy_headers = proxy_headers or {} assert self.proxy.scheme in ("http", "https"), \ 'Not supported proxy scheme %s' % self.proxy.scheme connection_pool_kw['_proxy'] = self.proxy connection_pool_kw['_proxy_headers'] = self.proxy_headers super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)
def urlopen(self, method, url, redirect=True, **kw): """ Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` with custom cross-host redirect logic and only sends the request-uri portion of the ``url``. The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. """ u = parse_url(url) conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) kw['assert_same_host'] = False kw['redirect'] = False if 'headers' not in kw: kw['headers'] = self.headers if self.proxy is not None and u.scheme == "http": response = conn.urlopen(method, url, **kw) else: response = conn.urlopen(method, u.request_uri, **kw) redirect_location = redirect and response.get_redirect_location() if not redirect_location: return response # Support relative URLs for redirecting. redirect_location = urljoin(url, redirect_location) # RFC 2616, Section 10.3.4 if response.status == 303: method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown kw['redirect'] = redirect return self.urlopen(method, redirect_location, **kw)
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. try: url = unicode(url) except NameError: # We're on Python 3. url = str(url) except UnicodeDecodeError: pass # Don't do any URL preparation for oddball schemes if ':' in url and not url.lower().startswith('http'): self.url = url return # Support for unicode domain names and paths. scheme, auth, host, port, path, query, fragment = parse_url(url) if not scheme: raise MissingSchema("Invalid URL {0!r}: No schema supplied. " "Perhaps you meant http://{0}?".format(url)) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # Only want to apply IDNA to the hostname try: host = host.encode('idna').decode('utf-8') except UnicodeError: raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if is_py2: if isinstance(scheme, str): scheme = scheme.encode('utf-8') if isinstance(netloc, str): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): fragment = fragment.encode('utf-8') enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url