def _process_responses(self, requests): for request, conn, _, response in requests: if response.status_code == 999: # skip processing the response if we failed to finish the # request in the first place. continue try: raw_response = conn.getresponse() urllib3_reponse = HTTPResponse.from_httplib( raw_response, # pool=conn, # TODO? connection=conn, preload_content=False, decode_content=False) except: # HACK: see below. response.__init__() self.build_exception_response_into(response, request, sys.exc_info()) else: # HACK: we re-initialize the response that we originally handed # out to the user because there are a bunch of properties that # cache various things and cleaning those up would be too much # of a hassle. response.__init__() self.build_response_into(response, request, urllib3_reponse)
class PassThroughProxyDownloader(DownloaderBase): def __init__(self, user_agent, proxy): self.user_agent = user_agent web_proxy_split = proxy.split(':') self.proxy_ip = web_proxy_split[0] self.proxy_port = int(web_proxy_split[1]) def download(self, url, referrer=None, if_modified_since=None, if_none_match=None): request = [ "GET %s HTTP/1.1" % url, "User-Agent: %s" % self.user_agent, "Accept-Encoding: gzip, deflate, compress", "Accept: */*" "Host: %s" % urlparse(url).hostname, "Connection: Close" # this may not be necessary ] for header, value in CommonHeaders.iteritems(): request.append("%s: %s" % (header, value)) for header, value in DownloaderBase.construct_headers(referrer, if_modified_since, if_none_match).iteritems(): request.append("%s: %s" % (header, value)) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: s.connect((self.proxy_ip, self.proxy_port)) except socket.error, ex: raise ProxyDownException(ex) s.send("\r\n".join(request) + '\r\n\r\n') r = HTTPResponse(s, strict=False, method='GET', buffering=True) r.begin() resp = urllib3Response.from_httplib(r, decode_content=False) s.close() r.close() response = requests.Response() # Fallback to None if there's no status_code, for whatever reason. response.status_code = getattr(resp, 'status', None) response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})) response.encoding = get_encoding_from_headers(response.headers) response._content = resp.data response.raw = resp response.reason = response.raw.reason response.url = url # don't care about cookies right now #extract_cookies_to_jar(response.cookies, req, resp) # don't worry about the requests' Request object right now until it is needed #response.request = req response.request = None return response
def _response_reader(): try: # Receive the response from the server try: # For Python 2.7+ versions, use buffering of HTTP # responses r = low_conn.getresponse(buffering=True) except TypeError: # For compatibility with Python 2.6 versions and back r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, decode_content=False) response_queue.put(resp) except Exception as e: response_queue.put(e)
def send( self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None ): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest <PreparedRequest>` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a :ref:`(connect timeout, read timeout) <timeouts>` tuple. :type timeout: float or tuple or urllib3 Timeout object :param verify: (optional) Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. :rtype: requests.Response """ conn = self.get_connection(request.url, proxies) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) self.add_headers( request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies, ) chunked = not (request.body is None or "Content-Length" in request.headers) if isinstance(timeout, tuple): try: connect, read = timeout timeout = TimeoutSauce(connect=connect, read=read) except ValueError as e: # this may raise a string formatting error. err = ( "Invalid timeout {0}. Pass a (connect, read) " "timeout tuple, or a single float to set " "both timeouts to the same value".format(timeout) ) raise ValueError(err) elif isinstance(timeout, TimeoutSauce): pass else: timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: resp = conn.urlopen( method=request.method, url=url, body=request.body, headers=request.headers, redirect=False, assert_same_host=False, preload_content=False, decode_content=False, retries=self.max_retries, timeout=timeout, ) # Send the request. else: if hasattr(conn, "proxy_pool"): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: low_conn.putrequest(request.method, url, skip_accept_encoding=True) for header, value in request.headers.items(): low_conn.putheader(header, value) low_conn.endheaders() for i in request.body: low_conn.send(hex(len(i))[2:].encode("utf-8")) low_conn.send(b"\r\n") low_conn.send(i) low_conn.send(b"\r\n") low_conn.send(b"0\r\n\r\n") # Receive the response from the server try: # For Python 2.7+ versions, use buffering of HTTP # responses r = low_conn.getresponse(buffering=True) except TypeError: # For compatibility with Python 2.6 versions and back r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, decode_content=False, ) except: # If we hit any problems here, clean up the connection. # Then, reraise so that we can handle the actual exception. low_conn.close() raise except (ProtocolError, socket.error) as err: raise ConnectionError(err, request=request) except MaxRetryError as e: if isinstance(e.reason, ConnectTimeoutError): # TODO: Remove this in 3.0.0: see #2811 if not isinstance(e.reason, NewConnectionError): raise ConnectTimeout(e, request=request) if isinstance(e.reason, ResponseError): raise RetryError(e, request=request) if isinstance(e.reason, _ProxyError): raise ProxyError(e, request=request) if isinstance(e.reason, _SSLError): # This branch is for urllib3 v1.22 and later. raise SSLError(e, request=request) raise ConnectionError(e, request=request) except ClosedPoolError as e: raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): # This branch is for urllib3 versions earlier than v1.22 raise SSLError(e, request=request) elif isinstance(e, ReadTimeoutError): raise ReadTimeout(e, request=request) else: raise return self.build_response(request, resp)
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest <PreparedRequest>` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a :ref:`(connect timeout, read timeout) <timeouts>` tuple. :type timeout: float or tuple or urllib3 Timeout object :param verify: (optional) Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. :rtype: requests.Response """ conn = self.get_connection(request.url, proxies) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies) chunked = not (request.body is None or 'Content-Length' in request.headers) if isinstance(timeout, tuple): try: connect, read = timeout timeout = TimeoutSauce(connect=connect, read=read) except ValueError as e: # this may raise a string formatting error. err = ("Invalid timeout {0}. Pass a (connect, read) " "timeout tuple, or a single float to set " "both timeouts to the same value".format(timeout)) raise ValueError(err) elif isinstance(timeout, TimeoutSauce): pass else: timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: resp = conn.urlopen(method=request.method, url=url, body=request.body, headers=request.headers, redirect=False, assert_same_host=False, preload_content=False, decode_content=False, retries=self.max_retries, timeout=timeout) # Send the request. else: if hasattr(conn, 'proxy_pool'): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: low_conn.putrequest(request.method, url, skip_accept_encoding=True) for header, value in request.headers.items(): low_conn.putheader(header, value) low_conn.endheaders() for i in request.body: low_conn.send(hex(len(i))[2:].encode('utf-8')) low_conn.send(b'\r\n') low_conn.send(i) low_conn.send(b'\r\n') low_conn.send(b'0\r\n\r\n') # Receive the response from the server try: # For Python 2.7+ versions, use buffering of HTTP # responses r = low_conn.getresponse(buffering=True) except TypeError: # For compatibility with Python 2.6 versions and back r = low_conn.getresponse() resp = HTTPResponse.from_httplib(r, pool=conn, connection=low_conn, preload_content=False, decode_content=False) except: # If we hit any problems here, clean up the connection. # Then, reraise so that we can handle the actual exception. low_conn.close() raise except (ProtocolError, socket.error) as err: raise ConnectionError(err, request=request) except MaxRetryError as e: if isinstance(e.reason, ConnectTimeoutError): # TODO: Remove this in 3.0.0: see #2811 if not isinstance(e.reason, NewConnectionError): raise ConnectTimeout(e, request=request) if isinstance(e.reason, ResponseError): raise RetryError(e, request=request) if isinstance(e.reason, _ProxyError): raise ProxyError(e, request=request) if isinstance(e.reason, _SSLError): # This branch is for urllib3 v1.22 and later. raise SSLError(e, request=request) raise ConnectionError(e, request=request) except ClosedPoolError as e: raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): # This branch is for urllib3 versions earlier than v1.22 raise SSLError(e, request=request) elif isinstance(e, ReadTimeoutError): raise ReadTimeout(e, request=request) else: raise return self.build_response(request, resp)
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest <PreparedRequest>` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) The timeout on the request. :param verify: (optional) Whether to verify SSL certificates. :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. """ conn = self.get_connection(request.url, proxies) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) self.add_headers(request) chunked = not (request.body is None or 'Content-Length' in request.headers) if stream: timeout = TimeoutSauce(connect=timeout) else: timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: resp = conn.urlopen( method=request.method, url=url, body=request.body, headers=request.headers, redirect=False, assert_same_host=False, preload_content=False, decode_content=False, retries=self.max_retries, timeout=timeout ) # Send the request. else: if hasattr(conn, 'proxy_pool'): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=timeout) try: low_conn.putrequest(request.method, url, skip_accept_encoding=True) for header, value in request.headers.items(): low_conn.putheader(header, value) low_conn.endheaders() for i in request.body: low_conn.send(hex(len(i))[2:].encode('utf-8')) low_conn.send(b'\r\n') low_conn.send(i) low_conn.send(b'\r\n') low_conn.send(b'0\r\n\r\n') r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, decode_content=False ) except: # If we hit any problems here, clean up the connection. # Then, reraise so that we can handle the actual exception. low_conn.close() raise else: # All is well, return the connection to the pool. conn._put_conn(low_conn) except socket.error as sockerr: raise ConnectionError(sockerr) except MaxRetryError as e: raise ConnectionError(e) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): raise SSLError(e) elif isinstance(e, TimeoutError): raise Timeout(e) else: raise r = self.build_response(request, resp) if not stream: r.content return r
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object.""" conn = self.get_connection(request.url, proxies) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) chunked = not (request.body is None or 'Content-Length' in request.headers) try: if not chunked: resp = conn.urlopen( method=request.method, url=url, body=request.body, headers=request.headers, redirect=False, assert_same_host=False, preload_content=False, decode_content=False, retries=self.max_retries, timeout=timeout ) # Send the request. else: if hasattr(conn, 'proxy_pool'): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=timeout) low_conn.putrequest(request.method, url, skip_accept_encoding=True) for header, value in request.headers.items(): low_conn.putheader(header, value) low_conn.endheaders() for i in request.body: low_conn.send(hex(len(i))[2:].encode('utf-8')) low_conn.send(b'\r\n') low_conn.send(i) low_conn.send(b'\r\n') low_conn.send(b'0\r\n\r\n') r = low_conn.getresponse() resp = HTTPResponse.from_httplib(r, pool=conn, connection=low_conn, preload_content=False, decode_content=False ) except socket.error as sockerr: raise ConnectionError(sockerr) except MaxRetryError as e: raise ConnectionError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): raise SSLError(e) elif isinstance(e, TimeoutError): raise Timeout(e) else: raise Timeout('Request timed out.') r = self.build_response(request, resp) if not stream: r.content return r
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest <PreparedRequest>` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a (`connect timeout, read timeout <user/advanced.html#timeouts>`_) tuple. :type timeout: float or tuple :param verify: (optional) Whether to verify SSL certificates. :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. """ conn = self.get_connection(request.url, proxies) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) self.add_headers(request) chunked = not (request.body is None or "Content-Length" in request.headers) if isinstance(timeout, tuple): try: connect, read = timeout timeout = TimeoutSauce(connect=connect, read=read) except ValueError as e: # this may raise a string formatting error. err = ( "Invalid timeout {0}. Pass a (connect, read) " "timeout tuple, or a single float to set " "both timeouts to the same value".format(timeout) ) raise ValueError(err) else: timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: resp = conn.urlopen( method=request.method, url=url, body=request.body, headers=request.headers, redirect=False, assert_same_host=False, preload_content=False, decode_content=False, retries=self.max_retries, timeout=timeout, ) # Send the request. else: if hasattr(conn, "proxy_pool"): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=timeout) try: low_conn.putrequest(request.method, url, skip_accept_encoding=True) for header, value in request.headers.items(): low_conn.putheader(header, value) low_conn.endheaders() for i in request.body: low_conn.send(hex(len(i))[2:].encode("utf-8")) low_conn.send(b"\r\n") low_conn.send(i) low_conn.send(b"\r\n") low_conn.send(b"0\r\n\r\n") r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, decode_content=False ) except: # If we hit any problems here, clean up the connection. # Then, reraise so that we can handle the actual exception. low_conn.close() raise else: # All is well, return the connection to the pool. conn._put_conn(low_conn) except (ProtocolError, socket.error) as err: raise ConnectionError(err, request=request) except MaxRetryError as e: if isinstance(e.reason, ConnectTimeoutError): raise ConnectTimeout(e, request=request) if isinstance(e.reason, ResponseError): raise RetryError(e, request=request) raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): raise SSLError(e, request=request) elif isinstance(e, ReadTimeoutError): raise ReadTimeout(e, request=request) else: raise return self.build_response(request, resp)
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest <PreparedRequest>` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a (`connect timeout, read timeout <user/advanced.html#timeouts>`_) tuple. :type timeout: float or tuple :param verify: (optional) Whether to verify SSL certificates. :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. """ conn = self.get_connection(request.url, proxies) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) self.add_headers(request) chunked = not (request.body is None or 'Content-Length' in request.headers) if isinstance(timeout, tuple): try: connect, read = timeout timeout = TimeoutSauce(connect=connect, read=read) except ValueError as e: # this may raise a string formatting error. err = ("Invalid timeout {0}. Pass a (connect, read) " "timeout tuple, or a single float to set " "both timeouts to the same value".format(timeout)) raise ValueError(err) else: timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: resp = conn.urlopen( method=request.method, url=url, body=request.body, headers=request.headers, redirect=False, assert_same_host=False, preload_content=False, decode_content=False, retries=Retry(self.max_retries, read=False), timeout=timeout ) # Send the request. else: if hasattr(conn, 'proxy_pool'): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=timeout) try: low_conn.putrequest(request.method, url, skip_accept_encoding=True) for header, value in request.headers.items(): low_conn.putheader(header, value) low_conn.endheaders() for i in request.body: low_conn.send(hex(len(i))[2:].encode('utf-8')) low_conn.send(b'\r\n') low_conn.send(i) low_conn.send(b'\r\n') low_conn.send(b'0\r\n\r\n') r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, decode_content=False ) except: # If we hit any problems here, clean up the connection. # Then, reraise so that we can handle the actual exception. low_conn.close() raise else: # All is well, return the connection to the pool. conn._put_conn(low_conn) except (ProtocolError, socket.error) as err: raise ConnectionError(err, request=request) except MaxRetryError as e: if isinstance(e.reason, ConnectTimeoutError): raise ConnectTimeout(e, request=request) raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): raise SSLError(e, request=request) elif isinstance(e, ReadTimeoutError): raise ReadTimeout(e, request=request) else: raise return self.build_response(request, resp)