def __init__(self, socket=None, encoding='utf-8'):
    """
    Initialises the channel on top of the base _Channel.

    Args:
        socket: Optional socket, handed straight to _Channel.__init__.
        encoding: Stored on the instance as self.encoding. Defaults
            to 'utf-8'.
    """
    _Channel.__init__(self, socket)

    # The prefix starts out empty. It is prepended to every command that
    # is sent, which is mostly of use to servers.
    self.prefix = ''

    self.encoding = encoding

    # Deliver incoming data one line at a time.
    self.read_delimiter = '\n'
def __init__(self, socket, server):
    """
    Initialises the connection.

    Both arguments are mandatory because it is the server that decides
    them when it accepts the connection.

    Args:
        socket: The already-existing socket this channel wraps.
        server: The server this channel is connected to; kept on the
            instance as self.server.
    """
    Channel.__init__(self, socket)

    self.server = server
def __init__(self, ConnectionClass=None):
    """
    Initialises the server.

    Args:
        ConnectionClass: Optional class used to wrap newly connected
            sockets. When omitted (or falsy) the attribute is not
            assigned on the instance.
    """
    Channel.__init__(self)

    # Channels are tracked by file descriptor. The mapping holds weak
    # references, so it does not keep a channel alive on its own.
    self.channels = weakref.WeakValueDictionary()

    # This overrides the attribute on this instance only; the class
    # attribute is left untouched.
    if ConnectionClass:
        self.ConnectionClass = ConnectionClass
def _handle_write_event(self):
    """
    Handles a write event, giving priority to an unfinished SSL handshake.

    While self._ssl_handshake_done is false the event is used to run
    self._ssl_handshake() and nothing else happens; once it is true the
    event is passed on to Channel._handle_write_event.
    """
    if self._ssl_handshake_done:
        Channel._handle_write_event(self)
    else:
        self._ssl_handshake()
def _process_request(self):
    """
    Starts processing the first request on the stack.

    With an empty stack the channel (if any) is closed and, when a
    blocking process() call is in flight, the engine is stopped. Otherwise
    the method makes sure a channel to the right host exists - closing a
    mismatched one or opening a new one and returning - and, once
    connected, writes the request and arms the header reader.
    """
    # Nothing queued: tear down and, if needed, release process().
    if not self._requests:
        if self._channel:
            self._channel.close_immediately()
            self._channel = None
        if self._processing:
            self._processing = False
            from pants.engine import Engine
            Engine.instance().stop()
        return

    current = self._requests[0]
    target = current[2]
    wants_tls = target.scheme.lower() == 'https'

    # Fall back to the scheme's default port when the URL names none.
    port = target.port
    if not port:
        port = 443 if wants_tls else 80

    host = "%s:%d" % (target.hostname, port)

    # An open channel to a different host, or with different security,
    # cannot be reused: close it and let the close handler pick up again.
    if self._channel:
        if self._server != host.lower() or self._is_secure != wants_tls:
            self._channel.close()
            return

    if not self._channel:
        # Remember which server this channel belongs to.
        self._server = host.lower()

        # Build the channel, install our hooks, and start connecting.
        self._channel = Channel()
        self._channel.handle_close = self._handle_close
        self._channel.handle_connect = self._handle_connect

        self._is_secure = wants_tls
        if self._is_secure:
            self._channel.startTLS()

        self._channel.connect(target.hostname, port)
        return

    # Connected to the right server: request line, headers, then body.
    self._send('%s %s HTTP/1.1%s' % (current[0], current[8], CRLF))

    for name, value in current[3].iteritems():
        self._send('%s: %s%s' % (name, value, CRLF))

    payload = current[4]
    if payload:
        self._send('%s%s' % (CRLF, payload))
    else:
        self._send(CRLF)

    # The response headers end at the first blank line.
    self._channel.handle_read = self._read_headers
    self._channel.read_delimiter = DOUBLE_CRLF
class HTTPClient(object):
    """
    An HTTP client, capable of communicating with most, if not all, servers
    using an incomplete implementation of HTTP/1.1.

    The HTTPClient's behavior is defined, mainly, through the handle_response
    function that's expected as the first argument when constructing a new
    HTTPClient.

    Alternatively, you may subclass HTTPClient to modify the response handler.
    """

    def __init__(self, response_handler=None, max_redirects=5, keep_alive=True,
                 unicode=True):
        """
        Initialize a new HTTPClient instance.

        Args:
            response_handler: Optionally, a function to use for handling
                received responses from the server. If None is provided,
                the default will be used instead.
            max_redirects: The number of times to follow a redirect from the
                server. Defaults to 5.
            keep_alive: If True, the connection will be reused as much as
                possible. Defaults to True.
            unicode: If True, the Content-Type header will be checked for a
                character set. If one is present, the body will be converted
                to unicode, using that character set. Defaults to True.

        Raises:
            ValueError: If response_handler is given but is not callable.
        """
        if response_handler is not None:
            if not callable(response_handler):
                raise ValueError("response handler must be callable.")
            self.handle_response = response_handler

        # Internal State
        self._channel = None
        self._processing = False
        self._requests = []
        self._server = None
        self._is_secure = False
        self._helper = None

        # The response currently being read; None between responses.
        self.current_response = None

        # External State
        self.keep_alive = keep_alive
        self.max_redirects = max_redirects
        self.unicode = unicode

    ##### General Methods #####################################################

    def get(self, url, timeout=30, headers=None, **kwargs):
        """
        Perform an HTTP GET request for the specified URL. Additional query
        parameters may be specified as keyword arguments. For example:

            client.get('http://www.google.com/search', q='test')

        Is equivalent to:

            client.get('http://www.google.com/search?q=test')

        Args:
            url: The URL to fetch.
            timeout: The time, in seconds, to wait for a response before
                erroring out. Defaults to 30.
            headers: An optional dict of headers to send with the request.

        Returns:
            The helper object the queued request was appended to.
        """
        helper = self._helper
        if helper is None:
            helper = ClientHelper(self)

        if kwargs:
            # Merge the keyword arguments into the URL's query string while
            # keeping any fragment at the very end.
            query, fragment = urlparse.urlparse(url)[4:6]
            extra = urllib.urlencode(kwargs, True)
            if query:
                url = "%s?%s&%s" % (url.partition('?')[0], query, extra)
                if fragment:
                    url = "%s#%s" % (url, fragment)
            elif fragment:
                url = "%s?%s#%s" % (url.partition('#')[0], extra, fragment)
            else:
                url = "%s?%s" % (url, extra)

        helper.requests.append(
            self._add_request('GET', url, headers, None, timeout))
        return helper

    def post(self, url, timeout=None, headers=None, files=None, **kwargs):
        """
        Perform an HTTP POST request for the specified URL.

        Args:
            url: The URL to fetch.
            timeout: The time, in seconds, to wait for a response before
                erroring out. The signature default is None, whereas get()
                defaults to 30. NOTE(review): confirm which is intended.
            headers: An optional dict of headers to send with the request.
                The dict is copied; the caller's object is not modified.
            files: An optional dict of files to submit to the server.

        Any additional keyword arguments will be sent in the request body
        as POST variables.

        Returns:
            The helper object the queued request was appended to.

        Raises:
            ValueError: If files are supplied together with a Content-Type
                of 'application/x-www-form-urlencoded'.
        """
        helper = self._helper
        if helper is None:
            helper = ClientHelper(self)

        body = ''

        # Content-Type is rewritten below, so never touch the caller's dict.
        headers = dict(headers) if headers else {}

        if headers.get('Content-Type', '') == \
                'application/x-www-form-urlencoded' and files:
            raise ValueError("Cannot send files with Content-Type "
                             "'application/x-www-form-urlencoded'.")

        if files:
            headers['Content-Type'] = 'multipart/form-data'
        elif 'Content-Type' not in headers:
            headers['Content-Type'] = 'application/x-www-form-urlencoded'

        if headers['Content-Type'] == 'multipart/form-data':
            boundary, body = encode_multipart(kwargs, files)
            headers['Content-Type'] = 'multipart/form-data; boundary=%s' % \
                boundary
        elif kwargs:
            body = urllib.urlencode(kwargs, True)

        helper.requests.append(
            self._add_request('POST', url, headers, body, timeout))
        return helper

    def process(self):
        """
        Block until the queued requests finish.

        Returns immediately when nothing is queued; otherwise starts the
        engine, which _process_request stops once the queue is empty.
        """
        if not self._requests:
            return

        self._processing = True
        from pants.engine import Engine
        Engine.instance().start()

    ##### Public Event Handlers ###############################################

    def handle_response(self, response):
        """
        Placeholder. Called when an HTTP response is received.

        Args:
            response: The HTTP response that was received.
        """
        pass

    ##### Private Methods #####################################################

    def _add_request(self, method, url, headers, body, timeout, append=True):
        """
        Build a request record and, optionally, queue it.

        Args:
            method: The HTTP method to put on the request line.
            url: The absolute http:// or https:// URL.
            headers: Optional dict of headers. It is copied before the
                default headers are filled in.
            body: The request body, or a falsy value for none.
            timeout: Stored in the record as-is.
            append: If True, add the record to the queue and schedule
                processing when it is the only queued request.

        Returns:
            The request record, a list laid out as:
                [0] method, [1] url, [2] parsed URL, [3] headers, [4] body,
                [5] timeout, [6] per-request handler slot (initially None),
                [7] creation time, [8] request path, [9] redirect count.

        Raises:
            ValueError: If the URL is neither HTTP nor HTTPS.
        """
        if not url.lower().startswith(('http://', 'https://')):
            raise ValueError(
                "Can only make HTTP or HTTPS requests with HTTPClient.")

        parts = urlparse.urlparse(url)

        # Build our headers on a private copy so the caller's dict (and the
        # headers of a request being redirected) stay untouched.
        headers = dict(headers) if headers else {}
        headers.setdefault('Accept-Encoding', 'deflate, gzip')
        headers.setdefault('Host', parts.netloc)
        headers.setdefault('User-Agent', USER_AGENT)

        if body:
            headers['Content-Length'] = len(body)

        path = parts.path or '/'
        if parts.query:
            path = '%s?%s' % (path, parts.query)
        if parts.fragment:
            path = '%s#%s' % (path, parts.fragment)

        request = [method, url, parts, headers, body, timeout, None, time(),
                   path, 0]

        if append:
            self._requests.append(request)

            # If we're just starting, start to process.
            if len(self._requests) == 1:
                callback(self._process_request)

        return request

    def _process_request(self):
        """
        Starts processing the first request on the stack.

        With an empty stack the channel is closed and a pending process()
        call is released. Otherwise a channel to the right server is
        ensured (closing a mismatched one, or opening a new one and
        returning) before the request is written out.
        """
        if not self._requests:
            if self._channel:
                self._channel.close_immediately()
                self._channel = None
            if self._processing:
                self._processing = False
                from pants.engine import Engine
                Engine.instance().stop()
            return

        request = self._requests[0]
        parts = request[2]
        secure = parts.scheme.lower() == 'https'

        # Use the scheme's default port when the URL does not name one.
        port = parts.port
        if not port:
            port = 443 if secure else 80

        host = "%s:%d" % (parts.hostname, port)

        if self._channel:
            # A channel to another server, or with different security,
            # cannot be reused; closing it re-enters via _handle_close.
            if self._server != host.lower() or self._is_secure != secure:
                self._channel.close()
                return

        if not self._channel:
            # Store the current server.
            self._server = host.lower()

            # Create a Channel, hook into it, and connect.
            self._channel = Channel()
            self._channel.handle_close = self._handle_close
            self._channel.handle_connect = self._handle_connect

            self._is_secure = secure
            if self._is_secure:
                self._channel.startTLS()

            self._channel.connect(parts.hostname, port)
            return

        # If we got here, we're connected, and to the right server.
        self._send('%s %s HTTP/1.1%s' % (request[0], request[8], CRLF))

        for k, v in request[3].iteritems():
            self._send('%s: %s%s' % (k, v, CRLF))

        if request[4]:
            self._send('%s%s' % (CRLF, request[4]))
        else:
            self._send(CRLF)

        # Now, wait for a response.
        self._channel.handle_read = self._read_headers
        self._channel.read_delimiter = DOUBLE_CRLF

    def _send(self, data):
        """
        Write data to the current channel.
        """
        self._channel.write(data)

    def _finish_response(self, close_after):
        """
        Clear the current response and move on to the next request.

        Args:
            close_after: If True and a channel exists, the channel is
                closed and nothing further is done here; otherwise the
                next queued request is processed right away.
        """
        self.current_response = None

        if close_after and self._channel:
            self._channel.close_immediately()
            return

        self._process_request()

    ##### Internal Event Handlers #############################################

    def _handle_connect(self):
        """
        Once connected, send the pending request, or close the channel when
        nothing is queued any more.
        """
        if self._requests:
            self._process_request()
        else:
            self._channel.close()

    def _handle_close(self):
        """
        In the event that the connection is closed, see if there's another
        request to process. If so, reconnect to the given host.
        """
        self._channel = None
        self._is_secure = False
        self._process_request()

    def _handle_response(self):
        """
        Call the response handler.

        Pops the request at the head of the queue, follows 301/302
        redirects up to max_redirects times, optionally decodes the body
        to unicode, and hands the response to the per-request handler (if
        one is set) or to handle_response. Exceptions raised by the
        handler are logged, not propagated.
        """
        request = self._requests.pop(0)
        response = self.current_response

        # Drop the connection when the server asks for it, or when this
        # client was configured not to reuse connections.
        close_after = (response.headers.get('Connection', '') == 'close'
                       or not self.keep_alive)

        # Is this a 100 Continue?
        if response.status == 100:
            self._finish_response(close_after)
            return

        # Did we catch a redirect? request[9] counts the redirects already
        # followed, so exactly max_redirects of them are allowed. Without a
        # Location header there is nowhere to go, so the response is simply
        # delivered to the handler below.
        if response.status in (301, 302) and \
                request[9] < self.max_redirects and \
                'Location' in response.headers:
            # Generate a new request, using the new URL.
            new_url = urlparse.urljoin(response.full_url,
                                       response.headers['Location'])

            # The Host header must be recomputed for the new URL.
            new_headers = request[3].copy()
            new_headers.pop('Host', None)

            new_req = self._add_request(request[0], new_url, new_headers,
                                        request[4], request[5], False)
            new_req[6] = request[6]
            new_req[7] = request[7]
            new_req[9] = request[9] + 1

            self._requests.insert(0, new_req)
            self._finish_response(close_after)
            return

        # Try converting to unicode?
        if self.unicode:
            content_type = response.headers.get('Content-Type', '')
            if 'charset=' in content_type:
                encoding = content_type.partition('charset=')[2]
                try:
                    response.body = response.body.decode(encoding)
                except (LookupError, UnicodeDecodeError):
                    # Unknown or wrong charset: keep the undecoded body.
                    pass

        # Determine the handler function to use.
        if callable(request[6]):
            func = request[6]
        else:
            func = self.handle_response

        # Call the handler function.
        try:
            func(response)
        except Exception:
            log.exception('Error handling HTTP response.')

        # Process the next request.
        self._finish_response(close_after)

    def _read_body(self, data):
        """
        Read the response body, decompress it if necessary, and then call
        the response handler.
        """
        resp = self.current_response
        if resp._decompressor:
            resp.body = resp._decompressor.decompress(data)
            resp.body += resp._decompressor.flush()
            del resp._decompressor
        else:
            resp.body = data
        self._handle_response()

    def _read_additional_headers(self, data):
        """
        Read the headers that follow the last chunk of a chunked response.

        Non-empty lines are accumulated; the empty line that ends them
        triggers parsing, merging into the response headers, and the
        response handler.
        """
        resp = self.current_response

        if data:
            resp._additional_headers += '%s%s' % (data, CRLF)
            return

        headers = read_headers(resp._additional_headers)
        del resp._additional_headers

        # Did we get an additional header for Content-Encoding?
        enc = resp.headers.get('Content-Encoding', '')

        # Merge; a header seen both before and after the body becomes a
        # list of all its values.
        for k, v in headers.iteritems():
            if k in resp.headers:
                if not isinstance(resp.headers[k], list):
                    resp.headers[k] = [resp.headers[k]]
                if isinstance(v, list):
                    resp.headers[k].extend(v)
                else:
                    resp.headers[k].append(v)
            else:
                resp.headers[k] = v

        # An encoding announced only now means the body collected so far
        # is still compressed.
        new_enc = resp.headers.get('Content-Encoding', '')
        if enc == '':
            if new_enc == 'gzip':
                resp.body = zlib.decompress(resp.body, 16 + zlib.MAX_WBITS)
            elif new_enc == 'deflate':
                resp.body = zlib.decompress(resp.body, -zlib.MAX_WBITS)

        # Finally, handle it.
        self._handle_response()

    def _read_chunk_head(self, data):
        """
        Read a chunk header.

        A length of zero marks the last chunk and switches to reading the
        additional headers; any other length switches to reading that many
        bytes of chunk body plus its trailing line break.
        """
        # Anything after ';' is a chunk extension and is ignored.
        length = int(data.partition(';')[0].strip(), 16)

        if length == 0:
            resp = self.current_response
            if resp._decompressor:
                resp.body += resp._decompressor.flush()
                del resp._decompressor

            self._channel.handle_read = self._read_additional_headers
            resp._additional_headers = ''
            self._channel.read_delimiter = CRLF
        else:
            self._channel.handle_read = self._read_chunk_body
            self._channel.read_delimiter = length + 2

    def _read_chunk_body(self, data):
        """
        Read a chunk body.
        """
        resp = self.current_response

        # The last two characters are the line break that ends the chunk.
        if resp._decompressor:
            resp.body += resp._decompressor.decompress(data[:-2])
        else:
            resp.body += data[:-2]

        self._channel.handle_read = self._read_chunk_head
        self._channel.read_delimiter = CRLF

    def _read_headers(self, data):
        """
        Read the headers of an HTTP response from the socket, and the
        response body as well, into a new HTTPResponse instance. Then call
        the request handler.
        """
        # NOTE(review): do_close is set on failure but nothing in the
        # visible part of this method acts on it - confirm where the
        # cleanup that consumes it lives.
        do_close = False

        try:
            initial_line, data = data.split(CRLF, 1)
            try:
                try:
                    http_version, status, status_text = \
                        initial_line.split(' ', 2)
                    status = int(status)
                except ValueError:
                    # No reason phrase given; look up the standard one.
                    http_version, status = initial_line.split(' ')
                    status = int(status)
                    status_text = HTTP.get(status, '')
            except ValueError:
                raise BadRequest('Invalid HTTP status line %r.' %
                                 initial_line)

            # Parse the headers.
            headers = read_headers(data)

            # Construct a HTTPResponse object.
            self.current_response = response = HTTPResponse(
                self, self._requests[0], http_version, status, status_text,
                headers)

            # Do we have a Content-Encoding header?
            if 'Content-Encoding' in headers:
                encoding = headers['Content-Encoding']
                if encoding == 'gzip':
                    response._decompressor = \
                        zlib.decompressobj(16 + zlib.MAX_WBITS)
                elif encoding == 'deflate':
                    response._decompressor = \
                        zlib.decompressobj(-zlib.MAX_WBITS)

            # Do we have a Content-Length header?
            if 'Content-Length' in headers:
                self._channel.handle_read = self._read_body
                self._channel.read_delimiter = int(headers['Content-Length'])

            elif 'Transfer-Encoding' in headers:
                if headers['Transfer-Encoding'] == 'chunked':
                    self._channel.handle_read = self._read_chunk_head
                    self._channel.read_delimiter = CRLF
                else:
                    raise BadRequest("Unsupported Transfer-Encoding: %s" %
                                     headers['Transfer-Encoding'])

            # Is this a HEAD request? If so, then handle the request NOW.
            if response.method == 'HEAD':
                self._handle_response()

        except BadRequest as e:
            log.info('Bad response from %r: %s', self._server, e)
            do_close = True

        except Exception:
            log.exception('Error handling HTTP response.')
            do_close = True