def redirect_request(self, newurl, req, fp, code, msg, headers):
    """Return a Request or None in response to a redirect.

    This is called by the http_error_30x methods when a redirection
    response is received.  If a redirection should take place, return a
    new Request to allow http_error_30x to perform the redirect;
    otherwise, return None to indicate that an HTTPError should be
    raised.
    """
    if code in (301, 302, 303, "refresh") or \
            (code == 307 and not req.has_data()):
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib2, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # XXX really refresh redirections should be visiting; tricky to
        # fix, so this will wait until post-stable release
        new = Request(newurl,
                      headers=req.headers,
                      origin_req_host=req.get_origin_req_host(),
                      unverifiable=True,
                      visit=False,
                      )
        new._origin_req = getattr(req, "_origin_req", req)
        return new
    else:
        raise HTTPError(req.get_full_url(), code, msg, headers, fp)
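# Hedged usage sketch (not from the source): redirect_request is normally
# invoked by the handler's http_error_301/302/303/307 methods, but it can be
# exercised directly.  "RedirectHandler" below stands in for whatever handler
# class defines the method above, and Request is assumed to be mechanize.Request.
from mechanize import Request

handler = RedirectHandler()  # hypothetical instance of the defining class
original = Request("http://example.com/old", headers={"User-agent": "test"})
new = handler.redirect_request("http://example.com/new", original,
                               None, 302, "Found", {})
if new is not None:
    # A 302 with no POST data is redirected: the new request reuses the old
    # headers and is marked unverifiable and non-visiting.
    print new.get_full_url()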
def found_terminator(self):
    self._last_use = int(time.time())
    if self._current_request:
        self._current_request.found_terminator()
    else:
        header, self._in_buffer = self._in_buffer, ''
        lines = string.split(header, '\r\n')
        while lines and not lines[0]:
            lines.pop(0)
        if not lines:
            self.close_when_done()
            return
        request = lines.pop(0)
        try:
            command, uri, version = crack_request(request)
        except:
            if self.server.debug:
                self.log_info("Ignoring malformed HTTP request: " + request)
            return
        if '%' in request:
            request = unquote(request)
        if command is None:
            self.log_info('Bad HTTP request: %s' % repr(request), 'error')
            return
        header = _join_headers(lines)
        self._current_request = Request(self, request, command, uri,
                                        version, header)
        requests = self._request_queue
        requests.insert(len(requests) - 1, self._current_request)
        self.request_counter.increment()
        self.server.total_requests.increment()
        self._current_request.found_terminator()
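# The request-line parser relied on above is not included in these snippets.
# The sketch below illustrates the Medusa-style contract that crack_request is
# assumed to follow: split "GET /index.html HTTP/1.0" into (command, uri,
# version).  This is an illustrative reimplementation, not the real helper;
# the real one may return a None command for a malformed line instead of
# raising, which is why found_terminator checks both cases.
import re

_REQUEST_LINE = re.compile(r'([^ ]+) ([^ ]+)(( HTTP/([0-9.]+))$|$)')

def crack_request_sketch(line):
    match = _REQUEST_LINE.match(line)
    if match is None or match.end() != len(line):
        raise ValueError('malformed request line: %r' % line)
    # version is None for HTTP/0.9-style requests that carry no version token
    return match.group(1).lower(), match.group(2), match.group(5)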
def http_request(self, request):
    if not hasattr(request, "add_unredirected_header"):
        newrequest = Request(request._Request__original, request.data,
                             request.headers)
        try:
            newrequest.origin_req_host = request.origin_req_host
        except AttributeError:
            pass
        try:
            newrequest.unverifiable = request.unverifiable
        except AttributeError:
            pass
        try:
            newrequest.visit = request.visit
        except AttributeError:
            pass
        request = newrequest
    return request
def _request(self, url_or_req, data, visit,
             timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
    if isstringlike(url_or_req):
        req = Request(url_or_req, data, visit=visit, timeout=timeout)
    else:
        # already a mechanize.Request instance
        req = url_or_req
        if data is not None:
            req.add_data(data)
        # XXX yuck
        set_request_attr(req, "visit", visit, None)
        set_request_attr(req, "timeout", timeout,
                         _sockettimeout._GLOBAL_DEFAULT_TIMEOUT)
    return req
def _request(self, url_or_req, data, visit,
             timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
    if isstringlike(url_or_req):
        req = Request(url_or_req, data, visit=visit, timeout=timeout)
    else:
        # already a urllib2.Request or mechanize.Request instance
        req = url_or_req
        if data is not None:
            req.add_data(data)
        # XXX yuck
        set_request_attr(req, "visit", visit, None)
        set_request_attr(req, "timeout", timeout,
                         _sockettimeout._GLOBAL_DEFAULT_TIMEOUT)
    return req
def found_terminator(self):
    self._last_use = int(time.time())
    if self._current_request:
        self._current_request.found_terminator()
    else:
        header, self._in_buffer = self._in_buffer, ''
        lines = string.split(header, '\r\n')
        while lines and not lines[0]:
            lines.pop(0)
        if not lines:
            self.close_when_done()
            return
        request = lines.pop(0)
        try:
            command,uri,version = crack_request(request)
        except:
            if self.server.debug:
                self.log_info(
                    "Ignoring malformed HTTP request: " + request
                )
            return
        if '%' in request:
            request = unquote(request)
        if command is None:
            self.log_info('Bad HTTP request: %s' % repr(request),'error')
            return
        header = _join_headers(lines)
        self._current_request = Request(self, request, command, uri,
                                        version, header)
        requests = self._request_queue
        requests.insert(len(requests) - 1, self._current_request)
        self.request_counter.increment()
        self.server.total_requests.increment()
        self._current_request.found_terminator()
def _request(self, url_or_req, data, visit):
    if isstringlike(url_or_req):
        req = Request(url_or_req, data, visit=visit)
    else:
        # already a urllib2.Request or mechanize.Request instance
        req = url_or_req
        if data is not None:
            req.add_data(data)
        # XXX yuck, give request a .visit attribute if it doesn't have one
        try:
            req.visit
        except AttributeError:
            req.visit = None
        if visit is not None:
            req.visit = visit
    return req
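# Hedged usage sketch (assumed, not from the source): _request is an internal
# normalisation step.  Callers such as mechanize.Browser.open accept either a
# URL string or a prebuilt Request and rely on it to attach data, visit and
# (in the newer variants above) timeout in a uniform way.
import mechanize

br = mechanize.Browser()
br.open("http://example.com/")                        # string form
req = mechanize.Request("http://example.com/search")
br.open(req, data="q=hello")                          # Request form; data applied via add_data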
def read(self):
    """Reads the robots.txt URL and feeds it to the parser."""
    if self._opener is None:
        self.set_opener()
    req = Request(self.url, unverifiable=True, visit=False,
                  timeout=self._timeout)
    try:
        f = self._opener.open(req)
    except HTTPError as f:
        pass
    except (IOError, socket.error, OSError) as exc:
        debug_robots("ignoring error opening %r: %s" % (self.url, exc))
        return
    lines = []
    line = f.readline()
    while line:
        lines.append(line.strip())
        line = f.readline()
    status = f.code
    if status == 401 or status == 403:
        self.disallow_all = True
        debug_robots("disallow all")
    elif status >= 400:
        self.allow_all = True
        debug_robots("allow all")
    elif status == 200 and lines:
        debug_robots("parse lines")
        self.parse(lines)
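# Hedged usage sketch: the read() method above follows the stdlib
# robotparser.RobotFileParser protocol, so typical use (the parser class name
# is assumed here) is set_url(), read(), then can_fetch() queries.
rfp = RobotFileParser()                      # assumed parser class name
rfp.set_url("http://example.com/robots.txt")
rfp.read()                                   # fetch and parse, or flag allow_all/disallow_all
print rfp.can_fetch("my-agent", "http://example.com/private/page.html")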
def read(self):
    """Reads the robots.txt URL and feeds it to the parser."""
    if self._opener is None:
        self.set_opener()
    req = Request(self.url, unverifiable=True, visit=False)
    try:
        f = self._opener.open(req)
    except HTTPError, f:
        pass
class Channel(AsyncChat, object):
    request_manager = Request.singleton.request_manager
    ac_out_buffer_size = 1 << 16
    current_request = None
    channel_counter = Counter()
    linger = struct.pack("ii",0,0)

    def __init__(self, server, conn, addr):
        self.channel_number = Channel.channel_counter.increment()
        self.addr = addr
        self.server = server
        # Leaving out connection and map because we set them below.
        AsyncChat.__init__(self)
        self._map = REDUSA_SOCKET_MAP
        self._in_buffer = ''
        self._current_request = None
        self._null_request = _NullRequest()
        self._request_queue = [self._null_request]
        self._keep_alive = 1
        self._last_use = int(time.time())
        self.check_maintenance()
        self.set_socket(conn, self._map)
        self.socket.setblocking(0)
        self._is_connected = True
        self.connected = True
        self._setup_counters()
        self.reset()

    def _setup_counters(self):
        self.request_counter = Counter()
        self.response_counter = Counter()
        self.bytes_out = Counter()
        self.bytes_in = Counter()
        self.read_calls = Counter()
        self.readable_calls = Counter()
        self.write_calls = Counter()
        self.writable_calls = Counter()
        self.buffer_refills = Counter()
        self.refill_skips = Counter()

    def get_statistics(self):
        requests = float(self.request_counter.value)
        responses = float(self.response_counter.value)
        bytesin = float(self.bytes_in.value)
        bytesout = float(self.bytes_out.value)
        readcalls = float(self.read_calls.value)
        readablecalls = float(self.readable_calls.value)
        writecalls = float(self.write_calls.value)
        writablecalls = float(self.writable_calls.value)
        refills = float(self.buffer_refills.value)
        refillskips = float(self.refill_skips.value)
        messages = ['Number of requests: %d' % requests]
        messages.append('Number of responses: %d' % responses)
        messages.append('Bytes read: %d' % bytesin)
        messages.append('Bytes written: %d' % bytesout)
        messages.append('Calls to read: %d' % readcalls)
        messages.append('Calls to readable: %d' % readablecalls)
        messages.append('Calls to write: %d' % writecalls)
        messages.append('Calls to writable: %d' % writablecalls)
        messages.append('Calls to refill buffer: %d' % refills)
        messages.append('Skipped calls to refill buffer: %d' % refillskips)
        averages = []
        if requests:
            bytesperrequest = bytesin / requests
            bytesperread = bytesin / readcalls
            readsperrequest = readcalls / requests
            readablesperread = readablecalls / readcalls
            averages.append('Bytes per request: %0.1f' % bytesperrequest)
            averages.append('Reads per request: %0.1f' % readsperrequest)
            averages.append('Readables per read: %0.1f' % readablesperread)
            averages.append('Bytes per read: %0.1f' % bytesperread)
        if responses:
            bytesperresponse = bytesout / responses
            bytesperwrite = bytesout / writecalls
            writesperresponse = writecalls / responses
            refillsperresponse = refills / responses
            bytesperrefill = bytesout / refills
            responsesperrefill = responses / refills
            writablesperwrite = writablecalls / writecalls
            averages.append('Bytes per response: %0.1f' % bytesperresponse)
            averages.append('Writes per response: %0.1f' % writesperresponse)
            averages.append('Writables per write: %0.1f' % writablesperwrite)
            averages.append('Bytes per write: %0.1f' % bytesperwrite)
            averages.append('Refills per response: %0.1f' % refillsperresponse)
            averages.append('Bytes per refill: %0.1f' % bytesperrefill)
            averages.append('Responses per refill: %0.1f' % responsesperrefill)
        formatted = ['Server channel statistics']
        for message in messages:
            label, value = message.split(': ')
            formatted.append(' --%s: %s' % (label.ljust(25), value))
        formatted.append(' Calculated averages')
        for average in averages:
            label, value = average.split(': ')
            formatted.append(' --%s: %s' % (label.ljust(25), value))
        return '\n'.join(formatted)

    def set_socket(self, sock, map = None):
        AsyncChat.set_socket(self, sock, map)
        # Ensure that we never block waiting for a socket to close.
        self.socket.setsockopt(socket.SOL_SOCKET,socket.SO_LINGER,self.linger)

    def server_name(self):
        return self.server.name

    def close_when_done(self):
        self._keep_alive = 0

    def reset_terminator(self):
        self.set_terminator('\r\n\r\n')

    def reset(self):
        self._current_request = None
        self.reset_terminator()

    def request_handled(self,request):
        if request is self._request_queue[0]:
            self.server.response_ready(self)

    def writable(self):
        self.writable_calls.increment()
        return AsyncChat.writable(self) or self._request_queue[0].writable()

    def refill_buffer(self):
        responsecount = self.response_counter.value
        requests = self._request_queue
        while requests[0].writable():
            self.response_counter.increment()
            self.producer_fifo.push(requests.pop(0).out)
        if requests[0] is self._null_request and not self._keep_alive:
            self.producer_fifo.push(None)
        if responsecount < self.response_counter.value:
            # Discards final call to make averages more pertinent
            self.buffer_refills.increment()
        else:
            self.refill_skips.increment()
        return AsyncChat.refill_buffer(self)

    def readable (self):
        # Use of accepting requests here keeps from blocking asyncore.
        self.readable_calls.increment()
        return (AsyncChat.readable(self) and
                self.request_manager.accepting_requests())

    def __repr__(self):
        ar = AsyncChat.__repr__(self)[1:-1]
        return '<%s channel#: %s requests:%s>' % (ar,self.channel_number,
                                                  self.request_counter)

    def __str__(self):
        ar = AsyncChat.__repr__(self)[1:-1]
        return ('%s, Channel #%s, requests processed: %s' %
                (ar,self.channel_number,self.request_counter))

    def check_maintenance(self):
        if not self.channel_number % self.server.maintenance_interval:
            self.maintenance()

    def maintenance(self):
        self.kill_zombies()

    def kill_zombies(self):
        now = int(time.time())
        for channel in self._map.values():
            if isinstance(channel, Channel):
                if (now - channel._last_use) > channel.server.zombie_timeout:
                    channel.die_if_zombie()

    def die_if_zombie(self):
        if self.writable():
            self._last_use = int(time.time())
        else:
            self.close()

    def send(self,data):
        self.write_calls.increment()
        bytecount = 0
        if self._is_connected:
            bytecount = AsyncChat.send(self, data)
        self.bytes_out.increment(bytecount)
        self.server.bytes_out.increment(bytecount)
        return bytecount

    def recv(self,buffer_size):
        self.read_calls.increment()
        try:
            result = AsyncChat.recv(self,buffer_size)
        except MemoryError:
            sys.exit("Out of Memory!")
        bytecount = len(result)
        self.bytes_in.increment(bytecount)
        self.server.bytes_in.increment(bytecount)
        return result

    def handle_error(self):
        t,v = sys.exc_info()[:2]
        if t is SystemExit:
            raise t,v
        msglog.exception(msglog.types.ERR,None,'Handled')
        self.close()

    def log(self,*args):
        pass

    def collect_incoming_data(self,data):
        if self._current_request:
            # we are receiving data (probably POST data) for a request
            self._current_request.collect_incoming_data(data)
        else:
            # we are receiving header (request) data
            self._in_buffer += data

    def found_terminator(self):
        self._last_use = int(time.time())
        if self._current_request:
            self._current_request.found_terminator()
        else:
            header, self._in_buffer = self._in_buffer, ''
            lines = string.split(header, '\r\n')
            while lines and not lines[0]:
                lines.pop(0)
            if not lines:
                self.close_when_done()
                return
            request = lines.pop(0)
            try:
                command,uri,version = crack_request(request)
            except:
                if self.server.debug:
                    self.log_info(
                        "Ignoring malformed HTTP request: " + request
                    )
                return
            if '%' in request:
                request = unquote(request)
            if command is None:
                self.log_info('Bad HTTP request: %s' % repr(request),'error')
                return
            header = _join_headers(lines)
            self._current_request = Request(self, request, command,
                                            uri, version, header)
            requests = self._request_queue
            requests.insert(len(requests) - 1, self._current_request)
            self.request_counter.increment()
            self.server.total_requests.increment()
            self._current_request.found_terminator()

    def push_with_producer(self,producer):
        self.producer_fifo.push(producer)

    def log_info(self,message,type=msglog.types.INFO):
        if type == msglog.types.DB and not self.server.debug:
            return
        prefix = '%s, Channel %s' % (self.server, self.channel_number)
        msglog.log(prefix, type, message)

    def log_statistics(self):
        self.log_info('\n%s\n' % self.get_statistics(), msglog.types.DB)

    def close(self):
        self._is_connected = False
        if self._current_request:
            try:
                self._current_request.handle_close()
            except:
                msglog.exception(prefix = 'Handled')
        AsyncChat.close(self)
        self.log_info('closed.', msglog.types.DB)

    def add_channel (self, map=None):
        if map is None:
            map = REDUSA_SOCKET_MAP
        assert map is REDUSA_SOCKET_MAP, 'Hack assumes that the map argument is None...'
        return asyncore.dispatcher.add_channel(self, map)

    def del_channel (self, map=None):
        if map is None:
            map = REDUSA_SOCKET_MAP
        assert map is REDUSA_SOCKET_MAP, 'Hack assumes that the map argument is None...'
        return asyncore.dispatcher.del_channel(self, map)
class Channel(AsyncChat, object):
    request_manager = Request.singleton.request_manager
    ac_out_buffer_size = 1 << 16
    current_request = None
    channel_counter = Counter()
    linger = struct.pack("ii", 0, 0)

    def __init__(self, server, conn, addr):
        self.channel_number = Channel.channel_counter.increment()
        self.addr = addr
        self.server = server
        # Leaving out connection and map because we set them below.
        AsyncChat.__init__(self)
        self._map = REDUSA_SOCKET_MAP
        self._in_buffer = ''
        self._current_request = None
        self._null_request = _NullRequest()
        self._request_queue = [self._null_request]
        self._keep_alive = 1
        self._last_use = int(time.time())
        self.check_maintenance()
        self.set_socket(conn, self._map)
        self.socket.setblocking(0)
        self._is_connected = True
        self.connected = True
        self._setup_counters()
        self.reset()

    def _setup_counters(self):
        self.request_counter = Counter()
        self.response_counter = Counter()
        self.bytes_out = Counter()
        self.bytes_in = Counter()
        self.read_calls = Counter()
        self.readable_calls = Counter()
        self.write_calls = Counter()
        self.writable_calls = Counter()
        self.buffer_refills = Counter()
        self.refill_skips = Counter()

    def get_statistics(self):
        requests = float(self.request_counter.value)
        responses = float(self.response_counter.value)
        bytesin = float(self.bytes_in.value)
        bytesout = float(self.bytes_out.value)
        readcalls = float(self.read_calls.value)
        readablecalls = float(self.readable_calls.value)
        writecalls = float(self.write_calls.value)
        writablecalls = float(self.writable_calls.value)
        refills = float(self.buffer_refills.value)
        refillskips = float(self.refill_skips.value)
        messages = ['Number of requests: %d' % requests]
        messages.append('Number of responses: %d' % responses)
        messages.append('Bytes read: %d' % bytesin)
        messages.append('Bytes written: %d' % bytesout)
        messages.append('Calls to read: %d' % readcalls)
        messages.append('Calls to readable: %d' % readablecalls)
        messages.append('Calls to write: %d' % writecalls)
        messages.append('Calls to writable: %d' % writablecalls)
        messages.append('Calls to refill buffer: %d' % refills)
        messages.append('Skipped calls to refill buffer: %d' % refillskips)
        averages = []
        if requests:
            bytesperrequest = bytesin / requests
            bytesperread = bytesin / readcalls
            readsperrequest = readcalls / requests
            readablesperread = readablecalls / readcalls
            averages.append('Bytes per request: %0.1f' % bytesperrequest)
            averages.append('Reads per request: %0.1f' % readsperrequest)
            averages.append('Readables per read: %0.1f' % readablesperread)
            averages.append('Bytes per read: %0.1f' % bytesperread)
        if responses:
            bytesperresponse = bytesout / responses
            bytesperwrite = bytesout / writecalls
            writesperresponse = writecalls / responses
            refillsperresponse = refills / responses
            bytesperrefill = bytesout / refills
            responsesperrefill = responses / refills
            writablesperwrite = writablecalls / writecalls
            averages.append('Bytes per response: %0.1f' % bytesperresponse)
            averages.append('Writes per response: %0.1f' % writesperresponse)
            averages.append('Writables per write: %0.1f' % writablesperwrite)
            averages.append('Bytes per write: %0.1f' % bytesperwrite)
            averages.append('Refills per response: %0.1f' % refillsperresponse)
            averages.append('Bytes per refill: %0.1f' % bytesperrefill)
            averages.append('Responses per refill: %0.1f' % responsesperrefill)
        formatted = ['Server channel statistics']
        for message in messages:
            label, value = message.split(': ')
            formatted.append(' --%s: %s' % (label.ljust(25), value))
        formatted.append(' Calculated averages')
        for average in averages:
            label, value = average.split(': ')
            formatted.append(' --%s: %s' % (label.ljust(25), value))
        return '\n'.join(formatted)

    def set_socket(self, sock, map=None):
        AsyncChat.set_socket(self, sock, map)
        # Ensure that we never block waiting for a socket to close.
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER,
                               self.linger)

    def server_name(self):
        return self.server.name

    def close_when_done(self):
        self._keep_alive = 0

    def reset_terminator(self):
        self.set_terminator('\r\n\r\n')

    def reset(self):
        self._current_request = None
        self.reset_terminator()

    def request_handled(self, request):
        if request is self._request_queue[0]:
            self.server.response_ready(self)

    def writable(self):
        self.writable_calls.increment()
        return AsyncChat.writable(self) or self._request_queue[0].writable()

    def refill_buffer(self):
        responsecount = self.response_counter.value
        requests = self._request_queue
        while requests[0].writable():
            self.response_counter.increment()
            self.producer_fifo.push(requests.pop(0).out)
        if requests[0] is self._null_request and not self._keep_alive:
            self.producer_fifo.push(None)
        if responsecount < self.response_counter.value:
            # Discards final call to make averages more pertinent
            self.buffer_refills.increment()
        else:
            self.refill_skips.increment()
        return AsyncChat.refill_buffer(self)

    def readable(self):
        # Use of accepting requests here keeps from blocking asyncore.
        self.readable_calls.increment()
        return (AsyncChat.readable(self) and
                self.request_manager.accepting_requests())

    def __repr__(self):
        ar = AsyncChat.__repr__(self)[1:-1]
        return '<%s channel#: %s requests:%s>' % (ar, self.channel_number,
                                                  self.request_counter)

    def __str__(self):
        ar = AsyncChat.__repr__(self)[1:-1]
        return ('%s, Channel #%s, requests processed: %s' %
                (ar, self.channel_number, self.request_counter))

    def check_maintenance(self):
        if not self.channel_number % self.server.maintenance_interval:
            self.maintenance()

    def maintenance(self):
        self.kill_zombies()

    def kill_zombies(self):
        now = int(time.time())
        for channel in self._map.values():
            if isinstance(channel, Channel):
                if (now - channel._last_use) > channel.server.zombie_timeout:
                    channel.die_if_zombie()

    def die_if_zombie(self):
        if self.writable():
            self._last_use = int(time.time())
        else:
            self.close()

    def send(self, data):
        self.write_calls.increment()
        bytecount = 0
        if self._is_connected:
            bytecount = AsyncChat.send(self, data)
        self.bytes_out.increment(bytecount)
        self.server.bytes_out.increment(bytecount)
        return bytecount

    def recv(self, buffer_size):
        self.read_calls.increment()
        try:
            result = AsyncChat.recv(self, buffer_size)
        except MemoryError:
            sys.exit("Out of Memory!")
        bytecount = len(result)
        self.bytes_in.increment(bytecount)
        self.server.bytes_in.increment(bytecount)
        return result

    def handle_error(self):
        t, v = sys.exc_info()[:2]
        if t is SystemExit:
            raise t, v
        msglog.exception(msglog.types.ERR, None, 'Handled')
        self.close()

    def log(self, *args):
        pass

    def collect_incoming_data(self, data):
        if self._current_request:
            # we are receiving data (probably POST data) for a request
            self._current_request.collect_incoming_data(data)
        else:
            # we are receiving header (request) data
            self._in_buffer += data

    def found_terminator(self):
        self._last_use = int(time.time())
        if self._current_request:
            self._current_request.found_terminator()
        else:
            header, self._in_buffer = self._in_buffer, ''
            lines = string.split(header, '\r\n')
            while lines and not lines[0]:
                lines.pop(0)
            if not lines:
                self.close_when_done()
                return
            request = lines.pop(0)
            try:
                command, uri, version = crack_request(request)
            except:
                if self.server.debug:
                    self.log_info("Ignoring malformed HTTP request: " +
                                  request)
                return
            if '%' in request:
                request = unquote(request)
            if command is None:
                self.log_info('Bad HTTP request: %s' % repr(request), 'error')
                return
            header = _join_headers(lines)
            self._current_request = Request(self, request, command,
                                            uri, version, header)
            requests = self._request_queue
            requests.insert(len(requests) - 1, self._current_request)
            self.request_counter.increment()
            self.server.total_requests.increment()
            self._current_request.found_terminator()

    def push_with_producer(self, producer):
        self.producer_fifo.push(producer)

    def log_info(self, message, type=msglog.types.INFO):
        if type == msglog.types.DB and not self.server.debug:
            return
        prefix = '%s, Channel %s' % (self.server, self.channel_number)
        msglog.log(prefix, type, message)

    def log_statistics(self):
        self.log_info('\n%s\n' % self.get_statistics(), msglog.types.DB)

    def close(self):
        self._is_connected = False
        if self._current_request:
            try:
                self._current_request.handle_close()
            except:
                msglog.exception(prefix='Handled')
        AsyncChat.close(self)
        self.log_info('closed.', msglog.types.DB)

    def add_channel(self, map=None):
        if map is None:
            map = REDUSA_SOCKET_MAP
        assert map is REDUSA_SOCKET_MAP, 'Hack assumes that the map argument is None...'
        return asyncore.dispatcher.add_channel(self, map)

    def del_channel(self, map=None):
        if map is None:
            map = REDUSA_SOCKET_MAP
        assert map is REDUSA_SOCKET_MAP, 'Hack assumes that the map argument is None...'
        return asyncore.dispatcher.del_channel(self, map)