Пример #1
0
 def found_terminator(self):
     """Handle a terminator found in the incoming data.

     If a request is already in progress, the event is simply forwarded to
     it.  Otherwise the buffered data is treated as an HTTP request header:
     it is split into lines, the request line is cracked into command, URI
     and version, and a new Request is created, queued and notified.

     An empty header closes the channel once pending output is done; a
     request line that cannot be cracked is ignored (and logged only when
     the server is in debug mode).
     """
     self._last_use = int(time.time())
     if self._current_request:
         self._current_request.found_terminator()
         return
     # Take ownership of the buffered header and reset the buffer.
     header, self._in_buffer = self._in_buffer, ''
     lines = header.split('\r\n')
     # Skip any blank lines that precede the request line.
     while lines and not lines[0]:
         lines.pop(0)
     if not lines:
         self.close_when_done()
         return
     request = lines.pop(0)
     try:
         command, uri, version = crack_request(request)
     except Exception:
         # Best effort: a malformed request line is dropped, but
         # KeyboardInterrupt/SystemExit are no longer swallowed here.
         if self.server.debug:
             self.log_info("Ignoring malformed HTTP request: " +
                           request)
         return
     if '%' in request:
         request = unquote(request)
     if command is None:
         self.log_info('Bad HTTP request: %s' % repr(request), 'error')
         return
     header = _join_headers(lines)
     self._current_request = Request(self, request, command, uri,
                                     version, header)
     requests = self._request_queue
     # NOTE(review): inserts before the last queued entry rather than
     # appending -- presumably the queue keeps a trailing placeholder;
     # confirm against the queue's owner.
     requests.insert(len(requests) - 1, self._current_request)
     self.request_counter.increment()
     self.server.total_requests.increment()
     self._current_request.found_terminator()
Пример #2
0
    def redirect_request(self, newurl, req, fp, code, msg, headers):
        """Build the follow-up Request for a redirection response.

        The http_error_30x methods call this when a redirection response
        is received.  For codes 301, 302, 303 and "refresh", and for 307
        when the original request carries no data, a new Request for
        newurl is returned so the caller can perform the redirect.  In
        every other case an HTTPError is raised.

        """
        should_follow = code in (301, 302, 303, "refresh")
        if not should_follow:
            should_follow = code == 307 and not req.has_data()
        if not should_follow:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

        # RFC 2616 strictly forbids following a 301 or 302 answer to a POST
        # without confirmation from the user (here: the user of urllib2).
        # In practice essentially every client redirects anyway, so this
        # does the same.
        # XXX refresh redirections should really be visiting; that is
        #  tricky to fix, so it waits until after the stable release
        redirected = Request(
            newurl,
            headers=req.headers,
            origin_req_host=req.get_origin_req_host(),
            unverifiable=True,
            visit=False,
        )
        # Remember the request that started the redirect chain.
        redirected._origin_req = getattr(req, "_origin_req", req)
        return redirected
Пример #3
0
 def read(self):
     """Reads the robots.txt URL and feeds it to the parser.

     The URL is opened through the configured opener (one is set up first
     if none exists yet).  An HTTPError is treated as a response and
     inspected like any other; IOError, socket.error and OSError are
     logged via debug_robots and end the read with no change of state.

     The response status then decides the outcome:
       * 401 or 403      -> disallow_all is set to True
       * any other >=400 -> allow_all is set to True
       * 200 with lines  -> the stripped lines are passed to parse()
     Any other status leaves the parser untouched.
     """
     if self._opener is None:
         self.set_opener()
     req = Request(self.url, unverifiable=True, visit=False,
                   timeout=self._timeout)
     try:
         f = self._opener.open(req)
     except HTTPError as err:
         # The error is used as the response below.  Python 3 unbinds the
         # "as" name when the handler exits, so it must be rebound
         # explicitly or f.readline() fails with UnboundLocalError.
         f = err
     except (IOError, socket.error, OSError) as exc:
         debug_robots("ignoring error opening %r: %s" %
                      (self.url, exc))
         return
     lines = []
     line = f.readline()
     while line:
         lines.append(line.strip())
         line = f.readline()
     status = f.code
     if status == 401 or status == 403:
         self.disallow_all = True
         debug_robots("disallow all")
     elif status >= 400:
         self.allow_all = True
         debug_robots("allow all")
     elif status == 200 and lines:
         debug_robots("parse lines")
         self.parse(lines)
Пример #4
0
 def read(self):
     """Reads the robots.txt URL and feeds it to the parser."""
     # NOTE(review): only the "open the URL" step is visible in this
     # snippet; the code that consumes f is not shown here.
     if self._opener is None:
         self.set_opener()
     req = Request(self.url, unverifiable=True, visit=False)
     try:
         f = self._opener.open(req)
     except HTTPError as err:
         # "except HTTPError, f" is a SyntaxError on Python 3.  The "as"
         # form works on Python 2.6+ and 3, but Python 3 unbinds the name
         # when the handler exits, so the error is rebound to f explicitly
         # to keep it available as the response.
         f = err
Пример #5
0
 def http_request(self, request):
     """Return a request object that has add_unredirected_header.

     A request that already provides add_unredirected_header is returned
     unchanged.  Otherwise a new Request is built from the original URL,
     data and headers of the given one, and the origin_req_host,
     unverifiable and visit attributes are copied over when present.
     """
     if hasattr(request, "add_unredirected_header"):
         return request

     upgraded = Request(request._Request__original, request.data,
                        request.headers)
     # Copy the optional attributes; any that cannot be read or assigned
     # is skipped.
     for attr in ("origin_req_host", "unverifiable", "visit"):
         try:
             setattr(upgraded, attr, getattr(request, attr))
         except AttributeError:
             pass
     return upgraded
Пример #6
0
 def _request(self, url_or_req, data, visit,
              timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
     """Return a request object for a URL string or an existing request.

     A string-like argument is wrapped in a new Request carrying data,
     visit and timeout.  Anything else is assumed to be a urllib2.Request
     or mechanize.Request instance already: data is added to it when not
     None, and its visit and timeout attributes are updated through
     set_request_attr before the same object is returned.
     """
     if isstringlike(url_or_req):
         return Request(url_or_req, data, visit=visit, timeout=timeout)

     existing = url_or_req
     if data is not None:
         existing.add_data(data)
     # XXX yuck
     # NOTE(review): the last argument looks like the "unset" default that
     # set_request_attr compares against -- confirm in its definition.
     set_request_attr(existing, "visit", visit, None)
     set_request_attr(existing, "timeout", timeout,
                      _sockettimeout._GLOBAL_DEFAULT_TIMEOUT)
     return existing
Пример #7
0
 def _request(self, url_or_req, data, visit):
     """Return a request object for a URL string or an existing request.

     A string-like argument is wrapped in a new Request carrying data and
     visit.  Anything else is assumed to be a urllib2.Request or
     mechanize.Request instance already: data is added to it when not
     None, a missing visit attribute is created as None, and visit is
     overwritten only when the caller passed a value other than None.
     """
     if isstringlike(url_or_req):
         return Request(url_or_req, data, visit=visit)

     existing = url_or_req
     if data is not None:
         existing.add_data(data)
     # XXX yuck: make sure the request has a .visit attribute at all
     try:
         existing.visit
     except AttributeError:
         existing.visit = None
     if visit is not None:
         existing.visit = visit
     return existing