Пример #1
0
    def _mech_open(self,
                   url,
                   data=None,
                   update_history=True,
                   visit=None,
                   timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        try:
            url.get_full_url
        except AttributeError:
            # string URL -- convert to absolute URL if required
            scheme, authority = _rfc3986.urlsplit(url)[:2]
            if scheme is None:
                # relative URL
                if self._response is None:
                    raise BrowserStateError("can't fetch relative reference: "
                                            "not viewing any document")
                url = _rfc3986.urljoin(self._response.geturl(), url)

        request = self._request(url, data, visit, timeout)
        visit = request.visit
        if visit is None:
            visit = True

        if visit:
            self._visit_request(request, update_history)

        success = True
        try:
            response = UserAgentBase.open(self, request, data)
        except urllib2.HTTPError as error:
            success = False
            if error.fp is None:  # not a response
                raise
            response = error
# except (IOError, socket.error, OSError), error:
# Yes, urllib2 really does raise all these :-((
# See test_urllib2.py for examples of socket.gaierror and OSError,
# plus note that FTPHandler raises IOError.
# XXX I don't seem to have an example of exactly socket.error being
# raised, only socket.gaierror...
# I don't want to start fixing these here, though, since this is a
# subclass of OpenerDirector, and it would break old code.  Even in
# Python core, a fix would need some backwards-compat. hack to be
# acceptable.
# raise

        if visit:
            self._set_response(response, False)
            response = copy.copy(self._response)
        elif response is not None:
            response = _response.upgrade_response(response)

        if not success:
            raise response
        return response
Пример #2
0
    def _set_response(self, response, close_current):
        # sanity check, necessary but far from sufficient
        if not (response is None or
                (hasattr(response, "info") and hasattr(response, "geturl")
                 and hasattr(response, "read"))):
            raise ValueError("not a response object")

        self.form = None
        if response is not None:
            response = _response.upgrade_response(response)
        if close_current and self._response is not None:
            self._response.close()
        self._response = response
        self._factory.set_response(response)
Пример #3
0
    def _set_response(self, response, close_current):
        # sanity check, necessary but far from sufficient
        if not (response is None or
                (hasattr(response, "info") and hasattr(response, "geturl") and
                 hasattr(response, "read")
                 )
                ):
            raise ValueError("not a response object")

        self.form = None
        if response is not None:
            response = _response.upgrade_response(response)
        if close_current and self._response is not None:
            self._response.close()
        self._response = response
        self._factory.set_response(response)
Пример #4
0
class Browser(UserAgentBase):
    """Browser-like class with support for history, forms and links.

    BrowserStateError is raised whenever the browser is in the wrong state to
    complete the requested operation - e.g., when .back() is called when the
    browser history is empty, or when .follow_link() is called when the current
    response does not contain HTML data.

    Public attributes:

    request: current request (mechanize.Request)
    form: currently selected form (see .select_form())

    """

    handler_classes = copy.copy(UserAgentBase.handler_classes)
    handler_classes["_referer"] = HTTPRefererProcessor
    default_features = copy.copy(UserAgentBase.default_features)
    default_features.append("_referer")

    def __init__(
        self,
        factory=None,
        history=None,
        request_class=None,
    ):
        """

        Only named arguments should be passed to this constructor.

        factory: object implementing the mechanize.Factory interface.
        history: object implementing the mechanize.History interface.  Note
         this interface is still experimental and may change in future.
        request_class: Request class to use.  Defaults to mechanize.Request

        The Factory and History objects passed in are 'owned' by the Browser,
        so they should not be shared across Browsers.  In particular,
        factory.set_response() should not be called except by the owning
        Browser itself.

        Note that the supplied factory's request_class is overridden by this
        constructor, to ensure only one Request class is used.

        """
        self._handle_referer = True

        if history is None:
            history = History()
        self._history = history

        if request_class is None:
            request_class = _request.Request

        if factory is None:
            factory = DefaultFactory()
        factory.set_request_class(request_class)
        self._factory = factory
        self.request_class = request_class

        self.request = None
        self._set_response(None, False)

        # do this last to avoid __getattr__ problems
        UserAgentBase.__init__(self)

    def close(self):
        UserAgentBase.close(self)
        if self._response is not None:
            self._response.close()
        if self._history is not None:
            self._history.close()
            self._history = None

        # make use after .close easy to spot
        self.form = None
        self.request = self._response = None
        self.request = self.response = self.set_response = None
        self.geturl = self.reload = self.back = None
        self.clear_history = self.set_cookie = self.links = self.forms = None
        self.viewing_html = self.encoding = self.title = None
        self.select_form = self.click = self.submit = self.click_link = None
        self.follow_link = self.find_link = None

    def set_handle_referer(self, handle):
        """Set whether to add Referer header to each request."""
        self._set_handler("_referer", handle)
        self._handle_referer = bool(handle)

    def _add_referer_header(self, request, origin_request=True):
        if self.request is None:
            return request
        scheme = request.get_type()
        original_scheme = self.request.get_type()
        if scheme not in ["http", "https"]:
            return request
        if not origin_request and not self.request.has_header("Referer"):
            return request

        if (self._handle_referer and original_scheme in ["http", "https"]
                and not (original_scheme == "https" and scheme != "https")):
            # strip URL fragment (RFC 2616 14.36)
            parts = _rfc3986.urlsplit(self.request.get_full_url())
            parts = parts[:-1] + (None, )
            referer = _rfc3986.urlunsplit(parts)
            request.add_unredirected_header("Referer", referer)
        return request

    def open_novisit(self,
                     url,
                     data=None,
                     timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Open a URL without visiting it.

        Browser state (including request, response, history, forms and links)
        is left unchanged by calling this function.

        The interface is the same as for .open().

        This is useful for things like fetching images.

        See also .retrieve().

        """
        return self._mech_open(url, data, visit=False, timeout=timeout)

    def open(self,
             url,
             data=None,
             timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        return self._mech_open(url, data, timeout=timeout)

    def _mech_open(self,
                   url,
                   data=None,
                   update_history=True,
                   visit=None,
                   timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        try:
            url.get_full_url
        except AttributeError:
            # string URL -- convert to absolute URL if required
            scheme, authority = _rfc3986.urlsplit(url)[:2]
            if scheme is None:
                # relative URL
                if self._response is None:
                    raise BrowserStateError("can't fetch relative reference: "
                                            "not viewing any document")
                url = _rfc3986.urljoin(self._response.geturl(), url)

        request = self._request(url, data, visit, timeout)
        visit = request.visit
        if visit is None:
            visit = True

        if visit:
            self._visit_request(request, update_history)

        success = True
        try:
            response = UserAgentBase.open(self, request, data)
        except urllib2.HTTPError, error:
            success = False
            if error.fp is None:  # not a response
                raise
            response = error
##         except (IOError, socket.error, OSError), error:
##             # Yes, urllib2 really does raise all these :-((
##             # See test_urllib2.py for examples of socket.gaierror and OSError,
##             # plus note that FTPHandler raises IOError.
##             # XXX I don't seem to have an example of exactly socket.error being
##             #  raised, only socket.gaierror...
##             # I don't want to start fixing these here, though, since this is a
##             # subclass of OpenerDirector, and it would break old code.  Even in
##             # Python core, a fix would need some backwards-compat. hack to be
##             # acceptable.
##             raise

        if visit:
            self._set_response(response, False)
            response = copy.copy(self._response)
        elif response is not None:
            response = _response.upgrade_response(response)

        if not success:
            raise response
        return response
Пример #5
0
 def any_response(self, request, response):
     if not hasattr(response, 'closeable_response'):
         response = upgrade_response(response)
     return response