def _mech_open(self, url, data=None, update_history=True, visit=None,
               timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
    """Open *url* and, when visiting, make the result the current response.

    url: either a string URL or an object exposing ``get_full_url``.  A
        string without a scheme is treated as a relative reference and is
        joined onto the URL of the current response.
    data: passed through to the request and to ``UserAgentBase.open``.
    update_history: forwarded to ``_visit_request`` when visiting.
    visit: forwarded to ``_request``; the resulting ``request.visit`` decides
        whether browser state is updated (``None`` is treated as ``True``).
    timeout: forwarded to ``_request``.

    Returns a copy of the stored response when visiting, otherwise the
    upgraded response.  Raises BrowserStateError for a relative URL when no
    document is being viewed.  An HTTPError that carries a response body is
    processed like a normal response and then raised.
    """
    try:
        url.get_full_url
    except AttributeError:
        # Plain string: make it absolute if it has no scheme.
        scheme, _ = _rfc3986.urlsplit(url)[:2]
        if scheme is None:
            if self._response is None:
                raise BrowserStateError("can't fetch relative reference: "
                                        "not viewing any document")
            base_url = self._response.geturl()
            url = _rfc3986.urljoin(base_url, url)

    request = self._request(url, data, visit, timeout)
    visit = request.visit
    visit = True if visit is None else visit

    if visit:
        self._visit_request(request, update_history)

    # NOTE: urllib2 may also raise IOError, socket.error (socket.gaierror)
    # and OSError from here.  They are deliberately left alone: this is a
    # subclass of OpenerDirector and changing what propagates would break
    # old code.
    try:
        response = UserAgentBase.open(self, request, data)
    except urllib2.HTTPError as http_error:
        if http_error.fp is None:
            # Not a real response -- nothing to record, just propagate.
            raise
        response = http_error
        failed = True
    else:
        failed = False

    if visit:
        self._set_response(response, False)
        response = copy.copy(self._response)
    elif response is not None:
        response = _response.upgrade_response(response)

    if failed:
        raise response
    return response
def _set_response(self, response, close_current):
    """Install *response* (or None) as the current response.

    A non-None *response* must expose ``info``, ``geturl`` and ``read``;
    otherwise ValueError is raised.  This check is necessary but far from
    sufficient.  The selected form is cleared, the response is upgraded,
    and the factory is told about the new response.  When *close_current*
    is true, any existing response is closed first.
    """
    if response is not None:
        looks_like_response = all(
            hasattr(response, attr) for attr in ("info", "geturl", "read"))
        if not looks_like_response:
            raise ValueError("not a response object")

    self.form = None

    if response is not None:
        response = _response.upgrade_response(response)

    # Only touch self._response when asked to close it.
    if close_current:
        current = self._response
        if current is not None:
            current.close()

    self._response = response
    self._factory.set_response(response)
def _set_response(self, response, close_current):
    """Replace the current response with *response*, which may be None.

    Raises ValueError if a non-None *response* lacks any of the ``info``,
    ``geturl`` or ``read`` attributes (a sanity check only -- passing it
    does not prove the object is a real response).  Clears ``self.form``,
    upgrades the response, optionally closes the previous response, then
    stores the new one and passes it to the factory.
    """
    have_response = response is not None

    if have_response:
        for required in ("info", "geturl", "read"):
            if not hasattr(response, required):
                raise ValueError("not a response object")

    self.form = None
    upgraded = _response.upgrade_response(response) if have_response else None

    # self._response is read only if close_current is set.
    if close_current and self._response is not None:
        self._response.close()

    self._response = upgraded
    self._factory.set_response(upgraded)
class Browser(UserAgentBase):
    """Browser-like class with support for history, forms and links.

    BrowserStateError is raised whenever the browser is in the wrong state to
    complete the requested operation - e.g., when .back() is called when the
    browser history is empty, or when .follow_link() is called when the
    current response does not contain HTML data.

    Public attributes:

    request: current request (mechanize.Request)
    form: currently selected form (see .select_form())

    """

    # Extend (copies of) the base class's handler table and default feature
    # list with the Referer processor.
    handler_classes = copy.copy(UserAgentBase.handler_classes)
    handler_classes["_referer"] = HTTPRefererProcessor
    default_features = copy.copy(UserAgentBase.default_features)
    default_features.append("_referer")

    def __init__(
            self,
            factory=None,
            history=None,
            request_class=None,
    ):
        """

        Only named arguments should be passed to this constructor.

        factory: object implementing the mechanize.Factory interface.
        history: object implementing the mechanize.History interface.  Note
         this interface is still experimental and may change in future.
        request_class: Request class to use.  Defaults to mechanize.Request

        The Factory and History objects passed in are 'owned' by the Browser,
        so they should not be shared across Browsers.  In particular,
        factory.set_response() should not be called except by the owning
        Browser itself.

        Note that the supplied factory's request_class is overridden by this
        constructor, to ensure only one Request class is used.

        """
        self._handle_referer = True

        if history is None:
            history = History()
        self._history = history

        if request_class is None:
            request_class = _request.Request

        if factory is None:
            factory = DefaultFactory()
        factory.set_request_class(request_class)
        self._factory = factory
        self.request_class = request_class

        self.request = None
        self._set_response(None, False)

        # do this last to avoid __getattr__ problems
        UserAgentBase.__init__(self)

    def close(self):
        """Close the browser, its current response and its history.

        Afterwards the public entry points are overwritten with None on the
        instance so that any use after close fails immediately.
        """
        UserAgentBase.close(self)
        if self._response is not None:
            self._response.close()
        if self._history is not None:
            self._history.close()
            self._history = None

        # make use after .close easy to spot
        self.form = None
        self.request = self._response = None
        self.response = self.set_response = None
        self.geturl = self.reload = self.back = None
        self.clear_history = self.set_cookie = self.links = self.forms = None
        self.viewing_html = self.encoding = self.title = None
        self.select_form = self.click = self.submit = self.click_link = None
        self.follow_link = self.find_link = None

    def set_handle_referer(self, handle):
        """Set whether to add Referer header to each request."""
        self._set_handler("_referer", handle)
        self._handle_referer = bool(handle)

    def _add_referer_header(self, request, origin_request=True):
        """Add a Referer header to *request* where appropriate; return it.

        The request is returned untouched when there is no current request,
        when the new request is not http/https, or when *origin_request* is
        false and the current request itself carries no Referer header.
        Otherwise the header is added only if Referer handling is enabled,
        the current request is http/https, and the navigation is not from
        https to a non-https URL.
        """
        if self.request is None:
            return request
        scheme = request.get_type()
        original_scheme = self.request.get_type()
        if scheme not in ["http", "https"]:
            return request
        if not origin_request and not self.request.has_header("Referer"):
            return request

        if (self._handle_referer and
                original_scheme in ["http", "https"] and
                not (original_scheme == "https" and scheme != "https")):
            # strip URL fragment (RFC 2616 14.36)
            parts = _rfc3986.urlsplit(self.request.get_full_url())
            parts = parts[:-1] + (None, )
            referer = _rfc3986.urlunsplit(parts)
            request.add_unredirected_header("Referer", referer)
        return request

    def open_novisit(self, url, data=None,
                     timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Open a URL without visiting it.

        Browser state (including request, response, history, forms and links)
        is left unchanged by calling this function.

        The interface is the same as for .open().

        This is useful for things like fetching images.

        See also .retrieve().

        """
        return self._mech_open(url, data, visit=False, timeout=timeout)

    def open(self, url, data=None,
             timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Open *url* via ._mech_open() with its default visit and history
        settings, and return the response."""
        return self._mech_open(url, data, timeout=timeout)

    def _mech_open(self, url, data=None, update_history=True, visit=None,
                   timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Shared implementation behind .open() and .open_novisit().

        url: string URL, or an object with a ``get_full_url`` attribute.  A
         string URL without a scheme is resolved against the URL of the
         current response.
        data: passed to the request and to UserAgentBase.open().
        update_history: forwarded to ._visit_request() when visiting.
        visit: forwarded to ._request(); the resulting request.visit decides
         whether browser state is updated (None means True).
        timeout: forwarded to ._request().

        Returns a copy of the stored response when visiting, otherwise the
        upgraded response.

        Raises BrowserStateError if url is relative and no document is being
        viewed.  An HTTPError that carries a response is recorded like any
        other response and then raised; one without a response (fp is None)
        propagates immediately.
        """
        try:
            url.get_full_url
        except AttributeError:
            # string URL -- convert to absolute URL if required
            scheme = _rfc3986.urlsplit(url)[0]
            if scheme is None:
                # relative URL
                if self._response is None:
                    raise BrowserStateError(
                        "can't fetch relative reference: "
                        "not viewing any document")
                url = _rfc3986.urljoin(self._response.geturl(), url)

        request = self._request(url, data, visit, timeout)
        visit = request.visit
        if visit is None:
            visit = True

        if visit:
            self._visit_request(request, update_history)

        success = True
        try:
            response = UserAgentBase.open(self, request, data)
        except urllib2.HTTPError as error:
            success = False
            if error.fp is None:  # not a response
                raise
            response = error
        # NOTE: urllib2 really does also raise IOError, socket.error and
        # OSError here (see test_urllib2.py for examples of socket.gaierror
        # and OSError; FTPHandler raises IOError).  They are deliberately not
        # handled: this is a subclass of OpenerDirector, and changing what
        # propagates would break old code.

        if visit:
            self._set_response(response, False)
            response = copy.copy(self._response)
        elif response is not None:
            response = _response.upgrade_response(response)

        if not success:
            raise response
        return response
def any_response(self, request, response):
    """Return *response*, upgrading it first if needed.

    A response that already has a ``closeable_response`` attribute is
    returned as-is; anything else is passed through upgrade_response().
    *request* is not used.
    """
    if hasattr(response, 'closeable_response'):
        return response
    return upgrade_response(response)