def createRequest(self, op, req, outgoing_data): """Return a new QNetworkReply object. Args: op: Operation op req: const QNetworkRequest & req outgoing_data: QIODevice * outgoingData Return: A QNetworkReply. """ if proxymod.application_factory is not None: proxy_error = proxymod.application_factory.get_error() if proxy_error is not None: return networkreply.ErrorNetworkReply( req, proxy_error, QNetworkReply.UnknownProxyError, self) if not req.url().isValid(): log.network.debug("Ignoring invalid requested URL: {}".format( req.url().errorString())) return networkreply.ErrorNetworkReply( req, "Invalid request URL", QNetworkReply.HostNotFoundError, self) for header, value in shared.custom_headers(url=req.url()): req.setRawHeader(header, value) tab = self._get_tab() current_url = QUrl() if tab is not None: try: current_url = tab.url() except RuntimeError: # We could be in the middle of the webpage shutdown here. pass request = interceptors.Request(first_party_url=current_url, request_url=req.url()) interceptors.run(request) if request.is_blocked: return networkreply.ErrorNetworkReply( req, HOSTBLOCK_ERROR_STRING, QNetworkReply.ContentAccessDenied, self) if 'log-requests' in objects.debug_flags: operation = debug.qenum_key(QNetworkAccessManager, op) operation = operation.replace('Operation', '').upper() log.network.debug("{} {}, first-party {}".format( operation, req.url().toDisplayString(), current_url.toDisplayString())) scheme = req.url().scheme() if scheme in self._scheme_handlers: result = self._scheme_handlers[scheme](req, op, current_url) if result is not None: result.setParent(self) return result self.set_referer(req, current_url) return super().createRequest(op, req, outgoing_data)
def interceptRequest(self, info): """Handle the given request. Reimplementing this virtual function and setting the interceptor on a profile makes it possible to intercept URL requests. This function is executed on the IO thread, and therefore running long tasks here will block networking. info contains the information about the URL request and will track internally whether its members have been altered. Args: info: QWebEngineUrlRequestInfo &info """ if 'log-requests' in self._args.debug_flags: resource_type = debug.qenum_key(QWebEngineUrlRequestInfo, info.resourceType()) navigation_type = debug.qenum_key(QWebEngineUrlRequestInfo, info.navigationType()) log.webview.debug("{} {}, first-party {}, resource {}, " "navigation {}".format( bytes(info.requestMethod()).decode('ascii'), info.requestUrl().toDisplayString(), info.firstPartyUrl().toDisplayString(), resource_type, navigation_type)) url = info.requestUrl() first_party = info.firstPartyUrl() if ((url.scheme(), url.host(), url.path()) == ('qute', 'settings', '/set')): if (first_party != QUrl('qute://settings/') or info.resourceType() != QWebEngineUrlRequestInfo.ResourceTypeXhr): log.webview.warning("Blocking malicious request from {} to {}" .format(first_party.toDisplayString(), url.toDisplayString())) info.block(True) return # FIXME:qtwebengine only block ads for NavigationTypeOther? request = interceptors.Request(first_party_url=first_party, request_url=url) interceptors.run(request) if request.is_blocked: info.block(True) for header, value in shared.custom_headers(url=url): info.setHttpHeader(header, value) user_agent = config.instance.get('content.headers.user_agent', url=url) if user_agent is not None: info.setHttpHeader(b'User-Agent', user_agent.encode('ascii'))
def interceptRequest(self, info): """Handle the given request. Reimplementing this virtual function and setting the interceptor on a profile makes it possible to intercept URL requests. This function is executed on the IO thread, and therefore running long tasks here will block networking. info contains the information about the URL request and will track internally whether its members have been altered. Args: info: QWebEngineUrlRequestInfo &info """ if 'log-requests' in self._args.debug_flags: resource_type = debug.qenum_key(QWebEngineUrlRequestInfo, info.resourceType()) navigation_type = debug.qenum_key(QWebEngineUrlRequestInfo, info.navigationType()) log.webview.debug("{} {}, first-party {}, resource {}, " "navigation {}".format( bytes(info.requestMethod()).decode('ascii'), info.requestUrl().toDisplayString(), info.firstPartyUrl().toDisplayString(), resource_type, navigation_type)) url = info.requestUrl() first_party = info.firstPartyUrl() if ((url.scheme(), url.host(), url.path()) == ('qute', 'settings', '/set')): if (first_party != QUrl('qute://settings/') or info.resourceType() != QWebEngineUrlRequestInfo.ResourceTypeXhr): log.webview.warning( "Blocking malicious request from {} to {}".format( first_party.toDisplayString(), url.toDisplayString())) info.block(True) return # FIXME:qtwebengine only block ads for NavigationTypeOther? request = interceptors.Request(first_party_url=first_party, request_url=url) interceptors.run(request) if request.is_blocked: info.block(True) for header, value in shared.custom_headers(url=url): info.setHttpHeader(header, value) user_agent = config.instance.get('content.headers.user_agent', url=url) if user_agent is not None: info.setHttpHeader(b'User-Agent', user_agent.encode('ascii'))
def _fetch_url(self, url): """Download the given url and add the file to the collection. Args: url: The file to download as QUrl. """ if self.writer is None: raise AssertionError if url.scheme() not in ['http', 'https']: return # Prevent loading an asset twice if url in self.loaded_urls: return self.loaded_urls.add(url) log.downloads.debug("loading asset at {}".format(url)) # Using the download manager to download host-blocked urls might crash # qute, see the comments/discussion on # https://github.com/qutebrowser/qutebrowser/pull/962#discussion_r40256987 # and https://github.com/qutebrowser/qutebrowser/issues/1053 request = interceptors.Request(first_party_url=None, request_url=url) interceptors.run(request) if request.is_blocked: log.downloads.debug("Skipping {}, host-blocked".format(url)) # We still need an empty file in the output, QWebView can be pretty # picky about displaying a file correctly when not all assets are # at least referenced in the mhtml file. self.writer.add_file(urlutils.encoded_url(url), b'') return download_manager = objreg.get('qtnetwork-download-manager') target = downloads.FileObjDownloadTarget(_NoCloseBytesIO()) item = download_manager.get(url, target=target, auto_remove=True) self.pending_downloads.add((url, item)) item.finished.connect(functools.partial(self._finished, url, item)) item.error.connect(functools.partial(self._error, url, item)) item.cancelled.connect(functools.partial(self._cancelled, url, item))
def _fetch_url(self, url): """Download the given url and add the file to the collection. Args: url: The file to download as QUrl. """ if url.scheme() not in ['http', 'https']: return # Prevent loading an asset twice if url in self.loaded_urls: return self.loaded_urls.add(url) log.downloads.debug("loading asset at {}".format(url)) # Using the download manager to download host-blocked urls might crash # qute, see the comments/discussion on # https://github.com/qutebrowser/qutebrowser/pull/962#discussion_r40256987 # and https://github.com/qutebrowser/qutebrowser/issues/1053 request = interceptors.Request(first_party_url=None, request_url=url) interceptors.run(request) if request.is_blocked: log.downloads.debug("Skipping {}, host-blocked".format(url)) # We still need an empty file in the output, QWebView can be pretty # picky about displaying a file correctly when not all assets are # at least referenced in the mhtml file. self.writer.add_file(urlutils.encoded_url(url), b'') return download_manager = objreg.get('qtnetwork-download-manager') target = downloads.FileObjDownloadTarget(_NoCloseBytesIO()) item = download_manager.get(url, target=target, auto_remove=True) self.pending_downloads.add((url, item)) item.finished.connect(functools.partial(self._finished, url, item)) item.error.connect(functools.partial(self._error, url, item)) item.cancelled.connect(functools.partial(self._cancelled, url, item))
def interceptRequest(self, info): """Handle the given request. Reimplementing this virtual function and setting the interceptor on a profile makes it possible to intercept URL requests. On Qt < 5.13, this function is executed on the IO thread, and therefore running long tasks here will block networking. info contains the information about the URL request and will track internally whether its members have been altered. Args: info: QWebEngineUrlRequestInfo &info """ if 'log-requests' in objects.debug_flags: resource_type_str = debug.qenum_key(QWebEngineUrlRequestInfo, info.resourceType()) navigation_type_str = debug.qenum_key(QWebEngineUrlRequestInfo, info.navigationType()) log.network.debug("{} {}, first-party {}, resource {}, " "navigation {}".format( bytes(info.requestMethod()).decode('ascii'), info.requestUrl().toDisplayString(), info.firstPartyUrl().toDisplayString(), resource_type_str, navigation_type_str)) url = info.requestUrl() first_party = info.firstPartyUrl() if not url.isValid(): log.network.debug("Ignoring invalid intercepted URL: {}".format( url.errorString())) return # Per QWebEngineUrlRequestInfo::ResourceType documentation, if we fail # our lookup, we should fall back to ResourceTypeUnknown try: resource_type = self._resource_types[info.resourceType()] except KeyError: log.network.warning( "Resource type {} not found in RequestInterceptor dict." .format(debug.qenum_key(QWebEngineUrlRequestInfo, info.resourceType()))) resource_type = interceptors.ResourceType.unknown is_xhr = info.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeXhr if ((url.scheme(), url.host(), url.path()) == ('qute', 'settings', '/set')): if first_party != QUrl('qute://settings/') or not is_xhr: log.network.warning("Blocking malicious request from {} to {}" .format(first_party.toDisplayString(), url.toDisplayString())) info.block(True) return # FIXME:qtwebengine only block ads for NavigationTypeOther? request = WebEngineRequest( first_party_url=first_party, request_url=url, resource_type=resource_type, webengine_info=info) interceptors.run(request) if request.is_blocked: info.block(True) for header, value in shared.custom_headers(url=url): if header.lower() == b'accept' and is_xhr: # https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/setRequestHeader # says: "If no Accept header has been set using this, an Accept header # with the type "*/*" is sent with the request when send() is called." # # We shouldn't break that if someone sets a custom Accept header for # normal requests. continue info.setHttpHeader(header, value) # Note this is ignored before Qt 5.12.4 and 5.13.1 due to # https://bugreports.qt.io/browse/QTBUG-60203 - there, we set the # commandline-flag in qtargs.py instead. if config.cache['content.headers.referer'] == 'never': info.setHttpHeader(b'Referer', b'') user_agent = websettings.user_agent(url) info.setHttpHeader(b'User-Agent', user_agent.encode('ascii'))
def interceptRequest(self, info): """Handle the given request. Reimplementing this virtual function and setting the interceptor on a profile makes it possible to intercept URL requests. On Qt < 5.13, this function is executed on the IO thread, and therefore running long tasks here will block networking. info contains the information about the URL request and will track internally whether its members have been altered. Args: info: QWebEngineUrlRequestInfo &info """ if 'log-requests' in objects.debug_flags: resource_type_str = debug.qenum_key(QWebEngineUrlRequestInfo, info.resourceType()) navigation_type_str = debug.qenum_key(QWebEngineUrlRequestInfo, info.navigationType()) log.webview.debug("{} {}, first-party {}, resource {}, " "navigation {}".format( bytes(info.requestMethod()).decode('ascii'), info.requestUrl().toDisplayString(), info.firstPartyUrl().toDisplayString(), resource_type_str, navigation_type_str)) url = info.requestUrl() first_party = info.firstPartyUrl() if not url.isValid(): log.webview.debug("Ignoring invalid intercepted URL: {}".format( url.errorString())) return # Per QWebEngineUrlRequestInfo::ResourceType documentation, if we fail # our lookup, we should fall back to ResourceTypeUnknown try: resource_type = self._resource_types[info.resourceType()] except KeyError: log.webview.warning( "Resource type {} not found in RequestInterceptor dict." .format(debug.qenum_key(QWebEngineUrlRequestInfo, info.resourceType()))) resource_type = interceptors.ResourceType.unknown if ((url.scheme(), url.host(), url.path()) == ('qute', 'settings', '/set')): if (first_party != QUrl('qute://settings/') or info.resourceType() != QWebEngineUrlRequestInfo.ResourceTypeXhr): log.webview.warning("Blocking malicious request from {} to {}" .format(first_party.toDisplayString(), url.toDisplayString())) info.block(True) return # FIXME:qtwebengine only block ads for NavigationTypeOther? request = WebEngineRequest( first_party_url=first_party, request_url=url, resource_type=resource_type, webengine_info=info) interceptors.run(request) if request.is_blocked: info.block(True) for header, value in shared.custom_headers(url=url): info.setHttpHeader(header, value) user_agent = config.instance.get('content.headers.user_agent', url=url) if user_agent is not None: info.setHttpHeader(b'User-Agent', user_agent.encode('ascii'))
def createRequest(self, op, req, outgoing_data): """Return a new QNetworkReply object. Args: op: Operation op req: const QNetworkRequest & req outgoing_data: QIODevice * outgoingData Return: A QNetworkReply. """ if proxymod.application_factory is not None: proxy_error = proxymod.application_factory.get_error() if proxy_error is not None: return networkreply.ErrorNetworkReply( req, proxy_error, QNetworkReply.UnknownProxyError, self) for header, value in shared.custom_headers(url=req.url()): req.setRawHeader(header, value) # There are some scenarios where we can't figure out current_url: # - There's a generic NetworkManager, e.g. for downloads # - The download was in a tab which is now closed. current_url = QUrl() if self._tab_id is not None: assert self._win_id is not None try: tab = objreg.get('tab', scope='tab', window=self._win_id, tab=self._tab_id) current_url = tab.url() except (KeyError, RuntimeError): # https://github.com/qutebrowser/qutebrowser/issues/889 # Catching RuntimeError because we could be in the middle of # the webpage shutdown here. current_url = QUrl() request = interceptors.Request(first_party_url=current_url, request_url=req.url()) interceptors.run(request) if request.is_blocked: return networkreply.ErrorNetworkReply( req, HOSTBLOCK_ERROR_STRING, QNetworkReply.ContentAccessDenied, self) if 'log-requests' in objects.debug_flags: operation = debug.qenum_key(QNetworkAccessManager, op) operation = operation.replace('Operation', '').upper() log.webview.debug("{} {}, first-party {}".format( operation, req.url().toDisplayString(), current_url.toDisplayString())) scheme = req.url().scheme() if scheme in self._scheme_handlers: result = self._scheme_handlers[scheme](req, op, current_url) if result is not None: result.setParent(self) return result self.set_referer(req, current_url) return super().createRequest(op, req, outgoing_data)
def createRequest(self, op, req, outgoing_data): """Return a new QNetworkReply object. Args: op: Operation op req: const QNetworkRequest & req outgoing_data: QIODevice * outgoingData Return: A QNetworkReply. """ proxy_factory = objreg.get('proxy-factory', None) if proxy_factory is not None: proxy_error = proxy_factory.get_error() if proxy_error is not None: return networkreply.ErrorNetworkReply( req, proxy_error, QNetworkReply.UnknownProxyError, self) for header, value in shared.custom_headers(url=req.url()): req.setRawHeader(header, value) # There are some scenarios where we can't figure out current_url: # - There's a generic NetworkManager, e.g. for downloads # - The download was in a tab which is now closed. current_url = QUrl() if self._tab_id is not None: assert self._win_id is not None try: tab = objreg.get('tab', scope='tab', window=self._win_id, tab=self._tab_id) current_url = tab.url() except (KeyError, RuntimeError): # https://github.com/qutebrowser/qutebrowser/issues/889 # Catching RuntimeError because we could be in the middle of # the webpage shutdown here. current_url = QUrl() request = interceptors.Request(first_party_url=current_url, request_url=req.url()) interceptors.run(request) if request.is_blocked: return networkreply.ErrorNetworkReply( req, HOSTBLOCK_ERROR_STRING, QNetworkReply.ContentAccessDenied, self) if 'log-requests' in self._args.debug_flags: operation = debug.qenum_key(QNetworkAccessManager, op) operation = operation.replace('Operation', '').upper() log.webview.debug("{} {}, first-party {}".format( operation, req.url().toDisplayString(), current_url.toDisplayString())) scheme = req.url().scheme() if scheme in self._scheme_handlers: result = self._scheme_handlers[scheme](req, op, current_url) if result is not None: result.setParent(self) return result self.set_referer(req, current_url) return super().createRequest(op, req, outgoing_data)