def error_handling_callback(response):
    """
    Inspect the outcome of a request; raise on failure, warn on odd status.

    An exception handed in instead of a response is re-raised (an SSL
    certificate verification failure as FatalServerError). A gateway
    timeout or a too-long request URI raises the dedicated server error;
    any other status that is neither OK nor MULTI_STATUS only triggers a
    warning.

    :param response: Response returned by Session.request().
    :type response: :py:obj:`requests.Response`
    """
    # TODO: do some error correcting stuff
    if (isinstance(response, requests.exceptions.SSLError)
            and SSL_CERT_VERIFY_FAILED_MSG in str(response)):
        raise FatalServerError(str(response))

    if isinstance(response, Exception):
        # A request exception may carry response and request attributes;
        # if it does not, building the log line fails and is skipped.
        with suppress(Exception):
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code
    if status == HTTPStatus.GATEWAY_TIMEOUT:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == HTTPStatus.REQUEST_URI_TOO_LONG:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    accepted = (HTTPStatus.OK, HTTPStatus.MULTI_STATUS)
    if status not in accepted:
        warning('Http response status {}'.format(status))
def error_handling_callback(request):
    """
    Check a completed request; raise on failure, warn on odd status.

    If the request's data is an exception it is logged and re-raised
    (an SSL certificate verification failure as FatalServerError).
    Status 504 and 414 raise their dedicated errors; any other status
    that is neither 200 nor 207 only triggers a warning.

    @param request: Request that has completed
    @type request: L{threadedhttp.HttpRequest}
    """
    # TODO: do some error correcting stuff
    outcome = request.data
    if (isinstance(outcome, requests.exceptions.SSLError)
            and SSL_CERT_VERIFY_FAILED_MSG in str(outcome)):
        raise FatalServerError(str(outcome))

    # if all else fails
    if isinstance(outcome, Exception):
        error('An error occurred for uri ' + request.uri)
        raise outcome

    if request.status == 504:
        raise Server504Error('Server %s timed out' % request.hostname)

    if request.status == 414:
        raise Server414Error('Too long GET request')

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if request.status in (200, 207):
        return
    warning('Http response status {0}'.format(outcome.status_code))
def _http_process(session, http_request):
    """
    Send C{http_request} through C{session} and store the outcome on it.

    The response, or the exception raised while sending, is assigned to
    C{http_request.data}; nothing is returned.
    """
    method = http_request.method
    uri = http_request.uri
    body = http_request.body
    headers = http_request.headers
    if PY2 and headers:
        headers = {name: str(content) for name, content in headers.items()}

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handled as OAuth1.
    wants_oauth = auth is not None and len(auth) == 4
    if wants_oauth and isinstance(requests_oauthlib, ImportError):
        warn('%s' % requests_oauthlib, ImportWarning)
        error('OAuth authentication not supported: %s' % requests_oauthlib)
        auth = None
    elif wants_oauth:
        auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    try:
        skip_verify = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(method, uri, data=body, headers=headers,
                                  auth=auth, timeout=timeout,
                                  verify=not skip_verify)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
def _http_process(session, http_request):
    """
    Send C{http_request} through C{session} and store the outcome on it.

    Method, uri, query params, body and headers are taken from
    C{http_request}. The response, or the exception raised while
    sending, is assigned to C{http_request.data}; nothing is returned.
    """
    method = http_request.method
    uri = http_request.uri
    params = http_request.params
    body = http_request.body
    headers = http_request.headers
    if PY2 and headers:
        headers = {name: str(content) for name, content in headers.items()}

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handled as OAuth1.
    if auth is not None and len(auth) == 4:
        if not isinstance(requests_oauthlib, ImportError):
            auth = requests_oauthlib.OAuth1(*auth)
        else:
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s'
                  % requests_oauthlib)
            auth = None

    timeout = config.socket_timeout
    try:
        skip_verify = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(method, uri, params=params, data=body,
                                  headers=headers, auth=auth,
                                  timeout=timeout, verify=not skip_verify)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
def number_of_images(self):
    """Return the (cached) number of images in the djvu file.

    On first use the count is parsed from the output of
    'djvused -e n <file>' and stored in self._image_count; later calls
    return the stored value without running the tool again.
    """
    if hasattr(self, '_image_count'):
        return self._image_count

    command = ['djvused', '-e', 'n', self.file_djvu]
    process = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, errors = process.communicate()
    if process.returncode != 0:
        # Only logged; the output is still parsed below.
        error('djvulibre library error!\n{0!s}'.format(errors))
    self._image_count = int(output)
    return self._image_count
def has_text(self):
    """Test if the djvu file has a text-layer.

    On first use the output of 'djvudump <file>' is searched for the
    'TXTz' marker and the result is stored in self._has_text; later
    calls return the stored value.
    """
    if hasattr(self, '_has_text'):
        return self._has_text

    process = subprocess.Popen(['djvudump', self.file_djvu],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, errors = process.communicate()
    if process.returncode != 0:
        # Only logged; the (possibly empty) output is still inspected.
        error('djvulibre library error!\n{0!s}'.format(errors))
    dump = output.decode('utf-8')
    self._has_text = 'TXTz' in dump
    return self._has_text
def number_of_images(self):
    """Return the (cached) number of images in the djvu file.

    On first use the count is parsed from the output of
    'djvused -e n <file>' and stored in self._image_count; later calls
    return the stored value without running the tool again.
    """
    if hasattr(self, '_image_count'):
        return self._image_count

    command = ['djvused', '-e', 'n', self.file_djvu]
    process = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, errors = process.communicate()
    if process.returncode != 0:
        # Only logged; the output is still parsed below.
        error('djvulibre library error!\n%s' % errors)
    self._image_count = int(output)
    return self._image_count
def has_text(self):
    """Test if the djvu file has a text-layer.

    On first use the output of 'djvudump <file>' is searched for the
    'TXTz' marker and the result is stored in self._has_text; later
    calls return the stored value.
    """
    if hasattr(self, '_has_text'):
        return self._has_text

    process = subprocess.Popen(['djvudump', self.file_djvu],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, errors = process.communicate()
    if process.returncode != 0:
        # Only logged; the (possibly empty) output is still inspected.
        error('djvulibre library error!\n%s' % errors)
    dump = output.decode('utf-8')
    self._has_text = 'TXTz' in dump
    return self._has_text
def get_page(self, n):
    """Get page n for djvu file.

    Runs 'djvutxt --page=<n> <file>' and returns its output after it
    has been passed through self._remove_control_chars().

    Raises ValueError if the file has no text layer or if n is outside
    the range 1..number_of_images().
    """
    if not self.has_text():
        raise ValueError(
            'Djvu file {0!s} has no text layer.'.format(self.file_djvu))

    page_exists = 1 <= n <= self.number_of_images()
    if not page_exists:
        raise ValueError('Requested page number %d is not in file %s'
                         ' page range [%d-%d]'
                         % (n, self.file_djvu, 1, self.number_of_images()))

    command = ['djvutxt', '--page={0:d}'.format(n), self.file_djvu]
    process = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, errors = process.communicate()
    if process.returncode != 0:
        # Only logged; whatever was written to stdout is still returned.
        error('djvulibre library error!\n{0!s}'.format(errors))
    return self._remove_control_chars(output)
def get_page(self, n):
    """Get page n for djvu file.

    Runs 'djvutxt --page=<n> <file>' and returns its output after it
    has been passed through self._remove_control_chars().

    Raises ValueError if the file has no text layer or if n is outside
    the range 1..number_of_images().
    """
    if not self.has_text():
        raise ValueError('Djvu file %s has no text layer.' % self.file_djvu)

    page_exists = 1 <= n <= self.number_of_images()
    if not page_exists:
        raise ValueError('Requested page number %d is not in file %s'
                         ' page range [%d-%d]'
                         % (n, self.file_djvu, 1, self.number_of_images()))

    command = ['djvutxt', '--page=%d' % n, self.file_djvu]
    process = subprocess.Popen(command, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, errors = process.communicate()
    if process.returncode != 0:
        # Only logged; whatever was written to stdout is still returned.
        error('djvulibre library error!\n%s' % errors)
    return self._remove_control_chars(output)
def _http_process(session, http_request):
    """
    Process an `threadedhttp.HttpRequest` instance.

    The request is sent through C{session}; the response, or the
    exception raised while sending, is assigned to C{http_request.data}.

    @param session: Session that will be used to process the `http_request`.
    @type session: L{requests.Session}
    @param http_request: Request that will be processed.
    @type http_request: L{threadedhttp.HttpRequest}
    @return: None
    @rtype: None
    """
    method = http_request.method
    uri = http_request.uri
    params = http_request.params
    body = http_request.body
    headers = http_request.headers
    if PY2 and headers:
        headers = {name: str(content) for name, content in headers.items()}

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handled as OAuth1.
    wants_oauth = auth is not None and len(auth) == 4
    if wants_oauth and isinstance(requests_oauthlib, ImportError):
        warn('%s' % requests_oauthlib, ImportWarning)
        error('OAuth authentication not supported: %s' % requests_oauthlib)
        auth = None
    elif wants_oauth:
        auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    try:
        # Popped first so the legacy flag is not forwarded with the
        # remaining keyword arguments.
        skip_verify = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(method, uri, params=params, data=body,
                                  headers=headers, auth=auth,
                                  timeout=timeout, verify=not skip_verify,
                                  **http_request.kwargs)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
def _http_process(session, http_request):
    """
    Process an `threadedhttp.HttpRequest` instance.

    The request is sent through C{session}; the response, or the
    exception raised while sending, is assigned to C{http_request.data}.

    @param session: Session that will be used to process the `http_request`.
    @type session: L{requests.Session}
    @param http_request: Request that will be processed.
    @type http_request: L{threadedhttp.HttpRequest}
    @return: None
    @rtype: None
    """
    method = http_request.method
    uri = http_request.uri
    params = http_request.params
    body = http_request.body
    headers = http_request.headers
    if PY2 and headers:
        headers = {name: str(content) for name, content in headers.items()}

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handled as OAuth1.
    if auth is not None and len(auth) == 4:
        if not isinstance(requests_oauthlib, ImportError):
            auth = requests_oauthlib.OAuth1(*auth)
        else:
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s'
                  % requests_oauthlib)
            auth = None

    timeout = config.socket_timeout
    try:
        # Popped first so the legacy flag is not forwarded with the
        # remaining keyword arguments.
        skip_verify = http_request.kwargs.pop(
            'disable_ssl_certificate_validation', False)
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        outcome = session.request(method, uri, params=params, data=body,
                                  headers=headers, auth=auth,
                                  timeout=timeout, verify=not skip_verify,
                                  **http_request.kwargs)
    except Exception as exc:
        outcome = exc
    http_request.data = outcome
def error_handling_callback(response):
    """
    Inspect the outcome of a request; raise on failure, warn on odd status.

    An exception handed in instead of a response is re-raised (an SSL
    certificate verification failure as FatalServerError). Status 504
    and 414 raise their dedicated errors; any other status that is
    neither 200 nor 207 triggers a warning. Finally, if the response's
    encoding attribute holds a UnicodeDecodeError, that error is logged
    and raised.

    @param response: Response returned by Session.request().
    @type response: L{requests.Response}
    """
    # TODO: do some error correcting stuff
    if (isinstance(response, requests.exceptions.SSLError)
            and SSL_CERT_VERIFY_FAILED_MSG in str(response)):
        raise FatalServerError(str(response))

    if isinstance(response, Exception):
        # The exception may carry response and request attributes; if it
        # does not, building the log line fails and is skipped.
        with suppress(Exception):
            error('An error occurred for uri ' + response.request.url)
        raise response from None

    status = response.status_code
    if status == 504:
        host = urlparse(response.url).netloc
        raise Server504Error('Server {} timed out'.format(host))

    if status == 414:
        raise Server414Error('Too long GET request')

    # TODO: shall it raise? this might break some code, TBC
    # response.raise_for_status()

    # HTTP status 207 is also a success status for Webdav FINDPROP,
    # used by the version module.
    if status not in (200, 207):
        warning('Http response status {}'.format(status))

    if not isinstance(response.encoding, UnicodeDecodeError):
        return
    error('An error occurred for uri {}: '
          'no encoding detected!'.format(response.request.url))
    raise response.encoding from None
def print_failures(self, typed_item: BaseType, failed_constraints):
    """Log one error line per entry of failed_constraints.

    Each line names the constraint and the item it failed for.
    """
    messages = (f"{failed} failed for {typed_item}"
                for failed in failed_constraints)
    for message in messages:
        botlogging.error(message)
def read_user_config(): with open(_filename, 'rb') as f: exec(compile(f.read(), _filename, 'exec'), _exec_globals) if os.path.exists(_filename): _filestatus = os.stat(_filename) _filemode = _filestatus[0] _fileuid = _filestatus[4] if OSWIN32 or _fileuid in [os.getuid(), 0]: if OSWIN32 or _filemode & 0o02 == 0: read_user_config() else: try: read_user_config() except Exception as e: error(f"{e}" + "Skipped '%(fn)s': writeable by others." % {'fn': _filename}) else: try: read_user_config() except Exception as e: error(f"{e}" + "Skipped '%(fn)s': owned by someone else." % {'fn': _filename}) class _DifferentTypeError(UserWarning, TypeError): """An error when the required type doesn't match the actual type.""" def __init__(self, name, actual_type, allowed_types): super(_DifferentTypeError, self).__init__( 'Configuration variable "{0}" is defined as "{1.__name__}" in ' 'your user-config.py but expected "{2}".'.format( name, actual_type,
else: userinterface_lang = userinterface_lang.split('_')[0] # Fix up default site if family == 'wikipedia' and mylang == 'language': if __no_user_config != '2': warning('family and mylang are not set.\n' "Defaulting to family='wikipedia' and mylang='test'.") mylang = 'test' # SECURITY WARNINGS if (not ignore_file_security_warnings and private_files_permission & (stat.S_IRWXG | stat.S_IRWXO) != 0): error("CRITICAL SECURITY WARNING: 'private_files_permission' is set" ' to allow access from the group/others which' ' could give them access to the sensitive files.' ' To avoid giving others access to sensitive files, pywikibot' " won't run with this setting. Choose a more restrictive" " permission or set 'ignore_file_security_warnings' to true.") sys.exit(1) # # When called as main program, list all configuration variables # if __name__ == '__main__': _all = True for _arg in sys.argv[1:]: if _arg == 'modified': _all = False else: warning('Unknown arg {0} ignored'.format(_arg)) for _name in sorted(globals().keys()):
def fetch(uri: str, method: str = 'GET', headers: Optional[dict] = None,
          default_error_handling: bool = True,
          use_fake_user_agent: Union[bool, str] = False, **kwargs):
    """
    HTTP request.

    See :py:obj:`requests.Session.request` for parameters.

    The *headers* dict and the *callbacks* list passed in are copied
    before they are extended, so the caller's objects are not modified.

    :param uri: URL to send
    :param method: HTTP method of the request (default: GET)
    :param headers: dictionary of headers of the request
    :param default_error_handling: Use default error handling
    :param use_fake_user_agent: Set to True to use fake UA, False to use
        pywikibot's UA, str to specify own UA. This behaviour might be
        overridden by domain in config.

    :keyword charset: Either a valid charset (usable for str.decode()) or
        None to automatically chose the charset from the returned header
        (defaults to latin-1)
    :type charset: CodecInfo, str, None
    :keyword verify: verify the SSL certificate (default is True)
    :type verify: bool or path to certificates
    :keyword callbacks: Methods to call once data is fetched; each one is
        called with the response, or with the exception raised while
        sending the request
    :type callbacks: list of callable
    :return: the response; if sending raised and no callback re-raised,
        the exception object is returned instead
    :rtype: :py:obj:`requests.Response`
    """
    # Change user agent depending on fake UA settings.
    # Set header to new UA if needed.
    # Copy first: the dict is extended below and must not leak the extra
    # headers or the user agent back into the caller's object.
    headers = headers.copy() if headers else {}
    headers.update(config.extra_headers.copy() or {})

    def assign_fake_user_agent(use_fake_user_agent, uri):
        uri_domain = urlparse(uri).netloc
        use_fake_user_agent = config.fake_user_agent_exceptions.get(
            uri_domain, use_fake_user_agent)

        if use_fake_user_agent is False:
            return user_agent()
        if use_fake_user_agent is True:
            return fake_user_agent()
        if use_fake_user_agent and isinstance(use_fake_user_agent, str):
            return use_fake_user_agent  # Custom UA.
        raise ValueError('Invalid parameter: '
                         'use_fake_user_agent={}'.format(use_fake_user_agent))

    def assign_user_agent(user_agent_format_string):
        if not user_agent_format_string or '{' in user_agent_format_string:
            return user_agent(None, user_agent_format_string)
        # do nothing, it is already a UA
        return user_agent_format_string

    # If not already specified.
    if 'user-agent' not in headers:
        # Get fake UA exceptions from `fake_user_agent_exceptions` config.
        headers['user-agent'] = assign_fake_user_agent(use_fake_user_agent,
                                                       uri)
    # Already specified.
    else:
        headers['user-agent'] = assign_user_agent(headers.get('user-agent'))

    # Copy the list: inserting into the caller's own list would add
    # error_handling_callback again on every call that reuses it.
    callbacks = list(kwargs.pop('callbacks', []))
    # error_handling_callback will be executed first.
    if default_error_handling:
        callbacks.insert(0, error_handling_callback)

    charset = kwargs.pop('charset', None)

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handled as OAuth1.
    if auth is not None and len(auth) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn(str(requests_oauthlib), ImportWarning)
            error('OAuth authentication not supported: {}'.format(
                requests_oauthlib))
            auth = None
        else:
            auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    old_validation = kwargs.pop('disable_ssl_certificate_validation', None)
    if old_validation is not None:
        issue_deprecation_warning('disable_ssl_certificate_validation',
                                  instead='verify', since='20201220')
        kwargs.update(verify=not old_validation)

    try:
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        response = session.request(method, uri,
                                   headers=headers, auth=auth,
                                   timeout=timeout, **kwargs)
    except Exception as e:
        # The exception is handed to the callbacks in place of a response.
        response = e
    else:
        response.encoding = _decide_encoding(response, charset)

    for callback in callbacks:
        callback(response)

    return response
def fetch(uri, method='GET', headers=None,
          default_error_handling: bool = True,
          use_fake_user_agent: Union[bool, str] = False, **kwargs):
    """
    HTTP request.

    See L{requests.Session.request} for parameters.

    The C{headers} dict and the C{callbacks} list passed in are copied
    before they are extended, so the caller's objects are not modified.

    @param default_error_handling: Use default error handling
    @param use_fake_user_agent: Set to True to use fake UA, False to use
        pywikibot's UA, str to specify own UA. This behaviour might be
        overridden by domain in config.

    @kwarg charset: Either a valid charset (usable for str.decode()) or None
        to automatically chose the charset from the returned header (defaults
        to latin-1)
    @type charset: CodecInfo, str, None
    @kwarg verify: verify the SSL certificate (default is True)
    @type verify: bool or path to certificates
    @kwarg callbacks: Methods to call once data is fetched; each one is
        called with the response, or with the exception raised while
        sending the request
    @type callbacks: list of callable
    @rtype: L{threadedhttp.HttpRequest}
    """
    # Change user agent depending on fake UA settings.
    # Set header to new UA if needed.
    # Copy first: the dict is extended below and must not leak the extra
    # headers or the user agent back into the caller's object.
    headers = headers.copy() if headers else {}
    headers.update(config.extra_headers.copy() or {})

    def assign_fake_user_agent(use_fake_user_agent, uri):
        uri_domain = urlparse(uri).netloc
        use_fake_user_agent = config.fake_user_agent_exceptions.get(
            uri_domain, use_fake_user_agent)

        if use_fake_user_agent is False:
            return user_agent()
        if use_fake_user_agent is True:
            return fake_user_agent()
        if use_fake_user_agent and isinstance(use_fake_user_agent, str):
            return use_fake_user_agent  # Custom UA.
        raise ValueError('Invalid parameter: '
                         'use_fake_user_agent={}'.format(use_fake_user_agent))

    def assign_user_agent(user_agent_format_string):
        if not user_agent_format_string or '{' in user_agent_format_string:
            return user_agent(None, user_agent_format_string)
        # do nothing, it is already a UA
        return user_agent_format_string

    # If not already specified.
    if 'user-agent' not in headers:
        # Get fake UA exceptions from `fake_user_agent_exceptions` config.
        headers['user-agent'] = assign_fake_user_agent(use_fake_user_agent,
                                                       uri)
    # Already specified.
    else:
        headers['user-agent'] = assign_user_agent(headers.get('user-agent'))

    # Copy the list: appending to the caller's own list would add
    # error_handling_callback again on every call that reuses it.
    callbacks = list(kwargs.pop('callbacks', []))
    if default_error_handling:
        callbacks.append(error_handling_callback)

    charset = kwargs.pop('charset', None)
    request = threadedhttp.HttpRequest(charset=charset)

    auth = get_authentication(uri)
    # A 4-tuple of credentials is handled as OAuth1.
    if auth is not None and len(auth) == 4:
        if isinstance(requests_oauthlib, ImportError):
            warn('%s' % requests_oauthlib, ImportWarning)
            error('OAuth authentication not supported: %s'
                  % requests_oauthlib)
            auth = None
        else:
            auth = requests_oauthlib.OAuth1(*auth)

    timeout = config.socket_timeout
    old_validation = kwargs.pop('disable_ssl_certificate_validation', None)
    if old_validation is not None:
        issue_deprecation_warning('disable_ssl_certificate_validation',
                                  instead='verify',
                                  warning_class=FutureWarning,
                                  since='20201220')
        kwargs.update(verify=not old_validation)

    try:
        # Note that the connections are pooled which mean that a future
        # HTTPS request can succeed even if the certificate is invalid and
        # verify=True, when a request with verify=False happened before
        response = session.request(method, uri,
                                   headers=headers, auth=auth,
                                   timeout=timeout, **kwargs)
    except Exception as e:
        # The exception is stored on the request and also handed to the
        # callbacks in place of a response.
        request.data = e
        response = e
    else:
        request.data = response

    for callback in callbacks:
        callback(response)

    # if there's no data in the answer we're in trouble: reading the
    # attribute lets an AssertionError raised by that access propagate to
    # the caller (the former 'except AssertionError: raise' was a no-op).
    request.data

    return request
else: userinterface_lang = userinterface_lang.split('_')[0] # Fix up default site if family == 'wikipedia' and mylang == 'language': if __no_user_config != '2': warning('family and mylang are not set.\n' "Defaulting to family='test' and mylang='test'.") family = mylang = 'test' # SECURITY WARNINGS if (not ignore_file_security_warnings and private_files_permission & (stat.S_IRWXG | stat.S_IRWXO) != 0): error("CRITICAL SECURITY WARNING: 'private_files_permission' is set" " to allow access from the group/others which" " could give them access to the sensitive files." " To avoid giving others access to sensitive files, pywikibot" " won't run with this setting. Choose a more restrictive" " permission or set 'ignore_file_security_warnings' to true.") sys.exit(1) # # When called as main program, list all configuration variables # if __name__ == "__main__": _all = 1 for _arg in sys.argv[1:]: if _arg == "modified": _all = 0 else: warning('Unknown arg {0} ignored'.format(_arg)) _k = list(globals().keys())