def urljoin_rfc(base, ref, encoding='utf-8'):
    r"""
    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urlparse.urljoin`` instead.

    Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Always returns a str.

    :param base: base URL; passed through ``unicode_to_str`` before joining.
    :param ref: reference to resolve against ``base``; converted the same way.
    :param encoding: encoding handed to ``unicode_to_str`` for both values.

    >>> import w3lib.url
    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
    'http://www.example.com/otherpath/index2.html'
    >>>

    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm')
    'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
    >>>

    """
    # stacklevel=2 attributes the warning to the caller's line instead of
    # this wrapper, so the call site that needs migrating is the one reported.
    warnings.warn(
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning,
        stacklevel=2,
    )

    str_base = unicode_to_str(base, encoding)
    str_ref = unicode_to_str(ref, encoding)
    return moves.urllib.parse.urljoin(str_base, str_ref)
def urljoin_rfc(base, ref, encoding='utf-8'):
    """Join ``ref`` onto ``base`` using ``urlparse.urljoin``.

    Unicode values are accepted for both arguments: each one is passed
    through ``unicode_to_str`` with the given ``encoding`` before joining.

    Always returns a str.
    """
    encoded_base = unicode_to_str(base, encoding)
    encoded_ref = unicode_to_str(ref, encoding)
    return urlparse.urljoin(encoded_base, encoded_ref)
def urljoin_rfc(base, ref, encoding="utf-8"):
    """Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Deprecated: emits a ``DeprecationWarning`` on every call; use
    ``urlparse.urljoin`` instead.

    :param base: base URL; passed through ``unicode_to_str`` before joining.
    :param ref: reference to resolve against ``base``; converted the same way.
    :param encoding: encoding handed to ``unicode_to_str`` for both values.

    Always returns a str.
    """
    # stacklevel=2 makes the warning point at the code calling this wrapper,
    # which is the line that actually has to be changed.
    warnings.warn(
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return urlparse.urljoin(unicode_to_str(base, encoding),
                            unicode_to_str(ref, encoding))
def urljoin_rfc(base, ref, encoding='utf-8'):
    """Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Deprecated: emits a ``DeprecationWarning`` on every call; use
    ``urlparse.urljoin`` instead.

    :param base: base URL; passed through ``unicode_to_str`` before joining.
    :param ref: reference to resolve against ``base``; converted the same way.
    :param encoding: encoding handed to ``unicode_to_str`` for both values.

    Always returns a str.
    """
    # Report the warning against the caller (stacklevel=2) rather than
    # against this deprecated shim itself.
    warnings.warn(
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return urlparse.urljoin(unicode_to_str(base, encoding),
                            unicode_to_str(ref, encoding))
def __repr__(self):
    """Render the report as text: a date header, then one line per index.

    Side effect: ``self.date`` is set to ``time.ctime()`` when it is falsy.

    Indexes are ordered by ``get_index_order`` of their name. For indexes
    whose class is "commodity" or "stock" the value divided by the gold
    price is appended when a gold price is set. When an entry for the same
    name exists in ``self.indexes_old`` it is appended in square brackets
    together with its date (the year only if ``self.show_year`` is set).
    The assembled text is passed through ``unicode_to_str``.
    """
    self.date = self.date or time.ctime()
    gold_classes = ["commodity", "stock"]
    chunks = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
    for entry in sorted(self.indexes,
                        key=lambda x: get_index_order(x["name"])):
        chunks.append(u"%(name)s: %(value)s%(unit)s" % entry)
        if get_index_class(entry["name"]) in gold_classes and self.gold_price:
            chunks.append(u"(%.5fozAu)" % (entry["value"] / self.gold_price))
        previous = self.indexes_old.get(entry["name"])
        if previous:
            stamp = previous["timestamp"]
            when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
            if self.show_year:
                when += " %s" % stamp.year
            chunks.append(
                u" [%s: %s%s" % (when, previous["value"], previous["unit"]))
            if (get_index_class(previous["name"]) in gold_classes
                    and self.gold_price_old):
                chunks.append(
                    u" (%.5fozAu)" % (previous["value"] / self.gold_price_old))
            chunks.append("]")
        chunks.append(u"\n")
    return unicode_to_str(u"".join(chunks))
def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Return the http-equiv parameter of the HTML meta element from the given
    HTML text and return a tuple ``(interval, url)`` where interval is a float
    containing the delay in seconds and url is a string with the absolute url
    to redirect.

    If no meta redirect is found, ``(None, None)`` is returned.

    :param text: HTML text; decoded with ``str_to_unicode`` using ``encoding``.
    :param baseurl: URL the redirect target is joined against.
    :param encoding: encoding used to decode ``text`` and to escape the url.
    :raises UnicodeDecodeError: propagated unchanged when ``text`` cannot be
        decoded with ``encoding``.
    """
    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)
    # Decode errors propagate to the caller as-is; the document is no longer
    # dumped to stdout first, since library code should not print.
    text = str_to_unicode(text, encoding)
    text = remove_comments(replace_entities(text))
    m = _meta_refresh_re.search(text)
    if m is None:
        return None, None
    interval = float(m.group('int'))
    # Strip whitespace and quote characters surrounding the url value.
    url = safe_url_string(m.group('url').strip(' "\''), encoding)
    url = moves.urllib.parse.urljoin(baseurl, url)
    return interval, url
def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names and values are
    either strings, ``(filename, content)`` tuples for file uploads, or a
    list of those to emit several parts under the same field name.

    This code is based on distutils.command.upload.

    Return a ``(body, boundary)`` tuple where ``body`` is the binary body
    value and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        field_header = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for value in values:
            # Every part starts from the bare field header; accumulating on a
            # shared title would leak one part's filename into the next part.
            title = field_header
            if isinstance(value, tuple):
                title += u'; filename="%s"' % value[0]
                content = value[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(value)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Return the http-equiv parameter of the HTML meta element from the given
    HTML text and return a tuple ``(interval, url)`` where interval is a float
    containing the delay in seconds and url is a string with the absolute url
    to redirect.

    If no meta redirect is found, ``(None, None)`` is returned.

    :param text: HTML text; decoded with ``str_to_unicode`` using ``encoding``.
    :param baseurl: URL the redirect target is joined against.
    :param encoding: encoding used to decode ``text`` and to escape the url.
    :raises UnicodeDecodeError: propagated unchanged when ``text`` cannot be
        decoded with ``encoding``.
    """
    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)
    # Let decode errors reach the caller directly instead of printing the
    # whole document to stdout before re-raising.
    text = str_to_unicode(text, encoding)
    text = remove_comments(remove_entities(text))
    m = _meta_refresh_re.search(text)
    if m is None:
        return None, None
    interval = float(m.group('int'))
    # Strip whitespace and quote characters surrounding the url value.
    url = safe_url_string(m.group('url').strip(' "\''), encoding)
    url = moves.urllib.parse.urljoin(baseurl, url)
    return interval, url
def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names and values are
    either strings, ``(filename, content)`` tuples for file uploads, or a
    list of those to emit several parts under the same field name.

    This code is based on distutils.command.upload.

    Return a ``(body, boundary)`` tuple where ``body`` is the binary body
    value and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, entries in data.items():
        name_header = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(entries, list):
            entries = [entries]
        for entry in entries:
            # Rebuild the header for each part so that a filename from an
            # earlier entry is never carried over to a later one.
            if isinstance(entry, tuple):
                title = name_header + u'; filename="%s"' % entry[0]
                payload = entry[1]
            else:
                title = name_header
                # in distutils: str(value).encode('utf-8')
                payload = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(payload)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A list of such values emits one part per entry under the same name.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        field_header = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for value in values:
            # Every part starts from the bare field header; accumulating on a
            # shared title would leak one part's filename into the next part.
            title = field_header
            if isinstance(value, tuple):
                title += u'; filename="%s"' % value[0]
                content = value[1]
            else:
                # in distutils: str(value).encode('utf-8')
                content = unicode_to_str(value)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(content)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A list of such values emits one part per entry under the same name.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """
    # Point the warning at the calling code (stacklevel=2), not at this shim.
    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, entries in data.items():
        name_header = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(entries, list):
            entries = [entries]
        for entry in entries:
            # Rebuild the header for each part so that a filename from an
            # earlier entry is never carried over to a later one.
            if isinstance(entry, tuple):
                title = name_header + u'; filename="%s"' % entry[0]
                payload = entry[1]
            else:
                title = name_header
                # in distutils: str(value).encode('utf-8')
                payload = unicode_to_str(entry)
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(payload)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary
def get_base_url(text, baseurl='', encoding='utf-8'):
    """Return the base url declared in the given html text, resolved against
    the given base url.

    When ``_baseurl_re`` finds no declaration in the text, the given base url
    itself is used. In both cases the result is passed through
    ``safe_url_string`` before being returned.
    """
    html = str_to_unicode(text, encoding)
    resolved = unicode_to_str(baseurl, encoding)
    match = _baseurl_re.search(html)
    if match is not None:
        # The captured value is unicode; encode it back before joining.
        declared = match.group(1).encode(encoding)
        resolved = urljoin(resolved, declared)
    return safe_url_string(resolved)
def safe_url_string(url, encoding='utf8'):
    """Convert the given url into a legal URL by escaping unsafe characters
    according to RFC-3986.

    A unicode url is first converted to str with ``unicode_to_str`` using
    the given encoding (default ``'utf8'``). When passing an encoding, use
    the encoding of the page the url was extracted from.

    Calling this function on an already "safe" url will return the url
    unmodified.

    Always returns a str.
    """
    encoded = unicode_to_str(url, encoding)
    return urllib.quote(encoded, safe=_safe_chars)
def __repr__(self):
    """Render the report as text: a date header, then one line per item.

    Side effect: ``self.date`` is set to ``time.ctime()`` when it is falsy.

    Items are sorted by their "name" and formatted with ``self.lformat``.
    When ``self.olformat`` is set and ``self.items_old`` holds an entry for
    the same name, that entry is appended in square brackets together with
    its date (the year only if ``self.show_year`` is set). The assembled
    text is passed through ``unicode_to_str``.
    """
    self.date = self.date or time.ctime()
    pieces = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
    for entry in sorted(self.items, key=lambda x: x["name"]):
        pieces.append(self.lformat % entry)
        if self.olformat:
            previous = self.items_old.get(entry["name"])
            if previous:
                stamp = previous["timestamp"]
                when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
                if self.show_year:
                    when += " %s" % stamp.year
                pieces.append(
                    u" [%s: " % when + self.olformat % previous + "]")
        pieces.append("\n")
    return unicode_to_str(u"".join(pieces))
def __repr__(self):
    """Build the textual report: a date header followed by one line per index.

    Side effect: ``self.date`` is set to ``time.ctime()`` when it is falsy.

    Indexes are ordered by ``get_index_order`` of their name. For indexes
    whose class is "commodity" or "stock" the value divided by the gold
    price is appended when a gold price is set. When an entry for the same
    name exists in ``self.indexes_old`` it is appended in square brackets
    together with its date (the year only if ``self.show_year`` is set).
    The assembled text is passed through ``unicode_to_str``.
    """
    self.date = self.date or time.ctime()
    priced_in_gold = ["commodity", "stock"]
    out = [u"%s\n" % self.date, u"-" * len(self.date) + "\n"]
    ranked = sorted(self.indexes, key=lambda x: get_index_order(x["name"]))
    for current in ranked:
        out.append(u"%(name)s: %(value)s%(unit)s" % current)
        if (get_index_class(current["name"]) in priced_in_gold
                and self.gold_price):
            out.append(u"(%.5fozAu)" % (current["value"] / self.gold_price))
        former = self.indexes_old.get(current["name"])
        if former:
            ts = former["timestamp"]
            label = "%s %s" % (_MONTHS[ts.month - 1], ts.day)
            if self.show_year:
                label += " %s" % ts.year
            out.append(u" [%s: %s%s" % (label, former["value"], former["unit"]))
            if (get_index_class(former["name"]) in priced_in_gold
                    and self.gold_price_old):
                out.append(
                    u" (%.5fozAu)" % (former["value"] / self.gold_price_old))
            out.append("]")
        out.append(u"\n")
    return unicode_to_str(u"".join(out))
def jsonrpc_client_call(url, method, *args, **kwargs):
    """Execute a JSON-RPC call on the given url.

    :param url: endpoint passed to ``urllib.request.urlopen``.
    :param method: name of the remote method.
    :param args: positional params for the call (exclusive with ``kwargs``).
    :param kwargs: named params for the call (exclusive with ``args``).
    :returns: the ``result`` member of the JSON-RPC response.
    :raises ValueError: if both ``args`` and ``kwargs`` are given, or if the
        response contains neither ``result`` nor ``error``.
    :raises JsonRpcError: if the response contains an ``error`` member.
    """
    from contextlib import closing

    if args and kwargs:
        raise ValueError(
            "Pass *args or **kwargs but not both to jsonrpc_client_call")
    req = {
        'jsonrpc': '2.0',
        'method': method,
        'params': args or kwargs,
        'id': 1,
    }
    data = unicode_to_str(json.dumps(req))
    # Close the response explicitly instead of leaving the connection to be
    # reclaimed by the garbage collector.
    with closing(urllib.request.urlopen(url, data)) as response:
        body = response.read()
    res = json.loads(body.decode('utf-8'))
    if 'result' in res:
        return res['result']
    if 'error' in res:
        er = res['error']
        # "data" is an optional member of a JSON-RPC 2.0 error object, so a
        # conforming server may omit it; pass None in that case.
        raise JsonRpcError(er['code'], er['message'], er.get('data'))
    msg = "JSON-RPC response must contain 'result' or 'error': %s" % res
    raise ValueError(msg)
def get_meta_refresh(text, baseurl="", encoding="utf-8"):
    """Return the http-equiv parameter of the HTML meta element from the given
    HTML text and return a tuple (interval, url) where interval is a float
    containing the delay in seconds and url is a string with the absolute url
    to redirect.

    If no meta redirect is found, (None, None) is returned.

    :param text: HTML text; decoded with ``str_to_unicode`` using ``encoding``.
    :param baseurl: URL the redirect target is joined against.
    :param encoding: encoding used to decode ``text`` and to escape the url.
    :raises UnicodeDecodeError: propagated unchanged when ``text`` cannot be
        decoded with ``encoding``.
    """
    baseurl = unicode_to_str(baseurl, encoding)
    # Decode errors propagate as-is; the former debug ``print`` statement
    # (Python-2-only syntax that dumped the document to stdout) is gone.
    text = str_to_unicode(text, encoding)
    text = remove_comments(remove_entities(text))
    m = _meta_refresh_re.search(text)
    if m is None:
        return None, None
    interval = float(m.group("int"))
    # The matched url is unicode at this point, so escape it with the page
    # encoding rather than safe_url_string's default.
    url = safe_url_string(m.group("url").strip(" \"'"), encoding)
    url = urljoin(baseurl, url)
    return interval, url
def parse_url(url, encoding=None):
    """Return the urlparsed form of the given argument.

    An argument that is already a ``urlparse.ParseResult`` is returned
    untouched; anything else is passed through ``unicode_to_str`` with the
    given encoding and then parsed with ``urlparse.urlparse``.
    """
    if isinstance(url, urlparse.ParseResult):
        return url
    return urlparse.urlparse(unicode_to_str(url, encoding))
def test_deprecation(self):
    """Call ``unicode_to_str`` inside a ``deprecated_call()`` context."""
    empty = ""
    with deprecated_call():
        unicode_to_str(empty)