def test_quote_unicode():
    """Check that unicode input is handled the same as a regular string.

    For each of quote, quote_plus, unquote and unquote_plus, the result for
    a unicode string must equal the result for the equivalent regular
    string. See ticket #28786.
    """
    plain = 'montréal'
    as_unicode = u'montréal'
    for func_name in ('quote', 'quote_plus', 'unquote', 'unquote_plus'):
        func = getattr(urllib_utf8, func_name)
        assert func(plain) == func(as_unicode)
def test_quote_and_quote_plus():
    """Compare urllib_utf8 quoting against urllib quoting of UTF-8 bytes.

    Unicode samples are encoded to UTF-8 before being handed to the plain
    urllib functions; non-unicode samples are passed through unchanged.
    """
    samples = ['', 'Hello there!', u'naïve', u' San José ', ' cafés']
    for sample in samples:
        # urllib gets the UTF-8 encoded form, so encode unicode samples first.
        if isinstance(sample, unicode):
            reference_input = sample.encode('utf-8')
        else:
            reference_input = sample

        assert urllib_utf8.quote(sample) == urllib.quote(reference_input)
        assert urllib_utf8.quote_plus(sample) == urllib.quote_plus(reference_input)
def test_recode_encoded(charname, chars, pathchars, expected_url, encoding):
    """Check E.recode_uri on a URL given both as encoded bytes and quoted.

    The URL is built from ``chars`` (used in the host and path) and
    ``pathchars`` (used as the last path segment). The test is skipped when
    the URL cannot be represented in ``encoding``.
    """
    template = u"http://m{chars}nchen.com/m{chars}chen/{pathchars}"
    plain_url = template.format(chars=chars, pathchars=pathchars)

    try:
        encoded_url = plain_url.encode(encoding)
    except UnicodeEncodeError:
        pytest.skip("Some of these things just won't go.")

    assert E.recode_uri(encoded_url) == expected_url

    # Same URL again, but with the variable parts percent-quoted.
    quoted_chars = quote(chars.encode(encoding))
    quoted_pathchars = quote(pathchars.encode(encoding))
    quoted_url = template.format(chars=quoted_chars, pathchars=quoted_pathchars)

    # ASCII is a special case when it comes to quoting: quoted ASCII is
    # expected to come back untouched rather than as expected_url.
    wanted = quoted_url if charname == 'ascii' else expected_url
    assert E.recode_uri(quoted_url) == wanted
class TestRecodeEmail(object):
    """Tests for E.encode_email, E.decode_email and E.recode_email.

    The fixtures use one non-ASCII word as the username, the domain and the
    ``subject`` query value of a single address.
    """

    munchen = u'münchen'
    email = u'{munchen}@{munchen}.com?subject={munchen}'.format(munchen=munchen)

    # The username is best encoded as simply utf8, so it is left as-is here;
    # the domain is IDNA-encoded and the subject is percent-quoted UTF-8.
    expected = u'{utf8_munchen}@{idna_munchen}.com?subject={percent_munchen}'.format(
        utf8_munchen=munchen,
        idna_munchen=munchen.encode('IDNA').decode('US-ASCII'),
        percent_munchen=quote(munchen.encode('UTF-8')),
    )

    @staticmethod
    def test_empty_string():
        """Encoding the empty string gives the empty string back."""
        encoded = E.encode_email('')
        assert encoded == ''

    def test_not_an_email(self):
        """Text that is not an address is expected to be IDNA-encoded whole."""
        text = u"They don't use email in " + self.munchen
        encoded = E.encode_email(text)
        assert encoded == text.encode('IDNA').decode('US-ASCII')

    def test_encode_email(self):
        """encode_email turns the raw address into the expected form."""
        encoded = E.encode_email(self.email)
        assert encoded == self.expected

    def test_decode_email(self):
        """decode_email turns the expected form back into the raw address."""
        decoded = E.decode_email(self.expected)
        assert decoded == self.email

    def test_recode_email(self):
        """recode_email turns the raw address into the expected form."""
        recoded = E.recode_email(self.email)
        assert recoded == self.expected

    # Tests for idempotency: applying a function twice must give the same
    # result as the single application checked above.

    def test_encode_email_idempotent(self):
        """Encoding an already encoded address leaves it unchanged."""
        once = E.encode_email(self.email)
        twice = E.encode_email(once)
        assert twice == self.expected

    def test_decode_email_idempotent(self):
        """Decoding an already decoded address leaves it unchanged."""
        once = E.decode_email(self.expected)
        twice = E.decode_email(once)
        assert twice == self.email

    def test_recode_email_idempotent(self):
        """Recoding an already recoded address leaves it unchanged."""
        once = E.recode_email(self.email)
        twice = E.recode_email(once)
        assert twice == self.expected

    def test_mailto_scheme(self):
        """A mailto prefix is kept, and the 'mailto://' form is normalised."""
        with_scheme = E.recode_email('mailto:' + self.email)
        assert with_scheme == 'mailto:' + self.expected

        # This is technically wrong, but we fix it.
        with_slashes = E.recode_email('mailto://' + self.email)
        assert with_slashes == 'mailto:' + self.expected
def test_url_reserved_chars():
    """Check that E.recode_uri returns a URL with quoted ':/?&=' unchanged."""
    quoted_reserved = quote(':/?&=')
    url = 'http://www.yelp.com?chars=%s' % quoted_reserved
    recoded = E.recode_uri(url)
    assert recoded == url
def test_unescape_quoted_nonascii(self):
    """Check a URL whose path is the percent-quoted form of self.NON_ASCII.

    The quoted URL and the URL with the raw self.NON_ASCII path are handed
    to self.assert_unquote_bytes, in that order (presumably input first,
    expected result second -- confirm against the helper).
    """
    base = 'http://yelp.com/'
    quoted_path = quote(self.NON_ASCII)
    raw_url = base + self.NON_ASCII
    escaped_url = base + quoted_path
    self.assert_unquote_bytes(escaped_url, raw_url)
def test_dont_touch_quoted_ascii(self):
    """Check a URL whose path is the percent-quoted form of self.ASCII.

    The same URL is passed to self.assert_unquote_bytes as both arguments,
    i.e. quoted ASCII is expected to come through unchanged.
    """
    quoted_path = quote(self.ASCII)
    quoted_url = 'http://yelp.com/' + quoted_path
    self.assert_unquote_bytes(quoted_url, quoted_url)
def test_dont_touch_quoted_ascii(self):
    """Check a byte URL whose path is the percent-quoted form of self.ASCII.

    The quoted text is encoded as US-ASCII so it can be joined to the bytes
    prefix. The same URL is passed to self.assert_unquote_bytes as both
    arguments, i.e. quoted ASCII is expected to come through unchanged.
    """
    quoted_path = quote(self.ASCII).encode('US-ASCII')
    quoted_url = b'http://yelp.com/' + quoted_path
    self.assert_unquote_bytes(quoted_url, quoted_url)
def test_unescape_quoted_nonascii(self):
    """Check a byte URL whose path is the percent-quoted self.NON_ASCII.

    The quoted text is encoded as US-ASCII so it can be joined to the bytes
    prefix. The quoted URL and the URL with the raw self.NON_ASCII path are
    handed to self.assert_unquote_bytes, in that order (presumably input
    first, expected result second -- confirm against the helper).
    """
    base = b'http://yelp.com/'
    quoted_text = quote(self.NON_ASCII)
    raw_url = base + self.NON_ASCII
    escaped_url = base + quoted_text.encode('US-ASCII')
    self.assert_unquote_bytes(escaped_url, raw_url)