def convert(url, mapping):
    """Convert the given URL into an affiliate URL based on mapping.

    mapping maps domains to URL query parameter/value pairs to add to, or
    replace in, the given URL. Parameter values must be specified as lists.

    Example mapping for various amazon domains:

        {
            'amazon.com': {'tag': ['affurl-20']},
            'amazon.de': {'tag': ['affurl-21']},
            'amazon.co.uk': {'tag': ['afflink-21']}
        }

    Returns None when urlsplit finds no network location in url, returns
    url unchanged when its domain is not a key of mapping, and returns the
    rewritten URL string otherwise.
    """
    new_url = urlsplit(url)
    if not new_url.netloc:
        return None  # rather raise an Exception?

    # parse_domain returns a tuple like ('co.uk', 'amazon', 'www'); take the
    # first two parts and reverse them to build 'amazon.co.uk'.
    domain = '.'.join(domain_parser.parse_domain(new_url.netloc)[:2][::-1])

    # Leave URLs with no matching domain as they are.
    if domain not in mapping:
        return url

    # Add new and replace existing query parameters with the given ones.
    # keep_blank_values=True keeps existing parameters whose value is empty
    # (e.g. '?ref=') instead of silently dropping them from the result.
    query = parse_qs(new_url.query, keep_blank_values=True)
    query.update(mapping[domain])

    # Concatenate and unsplit tuples to create a URL string; doseq=True
    # because every query value is a list.
    return urlunsplit(new_url[:3] + (urlencode(query, True),) + new_url[4:])
def test_google(self):
    """Check that 'http://www.google.com' splits into TLD, domain, subdomain."""
    expected = ('com', 'google', 'www')
    parsed = domain_parser.parse_domain('http://www.google.com')
    assert parsed == expected
def test_internationalized_domain_name(self):
    """Check that 'маил.гоогле.рф', made up entirely of non-latin
    characters, is parsed properly."""
    # Should always pass when run with Python 3.
    expected = ('рф', 'гоогле', 'маил')
    parsed = domain_parser.parse_domain('http://маил.гоогле.рф')
    assert parsed == expected
def test_secure_scheme(self):
    """Check that a URL using 'https' rather than 'http' is parsed properly."""
    expected = ('com', 'google', 'www')
    parsed = domain_parser.parse_domain('https://www.google.com')
    assert parsed == expected
def test_no_scheme(self):
    """Check that 'www.google.com', given without any scheme, is parsed
    properly."""
    expected = ('com', 'google', 'www')
    parsed = domain_parser.parse_domain('www.google.com')
    assert parsed == expected
def test_guardian(self):
    """Check that the two-part suffix 'co.uk' is returned as a single TLD."""
    expected = ('co.uk', 'guardian', 'www')
    parsed = domain_parser.parse_domain('http://www.guardian.co.uk')
    assert parsed == expected
def test_more_levels():
    """Check that a two-level subdomain is returned as one dotted string."""
    expected = ('com', 'google', 'www.staging')
    parsed = domain_parser.parse_domain('www.staging.google.com')
    assert parsed == expected
def test_no_subdomain():
    """Check that 'twitter.com' yields an empty string as its subdomain."""
    expected = ('com', 'twitter', '')
    parsed = domain_parser.parse_domain('twitter.com')
    assert parsed == expected
def test_google(self):
    """Verify the parse result for google.com."""
    url = 'http://www.google.com'
    want = ('com', 'google', 'www')
    assert domain_parser.parse_domain(url) == want
def test_secure_scheme(self):
    """Verify the parse result for 'https://www.google.com', which uses
    the 'https' scheme instead of 'http'."""
    url = 'https://www.google.com'
    want = ('com', 'google', 'www')
    assert domain_parser.parse_domain(url) == want
def test_no_scheme(self):
    """Verify the parse result for 'www.google.com', which carries no
    scheme ('http') at all."""
    url = 'www.google.com'
    want = ('com', 'google', 'www')
    assert domain_parser.parse_domain(url) == want
def test_guardian(self):
    """Verify the parse result for a 'co.uk' address, where the TLD part
    itself contains a dot."""
    url = 'http://www.guardian.co.uk'
    want = ('co.uk', 'guardian', 'www')
    assert domain_parser.parse_domain(url) == want
def test_internationalized_domain_name(self):
    """Verify the parse result for 'маил.гоогле.рф', a name composed
    entirely of non-latin characters."""
    # Should always pass when run with Python 3.
    url = 'http://маил.гоогле.рф'
    want = ('рф', 'гоогле', 'маил')
    assert domain_parser.parse_domain(url) == want