Пример #1
0
def convert(url, mapping):
    '''Convert given URL into affiliate URL based on mapping.

    mapping maps domains to URL query parameter/value pairs to add or replace
    in the given URL. Parameter values must be specified as lists.

    Example mapping for various amazon domains:
        {
            'amazon.com': {'tag': ['affurl-20']},
            'amazon.de': {'tag': ['affurl-21']},
            'amazon.co.uk': {'tag': ['afflink-21']}
        }

    Returns None when url has no network location (for example a relative
    path, or a host written without a leading '//'), the unchanged url when
    its domain is not a key of mapping, and the rewritten URL string
    otherwise.
    '''

    parts = urlsplit(url)
    if not parts.netloc:
        return None  # rather raise an Exception?

    # parse_domain returns a tuple like ('co.uk', 'amazon', 'www'); the first
    # two items, reversed and joined with '.', give e.g. 'amazon.co.uk'.
    domain = '.'.join(domain_parser.parse_domain(parts.netloc)[:2][::-1])

    # Leave URLs with no matching domain as they are.
    if domain not in mapping:
        return url

    # Add new and replace existing query parameters with the given ones.
    # keep_blank_values=True so parameters with an empty value ('?ref=&x=1')
    # survive the round trip instead of being silently dropped by parse_qs.
    query = parse_qs(parts.query, keep_blank_values=True)
    query.update(mapping[domain])

    # Concatenate and unsplit tuples to create a URL string; doseq=True makes
    # urlencode emit one key=value pair per list item.
    return urlunsplit(parts[:3] + (urlencode(query, True), ) + parts[4:])
Пример #2
0
 def test_google(self):
     """A plain 'http://www.google.com' URL splits into TLD, domain, subdomain."""
     parsed = domain_parser.parse_domain('http://www.google.com')
     expected = ('com', 'google', 'www')
     assert parsed == expected
Пример #3
0
 def test_internationalized_domain_name(self):
     """A host made up entirely of non-latin characters ('маил.гоогле.рф') is split correctly."""
     # Per the original author's note: should always pass under Python 3.
     parsed = domain_parser.parse_domain('http://маил.гоогле.рф')
     expected = ('рф', 'гоогле', 'маил')
     assert parsed == expected
Пример #4
0
 def test_secure_scheme(self):
     """An 'https' URL yields the same parts as its 'http' counterpart would."""
     parsed = domain_parser.parse_domain('https://www.google.com')
     expected = ('com', 'google', 'www')
     assert parsed == expected
Пример #5
0
 def test_no_scheme(self):
     """A bare host ('www.google.com') with no scheme prefix is still split correctly."""
     parsed = domain_parser.parse_domain('www.google.com')
     expected = ('com', 'google', 'www')
     assert parsed == expected
Пример #6
0
 def test_guardian(self):
     """The two-label suffix 'co.uk' is reported as a single TLD component."""
     parsed = domain_parser.parse_domain('http://www.guardian.co.uk')
     expected = ('co.uk', 'guardian', 'www')
     assert parsed == expected
Пример #7
0
 def test_more_levels():
     """A two-level subdomain is returned as one dotted string ('www.staging')."""
     parsed = domain_parser.parse_domain('www.staging.google.com')
     expected = ('com', 'google', 'www.staging')
     assert parsed == expected
Пример #8
0
 def test_no_subdomain():
     """A host without a subdomain ('twitter.com') yields an empty subdomain string."""
     parsed = domain_parser.parse_domain('twitter.com')
     expected = ('com', 'twitter', '')
     assert parsed == expected
 def test_google(self):
     """Check that 'http://www.google.com' parses to ('com', 'google', 'www')."""
     expected = ('com', 'google', 'www')
     parsed = domain_parser.parse_domain('http://www.google.com')
     assert parsed == expected
 def test_secure_scheme(self):
     """Check that the 'https' scheme does not change the parsed parts."""
     expected = ('com', 'google', 'www')
     parsed = domain_parser.parse_domain('https://www.google.com')
     assert parsed == expected
 def test_no_scheme(self):
     """Check that a scheme-less host ('www.google.com') parses correctly."""
     expected = ('com', 'google', 'www')
     parsed = domain_parser.parse_domain('www.google.com')
     assert parsed == expected
 def test_guardian(self):
     """Check that 'co.uk' is treated as the TLD of 'www.guardian.co.uk'."""
     expected = ('co.uk', 'guardian', 'www')
     parsed = domain_parser.parse_domain('http://www.guardian.co.uk')
     assert parsed == expected
Пример #13
0
 def test_internationalized_domain_name(self):
     """Check that the all-non-latin host 'маил.гоогле.рф' parses correctly."""
     # Per the original author's note: should always pass under Python 3.
     expected = ('рф', 'гоогле', 'маил')
     parsed = domain_parser.parse_domain('http://маил.гоогле.рф')
     assert parsed == expected