示例#1
0
def parse_url(url):
    """Extract the Amazon product ID (ASIN) and the site domain from a URL.

    The URL is first matched, case-insensitively, against the known product
    URL layouts. If none of them match, every "/"-separated part of the URL
    is offered to ``isbn.verify``; as a last resort ``isbn.hunt`` scans the
    whole URL, and its result is only trusted when it yields exactly one
    candidate.

    Returns:
        A ``(domain, asin)`` pair, where ``domain`` is the text that follows
        ``amazon.`` in the host name (e.g. ``co.uk``, ``com``, ``de``).

    Raises:
        BadUrl: if no Amazon domain or no product ID can be found.
    """
    # The dot after "amazon" is escaped everywhere so that hosts such as
    # "amazonaws.com" are not mistaken for an Amazon store domain, and the
    # ASIN stops at "?", "#" or "/" so a query string or fragment never
    # leaks into the returned ID.
    patterns = (
        r'amazon\.(?P<domain>[a-z.]+)/(?:gp|exec|o)/.*/?(?:ASIN|-|product)/(?P<asin>[^?#/]+)',
        r'amazon\.(?P<domain>[a-z.]+)/[^/]+/(?:gp|dp)/(?P<asin>[0-9X]+)',
        r'amazon\.(?P<domain>[a-z.]+)/(?:[^/]+/)?dp/(?P<asin>[^?#/]+)',
    )

    for pattern in patterns:
        m = re.search(pattern, url, re.I)
        if m:
            return m.group('domain', 'asin')

    # Start trying to find odd cases now. The domain lookup is
    # case-insensitive, like the patterns above.
    m = re.search(r'amazon\.([a-z.]+)/', url, re.I)
    if not m:
        # I can't even see which amazon domain it's from. Time to give up.
        raise BadUrl(url)
    domain = m.group(1)

    # Maybe there's something that looks like an ISBN sandwiched between
    # two path separators.
    for part in url.split("/"):
        v = isbn.verify(part)
        if v:
            return (domain, v)

    # Hunt through the URL looking for any sequence of characters which
    # looks like an ISBN; an ambiguous result (several hits) is not trusted.
    candidates = list(isbn.hunt(url))
    if len(candidates) == 1:
        return (domain, candidates[0])

    # Nope. I give up.
    raise BadUrl(url)
def parse_url(url):
	"""Extract the Amazon product ID (ASIN) and the site domain from a URL.

	The URL is first matched, case-insensitively, against the known product
	URL layouts. If none of them match, every "/"-separated part of the URL
	is offered to ``isbn.verify``; as a last resort ``isbn.hunt`` scans the
	whole URL, and its result is only trusted when it yields exactly one
	candidate.

	Returns:
		A ``(domain, asin)`` pair, where ``domain`` is the text that follows
		``amazon.`` in the host name (e.g. ``co.uk``, ``com``, ``de``).

	Raises:
		BadUrl: if no Amazon domain or no product ID can be found.
	"""
	# The dot after "amazon" is escaped everywhere so that hosts such as
	# "amazonaws.com" are not mistaken for an Amazon store domain, and the
	# ASIN stops at "?", "#" or "/" so a query string or fragment never
	# leaks into the returned ID.
	patterns = (
		r'amazon\.(?P<domain>[a-z.]+)/(?:gp|exec|o)/.*/?(?:ASIN|-|product)/(?P<asin>[^?#/]+)',
		r'amazon\.(?P<domain>[a-z.]+)/[^/]+/(?:gp|dp)/(?P<asin>[0-9X]+)',
		r'amazon\.(?P<domain>[a-z.]+)/(?:[^/]+/)?dp/(?P<asin>[^?#/]+)',
	)

	for pattern in patterns:
		m = re.search(pattern, url, re.I)
		if m:
			return m.group('domain', 'asin')

	# Start trying to find odd cases now. The domain lookup is
	# case-insensitive, like the patterns above.
	m = re.search(r'amazon\.([a-z.]+)/', url, re.I)
	if not m:
		# I can't even see which amazon domain it's from. Time to give up.
		raise BadUrl(url)
	domain = m.group(1)

	# Maybe there's something that looks like an ISBN sandwiched between
	# two path separators.
	for part in url.split("/"):
		v = isbn.verify(part)
		if v:
			return (domain, v)

	# Hunt through the URL looking for any sequence of characters which
	# looks like an ISBN; an ambiguous result (several hits) is not trusted.
	candidates = list(isbn.hunt(url))
	if len(candidates) == 1:
		return (domain, candidates[0])

	# Nope. I give up.
	raise BadUrl(url)
示例#3
0
 def test_valid_empty_isbn(self):
     # An empty string must make verify() return exactly False; assertIs
     # checks identity, so a merely falsy value would not pass.
     outcome = verify('')
     self.assertIs(outcome, False)
示例#4
0
 def test_invalid_check_digit_X_used_for_0(self):
     # Per the test name, this number ends in 'X' where the check digit
     # should be 0, so verify() must return exactly False.
     candidate = '3-598-21515-X'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#5
0
 def test_invalid_too_long_isbn(self):
     # The input carries a second trailing 'X', making it too long;
     # verify() must return exactly False.
     candidate = '3-598-21507-XX'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#6
0
 def test_invalid_isbn_without_check_digit(self):
     # Only nine digits are present (no check digit after the last dash
     # group), so verify() must return exactly False.
     candidate = '3-598-21507'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#7
0
 def test_invalid_too_long_isbn_with_no_dashes(self):
     # Eleven characters with no dashes is too long; verify() must
     # return exactly False.
     candidate = '3598215078X'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#8
0
 def test_invalid_isbn_without_check_digit_and_dashes(self):
     # Nine undashed digits, i.e. the check digit is missing; verify()
     # must return exactly False.
     candidate = '359821507'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#9
0
 def test_valid_isbn_without_separating_dashes(self):
     # A ten-digit number written without dashes must be accepted:
     # verify() has to return exactly True.
     candidate = '3598215088'
     outcome = verify(candidate)
     self.assertIs(outcome, True)
示例#10
0
 def test_invalid_X_other_than_check_digit(self):
     # An 'X' appears in a position other than the final one, so
     # verify() must return exactly False.
     candidate = '3-598-2X507-9'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#11
0
 def test_invalid_character_in_isbn(self):
     # The letter 'P' sits among the digits; verify() must return
     # exactly False for such input.
     candidate = '3-598-P1581-X'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#12
0
 def test_invalid_check_digit_other_than_X(self):
     # The final character is the letter 'A' rather than a digit or 'X';
     # verify() must return exactly False.
     candidate = '3-598-21507-A'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#13
0
 def test_valid_with_X_check_digit(self):
     # A dashed number ending in the check character 'X' must be
     # accepted: verify() has to return exactly True.
     candidate = '3-598-21507-X'
     outcome = verify(candidate)
     self.assertIs(outcome, True)
示例#14
0
 def test_invalid_check_digit(self):
     # Per the test name, the trailing 9 is the wrong check digit for
     # this number, so verify() must return exactly False.
     candidate = '3-598-21508-9'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#15
0
 def test_input_is_nine_characters(self):
     # A nine-character input must make verify() return exactly False.
     candidate = '134456729'
     outcome = verify(candidate)
     self.assertIs(outcome, False)
示例#16
0
 def test_valid_isbn_number(self):
     # A dashed ten-digit number must be accepted: verify() has to
     # return exactly True.
     candidate = '3-598-21508-8'
     outcome = verify(candidate)
     self.assertIs(outcome, True)
示例#17
0
 def test_valid_isbn_without_separating_dashes_with_X_check_digit(self):
     # An undashed number ending in the check character 'X' must be
     # accepted: verify() has to return exactly True.
     candidate = '359821507X'
     outcome = verify(candidate)
     self.assertIs(outcome, True)