def parse_url(url):
    """Try to get the Amazon product ID (ASIN) out of ``url``.

    A list of known URL layouts is tried first. If none of them match,
    the URL is searched for something that looks like an ISBN, using the
    project's ``isbn`` helpers.

    Returns:
        A ``(domain, asin)`` tuple, where ``domain`` is the part of the
        host name that follows ``amazon.`` (``co.uk``, ``com``, ``de``
        etc., without a leading dot).

    Raises:
        BadUrl: if no Amazon domain can be found in the URL, or if no
            single product ID can be identified.
    """
    # Tried in order; the first pattern that matches wins.  The dot after
    # "amazon" is escaped everywhere so that hosts such as
    # "amazon-something.net" are not mistaken for an Amazon domain, and
    # the ASIN never extends into a query string.
    regexps = [
        r'amazon\.(?P<domain>[a-z.]+)/(?:gp|exec|o)/.*/?(?:ASIN|-|product)/(?P<asin>[^?/]+)',
        r'amazon\.(?P<domain>[a-z.]+)/[^/]+/(?:gp|dp)/(?P<asin>[0-9X]+)',
        r'amazon\.(?P<domain>[a-z.]+)/(?:[^/]+/)?dp/(?P<asin>[^?/]+)',
    ]
    for r in regexps:
        m = re.search(r, url, re.I)
        if m:
            return m.group('domain', 'asin')

    # Start trying to find odd cases now.  Matching is case-insensitive,
    # like the patterns above.
    m = re.search(r'amazon\.([a-z.]+)/', url, re.I)
    if not m:
        # I can't even see which amazon domain it's from. Time to give up.
        raise BadUrl(url)
    domain = m.group(1)

    # Maybe there's something that looks like an ISBN sandwiched between
    # two path separators.  Any truthy result from isbn.verify is used as
    # the product ID.
    for part in url.split("/"):
        v = isbn.verify(part)
        if v:
            return (domain, v)

    # Hunt through the URL looking for any sequence of characters which
    # look like an ISBN.  Only an unambiguous (single) hit is accepted.
    candidates = list(isbn.hunt(url))
    if len(candidates) == 1:
        return (domain, candidates[0])

    # Nope. I give up.
    raise BadUrl(url)
def parse_url(url):
    """Try to get the Amazon product ID (ASIN) out of ``url``.

    A list of known URL layouts is tried first. If none of them match,
    the URL is searched for something that looks like an ISBN, using the
    project's ``isbn`` helpers.

    Returns:
        A ``(domain, asin)`` tuple, where ``domain`` is the part of the
        host name that follows ``amazon.`` (``co.uk``, ``com``, ``de``
        etc., without a leading dot).

    Raises:
        BadUrl: if no Amazon domain can be found in the URL, or if no
            single product ID can be identified.
    """
    # Tried in order; the first pattern that matches wins.  The dot after
    # "amazon" is escaped everywhere so that hosts such as
    # "amazon-something.net" are not mistaken for an Amazon domain, and
    # the ASIN never extends into a query string.
    regexps = [
        r'amazon\.(?P<domain>[a-z.]+)/(?:gp|exec|o)/.*/?(?:ASIN|-|product)/(?P<asin>[^?/]+)',
        r'amazon\.(?P<domain>[a-z.]+)/[^/]+/(?:gp|dp)/(?P<asin>[0-9X]+)',
        r'amazon\.(?P<domain>[a-z.]+)/(?:[^/]+/)?dp/(?P<asin>[^?/]+)',
    ]
    for r in regexps:
        m = re.search(r, url, re.I)
        if m:
            return m.group('domain', 'asin')

    # Start trying to find odd cases now.  Matching is case-insensitive,
    # like the patterns above.
    m = re.search(r'amazon\.([a-z.]+)/', url, re.I)
    if not m:
        # I can't even see which amazon domain it's from. Time to give up.
        raise BadUrl(url)
    domain = m.group(1)

    # Maybe there's something that looks like an ISBN sandwiched between
    # two path separators.  Any truthy result from isbn.verify is used as
    # the product ID.
    for part in url.split("/"):
        v = isbn.verify(part)
        if v:
            return (domain, v)

    # Hunt through the URL looking for any sequence of characters which
    # look like an ISBN.  Only an unambiguous (single) hit is accepted.
    candidates = list(isbn.hunt(url))
    if len(candidates) == 1:
        return (domain, candidates[0])

    # Nope. I give up.
    raise BadUrl(url)
def test_valid_empty_isbn(self):
    # Despite the "valid" in the name, an empty string is expected to be
    # rejected.  assertIs demands the False singleton, not merely a falsy value.
    outcome = verify('')
    self.assertIs(outcome, False)
def test_invalid_check_digit_X_used_for_0(self):
    # 'X' sits in the check-digit position where, per the test name, 0 is
    # the correct check digit; the number must be rejected.
    outcome = verify('3-598-21515-X')
    self.assertIs(outcome, False)
def test_invalid_too_long_isbn(self):
    # One character too many (a doubled 'X' at the end) must be rejected.
    outcome = verify('3-598-21507-XX')
    self.assertIs(outcome, False)
def test_invalid_isbn_without_check_digit(self):
    # The dashed number stops before the check digit, so it must be rejected.
    outcome = verify('3-598-21507')
    self.assertIs(outcome, False)
def test_invalid_too_long_isbn_with_no_dashes(self):
    # Eleven characters without separators: too long, must be rejected.
    outcome = verify('3598215078X')
    self.assertIs(outcome, False)
def test_invalid_isbn_without_check_digit_and_dashes(self):
    # Nine bare digits with no check digit and no dashes must be rejected.
    outcome = verify('359821507')
    self.assertIs(outcome, False)
def test_valid_isbn_without_separating_dashes(self):
    # Dashes are optional: the same valid number written as ten bare
    # digits must be accepted.  assertIs demands the True singleton.
    outcome = verify('3598215088')
    self.assertIs(outcome, True)
def test_invalid_X_other_than_check_digit(self):
    # An 'X' appearing anywhere but the final (check) position must be rejected.
    outcome = verify('3-598-2X507-9')
    self.assertIs(outcome, False)
def test_invalid_character_in_isbn(self):
    # A stray letter ('P') inside the number must be rejected.
    outcome = verify('3-598-P1581-X')
    self.assertIs(outcome, False)
def test_invalid_check_digit_other_than_X(self):
    # A letter other than 'X' in the check-digit position must be rejected.
    outcome = verify('3-598-21507-A')
    self.assertIs(outcome, False)
def test_valid_with_X_check_digit(self):
    # 'X' as the final check character is legitimate here and must be accepted.
    outcome = verify('3-598-21507-X')
    self.assertIs(outcome, True)
def test_invalid_check_digit(self):
    # Well-formed number whose final digit does not match the checksum.
    outcome = verify('3-598-21508-9')
    self.assertIs(outcome, False)
def test_input_is_nine_characters(self):
    # Only nine characters are supplied; that is too short to be accepted.
    outcome = verify('134456729')
    self.assertIs(outcome, False)
def test_valid_isbn_number(self):
    # A correctly dashed number with a matching check digit must be accepted.
    outcome = verify('3-598-21508-8')
    self.assertIs(outcome, True)
def test_valid_isbn_without_separating_dashes_with_X_check_digit(self):
    # No dashes and an 'X' check character: still a valid number.
    outcome = verify('359821507X')
    self.assertIs(outcome, True)