Exemplo n.º 1
0
def is_valid(isbn_id):
    """
    Tell whether a string is a valid ISBN written in an accepted spelling.

    The string must pass isbnlib's validity check and must be spelled either
    exactly like its canonical form or exactly like the form produced by
    ``isbnlib.mask`` for that canonical form.

    :param isbn_id: the isbn to be checked.
    :returns: boolean indicating whether the isbn is valid or not.

    >>> is_valid("978-3-16-148410-0")
    True

    >>> is_valid("9783161484100")
    True

    >>> is_valid("9783161484100aa")
    False

    >>> is_valid("abcd")
    False

    >>> is_valid("0136091814")
    True

    >>> is_valid("0136091812")
    False

    >>> is_valid("9780136091817")
    False

    >>> is_valid("123456789X")
    True
    """
    # Reject anything isbnlib does not recognise as an ISBN.
    if isbnlib.notisbn(isbn_id):
        return False

    bare_form = isbnlib.get_canonical_isbn(isbn_id)
    if bare_form == isbn_id:
        return True

    # Otherwise the only accepted spelling is the masked one.
    return isbnlib.mask(bare_form) == isbn_id
Exemplo n.º 2
0
 def get_canonical_isbn(self, line):
     """
     Collect the canonical ISBNs found in ``line``.

     Every regex in ``self.ISBN_PATTERN`` is run over the line. Each match is
     normalised (surrounding whitespace stripped, the letters i/s/b/n
     upper-cased, blanks removed, a single blank inserted after ``ISBN``),
     skipped when it is listed in ``self.SPECIAL_ISBN``, and otherwise handed
     to ``isbnlib.get_canonical_isbn``.

     :param line: text to scan.
     :returns: list of the truthy results returned by isbnlib, in match order
         (duplicates are kept).
     """
     # Upper-case only the four letters of the "isbn" prefix, in one pass.
     prefix_upper = str.maketrans('isbn', 'ISBN')
     isbns = []
     for regex in self.ISBN_PATTERN:
         matches = regex.findall(line)
         if not matches:
             continue
         logger.debug('Unchecked [' + ' '.join(matches) + ']')
         for match in matches:
             match = match.strip().translate(prefix_upper)
             match = match.replace(' ', '')
             match = match.replace('ISBN', 'ISBN ')
             if match in self.SPECIAL_ISBN:
                 continue
             try:
                 isbn = isbnlib.get_canonical_isbn(match)
             except Exception:
                 # Best effort: one bad candidate must not abort the scan, but
                 # keep the traceback so the failure can be diagnosed.
                 logger.exception('Error in isbnlib while calling get_canonical_isbn')
                 continue
             if isbn:
                 isbns.append(isbn)
     return isbns
Exemplo n.º 3
0
    def run(self, dump_filepath: str) -> None:
        """
        Performs ISBN normalization (removes hyphens and capitalizes letters)

        Each dump row is a tab-separated record whose fifth column is the JSON
        document. Only '/type/edition' records that carry at least one ISBN
        needing normalization are fetched through ``self.ol`` and, when the
        normalized list differs from the stored one, saved via ``self.save``.

        dump_filepath -- path to *.txt.gz dump containing editions that need to be operated on
        """
        if self.dry_run:
            self.logger.info(
                'dry_run set to TRUE. Script will run, but no data will be modified.'
            )

        # Dump columns, in order: type, key, revision, last_modified, JSON
        json_column = 4
        isbn_fields = ('isbn_10', 'isbn_13')
        comment = 'normalize ISBN'
        with gzip.open(dump_filepath, 'rb') as fin:
            for row in fin:
                record = json.loads(row.decode().split('\t')[json_column])
                if record['type']['key'] != '/type/edition':
                    continue

                # A field that is present but null is treated as an empty list
                # so that it cannot break the iteration below.
                isbns_by_type = {
                    field: record[field] or []
                    for field in isbn_fields if field in record
                }
                if not isbns_by_type:
                    continue

                needs_normalization = any(
                    self.isbn_needs_normalization(isbn)
                    for isbns in isbns_by_type.values() for isbn in isbns
                )
                if not needs_normalization:
                    continue

                olid = record['key'].split('/')[-1]
                edition = self.ol.Edition.get(olid)
                if edition.type['key'] != '/type/edition':
                    continue

                # if an ISBN is in the wrong field this script will not move it to the appropriate one
                for isbn_type in isbns_by_type:
                    # Work from the fetched edition, not from the dump record.
                    isbns = getattr(edition, isbn_type, [])
                    normalized_isbns = dedupe([
                        isbnlib.get_canonical_isbn(isbn)
                        if self.isbn_needs_normalization(isbn) else isbn
                        for isbn in isbns
                    ])  # remove duplicates
                    if normalized_isbns != isbns and normalized_isbns != []:
                        setattr(edition, isbn_type, normalized_isbns)
                        self.logger.info('\t'.join(
                            [olid, str(isbns),
                             str(normalized_isbns)]))
                        self.save(lambda: edition.save(comment=comment))
Exemplo n.º 4
0
 def get_canonical_isbn(self, line):
     """
     Collect the canonical ISBNs found in ``line``.

     Every regex in ``self.ISBN_PATTERN`` is run over the line. Each match is
     normalised (surrounding whitespace stripped, the letters i/s/b/n
     upper-cased, blanks removed, a single blank inserted after ``ISBN``),
     skipped when it is listed in ``self.SPECIAL_ISBN``, and otherwise handed
     to ``isbnlib.get_canonical_isbn``.

     :param line: text to scan.
     :returns: list of the truthy results returned by isbnlib, in match order
         (duplicates are kept).
     """
     # Upper-case only the four letters of the "isbn" prefix, in one pass.
     prefix_upper = str.maketrans('isbn', 'ISBN')
     isbns = []
     for regex in self.ISBN_PATTERN:
         matches = regex.findall(line)
         if not matches:
             continue
         logger.debug('Unchecked [' + ' '.join(matches) + ']')
         for match in matches:
             match = match.strip().translate(prefix_upper)
             match = match.replace(' ', '')
             match = match.replace('ISBN', 'ISBN ')
             if match in self.SPECIAL_ISBN:
                 continue
             try:
                 isbn = isbnlib.get_canonical_isbn(match)
             except Exception:
                 # Best effort: one bad candidate must not abort the scan, but
                 # keep the traceback so the failure can be diagnosed.
                 logger.exception(
                     'Error in isbnlib while calling get_canonical_isbn'
                 )
                 continue
             if isbn:
                 isbns.append(isbn)
     return isbns
Exemplo n.º 5
0
 def get(self, isbn):
     """
     Record the canonical form of ``isbn`` on the instance and return the
     search result, running ``self.search()`` only when nothing is cached yet.

     NOTE(review): the cache is not keyed by ISBN -- once ``self.cached`` is
     set it is returned for any later ``isbn`` as well; confirm that instances
     are meant to be used for a single ISBN.
     """
     canonical = isbnlib.get_canonical_isbn(isbn)
     self.isbn = canonical
     self.isbns = [canonical]
     if self.cached is None:
         self.cached = self.search()
     return self.cached
Exemplo n.º 6
0
	def version_with_isbn(self, record, isbn):
		"""
		Tell whether ``record`` lists an identifier matching ``isbn``.

		Only dict entries of ``record['identifier']`` are considered, and only
		those whose ``type`` starts with ``'isbn'``. The entry's ``value`` is
		canonicalised with isbnlib before being compared with ``isbn``.

		:param record: mapping that may hold an ``'identifier'`` list.
		:param isbn: ISBN to look for, compared against the canonical form.
		:returns: True on the first matching identifier, False otherwise.
		"""
		# A missing or null identifier list means there is nothing to match.
		for entry in record.get('identifier') or []:
			if not isinstance(entry, dict):
				continue
			i_type = entry.get('type')
			i_value = entry.get('value')
			if not isinstance(i_type, str) or not i_type.startswith('isbn'):
				continue
			# isbnlib runs a regex over the value, so skip non-string values
			# (e.g. a missing 'value' key) instead of raising TypeError.
			if not isinstance(i_value, str):
				continue
			if isbnlib.get_canonical_isbn(i_value) == isbn:
				return True
		return False
Exemplo n.º 7
0
 def isbn_needs_normalization(isbn: str) -> bool:
     """
     Returns True if the given ISBN is valid and needs to be normalized (hyphens removed, letters capitalized, etc.)
     Returns False otherwise

     The result is always a real ``bool``, as the annotation promises.
     """
     # Strings containing characters outside the allowed set are never touched.
     if not set(isbn.strip()).issubset(ALLOWED_ISBN_CHARS):
         return False
     if isbnlib.notisbn(isbn):
         return False
     # get_canonical_isbn returns None if ISBN is invalid
     normalized_isbn = isbnlib.get_canonical_isbn(isbn)
     # bool() keeps a None/empty result from leaking out as the return value.
     return bool(normalized_isbn) and normalized_isbn != isbn
Exemplo n.º 8
0
def extract_from_text(text):
    """
    Pull the ISBNs out of a piece of text.

    Every ISBN-like candidate reported by isbnlib is canonicalised;
    candidates for which isbnlib returns ``None`` are dropped.

    :param text: Some text.
    :returns: A list of canonical ISBNs found in the text.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    found = []
    for candidate in isbnlib.get_isbnlike(text):
        canonical = isbnlib.get_canonical_isbn(candidate)
        if canonical is not None:
            found.append(canonical)
    return found
Exemplo n.º 9
0
def extract_from_text(text):
    """
    Pull the ISBNs out of a piece of text.

    Every ISBN-like candidate reported by isbnlib is canonicalised;
    candidates for which isbnlib returns ``None`` are dropped.

    :param text: Some text.
    :returns: A list of canonical ISBNs found in the text.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    candidates = isbnlib.get_isbnlike(text)
    canonical_forms = map(isbnlib.get_canonical_isbn, candidates)
    return [form for form in canonical_forms if form is not None]
Exemplo n.º 10
0
 def check_isbn_validity(self, isbn):
     """ Returns boolean.
         Called by views.alternates() and views.filtered_alternates()

         Stores the ISBN-13 canonical form of `isbn` (or None when isbnlib
         cannot canonicalise it) on `self.canonical_isbn` as a side effect. """
     validity = False
     try:
         self.canonical_isbn = isbnlib.get_canonical_isbn(
             isbn, output='isbn13')  # will return None on bad isbn
         if self.canonical_isbn is None:
             # Expected outcome for a bad isbn: report it directly instead of
             # provoking an exception in is_isbn13(None).
             log.warning('looks like ```%s``` is not valid', isbn)
         else:
             validity = isbnlib.is_isbn13(self.canonical_isbn)
     except Exception as e:
         # Anything unexpected from isbnlib still counts as "not valid".
         log.warning(
             'exception assessing validity, ```%s```; looks like ```%s``` is not valid',
             e, isbn)
     log.debug('validity, `%s`', validity)
     return validity
Exemplo n.º 11
0
def is_valid(isbn_id):
    """
    Tell whether a string is a valid ISBN written in an accepted spelling.

    The string must pass isbnlib's validity check and must be spelled either
    exactly like its canonical form or exactly like the form produced by
    ``isbnlib.mask`` for that canonical form.

    :param isbn_id: the isbn to be checked.
    :returns: boolean indicating whether the isbn is valid or not.

    >>> is_valid("978-3-16-148410-0")
    True

    >>> is_valid("9783161484100")
    True

    >>> is_valid("9783161484100aa")
    False

    >>> is_valid("abcd")
    False

    >>> is_valid("0136091814")
    True

    >>> is_valid("0136091812")
    False

    >>> is_valid("9780136091817")
    False

    >>> is_valid("123456789X")
    True
    """
    # Reject anything isbnlib does not recognise as an ISBN.
    if isbnlib.notisbn(isbn_id):
        return False
    bare_form = isbnlib.get_canonical_isbn(isbn_id)
    # Accept the bare spelling first; fall back to the masked spelling.
    return bare_form == isbn_id or isbnlib.mask(bare_form) == isbn_id
Exemplo n.º 12
0
def preprocess_isbns(isbns):
    """
    Convert a collection of ISBNs to unique canonical ISBN-13s.

    Entries that fail isbnlib's strict validity check are dropped. ISBN-10s
    are converted to ISBN-13 before canonicalisation. Duplicates are removed
    while keeping first-seen order, so the result is deterministic.

    :param isbns: isbns in different formats
    :return: canonical isbn13s
    """
    # A dict serves as an insertion-ordered set.
    canonical_isbns = {}
    for isbn in isbns:
        if isbnlib.notisbn(isbn, level='strict'):
            continue
        if isbnlib.is_isbn10(isbn):
            isbn = isbnlib.to_isbn13(isbn)
        canonical_isbns[isbnlib.get_canonical_isbn(isbn)] = None
    return list(canonical_isbns)
Exemplo n.º 13
0
 def get_canonical_isbn2(self, line):
     """
     Collect the canonical ISBNs found in ``line`` using isbnlib's own
     ISBN-like detection.

     A candidate is skipped when it is listed in ``self.SPECIAL_ISBN`` or when
     its raw text is a substring of an ISBN that was already collected.

     :param line: text to scan.
     :returns: list of the truthy results of ``isbnlib.get_canonical_isbn``.
     """
     isbns = []
     matches = isbnlib.get_isbnlike(line)
     if matches:
         logger.debug('Unchecked [' + ' '.join(matches) + ']')
     for match in matches:
         # NOTE(review): the duplicate test compares the raw match with the
         # canonical results, so differently formatted spellings of one ISBN
         # may not be recognised as duplicates -- confirm this is intended.
         if match in self.SPECIAL_ISBN or any(match in s for s in isbns):
             continue
         try:
             isbn = isbnlib.get_canonical_isbn(match)
         except Exception:
             # Best effort: one bad candidate must not abort the scan, but
             # keep the traceback so the failure can be diagnosed.
             logger.exception('Error in isbnlib while calling get_canonical_isbn')
             continue
         if isbn:
             isbns.append(isbn)
     return isbns
Exemplo n.º 14
0
 def get_canonical_isbn2(self, line):
     """
     Collect the canonical ISBNs found in ``line`` using isbnlib's own
     ISBN-like detection.

     A candidate is skipped when it is listed in ``self.SPECIAL_ISBN`` or when
     its raw text is a substring of an ISBN that was already collected.

     :param line: text to scan.
     :returns: list of the truthy results of ``isbnlib.get_canonical_isbn``.
     """
     isbns = []
     matches = isbnlib.get_isbnlike(line)
     if matches:
         logger.debug('Unchecked [' + ' '.join(matches) + ']')
     for match in matches:
         # NOTE(review): the duplicate test compares the raw match with the
         # canonical results, so differently formatted spellings of one ISBN
         # may not be recognised as duplicates -- confirm this is intended.
         if match in self.SPECIAL_ISBN or any(match in s for s in isbns):
             continue
         try:
             isbn = isbnlib.get_canonical_isbn(match)
         except Exception:
             # Best effort: one bad candidate must not abort the scan, but
             # keep the traceback so the failure can be diagnosed.
             logger.exception('Error in isbnlib while calling get_canonical_isbn')
             continue
         if isbn:
             isbns.append(isbn)
     return isbns
Exemplo n.º 15
0
def build_keys():
    """ Takes the hyphenated isbns and builds canonical isbns.

    Reads the source booklist JSON, keys every record with a usable ISBN by
    its ISBN-13 canonical form, and writes the resulting mapping as sorted,
    indented JSON. Records with an empty ISBN, or with an ISBN that isbnlib
    cannot canonicalise, are skipped.
    """
    new_dct = {}
    with open(f'{stuff_dir}/02_source_booklist_2019-04-26.json',
              'r',
              encoding='utf-8') as f:
        lst = json.load(f)
    for dct in lst:
        if not dct['ISBN']:  # some records are empty
            continue
        canonical_isbn = isbnlib.get_canonical_isbn(dct['ISBN'],
                                                    output='isbn13')
        if canonical_isbn is None:
            # A None key cannot be ordered against the string keys, so
            # json.dumps(sort_keys=True) below would raise TypeError.
            log.warning('skipping record with invalid ISBN, ```%s```',
                        dct['ISBN'])
            continue
        new_dct[canonical_isbn] = {
            'isbn_original': dct['ISBN'],
            'title': dct['Title'],
            'author': dct['Author']
        }
    jsn = json.dumps(new_dct, sort_keys=True, indent=2)
    log.debug(f'jsn, ```{jsn}```')
    with open(f'{project_dir}/data/05_source_key_data.json',
              'w',
              encoding='utf-8') as f:
        f.write(jsn)
0
 async def convert(self, ctx: commands.Context, argument: str) -> str:
     """
     Convert a command argument into a canonical ISBN.

     :param ctx: invocation context (not used by the conversion itself).
     :param argument: raw argument text supplied by the user.
     :returns: the value of ``isbnlib.get_canonical_isbn(argument)``; the
         annotation is ``str`` because the value is returned as-is and never
         converted to ``int``.
     :raises commands.errors.BadArgument: when isbnlib rejects the argument.
     """
     if isbnlib.notisbn(argument):
         raise commands.errors.BadArgument('Invalid ISBN: ' + argument)
     return isbnlib.get_canonical_isbn(argument)
Exemplo n.º 17
0
itemsgen =  zot.makeiter(zot.top(limit=10))
for batch in tqdm(itemsgen, total=int(nitems/10 + int(bool(nitems % 10)))):
    for item in batch:
        try:
            isbn = item['data']['ISBN']
        except KeyError:
            # Not a book-ish item
            continue
            
        if not isbn:
            # No ISBN listed
            continue

        # Transform to canonical (bare) form, then return to standardised
        # form with hyphens.
        canisbn = get_canonical_isbn(isbn.replace(' ', ''))

        if not canisbn:
            # This most likely means that the ISBN given is bogus
            # (e.g. has a faulty checksum). Some books have bogus
            # ISBNs printed on them, so they are used for cataloguing
            # by some libraries despite being formally invalid.
            print("Error extracting ISBN from "+str(isbn))
            continue

        newisbn = mask(canisbn)
        if newisbn != isbn:
            assert newisbn
            print("Updating "+str(isbn)+" to "+str(newisbn))
            item['data']['ISBN'] = newisbn
            if not zot.update_item(item):
Exemplo n.º 18
0
# Print one "<isbn13>\t<edition olid>\t<work olid>" line per distinct valid
# ISBN of every record in the tab-separated dump (JSON in the fifth column).
with open(infile) as f:
    for line in f:
        data = line.split("\t")
        book = json.loads(data[4])
        olid = book.get('key').replace('/books/', '')
        # 'NONE' marks an edition without a work; an empty works list is
        # treated the same way instead of raising IndexError.
        works = book.get('works')
        wolid = works[0]['key'].replace('/works/', '') if works else 'NONE'

        # get isbn
        good_isbn = []
        bad_isbn = []
        isbn_13 = book.get('isbn_13', [])
        isbn_10 = book.get('isbn_10', [])
        for isbn in isbn_13 + isbn_10:
            canonical = isbnlib.get_canonical_isbn(isbn)
            if canonical and len(canonical) == 10:
                # Report everything as ISBN-13.
                canonical = isbnlib.to_isbn13(canonical)
            if canonical:
                good_isbn.append(canonical)
            else:
                bad_isbn.append(isbn)

        isbns = set(good_isbn)
        for isbn in isbns:
            # Explicit re-check rather than `assert`, which is stripped when
            # Python runs with -O.
            checked = isbnlib.get_canonical_isbn(isbn)
            if checked:
                print("\t".join([checked, olid, wolid]))
            else:
                bad_isbn.append(isbn)
Exemplo n.º 19
0
 def raw_mapping(self, results):
     """
     Index the entries of ``results['data']['results']`` by the canonical
     form of each entry's ``'isbn'`` value.

     When two entries share a key, the later one wins.
     """
     by_isbn = {}
     for entry in results['data']['results']:
         key = isbnlib.get_canonical_isbn(entry['isbn'])
         by_isbn[key] = entry
     return by_isbn
Exemplo n.º 20
0
 def get_search_results(self, request, queryset, search_term):
     """
     Extend the parent class's search with an ISBN lookup.

     When isbnlib can canonicalise ``search_term``, objects of ``self.model``
     whose ``isbn`` equals that canonical form are OR-ed into the result.

     :returns: ``(results, use_distinct)`` with ``use_distinct`` passed
         through unchanged from the parent implementation.
     """
     found, use_distinct = super().get_search_results(request, queryset, search_term)
     canonical = isbnlib.get_canonical_isbn(search_term)
     if canonical is None:
         return found, use_distinct
     found |= self.model.objects.filter(isbn=canonical)
     return found, use_distinct