def check_tweet(tweet, parent=False):
    """Collect ISBN-like strings from a tweet and from Amazon links in it.

    When ``parent`` is true, the status that ``tweet`` replies to is fetched
    through the module-level ``api`` and inspected instead; if ``tweet`` is
    not a reply, an empty list is returned.

    Args:
        tweet: Object exposing ``full_text`` and ``in_reply_to_status_id``.
        parent: Inspect the replied-to status rather than ``tweet`` itself.

    Returns:
        list: ISBN-like strings found in the text, followed by valid
        ISBN-10/ISBN-13 values taken from Amazon ``/dp/`` redirect targets.
    """
    if parent:
        print("In parent")
        print(tweet.full_text)
        print(tweet.in_reply_to_status_id)
        if not tweet.in_reply_to_status_id:
            return []
        tweet = api.get_status(tweet.in_reply_to_status_id, tweet_mode="extended")
        print(tweet.full_text)

    text = tweet.full_text
    words = text.split()
    isbnlike = isbnlib.get_isbnlike(text, level='normal')
    print(isbnlike)
    print(words)
    for word in words:
        # "https..." also starts with "http", so one prefix test covers both.
        if not word.startswith("http"):
            continue
        print(word)
        resp = requests.head(word)
        # Only redirect responses carry a Location header; skip the link
        # instead of failing with KeyError when there is none.
        location = resp.headers.get("Location")
        if not location:
            continue
        print(location)
        if "amazon" in location and "/dp/" in location:
            amazon_text = isbnlib.get_isbnlike(location, level='normal')
            # dict.fromkeys drops duplicates while keeping first-seen order.
            for item in dict.fromkeys(amazon_text):
                if isbnlib.is_isbn10(item) or isbnlib.is_isbn13(item):
                    isbnlike.append(item)
    print(isbnlike)
    return isbnlike
def parse_csv(input_file):
    """Sort the rows of a CSV file into the module-level ``books``/``problems``.

    The third column of each row is scanned for ISBN-like text. Rows with a
    match are appended to ``books`` as
    ``[row_index, col0, col1, EAN13(first match), col3]``; the others are
    appended to ``problems`` as ``[col0, col1, col2, col3]``.

    Args:
        input_file: Path to the CSV file (made absolute before opening).

    Returns:
        The module-level ``books`` list.
    """
    path = os.path.abspath(input_file)
    with open(path, encoding="utf-8", newline='\n') as handle:
        rows = csv.reader(handle, delimiter=',', quotechar='"')
        for index, row in enumerate(rows):
            candidates = isbnlib.get_isbnlike(row[2])
            if not candidates:
                problems.append([row[0], row[1], row[2], row[3]])
                continue
            ean = isbnlib.EAN13(candidates[0])
            books.append([index, row[0], row[1], ean, row[3]])
    return books
def _isbn(details_url):
    """Get the card isbn

    - details_url: valid url leading to the card's product page

    return: on success, a tuple (isbn, soup): isbn is the canonical form of
    the first ISBN-like string starting with '978' found in the
    "col49 floatRight" element (None when there is none), soup is the parsed
    page. On any error the failure is logged and the bare isbn value
    (normally None) is returned instead of a tuple.
    """
    import isbnlib

    isbn = None
    try:
        log.info("Looking for isbn of {}...".format(details_url))
        req = requests.get(details_url)
        soup = BeautifulSoup(req.content, "lxml")
        node = soup.find(class_="col49 floatRight")
        # Build a real list: on Python 3 filter() is lazy, always truthy and
        # not subscriptable, so the old `isbn[0]` always raised.
        candidates = [it for it in isbnlib.get_isbnlike(node.text)
                      if it.startswith('978')]
        if candidates:
            isbn = isbnlib.canonical(candidates[0])
            log.info("Found isbn of url {}: {}".format(details_url, isbn))
    except Exception as e:
        log.error("Error while getting the isbn from url '{}': {}".format(details_url, e))
        # NOTE(review): the error path returns a bare value, not a tuple;
        # kept as-is because callers may rely on it.
        return isbn

    return isbn, soup
def get_isbn_from_file(file_name, max_pdf_pages=0):
    """Extract the text of a PDF and return the default ISBN found in it.

    Args:
        file_name: Path of the PDF file to read.
        max_pdf_pages: Passed to ``PDFPage.get_pages`` as ``maxpages``.

    Returns:
        Whatever ``get_default_isbn`` selects from the ISBN-like strings
        found in the extracted text.
    """
    # print() with a single argument behaves the same on Python 2 and 3.
    print("-> Getting ISBN from PDF files...")

    # PDFMiner boilerplate
    rsrcmgr = PDFResourceManager()
    sio = StringIO()
    codec = 'utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, sio, codec=codec, laparams=laparams)
    try:
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Extract text; the context manager closes the file even on errors.
        num_pages = 0  # start at 0 so the report matches the pages processed
        with open(file_name, 'rb') as fp:
            for page in PDFPage.get_pages(fp, maxpages=max_pdf_pages):
                interpreter.process_page(page)
                num_pages += 1
        print("Pages processed = " + str(num_pages))

        # Get text from StringIO
        text = sio.getvalue()
    finally:
        # Cleanup, also when page processing fails part-way through.
        device.close()
        sio.close()

    default_isbn = get_default_isbn(isbnlib.get_isbnlike(text))
    return default_isbn
def clean(self):
    """Validate that the ``isbn`` entry of ``cleaned_data`` is a valid ISBN.

    Returns:
        The ``cleaned_data`` mapping.
        NOTE(review): assumes this is a Django ``Form.clean`` override, where
        a non-None return value replaces ``cleaned_data`` — the previous
        ``return True`` would have overwritten it with a boolean. Confirm.

    Raises:
        ValidationError: If the value is missing, contains nothing ISBN-like,
            or is neither a valid ISBN-10 nor a valid ISBN-13.
    """
    data = self.cleaned_data
    isbn = data.get('isbn')
    # A missing value must fail validation rather than raise TypeError
    # inside the ISBN scan.
    if not isbn or not i.get_isbnlike(isbn):
        raise ValidationError('ISBN does not seem valid')
    if not (i.is_isbn10(isbn) or i.is_isbn13(isbn)):
        raise ValidationError(
            'ISBN does not seem to be a ISBN13 or ISBN10')
    return data
def get_isbns_from_text(self):
    """Return the processed ISBNs found in this object's text.

    The pages returned by ``self.get_text()`` are joined with newlines,
    scanned for ISBN-like strings at level ``'normal'``, and the matches are
    passed through ``preprocess_isbns``.

    Returns:
        The value returned by ``preprocess_isbns`` for the matches.
    """
    full_text = '\n'.join(self.get_text())
    candidates = isbnlib.get_isbnlike(full_text, level='normal')
    return preprocess_isbns(candidates)
def extract_from_text(text):
    """
    Find the ISBNs contained in a piece of text.

    Every ISBN-like substring is passed to ``isbnlib.get_canonical_isbn``;
    substrings for which that returns ``None`` are dropped.

    :param text: Some text.
    :returns: A list of canonical ISBNs, in order of appearance.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    canonical = map(isbnlib.get_canonical_isbn, isbnlib.get_isbnlike(text))
    return [isbn for isbn in canonical if isbn is not None]
def extract_from_text(text):
    """
    Collect the ISBNs that occur in a text.

    Each ISBN-like candidate is canonicalised with
    ``isbnlib.get_canonical_isbn``; candidates that yield ``None`` are
    skipped.

    :param text: Some text.
    :returns: A list of canonical ISBNs found in the text.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    found = []
    for candidate in isbnlib.get_isbnlike(text):
        canonical = isbnlib.get_canonical_isbn(candidate)
        if canonical is not None:
            found.append(canonical)
    return found
def find_isbns(cls, text):
    """Return the canonical, valid ISBNs referenced by ``text``.

    The text is split on whitespace. Tokens starting with ``"http"`` are
    treated as URLs: a HEAD request is sent, the ``Location`` header (or the
    token itself when there is none) is handed to every extractor named in
    ``cls.SERVICES``, and their results are collected. Every other token is
    scanned for ISBN-like strings at level ``"normal"``.

    Returns:
        list: ``isbnlib.canonical`` form of each collected candidate that is
        a valid ISBN-10 or ISBN-13, in collection order.
    """
    def _is_valid(candidate):
        return isbnlib.is_isbn10(candidate) or isbnlib.is_isbn13(candidate)

    candidates = []
    for token in text.split():
        if not token.startswith("http"):
            candidates.extend(isbnlib.get_isbnlike(token, level="normal"))
            continue
        redirect = requests.head(token).headers.get("Location")
        url = redirect or token
        for service_name in cls.SERVICES:
            extractor = getattr(cls, service_name)
            candidates.extend(extractor(url))
    return [isbnlib.canonical(candidate) for candidate in candidates
            if _is_valid(candidate)]
def extract_isbn(value):
    """Validate that ``value`` contains exactly one ISBN and return it masked.

    Args:
        value: Text expected to contain a single ISBN-10 or ISBN-13.

    Returns:
        The result of ``isbnlib.mask`` for the ISBN found.

    Raises:
        ValidationError: If ``value`` cannot be scanned or holds no ISBN-like
            text, holds more than one ISBN-like string, or the one found is
            not a valid ISBN-10/ISBN-13.
    """
    try:
        isbns = isbnlib.get_isbnlike(value)
        isbn = isbns[0]  # IndexError when nothing ISBN-like was found
    except Exception as err:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        raise ValidationError(f"Bad format {value}") from err

    if len(isbns) > 1:
        raise ValidationError("Too much ISBN numbers")
    # Use the validator is_isbn13 here; to_isbn13 is a converter.
    if not (isbnlib.is_isbn10(isbn) or isbnlib.is_isbn13(isbn)):
        raise ValidationError("It is not ISBN number")
    # Exactly one valid ISBN remains (the empty case was rejected above).
    return isbnlib.mask(isbn)
def get_canonical_isbn2(self, line):
    """Return the distinct canonical ISBNs found in ``line``.

    ISBN-like matches listed in ``self.SPECIAL_ISBN`` are ignored. Each
    remaining match is canonicalised with ``isbnlib.get_canonical_isbn``;
    matches that fail or yield nothing are skipped.

    Args:
        line: Text to scan.

    Returns:
        list: Canonical ISBNs in order of first appearance.
    """
    isbns = []
    matches = isbnlib.get_isbnlike(line)
    if matches:
        logger.debug('Unchecked [' + ' '.join(matches) + ']')
    for match in matches:
        if match in self.SPECIAL_ISBN or any(match in s for s in isbns):
            continue
        try:
            isbn = isbnlib.get_canonical_isbn(match)
        except Exception:
            logger.error('Error in isbnlib while calling get_canonical_isbn')
            continue
        # Compare canonical forms: a raw match may contain hyphens or spaces
        # and then never equals an already stored canonical value, which
        # previously let the same ISBN be appended twice.
        if isbn and isbn not in isbns:
            isbns.append(isbn)
    return isbns
def abgerufen(text, pfad: Path):
    """Look up metadata for an ISBN in ``text``, falling back to the file name.

    Args:
        text: Text to scan for ISBN-like strings; a falsy value yields None.
        pfad: Path whose ``stem`` is passed to ``isbn_from_words`` when the
            text contains nothing ISBN-like.

    Returns:
        The first truthy result of ``meta(isbn)`` over the candidates.
        Otherwise the last falsy result, or None if every lookup raised or
        there were no candidates.
    """
    if not text:
        return None

    isbns = get_isbnlike(str(text), level='normal')
    if not isbns:
        # Only keep the fallback when it produced something; wrapping an
        # empty result in a list made the emptiness check below dead code.
        guess = isbn_from_words(str(pfad.stem))
        isbns = [guess] if guess else []
    if not isbns:
        return None

    m = None
    for isbn in isbns:
        try:
            m = meta(isbn)
        except Exception:
            # Best effort: a failed lookup moves on to the next candidate.
            continue
        if m:
            break
    return m
#!/usr/bin/env python
# Print the ISBN stored under a given key in a YAML metadata file.
#
# Arguments: METAFILE KEY [mask]
#   METAFILE  YAML file that may contain an "identifier" list whose entries
#             carry "key" and "text" fields.
#   KEY       identifier key to look up; a fallback ISBN is printed when the
#             key is unknown.
#   mask      optional literal "mask": print the result of isbnlib.mask().
import sys
import os
import yaml
import isbnlib

metafile = sys.argv[1]
# Context manager so the file handle is closed deterministically.
with open(metafile, 'r') as stream:
    metadata = stream.read()

# safe_load builds plain Python objects only; yaml.load without an explicit
# Loader is unsafe and is rejected by current PyYAML releases. An empty
# document loads as None, hence the `or {}`.
yamldata = yaml.safe_load(metadata) or {}

identifier = {}

if "identifier" in yamldata:
    for entry in yamldata["identifier"]:
        if "key" in entry:
            # YAML may parse a bare ISBN as an integer, so force text first.
            candidates = isbnlib.get_isbnlike(str(entry["text"]))
            # Skip entries with nothing ISBN-like instead of raising IndexError.
            if candidates and isbnlib.is_isbn13(candidates[0]):
                identifier[entry["key"]] = isbnlib.EAN13(candidates[0])

# Fallback ISBN used when the requested key is not present.
isbn = identifier.get(sys.argv[2], "9786056644504")

if len(sys.argv) >= 4 and sys.argv[3] == "mask":
    print(isbnlib.mask(isbn))
else:
    print(isbn)
def get_ISBN_from_barcode_csv(barcode):
    """Return the ISBN recorded for ``barcode`` in ``ISBNs.csv``.

    The file is read from the current working directory as comma-separated
    values with ``|`` as quote character. The first column is compared with
    ``barcode``; the third column of the first matching row is scanned for
    ISBN-like text.

    Args:
        barcode: Value to match against the first column.

    Returns:
        str or None: The first ISBN-like string of the matching row, or None
        when no row matches or the matching row holds nothing ISBN-like.
    """
    # csv.reader needs a text-mode file on Python 3 (opened with newline='');
    # the previous binary mode made the reader fail on the first row.
    with open('ISBNs.csv', newline='') as csvfile:
        book_list = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in book_list:
            # Blank or short rows cannot hold both a barcode and an ISBN.
            if len(row) < 3 or row[0] != barcode:
                continue
            candidates = isbnlib.get_isbnlike(str(row[2]))
            return str(candidates[0]) if candidates else None
    return None
#!/usr/bin/env python
# Print the ISBN stored under a given key in a YAML metadata file.
#
# Arguments: METAFILE KEY [mask]
#   METAFILE  YAML file that may contain an "identifier" list whose entries
#             carry "key" and "text" fields.
#   KEY       identifier key to look up; a fallback ISBN is printed when the
#             key is unknown.
#   mask      optional literal "mask": print the result of isbnlib.mask().
import sys
import os
import ruamel.yaml as yaml
import isbnlib

metafile = sys.argv[1]
# Context manager so the file handle is closed deterministically.
with open(metafile, 'r') as stream:
    metadata = stream.read()

# NOTE(review): recent ruamel.yaml releases deprecate the module-level
# safe_load in favour of YAML(typ="safe").load — confirm against the pinned
# version. An empty document loads as None, hence the `or {}`.
yamldata = yaml.safe_load(metadata) or {}

identifier = {}

if "identifier" in yamldata:
    for entry in yamldata["identifier"]:
        if "key" in entry:
            # str(): YAML may parse a bare ISBN as an integer.
            candidates = isbnlib.get_isbnlike(str(entry["text"]))
            # Skip entries with nothing ISBN-like instead of raising IndexError.
            if candidates and isbnlib.is_isbn13(candidates[0]):
                identifier[entry["key"]] = isbnlib.EAN13(candidates[0])

# Fallback ISBN used when the requested key is not present.
isbn = identifier.get(sys.argv[2], "9786056644504")

if len(sys.argv) >= 4 and sys.argv[3] == "mask":
    print(isbnlib.mask(isbn))
else:
    print(isbn)