def mangle_isbn(raw):
    """Collect the masked ISBNs found in a whitespace-separated string.

    Every token of ``raw`` is tested first as an ISBN-13 and then as an
    ISBN-10.  The masked form of a matching token is stored under the key
    ``'isbn13'`` or ``'isbn10'``; if several tokens of the same kind occur,
    the last one wins.  Tokens that are neither are ignored.

    :param raw: string holding zero or more whitespace-separated tokens.
    :returns: dict with at most the keys ``'isbn13'`` and ``'isbn10'``.
    """
    found = {}
    # str.split() without arguments already discards surrounding whitespace.
    for token in raw.split():
        if isbnlib.is_isbn13(token):
            key = 'isbn13'
        elif isbnlib.is_isbn10(token):
            key = 'isbn10'
        else:
            continue
        found[key] = isbnlib.mask(token)
    return found
async def check(self, entry):
    """Check that the entry's ISBN is written in the configured format.

    The expected format is read with ``self._cfg.get('isbn_format', entry)``.
    Nothing is reported when the option is unset, when the entry has no
    ``isbn`` value, or when the value is not a valid ISBN after cleaning.

    :param entry: object exposing a ``data`` mapping with an optional
        ``'isbn'`` key.
    :returns: an empty list, or a one-element list holding the tuple
        ``(NAME, message, suggestion)``.
    :raises ConfigurationError: if the option is set to anything other than
        ``'canonical'`` or ``'masked'`` (only reached for entries carrying a
        valid ISBN, as before).
    """
    fmt = self._cfg.get('isbn_format', entry)
    if not fmt:
        return []

    isbn = entry.data.get('isbn')
    if not isbn:
        return []

    clean_isbn = clean(isbn)
    if not clean_isbn or notisbn(clean_isbn):
        return []

    if fmt not in ('canonical', 'masked'):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which leaked a stray backslash/indentation into the message.
        raise ConfigurationError(
            "The option 'isbn_format' must be "
            "either of 'canonical' or 'masked'.")

    if fmt == 'canonical':
        cisbn = canonical(clean_isbn)
        if cisbn != isbn:
            return [(type(self).NAME,
                     "ISBN '{}' is not in canonical format.".format(isbn),
                     "Canonical format would be '{}'".format(cisbn))]
    else:
        misbn = mask(clean_isbn)
        if misbn != isbn:
            return [(type(self).NAME,
                     "ISBN '{}' is not in masked format.".format(isbn),
                     "Masked format would be '{}'".format(misbn))]

    return []
def get_meta(self):
    """
    Look up the book's metadata (title, authors, year, ...) as a dictionary.

    The lookup is done with the canonical form of ``self.isbnlike``.  Before
    the dictionary is returned, its ``'ISBN-13'`` entry is replaced by the
    masked form of that value.
    """
    record = meta(canonical(self.isbnlike))
    isbn13 = record['ISBN-13']
    record['ISBN-13'] = mask(isbn13)
    return record
def is_valid(isbn_id):
    """
    Tell whether a string is a valid ISBN written in an accepted form.

    The string must pass ``isbnlib``'s validity check and must be spelled
    either exactly as its canonical form or exactly as its masked form.

    :param isbn_id: the isbn to be checked.
    :returns: boolean indicating whether the isbn is valid or not.

    >>> is_valid("978-3-16-148410-0")
    True

    >>> is_valid("9783161484100")
    True

    >>> is_valid("9783161484100aa")
    False

    >>> is_valid("abcd")
    False

    >>> is_valid("0136091814")
    True

    >>> is_valid("0136091812")
    False

    >>> is_valid("9780136091817")
    False

    >>> is_valid("123456789X")
    True
    """
    if isbnlib.notisbn(isbn_id):
        return False

    bare = isbnlib.get_canonical_isbn(isbn_id)
    if bare == isbn_id:
        return True
    # Only mask when the canonical spelling did not match.
    return isbnlib.mask(bare) == isbn_id
def fix_isbn(entry):
    """Normalise the ``isbn`` field of a bibliography entry in place.

    An ISBN-10 is first converted to ISBN-13; the resulting ISBN-13 is then
    stored back hyphenated with ``'-'``.  Entries without an ``'isbn'`` key
    are returned untouched.

    :param entry: dict-like entry; ``'isbn'`` is read and rewritten, ``'ID'``
        is used only to identify the entry in the error message.
    :returns: the same ``entry`` object.
    :raises ValueError: if the value is neither a valid ISBN-10 nor a valid
        ISBN-13.  ``ValueError`` is a subclass of the ``Exception`` raised
        previously, so existing handlers keep working.
    """
    if 'isbn' not in entry:
        return entry

    value = entry['isbn']
    if isbnlib.is_isbn10(value):
        value = isbnlib.to_isbn13(value)

    if not isbnlib.is_isbn13(value):
        # A missing 'ID' must not hide the real problem behind a KeyError.
        entry_id = entry.get('ID', '<unknown>')
        raise ValueError(f'invalid isbn in {entry_id}: {entry["isbn"]}')

    entry['isbn'] = isbnlib.mask(value, separator='-')
    return entry
def classify_isbn(isbnlike):
    """Classify an ISBN-like string and return its normalised spellings.

    The input is tried, in order, as

    1. an ISBN-10 as given,
    2. an ISBN-10 after prefixing the canonical form with ``'0'``,
    3. an ISBN-10 after prefixing the canonical form with ``'00'``,
    4. an ISBN-13.

    :param isbnlike: string that may contain an ISBN.
    :returns: dict with ``'canon'`` (canonical form, including any zeros
        added in steps 2-3), ``'type'`` (``'isbn10'``, ``'isbn13'`` or
        ``'invalid?'``) and, for every recognised ISBN, ``'masked'``.
    """
    canon = ib.canonical(isbnlike)
    isbn = {'canon': canon}

    if ib.is_isbn10(isbnlike):
        kind = 'isbn10'
    elif ib.is_isbn10('0' + canon):
        canon = '0' + canon
        kind = 'isbn10'
    elif ib.is_isbn10('00' + canon):
        canon = '00' + canon
        kind = 'isbn10'
    elif ib.is_isbn13(canon):
        kind = 'isbn13'
    else:
        isbn['type'] = 'invalid?'
        return isbn

    isbn['canon'] = canon
    # The plain ISBN-10 branch used to omit 'masked', unlike every other
    # valid branch; all recognised ISBNs now carry it.
    isbn['masked'] = ib.mask(canon)
    isbn['type'] = kind
    return isbn
def check_isbns(isbns, return_value="last"):
    """Validate regex matches and return the hyphenated ISBN(s) among them.

    :param isbns: iterable of matches; element ``[0]`` of each match is the
        candidate string.
    :param return_value: ``"last"`` (default) returns the last valid ISBN,
        ``"first"`` the first one, ``"all"`` a list of all of them.
    :returns: a hyphenated ISBN, a list of them for ``"all"``, or ``None``
        when no valid ISBN was found or ``return_value`` is not recognised.
    """
    hyphenated = []
    for match in isbns:
        candidate = match[0]
        if not (isbnlib.is_isbn10(candidate) or isbnlib.is_isbn13(candidate)):
            continue
        if return_value not in ("last", "first", "all"):
            # An unknown mode gives up at the first valid candidate.
            return None
        masked = isbnlib.mask(candidate, separator='-')
        if return_value == "first":
            return masked
        hyphenated.append(masked)

    if return_value == "all":
        return hyphenated
    return hyphenated[-1] if hyphenated else None
def isbn2bibtex(isbn):
    """Return a BibTeX entry for ``isbn``, or a (Finnish) error message.

    The ISBN is first resolved to a record id with ``isbn2id`` and rendered
    with ``id2bibtex``; each of these is tried with the masked ISBN and, if
    that fails, with the bare hyphen-free ISBN.  When ``isbn2id`` reports no
    record, the ``'goob'`` metadata service of ``isbnlib`` is queried instead
    and the result is formatted with the ``'bibtex'`` formatter.

    :param isbn: ISBN-10 or ISBN-13 string, with or without hyphens.
    :returns: BibTeX source, or a human-readable message when the input is
        not an ISBN or no source knows the book.
    """
    if not (isbnlib.is_isbn10(isbn) or isbnlib.is_isbn13(isbn)):
        return "Koodi ei ole ISBN-koodi :("

    bare = isbn.replace('-', '')

    # "except Exception" keeps the best-effort fallbacks but no longer
    # swallows KeyboardInterrupt / SystemExit like the bare "except:" did.
    try:
        book_id = isbn2id(isbnlib.mask(bare))
    except Exception:
        book_id = isbn2id(bare)

    # "== False" on purpose: kept from the original so that any value equal
    # to False (not only the False singleton) selects the fallback.
    if book_id == False:  # noqa: E712
        print('Kokeillaan googlea')
        bibtex = bibformatters['bibtex']
        try:
            return bibtex(isbnlib.meta(bare, 'goob'))
        except Exception:
            return 'Kirjaa ei löydy Googlen eikä Finnan tietokannoista :('

    try:
        return id2bibtex(book_id, isbnlib.mask(bare))
    except Exception:
        return id2bibtex(book_id, bare)
def extract_isbn(value):
    """Extract the single ISBN contained in ``value`` and return it masked.

    :param value: text expected to contain exactly one ISBN.
    :returns: the masked form of that ISBN.
    :raises ValidationError: if no ISBN-like text can be extracted
        ("Bad format ..."), if more than one candidate is found, or if the
        candidate is not a valid ISBN-10 / ISBN-13.
    """
    try:
        isbns = isbnlib.get_isbnlike(value)
        isbn = isbns[0]
    except Exception as err:
        # Covers unusable input as well as an empty result (IndexError).
        raise ValidationError(f"Bad format {value}") from err

    if len(isbns) > 1:
        raise ValidationError("Too much ISBN numbers")

    # Validate with the validators; to_isbn13() is a converter and was only
    # usable as a check by accident.  The former "len == 0" and trailing
    # "else" branches could never run and are gone.
    if not (isbnlib.is_isbn10(isbn) or isbnlib.is_isbn13(isbn)):
        raise ValidationError("It is not ISBN number")

    return isbnlib.mask(isbn)
def check_isbn(entry, try_fix):
    """Check and format ISBN.

    More information about ISBN:
    https://en.wikipedia.org/wiki/International_Standard_Book_Number

    * ISBN-10: reported and rejected when the entry's year is 2007 or later,
      accepted otherwise.
    * ISBN-13: accepted; a message is emitted when the year is before 2007
      and the ISBN starts with ``978``.
    * anything else: rejected, with a message unless the value is the
      placeholder ``'TODO'``.

    In the first two cases a missing or non-numeric ``year`` makes the check
    return ``False``.

    :param entry: mapping with at least ``'isbn'``; ``'year'`` is read for
        valid ISBNs.
    :param try_fix: currently has no effect, see the note at the end.
    :returns: ``True`` if the ISBN is accepted, ``False`` otherwise.
    """
    isbn_string = entry['isbn']

    if isbnlib.is_isbn10(isbn_string):
        try:
            if int(entry['year']) >= 2007:
                err_message(entry,
                            ("ISBN10 ({}) were issued only before 2007," +
                             " year is actually {}").format(
                                 isbn_string, entry['year']))
                return False
            return True
        # pylint: disable=broad-except
        except Exception:
            return False
    elif isbnlib.is_isbn13(isbn_string):
        try:
            # Fixed typo: "starstwith" raised AttributeError, which the
            # except clause turned into a silent False for every pre-2007
            # entry.
            if int(entry['year']) < 2007 and isbn_string.startswith('978'):
                err_message(entry,
                            ("ISBN13 ({}) were issued only after 2007," +
                             " year is actually {}").format(
                                 isbn_string, entry['year']))
            return True
        # pylint: disable=broad-except
        except Exception:
            return False
    else:
        if isbn_string != 'TODO':
            err_message(entry, "Invalid ISBN {}".format(isbn_string))
        # TODO try to look up isbn using isbnlib.goom()
        # intitle:Understanding+inauthor:McLuhan&tbs=,
        # cdr:1,cd_min:Jan+1_2+1964,cd_max:Dec+31_2+1974&num=10
        return False

    # NOTE(review): unreachable - every branch above returns.  Kept because
    # it documents the intended fix-up/formatting step; deciding which of
    # the returns above should fall through to it needs the author's input.
    if try_fix:
        _fix_based_on_isbn(isbn_string, entry)
    entry['isbn'] = isbnlib.mask(isbn_string)
    return True
def normalizeISBN(isbn):
    """
    Return ``isbn`` in masked form, or unchanged when ``isbnlib`` raises
    ``NotValidISBNError`` for it.

    >>> normalizeISBN('978800105473-4')
    '978-80-01-05473-4'

    >>> normalizeISBN('80978800105473-4')
    '80978800105473-4'

    >>> normalizeISBN('988800105473-4')
    '988800105473-4'

    >>> normalizeISBN('978-80-254-94677')
    '978-80-254-9467-7'
    """
    try:
        bare = isbnlib.canonical(isbn)
        hyphenated = isbnlib.mask(bare)
    except isbnlib.NotValidISBNError:
        return isbn
    return hyphenated
def generate(self):
    """Populate ``self.sfnt``, ``self.ctnt`` and ``self.reft``.

    ``self.dictionary`` must be filled in before this is called.  It is
    passed through ``dict_cleanup()`` and then ``encode_for_template()``;
    an ``'isbn'`` entry, if present, is replaced by its masked form whenever
    masking yields a non-empty result.  The three templates are then built
    from the resulting dictionary.
    """
    self.dictionary = dict_cleanup(self.dictionary)
    self.dictionary = encode_for_template(self.dictionary)

    if 'isbn' in self.dictionary:
        hyphenated = isbnlib.mask(self.dictionary['isbn'])
        # Keep the original spelling when masking produced nothing usable.
        if hyphenated:
            self.dictionary['isbn'] = hyphenated

    self.sfnt = generator.sfn_template(self.dictionary)
    self.ctnt = generator.citation_template(
        self.dictionary, self.date_format)
    self.reft = generator.reference_tag(
        self.dictionary, self.sfnt, self.ctnt)
def search(self, **kwargs):
    """Run the query and store its outcome on the instance.

    Without an ISBN (``self.isbn is None``) the search is by title prefix
    (``titleStartsWith`` = ``self.querystring``) and ``self.pages`` is
    computed as well; otherwise the search is by the ISBN masked with
    ``'-'``.  Extra keyword arguments are merged into the query and may
    override the ``noVariants`` default.

    :returns: ``self``, with ``query_dict``, ``total``, ``results`` and
        ``mapping`` set (plus ``pages`` for title searches).
    """
    self.query_dict = {'noVariants': 'true', **kwargs}

    by_title = self.isbn is None
    if by_title:
        self.query_dict['titleStartsWith'] = self.querystring
    else:
        self.query_dict['isbn'] = isbnlib.mask(self.isbn, '-')

    data = self.do_request(self.query_dict)
    self.total = data['data']['total']
    if by_title:
        # The page count is only maintained for title searches.
        self.pages = self.get_page_count(self.total)
    self.results = self.parse_results(data)
    self.mapping = self.raw_mapping(data)
    return self
def fix_isbn(prop, isbn_version, is_isbnversion):
    """
    Re-hyphenate the ISBNs stored under ``prop`` and report invalid ones.

    For every record returned by ``get_isbn_list(prop)``:

    * if ``is_isbnversion`` accepts the stored value and its masked form
      differs from it, ``set_mask`` is called and its return value is added
      to the count of fixed hyphenations;
    * if ``is_isbnversion`` rejects the value, it is collected as an error.

    :param prop: property passed to ``get_isbn_list`` and ``set_mask``.
    :param isbn_version: label used in the progress headline.
    :param is_isbnversion: callable deciding whether a string is a valid
        ISBN of the wanted version.
    :returns: list of ``(qid, isbn)`` tuples for the rejected values.
    """
    print(colored('\n== Fixing {}s =='.format(isbn_version), 'yellow'))

    rejected = []
    fixed_count = 0
    for record in get_isbn_list(prop):
        wd_isbn = record['isbn']['value']
        qid = get_qid(record['book']['value'])

        if not is_isbnversion(wd_isbn):
            rejected.append((qid, wd_isbn))
            continue

        hyphenated = isbnlib.mask(wd_isbn)
        if hyphenated != wd_isbn:
            fixed_count += set_mask(qid, prop, wd_isbn, hyphenated)

    print(colored('{} wrong ISBN hyphenation(s) fixed.'.format(fixed_count), 'blue'))
    return rejected
def is_valid(isbn_id):
    """
    Check that a string is a valid ISBN in canonical or masked spelling.

    :param isbn_id: the isbn to be checked.
    :returns: boolean indicating whether the isbn is valid or not.

    >>> is_valid("978-3-16-148410-0")
    True

    >>> is_valid("9783161484100")
    True

    >>> is_valid("9783161484100aa")
    False

    >>> is_valid("abcd")
    False

    >>> is_valid("0136091814")
    True

    >>> is_valid("0136091812")
    False

    >>> is_valid("9780136091817")
    False

    >>> is_valid("123456789X")
    True
    """
    if isbnlib.notisbn(isbn_id):
        return False
    canon = isbnlib.get_canonical_isbn(isbn_id)
    # "or" short-circuits, so masking is skipped for canonical input.
    return canon == isbn_id or isbnlib.mask(canon) == isbn_id
def normalize_isbn(val):
    """Return the masked form of an ISBN identifier, as produced by ``mask``."""
    masked = mask(val)
    return masked
def handle_isbn(val):
    """Convert ``val`` to a masked ISBN-13.

    :param val: value holding an ISBN; it is stringified before conversion.
    :returns: the masked ISBN-13, or ``None`` when ``val`` is falsy.
    """
    if not val:
        return None
    isbn13 = to_isbn13(str(val))
    return mask(isbn13)
def isbn_mask(self):
    """Return ``self.isbn`` masked with ``'-'`` as the separator."""
    separator = '-'
    return isbnlib.mask(self.isbn, separator)
def marcxml_parsing(x):
    """Flatten a MARCXML collection into a pandas DataFrame of features.

    ``x`` is parsed with ``ElementTree.parse``; every child of the root
    element is treated as one record and turned into one dictionary ("row")
    built from

    * the leader (characters 6, 17 and 18),
    * control fields 001, 006, 007 and 008,
    * data fields 020, 040, 041, 050, 082, 260, 264, 336, 337, 338, 490,
      600/610/611/630/650, 651 and 655.

    Multi-valued features are joined with ``" ;; "``; most absent features
    are stored as the string ``"NA"``.

    Side effects: reads ``./raw_data/336_code.csv``, ``337_code.csv`` and
    ``338_code.csv`` (relative to the working directory) on every call, and
    prints one progress line per record and per data field.

    :param x: source handed to ``ElementTree.parse``.
    :returns: ``pandas.DataFrame`` with one row per record.
    """
    tree = ElementTree.parse(x)
    collection = tree.getroot()
    # Lookup tables used to translate 33x $a terms into $b codes.
    code_336 = pd.read_csv("./raw_data/336_code.csv")
    code_337 = pd.read_csv("./raw_data/337_code.csv")
    code_338 = pd.read_csv("./raw_data/338_code.csv")
    features = []  # list of features
    for i in range(len(collection)):
        row = {}
        print("--------------------- " + str(i))
        record = collection[i]

        # ---- leader -------------------------------------------------------
        leader = record.find('{http://www.loc.gov/MARC21/slim}leader')
        leader_6 = leader.text[6]
        leader_17 = leader.text[17]
        leader_18 = leader.text[18]
        row['leader_6'] = leader_6
        row['leader_17'] = leader_17
        row['leader_18'] = leader_18

        # ---- control fields ----------------------------------------------
        control = record.findall(
            '{http://www.loc.gov/MARC21/slim}controlfield')
        F006 = 0
        F007 = 0
        for c in control:
            tag = c.get('tag')
            if tag == '001':
                oclc_controlnum = c.text
                row['F001_a'] = oclc_controlnum
            if tag == '006':
                F006 = F006 + 1
            if tag == '007':
                F007 = F007 + 1
            if tag == '008':
                # 008 is sliced by fixed character position.
                value = c.text
                pub_code = value[6]
                pub_year_1 = value[7:11]
                pub_year_2 = value[11:15]
                place = value[15:18]
                audience = value[22]
                cont_nature = value[24:28]
                government = value[28]
                literary = value[33]
                language = value[35:38]
                catalog_source = value[39]
                row['F008_06'] = pub_code
                row['F008_0710'] = pub_year_1
                row['F008_1114'] = pub_year_2
                row['F008_1517'] = place
                row['F008_22'] = audience
                # One boolean column per code that may occur in 008/24-27.
                row['F008_2427_a'] = bool(re.search('a', cont_nature))
                row['F008_2427_b'] = bool(re.search('b', cont_nature))
                row['F008_2427_c'] = bool(re.search('c', cont_nature))
                row['F008_2427_d'] = bool(re.search('d', cont_nature))
                row['F008_2427_e'] = bool(re.search('e', cont_nature))
                row['F008_2427_f'] = bool(re.search('f', cont_nature))
                row['F008_2427_g'] = bool(re.search('g', cont_nature))
                row['F008_2427_i'] = bool(re.search('i', cont_nature))
                row['F008_2427_j'] = bool(re.search('j', cont_nature))
                row['F008_2427_k'] = bool(re.search('k', cont_nature))
                row['F008_2427_l'] = bool(re.search('l', cont_nature))
                row['F008_2427_m'] = bool(re.search('m', cont_nature))
                row['F008_2427_n'] = bool(re.search('n', cont_nature))
                row['F008_2427_o'] = bool(re.search('o', cont_nature))
                row['F008_2427_p'] = bool(re.search('p', cont_nature))
                row['F008_2427_q'] = bool(re.search('q', cont_nature))
                row['F008_2427_r'] = bool(re.search('r', cont_nature))
                row['F008_2427_s'] = bool(re.search('s', cont_nature))
                row['F008_2427_t'] = bool(re.search('t', cont_nature))
                row['F008_2427_u'] = bool(re.search('u', cont_nature))
                row['F008_2427_v'] = bool(re.search('v', cont_nature))
                row['F008_2427_w'] = bool(re.search('w', cont_nature))
                row['F008_2427_y'] = bool(re.search('y', cont_nature))
                row['F008_2427_z'] = bool(re.search('z', cont_nature))
                row['F008_2427_2'] = bool(re.search('2', cont_nature))
                row['F008_2427_5'] = bool(re.search('5', cont_nature))
                row['F008_2427_6'] = bool(re.search('6', cont_nature))
                row['F008_28'] = government
                row['F008_33'] = literary
                row['F008_3537'] = language
                row['F008_39'] = catalog_source
                # NOTE(review): these three guards look dead - a slice is
                # never None and value[39] is a one-character string (or
                # raises IndexError) - and the keys lack the "F" prefix used
                # by the columns they seem meant to default.  Confirm intent.
                if place is None:
                    row['008_1517'] = "NA"
                if language is None:
                    row['008_3537'] = "NA"
                if len(catalog_source) == 0:
                    row['008_39'] = "NA"

        # Presence flags for 006 / 007.
        row['006_is'] = 1 if F006 > 0 else 0
        row['007_is'] = 1 if F007 > 0 else 0

        # ---- data fields: per-record counters and collectors -------------
        data = record.findall('{http://www.loc.gov/MARC21/slim}datafield')
        F040_e = 0
        F041_is = 0
        F050_is = 0
        F082_is = 0
        F260_is = 0
        F264_is = 0
        F26x_is = 0
        F336_is = 0
        F337_is = 0
        F338_is = 0
        F490_is = 0
        F6xxa_is = 0
        F6xxv_is = 0
        F6xxy_is = 0
        F6xxz_is = 0
        isbn_list = []
        isbn_tag_list = []
        F041_a_list = []
        F041_h_list = []
        F050_a1_list = []
        F050_a2_list = []
        F082_a1_list = []
        F082_a2_list = []
        F260_b_list = []
        F260_c_list = []
        F264_b_list = []
        F264_c_list = []
        F26x_b_list = []
        F26x_c_list = []
        F336_b_list = []
        F337_b_list = []
        F338_b_list = []
        F490_a_list = []
        F6xx_a_list = []
        F6xx_v_list = []
        F6xx_y_list = []
        F6xx_z_list = []
        for d in data:
            tag = d.get('tag')
            print("--------------------- " + str(i) + "---- " + tag)

            if tag == '020':
                # ISBN: keep valid 10-digit ISBNs and valid 978-prefixed
                # 13-digit ISBNs.  The "tag" is the first two hyphen-separated
                # groups of the masked ISBN-10, joined by "--".
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'a':
                        isbn = s.text
                        if len(isbn) == 10 and is_isbn10(
                                str(isbn)) == True and mask(isbn) is not None:
                            isbn_text = str(isbn)
                            isbn_list.append(isbn_text)
                            isbn_tag = '--'.join(mask(isbn).split("-")[0:2])
                            isbn_tag_list.append(isbn_tag)
                        elif len(isbn) == 13 and is_isbn13(
                                str(isbn)) == True and mask(
                                    isbn) is not None and isbn[0:3] == "978":
                            isbn_text = str(isbn)
                            isbn_list.append(isbn_text)
                            # Converted to ISBN-10 first so the tag has the
                            # same shape as for 10-digit input.
                            isbn_tag = '--'.join(
                                mask(to_isbn10(isbn)).split("-")[0:2])
                            isbn_tag_list.append(isbn_tag)

            if tag == "040":
                # Count $e subfields equal to "rda" / "RDA".
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'e':
                        if s.text == 'rda' or s.text == "RDA":
                            F040_e = F040_e + 1

            if tag == "041":
                # With several 041 fields, ind1 of the last one is kept.
                F041_is = F041_is + 1
                F041_ind1 = d.get('ind1')
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'a':
                        F041_a_list.append(s.text)
                    if s.get('code') == 'h':
                        F041_h_list.append(s.text)

            if tag == '050':
                # a1 = leading 1-3 capital letters, a2 = those letters plus
                # the digits that follow; both are kept only when both match.
                F050_is = F050_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'a':
                        match = re.search(r'^[A-Z]{1,3}', str(s.text))
                        # NOTE(review): "[A-z]" also matches the punctuation
                        # between "Z" and "a" in ASCII; "[A-Za-z]" may have
                        # been intended.
                        match2 = re.search(
                            r'^[A-Z]{1,3}[0-9]{1,}(?=\.|[A-z]|$| )',
                            str(s.text))
                        if match and match2:
                            F050_a1_list.append(match.group())
                            F050_a2_list.append(match2.group())

            if tag == '082':
                # a2 = leading three digits, a1 = the first of them.
                F082_is = F082_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'a':
                        match = re.search(r'^[0-9]{3}', str(s.text))
                        if match:
                            F082_a1_list.append(match.group()[0])
                            F082_a2_list.append(match.group())

            if tag == '260':
                F260_is = F260_is + 1
                F26x_is = F26x_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'b':
                        F260_b_list.append(s.text)
                        F26x_b_list.append(s.text)
                        # A $b naming a printer/distributor takes one back
                        # off both counters; the text itself stays listed.
                        if len(
                                re.findall(
                                    "printed by |distributed by |distributed in ",
                                    s.text.lower())) > 0:
                            F260_is = F260_is - 1
                            F26x_is = F26x_is - 1
                    if s.get('code') == 'c':
                        F260_c_list.append(s.text)
                        # NOTE(review): "\d" in a non-raw string is an
                        # invalid escape sequence (warning on recent Python).
                        text_26x = re.findall("\d{4}", s.text)
                        F26x_c_list.extend(text_26x)

            if tag == '264' and d.get('ind2') == '1':
                # Same treatment as 260, restricted to ind2 == '1'.
                F264_is = F264_is + 1
                F26x_is = F26x_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'b':
                        F264_b_list.append(s.text)
                        F26x_b_list.append(s.text)
                        if len(
                                re.findall(
                                    "printed by |distributed by |distributed in ",
                                    s.text.lower())) > 0:
                            F264_is = F264_is - 1
                            F26x_is = F26x_is - 1
                    if s.get('code') == 'c':
                        F264_c_list.append(s.text)
                        text_26x = re.findall("\d{4}", s.text)
                        F26x_c_list.extend(text_26x)

            if tag == '336':
                # $b is used directly when present; otherwise $a is
                # translated through code_336.  The value is kept only when
                # $2 contains "rda".
                F336_is = F336_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                sub_code_list = []
                for t in subfields:
                    sub_code_list.append(t.get("code"))
                b_is = "b" in sub_code_list
                a_is = "a" in sub_code_list
                if b_is > 0:
                    for s in subfields:
                        if s.get('code') == 'b':
                            F336_b_value = s.text
                        if s.get('code') == '2':
                            F336_2_value = s.text
                elif b_is == 0 and a_is > 0:
                    for s in subfields:
                        if s.get('code') == 'a' and s.text in code_336[
                                '336_a'].values:
                            text_336b = code_336.loc[code_336['336_a'] ==
                                                     s.text, '336_b'].values[0]
                            F336_b_value = text_336b
                        if s.get('code') == '2':
                            F336_2_value = s.text
                # NOTE(review): F336_2_value / F336_b_value are not reset per
                # field, so a field without $2 (or without a usable $a/$b)
                # either raises NameError or reuses values left over from an
                # earlier field or record.  The same applies to 337 and 338.
                if "rda" in F336_2_value.lower():
                    F336_b_list.append(F336_b_value)

            if tag == '337':
                F337_is = F337_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                sub_code_list = []
                for t in subfields:
                    sub_code_list.append(t.get("code"))
                b_is = "b" in sub_code_list
                a_is = "a" in sub_code_list
                if b_is > 0:
                    for s in subfields:
                        if s.get('code') == 'b':
                            F337_b_value = s.text
                        if s.get('code') == '2':
                            F337_2_value = s.text
                elif b_is == 0 and a_is > 0:
                    for s in subfields:
                        if s.get('code') == 'a' and s.text in code_337[
                                '337_a'].values:
                            text_337b = code_337.loc[code_337['337_a'] ==
                                                     s.text, '337_b'].values[0]
                            F337_b_value = text_337b
                        if s.get('code') == '2':
                            F337_2_value = s.text
                if "rda" in F337_2_value.lower():
                    F337_b_list.append(F337_b_value)

            if tag == '338':
                F338_is = F338_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                sub_code_list = []
                for t in subfields:
                    sub_code_list.append(t.get("code"))
                b_is = "b" in sub_code_list
                a_is = "a" in sub_code_list
                if b_is > 0:
                    for s in subfields:
                        if s.get('code') == 'b':
                            F338_b_value = s.text
                        if s.get('code') == '2':
                            F338_2_value = s.text
                elif b_is == 0 and a_is > 0:
                    for s in subfields:
                        if s.get('code') == 'a' and s.text in code_338[
                                '338_a'].values:
                            text_338b = code_338.loc[code_338['338_a'] ==
                                                     s.text, '338_b'].values[0]
                            F338_b_value = text_338b
                        if s.get('code') == '2':
                            F338_2_value = s.text
                if "rda" in F338_2_value.lower():
                    F338_b_list.append(F338_b_value)

            if tag == '490':
                F490_is = F490_is + 1
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'a':
                        F490_a_list.append(s.text)

            # Subject fields: only those with ind2 == "0" are used.
            if tag in ['600', '610', '611', '630', '650'
                       ] and d.get('ind2') == "0":
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    if s.get('code') == 'a':
                        F6xxa_is = F6xxa_is + 1
                        F6xx_a_list.append(clean_text(s.text))
                    if s.get('code') == 'v':
                        F6xxv_is = F6xxv_is + 1
                        F6xx_v_list.append(clean_text(s.text))
                    if s.get('code') == 'y':
                        F6xxy_is = F6xxy_is + 1
                        F6xx_y_list.append(clean_text(s.text))
                    if s.get('code') == 'z':
                        F6xxz_is = F6xxz_is + 1
                        F6xx_z_list.append(clean_text(s.text))

            if tag == "651" and d.get('ind2') == "0":
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    # For 651 the $a value is filed with the "z" values.
                    if s.get('code') == 'a':
                        F6xxz_is = F6xxz_is + 1
                        F6xx_z_list.append(clean_text(s.text))
                    if s.get('code') == 'v':
                        F6xxv_is = F6xxv_is + 1
                        F6xx_v_list.append(clean_text(s.text))
                    if s.get('code') == 'y':
                        F6xxy_is = F6xxy_is + 1
                        F6xx_y_list.append(clean_text(s.text))
                    if s.get('code') == 'z':
                        F6xxz_is = F6xxz_is + 1
                        F6xx_z_list.append(clean_text(s.text))

            if tag == "655" and d.get('ind2') == "0":
                subfields = d.findall(
                    '{http://www.loc.gov/MARC21/slim}subfield')
                for s in subfields:
                    # For 655 the $a value is filed with the "v" values.
                    if s.get('code') == 'a':
                        F6xxv_is = F6xxv_is + 1
                        F6xx_v_list.append(clean_text(s.text))
                    if s.get('code') == 'v':
                        F6xxv_is = F6xxv_is + 1
                        F6xx_v_list.append(clean_text(s.text))
                    if s.get('code') == 'y':
                        F6xxy_is = F6xxy_is + 1
                        F6xx_y_list.append(clean_text(s.text))
                    if s.get('code') == 'z':
                        F6xxz_is = F6xxz_is + 1
                        F6xx_z_list.append(clean_text(s.text))

        # ---- fold the collected values into the row ------------------------
        # Joined columns built from set() have no guaranteed value order.
        isbn_list1 = set(isbn_list)
        isbn_tag_list1 = set(isbn_tag_list)
        if (len(isbn_tag_list) > 0):
            row['isbn'] = " ;; ".join(set(isbn_list1))
            row['isbn_tag'] = " ;; ".join(set(isbn_tag_list1))
            row['isbn1'] = isbn_list[0]
            row['isbn_tag1'] = isbn_tag_list[0]
        else:
            row['isbn'] = "NA"
            row['isbn_tag'] = "NA"
            row['isbn1'] = "NA"
            row['isbn_tag1'] = "NA"
        if F040_e > 0:
            row['F040_e'] = 1
        else:
            row['F040_e'] = 0
        if F041_is > 0:
            row['F041_ind1'] = F041_ind1
            row['F041_a'] = " ;; ".join(F041_a_list)
            row['F041_h'] = " ;; ".join(F041_h_list)
        else:
            row['F041_ind1'] = "NA"
            row['F041_a'] = "NA"
            row['F041_h'] = "NA"
        if len(F050_a1_list) > 0:
            row['F050_a1'] = " ;; ".join(set(F050_a1_list))
            row['F050_a2'] = " ;; ".join(set(F050_a2_list))
        else:
            row['F050_a1'] = "NA"
            row['F050_a2'] = "NA"
        if len(F082_a1_list) > 0:
            row['F082_a1'] = " ;; ".join(set(F082_a1_list))
            row['F082_a2'] = " ;; ".join(set(F082_a2_list))
        else:
            row['F082_a1'] = "NA"
            row['F082_a2'] = "NA"
        row['F260_is'] = F260_is
        if F260_is > 0:
            row['F260_b'] = " ;; ".join(F260_b_list)
            row['F260_c'] = " ;; ".join(F260_c_list)
        else:
            row['F260_b'] = "NA"
            row['F260_c'] = "NA"
        row['F264_is'] = F264_is
        if F264_is > 0:
            # NOTE(review): the slice is bounded by F26x_is (260 and 264
            # combined), not F264_is - confirm which was intended.
            row['F264_b'] = " ;; ".join(F264_b_list[0:0 + F26x_is])
            row['F264_c'] = " ;; ".join(F264_c_list)
        else:
            row['F264_b'] = "NA"
            row['F264_c'] = "NA"
        row['F26x_is'] = F26x_is
        if F26x_is > 0:
            row['F26x_b'] = " ;; ".join(set(F26x_b_list[0:0 + F26x_is]))
            row['F26x_c'] = " ;; ".join(set(F26x_c_list))
        else:
            row['F26x_b'] = "NA"
            row['F26x_c'] = "NA"
        # 33x: the joined $b codes plus one boolean per code of interest
        # (substring search on the joined string); empty strings when the
        # field is absent.
        if F336_is > 0:
            F336_b_text = F336_b_list
            row['F336_b'] = " ;; ".join(F336_b_text)
            row['F336_b_txt'] = bool(re.search('txt', row['F336_b']))
            row['F336_b_sti'] = bool(re.search('sti', row['F336_b']))
            row['F336_b_cri'] = bool(re.search('cri', row['F336_b']))
            row['F336_b_spw'] = bool(re.search('spw', row['F336_b']))
            row['F336_b_tct'] = bool(re.search('tct', row['F336_b']))
        else:
            row['F336_b'] = "NA"
            row['F336_b_txt'] = ""
            row['F336_b_sti'] = ""
            row['F336_b_cri'] = ""
            row['F336_b_spw'] = ""
            row['F336_b_tct'] = ""
        if F337_is > 0:
            F337_b_text = F337_b_list
            row['F337_b'] = " ;; ".join(F337_b_text)
            row['F337_b_c'] = bool(re.search('c', row['F337_b']))
            row['F337_b_h'] = bool(re.search('h', row['F337_b']))
            row['F337_b_n'] = bool(re.search('n', row['F337_b']))
            row['F337_b_s'] = bool(re.search('s', row['F337_b']))
        else:
            row['F337_b'] = "NA"
            row['F337_b_c'] = ""
            row['F337_b_h'] = ""
            row['F337_b_n'] = ""
            row['F337_b_s'] = ""
        if F338_is > 0:
            F338_b_text = F338_b_list
            row['F338_b'] = " ;; ".join(F338_b_text)
            row['F338_b_cd'] = bool(re.search('cd', row['F338_b']))
            row['F338_b_cr'] = bool(re.search('cr', row['F338_b']))
            row['F338_b_hd'] = bool(re.search('hd', row['F338_b']))
            row['F338_b_he'] = bool(re.search('he', row['F338_b']))
            row['F338_b_nb'] = bool(re.search('nb', row['F338_b']))
            row['F338_b_sd'] = bool(re.search('sd', row['F338_b']))
        else:
            row['F338_b'] = "NA"
            row['F338_b_cd'] = ""
            row['F338_b_cr'] = ""
            row['F338_b_hd'] = ""
            row['F338_b_he'] = ""
            row['F338_b_nb'] = ""
            row['F338_b_sd'] = ""
        if F490_is > 0:
            row['F490_a'] = " ;; ".join(F490_a_list)
        else:
            row['F490_a'] = "NA"
        if F6xxa_is > 0:
            row['F6xx_a'] = " ;; ".join(set(F6xx_a_list))
        else:
            row['F6xx_a'] = "NA"
        if F6xxv_is > 0:
            row['F6xx_v'] = " ;; ".join(set(F6xx_v_list))
        else:
            row['F6xx_v'] = "NA"
        if F6xxy_is > 0:
            row['F6xx_y'] = " ;; ".join(set(F6xx_y_list))
        else:
            row['F6xx_y'] = "NA"
        if F6xxz_is > 0:
            row['F6xx_z'] = " ;; ".join(set(F6xx_z_list))
        else:
            row['F6xx_z'] = "NA"
        features.append(row)
    df = pd.DataFrame(features)
    return (df)
def com_isbn_mastk(isbn_string, isbn_seperator='-'):
    """
    Hyphenate (mask) a canonical ISBN.

    :param isbn_string: the ISBN to mask.
    :param isbn_seperator: separator placed between the ISBN groups.
    :returns: whatever ``isbnlib.mask`` returns for these arguments.
    """
    masked = isbnlib.mask(isbn_string, separator=isbn_seperator)
    return masked
def get_default_isbn(isbn_list):
    """Return the first valid ISBN of ``isbn_list`` in masked form.

    :param isbn_list: iterable of candidate ISBN strings.
    :returns: the masked first candidate that is a valid ISBN-13 or ISBN-10,
        or ``""`` when there is none.
    """
    nothing = object()
    valid = (
        candidate for candidate in isbn_list
        if isbnlib.is_isbn13(candidate) or isbnlib.is_isbn10(candidate)
    )
    first = next(valid, nothing)
    if first is nothing:
        return ""
    return isbnlib.mask(first)
#!/usr/bin/env python
"""Print the ISBN-13 registered under a key in a YAML metadata file.

Usage: <script> METAFILE KEY [mask]

METAFILE is a YAML document whose optional ``identifier`` list holds
mappings with a ``key`` and a ``text``.  The first ISBN-like string found in
``text`` is stored under ``key`` if it is a valid ISBN-13.  The ISBN stored
under KEY is printed (hyphenated when the third argument is ``mask``); a
built-in fallback ISBN is printed when KEY is unknown.
"""
import sys
import os
import yaml
import isbnlib

metafile = sys.argv[1]
# Context manager so the file is closed deterministically.
with open(metafile, 'r') as handle:
    metadata = handle.read()

# yaml.load() without a Loader is a TypeError on PyYAML >= 6 and unsafe on
# older versions; safe_load() is sufficient for plain metadata.  An empty
# document loads as None, hence the "or {}".
yamldata = yaml.safe_load(metadata) or {}

identifier = {}
if "identifier" in yamldata:
    for entry in yamldata["identifier"]:  # "entry", not the builtin "id"
        if "key" not in entry:
            continue
        candidates = isbnlib.get_isbnlike(entry["text"])
        if not candidates:
            # No ISBN-like text in this identifier: skip it, don't crash.
            continue
        isbnlike = candidates[0]
        if isbnlib.is_isbn13(isbnlike):
            identifier[entry["key"]] = isbnlib.EAN13(isbnlike)

# Fallback ISBN used when the requested key is not present.
isbn = identifier.get(sys.argv[2], "9786056644504")

if len(sys.argv) >= 4 and sys.argv[3] == "mask":
    print(isbnlib.mask(isbn))
else:
    print(isbn)
def handle_isbn(val):
    """Return ``val`` as a masked ISBN-13.

    ``val`` is stringified, converted with ``to_isbn13`` and the result is
    passed to ``mask``.
    """
    text = str(val)
    isbn13 = to_isbn13(text)
    return mask(isbn13)
def td_format(self, content):
    """Format a cell value as a masked ISBN, falling back to the raw value.

    :param content: value to format.
    :returns: ``mask(content)``, or ``content`` unchanged when masking fails.
    """
    try:
        return mask(content)
    except Exception:
        # Deliberate best effort: anything that cannot be masked is shown
        # as-is.  Unlike the former bare "except:", this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return content
# Rewrite the ISBN of every item in the batch into its hyphenated form and
# push the change to Zotero when the stored spelling differs.
for item in batch:
    try:
        isbn = item['data']['ISBN']
    except KeyError:
        # Not a book-ish item
        continue
    if not isbn:
        # No ISBN listed
        continue

    # Transform to canonical (bare) form, then return to standardised
    # form with hyphens.
    canisbn = get_canonical_isbn(isbn.replace(' ', ''))
    if not canisbn:
        # This most likely means that the ISBN given is bogus
        # (e.g. has a faulty checksum). Some books have bogus
        # ISBNs printed on them, so they are used for cataloguing
        # by some libraries despite being formally invalid.
        print("Error extracting ISBN from "+str(isbn))
        continue

    newisbn = mask(canisbn)
    if newisbn == isbn:
        continue
    if not newisbn:
        # Explicit check instead of "assert": assertions are stripped under
        # "python -O", which would have written an empty ISBN to Zotero.
        raise AssertionError("Could not hyphenate ISBN "+str(isbn))

    print("Updating "+str(isbn)+" to "+str(newisbn))
    item['data']['ISBN'] = newisbn
    if not zot.update_item(item):
        raise Exception("Zotero write failed for "+str(isbn))
def isbn_str(self):
    """Return ``self._isbn`` masked, or ``''`` when no ISBN is set."""
    return isbnlib.mask(self._isbn) if self._isbn else ''