def test_add_simplified_genres(self):
    # Each genre on the source object should yield one 650 field carrying
    # the genre name in $a and "Library Simplified" in $2.
    work = self._work(with_license_pool=True)
    fantasy, _ = Genre.lookup(self._db, "Fantasy", autocreate=True)
    romance, _ = Genre.lookup(self._db, "Romance", autocreate=True)
    work.genres = [fantasy, romance]

    def genre_name(field):
        return field.get_subfields("a")[0]

    def assert_genre_fields(source):
        # Annotate a fresh record from `source` and verify both 650 fields.
        record = Record()
        Annotator.add_simplified_genres(record, source)
        # The unpacking doubles as a check that exactly two fields exist.
        [fantasy_field, romance_field] = sorted(
            record.get_fields("650"), key=genre_name)
        expected = [(fantasy_field, "Fantasy"), (romance_field, "Romance")]
        for field, name in expected:
            eq_(["0", "7"], field.indicators)
            eq_(name, genre_name(field))
            eq_("Library Simplified", field.get_subfields("2")[0])

    assert_genre_fields(work)

    # It also works with a materialized work.
    self.add_to_materialized_view([work])
    # The work is in the materialized view twice since it has two genres,
    # but we can use either one.
    [mw, _] = self._db.query(MaterializedWorkWithGenre).all()
    assert_genre_fields(mw)
def test_add_series(self):
    # Series information is written to a 490 field: $a holds the series
    # name and $v the position within the series.
    edition = self._edition()
    edition.series = self._str
    edition.series_position = 5

    def annotated():
        # Build a fresh record from the edition's current state.
        fresh = Record()
        Annotator.add_series(fresh, edition)
        return fresh

    with_position = annotated()
    expected = {
        "a": edition.series,
        "v": str(edition.series_position),
    }
    self._check_field(with_position, "490", expected, ["0", " "])

    # If there's no series position, the same field is used without
    # the v subfield.
    edition.series_position = None
    without_position = annotated()
    self._check_field(
        without_position, "490", {"a": edition.series}, ["0", " "])
    [series_field] = without_position.get_fields("490")
    eq_([], series_field.get_subfields("v"))

    # If there's no series, the field is left out.
    edition.series = None
    without_series = annotated()
    eq_([], without_series.get_fields("490"))
def test_add_title(self):
    # Title, subtitle and author are written to a single 245 field.
    edition = self._edition()
    edition.title = "The Good Soldier"
    edition.sort_title = "Good Soldier, The"
    edition.subtitle = "A Tale of Passion"

    full_record = Record()
    Annotator.add_title(full_record, edition)
    # Unpacking fails unless exactly one 245 field was added.
    [title_field] = full_record.get_fields("245")
    full_subfields = {
        "a": edition.title,
        "b": edition.subtitle,
        "c": edition.author,
    }
    self._check_field(full_record, "245", full_subfields, ["0", "4"])

    # If there's no subtitle or no author, those subfields are left out.
    edition.subtitle = None
    edition.author = None
    bare_record = Record()
    Annotator.add_title(bare_record, edition)
    [title_field] = bare_record.get_fields("245")
    self._check_field(bare_record, "245", {"a": edition.title}, ["0", "4"])
    for code in ("b", "c"):
        eq_([], title_field.get_subfields(code))
def test_add_contributors(self):
    # A lone author goes into a 100 field; several contributors each get
    # their own 700 field with the role in $e.
    primary_name, second_name, translator_name = "a", "b", "c"

    # Edition with one author gets a 100 field and no 700 fields.
    solo = self._edition(authors=[primary_name])
    solo.sort_author = "sorted"
    solo_record = Record()
    Annotator.add_contributors(solo_record, solo)
    eq_([], solo_record.get_fields("700"))
    self._check_field(solo_record, "100", {"a": solo.sort_author}, ["1", " "])

    # Edition with two authors and a translator gets three 700 fields
    # and no 100 fields.
    shared = self._edition(authors=[primary_name, second_name])
    shared.add_contributor(translator_name, Contributor.TRANSLATOR_ROLE)
    shared_record = Record()
    Annotator.add_contributors(shared_record, shared)
    eq_([], shared_record.get_fields("100"))

    added = shared_record.get_fields("700")
    for entry in added:
        eq_(["1", " "], entry.indicators)

    # Sorting by name lines the fields up with "a", "b", "c"; the
    # unpacking also verifies that there are exactly three of them.
    [first, second, third] = sorted(
        added, key=lambda f: f.get_subfields("a")[0])
    expected = [
        (first, primary_name, Contributor.PRIMARY_AUTHOR_ROLE),
        (second, second_name, Contributor.AUTHOR_ROLE),
        (third, translator_name, Contributor.TRANSLATOR_ROLE),
    ]
    for entry, name, role in expected:
        eq_(name, entry.get_subfields("a")[0])
        eq_(role, entry.get_subfields("e")[0])
def test_add_series(self):
    # Series information is written to a 490 field: $a holds the series
    # name and $v the position within the series.
    edition = self._edition()
    edition.series = self._str
    edition.series_position = 5

    def annotated():
        # Build a fresh record from the edition's current state.
        fresh = Record()
        Annotator.add_series(fresh, edition)
        return fresh

    with_position = annotated()
    expected = {
        "a": edition.series,
        "v": str(edition.series_position),
    }
    self._check_field(with_position, "490", expected, ["0", " "])

    # If there's no series position, the same field is used without
    # the v subfield.
    edition.series_position = None
    without_position = annotated()
    self._check_field(
        without_position, "490", {"a": edition.series}, ["0", " "])
    [series_field] = without_position.get_fields("490")
    eq_([], series_field.get_subfields("v"))

    # If there's no series, the field is left out.
    edition.series = None
    without_series = annotated()
    eq_([], without_series.get_fields("490"))
def test_add_simplified_genres(self):
    # Each genre on the source object should yield one 650 field carrying
    # the genre name in $a and "Library Simplified" in $2.
    work = self._work(with_license_pool=True)
    fantasy, _ = Genre.lookup(self._db, "Fantasy", autocreate=True)
    romance, _ = Genre.lookup(self._db, "Romance", autocreate=True)
    work.genres = [fantasy, romance]

    def genre_name(field):
        return field.get_subfields("a")[0]

    def assert_genre_fields(source):
        # Annotate a fresh record from `source` and verify both 650 fields.
        record = Record()
        Annotator.add_simplified_genres(record, source)
        # The unpacking doubles as a check that exactly two fields exist.
        [fantasy_field, romance_field] = sorted(
            record.get_fields("650"), key=genre_name)
        expected = [(fantasy_field, "Fantasy"), (romance_field, "Romance")]
        for field, name in expected:
            eq_(["0", "7"], field.indicators)
            eq_(name, genre_name(field))
            eq_("Library Simplified", field.get_subfields("2")[0])

    assert_genre_fields(work)

    # It also works with a materialized work.
    self.add_to_materialized_view([work])
    # The work is in the materialized view twice since it has two genres,
    # but we can use either one.
    [mw, _] = self._db.query(MaterializedWorkWithGenre).all()
    assert_genre_fields(mw)
def test_add_title(self):
    # Title, subtitle and author are written to a single 245 field.
    edition = self._edition()
    edition.title = "The Good Soldier"
    edition.sort_title = "Good Soldier, The"
    edition.subtitle = "A Tale of Passion"

    full_record = Record()
    Annotator.add_title(full_record, edition)
    # Unpacking fails unless exactly one 245 field was added.
    [title_field] = full_record.get_fields("245")
    full_subfields = {
        "a": edition.title,
        "b": edition.subtitle,
        "c": edition.author,
    }
    self._check_field(full_record, "245", full_subfields, ["0", "4"])

    # If there's no subtitle or no author, those subfields are left out.
    edition.subtitle = None
    edition.author = None
    bare_record = Record()
    Annotator.add_title(bare_record, edition)
    [title_field] = bare_record.get_fields("245")
    self._check_field(bare_record, "245", {"a": edition.title}, ["0", "4"])
    for code in ("b", "c"):
        eq_([], title_field.get_subfields(code))
def test_add_web_client_urls(self):
    # Web client URLs can come from either the MARC export integration or
    # a library registry integration.
    library = self._default_library
    annotator = LibraryAnnotator(library)

    # If no web catalog URLs are set for the library, nothing will be changed.
    untouched = Record()
    identifier = self._identifier(foreign_id="identifier")
    annotator.add_web_client_urls(untouched, library, identifier)
    eq_([], untouched.get_fields("856"))

    registry_url = "http://web_catalog/book/Gutenberg%20ID%2Fidentifier"
    export_url = "http://another_web_catalog/book/Gutenberg%20ID%2Fidentifier"

    # Add a URL from a library registry.
    registry = self._external_integration(
        ExternalIntegration.OPDS_REGISTRATION,
        ExternalIntegration.DISCOVERY_GOAL,
        libraries=[library])
    registry_setting = ConfigurationSetting.for_library_and_externalintegration(
        self._db, Registration.LIBRARY_REGISTRATION_WEB_CLIENT,
        library, registry)
    registry_setting.value = "http://web_catalog"

    from_registry = Record()
    annotator.add_web_client_urls(from_registry, library, identifier)
    [link] = from_registry.get_fields("856")
    eq_(["4", "0"], link.indicators)
    eq_(registry_url, link.get_subfields("u")[0])

    # Add a manually configured URL on a MARC export integration.
    integration = self._external_integration(
        ExternalIntegration.MARC_EXPORT,
        ExternalIntegration.CATALOG_GOAL,
        libraries=[library])
    export_setting = ConfigurationSetting.for_library_and_externalintegration(
        self._db, MARCExporter.WEB_CLIENT_URL, library, integration)
    export_setting.value = "http://another_web_catalog"

    from_both = Record()
    annotator.add_web_client_urls(from_both, library, identifier, integration)
    # The manually configured URL is expected first, then the registry one.
    [export_link, registry_link] = from_both.get_fields("856")
    eq_(["4", "0"], export_link.indicators)
    eq_(export_url, export_link.get_subfields("u")[0])
    eq_(["4", "0"], registry_link.indicators)
    eq_(registry_url, registry_link.get_subfields("u")[0])
def faulty015(record: Record) -> bool:
    """Report whether any 015 field has a subfield a containing a space.

    Args:
        record: the record whose 015 fields are inspected.

    Returns:
        True if at least one 015 field has a subfield ``a`` that splits
        into more than one part on a single space; otherwise False.
    """
    return any(
        "a" in field and len(field["a"].split(' ')) > 1
        for field in record.get_fields("015")
    )
async def get_terms_to_search_and_references_to_raw_flds(
        marc_record: pymarc.Record,
        for_omnis: bool = False) -> Dict[str, List[pymarc.Field]]:
    """Group a record's fields by the normalized search term built from them.

    Args:
        marc_record: record to scan.
        for_omnis: when true, the field list comes from
            FIELDS_TO_CHECK_FOR_OMNIS instead of FIELDS_TO_CHECK.

    Returns:
        A dict keyed by normalized term. Each value is itself a dict whose
        'raw_flds' entry lists the field objects that produced the term.
        NOTE(review): this nested shape does not match the declared return
        annotation - confirm which one callers rely on.
    """
    if for_omnis:
        field_spec = FIELDS_TO_CHECK_FOR_OMNIS
    else:
        field_spec = FIELDS_TO_CHECK

    grouped = {}
    for entry in field_spec:
        # Each entry carries the field tag first and the subfield codes second.
        tag = entry[0]
        codes = entry[1]
        if tag not in marc_record:
            continue
        for field in marc_record.get_fields(tag):
            # The term is the requested subfield values joined by spaces,
            # then normalized.
            joined = ' '.join(field.get_subfields(*codes))
            term = prepare_name_for_indexing(joined)
            bucket = grouped.setdefault(term, {})
            bucket.setdefault('raw_flds', []).append(field)
    return grouped
def decode_record(self, record):
    r"""
    Convert a textual "pseudo-MARC" dump into a Record object.

    The input is split into lines. The first line must start with
    'LEADER'; otherwise None is returned. Every following line is read as
    a three-character tag, indicators at positions 3 and 4, and field data
    from position 6 onwards. A line whose tag starts with a space is
    treated as a continuation of the previous field. None is also returned
    when the resulting record has no 245 field.

    NOTE(review): the ``.decode('latin1')`` call on a slice of each line
    implies the input is a byte string (Python 2 ``str``) - confirm.

    >>> reader = Reader('http://opac.uthsc.edu', 2)
    >>> raw = "\nLEADER 00000cas 2200517 a 4500 \n001 1481253 \n003 OCoLC \n005 19951109120000.0 \n008 750727c19589999fr qrzp b 0 b0fre d \n010 sn 86012727 \n022 0003-3995 \n030 AGTQAH \n035 0062827|bMULS|aPITT NO. 0639600000|asa64872000|bFULS \n040 MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n |dTUM \n041 0 engfre|bgeritaspa \n042 nsdp \n049 TUMS \n069 1 A32025000 \n210 0 Ann. genet. \n222 0 Annales de genetique \n229 00 Annales de genetique \n229 Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260 Paris :|bExpansion scientifique,|c1958-2004. \n300 v. :|bill. ;|c28 cm. \n310 Quarterly \n321 Two no. a year \n362 0 1,1958-47,2004. \n510 1 Excerpta medica \n510 1 Index medicus|x0019-3879 \n510 2 Biological abstracts|x0006-3169 \n510 2 Chemical abstracts|x0009-2258 \n510 2 Life sciences collection \n510 0 Bulletin signaletique \n510 0 Current contents \n546 French and English, with summaries in German, Italian, and\n Spanish. \n550 Journal of the Societe francaise de genetique. \n650 2 Genetics|vPeriodicals. \n710 2 Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics. \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936 Unknown|ajuin 1977 \n"
    >>> record = reader.decode_record(raw)
    >>> print record.title
    Annales de genetique
    """
    pseudo_marc = record.strip().split('\n')
    raw_fields = []
    # From here on `record` is rebound from the input text to the Record
    # being built.
    if pseudo_marc[0][0:6] == 'LEADER':
        record = Record()
        record.leader = pseudo_marc[0][7:].strip()
    else:
        return None
    # First pass: collect one dict per field, merging continuation lines.
    for field in pseudo_marc[1:]:
        tag = field[:3]
        data = unescape_entities(field[6:].decode('latin1')).encode('utf8')
        if tag.startswith(' '):
            # Continuation line: its text is appended to the previous field,
            # separated by exactly one space. (An earlier variant, left
            # commented out in the original, added the space only for tags
            # starting with '55' or '260'.)
            # NOTE(review): if the very first field line is a continuation,
            # raw_fields is still empty and raw_fields[-1] raises IndexError.
            data = " %s" % (data.strip(),)
            raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
            raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
        else:
            # Numeric tags below '010' keep their data unchanged. Every other
            # tag gets an 'a' prefix so that, after the split on '|' below,
            # the leading chunk is stored under subfield code 'a'.
            data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
            raw_fields.append({
                'tag': tag,
                'indicator1': field[3],
                'indicator2': field[4],
                'value': data.strip(),
                'raw': field.strip()
            })
    # Second pass: turn the collected dicts into Field objects.
    for raw in raw_fields:
        tag = raw['tag']
        data = raw['value'].strip()
        field = Field(tag=tag, indicators=[raw['indicator1'], raw['indicator2']], data=data)
        if not field.is_control_field():
            # Subfields are '|'-separated; the first character of each chunk
            # is the subfield code and the rest is its value.
            for sub in data.split('|'):
                try:
                    field.add_subfield(sub[0].strip(), sub[1:].strip())
                except Exception:
                    # Skip blank/empty subfields
                    continue
        record.add_field(field)
    record.parse_leader()
    # Disregard record if no title present
    if not record.get_fields('245'):
        return None
    else:
        return record
def test_add_physical_description(self):
    # The physical description fields (300 and 33x/347/380) depend on the
    # edition's medium.
    book = self._edition()
    book.medium = Edition.BOOK_MEDIUM
    audio = self._edition()
    audio.medium = Edition.AUDIO_MEDIUM

    expected_for_book = [
        ("300", {"a": "1 online resource"}),
        ("336", {"a": "text", "b": "txt", "2": "rdacontent"}),
        ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
        ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
        ("347", {"a": "text file", "2": "rda"}),
        ("380", {"a": "eBook", "2": "tlcgt"}),
    ]
    expected_for_audio = [
        ("300", {"a": "1 sound file", "b": "digital"}),
        ("336", {"a": "spoken word", "b": "spw", "2": "rdacontent"}),
        ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
        ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
        ("347", {"a": "audio file", "2": "rda"}),
    ]

    book_record = Record()
    Annotator.add_physical_description(book_record, book)
    for tag, subfields in expected_for_book:
        self._check_field(book_record, tag, subfields)

    audio_record = Record()
    Annotator.add_physical_description(audio_record, audio)
    for tag, subfields in expected_for_audio:
        self._check_field(audio_record, tag, subfields)
    # The 380 field is not added for audio.
    eq_([], audio_record.get_fields("380"))
def test_add_physical_description(self):
    # The physical description fields (300 and 33x/347/380) depend on the
    # edition's medium.
    book = self._edition()
    book.medium = Edition.BOOK_MEDIUM
    audio = self._edition()
    audio.medium = Edition.AUDIO_MEDIUM

    expected_for_book = [
        ("300", {"a": "1 online resource"}),
        ("336", {"a": "text", "b": "txt", "2": "rdacontent"}),
        ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
        ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
        ("347", {"a": "text file", "2": "rda"}),
        ("380", {"a": "eBook", "2": "tlcgt"}),
    ]
    expected_for_audio = [
        ("300", {"a": "1 sound file", "b": "digital"}),
        ("336", {"a": "spoken word", "b": "spw", "2": "rdacontent"}),
        ("337", {"a": "computer", "b": "c", "2": "rdamedia"}),
        ("338", {"a": "online resource", "b": "cr", "2": "rdacarrier"}),
        ("347", {"a": "audio file", "2": "rda"}),
    ]

    book_record = Record()
    Annotator.add_physical_description(book_record, book)
    for tag, subfields in expected_for_book:
        self._check_field(book_record, tag, subfields)

    audio_record = Record()
    Annotator.add_physical_description(audio_record, audio)
    for tag, subfields in expected_for_audio:
        self._check_field(audio_record, tag, subfields)
    # The 380 field is not added for audio.
    eq_([], audio_record.get_fields("380"))
def test_add_web_client_urls(self):
    # Web client URLs can come from either the MARC export integration or
    # a library registry integration.
    library = self._default_library
    annotator = LibraryAnnotator(library)

    # If no web catalog URLs are set for the library, nothing will be changed.
    untouched = Record()
    identifier = self._identifier(foreign_id="identifier")
    annotator.add_web_client_urls(untouched, library, identifier)
    eq_([], untouched.get_fields("856"))

    registry_url = "http://web_catalog/book/Gutenberg%20ID%2Fidentifier"
    export_url = "http://another_web_catalog/book/Gutenberg%20ID%2Fidentifier"

    # Add a URL from a library registry.
    registry = self._external_integration(
        ExternalIntegration.OPDS_REGISTRATION,
        ExternalIntegration.DISCOVERY_GOAL,
        libraries=[library])
    registry_setting = ConfigurationSetting.for_library_and_externalintegration(
        self._db, Registration.LIBRARY_REGISTRATION_WEB_CLIENT,
        library, registry)
    registry_setting.value = "http://web_catalog"

    from_registry = Record()
    annotator.add_web_client_urls(from_registry, library, identifier)
    [link] = from_registry.get_fields("856")
    eq_(["4", "0"], link.indicators)
    eq_(registry_url, link.get_subfields("u")[0])

    # Add a manually configured URL on a MARC export integration.
    integration = self._external_integration(
        ExternalIntegration.MARC_EXPORT,
        ExternalIntegration.CATALOG_GOAL,
        libraries=[library])
    export_setting = ConfigurationSetting.for_library_and_externalintegration(
        self._db, MARCExporter.WEB_CLIENT_URL, library, integration)
    export_setting.value = "http://another_web_catalog"

    from_both = Record()
    annotator.add_web_client_urls(from_both, library, identifier, integration)
    # The manually configured URL is expected first, then the registry one.
    [export_link, registry_link] = from_both.get_fields("856")
    eq_(["4", "0"], export_link.indicators)
    eq_(export_url, export_link.get_subfields("u")[0])
    eq_(["4", "0"], registry_link.indicators)
    eq_(registry_url, registry_link.get_subfields("u")[0])
def empty020a(record: Record) -> bool:
    """Report whether the record's only collected 020 subfield a is empty.

    For every 020 field that has a subfield ``a``, the last ``a`` value is
    collected. The result is True only when exactly one value was
    collected and it is the empty string.

    Args:
        record: the record to inspect.

    Returns:
        True if the collected values equal ``[""]``; otherwise False.
    """
    if "020" in record:
        last_a_values = [
            field.get_subfields("a").pop()
            for field in record.get_fields("020")
            if "a" in field
        ]
    else:
        last_a_values = []
    return last_a_values == [""]
def periodsMissing(record: Record) -> bool:
    """Report whether any subfield e in a 100/110/700/710 field ends in a letter.

    A value whose last character is alphabetic has no closing punctuation,
    which is what this check flags.

    Args:
        record: the record to inspect.

    Returns:
        True if at least one subfield ``e`` value in a 100, 110, 700 or 710
        field ends with an alphabetic character; otherwise False. Empty
        subfield values are ignored.
    """
    for field in record.get_fields("100", "110", "700", "710"):
        if "e" not in field:
            continue
        for relator in field.get_subfields("e"):
            # Slice rather than index: an empty value gives "" here, and
            # "".isalpha() is False, so an empty $e no longer raises
            # IndexError.
            if relator[-1:].isalpha():
                return True
    return False
def test_bib_with_vendor_910_tag(self):
    # A 910 field already on the record should end up as a single 910
    # field holding the value checked below.
    vendor_910 = Field(
        tag="910",
        indicators=[" ", " "],
        subfields=["a", "foo"],
    )
    bib = Record()
    bib.add_field(vendor_910)

    patches.bib_patches("nypl", "research", "acq", "Amalivre", bib)

    remaining = bib.get_fields("910")
    self.assertEqual(len(remaining), 1)
    self.assertEqual(str(bib["910"]), "=910 \\\\$aRL")
def phrasesInFields(record: Record, phrases: list, fields: list) -> bool:
    """Report whether any phrase occurs in any of the given fields.

    Each field's value is lower-cased before the substring test; the
    phrases themselves are compared as given.

    Args:
        record: the record to search.
        phrases: strings to look for.
        fields: field tags (as strings) to search in.

    Returns:
        True as soon as one phrase is found in one field value; otherwise
        False.
    """
    return any(
        phrase in field.value().lower()
        for tag in fields
        for field in record.get_fields(tag)
        for phrase in phrases
    )
def callZ3950(search_id, target, depth=0):
    """Fetch a MARC record for search_id via queryZ3950, retrying when nothing is found.

    Args:
        search_id: identifier to search for.
        target: 'UIU' selects the z3950.carli.illinois.edu server; any
            other value selects lx2.loc.gov.
        depth: number of retries already made (used by the recursion).

    Returns:
        A two-item tuple:
        (record, None) when a record was retrieved;
        (False, 'BROKEN CHARACTER IN RECORD') when building the Record
        raised UnicodeDecodeError;
        (False, 'BROKEN LEADER') when checkLeader rejected a result;
        (None, 'RECORD NOT FOUND') when nothing was found and depth has
        reached 20.
    """
    if target == 'UIU':
        print "UIUC NUMBER: ", search_id
        query = zoom.Query('PQF', '@attr 1=12 %s' % str(search_id))
        database_address = 'z3950.carli.illinois.edu'
        username = '******'
        database_name = 'voyager'
    else:
        print "LC NUMBER: ", search_id
        query = zoom.Query('PQF', '@attr 1=9 %s' % str(formatLCCN(search_id)))
        database_address = 'lx2.loc.gov'
        username = ''
        # Ids containing 'n' are looked up in 'NAF', all others in 'SAF'.
        if 'n' in search_id:
            database_name = 'NAF'
        else:
            database_name = 'SAF'
    # conn = establishZ3950Connection(database_address,210,username,database_name)
    res = queryZ3950(database_address, username, database_name, query)
    print len(res)
    print res
    if len(res) > 0:
        for r in res:
            # The first 24 bytes of the raw data are passed to checkLeader.
            valid_leader = checkLeader(r.data[:24])
            if valid_leader:
                if len(res) > 1:
                    # Several hits: keep only a record whose 001 value
                    # (spaces removed) equals search_id.
                    try:
                        new_record = Record(data=r.data)
                    except UnicodeDecodeError:
                        return (False, 'BROKEN CHARACTER IN RECORD')
                    lccn = new_record.get_fields('001')[0].data.replace(
                        " ", "")
                    if lccn == search_id:
                        marc_record = new_record
                        fixNames(marc_record)
                else:
                    try:
                        marc_record = Record(data=r.data)
                    except UnicodeDecodeError:
                        return (False, 'BROKEN CHARACTER IN RECORD')
                    fixNames(marc_record)
            else:
                return (False, 'BROKEN LEADER')
        # NOTE(review): with several hits and no 001 match, marc_record is
        # never assigned and this line raises UnboundLocalError - confirm
        # whether that case can occur.
        return (marc_record, None)
    elif depth < 20:
        # Nothing found yet: wait, then retry with an incremented depth.
        waitSixSeconds(datetime.datetime.now().time())
        return callZ3950(search_id, target, depth=depth + 1)
    else:
        return (None, 'RECORD NOT FOUND')
def record_sorted(record: Record) -> Record:
    """Return a new record with the fields ordered by numeric tag.

    The leader is copied over. Tags "000" through "999" are visited in
    ascending order and the matching fields of ``record`` are added to the
    result; fields sharing a tag keep their original relative order.

    Args:
        record: the record to copy fields from.

    Returns:
        A new Record holding the leader and the numerically tagged fields.
    """
    ordered = Record()
    ordered.leader = record.leader
    for number in range(1000):
        # Zero-pad to the three-character tag form, e.g. 5 -> "005".
        tag = str(number).zfill(3)
        # First find all fields under this tag in the old data.
        for existing in record.get_fields(tag):
            ordered.add_field(existing)
    return ordered
def test_add_control_fields(self):
    # Checks the 001 and 005-008 control fields for two editions.
    now = None

    def assert_control_fields(record, identifier, field_007, tail_008):
        # 001 holds the identifier URN, 005 contains today's date and 008
        # starts with the short-form date.
        self._check_control_field(record, "001", identifier.urn)
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        self._check_control_field(record, "006", "m d ")
        self._check_control_field(record, "007", field_007)
        self._check_control_field(
            record, "008", now.strftime("%y%m%d") + tail_008)

    # This edition has one format and was published before 1900.
    edition, pool = self._edition(with_license_pool=True)
    identifier = pool.identifier
    edition.issued = datetime_utc(956, 1, 1)

    now = utc_now()
    first_record = Record()
    Annotator.add_control_fields(first_record, identifier, pool, edition)
    assert_control_fields(
        first_record, identifier, "cr cn ---anuuu", "s0956 xxu eng ")

    # This French edition has two formats and was published in 2018.
    edition2, pool2 = self._edition(with_license_pool=True)
    identifier2 = pool2.identifier
    edition2.issued = datetime_utc(2018, 2, 3)
    edition2.language = "fre"
    LicensePoolDeliveryMechanism.set(
        pool2.data_source,
        identifier2,
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT,
    )
    second_record = Record()
    Annotator.add_control_fields(second_record, identifier2, pool2, edition2)
    assert_control_fields(
        second_record, identifier2, "cr cn ---mnuuu", "s2018 xxu fre ")
def textIn020z(record: Record) -> bool:
    """Report whether the 020 subfield z values contain more than one letter.

    For every 020 field that has a subfield ``z``, the last ``z`` value is
    collected. The alphabetic characters across all collected values are
    counted together.

    Args:
        record: the record to inspect.

    Returns:
        True if the total number of alphabetic characters is greater than
        one; otherwise False (including when there is no subfield z).
    """
    collected = []
    if "020" in record:
        collected = [
            field.get_subfields("z").pop()
            for field in record.get_fields("020")
            if "z" in field
        ]
    letter_count = sum(
        1 for value in collected for char in value if char.isalpha())
    # With nothing collected the count is 0, so this is False.
    return letter_count > 1
def test_add_control_fields(self):
    # Checks the 001 and 005-008 control fields for two editions.
    now = None

    def assert_control_fields(record, identifier, field_007, tail_008):
        # 001 holds the identifier URN, 005 contains today's date and 008
        # starts with the short-form date.
        self._check_control_field(record, "001", identifier.urn)
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        self._check_control_field(record, "006", "m d ")
        self._check_control_field(record, "007", field_007)
        self._check_control_field(
            record, "008", now.strftime("%y%m%d") + tail_008)

    # This edition has one format and was published before 1900.
    edition, pool = self._edition(with_license_pool=True)
    identifier = pool.identifier
    edition.issued = datetime.datetime(956, 1, 1)

    now = datetime.datetime.now()
    first_record = Record()
    Annotator.add_control_fields(first_record, identifier, pool, edition)
    assert_control_fields(
        first_record, identifier, "cr cn ---anuuu", "s0956 xxu eng ")

    # This French edition has two formats and was published in 2018.
    edition2, pool2 = self._edition(with_license_pool=True)
    identifier2 = pool2.identifier
    edition2.issued = datetime.datetime(2018, 2, 3)
    edition2.language = "fre"
    LicensePoolDeliveryMechanism.set(
        pool2.data_source,
        identifier2,
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT)
    second_record = Record()
    Annotator.add_control_fields(second_record, identifier2, pool2, edition2)
    assert_control_fields(
        second_record, identifier2, "cr cn ---mnuuu", "s2018 xxu fre ")
def test_add_contributors(self):
    # A lone author goes into a 100 field; several contributors each get
    # their own 700 field with the role in $e.
    primary_name, second_name, translator_name = "a", "b", "c"

    # Edition with one author gets a 100 field and no 700 fields.
    solo = self._edition(authors=[primary_name])
    solo.sort_author = "sorted"
    solo_record = Record()
    Annotator.add_contributors(solo_record, solo)
    eq_([], solo_record.get_fields("700"))
    self._check_field(solo_record, "100", {"a": solo.sort_author}, ["1", " "])

    # Edition with two authors and a translator gets three 700 fields
    # and no 100 fields.
    shared = self._edition(authors=[primary_name, second_name])
    shared.add_contributor(translator_name, Contributor.TRANSLATOR_ROLE)
    shared_record = Record()
    Annotator.add_contributors(shared_record, shared)
    eq_([], shared_record.get_fields("100"))

    added = shared_record.get_fields("700")
    for entry in added:
        eq_(["1", " "], entry.indicators)

    # Sorting by name lines the fields up with "a", "b", "c"; the
    # unpacking also verifies that there are exactly three of them.
    [first, second, third] = sorted(
        added, key=lambda f: f.get_subfields("a")[0])
    expected = [
        (first, primary_name, Contributor.PRIMARY_AUTHOR_ROLE),
        (second, second_name, Contributor.AUTHOR_ROLE),
        (third, translator_name, Contributor.TRANSLATOR_ROLE),
    ]
    for entry, name, role in expected:
        eq_(name, entry.get_subfields("a")[0])
        eq_(role, entry.get_subfields("e")[0])
def has_overdrive_access_point_tag(record: Record) -> bool:
    """
    Determines if any 710 field of the record mentions "overdrive"
    (case-insensitive match on the field value).

    Args:
        record:                 the record whose 710 fields are checked

    Returns:
        bool
    """
    return any(
        "overdrive" in field.value().lower()
        for field in record.get_fields("710")
    )
def test_add_simplified_genres(self):
    # Each genre on the work should yield one 650 field carrying the genre
    # name in $a and "Library Simplified" in $2.
    work = self._work(with_license_pool=True)
    fantasy, _ = Genre.lookup(self._db, "Fantasy", autocreate=True)
    romance, _ = Genre.lookup(self._db, "Romance", autocreate=True)
    work.genres = [fantasy, romance]

    record = Record()
    Annotator.add_simplified_genres(record, work)

    def genre_name(field):
        return field.get_subfields("a")[0]

    # The unpacking doubles as a check that exactly two fields exist.
    [fantasy_field, romance_field] = sorted(
        record.get_fields("650"), key=genre_name)
    expected = [(fantasy_field, "Fantasy"), (romance_field, "Romance")]
    for field, name in expected:
        eq_(["0", "7"], field.indicators)
        eq_(name, genre_name(field))
        eq_("Library Simplified", field.get_subfields("2")[0])
def test_add_formats(self):
    # Each delivery mechanism on the pool should produce one 538 field.
    edition, pool = self._edition(with_license_pool=True)
    epub_no_drm, _ = DeliveryMechanism.lookup(
        self._db, Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM)
    pool.delivery_mechanisms[0].delivery_mechanism = epub_no_drm
    LicensePoolDeliveryMechanism.set(
        pool.data_source,
        pool.identifier,
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT)

    record = Record()
    Annotator.add_formats(record, pool)
    format_fields = record.get_fields("538")
    eq_(2, len(format_fields))

    # Sorted by $a, "Adobe PDF eBook" comes before "EPUB eBook".
    [pdf, epub] = sorted(
        format_fields, key=lambda f: f.get_subfields("a")[0])
    for field, label in ((pdf, "Adobe PDF eBook"), (epub, "EPUB eBook")):
        eq_(label, field.get_subfields("a")[0])
        eq_([" ", " "], field.indicators)
def test_add_formats(self):
    # Each delivery mechanism on the pool should produce one 538 field.
    edition, pool = self._edition(with_license_pool=True)
    epub_no_drm, _ = DeliveryMechanism.lookup(
        self._db, Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM)
    pool.delivery_mechanisms[0].delivery_mechanism = epub_no_drm
    LicensePoolDeliveryMechanism.set(
        pool.data_source,
        pool.identifier,
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT)

    record = Record()
    Annotator.add_formats(record, pool)
    format_fields = record.get_fields("538")
    eq_(2, len(format_fields))

    # Sorted by $a, "Adobe PDF eBook" comes before "EPUB eBook".
    [pdf, epub] = sorted(
        format_fields, key=lambda f: f.get_subfields("a")[0])
    for field, label in ((pdf, "Adobe PDF eBook"), (epub, "EPUB eBook")):
        eq_(label, field.get_subfields("a")[0])
        eq_([" ", " "], field.indicators)
def test_add_publisher(self):
    # Publisher and year of issue go into a 264 field with a placeholder
    # place of publication.
    edition = self._edition()
    edition.publisher = self._str
    edition.issued = datetime.datetime(1894, 4, 5)

    with_publisher = Record()
    Annotator.add_publisher(with_publisher, edition)
    expected_subfields = {
        "a": "[Place of publication not identified]",
        "b": edition.publisher,
        "c": "1894",
    }
    self._check_field(with_publisher, "264", expected_subfields, [" ", "1"])

    # If there's no publisher, the field is left out.
    without_publisher = Record()
    edition.publisher = None
    Annotator.add_publisher(without_publisher, edition)
    eq_([], without_publisher.get_fields("264"))
def test_add_publisher(self):
    # Publisher and year of issue go into a 264 field with a placeholder
    # place of publication.
    edition = self._edition()
    edition.publisher = self._str
    edition.issued = datetime.datetime(1894, 4, 5)

    with_publisher = Record()
    Annotator.add_publisher(with_publisher, edition)
    expected_subfields = {
        "a": "[Place of publication not identified]",
        "b": edition.publisher,
        "c": "1894",
    }
    self._check_field(with_publisher, "264", expected_subfields, [" ", "1"])

    # If there's no publisher, the field is left out.
    without_publisher = Record()
    edition.publisher = None
    Annotator.add_publisher(without_publisher, edition)
    eq_([], without_publisher.get_fields("264"))
def test_add_isbn(self):
    # An ISBN, direct or via an equivalent identifier, goes into 020 $a.
    isbn = self._identifier(identifier_type=Identifier.ISBN)

    def annotated(identifier):
        # Annotate a fresh record with the given identifier.
        fresh = Record()
        Annotator.add_isbn(fresh, identifier)
        return fresh

    self._check_field(annotated(isbn), "020", {"a": isbn.identifier})

    # If the identifier isn't an ISBN, but has an equivalent that is, it still
    # works.
    equivalent = self._identifier()
    data_source = DataSource.lookup(self._db, DataSource.OCLC)
    equivalent.equivalent_to(data_source, isbn, 1)
    self._check_field(annotated(equivalent), "020", {"a": isbn.identifier})

    # If there is no ISBN, the field is left out.
    non_isbn = self._identifier()
    eq_([], annotated(non_isbn).get_fields("020"))
def test_add_isbn(self):
    # An ISBN, direct or via an equivalent identifier, goes into 020 $a.
    isbn = self._identifier(identifier_type=Identifier.ISBN)

    def annotated(identifier):
        # Annotate a fresh record with the given identifier.
        fresh = Record()
        Annotator.add_isbn(fresh, identifier)
        return fresh

    self._check_field(annotated(isbn), "020", {"a": isbn.identifier})

    # If the identifier isn't an ISBN, but has an equivalent that is, it still
    # works.
    equivalent = self._identifier()
    data_source = DataSource.lookup(self._db, DataSource.OCLC)
    equivalent.equivalent_to(data_source, isbn, 1)
    self._check_field(annotated(equivalent), "020", {"a": isbn.identifier})

    # If there is no ISBN, the field is left out.
    non_isbn = self._identifier()
    eq_([], annotated(non_isbn).get_fields("020"))
def test_read_records(self):
    # Run the conversion, then compare its output record-by-record with the
    # expected records stored in test/converted_records.mrc.
    self.yc.read_records()
    output_reader = MARCReader(open(self.output_path, 'rb'), to_unicode=True)
    try:
        test_reader = MARCReader(
            open("test/converted_records.mrc", 'rb'), to_unicode=True)
        try:
            while True:
                output_record = next(output_reader, None)
                test_record = next(test_reader, None)
                if output_record is None and test_record is None:
                    break
                # A length mismatch must fail the test: missing output
                # records used to pass silently, and extra ones surfaced as
                # an AttributeError instead of an assertion failure.
                self.assertIsNotNone(
                    output_record,
                    "output has fewer records than the expected file")
                self.assertIsNotNone(
                    test_record,
                    "output has more records than the expected file")
                output_fields = [
                    str(field) for field in output_record.get_fields()]
                test_fields = [
                    str(field) for field in test_record.get_fields()]
                # Same fields first, then same order.
                self.assertCountEqual(output_fields, test_fields)
                self.assertListEqual(output_fields, test_fields)
        finally:
            # Close the readers even when an assertion above fails.
            test_reader.close()
    finally:
        output_reader.close()
def test_add_web_client_urls(self):
    # Web client URLs can come from either the MARC export integration or
    # a library registry integration.
    identifier = self._identifier(foreign_id="identifier")
    library = self._default_library
    short_name = library.short_name

    cm_base_url = "http://test-circulation-manager"
    registry_base = "http://web_catalog"
    export_base = "http://another_web_catalog"

    # The URL for a work is the circulation manager base URL, the library
    # short name, "works" and the URL-quoted qualified identifier.
    qualified_identifier = urllib.parse.quote(
        identifier.type + "/" + identifier.identifier, safe=""
    )
    work_link = "{cm_base}/{lib}/works/{qid}".format(
        cm_base=cm_base_url, lib=short_name, qid=qualified_identifier
    )
    # The work URL is quoted again and appended to the web client base URL
    # after "/book/".
    encoded_work_link = urllib.parse.quote(work_link, safe="")
    client_url_template = "{client_base}/book/{work_link}"
    registry_url = client_url_template.format(
        client_base=registry_base, work_link=encoded_work_link
    )
    export_url = client_url_template.format(
        client_base=export_base, work_link=encoded_work_link
    )

    # A few checks to ensure that our setup is useful.
    assert len(short_name) > 0
    assert registry_base != export_base
    assert registry_url != export_url
    assert registry_url.startswith(registry_base)
    assert export_url.startswith(export_base)

    base_url_setting = ConfigurationSetting.sitewide(
        self._db, Configuration.BASE_URL_KEY
    )
    base_url_setting.value = cm_base_url

    annotator = LibraryAnnotator(library)

    # If no web catalog URLs are set for the library, nothing will be changed.
    untouched = Record()
    annotator.add_web_client_urls(untouched, library, identifier)
    assert [] == untouched.get_fields("856")

    # Add a URL from a library registry.
    registry = self._external_integration(
        ExternalIntegration.OPDS_REGISTRATION,
        ExternalIntegration.DISCOVERY_GOAL,
        libraries=[library],
    )
    registry_setting = ConfigurationSetting.for_library_and_externalintegration(
        self._db,
        Registration.LIBRARY_REGISTRATION_WEB_CLIENT,
        library,
        registry,
    )
    registry_setting.value = registry_base

    from_registry = Record()
    annotator.add_web_client_urls(from_registry, library, identifier)
    [link] = from_registry.get_fields("856")
    assert ["4", "0"] == link.indicators
    assert registry_url == link.get_subfields("u")[0]

    # Add a manually configured URL on a MARC export integration.
    integration = self._external_integration(
        ExternalIntegration.MARC_EXPORT,
        ExternalIntegration.CATALOG_GOAL,
        libraries=[library],
    )
    export_setting = ConfigurationSetting.for_library_and_externalintegration(
        self._db, MARCExporter.WEB_CLIENT_URL, library, integration
    )
    export_setting.value = export_base

    from_both = Record()
    annotator.add_web_client_urls(from_both, library, identifier, integration)
    # The manually configured URL is expected first, then the registry one.
    [export_link, registry_link] = from_both.get_fields("856")
    assert ["4", "0"] == export_link.indicators
    assert export_url == export_link.get_subfields("u")[0]
    assert ["4", "0"] == registry_link.indicators
    assert registry_url == registry_link.get_subfields("u")[0]
def getId(record: Record) -> str:
    """Return the value of the record's last 001 field.

    Args:
        record: the record to read the identifier from.

    Returns:
        The value of the last 001 field.

    Raises:
        IndexError: if the record has no 001 field.
    """
    identifiers = [field.value() for field in record.get_fields("001")]
    return identifiers[-1]
def field020ContainsAbbreviation(record: Record) -> bool:
    """Report whether any 020 field contains one of the listed abbreviations.

    Every 020 field is checked; previously only the last one was looked
    at, so an abbreviation in an earlier 020 field went unnoticed. The
    match is case-insensitive and runs on the field's string form.

    Args:
        record: the record to inspect.

    Returns:
        True if "sid.", "nid.", "inb." or "hft." occurs in any 020 field;
        otherwise False.
    """
    # NOTE(review): these look like abbreviated binding terms - confirm.
    abbrevs = ("sid.", "nid.", "inb.", "hft.")
    if "020" not in record:
        return False
    for field in record.get_fields("020"):
        text = str(field).lower()
        if any(abbrev in text for abbrev in abbrevs):
            return True
    return False
def hasLCSHfields(record: Record) -> bool:
    """Report whether any 650 field has "0" as its second indicator.

    Every 650 field is checked. Previously the loop returned on its first
    iteration, so only the first 650 field decided the result.

    Args:
        record: the record to inspect.

    Returns:
        True if at least one 650 field's second indicator is "0";
        otherwise False.
    """
    if "650" not in record:
        return False
    return any(
        getSecondIndicator(field) == "0"
        for field in record.get_fields("650")
    )
def wrongIndicatorIn245(record: Record) -> bool:
    """Report whether a record with a 130 field has "0" as first indicator of 245.

    Args:
        record: the record to inspect.

    Returns:
        True if has130Field(record) holds and the first indicator of the
        last 245 field is "0"; otherwise False. A record with no 245 field
        yields False instead of raising IndexError.
    """
    if not has130Field(record):
        return False
    title_fields = record.get_fields("245")
    if not title_fields:
        # No title field to check, so there is no wrong indicator to report.
        return False
    return getFirstIndicator(title_fields[-1]) == "0"
def test_bib_without_vendor_910_tag(self):
    # Patching a record that has no 910 field should leave it with exactly
    # one 910 field holding the value checked below.
    bib = Record()

    patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)

    added = bib.get_fields("910")
    self.assertEqual(len(added), 1)
    self.assertEqual(str(bib["910"]), "=910 \\\\$aBL")
def contains024c(record: Record) -> bool:
    """Report whether any 024 field has a subfield c.

    Every 024 field is checked; previously only the last one was looked
    at, so a subfield c in an earlier 024 field was missed.

    Args:
        record: the record to inspect.

    Returns:
        True if at least one 024 field contains subfield ``c``; otherwise
        False.
    """
    if "024" not in record:
        return False
    return any("c" in field for field in record.get_fields("024"))