def decode_record(self, record):
    r"""
    >>> reader = Reader('http://opac.uthsc.edu', 2)
    >>> raw = "\nLEADER 00000cas 2200517 a 4500 \n001 1481253 \n003 OCoLC \n005 19951109120000.0 \n008 750727c19589999fr qrzp b 0 b0fre d \n010 sn 86012727 \n022 0003-3995 \n030 AGTQAH \n035 0062827|bMULS|aPITT NO. 0639600000|asa64872000|bFULS \n040 MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n |dTUM \n041 0 engfre|bgeritaspa \n042 nsdp \n049 TUMS \n069 1 A32025000 \n210 0 Ann. genet. \n222 0 Annales de genetique \n229 00 Annales de genetique \n229 Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260 Paris :|bExpansion scientifique,|c1958-2004. \n300 v. :|bill. ;|c28 cm. \n310 Quarterly \n321 Two no. a year \n362 0 1,1958-47,2004. \n510 1 Excerpta medica \n510 1 Index medicus|x0019-3879 \n510 2 Biological abstracts|x0006-3169 \n510 2 Chemical abstracts|x0009-2258 \n510 2 Life sciences collection \n510 0 Bulletin signaletique \n510 0 Current contents \n546 French and English, with summaries in German, Italian, and\n Spanish. \n550 Journal of the Societe francaise de genetique. \n650 2 Genetics|vPeriodicals. \n710 2 Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics. \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936 Unknown|ajuin 1977 \n"
    >>> record = reader.decode_record(raw)
    >>> print record.title
    Annales de genetique
    """
    pseudo_marc = record.strip().split('\n')
    raw_fields = []
    if pseudo_marc[0][0:6] == 'LEADER':
        record = Record()
        record.leader = pseudo_marc[0][7:].strip()
    else:
        return None

    for field in pseudo_marc[1:]:
        tag = field[:3]
        data = unescape_entities(field[6:].decode('latin1')).encode('utf8')
        if tag.startswith(' '):
            # Additional field data needs to be prepended with an extra space
            # for certain fields ...
            #for special_tag in ('55','260'):
            #    data = " %s" % (data,) if tag.startswith(special_tag) else data
            data = " %s" % (data.strip(),)
            raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
            raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
        else:
            data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
            raw_fields.append({
                'tag': tag,
                'indicator1': field[3],
                'indicator2': field[4],
                'value': data.strip(),
                'raw': field.strip()
            })

    for raw in raw_fields:
        tag = raw['tag']
        data = raw['value'].strip()
        field = Field(tag=tag,
                      indicators=[raw['indicator1'], raw['indicator2']],
                      data=data)
        if not field.is_control_field():
            for sub in data.split('|'):
                try:
                    field.add_subfield(sub[0].strip(), sub[1:].strip())
                except Exception:
                    # Skip blank/empty subfields
                    continue
        record.add_field(field)
    record.parse_leader()

    # Disregard record if no title present
    if not record.get_fields('245'):
        return None
    else:
        return record
def validate006(self, marc_record):
    """
    Method checks/sets 006 fixed length data elements in MARC record.

    :param marc_record: MARC record
    """
    field006 = Field(tag="006", indicators=None)
    field006.data = r"m o d "
    marc_record.add_field(field006)
    return marc_record
def validate006(self, marc_record):
    '''
    Method checks/sets 006 fixed length data elements in MARC record.

    :param marc_record: MARC record
    '''
    field006 = Field(tag='006', indicators=None)
    field006.data = r'm o d '
    marc_record.add_field(field006)
    return marc_record
def processURLs(self, marc_record, proxy_location,
                public_note='View online - Access limited to subscribers',
                note_prefix='Available via Internet'):
    """
    Method extracts URL from 856 field, sets 538 and 856 to CC's format practices.

    Parameters:
    :param marc_record: - MARC Record
    :param proxy_location: - proxy prefix prepended to extracted URL from 856 field
    :param public_note: - subfield z value, default is for CC
    :param note_prefix: - prefix for original URL in 538 note field, default is for CC.
    """
    all538fields = marc_record.get_fields('538')
    for field538 in all538fields:
        marc_record.remove_field(field538)
    all856fields = marc_record.get_fields('856')
    for field856 in all856fields:
        # Extracts raw url from 856 subfield u, creates a url object
        # for original and proxy urls and replaces net location with WAM location
        # for proxy
        raw_url = urlparse.urlparse(field856.get_subfields('u')[0])
        if re.match(r'http://', proxy_location):
            protocol = ''
        else:
            protocol = 'http://'
        proxy_raw_url = '%s%s%s?%s' % (protocol,
                                       proxy_location,
                                       raw_url.path,
                                       raw_url.query)
        proxy_url = urlparse.urlparse(proxy_raw_url)
        # Sets values for new 538 with constructed note
        new538 = Field(tag='538',
                       indicators=[' ', ' '],
                       subfields=['a', '%s, %s' % (note_prefix, raw_url.geturl())])
        marc_record.add_field(new538)
        # Sets values for 856 field
        new856 = Field(tag='856',
                       indicators=['4', '0'],
                       subfields=['u', proxy_url.geturl()])
        # Checks for subfield 3 in original 856 field, adds to public note
        # in subfield z
        new_public_note = public_note
        if len(field856.get_subfields('3')) > 0:
            for subfield3 in field856.get_subfields('3'):
                subfield3_all = "%s - %s" % (public_note, subfield3)
                new_public_note = subfield3_all
        new856.add_subfield('z', new_public_note)
        marc_record.remove_field(field856)
        marc_record.add_field(new856)
    return marc_record
def validate006(self, marc_record):
    """
    Default validation of the 006 field with standard field data of
    m||||||||c|||||||| for electronic video records.

    :param marc_record: Required, MARC record
    """
    marc_record = self.__remove_field__(marc_record=marc_record, tag='006')
    field006 = Field(tag='006', indicators=None)
    field006.data = r'm o c '
    marc_record.add_field(field006)
    return marc_record
def validate006(self, marc_record):
    """
    Default validation of the 006 field with standard field data of
    m||||||||d|||||||| for electronic records.

    Parameters:
    `marc_record`: Required, MARC record
    """
    marc_record = self.__remove_field__(marc_record=marc_record, tag="006")
    field006 = Field(tag="006", indicators=None)
    field006.data = r"m d "
    marc_record.add_field(field006)
    return marc_record
def sort_6_subs(rec):
    msg = ''
    new_rec = Record(to_unicode=True, force_utf8=True)
    new_rec_fields = []
    rec_fields = rec.get_fields()
    for field in rec_fields:
        script_field = False
        if not field.is_control_field() and (len(field.get_subfields('6')) > 0):
            # the field contains a subfield $6
            script_field = True
            ind1 = field.indicator1
            ind2 = field.indicator2
            tag = field.tag
            first_sub = True  # keeps track of whether you're on the first subfield in the field
            needs_sorted = True  # keeps track of whether the field needs sorted or if the $6 is already correctly the first subfield
            field_subs = []  # captures all the subfields in the field *except* for the subfield $6
            for subfield in field:
                # check if $6 is the first subfield - if so, the field is OK and does *not* need to be sorted
                if needs_sorted and first_sub and subfield[0] == '6':
                    needs_sorted = False
                elif needs_sorted:
                    if first_sub:
                        # this is the first subfield and is *not* $6, so the field needs sorted -
                        # creates one instance of a new_field object only when the 1st subfield is encountered
                        new_field = Field(tag=tag, indicators=[ind1, ind2], subfields=[])
                    # when subfield $6 is finally encountered in the field (not the 1st), add it to the
                    # new_field object now so it becomes the first subfield
                    # Note: subfield[0] is the subfield code and subfield[1] is the subfield content for this subfield
                    if subfield[0] == '6':
                        new_field.add_subfield(subfield[0], subfield[1])
                    # if the subfield is *not* $6, add it to the list of subfields to be added later to the new_field
                    else:
                        field_subs.append([subfield[0], subfield[1]])
                first_sub = False
            if needs_sorted:
                # then the $6 was *not* the 1st subfield and we need to now add the remaining subfields to the new_field object
                for sub in field_subs:
                    # add the remaining subfields to the new_field object
                    new_field.add_subfield(sub[0], sub[1])
                new_rec_fields.append(new_field)  # add the new field to the record
        if not script_field or not needs_sorted:
            new_rec_fields.append(field)
    for new_f in new_rec_fields:
        new_rec.add_field(new_f)
    return new_rec
def validate006(self, marc_record):
    """
    Method validates/adds 006 field

    :param marc_record: MARC record, required
    """
    all006s = marc_record.get_fields('006')
    if all006s:
        pass
    else:
        field006 = Field(tag='006', indicators=None)
        field006.data = r'm d '
        marc_record.add_field(field006)
    return marc_record
def processURLs(
    self,
    marc_record,
    proxy_location,
    public_note="View online - Access limited to subscribers",
    note_prefix="Available via Internet",
):
    """
    Method extracts URL from 856 field, sets 538 and 856 to CC's format practices.

    Parameters:
    :param marc_record: - MARC Record
    :param proxy_location: - proxy prefix prepended to extracted URL from 856 field
    :param public_note: - subfield z value, default is for CC
    :param note_prefix: - prefix for original URL in 538 note field, default is for CC.
    """
    all538fields = marc_record.get_fields("538")
    for field538 in all538fields:
        marc_record.remove_field(field538)
    all856fields = marc_record.get_fields("856")
    for field856 in all856fields:
        # Extracts raw url from 856 subfield u, creates a url object
        # for original and proxy urls and replaces net location with WAM location
        # for proxy
        raw_url = urllib.parse.urlparse(field856.get_subfields("u")[0])
        if re.match(r"http://", proxy_location):
            protocol = ""
        else:
            protocol = "http://"
        proxy_raw_url = "{}{}{}?{}".format(protocol, proxy_location, raw_url.path, raw_url.query)
        proxy_url = urllib.parse.urlparse(proxy_raw_url)
        # Sets values for new 538 with constructed note
        new538 = Field(
            tag="538",
            indicators=[" ", " "],
            subfields=["a", "%s, %s" % (note_prefix, raw_url.geturl())]
        )
        marc_record.add_field(new538)
        # Sets values for 856 field
        new856 = Field(tag="856", indicators=["4", "0"], subfields=["u", proxy_url.geturl()])
        # Checks for subfield 3 in original 856 field, adds to public note
        # in subfield z
        new_public_note = public_note
        if len(field856.get_subfields("3")) > 0:
            for subfield3 in field856.get_subfields("3"):
                subfield3_all = "%s - %s" % (public_note, subfield3)
                new_public_note = subfield3_all
        new856.add_subfield("z", new_public_note)
        marc_record.remove_field(field856)
        marc_record.add_field(new856)
    return marc_record
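# Hedged usage sketch for processURLs above: the record construction only shows
# the kind of input the method expects (an 856 with a $u URL and optional $3);
# the class that owns processURLs and the proxy prefix value are assumptions, so
# the call itself is illustrated in a comment rather than executed. Uses the flat
# subfield-list Field API (pre-5.0 pymarc) that these snippets already rely on.
from pymarc import Record, Field

sample = Record()
sample.add_field(
    Field(
        tag="856",
        indicators=["4", "0"],
        subfields=["u", "http://www.example.com/journal/123", "3", "v.1 (2000)-"],
    )
)
# Assuming `bot` is an instance of the class that defines processURLs:
#   bot.processURLs(sample, "0-www.example.com.ezproxy.example.edu")
# would replace the 856 with a proxied $u, copy $3 into the public note in $z,
# and add a 538 note such as
# "Available via Internet, http://www.example.com/journal/123".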
def validate006(self, marc_record):
    '''
    Method checks/sets 006 fixed length data elements in MARC record.

    :param marc_record: MARC record
    '''
    existing_fields = marc_record.get_fields('006')
    if existing_fields:
        field006 = existing_fields[0]
        marc_record.remove_field(field006)
    else:
        field006 = Field(tag='006', indicators=None)
        field006.data = r'm d '
    marc_record.add_field(field006)
    return marc_record
def validate006(self, marc_record):
    """Method creates new 006 with the following data
    m|||||o||d||||||||

    Args:
        marc_record(pymarc.Record): MARC21 record

    Returns:
        pymarc.Record
    """
    marc_record = self.__remove_field__(
        marc_record=marc_record,
        tag='006')
    field006 = Field(tag='006', indicators=None)
    field006.data = r'm o d '
    marc_record.add_field(field006)
    return marc_record
def generate538(self, marc_record):
    """Method creates a 538 field following a standard pattern

    Args:
        marc_record(pymarc.Record): MARC21 record

    Returns:
        pymarc.Record
    """
    field856 = marc_record['856']
    original_url = field856['u']
    new538 = Field(tag='538', indicators=[' ', ' '])
    new538.add_subfield(
        'a',
        'Available via Internet, {}'.format(original_url))
    marc_record.add_field(new538)
    return marc_record
def __format245__(self, field245):
    """Method takes a 245 field from a MARC record and returns properly
    formatted subfields. By not copying subfield 'h', performs the first
    conversion PCC recommendation.

    Args:
        field245(pymarc.Field): 245 field

    Returns:
        pymarc.Field
    """
    if field245.tag != '245':
        return
    subfield_a, subfield_c = '', ''
    a_subfields = field245.get_subfields('a')
    indicator1, indicator2 = field245.indicators
    if len(a_subfields) > 0:
        subfield_a = a_subfields[0]
        if len(subfield_a) > 0:
            if ['.', '\\'].count(subfield_a[-1]) > 0:
                subfield_a = subfield_a[:-1].strip()
    new245 = Field(tag='245',
                   indicators=[indicator1, indicator2],
                   subfields=['a', u'{0} '.format(subfield_a)])
    b_subfields = field245.get_subfields('b')
    c_subfields = field245.get_subfields('c')
    n_subfields = field245.get_subfields('n')
    p_subfields = field245.get_subfields('p')
    # Order for 245 subfields are:
    # $a $n $p $b $c
    if len(n_subfields) > 0:
        for subfield_n in n_subfields:
            new245.add_subfield('n', subfield_n)
    if len(p_subfields) > 0:
        for subfield_p in p_subfields:
            new245.add_subfield('p', subfield_p)
    if len(c_subfields) > 0 and len(b_subfields) < 1:
        if 'a' in new245.subfields:
            new245['a'] = u'{0} /'.format(new245['a'].strip())
    elif len(b_subfields) > 0:
        if 'a' in new245.subfields:
            new245['a'] = u'{0} :'.format(new245['a'].strip())
    if len(b_subfields) > 0:
        for subfield_b in b_subfields:
            new245.add_subfield('b', subfield_b)
    if len(c_subfields) > 0:
        for subfield_c in c_subfields:
            new245.add_subfield('c', subfield_c)
    return new245
def getNewField(self, line):
    self.fieldTag = line["fieldTag"]
    self.fieldTagOccurrence = line["fieldTagOccurrence"]
    if line["subfieldLabel"]:
        # Data field (indicators; subfields added later)
        self.field = Field(
            tag=line["fieldTag"],
            indicators=[
                line["indicator1"],
                line["indicator2"]
            ]
        )
    else:
        # Control field (data only, no indicators or subfields)
        self.field = Field(
            tag=line["fieldTag"],
            data=line["value"]
        )
def create_999_field(rec, oclc_nums):
    msg = ''
    rec_003 = rec.get_fields('003')[0].value()
    rec_001 = rec.get_fields('001')[0].value()
    rec_999s = rec.get_fields('999')
    if len(rec_999s) == 0:
        new_999 = Field(tag='999', indicators=[' ', ' '], subfields=['i', rec_001])
        for oclc_num in oclc_nums:
            new_999.add_subfield('o', oclc_num)
        rec.add_ordered_field(new_999)
        msg += 'Record 999: '+new_999.value()+'\n'
    elif len(rec_999s) > 0:
        msg += 'ERROR-MISC: Record contains at least one 999 field\n'
        for rec_999 in rec_999s:
            msg += ' '+str(rec_999)+'\n'
    return msg
def test_writing_unicode(self):
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    record.leader = ' a '
    writer = MARCWriter(open('test/foo', 'wb'))
    writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo', 'rb'), to_unicode=True)
    record = next(reader)
    self.assertEqual(record['245']['a'], unichr(0x1234))
    reader.close()
    os.remove('test/foo')
def setUp(self):
    self.bib1 = Record()
    self.bib1.add_field(
        Field(
            tag='245',
            indicators=['0', '0'],
            subfields=['a', 'Test ']))
    self.bib1.add_field(
        Field(
            tag='901',
            indicators=[' ', ' '],
            subfields=['a', 'abcd']))
    self.bib1.add_field(
        Field(
            tag='001',
            data='1234'))
    self.bib2 = Record()
    self.bib2.add_field(
        Field(
            tag='245',
            indicators=['0', '0'],
            subfields=['a', 'Test ']))
    self.bib2.add_field(
        Field(
            tag='901',
            indicators=[' ', ' '],
            subfields=['a', 'abcd']))
def add_simplified_genres(cls, record, work):
    """Create subject fields for this work."""
    genres = []
    genres = work.genres
    for genre in genres:
        record.add_field(
            Field(tag="650", indicators=["0", "7"], subfields=[
                "a", genre.name,
                "2", "Library Simplified",
            ]))
def __add_592(record, value001):
    """
    This adds a local field for OCLC numbers that have been excluded and removed
    from the record because we believe they are inaccurate. Records can later be
    retrieved in Alma using this field and reviewed.

    :param record: pymarc record
    :param value001:
    :return:
    """
    target_field = Field(
        tag='592',
        indicators=["", ""],
        subfields=['a', 'Candidate OCLC number: ' + value001]
    )
    record.add_ordered_field(target_field)
def test_mixed_template_vendor_subfields(self):
    self.temp.acqType = "a"
    self.temp.code2 = "d"
    self.temp.code3 = "e"
    self.temp.orderType = "i"
    self.temp.status = "m"
    self.temp.vendor = "v"
    vfield = Field(
        tag="960",
        indicators=[" ", " "],
        subfields=["a", "1", "s", "9.99", "u", "2"],
    )
    field = bibs.db_template_to_960(self.temp, vfield)
    self.assertEqual(str(field), "=960 \\\\$s9.99$u2$aa$dd$ee$ii$mm$vv")
def test_nypl_branch_BT_SERIES_juvenile_pic_compound_name(self):
    bib = Record()
    bib.leader = "00000nam a2200000u 4500"
    tags = []
    tags.append(Field(tag="001", data="0001"))
    tags.append(
        Field(tag="245", indicators=["0", "0"], subfields=["a", "Test title"]))
    tags.append(
        Field(tag="091", indicators=[" ", " "], subfields=["a", "J E COMPOUND NAME"]))
    for tag in tags:
        bib.add_ordered_field(tag)
    mod_bib = patches.bib_patches("nypl", "branches", "cat", "BT SERIES", bib)
    correct_indicators = [" ", " "]
    correct_subfields = ["p", "J", "a", "E", "c", "COMPOUND NAME"]
    self.assertEqual(correct_indicators, mod_bib.get_fields("091")[0].indicators)
    self.assertEqual(correct_subfields, mod_bib.get_fields("091")[0].subfields)
def _process_standard(self, record, fielddata) -> Record:
    for f in fielddata:
        if int(f["field"]["value"]) < 10:
            data = ""
            for sf in f:
                if sf != "field" and sf != "first_ind" and sf != "second_ind":
                    data += f[sf]["value"]
            if data != "":
                record.add_field(Field(tag=f["field"]["value"], data=data))
        else:
            subfields = []
            for sf in f:
                if sf != "field" and sf != "first_ind" and sf != "second_ind":
                    subfields.append(sf)
                    subfields.append(f[sf]["value"])
            if len(subfields) > 0:
                record.add_field(
                    Field(tag=f["field"]["value"],
                          indicators=[
                              f["first_ind"]["value"],
                              f["second_ind"]["value"]
                          ],
                          subfields=subfields))
    return record
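# Hedged sketch of the `fielddata` shape that _process_standard above appears to
# iterate over, inferred from the dictionary keys it reads ("field", "first_ind",
# "second_ind", plus one key per subfield code). The concrete values below are
# invented for illustration only.
sample_fielddata = [
    {   # control field: tag value < 10, non-reserved values are concatenated into data
        "field": {"value": "001"},
        "first_ind": {"value": " "},
        "second_ind": {"value": " "},
        "a": {"value": "ocm12345678"},
    },
    {   # data field: every non-reserved key becomes a subfield code/value pair
        "field": {"value": "245"},
        "first_ind": {"value": "1"},
        "second_ind": {"value": "0"},
        "a": {"value": "A sample title :"},
        "b": {"value": "a subtitle."},
    },
]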
def ER_NBER(self, x, name='ER-NBER'):
    print('\nRunning change script ' + name + '\n')
    recs = utilities.BreakMARCFile(x)
    # NBER has begun using two 856 fields.
    # DELETE 856 fields with www.nber.org ... RETAIN 856 fields with dx.doi.org
    for rec in recs:
        for field in rec:
            if field.tag == '856' and field['u'].find("nber.org") >= 0:
                rec.remove_field(field)
        # move value of 001 to 002
        rec.add_ordered_field(Field(tag='002', data=rec['001'].value()))
        rec.remove_field(rec.get_fields('001')[0])
        rec.add_ordered_field(
            Field(tag='949',
                  indicators=['\\', '1'],
                  subfields=['l', 'uint', 'r', 's', 't', '99']))
        rec.add_ordered_field(
            Field(tag='949',
                  indicators=['\\', '\\'],
                  subfields=['a', '*b3=z;bn=buint;']))
        #rec.add_ordered_field(Field(tag='830', indicators=['\\', '0'], subfields=['a', 'Working paper series (National Bureau of Economic Research : Online)']))
        rec.add_ordered_field(
            Field(tag='730',
                  indicators=['0', '\\'],
                  subfields=[
                      'a', 'NBER working paper series online.', '5', 'OCU'
                  ]))
        rec.add_ordered_field(
            Field(
                tag='533',
                indicators=['\\', '\\'],
                subfields=[
                    'a', 'Electronic reproduction.',
                    'b', 'Cambridge, Mass.',
                    'c', 'National Bureau of Economic Research,',
                    'd', '200-',
                    'e', '1 electronic text : PDF file.',
                    'f', 'NBER working paper series.',
                    'n', 'Access restricted to patrons at subscribing institutions'
                ]))
        rec.add_ordered_field(Field(tag='003', data='ER-NBER'))
        # 530 field, change Hardcopy to Print
        rec['530']['a'] = 'Print version available to institutional subscribers.'
        # 490 and 830 fields lack ISBD punctuation, supply where lacking
        #x = re.sub('(?m)^(=490.*)[^ ;](\$v.*)', '\\1 ;\\2', x)
        four90a = rec['490']['a'] + ' ;'
        rec['490']['a'] = four90a
        eight30a = rec['830']['a'] + ' ;'
        rec['830']['a'] = eight30a
        # delete supplied 690 fields
        rec.remove_field(rec.get_fields('690')[0])
        rec = utilities.DeleteLocGov(rec)
        rec = utilities.Standardize856_956(rec, 'NBER')
        rec = utilities.AddEresourceGMD(rec)
    rec = utilities.SaveToMRK(recs, filename)
    x = utilities.MakeMARCFile(recs, filename)
    return x
def create_tag_910(system, library):
    """
    Creates 910 for NYPL records with code for Research or Branches
    """
    if system == "nypl":
        if library == "branches":
            code = "BL"
        elif library == "research":
            code = "RL"
        else:
            raise ValueError("Invalid 'library' argument passed.")
        return Field(tag="910", indicators=[" ", " "], subfields=["a", code])
    else:
        return None
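# Hedged usage sketch for create_tag_910 above; assumes the function is defined or
# imported in the current module. pymarc prints fields in MARCMaker-style notation.
print(create_tag_910("nypl", "research"))   # expected: =910  \\$aRL
print(create_tag_910("bpl", "branches"))    # expected: None (only NYPL gets a 910)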
def test_mixed_vendor_template_field(self):
    vfield = Field(
        tag="961",
        indicators=[" ", " "],
        subfields=[
            "a", "1",
            "v", "1",
        ],
    )
    self.temp.identity = "a"
    self.temp.blanketPO = "m"
    field = bibs.db_template_to_961(self.temp, vfield)
    self.assertEqual(str(field), "=961 \\\\$v1$aa$mm")
def construct_overdrive_control_number_tag(control_number: str) -> Field:
    """
    Constructs 019 MARC tag with provided OverDrive control number

    Args:
        control_number:         OverDrive MarcExpress control number

    Returns:
        `pymarc.field.Field` object
    """
    return Field(
        tag="019",
        indicators=[" ", " "],
        subfields=["a", control_number],
    )
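# Hedged usage sketch for construct_overdrive_control_number_tag above; assumes the
# function is defined or imported here, and the control number is an invented sample.
print(construct_overdrive_control_number_tag("76C91FC6-C20F-4B5A-A4C9-BBA4C0A1A358"))
# expected: =019  \\$a76C91FC6-C20F-4B5A-A4C9-BBA4C0A1A358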
def add_formats(cls, record, pool):
    formats = []
    for lpdm in pool.delivery_mechanisms:
        format = None
        dm = lpdm.delivery_mechanism
        format = cls.FORMAT_TERMS.get((dm.content_type, dm.drm_scheme))
        if format:
            record.add_field(
                Field(
                    tag="538",
                    indicators=[" ", " "],
                    subfields=[
                        "a", format,
                    ],
                ))
def to_marc_field(self):
    """
    Returns:
        Field -- MARC21 field containing the citation (ohlas)
    """
    subfields = []
    for i in ['9', 'd', 'm', 'n', 'p', 'r', 's', 't', 'v', 'x', 'y', 'g', 'w']:
        if i in self.data:
            subfields.append(i)
            subfields.append(self.data[i])
    field = Field(
        tag='591',
        indicators=[self.CONST_INDICATOR_1, self.CONST_INDICATOR_2],
        subfields=subfields)
    return field
def str_to_marc(self, string):
    tag = string[1:4]
    indicator_1 = string[6]
    indicator_2 = string[7]
    if indicator_1 == "\\":
        indicator_1 = " "
    if indicator_2 == "\\":
        indicator_2 = " "
    fields = re.split('\$', string)
    subfields = []
    for f in fields[1:]:
        subfields.append(f[0])
        subfields.append(f[1:])
    return Field(tag=tag,
                 indicators=[indicator_1, indicator_2],
                 subfields=subfields)
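# Hedged sketch of the MARCMaker-style string that str_to_marc above appears to
# expect, inferred from the positions it slices: "=" plus the tag, two spaces, the
# two indicators (backslash for blank), then "$"-prefixed subfields. The sample
# value is invented.
sample_line = r"=650  \0$aCats$vFiction"
# tag        -> sample_line[1:4] == "650"
# indicators -> sample_line[6] == "\" (mapped to blank) and sample_line[7] == "0"
# subfields  -> ["a", "Cats", "v", "Fiction"]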
def record_replace_fields(record: Record, old_field_name: str, new_field_name: str,
                          replace_num_list: list = [], index=-1) -> Record:
    result = record
    # First collect all fields in the record under the old field name
    old_fields = result.get_fields(old_field_name)
    for index, old_field in enumerate(old_fields):
        if index in replace_num_list or replace_num_list == []:
            result.remove_field(old_field)
            result.add_field(
                Field(tag=new_field_name,
                      indicators=old_field.indicators,
                      subfields=old_field.subfields))
    return result
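# Hedged usage sketch for record_replace_fields above; assumes it and pymarc are
# importable. Retags every 650 in the record as 651 while keeping indicators and
# subfields (uses the flat subfield-list pymarc API these snippets already use).
from pymarc import Record, Field

rec = Record()
rec.add_field(Field(tag="650", indicators=[" ", "0"], subfields=["a", "Cats."]))
rec.add_field(Field(tag="650", indicators=[" ", "0"], subfields=["a", "Dogs."]))
rec = record_replace_fields(rec, "650", "651")
print([f.tag for f in rec.get_fields()])   # expected: ['651', '651']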
def test_if_template_overwrites_vendor_subfields(self):
    self.temp.identity = "a"
    self.temp.generalNote = "c"
    self.temp.internalNote = "d"
    self.temp.oldOrdNo = "e"
    self.temp.selector = "f"
    self.temp.venAddr = "g"
    self.temp.venNote = "h"
    self.temp.blanketPO = "m"
    self.temp.venTitleNo = "i"
    self.temp.paidNote = "j"
    self.temp.shipTo = "k"
    self.temp.requestor = "l"
    vfield = Field(
        tag="961",
        indicators=[" ", " "],
        subfields=[
            "a", "1",
            "c", "3",
            "d", "4",
            "e", "5",
            "f", "6",
            "g", "7",
            "i", "8",
            "j", "9",
            "k", "10",
            "l", "11",
            "m", "12",
            "h", "13",
        ],
    )
    field = bibs.db_template_to_961(self.temp, vfield)
    self.assertEqual(str(field), "=961 \\\\$aa$cc$dd$ee$ff$gg$hh$mm$ii$jj$kk$ll")
def construct_initials_tag(librarySystemId: int) -> Field:
    """
    Creates 901 or 947 tag with NightShift marker

    Args:
        librarySystemId:        datastore.Resource.librarySystemId

    Returns:
        `pymarc.field.Field` object
    """
    # NYPL
    if librarySystemId == 1:
        tag = "901"
    elif librarySystemId == 2:
        tag = "947"

    return Field(tag=tag, indicators=[" ", " "], subfields=["a", "NightShift"])
def simple_lookup(cls, text, element_type=None):
    """
    Given just a single string of text, assume it is the value of
    the primary subfield of an identity, and look up its associated
    control number.

    Useful for Builders to look up Types for set control.
    (could Type control numbers maybe live in some separate cache?)

    If element type is unspecified, only returns a matching value
    if there is an unambiguous match to one element type.
    """
    element_type = element_type or cls.simple_element_type_from_value(text)
    if element_type is None:
        return cls.UNVERIFIED
    assert element_type in cls.index, f"element type {element_type} not indexed"
    subf = LaneMARCRecord.IDENTITY_SUBFIELD_MAP[element_type][0]
    return cls.lookup(Field(' ', ' ', [subf, text]), element_type)
def set_nypl_sierra_bib_default_location(library, bib):
    """
    adds a 949 MARC tag command for setting bibliographic location

    args:
        bib: pymarc.record.Record

    returns:
        bib: pymarc.record.Record, with added command "bn=" to the "949 $a"
             field; the field is created if missing
    """
    # determine correct location code
    if library == "branches":
        defloc = NBIB_DEFAULT_LOCATIONS["branches"]
    elif library == "research":
        defloc = NBIB_DEFAULT_LOCATIONS["research"]
    else:
        raise OverloadError("Invalid library argument passed: {}".format(library))

    # determine if 949 already present
    if sierra_command_tag(bib):
        for field in bib.get_fields("949"):
            if field.indicators == [" ", " "]:
                command = field["a"].strip()
                if "bn=" in command:
                    # skip, already present
                    break
                else:
                    if command[-1] == ";":
                        new_command = "{}{}".format(field["a"], "bn={};".format(defloc))
                    else:
                        new_command = "{}{}".format(
                            field["a"], ";bn={};".format(defloc)
                        )
                    field["a"] = new_command
                    break
    else:
        # command tag not present, add it
        bib.add_field(
            Field(
                tag="949",
                indicators=[" ", " "],
                subfields=["a", "*bn={};".format(defloc)],
            )
        )

    return bib
def add_publisher(cls, record, edition):
    if edition.publisher:
        publication_date = edition.issued or edition.published
        year = ""
        if publication_date:
            year = str(publication_date.year)
        record.add_field(
            Field(tag="264", indicators=[" ", "1"], subfields=[
                "a", "[Place of publication not identified]",
                "b", unicode(edition.publisher or ""),
                "c", year,
            ]))
def __parse_organization_prequalifiers(cls, field):
    """
    Parse a X10 field for
    - a list of prequalifiers as RefElement objects,
    to pass into a Builder.
    """
    field_lang, field_script = field['3'], field['4']

    org_prequalifiers = []

    # if ^b, then ^a and any ^b except the last is a prequalifier.
    if 'b' in field:
        # is ^a Org or Place? ^b are always going to be orgs
        if field.indicator1 == '1':
            prequalifier_element, rb = PLACE, PlaceRefBuilder()
        else:
            prequalifier_element, rb = ORGANIZATION, OrganizationRefBuilder()

        # ^a
        cumulative_subfields = []
        for val in field.get_subfields('a'):
            if prequalifier_element == PLACE:
                val = tfcm.normalize_place(val)
            val = cls.__strip_ending_punctuation(val)
            cumulative_subfields.extend(['a', val])
            rb.set_link(
                val,
                href_URI=Indexer.simple_lookup(val, prequalifier_element)
            )
            rb.add_name(
                val,
                lang=field_lang,
                script=field_script,
                nonfiling=0
            )
        org_prequalifiers.append(rb.build())

        # ^b
        for val in field.get_subfields('b')[:-1]:
            cumulative_subfields.extend(['b', val])
            val = cls.__strip_ending_punctuation(val)
            orb = OrganizationRefBuilder()
            orb.set_link(
                val,
                href_URI=Indexer.lookup(Field(' ', ' ', cumulative_subfields), ORGANIZATION)
            )
            orb.add_name(
                val,
                lang=field_lang,
                script=field_script,
                nonfiling=0
            )
            org_prequalifiers.append(orb.build())

    return org_prequalifiers
def test_960_items_mandatory_subfields(self):
    b = Record()
    b.add_field(
        Field(tag='960', indicators=[' ', ' '], subfields=['a', 'TEST']))
    bibs.write_marc21('specs_test.mrc', b)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertIn('"i" subfield is mandatory.', report)
    self.assertIn('"l" subfield is mandatory.', report)
    self.assertIn('"p" subfield is mandatory.', report)
    self.assertIn('"q" subfield is mandatory.', report)
    self.assertNotIn('"o" subfield is mandatory.', report)
    self.assertIn('"t" subfield is mandatory.', report)
    self.assertIn('"r" subfield is mandatory.', report)
    self.assertIn('"s" subfield is mandatory.', report)
    self.assertIn('"v" subfield is mandatory.', report)
    self.assertIn('"n" subfield is mandatory.', report)
def add_summary(cls, record, work):
    summary = None
    if isinstance(work, BaseMaterializedWork):
        # TODO: This is inefficient.
        # OPDS adds the summary from scripts since it's not library-specific, but
        # here individual libraries determine whether to include the summary.
        summary = work.license_pool.work.summary_text
    else:
        summary = work.summary_text
    if summary:
        stripped = re.sub('<[^>]+?>', ' ', summary)
        record.add_field(
            Field(tag="520", indicators=[" ", " "], subfields=[
                "a", stripped.encode('ascii', 'ignore'),
            ]))
def build_ref_from_field(self, field, element_type):
    """
    Build a ref based on a parsable field and its element type.
    Returns a Ref object to serve as the target of a Relationship.
    """
    rb_class = tfcm.ref_builder_map.get(element_type)
    parse_name = NameParser.get_parser_for_element_type(element_type)
    assert rb_class and parse_name, f"invalid element type: {element_type}"
    rb = rb_class()

    # names/qualifiers
    ref_names_and_qualifiers = parse_name(field)
    for ref_name_or_qualifier in ref_names_and_qualifiers:
        if isinstance(ref_name_or_qualifier, dict):
            rb.add_name(**ref_name_or_qualifier)
        else:
            rb.add_qualifier(ref_name_or_qualifier)

    # link attrs
    if not (field.tag in ('700', '710') and element_type == WORK_INST):
        # ignore author-title field works
        rb.set_link(*self.get_linking_info(field, element_type))

    # subdivisions
    if element_type == CONCEPT and not field.tag.endswith('80'):
        # ^vxyz should always be subdivisions in concept/language fields
        for code, val in field.get_subfields('v', 'x', 'y', 'z', with_codes=True):
            subcrb = ConceptRefBuilder()
            if code == 'x' and element_type == CONCEPT:
                # CONCEPT ^x (MeSH qualifier) needs special Indexer treatment
                val_href = Indexer.lookup(Field('650', ' ', ['x', val]), CONCEPT)
            else:
                subdiv_element_type = {
                    'v': CONCEPT,
                    'x': LANGUAGE,
                    'y': TIME
                }.get(code, PLACE)
                val_href = Indexer.simple_lookup(val, subdiv_element_type)
            subcrb.set_link(val, val_href)
            subcrb.add_name(val)
            rb.add_subdivision(subcrb.build())

    return rb.build()
def add_simplified_genres(cls, record, work):
    """Create subject fields for this work."""
    genres = []
    if isinstance(work, BaseMaterializedWork):
        # TODO: This is inefficient.
        genres = work.license_pool.work.genres
    else:
        genres = work.genres
    for genre in genres:
        record.add_field(
            Field(tag="650", indicators=["0", "7"], subfields=[
                "a", genre.name,
                "2", "Library Simplified",
            ]))
def simple_element_type_from_value(cls, text):
    """
    If there is a match to a primary-field-string (simplified) identity
    in only one element type, return that element type.
    """
    primary_subfs = set([
        subfs[0] for subfs in LaneMARCRecord.IDENTITY_SUBFIELD_MAP.values()
    ])
    bespoke_fields = [
        Field(' ', ' ', [subf, text]) for subf in primary_subfs
    ]
    results = list(
        filter(None, [
            cls.element_type_from_value(bespoke_field)
            for bespoke_field in bespoke_fields
        ]))
    return results[0] if len(results) == 1 else None
def construct_callnumber_tag(sierraFormatId: int, librarySystemId: int) -> Field:
    """
    Creates call number (091 for NYPL or 099 for BPL) tag

    Args:
        sierraFormatId:         datastore Resource sierraFormatId
        librarySystemId:        datastore Resource librarySystemId

    Returns:
        `pymarc.field.Field` object
    """
    if sierraFormatId == 1:
        call_number = "ERROR UNKNOWN"

    if librarySystemId == 1:
        tag = "091"
        if sierraFormatId == 2:
            call_number = "eNYPL Book"
        elif sierraFormatId == 3:
            call_number = "eNYPL Audio"
        elif sierraFormatId == 4:
            call_number = "eNYPL Video"
        elif sierraFormatId == 5:
            raise NightShiftError("Processing of print materials not implemented yet.")
    elif librarySystemId == 2:
        tag = "099"
        if sierraFormatId == 2:
            call_number = "eBOOK"
        elif sierraFormatId == 3:
            call_number = "eAUDIO"
        elif sierraFormatId == 4:
            call_number = "eVIDEO"
        elif sierraFormatId == 5:
            raise NightShiftError("Processing of print materials not implemented yet.")

    return Field(
        tag=tag,
        indicators=[" ", " "],
        subfields=["a", call_number],
    )
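# Hedged usage sketch for construct_callnumber_tag above; assumes the function is
# defined or imported here. Shows the call-number fields produced for e-book resources.
print(construct_callnumber_tag(2, 1))   # expected: =091  \\$aeNYPL Book
print(construct_callnumber_tag(2, 2))   # expected: =099  \\$aeBOOK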
def test_949_items_nonrepeatable_subfields(self):
    b = Record()
    b.add_field(
        Field(tag='949',
              indicators=[' ', '1'],
              subfields=[
                  'i', 'TEST', 'i', 'TEST',
                  'l', 'TEST', 'l', 'TEST',
                  'p', '9.99', 'p', '9.99',
                  't', 'TEST', 't', 'TEST',
                  'o', 'TEST', 'o', 'TEST',
                  'u', 'TEST', 'u', 'TEST',
                  'm', 'TEST', 'm', 'TEST',
                  'v', 'TEST', 'v', 'TEST'
              ]))
    bibs.write_marc21('specs_test.mrc', b)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    self.assertIn('"i" subfield is not repeatable.', report)
    self.assertIn('"l" subfield is not repeatable.', report)
    self.assertIn('"p" subfield is not repeatable.', report)
    self.assertIn('"t" subfield is not repeatable.', report)
    self.assertIn('"o" subfield is not repeatable.', report)
def add_web_client_urls(self, record, library, identifier, integration=None):
    _db = Session.object_session(library)
    settings = []

    if integration:
        marc_setting = self.value(MARCExporter.WEB_CLIENT_URL, integration)
        if marc_setting:
            settings.append(marc_setting)

    from api.registration.registry import Registration
    settings += [
        s.value for s in _db.query(ConfigurationSetting).filter(
            ConfigurationSetting.key == Registration.LIBRARY_REGISTRATION_WEB_CLIENT,
            ConfigurationSetting.library_id == library.id,
        ) if s.value
    ]

    qualified_identifier = urllib.parse.quote(
        identifier.type + "/" + identifier.identifier, safe=""
    )

    for web_client_base_url in settings:
        link = "{}/{}/works/{}".format(
            self.base_url,
            library.short_name,
            qualified_identifier,
        )
        encoded_link = urllib.parse.quote(link, safe="")
        url = "{}/book/{}".format(web_client_base_url, encoded_link)
        record.add_field(
            Field(
                tag="856",
                indicators=["4", "0"],
                subfields=["u", url],
            ))
def add_isbn(cls, record, identifier):
    # Add the ISBN if we have one.
    isbn = None
    if identifier.type == Identifier.ISBN:
        isbn = identifier
    if not isbn:
        _db = Session.object_session(identifier)
        identifier_ids = identifier.equivalent_identifier_ids()[identifier.id]
        isbn = _db.query(Identifier).filter(
            Identifier.type == Identifier.ISBN).filter(
                Identifier.id.in_(identifier_ids)).order_by(
                    Identifier.id).first()
    if isbn:
        record.add_field(
            Field(tag="020", indicators=[" ", " "], subfields=[
                "a", isbn.identifier,
            ]))
def validate300(self, marc_record):
    """Method modifies existing 300 field to the following RDA format:
    300 1 online resource (xxvi, 368 pages) : $b illustrations

    Args:
        marc_record(pymarc.Record): Input MARC21

    Returns:
        pymarc.Record: Modified MARC21
    """
    preface_pages_re = re.compile(r"(\w+), (\w+) p+")
    illus_re = re.compile(r"illus")
    all300Fields = marc_record.get_fields('300')
    for field in all300Fields:
        new_a = "1 online resource"
        new300 = Field(tag='300', indicators=[' ', ' '])
        subfield_a_lst = field.get_subfields('a')
        if len(subfield_a_lst) < 1:
            new300.add_subfield('a', new_a)
        for subfield in subfield_a_lst:
            illus_search = illus_re.search(subfield)
            preface_search = preface_pages_re.search(subfield)
            if preface_search is not None:
                preface, pages = preface_search.groups()
                new_a = "{} ({}, {} pages)".format(
                    new_a,
                    preface.lower(),
                    pages)
            if illus_search is not None:
                new_a += " :"
            new300.add_subfield('a', new_a)
            if illus_search is not None:
                new300.add_subfield('b', 'illustrations')
        marc_record.remove_field(field)
        marc_record.add_field(new300)
    return marc_record
def validate245(self, marc_record):
    """
    Method adds a subfield 'h' with value of electronic resource
    to the 245 field.

    Parameters:
    `marc_record`: Required, MARC record
    """
    all245s = marc_record.get_fields('245')
    subfield_h_val = '[electronic resource]'
    if len(all245s) > 0:
        field245 = all245s[0]
        marc_record.remove_field(field245)
        subfield_a, subfield_c = '', ''
        a_subfields = field245.get_subfields('a')
        indicator1, indicator2 = field245.indicators
        if len(a_subfields) > 0:
            subfield_a = a_subfields[0]
            if len(subfield_a) > 0:
                if subfield_a[-1] == '/':
                    subfield_a = subfield_a[:-1].strip()
        new245 = Field(tag='245',
                       indicators=[indicator1, indicator2],
                       subfields=['a', '%s ' % subfield_a])
        b_subfields = field245.get_subfields('b')
        c_subfields = field245.get_subfields('c')
        if len(c_subfields) > 0 and len(b_subfields) < 1:
            new245.add_subfield('h', '%s / ' % subfield_h_val)
        elif len(b_subfields) > 0:
            new245.add_subfield('h', '%s : ' % subfield_h_val)
        else:
            new245.add_subfield('h', subfield_h_val)
        if len(b_subfields) > 0:
            for subfield_b in b_subfields:
                new245.add_subfield('b', subfield_b)
        if len(c_subfields) > 0:
            for subfield_c in c_subfields:
                new245.add_subfield('c', subfield_c)
        marc_record.add_field(new245)
    return marc_record
class CSV2MARC(object):
    """
    Converts CSV to MARC records.
    """

    def __init__(self):
        """
        Load the CSV file.
        """
        if len(sys.argv) > 1:
            filepath = sys.argv[1]
        else:
            raise Exception(
                "You need to provide a file path to the CSV file as an argument."
            )
        try:
            self.reader = csv.reader(
                open(filepath, "r"),
                delimiter=","
            )
        except IOError:
            print >>sys.stderr, "Cannot open {0}".format(filepath)
            raise SystemExit
        output = "{0}.mrc".format(os.path.splitext(filepath)[0])
        self.file = open(output, "w")

        # State variables
        self.sysno = False
        self.record = False
        self.field = False
        self.fieldTag = False
        self.fieldTagOccurrence = False
        self.subfieldLabel = False
        self.subfieldLabelOccurrence = False
        self.line = False

    def checkFieldChange(self, fieldTag, fieldTagOccurrence):
        if (self.fieldTag != fieldTag) or ((self.fieldTag == fieldTag) and
                                           (self.fieldTagOccurrence != fieldTagOccurrence)):
            return True
        else:
            return False

    def checkRecordChange(self, sysno):
        if not (sysno == self.sysno):
            return True
        else:
            return False

    def writeMARCRecord(self, record):
        writer = MARCWriter(self.file)
        writer.write(record)

    def getNewRecord(self, sysno):
        self.sysno = sysno
        self.record = Record()

    def getNewField(self, line):
        self.fieldTag = line["fieldTag"]
        self.fieldTagOccurrence = line["fieldTagOccurrence"]
        if line["subfieldLabel"]:
            # Data field (indicators; subfields added later)
            self.field = Field(
                tag=line["fieldTag"],
                indicators=[
                    line["indicator1"],
                    line["indicator2"]
                ]
            )
        else:
            # Control field (data only, no indicators or subfields)
            self.field = Field(
                tag=line["fieldTag"],
                data=line["value"]
            )

    def main(self):
        for line in self.reader:
            # Parse the line
            line = {
                "sysno": line[0],
                "fieldTag": line[1],
                "fieldTagOccurrence": line[2],
                "indicator1": line[3],
                "indicator2": line[4],
                "subfieldLabel": line[5],
                "subfieldLabelOccurrence": line[6],
                "value": line[7],
            }
            if not self.sysno:
                self.getNewRecord(line["sysno"])
            if self.checkRecordChange(line["sysno"]):
                self.record.add_field(self.field)  # Add the last field of the previous record
                self.field = False  # Remove the last field of the previous record
                self.fieldTag = False
                self.writeMARCRecord(self.record)
                self.getNewRecord(line["sysno"])
            if not self.fieldTag:
                self.getNewField(line)
            if self.checkFieldChange(line["fieldTag"], line["fieldTagOccurrence"]):
                self.record.add_field(self.field)
                self.getNewField(line)
            if line["subfieldLabel"]:
                # If we have a subfield
                self.field.add_subfield(
                    line["subfieldLabel"],
                    line["value"]
                )
        self.record.add_field(self.field)  # Write the last field
        self.writeMARCRecord(self.record)  # Write the last record after the iteration has ended
        self.file.close()
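# Hedged sketch of the CSV row layout that CSV2MARC.main() above appears to expect,
# inferred from the column indexes it unpacks; the sample values are invented.
# Columns: sysno, fieldTag, fieldTagOccurrence, indicator1, indicator2,
#          subfieldLabel, subfieldLabelOccurrence, value
sample_rows = [
    ["000001", "001", "1", "", "", "", "", "bib000001"],           # control field (no subfieldLabel)
    ["000001", "245", "1", "1", "0", "a", "1", "A sample title"],  # data field, subfield $a
]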
def validate245(self, marc_record):
    """
    Method adds a subfield 'h' with value of electronic resource
    to the 245 field.

    Parameters:
    `marc_record`: Required, MARC record
    """
    all245s = marc_record.get_fields('245')
    subfield_h_val = '[electronic resource]'
    if len(all245s) > 0:
        field245 = all245s[0]
        marc_record.remove_field(field245)
        subfield_a, subfield_c = '', ''
        a_subfields = field245.get_subfields('a')
        indicator1, indicator2 = field245.indicators
        if len(a_subfields) > 0:
            subfield_a = a_subfields[0]
            if len(subfield_a) > 0:
                if ['.', '\\'].count(subfield_a[-1]) > 0:
                    subfield_a = subfield_a[:-1].strip()
        new245 = Field(tag='245',
                       indicators=[indicator1, indicator2],
                       subfields=['a', u'{0} '.format(subfield_a)])
        b_subfields = field245.get_subfields('b')
        c_subfields = field245.get_subfields('c')
        n_subfields = field245.get_subfields('n')
        p_subfields = field245.get_subfields('p')
        # Order for 245 subfields are:
        # $a $n $p $h $b $c
        if len(n_subfields) > 0:
            for subfield_n in n_subfields:
                new245.add_subfield('n', subfield_n)
        if len(p_subfields) > 0:
            for subfield_p in p_subfields:
                new245.add_subfield('p', subfield_p)
        if len(c_subfields) > 0 and len(b_subfields) < 1:
            new245.add_subfield('h', '{0} / '.format(subfield_h_val))
        elif len(b_subfields) > 0:
            new245.add_subfield('h', '{0} : '.format(subfield_h_val))
        else:
            new245.add_subfield('h', subfield_h_val)
        if len(b_subfields) > 0:
            for subfield_b in b_subfields:
                new245.add_subfield('b', subfield_b)
        if len(c_subfields) > 0:
            for subfield_c in c_subfields:
                new245.add_subfield('c', subfield_c)
        marc_record.add_field(new245)
    return marc_record
def convert_2_eres_rec(rec, rda_rec):
    msg = ''
    rec_003_value = rec.get_fields('003')[0].value()  # the partner's institutional code from the 003
    rec_001_value = rec.get_fields('001')[0].value()  # the partner's local record number (BSN) from the 001
    if rec_003_value == 'NNU':
        inst_name = 'New York University Libraries'
        inst_710a = 'New York University.'
        inst_710b = 'Libraries.'
    elif rec_003_value == 'NIC':
        inst_name = 'Cornell University Libraries'
        inst_710a = 'Cornell University.'
        inst_710b = 'Libraries.'
    elif rec_003_value == 'NNC':
        inst_name = 'Columbia University Libraries'
        inst_710a = 'Columbia University.'
        inst_710b = 'Libraries.'
    elif rec_003_value == 'NjP':
        inst_name = 'Princeton University Libraries'
        inst_710a = 'Princeton University.'
        inst_710b = 'Library.'
    elif rec_003_value == 'LeBAU':
        inst_name = "American University of Beirut's Jafet Memorial Library"
        inst_710a = 'Jafet Memorial Library.'
        inst_710b = ''
    elif rec_003_value == 'UaCaAUL':
        inst_name = "American University in Cairo Library"
        inst_710a = 'American University in Cairo.'
        inst_710b = 'Library.'
    else:
        inst_name = ''
        inst_710a = ''
        inst_710b = ''
        msg += 'ERROR-MISC: 003 code - '+rec_003_value+' - did not match any of the partner institutions.\n'

    if rec_001_value.startswith('o'):
        # this OCLC record did not get processed in step 4
        msg += 'ERROR-MISC: 003/001 field values did not change to institutional code and BSN\n'
        msg += ' Record 003/001: '+rec_003_value+'_'+rec_001_value+'\n'
        for rec_035 in rec.get_fields('035'):
            msg += ' '+str(rec_035)+'\n'

    # delete the 005 field
    for rec_005 in rec.get_fields('005'):
        rec.remove_field(rec_005)

    # change the cataloging date in bytes 00-05 of the 008 to the current date
    curr_date = datetime.date.today()
    yy = str(curr_date.year)[2:].zfill(2)
    mm = str(curr_date.month).zfill(2)
    dd = str(curr_date.day).zfill(2)
    rec_008_value = rec.get_fields('008')[0].value()
    new_008_data = yy+mm+dd+rec_008_value[6:]
    new_008 = Field(tag='008', data=new_008_data)
    rec.remove_field(rec.get_fields('008')[0])
    rec.add_ordered_field(new_008)

    # change byte 23 in the 008 field to code 'o' for 'online'
    rec_008_value = rec['008'].data
    rec['008'].data = rec_008_value[0:23] + 'o' + rec_008_value[24:]

    # add the 006/007 format fields for electronic resource characteristics
    if len(rec.get_fields('006')) > 0:
        for rec_006 in rec.get_fields('006'):
            rec_006_value = rec_006.value()
            msg += 'ERROR-MISC: 006 '+rec_006_value+'\n'
            rec.remove_field(rec_006)
    new_006 = Field(tag='006', data='m d ')
    rec.add_ordered_field(new_006)
    if len(rec.get_fields('007')) > 0:
        for rec_007 in rec.get_fields('007'):
            rec_007_value = rec_007.value()
            msg += 'ERROR-MISC: 007 '+rec_007_value+'\n'
            rec.remove_field(rec_007)
    new_007 = Field(tag='007', data='cr cn |||m|||a')
    rec.add_ordered_field(new_007)

    # delete fields that relate to the print version
    if len(rec.get_fields('016')) > 0:
        for rec_016 in rec.get_fields('016'):
            rec.remove_field(rec_016)
    if len(rec.get_fields('019')) > 0:
        for rec_019 in rec.get_fields('019'):
            rec.remove_field(rec_019)
    if len(rec.get_fields('025')) > 0:
        for rec_025 in rec.get_fields('025'):
            rec.remove_field(rec_025)
    if len(rec.get_fields('029')) > 0:
        for rec_029 in rec.get_fields('029'):
            rec.remove_field(rec_029)
    if len(rec.get_fields('042')) > 0:
        for rec_042 in rec.get_fields('042'):
            rec.remove_field(rec_042)
    if len(rec.get_fields('049')) > 0:
        for rec_049 in rec.get_fields('049'):
            rec.remove_field(rec_049)

    # create new 040 field for NNU
    for rec_040 in rec.get_fields('040'):
        rec.remove_field(rec_040)  # delete the existing 040 field(s)
    if rec_003_value == 'LeBAU':
        cat_lang = 'ara'
    else:
        cat_lang = 'eng'
    if rda_rec:
        new_040 = Field(tag='040', indicators=[' ', ' '],
                        subfields=['a', 'NNU', 'b', cat_lang, 'e', 'rda', 'c', 'NNU'])
    else:
        new_040 = Field(tag='040', indicators=[' ', ' '],
                        subfields=['a', 'NNU', 'b', cat_lang, 'c', 'NNU'])
    rec.add_ordered_field(new_040)

    # correct the 041 language code field when multiple codes exist in the same subfield
    if len(rec.get_fields('041')) > 0:
        for rec_041 in rec.get_fields('041'):
            for rec_041_sub in rec_041:
                mult_langs = False
                new_041_subs = []
                # Note: sub[0] is the subfield code and sub[1] is the subfield content for this subfield
                if len(rec_041_sub[1]) > 3:
                    # there are multiple language codes in this 041 subfield
                    mult_langs = True
                    rec_041_sub_langs = re.findall('...', rec_041_sub[1])
                    for rec_041_sub_lang in rec_041_sub_langs:
                        new_041_subs.append([rec_041_sub[0], rec_041_sub_lang])
                else:
                    new_041_subs.append([rec_041_sub[0], rec_041_sub[1]])
                if mult_langs:
                    rec_041_ind1 = rec_041.indicator1
                    rec_041_ind2 = rec_041.indicator2
                    new_rec_041 = Field(tag='041', indicators=[rec_041_ind1, rec_041_ind2], subfields=[])
                    for new_041_sub in new_041_subs:
                        new_rec_041.add_subfield(new_041_sub[0], new_041_sub[1])
                    rec.remove_field(rec_041)
                    rec.add_ordered_field(new_rec_041)

    # correct the 050 indicator 2
    rec_050s = rec.get_fields('050')
    for rec_050 in rec_050s:
        this_index = rec_050s.index(rec_050)
        # check indicator 2 value and fix if needed
        if rec_050.indicator2 == ' ':
            rec.get_fields('050')[this_index].indicator2 = '4'

    # correct the 082 indicator 1
    rec_082s = rec.get_fields('082')
    for rec_082 in rec_082s:
        this_index = rec_082s.index(rec_082)
        # check indicator 1 value and fix if needed
        if rec_082.indicator1 == ' ':
            rec.get_fields('082')[this_index].indicator1 = '0'

    if not rda_rec:
        # add GMD to 245$h for "[electronic resource]"
        rec_245s = rec.get_fields('245')
        gmd_added = False
        if len(rec_245s) == 0:
            msg += 'ERROR-MISC: Record is missing a 245 field\n'
        elif len(rec_245s) > 1:
            msg += 'ERROR-MISC: Record has multiple 245 fields\n'
        else:
            for rec_245 in rec_245s:
                rec_245_ind1 = rec_245.indicator1
                rec_245_ind2 = rec_245.indicator2
                new_rec_245 = Field(tag='245', indicators=[rec_245_ind1, rec_245_ind2], subfields=[])
                # delete any existing 245 $h GMD subfields
                if len(rec_245.get_subfields('h')) > 0:
                    for rec_245h in rec_245.get_subfields('h'):
                        msg += 'ERROR-MISC: Original record for the print contains a 245$h GMD: '+rec_245h+'\n'
                        rec_245.delete_subfield('h')
                rec_245_str = ''
                for rec_245_sub in rec_245:
                    # sub[0]=the subfield code; sub[1]=the subfield content
                    rec_245_str += '|$'+rec_245_sub[0]+rec_245_sub[1]
                rec_245_list = rec_245_str.split('|')
                rec_245_re1 = re.compile('\$a[^\$]*$')  # matches subfield pattern $a not followed by any other subfield
                if rec_245_re1.search(rec_245_str) and not gmd_added:
                    for sub in rec_245_list:
                        post_gmd_sub_code = ''
                        if sub.startswith('$a'):
                            sub_index = rec_245_list.index(sub)
                            if len(rec_245_list) > sub_index+1:
                                post_gmd_sub = rec_245_list[sub_index+1]
                                post_gmd_sub_code = post_gmd_sub[0:2]
                            new_rec_245, gmd_added = add_ordered_gmd(sub, '$a',
                                                                     post_gmd_sub_code, new_rec_245, gmd_added)
                rec_245_re2 = re.compile('\$a[^\$]*\$[^np]')  # matches subfield pattern $a not followed by $n or $p
                if rec_245_re2.search(rec_245_str) and not gmd_added:
                    for sub in rec_245_list:
                        post_gmd_sub_code = ''
                        if sub.startswith('$a'):
                            sub_index = rec_245_list.index(sub)
                            if len(rec_245_list) > sub_index+1:
                                post_gmd_sub = rec_245_list[sub_index+1]
                                post_gmd_sub_code = post_gmd_sub[0:2]
                            new_rec_245, gmd_added = add_ordered_gmd(sub, '$a',
                                                                     post_gmd_sub_code, new_rec_245, gmd_added)
                rec_245_re3 = re.compile('\$a[^\$]*\$n[^\$]*\$[^np]')  # matches subfield pattern $a $n not followed by $n or $p
                if rec_245_re3.search(rec_245_str) and not gmd_added:
                    for sub in rec_245_list:
                        post_gmd_sub_code = ''
                        if sub.startswith('$n'):
                            sub_index = rec_245_list.index(sub)
                            if len(rec_245_list) > sub_index+1:
                                post_gmd_sub = rec_245_list[sub_index+1]
                                post_gmd_sub_code = post_gmd_sub[0:2]
                            new_rec_245, gmd_added = add_ordered_gmd(sub, '$n',
                                                                     post_gmd_sub_code, new_rec_245, gmd_added)
                rec_245_re4 = re.compile('\$a[^\$]*\$p[^\$]*\$[^np]')  # matches subfield pattern $a $p not followed by $n or $p
                if rec_245_re4.search(rec_245_str) and not gmd_added:
                    for sub in rec_245_list:
                        post_gmd_sub_code = ''
                        if sub.startswith('$p'):
                            sub_index = rec_245_list.index(sub)
                            if len(rec_245_list) > sub_index+1:
                                post_gmd_sub = rec_245_list[sub_index+1]
                                post_gmd_sub_code = post_gmd_sub[0:2]
                            new_rec_245, gmd_added = add_ordered_gmd(sub, '$p',
                                                                     post_gmd_sub_code, new_rec_245, gmd_added)
                rec_245_re5 = re.compile('\$a[^\$]*\$n[^\$]*\$p[^\$]*\$[^np]')  # matches subfield pattern $a $n $p not followed by $n or $p
                if rec_245_re5.search(rec_245_str) and not gmd_added:
                    for sub in rec_245_list:
                        post_gmd_sub_code = ''
                        if sub.startswith('$p'):
                            sub_index = rec_245_list.index(sub)
                            if len(rec_245_list) > sub_index+1:
                                post_gmd_sub = rec_245_list[sub_index+1]
                                post_gmd_sub_code = post_gmd_sub[0:2]
                            new_rec_245, gmd_added = add_ordered_gmd(sub, '$p',
                                                                     post_gmd_sub_code, new_rec_245, gmd_added)
                rec.remove_field(rec_245)
                rec.add_ordered_field(new_rec_245)
        if not gmd_added:
            msg += 'ERROR-MISC: GMD did not get added to non-RDA record\n'
        # NEED TO FIGURE OUT HOW TO ADD GMD to corresponding 880 field if it exists

    # delete subfield $c from 300 fields, modify punctuation in subfields $a and $b,
    # and add 'online resource' to subfield $a
    for rec_300 in rec.get_fields('300'):
        if not rec_300.get_subfields('a')[0].startswith('online'):
            rec_300.delete_subfield('c')
            rec_300a = rec_300.get_subfields('a')[0]
            rec_300a = rec_300a.strip(' ;')
            rec_300a_pgs = rec_300a.split(' :')
            rec_300.delete_subfield('a')
            try:
                rec_300b = rec_300.get_subfields('b')[0]
                rec_300b = rec_300b.strip(' ;')
                rec_300.delete_subfield('b')
                rec_300a_mod = 'online resource ('+rec_300a_pgs[0]+') :'
                rec_300.add_subfield('a', rec_300a_mod)
                rec_300.add_subfield('b', rec_300b)
            except:
                # there is no subfield $b in the 300
                rec_300a_mod = 'online resource ('+rec_300a_pgs[0]+')'
                rec_300.add_subfield('a', rec_300a_mod)

    if rda_rec:
        # Delete any existing 336, 337, and 338 fields for the print characteristics
        if len(rec.get_fields('336')) > 0:
            for rec_336 in rec.get_fields('336'):
                rec.remove_field(rec_336)
        if len(rec.get_fields('337')) > 0:
            for rec_337 in rec.get_fields('337'):
                rec.remove_field(rec_337)
        if len(rec.get_fields('338')) > 0:
            for rec_338 in rec.get_fields('338'):
                rec.remove_field(rec_338)
        # Add 336, 337, and 338 fields for the e-resource characteristics for content, media, and carrier
        new_rec_336 = Field(tag='336', indicators=[' ', ' '], subfields=['a', 'text', '2', 'rdacontent'])
        new_rec_337 = Field(tag='337', indicators=[' ', ' '], subfields=['a', 'computer', '2', 'rdamedia'])
        new_rec_338 = Field(tag='338', indicators=[' ', ' '], subfields=['a', 'online resource', '2', 'rdacarrier'])
        rec.add_ordered_field(new_rec_336)
        rec.add_ordered_field(new_rec_337)
        rec.add_ordered_field(new_rec_338)

    # add ACO note field
    new_500_aco = Field(tag='500', indicators=[' ', ' '],
                        subfields=['a', 'Part of the Arabic Collections Online (ACO) project, contributed by '+inst_name+'.'])
    rec.add_ordered_field(new_500_aco)

    # delete any print record's reference to other formats
    if len(rec.get_fields('530')) > 0:
        for rec_530 in rec.get_fields('530'):
            rec.remove_field(rec_530)

    # delete any existing 533 fields (e.g. for microform)
    for rec_533 in rec.get_fields('533'):
        rec.remove_field(rec_533)
    # add 533 field related to electronic reproduction
    curr_year = datetime.date.today().year
    new_533 = Field(tag='533', indicators=[' ', ' '],
                    subfields=['a', 'Electronic reproduction.',
                               'b', 'New York, N.Y. :',
                               'c', 'New York University,',
                               'd', str(curr_year)+'.',
                               '5', 'NNU'])
    rec.add_ordered_field(new_533)

    # delete any existing 539 fields (e.g. for microform)
    for rec_539 in rec.get_fields('539'):
        rec.remove_field(rec_539)
    # new_539 = Field(tag='539', indicators=[' ',' '], subfields=['a', 's', 'b', str(curr_year), 'd', 'nyu', 'e', 'n', 'g', 'o'])
    # rec.add_ordered_field(new_539)

    # add headings referring to the ACO project and partners
    if not inst_710b == '':
        new_710 = Field(tag='710', indicators=['2', ' '], subfields=['a', inst_710a, 'b', inst_710b])
    else:
        new_710 = Field(tag='710', indicators=['2', ' '], subfields=['a', inst_710a])
    rec.add_ordered_field(new_710)
    new_730 = Field(tag='730', indicators=['0', ' '], subfields=['a', 'Arabic Collections Online.'])
    rec.add_ordered_field(new_730)

    # add a new 776 field referencing the relationship to the print version
    new_776 = Field(tag='776', indicators=['0', '8'], subfields=['i', 'Print version:'])
    # capture name entry from 100 or 110 if they exist and insert into new 776 subfield $a to reference print version
    if len(rec.get_fields('100', '110')) > 0:
        new_776a = rec.get_fields('100', '110')[0].value()
        if new_776a.startswith('8'):
            new_776a = new_776a[7:]
        new_776.add_subfield('a', new_776a)
    # capture title entry from 245 and insert into new 776 subfield $t to reference print version
    new_776t = rec.get_fields('245')[0].get_subfields('a')[0]
    new_776t = new_776t.rstrip(' /:.,')
    new_776.add_subfield('t', new_776t)
    # capture institutional ID entry from 003/001 and insert into new 776 subfield $w to reference print version
    new_776.add_subfield('w', '('+rec_003_value+')'+rec_001_value)
    if len(rec.get_fields('010')) > 0:
        if len(rec.get_fields('010')[0].get_subfields('a')) > 0:
            new_776w_010 = rec.get_fields('010')[0].get_subfields('a')[0]
            new_776.add_subfield('w', '(DLC)'+new_776w_010)
        rec.remove_field(rec.get_fields('010')[0])
    if len(rec.get_fields('035')) > 0:
        for rec_035 in rec.get_fields('035'):
            rec_035a = rec_035.get_subfields('a')[0]
            if rec_035a.startswith('(OCoLC)'):
                new_776w_oclc = rec_035a
                new_776.add_subfield('w', new_776w_oclc)
            rec.remove_field(rec_035)
    new_020z_fields = []  # collects the 020 fields as "invalid" subfield z's instead of subfield a's
    new_020z_subfields = []  # collects the print ISBNs to add to the 776 field
    if len(rec.get_fields('020')) > 0:
        # record contains 020 ISBN fields
        for rec_020 in rec.get_fields('020'):
            # iterate through each of the 020 fields
            msg += '020s: YES\n'
            if len(rec_020.get_subfields('a')) > 0:
                # the 020 field has a subfield a
                for rec_020a in rec_020.get_subfields('a'):
                    # iterate through the subfield a's
                    msg += '020a: '+str(rec_020a)+'\n'
                    new_020z_field = Field(tag='020', indicators=[' ', ' '], subfields=['z', rec_020a])
                    new_020z_fields.append(new_020z_field)
                    new_020z_subfields.append(rec_020a)
            rec.remove_field(rec_020)
        for new_020z_field in new_020z_fields:
            rec.add_ordered_field(new_020z_field)
        for new_776z in new_020z_subfields:
            new_776.add_subfield('z', new_776z)
    rec.add_ordered_field(new_776)

    # delete any 090 $h/$i fields
    if len(rec.get_fields('090')) > 0:
        for rec_090 in rec.get_fields('090'):
            if len(rec_090.get_subfields('h')) > 0:
                for rec_090h in rec_090.get_subfields('h'):
                    rec_090.delete_subfield('h')
            if len(rec_090.get_subfields('i')) > 0:
                for rec_090i in rec_090.get_subfields('i'):
                    rec_090.delete_subfield('i')
            if rec_090.format_field() == '':
                rec.remove_field(rec_090)

    # delete any local fields (9XXs, OWN, AVA)
    rec_9XXs = rec.get_fields('852', '903', '907', '910', '938', '945', '950', '955',
                              '981', '987', '994', '998', 'OWN', 'AVA')
    if len(rec_9XXs) > 0:
        for rec_9XX in rec_9XXs:
            rec.remove_field(rec_9XX)

    return (rec, msg)
def validate245(self, marc_record):
    """
    Method adds a subfield 'h' with value of electronic resource
    to the 245 field.

    Parameters:
    `marc_record`: Required, MARC record
    """
    all245s = marc_record.get_fields("245")
    subfield_h_val = "[electronic resource]"
    if len(all245s) > 0:
        field245 = all245s[0]
        marc_record.remove_field(field245)
        subfield_a, subfield_c = "", ""
        a_subfields = field245.get_subfields("a")
        indicator1, indicator2 = field245.indicators
        if len(a_subfields) > 0:
            subfield_a = a_subfields[0]
            if len(subfield_a) > 0:
                if [".", "\\"].count(subfield_a[-1]) > 0:
                    subfield_a = subfield_a[:-1].strip()
        new245 = Field(tag="245",
                       indicators=[indicator1, indicator2],
                       subfields=["a", u"{0} ".format(subfield_a)])
        b_subfields = field245.get_subfields("b")
        c_subfields = field245.get_subfields("c")
        n_subfields = field245.get_subfields("n")
        p_subfields = field245.get_subfields("p")
        # Order for 245 subfields are:
        # $a $n $p $h $b $c
        if len(n_subfields) > 0:
            for subfield_n in n_subfields:
                new245.add_subfield("n", subfield_n)
        if len(p_subfields) > 0:
            for subfield_p in p_subfields:
                new245.add_subfield("p", subfield_p)
        if len(c_subfields) > 0 and len(b_subfields) < 1:
            new245.add_subfield("h", "{0} / ".format(subfield_h_val))
        elif len(b_subfields) > 0:
            new245.add_subfield("h", "{0} : ".format(subfield_h_val))
        else:
            new245.add_subfield("h", subfield_h_val)
        if len(b_subfields) > 0:
            for subfield_b in b_subfields:
                new245.add_subfield("b", subfield_b)
        if len(c_subfields) > 0:
            for subfield_c in c_subfields:
                new245.add_subfield("c", subfield_c)
        marc_record.add_field(new245)
    return marc_record
def process_001_003_fields(rec_orig, rec, oclc_nums_bsns_all):
    msg = ''
    oclc_id = ''
    inst_id = ''
    oclc_match = False
    rec_003_value = rec.get_fields('003')[0].value()  # the institutional code from the 003 (either "OCLC" or a partner institution)
    rec_001_value = rec.get_fields('001')[0].value()  # the local record number from the 001 (either the OCLC number or the partner's BSN)

    # Process OCLC records exported from Connexion
    if rec_001_value.startswith('o'):
        oclc_id = '('+rec_003_value+')'+rec_001_value
        msg += 'OCLC ID: '+oclc_id+'\n'
        # for oclc records, add a new 999 $i subfield containing the orig record's 001/003 data using txt file
        # extract list of OCLC numbers for this OCLC record from 035 subfields $a and $z
        rec_oclc_nums = set()
        if len(rec.get_fields('035')) > 0:  # check if there are any 035 fields in the OCLC record
            for rec_035 in rec.get_fields('035'):  # iterate through each of the 035 fields
                rec_035az = rec_035.get_subfields('a', 'z')  # capture all the subfields a or z in the 035 field
                if len(rec_035az) > 0:  # check if any subfields a or z exist
                    for this_az in rec_035az:  # iterate through each of the subfields a or z
                        this_oclc_num = strip_number(this_az)  # strip the subfield data down to just the OCLC number digits
                        rec_oclc_nums.add(this_oclc_num)  # add the number to the list of this record's OCLC numbers
                        msg += ' oclc_rec_035_num: '+str(this_az)+'\n'
        # iterate through each of the lines in the txt file containing 001s/003s and OCLC numbers from original records
        for line in oclc_nums_bsns_all:
            if line.startswith('003'):
                # this is the first header line in the txt file; skip the header row
                skipped_line = line
            else:
                # process the line data from the oclc_nums_bsns_all txt file
                line_data = line.split(',')
                line_003 = line_data[0].strip()  # capture the partner's institution code
                line_001 = line_data[1].strip()  # capture the partner's bsn
                line_oclc_nums = line_data[2].strip()  # capture the corresponding OCLC numbers and remove any white space around them
                line_oclc_nums = line_oclc_nums.strip('"')  # remove the quotes around the OCLC number(s)
                line_oclc_nums = line_oclc_nums.split('|')  # create a list of the OCLC numbers based on the pipe delimiter, in case there are more than one
                # iterate through this record's OCLC numbers to see if one is in the list of all OCLC numbers for this batch
                for rec_oclc_num in rec_oclc_nums:
                    for line_oclc_num in line_oclc_nums:
                        if line_oclc_num == rec_oclc_num:
                            oclc_match = True
                            inst_id = line_003+'_'+line_001
                            msg += 'Institution ID: '+inst_id+'\n'
                            # delete the existing 001/003 fields from the OCLC record containing the OCLC number and symbol
                            rec.remove_field(rec.get_fields('003')[0])
                            rec.remove_field(rec.get_fields('001')[0])
                            # add new 001/003 fields to the OCLC record containing the partner's bsn and institution code
                            new_003 = Field(tag='003', data=line_003)
                            rec.add_ordered_field(new_003)
                            new_001 = Field(tag='001', data=line_001)
                            rec.add_ordered_field(new_001)
        if not oclc_match:
            msg += 'ERROR-MISC: OCLC numbers in this OCLC record did not match any original record\n'

    # Process Original Records (no OCLC record found in Connexion)
    else:
        inst_id = rec_003_value+'_'+rec_001_value
        msg += 'Institution ID: '+inst_id+'\n'
        # for orig records, delete all existing 035 fields
        if len(rec.get_fields('035')) > 0:
            for rec_035 in rec.get_fields('035'):
                msg += ' orig_rec_035 num: '+str(rec_035)+'\n'
                rec.remove_field(rec_035)  # delete this 035 field

    rec_999s = rec.get_fields('999')
    if len(rec_999s) == 0:
        if oclc_id == '':
            new_999_nums = Field(tag='999', indicators=[' ', ' '],
                                 subfields=['i', inst_id])
        else:
            new_999_nums = Field(tag='999', indicators=[' ', ' '],
                                 subfields=['i', inst_id, 'o', oclc_id])
        rec_orig.add_ordered_field(new_999_nums)
        rec.add_ordered_field(new_999_nums)
        msg += 'Record 999: '+new_999_nums.value()+'\n'
    elif len(rec_999s) > 1:
        msg += 'ERROR-MISC: Record contains multiple 999 fields\n'
        for rec_999 in rec_999s:
            msg += ' '+str(rec_999)+'\n'
    elif len(rec_999s) == 1:
        new_999 = deepcopy(rec_999s[0])
        for new_999e in new_999.get_subfields('e'):
            # delete any existing subfield $e in the new 999 field
            new_999.delete_subfield('e')
        msg += 'Record 999: '+new_999.value()+'\n'

    return (rec_orig, rec, oclc_id, inst_id, oclc_match, msg)
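# Hedged sketch of the oclc_nums_bsns_all lines that process_001_003_fields above
# appears to parse, inferred from its split/strip calls: a header line starting with
# "003", then comma-separated rows of institution code, BSN, and quoted, pipe-delimited
# OCLC numbers. The sample values are invented.
oclc_nums_bsns_all = [
    '003,001,oclc_nums',
    'NNU,000123456,"12345678|987654321"',
    'NjP,000654321,"11223344"',
]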