def platform2pymarc_obj(data=None):
    """Convert Platform bib data (parsed JSON) into a pymarc Record.

    args:
        data: dict parsed from Platform JSON; expected to carry a
            "varFields" list where fieldTag "_" holds the leader
    return:
        pymarc Record obj
    """
    record = Record(to_unicode=True, force_utf8=True)
    # Guard against a missing payload / missing "varFields" key instead
    # of raising AttributeError on None.get() (original crashed when
    # called with the default data=None).
    varFields = (data or {}).get("varFields") or []
    for f in varFields:
        if f.get("fieldTag") == "_":
            # pseudo-field "_" carries the MARC leader
            record.leader = f.get("content")
        elif f.get("subfields") is None:
            # control fields case (no subfields, raw content only);
            # indicators are passed through for parity with the original,
            # though MARC control fields carry no indicators
            field = Field(
                tag=f.get("marcTag"),
                indicators=[f.get("ind1"), f.get("ind2")],
                data=f.get("content"),
            )
            record.add_field(field)
        else:
            # variable fields: flatten [{tag, content}, ...] into
            # pymarc's flat [code, value, code, value, ...] list
            subfields = []
            for d in f.get("subfields"):
                subfields.append(d.get("tag"))
                subfields.append(d.get("content"))
            field = Field(
                tag=f.get("marcTag"),
                indicators=[f.get("ind1"), f.get("ind2")],
                subfields=subfields,
            )
            record.add_field(field)
    return record
def test_build_string_list_from_fields():
    """Test build_string_list_from_fields."""
    # Case 1: plain per-subfield delimiters; $x is joined with ' - '.
    record = Record()
    record.add_field(
        Field(tag='200', indicators=['0', '1'], subfields=[
            'a', 'Cerasi',
            'b', 'Claudio et Elena',
            'x', "Collections d'art"
        ]))
    data = build_string_list_from_fields(
        record=record,
        tag='200',
        subfields={
            'a': ', ', 'b': ', ', 'c': ', ',
            'd': ', ', 'f': ', ', 'x': ' - '
        })
    assert data == ["Cerasi, Claudio et Elena - Collections d'art"]

    # Case 2: tag_grouping wraps selected subtags in ' ( ... )' groups:
    # $c alone, then $d/$e/$f together joined by '; '.
    record = Record()
    record.add_field(
        Field(tag='210', indicators=['0', '1'], subfields=[
            'a', 'Place of public./distr.',
            'b', 'Address/publisher/dist.',
            'c', 'Name of publisher/dist.',
            'd', 'Date',
            'e', 'Place',
            'f', 'Address'
        ]))
    data = build_string_list_from_fields(
        record=record,
        tag='210',
        subfields={
            'a': ', ', 'b': '. ', 'c': ', ',
            'd': '; ', 'e': '; ', 'f': '; '
        },
        tag_grouping=[{
            'subtags': 'c',
            'start': ' ( ',
            'end': ' )',
            'delimiter': '',
            'subdelimiter': ', '
        }, {
            'subtags': 'def',
            'start': ' ( ',
            'end': ' )',
            'delimiter': '',
            'subdelimiter': '; '
        }])
    # NOTE: the adjacent string literals below concatenate into ONE
    # expected string, not four list entries.
    assert data == [
        'Place of public./distr.'
        '. Address/publisher/dist.'
        ' ( Name of publisher/dist. )'
        ' ( Date; Place; Address )'
    ]
def test_960_items_nonrepeatable_subfields(self):
    """Each duplicated 960 subfield must be reported as non-repeatable."""
    bib = Record()
    # Build the same duplicated-subfield payload as pairs: every code
    # appears twice in a row, and 'v' appears twice more at the end.
    duplicated = []
    for code in ('i', 'l', 'p', 'q', 'o', 't', 'r', 's', 'v', 'n', 'v'):
        value = '9.99' if code == 'p' else 'TEST'
        duplicated.extend([code, value, code, value])
    bib.add_field(
        Field(tag='960', indicators=[' ', ' '], subfields=duplicated))
    bibs.write_marc21('specs_test.mrc', bib)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    for code in ('i', 'l', 'p', 'q', 'o', 't', 'r', 's', 'v', 'n'):
        self.assertIn('"%s" subfield is not repeatable.' % code, report)
def decode_record(self, record):
    r"""Parse a screen-scraped pseudo-MARC text dump into a pymarc Record.

    Returns None when the dump has no LEADER line or no 245 (title) field.

    >>> reader = Reader('http://opac.uthsc.edu', 2)
    >>> raw = "\nLEADER 00000cas 2200517 a 4500 \n001 1481253 \n003 OCoLC \n005 19951109120000.0 \n008 750727c19589999fr qrzp b 0 b0fre d \n010 sn 86012727 \n022 0003-3995 \n030 AGTQAH \n035 0062827|bMULS|aPITT NO. 0639600000|asa64872000|bFULS \n040 MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n |dTUM \n041 0 engfre|bgeritaspa \n042 nsdp \n049 TUMS \n069 1 A32025000 \n210 0 Ann. genet. \n222 0 Annales de genetique \n229 00 Annales de genetique \n229 Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260 Paris :|bExpansion scientifique,|c1958-2004. \n300 v. :|bill. ;|c28 cm. \n310 Quarterly \n321 Two no. a year \n362 0 1,1958-47,2004. \n510 1 Excerpta medica \n510 1 Index medicus|x0019-3879 \n510 2 Biological abstracts|x0006-3169 \n510 2 Chemical abstracts|x0009-2258 \n510 2 Life sciences collection \n510 0 Bulletin signaletique \n510 0 Current contents \n546 French and English, with summaries in German, Italian, and\n Spanish. \n550 Journal of the Societe francaise de genetique. \n650 2 Genetics|vPeriodicals. \n710 2 Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics. \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936 Unknown|ajuin 1977 \n"
    >>> record = reader.decode_record(raw)
    >>> print record.title
    Annales de genetique
    """
    # NOTE(review): Python 2 code (str.decode on bytes, `print` doctest).
    pseudo_marc = record.strip().split('\n')
    raw_fields = []
    # First line must be the LEADER; anything else is not a record.
    if pseudo_marc[0][0:6] == 'LEADER':
        record = Record()  # rebinds the parameter to the output Record
        record.leader = pseudo_marc[0][7:].strip()
    else:
        return None
    for field in pseudo_marc[1:]:
        tag = field[:3]
        data = unescape_entities(field[6:].decode('latin1')).encode('utf8')
        if tag.startswith(' '):
            # Continuation line (blank tag): append to the previous
            # field's value/raw text, separated by a single space.
            #for special_tag in ('55','260'):
            #    data = " %s" % (data,) if tag.startswith(special_tag) else data
            data = " %s" % (data.strip(),)
            raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
            raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
        else:
            # Non-control fields get an implicit leading "|a" marker so
            # the subfield split below always yields an $a first.
            data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
            raw_fields.append({
                'tag': tag,
                'indicator1': field[3],
                'indicator2': field[4],
                'value': data.strip(),
                'raw': field.strip()
            })
    for raw in raw_fields:
        tag = raw['tag']
        data = raw['value'].strip()
        field = Field(tag=tag,
                      indicators=[raw['indicator1'], raw['indicator2']],
                      data=data)
        if not field.is_control_field():
            # '|' is the subfield delimiter: first char is the code,
            # the remainder is the subfield content.
            for sub in data.split('|'):
                try:
                    field.add_subfield(sub[0].strip(), sub[1:].strip())
                except Exception:
                    # Skip blank/empty subfields
                    continue
        record.add_field(field)
    record.parse_leader()
    # Disregard record if no title present
    if not record.get_fields('245'):
        return None
    else:
        return record
def test_960_items_incorrect_format(self):
    """An invalid value in 960 $r is flagged by BPL validation."""
    bib = Record()
    bad_field = Field(tag='960', indicators=[' ', ' '], subfields=['r', 'z'])
    bib.add_field(bad_field)
    bibs.write_marc21('specs_test.mrc', bib)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertIn('"r" subfield has incorrect value.', report)
def test_960_items_correct_price_format(self):
    """A well-formed price in 960 $p must not be flagged."""
    rec = Record()
    rec.add_field(
        Field(tag='960', indicators=[' ', '1'], subfields=['p', '9.99']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertNotIn('"p" subfield has incorrect price format.', report)
def test_949_items_stat_code_incorrect(self):
    """An out-of-range stat code in 949 $t is reported."""
    rec = Record()
    stat_field = Field(tag='949', indicators=[' ', '1'], subfields=['t', '600'])
    rec.add_field(stat_field)
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertIn('"t" subfield has incorrect value.', report)
def test_949_items_empty_price_subfield(self):
    """An empty 949 $p is treated as a malformed price."""
    rec = Record()
    rec.add_field(
        Field(tag='949', indicators=[' ', '1'], subfields=['p', '']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertIn('"p" subfield has incorrect price format.', report)
def test_949_subfield_a_mandatory(self):
    """A 949 with no subfields at all must fail on the mandatory $a."""
    rec = Record()
    rec.add_field(Field(tag='949', indicators=[' ', ' '], subfields=[]))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    self.assertIn('"a" subfield is mandatory.', report)
def test_bib_with_vendor_910_tag(self):
    """A vendor-supplied 910 is replaced with a single '=910 \\\\$aRL' tag."""
    bib = Record()
    vendor_tag = Field(tag="910", indicators=[" ", " "], subfields=["a", "foo"])
    bib.add_field(vendor_tag)
    patches.bib_patches("nypl", "research", "acq", "Amalivre", bib)
    tags_910 = bib.get_fields("910")
    self.assertEqual(len(tags_910), 1)
    self.assertEqual(str(bib["910"]), "=910 \\\\$aRL")
def test_949_items_barcode_not_digits(self):
    """A non-numeric 949 $i barcode fails validation."""
    rec = Record()
    rec.add_field(
        Field(tag='949', indicators=[' ', '1'], subfields=['i', 'TEST']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    self.assertIn('"i" subfield has incorrect barcode.', report)
def test_091_no_subfield_a(self):
    """A call-number field lacking $a must fail validation.

    NOTE(review): the method name says 091 but the field built is 099 —
    presumably both map to the same call-number spec; confirm.
    """
    rec = Record()
    rec.add_field(
        Field(tag='099', indicators=[' ', ' '], subfields=['p', 'TEST']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertIn('"a" subfield is mandatory.', report)
def test_947_incorrect_subfield_a_value(self):
    """An unexpected value in 947 $a is reported."""
    rec = Record()
    rec.add_field(
        Field(tag='947', indicators=[' ', ' '], subfields=['a', 'TEST']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertIn('"a" subfield has incorrect value', report)
def test_960_items_mandatory(self):
    """A 960 with only the ' 1' indicator combo leaves the mandatory
    '960  ' (blank indicators) tag unsatisfied."""
    rec = Record()
    rec.add_field(
        Field(tag='960', indicators=[' ', '1'], subfields=['a', 'TEST']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertIn('"960 " mandatory tag not found.', report)
def test_960_items_incorrect_location(self):
    """An unknown location code in 960 $l is reported."""
    rec = Record()
    rec.add_field(
        Field(tag='960', indicators=[' ', ' '], subfields=['l', 'mma0l']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertIn('"l" subfield has incorrect location code.', report)
def test_949_subfield_a_incorrect_value(self):
    """A 949 command string without the required leading '*' fails."""
    rec = Record()
    # value deliberately lacks the '*' prefix expected of command strings
    rec.add_field(
        Field(tag='949', indicators=[' ', ' '], subfields=['a', 'b2=a;']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    self.assertIn('"a" subfield has incorrect value', report)
def test_960_items_good_barcode(self):
    """A valid 14-digit barcode in 960 $i must not be flagged."""
    rec = Record()
    rec.add_field(
        Field(tag='960', indicators=[' ', '1'],
              subfields=['i', '34444987954328']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertNotIn('"i" subfield has incorrect barcode.', report)
def test_unicode(self):
    """Round-trip a non-ASCII 245 $a through MARCWriter and MARCReader.

    NOTE(review): Python 2 era code (`unichr`, `reader.next()`, text-mode
    file handles). The reader's handle is never closed and 'test/foo' is
    not removed here — confirm cleanup happens elsewhere in the suite.
    """
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    writer = MARCWriter(open('test/foo', 'w'))
    writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo'))
    record = reader.next()
    self.assertEqual(record['245']['a'], unichr(0x1234))
def test_949_items_repeatable(self):
    """Two 949s with identical indicators are allowed (tag is repeatable)."""
    rec = Record()
    for value in ('TEST', 'TEST2'):
        rec.add_field(
            Field(tag='949', indicators=[' ', '1'], subfields=['a', value]))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    self.assertNotIn('"949 1" is not repeatable.', report)
def test_091_not_repeatable(self):
    """Two 099 fields with the same indicators trigger the
    non-repeatable-tag error.

    NOTE(review): method name says 091 but the spec exercised is 099.
    """
    b = Record()
    b.add_field(
        Field(tag='099', indicators=[' ', ' '], subfields=['a', 'TEST']))
    # BUG FIX: the second field used tag='0099' (a four-character typo),
    # so it would never have registered as a repeated 099 occurrence.
    b.add_field(
        Field(tag='099', indicators=[' ', ' '], subfields=['a', 'TEST2']))
    bibs.write_marc21('specs_test.mrc', b)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertIn('"099 " is not repeatable', report)
def test_947_nonrepeatable_subfield_a(self):
    """A duplicated $a within one 947 triggers the non-repeatable error."""
    marc_bib = Record()
    marc_bib.add_field(
        Field(tag='947', indicators=[' ', ' '],
              subfields=['a', 'TEST', 'a', 'TEST1']))
    bibs.write_marc21('specs_test.mrc', marc_bib)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertIn('"a" subfield is not repeatable.', report)
def test_960_repeatable_diff_indicators(self):
    """960s that differ only in indicators don't raise the repeat error."""
    rec = Record()
    for second_indicator in (' ', '1'):
        rec.add_field(
            Field(tag='960', indicators=[' ', second_indicator],
                  subfields=['a', 'TEST']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'bpl', ['specs_test.mrc'], self.bcl)
    self.assertFalse(passed)
    self.assertNotIn('"960 1" is not repeatable', report)
class TestFindMatches(unittest.TestCase):
    """Tests for vendors.find_matches, which counts how many
    (tag, subfield-code, value) conditions a bib satisfies.
    A condition with subfield code None matches a control field's data."""

    def setUp(self):
        """Build two fixtures: bib1 has 245/901/001; bib2 lacks the 001."""
        self.bib1 = Record()
        self.bib1.add_field(
            Field(
                tag='245',
                indicators=['0', '0'],
                subfields=[
                    'a', 'Test '
                ]))
        self.bib1.add_field(
            Field(
                tag='901',
                indicators=[' ', ' '],
                subfields=[
                    'a', 'abcd'
                ]))
        self.bib1.add_field(
            Field(
                tag='001',
                data='1234'
            ))
        self.bib2 = Record()
        self.bib2.add_field(
            Field(
                tag='245',
                indicators=['0', '0'],
                subfields=[
                    'a', 'Test '
                ]))
        self.bib2.add_field(
            Field(
                tag='901',
                indicators=[' ', ' '],
                subfields=[
                    'a', 'abcd'
                ]))

    def test_2_matches(self):
        """Both conditions match bib1."""
        conditions = [('901', 'a', 'abcd'), ('001', None, '1234')]
        self.assertEqual(
            vendors.find_matches(self.bib1, conditions), 2)

    def test_only_1_match(self):
        """Wrong 001 value: only the 901 condition matches."""
        conditions = [('901', 'a', 'abcd'), ('001', None, '12345')]
        self.assertEqual(
            vendors.find_matches(self.bib1, conditions), 1)

    def test_bib_missing_tag(self):
        """bib2 has no 001 at all: only the 901 condition matches."""
        conditions = [('901', 'a', 'abcd'), ('001', None, '1234')]
        self.assertEqual(
            vendors.find_matches(self.bib2, conditions), 1)
def record_sorted(record: Record) -> Record:
    """Return a new Record whose fields are ordered by ascending tag.

    Walks every possible numeric tag 000-999 and copies all occurrences
    of each tag from the source record, preserving their relative order.
    """
    result = Record()
    result.leader = record.leader
    for number in range(1000):
        # zero-pad to the 3-character MARC tag form, e.g. 7 -> "007"
        tag = str(number).zfill(3)
        for field in record.get_fields(tag):
            result.add_field(field)
    return result
def test_writing_unicode(self):
    """Write a non-ASCII 245 $a and read it back with to_unicode=True.

    NOTE(review): Python 2 era code (`unichr`, `reader.next()`, text-mode
    'w' handle). The leader string below appears whitespace-collapsed in
    this copy — confirm against pymarc's expected 24-character leader.
    """
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    record.leader = ' a '
    writer = MARCWriter(open('test/foo', 'w'))
    writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo'), to_unicode=True)
    record = reader.next()
    self.assertEqual(record['245']['a'], unichr(0x1234))
    os.remove('test/foo')
def test_949_items_mandatory_subfields(self):
    """A 949 carrying only $a must report every missing mandatory code."""
    rec = Record()
    rec.add_field(
        Field(tag='949', indicators=[' ', '1'], subfields=['a', 'TEST']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    for code in ('i', 'l', 'p', 't', 'v'):
        self.assertIn('"%s" subfield is mandatory.' % code, report)
def make_marc(self, obj):
    """Make one MARC record from the BIBFRAME Instance dict `obj`.

    Looks up the associated bf:Work (raises if there is none), builds an
    008 from the publication activity, and a 245 from title/responsibility.
    Returns a pymarc Record.
    """
    inst_id = obj.get('id', 'NO-ID')
    logging.info("Creating MARC for %s" % (inst_id))
    record = Record()
    # Find associated work
    if ('bf:instanceOf' in obj and 'id' in obj['bf:instanceOf']):
        work = self.find_obj_by_id(obj['bf:instanceOf']['id'], 'bf:Work')
    else:
        raise Exception("No bf:instanceOf so can't get work!")
    # Target serialization (for reference):
    # <collection xmlns="http://www.loc.gov/MARC21/slim">
    # <record>
    # <leader>01050cam a22003011 4500</leader>
    # <controlfield tag="001">102063</controlfield>
    # <controlfield tag="008">860506s1957 nyua b 000 0 eng </controlfield>
    if ('bib:hasActivity' in obj
            and obj['bib:hasActivity'].get('type') == "bib:PublicationActivity"):
        # https://www.loc.gov/marc/bibliographic/bd008.html
        pub_year = obj['bib:hasActivity'].get('dcterms:date', '')
        pub_loc = ''
        pub_lang = ''
        if ('bib:atLocation' in obj['bib:hasActivity']):
            loc = obj['bib:hasActivity']['bib:atLocation'].get('id', '')
            if (loc.startswith('loc:')):
                # BUG FIX: lstrip('loc:') strips any leading run of the
                # characters l/o/c/: (mangling codes such as 'co' or
                # 'onc'), not the literal prefix — slice the prefix off.
                pub_loc = loc[len('loc:'):]
        if ('dcterms:language' in work):
            lang = work['dcterms:language'].get('id', '')
            if (lang.startswith('lang:')):
                # BUG FIX: same lstrip-vs-prefix issue as above.
                pub_lang = lang[len('lang:'):]
        f008 = "%6s%1s%4s%4s%3s%17s%3s%1s%1s" % (
            '', '', pub_year, '', pub_loc, '', pub_lang, '', '')
        record.add_field(Field(tag='008', data=f008))
    # FIXME - seems that the 'eng' is recorded int the Work but the Work is not linked
    # FIXME - to the Instance!
    # <datafield tag="245" ind1="0" ind2="0">
    # <subfield code="a">Clinical cardiopulmonary physiology.</subfield>
    f245 = []
    if ('bf:title' in obj and 'rdfs:label' in obj['bf:title']):
        f245.append('a')
        f245.append(obj['bf:title']['rdfs:label'])
    # <subfield code="c">...statement of responsibility...</subfield>
    if ('bf:responsibilityStatement' in obj):
        f245.append('c')
        f245.append(obj['bf:responsibilityStatement'])
    if (len(f245) > 0):
        # NOTE(review): indicators here are ints; pymarc conventionally
        # takes one-character strings — confirm the serializer accepts this.
        record.add_field(
            Field(tag='245', indicators=[0, 0], subfields=f245))
    # </datafield>
    # </record>
    # </collection>
    return (record)
def addLBD(config, oclcnumber, note):
    """Create a Local Bibliographic Data record for `oclcnumber` via the
    OCLC Metadata API.

    args:
        config: dict-like with 'oauth-session', 'metadata_service_url',
            'oclcSymbol'
        oclcnumber: OCLC number the LBD attaches to (004)
        note: free-text note stored in 500 $a
    return:
        pd.Series([oclcnumber, accessionNumber, status])
    """
    oauth_session = config.get('oauth-session')
    # create the LBD
    record = Record(leader='00000n a2200000 4500')
    record.add_field(Field(tag='004', data=oclcnumber))
    record.add_field(
        Field(indicators=[' ', ' '], tag='500', subfields=['a', note]),
        Field(indicators=[' ', ' '], tag='935', subfields=['a', str(time.time())]),
        Field(indicators=[' ', ' '], tag='940', subfields=['a', config.get('oclcSymbol')]))
    input = pymarc.record_to_xml(record).decode("utf-8")
    # Initialize outputs up front so the return below is well-defined on
    # every failure path (the original raised NameError on HTTPError
    # because accessionNumber was never assigned).
    accessionNumber = ""
    status = "failed"
    try:
        r = oauth_session.post(
            config.get('metadata_service_url') + "/lbd/data",
            data=input,
            headers={
                "Accept": 'application/atom+xml;content="application/vnd.oclc.marc21+xml"',
                "Content-Type": "application/vnd.oclc.marc21+xml"
            })
        # BUG FIX: raise_for_status was referenced but never called, so
        # HTTP errors were silently ignored.
        r.raise_for_status()
        try:
            result = ElementTree.fromstring(r.content)
            ns = {
                'atom': 'http://www.w3.org/2005/Atom',
                'wc': 'http://worldcat.org/rb'
            }
            marcNode = result.findall('atom:content/wc:response',
                                      ns)[0].getchildren()[0]
            # need to get this XML section out as a string and into a
            # file-like object for pymarc
            marcData = StringIO(
                ElementTree.tostring(marcNode, encoding='unicode', method='xml'))
            marcRecords = pymarc.parse_xml_to_array(marcData)
            # pull out the LBD accession number
            print(marcRecords)
            accessionNumber = marcRecords[0]['001'].value()
            status = "success"
        except xml.etree.ElementTree.ParseError as err:
            accessionNumber = ""
            status = "failed XML parsing issue"
            print(err)
    except requests.exceptions.HTTPError as err:
        status = "failed"
    return pd.Series([oclcnumber, accessionNumber, status])
def test_091_subfields(self):
    """Duplicated 091 $p and a missing $a are both reported."""
    rec = Record()
    rec.add_field(
        Field(tag='091', indicators=[' ', ' '],
              subfields=['p', 'TEST', 'p', 'TEST', 'c', 'TEST']))
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    self.assertIn(
        '"091": tag occurance 1:\n\t"p" subfield is not repeatable.',
        report)
    self.assertIn('"a" subfield is mandatory.', report)
def test_949_items_location_mandatory(self):
    """A 949 item missing the $l location code must be reported."""
    item_field = Field(tag='949', indicators=[' ', '1'], subfields=[
        'i', '33333700904853',
        'p', '4.99',
        't', '211',
        'v', 'BTSERIES'
    ])
    rec = Record()
    rec.add_field(item_field)
    bibs.write_marc21('specs_test.mrc', rec)
    passed, report = local_specs.local_specs_validation(
        'nypl', ['specs_test.mrc'], self.ncl)
    self.assertFalse(passed)
    self.assertIn('"l" subfield is mandatory.', report)
def test_writing_unicode(self):
    """Binary-mode round-trip of a non-ASCII 245 $a with cleanup.

    NOTE(review): uses `unichr`, so this variant targets Python 2 (or a
    compat shim); the leader string appears whitespace-collapsed in this
    copy — confirm against pymarc's 24-character leader format.
    """
    record = Record()
    record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
    record.leader = ' a '
    writer = MARCWriter(open('test/foo', 'wb'))
    writer.write(record)
    writer.close()
    reader = MARCReader(open('test/foo', 'rb'), to_unicode=True)
    record = next(reader)
    self.assertEqual(record['245']['a'], unichr(0x1234))
    reader.close()
    os.remove('test/foo')
def test_writing_unicode(self):
    """Round-trip a record with a non-ASCII 245 $a through disk."""
    rec = Record()
    rec.add_field(Field(245, ["1", "0"], ["a", chr(0x1234)]))
    rec.leader = " a "
    out = MARCWriter(open("test/foo", "wb"))
    out.write(rec)
    out.close()
    marc_in = MARCReader(open("test/foo", "rb"), to_unicode=True)
    rec = next(marc_in)
    self.assertEqual(rec["245"]["a"], chr(0x1234))
    marc_in.close()
    os.remove("test/foo")
def sort_6_subs(rec):
    """Return a copy of `rec` in which every field containing a $6
    linkage subfield has that $6 moved to the first subfield position;
    fields already starting with $6, and fields without one, are copied
    unchanged. Control fields are never touched."""
    msg = ''  # NOTE(review): never used in this function — confirm before removing
    new_rec = Record(to_unicode=True, force_utf8=True)
    new_rec_fields = []
    rec_fields = rec.get_fields()
    for field in rec_fields:
        script_field = False
        if not field.is_control_field() and (len(field.get_subfields('6')) > 0):
            # the field contains a subfield $6
            script_field = True
            ind1 = field.indicator1
            ind2 = field.indicator2
            tag = field.tag
            first_sub = True  # variable to keep track of whether you're on the first subfield in the field
            needs_sorted = True  # variable to keep track of whether the field needs sorted or if the $6 is already correctly the first subfield
            field_subs = []  # list variable to capture all the subfields in the field *except* for the subfield $6
            for subfield in field:
                # check if $6 is the first subfield - if so, the field is OK and does *not* need to be sorted
                if needs_sorted and first_sub and subfield[0] == '6':
                    needs_sorted = False
                elif needs_sorted:
                    if first_sub:
                        # this is the first subfield and is *not* $6, so the field needs sorted -
                        # creates one instance of a new_field object only when the 1st subfield is encountered
                        new_field = Field(tag=tag, indicators=[ind1,ind2], subfields=[])
                    # when subfield $6 is finally encountered in the field (not the 1st), add it to the
                    # new_field object now so it becomes the first subfield
                    # Note: subfield[0] is the subfield code and subfield[1] is the subfield content for this subfield
                    if subfield[0]=='6':
                        new_field.add_subfield(subfield[0],subfield[1])
                    # if the subfield is *not* $6, add it to the list of subfields to be added later to the new_field
                    else:
                        field_subs.append([subfield[0],subfield[1]])
                first_sub = False
            if needs_sorted:
                # then the $6 was *not* the 1st subfield and we need to now add the remaining subfields to the new_field object
                for sub in field_subs:
                    # add the remaining subfields to the new_field object
                    new_field.add_subfield(sub[0],sub[1])
                new_rec_fields.append(new_field)  # add the new field to the record
        if not script_field or not needs_sorted:
            # field had no $6, or its $6 was already first: copy it as-is
            new_rec_fields.append(field)
    for new_f in new_rec_fields:
        new_rec.add_field(new_f)
    return new_rec
def item2marc(i):
    """Build a pymarc Record from an item object `i`.

    Maps: i.no -> 001, i.creator -> 100 $a, i.title -> 245 $a,
    i.pages -> 300 $a, each entry of i.urn -> an 856 $g.

    NOTE(review): Python 2 code (`unicode`). Indicators are empty
    strings rather than the conventional single spaces — confirm the
    downstream serializer accepts that.
    """
    r = Record()
    id = unicode(i.no)  # shadows the builtin `id`; kept as-is
    r.add_field(Field(tag='001', data=id))
    if i.creator:
        r.add_field(Field(tag = '100', indicators = ['',''],
                          subfields = ['a', i.creator]))
    if i.title:
        r.add_field(Field(tag = '245', indicators = ['',''],
                          subfields = ['a', i.title]))
    if i.pages:
        pages = unicode(i.pages)
        r.add_field(Field(tag = '300', indicators = ['',''],
                          subfields = ['a', pages]))
    for urn in i.urn:
        r.add_field(Field(tag = '856', indicators = ['',''],
                          subfields = ['g', urn]))
    return r
def writeMetadataToMarc(data, MARCMapping, saveLocation):
    """Map a metadata dict onto a pymarc Record and write it out.

    args:
        data: dict of metadata values keyed by source field name
        MARCMapping: dict mapping source keys to 'TTTs' strings (3-char
            MARC tag + 1-char subfield code); the 'UUID' key maps to a
            control-field tag
        saveLocation: destination passed through to writeRecordToFile
    """
    record = Record()
    for key in data:
        if key in MARCMapping:
            if(key == u'UUID'):
                # UUID goes into a control field (data, no subfields)
                field = Field(
                    tag=MARCMapping[key],
                    data=data[key])
            else:
                # first three characters are the tag, fourth is the
                # subfield code
                field = Field(
                    tag=MARCMapping[key][:3],
                    subfields=[MARCMapping[key][3], data[key]],
                    indicators=['0', '0'])
            record.add_field(field)
    # BUG FIX: the original passed the undefined name `filename`
    # (NameError at runtime); the intended target is the parameter.
    writeRecordToFile(record, saveLocation)
class JsonHandler:
    """Handle JSON (MARC-in-JSON dicts converted to pymarc Records)."""

    def __init__(self):
        """Init."""
        self.records = []        # accumulated output Records
        self._record = None      # Record currently being built
        self._field = None       # Field currently being built
        self._text = []          # unused here; kept for parser symmetry

    def element(self, element_dict, name=None):
        """Converts a JSON `element_dict` to pymarc fields.

        Dispatches on `name` ("leader", "fields", "subfields"); a call
        with no name starts a fresh Record. NOTE: popitem() consumes the
        input dicts, so `element_dict` is mutated.
        """
        if not name:
            self._record = Record()
            self.element(element_dict, "leader")
        elif name == "leader":
            self._record.leader = element_dict[name]
            self.element(element_dict, "fields")
        elif name == "fields":
            fields = iter(element_dict[name])
            for field in fields:
                # each field dict holds exactly one {tag: content} pair
                tag, remaining = field.popitem()
                self._field = Field(tag)
                if self._field.is_control_field():
                    # control fields carry their value directly
                    self._field.data = remaining
                else:
                    self.element(remaining, "subfields")
                    self._field.indicators.extend(
                        [remaining["ind1"], remaining["ind2"]])
                self._record.add_field(self._field)
            self.process_record(self._record)
        elif name == "subfields":
            subfields = iter(element_dict[name])
            for subfield in subfields:
                # each subfield dict holds one {code: text} pair
                code, text = subfield.popitem()
                self._field.add_subfield(code, text)

    def elements(self, dict_list):
        """Sends `dict_list` to `element`; accepts a single dict too."""
        if type(dict_list) is not list:
            dict_list = [dict_list]
        for rec in dict_list:
            self.element(rec)
        return self.records

    def process_record(self, record):
        """Append `record` to `self.records`."""
        self.records.append(record)
def __next__(self):
    """Build the next pymarc Record from the underlying MARC-in-JSON
    iterator (self.iter)."""
    obj = next(self.iter)
    record = Record()
    record.leader = obj['leader']
    for entry in obj['fields']:
        # each entry holds exactly one {tag: content} pair
        tag, content = list(entry.items())[0]
        if 'subfields' in content and hasattr(content, 'update'):
            # flatten marc-in-json subfield dicts to pymarc's flat list
            flattened = []
            for pair in content['subfields']:
                for code, value in pair.items():
                    flattened.extend((code, value))
            built = Field(tag=tag, subfields=flattened,
                          indicators=[content['ind1'], content['ind2']])
        else:
            # control field: content is the raw data string
            built = Field(tag=tag, data=content)
        record.add_field(built)
    return record
def handleXLSX(mapping, sheet, outputFolder, rowsToIgnore):
    # Build one MARCXML file per spreadsheet row that has an ISBN.
    # NOTE(review): Python 2 only (`print` statement, `<>`, `unicode`).
    # mapping as defined in mapping function loadMapping
    # sheet is the specific excel sheet that contains the data we're extracting
    # rowsToIgnore is the number of rows at top of sheet that we don't care about
    for row in sheet.rows[rowsToIgnore:]:
        record = Record()
        # for every key in our map
        for i, key_entry in enumerate(mapping):
            key = key_entry["field"]
            # if the key isn't empty (i.e. we're not mapping that column)
            if key:
                # key[3] is the subfield code from the 'TTTs' mapping string
                addField(record, key_entry, [key[3], unicode(row[i].value)])
        if DEBUG:
            print record.title()
        # only rows with an ISBN are written out (ISBN names the file)
        if record.isbn() <> None:
            # Create unique UUID & add to record
            bookUUID = str(uuid.uuid1())
            record.add_field(Field(tag='001', data=bookUUID))
            # Write out our marcxml for each row
            writeMARCXML(record,
                         os.path.join(outputFolder, record.isbn() + '.xml'))
# NOTE(review): the leading ''' below looks like the delimiter of a
# module-level string whose counterpart lies outside this chunk —
# confirm against the full original file.
'''
# Python 2 one-shot migration script: copy bib and item rows from a
# local MySQL database into Koha's biblio/biblioitems/items tables,
# generating MARCXML for each bib with pymarc.
from pymarc import Record,Field,record_to_xml
import MySQLdb
record=Record()
print dir(record)  # debug: inspect the Record API
# source database (local) and target database (Koha)
dbLo=MySQLdb.connect("localhost","shailesh","123","shailesh")
dbKoha=MySQLdb.connect("localhost","root","","koha1")
curaKoha=dbKoha.cursor()
curaLocl = dbLo.cursor()
# one row per distinct book number ...
curaLocl.execute("select BookNo,BookName,authorName from book_info group by BookNo;")
dat=curaLocl.fetchall()
# ... and one row per physical item (accession)
curaLocl.execute("select accession,BookNo,callNo from book_info;")
datIte=curaLocl.fetchall()
for i in dat:
    # i = (BookNo, BookName, authorName)
    record=Record()
    record.add_field(Field(tag='040',indicators=['0','1'],subfields=['c','LIBRARY OF CONGRESS']))
    record.add_field(Field(tag='245',indicators=['0','1'],subfields=['a',i[1]]))
    record.add_field(Field(tag='942',indicators=['0','1'],subfields=['2','book of parag','c','BOOK']))
    record.add_field(Field(tag='100',indicators=['0','1'],subfields=['a',i[2]]))
    record.add_field(Field(tag='999',indicators=['0','1'],subfields=['c','8','d','8']))
    marcI=record_to_xml(record)
    #print i[0],i[1],i[2]
    # parameterized inserts (safe from SQL injection)
    curaKoha.execute("insert into biblio(biblionumber,title,author) values(%s,%s,%s);",(i[0],i[1],i[2]))
    curaKoha.execute("insert into biblioitems(biblionumber,biblioitemnumber,marcxml) values(%s,%s,%s);",(i[0],i[0],marcI))
for i in datIte:
    # i = (accession, BookNo, callNo); barcode is '1111' + accession
    barcode='1111'+str(i[0])
    curaKoha.execute("insert into items(itemnumber,biblionumber,biblioitemnumber,barcode,itemcallnumber) values(%s,%s,%s,%s,%s);",(i[0],i[1],i[1],barcode,i[2]))
dbKoha.commit()
dbKoha.close()
labels = g.preferredLabel(conc, lang=PRIMARYLANG) try: return labels[0][1] except IndexError: print >>sys.stderr, "WARNING: couldn't find label of %s, result: %s" % (conc,labels) return '' for conc in sorted(g.subjects(RDF.type, SKOS.Concept)): if (conc, OWL.deprecated, Literal(True)) in g: continue rec = Record(leader=LEADER) # URI -> 001 rec.add_field( Field( tag='001', data=conc ) ) # dct:modified -> 005 mod = g.value(conc, DCT.modified, None) if mod is None: modified = datetime.date(2000, 1, 1) else: modified = mod.toPython() # datetime.date or datetime.datetime object rec.add_field( Field( tag='005', data=modified.strftime('%Y%m%d%H%M%S.0') ) )
# Get PDF pdfUrl = 'http://apps.who.int' + aPdf['href'] urllib.urlretrieve(pdfUrl, SAVE_LOCATION + data['UUID'] + '.' + url.rsplit('.')[-1]) # Print metadata in MARCXML record = Record() for key in data: if key in MARCMapping: if(key == u'UUID'): field = Field( tag = MARCMapping[key], data = data[key]) else: field = Field( tag = MARCMapping[key][:3], subfields = [MARCMapping[key][3], data[key]], indicators=['0', '0']) record.add_field(field) writer = XMLWriter(open(SAVE_LOCATION + data[u'UUID'] + '.xml', 'wb')) writer.write(record) writer.close() except: # If there's an error, put it in error list errorUrlList.append(url) with open(errorListFile, 'wb') as fp: json.dump(errorUrlList, fp) else: # if there's no error, save metadata cleanUrlList.append(url) with open(cleanListFile, 'wb') as fp: json.dump(cleanUrlList, fp)
class XmlHandler(ContentHandler):
    """
    You can subclass XmlHandler and add your own process_record
    method that'll be passed a pymarc.Record as it becomes
    available. This could be useful if you want to stream the
    records elsewhere (like to a rdbms) without having to store
    them all in memory.
    """

    def __init__(self, strict=False, normalize_form=None):
        # strict: ignore elements outside the MARCXML namespace
        # normalize_form: optional unicodedata normalization ('NFC', ...)
        self.records = []
        self._record = None         # Record under construction
        self._field = None          # Field under construction
        self._subfield_code = None  # code of the open <subfield>
        self._text = []             # character data of the open element
        self._strict = strict
        self.normalize_form = normalize_form

    def startElementNS(self, name, qname, attrs):
        """Open a record/controlfield/datafield/subfield element."""
        if self._strict and name[0] != MARC_XML_NS:
            return
        element = name[1]
        self._text = []
        if element == 'record':
            self._record = Record()
        elif element == 'controlfield':
            tag = attrs.getValue((None, u'tag'))
            self._field = Field(tag)
        elif element == 'datafield':
            tag = attrs.getValue((None, u'tag'))
            # missing indicators default to single blanks
            ind1 = attrs.get((None, u'ind1'), u' ')
            ind2 = attrs.get((None, u'ind2'), u' ')
            self._field = Field(tag, [ind1, ind2])
        elif element == 'subfield':
            self._subfield_code = attrs[(None, 'code')]

    def endElementNS(self, name, qname):
        """Close an element, committing buffered text to the record."""
        if self._strict and name[0] != MARC_XML_NS:
            return
        element = name[1]
        if self.normalize_form is not None:
            text = unicodedata.normalize(self.normalize_form, u''.join(self._text))
        else:
            text = u''.join(self._text)
        if element == 'record':
            self.process_record(self._record)
            self._record = None
        elif element == 'leader':
            self._record.leader = text
        elif element == 'controlfield':
            self._field.data = text
            self._record.add_field(self._field)
            self._field = None
        elif element == 'datafield':
            self._record.add_field(self._field)
            self._field = None
        elif element == 'subfield':
            # append [code, text] pair to the field's flat subfield list
            self._field.subfields.append(self._subfield_code)
            self._field.subfields.append(text)
            self._subfield_code = None
        self._text = []

    def characters(self, chars):
        """Buffer character data; may arrive in multiple chunks."""
        self._text.append(chars)

    def process_record(self, record):
        """Default sink: collect finished records in memory."""
        self.records.append(record)
def epub_to_marc(fname, conf_file=None):
    """Build a pymarc Record from the OPF metadata inside an EPUB file.

    args:
        fname: path to the .epub (zip) file
        conf_file: optional INI file overriding DEFAULT_CONF; sections
            leader/006/007/008/040/264 feed the corresponding fields
    return:
        pymarc Record
    """
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }
    # prepare to read from the .epub file (renamed from `zip`, which
    # shadowed the builtin)
    epub = zipfile.ZipFile(fname)
    # find the contents metafile
    txt = epub.read('META-INF/container.xml')
    tree = etree.fromstring(txt)
    for el in tree:
        for elel in el:
            for item in elel.items():
                if item[0] == 'full-path':
                    cfname = item[1]
    # grab the metadata block from the contents metafile
    cf = epub.read(cfname)
    tree = etree.fromstring(cf)
    p = tree.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]
    # Read from the config file
    conf = configparser.ConfigParser()
    if conf_file:
        conf.read(conf_file)
    else:
        conf.read_string(DEFAULT_CONF)
    leader_dict = {}
    tag_006_dict = {}
    tag_007_dict = {}
    tag_008_dict = {}
    tag_040_dict = {}
    tag_264_dict = {}
    # copy each recognized config section into its dict
    # (the original also declared an unused tag_005_dict)
    section_dicts = {
        'leader': leader_dict,
        '006': tag_006_dict,
        '007': tag_007_dict,
        '008': tag_008_dict,
        '040': tag_040_dict,
        '264': tag_264_dict,
    }
    for section in conf.sections():
        target = section_dicts.get(section)
        if target is not None:
            for option in conf.options(section):
                target[option] = conf.get(section, option)
    record = Record(force_utf8=True)
    # set the leader
    record.leader = build_leader(leader_dict)
    # I *think* it's updating the 'Base Address of Data' position when
    # it is written to file, so I have kept characters 12-16 blank.
    # Fields 005-008 (control fields)
    record.add_field(Field(tag='005', data=build_tag_005()))
    record.add_field(Field(tag='006',
                           data=build_tag_006(tag_006_dict, tag_008_dict)))
    record.add_field(Field(tag='007', data=build_tag_007(tag_007_dict)))
    record.add_field(Field(tag='008', data=build_tag_008(tag_008_dict, p, ns)))
    # Field 020 - ISBN, from either identifier convention
    epub_field = None
    if p.xpath('dc:identifier[@id="ISBN"]/text()', namespaces=ns):
        epub_isbn = p.xpath(
            'dc:identifier[@id="ISBN"]/text()', namespaces=ns)[0].strip()
        epub_field = Field(
            tag='020',
            indicators=[' ', ' '],
            subfields=['a', epub_isbn, 'q', 'epub'])
    elif p.xpath('dc:identifier[@pkg:scheme="ISBN"]/text()', namespaces=ns):
        epub_isbn = p.xpath(
            'dc:identifier[@pkg:scheme="ISBN"]/text()', namespaces=ns)[0].strip()
        epub_field = Field(
            tag='020',
            indicators=[' ', ' '],
            subfields=['a', epub_isbn, 'q', 'epub'])
    if epub_field is not None:
        # BUG FIX: the 020 field was built but never added to the record
        record.add_field(epub_field)
    # Field 040
    # First, check if the indicators are empty and if they are,
    # turn them into single spaces.
    for value in ('indicator_1', 'indicator_2'):
        if tag_040_dict[value] == '':
            tag_040_dict[value] = ' '
    record.add_field(Field(
        tag='040',
        indicators=[tag_040_dict['indicator_1'], tag_040_dict['indicator_2']],
        subfields=['a', tag_040_dict['subfield_a'],
                   'b', tag_040_dict['subfield_b'],
                   'e', tag_040_dict['subfield_e'],
                   'c', tag_040_dict['subfield_c']]))
    # Field 245 - title / statement of responsibility
    if p.xpath('dc:title/text()', namespaces=ns):
        full_title = p.xpath('dc:title/text()', namespaces=ns)[0]
        if ":" in full_title:
            title = full_title[:full_title.index(':')].strip()
            subtitle = full_title[full_title.index(':') + 1:].strip()
        else:
            title = full_title
            subtitle = None
        # BUG FIX: indexing [0] on a possibly-empty xpath result raised
        # IndexError for EPUBs with no dc:creator, and creator_statement
        # could be referenced before assignment below.
        creator_statement = None
        creators = p.xpath('dc:creator/text()', namespaces=ns)
        if creators:
            creator_statement = creators[0]
        if title and subtitle and creator_statement:
            # second indicator = non-filing characters offset
            offset = 0
            if ' ' in title:
                title_words = title.split(' ')
                if title_words[0].lower() in NON_FILING_WORDS:
                    offset = len(title_words[0]) + 1
            record.add_field(
                Field('245', ['0', offset],
                      ['a', title + " :",
                       'b', subtitle + " /",
                       'c', creator_statement]))
        elif title and creator_statement:
            offset = 0
            if ' ' in title:
                title_words = title.split(' ')
                if title_words[0].lower() in NON_FILING_WORDS:
                    offset = len(title_words[0]) + 1
            record.add_field(
                Field('245', ['0', offset],
                      ['a', title + " /",
                       'c', creator_statement]))
    # Field 264 - publication statement
    if p.xpath('dc:publisher/text()', namespaces=ns) \
            and p.xpath('dc:date/text()', namespaces=ns):
        record.add_field(Field('264', [' ', '1'],
                               ['a', tag_264_dict['subfield_a'] + ' :',
                                'b', p.xpath('dc:publisher/text()',
                                             namespaces=ns)[0] + ", ",
                                'c', p.xpath('dc:date/text()',
                                             namespaces=ns)[0]]))
    # Field 264/4 - copyright notice, pulled from dc:rights by year
    if p.xpath('dc:rights/text()', namespaces=ns):
        copyright_statement = ""
        copyright_symbol = "©"
        rights_words_array = p.xpath('dc:rights/text()', namespaces=ns)[0].split()
        for word in rights_words_array:
            if word in copyright_year_range:
                copyright_statement = copyright_symbol + word
        if len(copyright_statement) > 4:
            record.add_field(Field('264', [' ', '4'],
                                   ['c', copyright_statement]))
    return record
class CSV2MARC(object):
    """Converts a CSV export (one subfield per row) into MARC records.

    Expected row layout (positional):
        sysno, fieldTag, fieldTagOccurrence, indicator1, indicator2,
        subfieldLabel, subfieldLabelOccurrence, value
    Rows must arrive grouped/sorted by sysno and by field occurrence;
    output is written to "<input basename>.mrc".
    NOTE(review): rows with an empty subfieldLabel are treated as control
    fields (Field built with data=), all others as data fields.
    """

    def __init__(self):
        """Open the input CSV (path taken from argv[1]) and the .mrc output.

        raises:
            Exception: when no CSV path was given on the command line.
            SystemExit: when the CSV file cannot be opened.
        """
        if len(sys.argv) > 1:
            filepath = sys.argv[1]
        else:
            raise Exception(
                "You need to provide a file path to the CSV file as an argument."
            )
        try:
            self.reader = csv.reader(
                open(filepath, "r"),
                delimiter = ","
            )
        except IOError:
            # BUGFIX: was the Python-2-only statement
            # "print >>sys.stderr, ...", a SyntaxError under Python 3;
            # this form behaves identically on both interpreters.
            sys.stderr.write("Cannot open {0}\n".format(filepath))
            raise SystemExit
        output = "{0}.mrc".format(os.path.splitext(filepath)[0])
        self.file = open(output, "w")
        # State variables tracking the record/field currently being built.
        # False doubles as "nothing yet" for all of them.
        self.sysno = False
        self.record = False
        self.field = False
        self.fieldTag = False
        self.fieldTagOccurrence = False
        self.subfieldLabel = False
        self.subfieldLabelOccurrence = False
        self.line = False

    def checkFieldChange(self, fieldTag, fieldTagOccurrence):
        """Return True when the row starts a new field (tag or occurrence changed)."""
        if (self.fieldTag != fieldTag) or ((self.fieldTag == fieldTag) and
                (self.fieldTagOccurrence != fieldTagOccurrence)):
            return True
        else:
            return False

    def checkRecordChange(self, sysno):
        """Return True when the row belongs to a different record (new sysno)."""
        if not (sysno == self.sysno):
            return True
        else:
            return False

    def writeMARCRecord(self, record):
        """Serialise one finished record to the output file."""
        writer = MARCWriter(self.file)
        writer.write(record)

    def getNewRecord(self, sysno):
        """Start building a fresh record for the given sysno."""
        self.sysno = sysno
        self.record = Record()

    def getNewField(self, line):
        """Start building a fresh field from the current row.

        A non-empty subfieldLabel means a data field (with indicators);
        an empty one means a control field (value stored as data).
        """
        self.fieldTag = line["fieldTag"]
        self.fieldTagOccurrence = line["fieldTagOccurrence"]
        if line["subfieldLabel"]:
            # Data field: indicators now, subfields appended later in main().
            self.field = Field(
                tag = line["fieldTag"],
                indicators = [
                    line["indicator1"],
                    line["indicator2"]
                ]
            )
        else:
            # Control field: the whole value is the field's data.
            self.field = Field(
                tag = line["fieldTag"],
                data = line["value"]
            )

    def main(self):
        """Stream the CSV rows, assembling and writing records as they complete."""
        for line in self.reader:
            # Map the positional CSV columns onto named keys.
            line = {
                "sysno" : line[0],
                "fieldTag" : line[1],
                "fieldTagOccurrence" : line[2],
                "indicator1" : line[3],
                "indicator2" : line[4],
                "subfieldLabel" : line[5],
                "subfieldLabelOccurrence" : line[6],
                "value" : line[7],
            }
            if not self.sysno:
                # Very first row: start the first record.
                self.getNewRecord(line["sysno"])
            if self.checkRecordChange(line["sysno"]):
                # Flush the previous record before starting a new one.
                self.record.add_field(self.field)  # add its last field
                self.field = False                 # forget the flushed field
                self.fieldTag = False
                self.writeMARCRecord(self.record)
                self.getNewRecord(line["sysno"])
            if not self.fieldTag:
                self.getNewField(line)
            if self.checkFieldChange(line["fieldTag"], line["fieldTagOccurrence"]):
                # New field within the same record: store the finished one.
                self.record.add_field(self.field)
                self.getNewField(line)
            if line["subfieldLabel"]:
                # Data-field row: append this subfield to the current field.
                self.field.add_subfield(
                    line["subfieldLabel"],
                    line["value"]
                )
        # Flush the last field and record after the iteration has ended.
        self.record.add_field(self.field)
        self.writeMARCRecord(self.record)
        self.file.close()
def book_to_mark21_file(book, owner, xml = False):
    """Serialise a book as a MARC21 record and store it as a file.

    args:
        book: book model instance to export
        owner: user that will own the generated file
        xml: when True emit MARCXML (.xml), otherwise binary MARC (.dat)
    returns:
        primary key of the file object created by handle_marc21_file
    """
    record = Record()

    # Leader positions explained at:
    # http://www.loc.gov/marc/bibliographic/bdleader.html
    l = list(record.leader)
    l[5] = 'n'   # record status: new
    l[6] = 'a'   # type of record: language material (use 't' for manuscripts)
    l[7] = 'm'   # bibliographic level: monograph
    l[9] = 'a'
    l[19] = '#'
    record.leader = "".join(l)

    # 007 - category of material: text
    record.add_field(record_control_field('007', 't'))

    # 008 - one control field per language; default to English
    languages = book.languages.all()
    if languages:
        for lang in languages:
            record.add_field(record_control_field('008', lang.code))
    else:
        record.add_field(record_control_field('008', 'eng'))

    # 020 - ISBNs: every identifier except pub_id / urn / doi
    isbn = models.Identifier.objects.filter(book=book).exclude(
        identifier='pub_id').exclude(identifier='urn').exclude(identifier='doi')
    for identifier in isbn:
        if book.book_type:
            record.add_field(record_field(
                '020', ['#', '#'],
                ['a', str(identifier.value) + ' ' + book.book_type]))
        else:
            record.add_field(record_field(
                '020', ['#', '#'], ['a', str(identifier.value)]))

    # 030 - source of acquisition: the submission URL
    try:
        base_url = models.Setting.objects.get(
            group__name='general', name='base_url').value
    except Exception:  # narrowed from bare except; fall back when unset
        base_url = 'localhost:8000'
    book_url = 'http://%s/editor/submission/%s/' % (base_url, book.id)
    record.add_field(record_field('030', ['#', '#'], ['b', book_url]))

    # 100 - main entry, personal name (one field per author)
    authors = book.author.all()
    author_names = ''
    for author in authors:
        # BUGFIX: was assigned to a misspelled variable ("auhtor_names"),
        # which left author_names empty in the 245 $c statement below.
        author_names = author_names + author.full_name() + ' '
        name = author.last_name + ', ' + author.first_name
        if author.middle_name:
            name = name + ' ' + author.middle_name[:1] + '.'
        record.add_field(record_field('100', ['1', '#'], ['a', name]))

    # 245 - title statement; second indicator 4 skips a leading "The "
    title_words = (book.title).split(' ')
    first_word = title_words[0]
    if first_word.lower() == 'the':
        record.add_field(record_field(
            '245', ['1', '4'], ['a', book.title, 'c', author_names]))
    else:
        record.add_field(record_field(
            '245', ['1', '0'], ['a', book.title, 'c', author_names]))

    # 260 - publication: place ($a), publisher ($b), date ($c)
    try:
        press_name = models.Setting.objects.get(
            group__name='general', name='press_name').value
    except Exception:
        press_name = None
    try:
        city = models.Setting.objects.get(
            group__name='general', name='city').value
    except Exception:
        city = None
    publication_info = []
    if book.publication_date:
        if city:
            publication_info.append('a')
            publication_info.append(str(city))
        if press_name:
            publication_info.append('b')
            publication_info.append(str(press_name))
        publication_info.append('c')
        publication_info.append(str(book.publication_date))
        record.add_field(record_field('260', ['#', '#'], publication_info))

    # 300 - physical details
    if book.pages:
        record.add_field(record_field(
            '300', ['#', '#'], ['a', str(book.pages) + ' pages']))

    # 336 / 337 / 338 - RDA content, media and carrier types
    record.add_field(record_field(
        '336', ['#', '#'], ['a', 'text', '2', 'rdacontent']))
    record.add_field(record_field(
        '337', ['#', '#'], ['a', 'unmediated', '2', 'rdamedia']))
    record.add_field(record_field(
        '338', ['#', '#'], ['a', 'volume', '2', 'rdacarrier']))

    # 546 - language note, mirroring the 008 logic above
    if languages:
        for lang in languages:
            record.add_field(record_field(
                '546', ['#', '#'], ['a', lang.display]))
    else:
        record.add_field(record_field('546', ['#', '#'], ['a', 'In English']))

    # 700 - press editors as added entries
    press_editors = book.press_editors.all()
    for editor in press_editors:
        record.add_field(record_field(
            '700', ['1', '#'],
            ['a', '%s, %s' % (editor.last_name, editor.first_name),
             'e', 'Press editor']))

    # 830 - series, plus its editor as another 700 when present
    if book.series:
        record.add_field(record_field(
            '830', ['#', '0'], ['a', book.series.name]))
        if book.series.editor:
            record.add_field(record_field(
                '700', ['1', '#'],
                ['a', '%s, %s' % (book.series.editor.last_name,
                                  book.series.editor.first_name),
                 'e', 'Series editor']))

    # Persist the record and hand back the stored file's primary key.
    title = book.title
    if not xml:
        filename = ('book_' + str(book.id) + '_' +
                    re.sub(r'[^a-zA-Z0-9\n\.]', '', title.lower()) +
                    '_marc21.dat')
        file = handle_marc21_file(record.as_marc(), filename, book, owner)
    else:
        filename = ('book_' + str(book.id) + '_' +
                    re.sub(r'[^a-zA-Z0-9\n\.]', '', title.lower()) +
                    '_marc21.xml')
        content = record_to_xml(record, quiet=False, namespace=False)
        file = handle_marc21_file(content, filename, book, owner)
    return file.pk