def test_writing_3_records(self):
    """XMLWriter: three records (the first with an 008 control field)
    serialize to the expected MARCXML collection.

    NOTE(review): the expected literal appears whitespace-mangled (original
    newlines collapsed to spaces) — confirm against the pristine fixture.
    """
    expected = r""" <?xml version="1.0" encoding="UTF-8"?> <collection xmlns="http://www.loc.gov/MARC21/slim"> <record> <leader> 22 4500</leader> <controlfield tag="008">090227s2009 mau chi d</controlfield> <datafield ind1="0" ind2="0" tag="100"> <subfield code="a">me</subfield> </datafield> <datafield ind1="0" ind2="0" tag="245"> <subfield code="a">Foo /</subfield> <subfield code="c">by me.</subfield> </datafield> </record> <record> <leader> 22 4500</leader> <datafield ind1="0" ind2="0" tag="100"> <subfield code="a">me</subfield> </datafield> <datafield ind1="0" ind2="0" tag="245"> <subfield code="a">Foo /</subfield> <subfield code="c">by me.</subfield> </datafield> </record> <record> <leader> 22 4500</leader> <datafield ind1="0" ind2="0" tag="245"> <subfield code="a">Foo /</subfield> <subfield code="c">by me.</subfield> </datafield> </record> </collection> """
    # drop leading char, dedent, strip newlines, compare as bytes
    expected = textwrap.dedent(expected[1:]).replace("\n", "").encode()
    file_handle = BytesIO()
    try:
        writer = pymarc.XMLWriter(file_handle)
        record = pymarc.Record()
        record.add_field(
            pymarc.Field("008", data="090227s2009 mau chi d"))
        record.add_field(pymarc.Field("100", ["0", "0"], ["a", "me"]))
        record.add_field(
            pymarc.Field("245", ["0", "0"], ["a", "Foo /", "c", "by me."]))
        writer.write(record)
        record = pymarc.Record()
        record.add_field(pymarc.Field("100", ["0", "0"], ["a", "me"]))
        record.add_field(
            pymarc.Field("245", ["0", "0"], ["a", "Foo /", "c", "by me."]))
        writer.write(record)
        record = pymarc.Record()
        record.add_field(
            pymarc.Field("245", ["0", "0"], ["a", "Foo /", "c", "by me."]))
        writer.write(record)
        # close the writer but keep the BytesIO readable
        writer.close(close_fh=False)
        # assertEquals is a deprecated alias, removed in Python 3.12
        self.assertEqual(file_handle.getvalue(), expected)
    finally:
        file_handle.close()
def setUp(self):
    """Open the shared MARC test data file and build a one-field fixture record."""
    self.reader = pymarc.MARCReader(open("test/test.dat", "rb"))
    self._record = pymarc.Record()
    self._record.add_field(
        pymarc.Field(
            tag="245",
            indicators=["1", "0"],
            subfields=["a", "Python", "c", "Guido"],
        )
    )
def to_html(self):
    """Return an HTML representation of any MARC records."""
    stylesheet = 'marcxml-to-html.xsl'
    fragments = []
    for raw in self.record_data:
        xml = pymarc.record_to_xml(pymarc.Record(data=raw))
        fragments.append(self._transform(xml, stylesheet))
    return "".join(fragments)
def test_writing_1_record(self):
    """XMLWriter: a single two-field record serializes to the expected MARCXML.

    NOTE(review): expected literal looks whitespace-mangled — confirm fixture.
    """
    expected = r""" <?xml version="1.0" encoding="UTF-8"?> <collection xmlns="http://www.loc.gov/MARC21/slim"> <record> <leader> 22 4500</leader> <datafield ind1="0" ind2="0" tag="100"> <subfield code="a">me</subfield> </datafield> <datafield ind1="0" ind2="0" tag="245"> <subfield code="a">Foo /</subfield> <subfield code="c">by me.</subfield> </datafield> </record> </collection> """
    expected = textwrap.dedent(expected[1:]).replace('\n', '')
    # on Python 3, compare as bytes (six.binary_type is bytes)
    if str != binary_type:
        expected = expected.encode()
    file_handle = BytesIO()
    try:
        writer = pymarc.XMLWriter(file_handle)
        record = pymarc.Record()
        record.add_field(pymarc.Field('100', ['0', '0'], ['a', u('me')]))
        record.add_field(
            pymarc.Field(
                '245', ['0', '0'], ['a', u('Foo /'), 'c', u('by me.')]))
        writer.write(record)
        writer.close(close_fh=False)
        # assertEquals is a deprecated alias, removed in Python 3.12
        self.assertEqual(file_handle.getvalue(), expected)
    finally:
        file_handle.close()
def test_tokenize_marc21_one(self):
    """Tokenizing a full record whose leader marks it as a book yields the
    expected sorted token list (including the 'book' type token)."""
    marc_record = pymarc.Record()
    # set leader position 6 (record type) to 'a'
    marc_record.leader = marc_record.leader[0:6] + 'a' + marc_record.leader[7:]
    marc_record.add_field(
        pymarc.Field(tag='100', indicators=['1', ' '],
                     subfields=['a', 'Naslund, Sena Jeter.']))
    marc_record.add_field(
        pymarc.Field(tag='245', indicators=['1', '0'],
                     subfields=['a', "Ahab's wife, or, The star-gazer :",
                                'b', "a novel /"]))
    # assertEquals is a deprecated alias, removed in Python 3.12
    self.assertEqual(
        sorted(self.classifier.__tokenize_marc21__(marc_record)),
        ['ahab', 'book', 'gazer', 'jeter', 'naslund', 'novel', 'sena',
         'star', 'wife'])
def setUp(self):
    """Prepare a MARCReader over the test data and a single-field record fixture."""
    handle = open('test/test.dat', 'rb')
    self.reader = pymarc.MARCReader(handle)
    title_field = pymarc.Field(
        tag='245',
        indicators=['1', '0'],
        subfields=['a', 'Python', 'c', 'Guido'],
    )
    self._record = pymarc.Record()
    self._record.add_field(title_field)
def next(self):
    """To support iteration.

    Reads the next MARC record from ``self.file_handle`` and returns it
    as a ``pymarc.Record``; raises ``StopIteration`` at end of input.
    Records that fail to parse are skipped: the handle is repositioned
    just past the bad record, ``self.failed`` is incremented, and the
    method falls through (returning None — callers presumably tolerate
    this; verify).
    """
    pos = self.file_handle.tell()
    # a MARC record begins with a 5-digit record length
    first5 = self.file_handle.read(5)
    if not first5:
        raise StopIteration
    if len(first5) < 5:
        raise RecordLengthInvalid
    length = int(first5)
    # read the rest of the record and re-attach the length prefix
    chunk = self.file_handle.read(length - 5)
    chunk = first5 + chunk
    try:
        record = pymarc.Record(chunk, to_unicode=self.to_unicode,
                               force_utf8=self.force_utf8,
                               hide_utf8_warnings=self.hide_utf8_warnings,
                               utf8_handling=self.utf8_handling)
        self.count += 1
        return record
    except (RecordLeaderInvalid, BaseAddressNotFound, BaseAddressInvalid,
            RecordDirectoryInvalid, NoFieldsFound, UnicodeDecodeError):
        # skip the unparseable record and keep going
        self.file_handle.seek(pos + length)
        self.count += 1
        self.failed += 1
        pass
def test_marc_clean_subfields():
    """marc_clean_subfields drops empty subfield values; inplace=True
    mutates the field and returns None."""
    record = marcx.Record()
    record.add("001", data="1234")
    record.add("245", a="", b="ok")
    # identity check instead of '== True' comparison
    assert record.strict is True
    # Behind the scenes, marcx will not add empty subfield values
    # (https://git.io/fjIWU).
    assert marc_clean_subfields(record["245"], inplace=False) == ['b', 'ok']
    assert record["245"].subfields == ['b', 'ok']
    assert marc_clean_subfields(record["245"], inplace=True) is None
    assert record["245"].subfields == ['b', 'ok']
    # Test pymarc record.
    record = pymarc.Record()
    record.add_field(pymarc.Field(tag='001', data='1234'))
    record.add_field(
        pymarc.Field(tag='245', indicators=['0', '1'],
                     subfields=['a', '', 'b', 'ok']))
    assert len(record.get_fields()) == 2
    # inplace=False returns the cleaned copy, leaving the field untouched
    assert marc_clean_subfields(record["245"], inplace=False) == ['b', 'ok']
    assert record["245"].subfields == ['a', '', 'b', 'ok']
    # inplace=True mutates the field itself
    assert marc_clean_subfields(record["245"], inplace=True) is None
    assert record["245"].subfields == ['b', 'ok']
def GetMarcRecord(self): rec = pymarc.Record('', False, True) # force Unicode record for fieldbuilder in self.Fields: builderType = type(fieldbuilder).__name__ if builderType == 'MarcFixedFieldBuilder': field = fieldbuilder.GetMarcField() if field.tag == '000': self.Debug("000 value: '%s'" % field.value()) self.Debug("Leader before: '%s'" % rec.leader) self._addToLeader(rec, field.value()) self.Debug("Leader after: '%s'" % rec.leader) else: if len(field.data) > 0: rec.add_field(field) else: # MarcFieldBuilder # expecting list of fields fields = fieldbuilder.GetMarcField() for f in fields: if len(f.subfields) > 0: self.Debug("Adding Field %s for Item %s" % (f.tag, self.ItemID)) self.Debug("subfields in %s are " % f.tag + str(f.subfields)) rec.add_field(f) return rec
def test_writing_3_records(self):
    """TextWriter: three records serialize to the expected mnemonic text form.

    NOTE(review): expected literal looks whitespace-mangled — confirm fixture.
    """
    expected = r""" =LDR 22 4500 =008 090227s2009\\\\mau\\\\\\\\\\\\\\\\\chi\d =100 00$ame =245 00$aFoo /$cby me. =LDR 22 4500 =100 00$ame =245 00$aFoo /$cby me. =LDR 22 4500 =245 00$aFoo /$cby me. """
    expected = textwrap.dedent(expected[1:])
    file_handle = StringIO()
    try:
        writer = pymarc.TextWriter(file_handle)
        record = pymarc.Record()
        record.add_field(
            pymarc.Field(
                '008', data=u('090227s2009 mau chi d')))
        record.add_field(pymarc.Field('100', ['0', '0'], ['a', u('me')]))
        record.add_field(
            pymarc.Field(
                '245', ['0', '0'], ['a', u('Foo /'), 'c', u('by me.')]))
        writer.write(record)
        record = pymarc.Record()
        record.add_field(pymarc.Field('100', ['0', '0'], ['a', u('me')]))
        record.add_field(
            pymarc.Field(
                '245', ['0', '0'], ['a', u('Foo /'), 'c', u('by me.')]))
        writer.write(record)
        record = pymarc.Record()
        record.add_field(
            pymarc.Field(
                '245', ['0', '0'], ['a', u('Foo /'), 'c', u('by me.')]))
        writer.write(record)
        writer.close(close_fh=False)
        # assertEquals is a deprecated alias, removed in Python 3.12
        self.assertEqual(file_handle.getvalue(), expected)
    finally:
        file_handle.close()
def test_tokenize_marc21_title_only(self):
    """Tokenizing a record holding only a 245 title field yields the
    title tokens plus the default 'work' token."""
    marc_record = pymarc.Record()
    marc_record.add_field(
        pymarc.Field(tag='245', indicators=['1', '0'],
                     subfields=['a', "Ahab's wife, or, The star-gazer :",
                                'b', "a novel /"]))
    # assertEquals is a deprecated alias, removed in Python 3.12
    self.assertEqual(
        self.classifier.__tokenize_marc21__(marc_record),
        ['novel', 'star', 'ahab', 'wife', 'work', 'gazer'])
def to_marcxml(self):
    """Return a MARCXML representation of any MARC records."""
    serialized = [
        pymarc.record_to_xml(pymarc.Record(data=raw))
        for raw in self.record_data
    ]
    body = "".join(serialized)
    xmldoc = """<?xml version="1.0" encoding="utf-8"?> <collection xmlns="http://www.loc.gov/MARC21/slim"> {0} </collection>""".format(body)
    return self._transform(xmldoc, 'format-xml.xsl')
def merge_marc_record(bib_record, holdings_record):
    """Merge a holdings record into a bib record, returning a new composite.

    Bib fields are copied wholesale.  Holdings fields (minus the excluded
    control/identifier tags) are added unless the tag already exists on the
    bib side and is non-repeatable; in that case identical subfields are
    ignored and differing ones are logged and dropped.  Certain copied
    holdings tags get a subfield linking back to the holdings 001 id.
    """
    # tags that may legitimately repeat in the composite record
    repeatable_fields = [
        "014", "020", "035", "015", "538", "600", "650", "655", "700",
        "852", "856", "866", "867", "868", "876", "583", "561", "562", "863"
    ]
    # tag -> subfield code used to carry the holdings record id
    id_subfields = {
        "538": "8", "852": "8", "856": "8", "866": "9",
        "867": "9", "868": "9", "876": "9"
    }
    # holdings tags never copied into the composite
    exclude_from_holdings = ["001", "003", "004", "007", "008", "014", "020"]
    composite_record = pymarc.Record()
    composite_record.leader = bib_record.leader
    bib_fields = bib_record.get_fields()
    bib_tags = [field.tag for field in bib_fields]
    for field in bib_fields:
        composite_record.add_ordered_field(field)
    holdings_fields = [
        field for field in holdings_record.get_fields()
        if field.tag not in exclude_from_holdings
    ]
    for field in holdings_fields:
        field_exists = False
        # non-repeatable tag already present on the bib side: reconcile
        if field.tag in bib_tags and field.tag not in repeatable_fields:
            field_exists = True
            if field.is_control_field():
                raise Exception("Tag %s is defined in the bib fields" % field.tag)
            else:
                existing_field = composite_record.get_fields(field.tag)[0]
                for subfield in field:
                    existing_subfields = existing_field.get_subfields(
                        subfield[0])
                    if existing_subfields and existing_subfields[
                            0] == subfield[1]:
                        # just ignore it, it's the same
                        pass
                    else:
                        # it's different and not repeatable, error
                        logging.warning(
                            "Subfield %s collision on tag %s, not copying",
                            subfield[0], field.tag)
        if not field_exists:
            # link selected copied fields back to their holdings record id
            if field.tag in id_subfields:
                id_subfield = id_subfields[field.tag]
                holdings_id = get_string_value(
                    holdings_record.get_fields('001'))
                field.add_subfield(id_subfield, holdings_id)
            composite_record.add_ordered_field(field)
    return composite_record
def to_json(self, **kwargs):
    """Return a JSON representation of any MARC records.

    :param ``**kwargs``: Arbitrary keyword arguments that will be added
        to the returned JSON string.
    """
    payload = {
        "data": [pymarc.Record(data=raw).as_dict() for raw in self.record_data]
    }
    payload.update(self.metadata)
    payload.update(kwargs)
    return json.dumps(payload)
def test_from_record(self):
    """marcx.Record.from_record upgrades a pymarc.Record while sharing
    its __dict__, gaining the itervalues helper."""
    record = pymarc.Record()
    record.add_field(pymarc.Field('001', data='123'))
    record.add_field(pymarc.Field('020', [' ', ' '], subfields=['a', '123']))
    # a plain pymarc record has no itervalues helper
    with self.assertRaises(AttributeError):
        record.itervalues('020')
    obj = marcx.Record.from_record(record)
    # assertEquals is a deprecated alias, removed in Python 3.12
    self.assertEqual(obj.__class__.__name__, 'Record')
    self.assertEqual(obj.__dict__, record.__dict__)
    self.assertEqual(list(obj.itervalues('020')), ['123'])
def test_marc_clean_record():
    """marc_clean_record strips empty subfield values from a record in place."""
    rec = pymarc.Record()
    rec.add_field(pymarc.Field(tag='001', data='1234'))
    dirty = pymarc.Field(tag='245',
                         indicators=['0', '1'],
                         subfields=['a', '', 'b', 'ok'])
    rec.add_field(dirty)
    assert len(rec.get_fields()) == 2
    assert rec["245"].subfields == ['a', '', 'b', 'ok']
    marc_clean_record(rec)
    assert rec["245"].subfields == ['b', 'ok']
def test_writing_3_records(self):
    """TextWriter: three records (plain str literals) serialize to the
    expected mnemonic text form.

    NOTE(review): expected literal looks whitespace-mangled — confirm fixture.
    """
    expected = r""" =LDR 22 4500 =008 090227s2009\\\\mau\\\\\\\\\\\\\\\\\chi\d =100 00$ame =245 00$aFoo /$cby me. =LDR 22 4500 =100 00$ame =245 00$aFoo /$cby me. =LDR 22 4500 =245 00$aFoo /$cby me. """
    expected = textwrap.dedent(expected[1:])
    file_handle = StringIO()
    try:
        writer = pymarc.TextWriter(file_handle)
        record = pymarc.Record()
        record.add_field(
            pymarc.Field("008", data="090227s2009 mau chi d"))
        record.add_field(pymarc.Field("100", ["0", "0"], ["a", "me"]))
        record.add_field(
            pymarc.Field("245", ["0", "0"], ["a", "Foo /", "c", "by me."]))
        writer.write(record)
        record = pymarc.Record()
        record.add_field(pymarc.Field("100", ["0", "0"], ["a", "me"]))
        record.add_field(
            pymarc.Field("245", ["0", "0"], ["a", "Foo /", "c", "by me."]))
        writer.write(record)
        record = pymarc.Record()
        record.add_field(
            pymarc.Field("245", ["0", "0"], ["a", "Foo /", "c", "by me."]))
        writer.write(record)
        writer.close(close_fh=False)
        # assertEquals is a deprecated alias, removed in Python 3.12
        self.assertEqual(file_handle.getvalue(), expected)
    finally:
        file_handle.close()
def marcwriter(file):
    """Write one MARC holdings record per input row to holdings<date>.mrc.

    Each row contributes an 001 (cleaned OCLC number) and one 020 per
    pipe-delimited ISBN.  Any prior holdings* file is removed first.
    """
    # remove prior file(s)
    for fname in os.listdir("."):
        if os.path.isfile(fname) and fname.startswith("holdings"):
            os.remove(fname)
    # 'with' guarantees the output file is closed even on error;
    # 'wb' because MARC transmission format is binary
    with open('holdings{}.mrc'.format(date.today()), 'wb') as outputfile:
        for rownum, row in enumerate(file):
            item_load = pymarc.Record(to_unicode=True, force_utf8=True)
            ocn = row[1]
            isbn = row[2].split('|')
            # keep digits only in the OCLC number
            ocn = re.sub("[^0-9]", "", ocn)
            field_001 = pymarc.Field(tag='001', data=ocn)
            item_load.add_ordered_field(field_001)
            for i in isbn:
                if i == '':
                    # NOTE(review): stops at the first empty value, dropping any
                    # later ISBNs — 'continue' may have been intended; confirm
                    break
                field_020 = pymarc.Field(tag='020', indicators=[' ', ' '],
                                         subfields=['a', i])
                item_load.add_ordered_field(field_020)
            # (optional 024/UPC handling was commented out in the original
            # and has been dropped; restore from history if needed)
            outputfile.write(item_load.as_marc())
def test_writing_empty_record(self):
    """TextWriter: an empty record serializes to just a leader line."""
    expected = r""" =LDR 22 4500 """
    expected = textwrap.dedent(expected[1:])
    file_handle = StringIO()
    try:
        writer = pymarc.TextWriter(file_handle)
        record = pymarc.Record()
        writer.write(record)
        writer.close(close_fh=False)
        # assertEquals is a deprecated alias, removed in Python 3.12
        self.assertEqual(file_handle.getvalue(), expected)
    finally:
        file_handle.close()
def df_to_mrc(df, field_delimiter, path_out, txt_error_file):
    """Convert a DataFrame of MARC-like rows into a binary .mrc file.

    Each row is one record: the 'LDR' column becomes the leader, numeric
    columns < 010 become control fields, others become data fields parsed
    from '$x...' subfield notation; multiple values per column are split
    on ``field_delimiter``.  Rows that raise ValueError are written to
    ``txt_error_file`` instead.
    """
    mrc_errors = []
    # treat whitespace-only cells as missing
    df = df.replace(r'^\s*$', np.nan, regex=True)
    outputfile = open(path_out, 'wb')
    errorfile = io.open(txt_error_file, 'wt', encoding='UTF-8')
    list_of_dicts = df.to_dict('records')
    for record in tqdm(list_of_dicts, total=len(list_of_dicts)):
        record = {k: v for k, v in record.items() if pd.notnull(v)}
        try:
            pymarc_record = pymarc.Record(to_unicode=True, force_utf8=True,
                                          leader=record['LDR'])
            del record['LDR']
            for k, v in record.items():
                v = v.split(field_delimiter)
                if int(k) < 10:
                    # control field (00X): raw data, no indicators/subfields
                    tag = k
                    data = ''.join(v)
                    marc_field = pymarc.Field(tag=tag, data=data)
                    pymarc_record.add_ordered_field(marc_field)
                else:
                    if len(v) == 1:
                        tag = k
                        # raw string: '\$' was an invalid escape sequence
                        record_in_list = re.split(r'\$(.)', ''.join(v))
                        indicators = list(record_in_list[0])
                        subfields = record_in_list[1:]
                        marc_field = pymarc.Field(tag=tag,
                                                  indicators=indicators,
                                                  subfields=subfields)
                        pymarc_record.add_ordered_field(marc_field)
                    else:
                        # repeated field: one pymarc.Field per element
                        for element in v:
                            tag = k
                            record_in_list = re.split(r'\$(.)', ''.join(element))
                            indicators = list(record_in_list[0])
                            subfields = record_in_list[1:]
                            marc_field = pymarc.Field(tag=tag,
                                                      indicators=indicators,
                                                      subfields=subfields)
                            pymarc_record.add_ordered_field(marc_field)
            outputfile.write(pymarc_record.as_marc())
        except ValueError:
            mrc_errors.append(record)
    if len(mrc_errors) > 0:
        for element in mrc_errors:
            errorfile.write(str(element) + '\n\n')
    errorfile.close()
    outputfile.close()
def export_record(parent, database, loader, config, pymarc_record=None, **kwargs):
    # type: (DBObject, Database, Dict[AnyStr, Any], Dict[AnyStr, Any], PymarcRecord, **Any) -> PymarcRecord
    """Recursively build a pymarc record from ``parent`` and its children.

    Generated fields from ``loader``/``config`` are stored first, then each
    sub-loader is applied to the child object(s) found as attributes of
    ``parent`` (singular attribute name first, then plural with an 's').
    The same record object accumulates fields across the recursion.
    """
    if pymarc_record is None:
        pymarc_record = pymarc.Record()
    generated_fields = get_generated_fields(loader, config)
    if generated_fields:
        pymarc_record = store_record(parent, pymarc_record, generated_fields)
    sub_loaders = get_loaders(loader, config)
    for child_table, sub_loader in sub_loaders.items():
        for sub_loader_i in sub_loader:
            # drop leading char of the table key to get the attribute name
            child_table = child_table[1:].lower()
            # bare 'except:' also swallowed KeyboardInterrupt/SystemExit;
            # getattr only raises AttributeError here
            try:
                children = [getattr(parent, child_table)]
            except AttributeError:
                try:
                    children = getattr(parent, child_table + "s")
                except AttributeError:
                    continue
            for child in children:
                export_record(child, database, sub_loader_i, config, pymarc_record)
    return pymarc_record
def test_add_grouped_fields(self):
    """Numeric tags added via add_grouped_field come back in grouped order."""
    record = pymarc.Record()
    for tag in ('999', '888', '111', 'abc', '666', '988', '998'):
        record.add_grouped_field(pymarc.Field(tag, ['0', '0'], ['a', 'foo']))
    # collect only the numeric tags, in iteration order
    grouped = [f.tag for f in record if f.tag.isdigit()]
    exp = ['111', '666', '888', '999', '988', '998']
    self.assertEqual(grouped, exp, "Fields are not grouped numerically")
def test_write(self):
    """Round-trip a single record through MARCWriter/MARCReader on disk."""
    # write a record off to a file.  file() is a Python 2 builtin removed
    # in Python 3; use open() with binary modes since MARC data is bytes.
    writer = pymarc.MARCWriter(open('test/writer-test.dat', 'wb'))
    record = pymarc.Record()
    field = pymarc.Field('245', ['0', '0'], ['a', 'foo'])
    record.add_field(field)
    writer.write(record)
    writer.close()
    # read it back in
    reader = pymarc.MARCReader(open('test/writer-test.dat', 'rb'))
    record = next(reader)
    # remove it
    os.remove('test/writer-test.dat')
def test_add_grouped_fields(self):
    """Numeric tags added via add_grouped_field come back in grouped order."""
    record = pymarc.Record()
    tags = ("999", "888", "111", "abc", "666", "988", "998")
    for tag in tags:
        record.add_grouped_field(pymarc.Field(tag, ["0", "0"], ["a", "foo"]))
    # keep only numeric tags, in the order the record yields them
    grouped = [field.tag for field in record if field.tag.isdigit()]
    expected = ["111", "666", "888", "999", "988", "998"]
    self.assertEqual(grouped, expected, "Fields are not grouped numerically")
def csvmarcwriter(file):
    """Read rows from the CSV at ``file`` (skipping the header) and write
    one MARC record per row to writer.mrc.

    Each row contributes an 001 (digits-only OCLC number), a local 974
    title field, and a 949 barcode field.
    """
    # Open your CSV file
    with open(file) as fh:
        itemlist = list(csv.reader(fh))
    # 'with' guarantees the output is closed even on error; 'wb' because
    # MARC transmission format is binary
    with open('writer.mrc', 'wb') as outputfile:
        # skip the header row
        for row in itemlist[1:]:
            item_load = pymarc.Record(to_unicode=True, force_utf8=True)
            # keep digits only in the OCLC number
            ocn = re.sub("[^0-9]", "", row[0])
            barcode = row[1]
            ltitle = row[2]
            item_load.add_ordered_field(pymarc.Field(tag='001', data=ocn))
            item_load.add_ordered_field(pymarc.Field(
                tag='974',
                indicators=[' ', ' '],
                subfields=['a', ltitle, '9', "LOCAL"],
            ))
            item_load.add_ordered_field(pymarc.Field(
                tag='949',
                indicators=[' ', ' '],
                subfields=['a', barcode]
            ))
            outputfile.write(item_load.as_marc())
def test_encoding(self):
    """Round-trip a record (with and without diacritics) through
    record_to_xml(encoding='utf-8') and parse_xml_to_array, then verify
    leader and every field survive unchanged."""
    # Create a record
    record1 = pymarc.Record()
    # Add a field containing no diacritics
    record1.add_field(
        pymarc.Field(
            tag='245', indicators=[' ', ' '],
            subfields=[
                'a', 'Report of the Committee on the Peaceful Uses of Outer Space'
            ]))
    # And a field containing diacritics
    record1.add_field(
        pymarc.Field(
            tag='246', indicators=[' ', ' '],
            subfields=[
                'a', "Rapport du Comité des utilisations pacifiques de l'espace extra-atmosphérique"
            ]))
    # Create XML with an encoding specified
    record_xml = pymarc.marcxml.record_to_xml(record1, encoding='utf-8')
    # Parse the generated XML
    record2 = pymarc.parse_xml_to_array(six.BytesIO(record_xml))[0]
    # Compare the two records. If the other tests above pass, and this one
    # passes, then the addition of an encoding parameter in the
    # marcxml.record_to_xml function didn't seem to break basic
    # functionality of the library.
    self.assertEqual(record1.leader, record2.leader)
    field1 = record1.get_fields()
    field2 = record2.get_fields()
    self.assertEqual(len(field1), len(field2))
    # walk both field lists in parallel and compare field-by-field
    pos = 0
    while pos < len(field1):
        self.assertEqual(field1[pos].tag, field2[pos].tag)
        if field1[pos].is_control_field():
            self.assertEqual(field1[pos].data, field2[pos].data)
        else:
            self.assertEqual(field1[pos].get_subfields(),
                             field2[pos].get_subfields())
            self.assertEqual(field1[pos].indicators, field2[pos].indicators)
        pos += 1
def map_node(node, mapping_fields=None):
    """ Map a node to a MARC21 record. Returns its serialised form. """
    if not mapping_fields:
        mapping_fields = find_marc_mapping(node.getSchema())
    if mapping_fields is None:
        raise LookupError(
            "Failed to find marc mappings for node %s with schema %s"
            % (node.id, node.getSchema()))
    # interpolate MARC field value templates using node attributes
    fields = {}
    for (field, subfield), field_template in mapping_fields:
        value = field_template(node)
        if not value:
            continue
        if field not in fields:
            fields[field] = {}
        fields[field][subfield] = str(value)
    # build MARC record, adding field indicators accordingly
    record = pymarc.Record()
    # dict.iteritems() is Python-2-only; items() works on both 2 and 3
    for field, subfields in sorted(fields.items()):
        subfields = sorted(subfields.items())
        # determine MARC field indicators
        ind1 = ind2 = '#'  # == "undefined"
        indicators = _indicators.get(field)
        if indicators:
            ind1, ind2 = indicators
        # support custom indicator value extraction functions (e.g. for URLs)
        if callable(ind1):
            ind1 = ind1(field, subfields)
        if callable(ind2):
            ind2 = ind2(field, subfields)
        record.add_field(
            pymarc.Field(field, indicators=[ind1, ind2],
                         subfields=list(chain(*subfields))))
    # serialise
    return record.as_marc()
def test_writing_1_record(self):
    """JSONWriter: a single two-field record serializes to the expected
    JSON array; compared as parsed JSON so formatting differences don't
    matter."""
    expected = json.loads(r""" [ { "leader" : " 22 4500", "fields" : [ { "100": { "ind1": "0", "ind2": "0", "subfields": [ { "a": "me" } ] } }, { "245": { "ind1": "0", "ind2": "0", "subfields": [ { "a": "Foo /" }, { "c": "by me." } ] } } ] } ] """)
    file_handle = StringIO()
    try:
        writer = pymarc.JSONWriter(file_handle)
        record = pymarc.Record()
        record.add_field(pymarc.Field('100', ['0', '0'], ['a', u('me')]))
        record.add_field(
            pymarc.Field(
                '245', ['0', '0'], ['a', u('Foo /'), 'c', u('by me.')]))
        writer.write(record)
        writer.close(close_fh=False)
        actual = json.loads(file_handle.getvalue())
        # assertEquals is a deprecated alias, removed in Python 3.12
        self.assertEqual(actual, expected)
    finally:
        file_handle.close()
def test_copy_utf8(self):
    """Copy leader and fields from a UTF-8 MARCXML fixture into a fresh
    forced-UTF-8 record and write it out as binary MARC; the output file
    is removed even if the write fails."""
    writer = pymarc.MARCWriter(open('test/write-utf8-test.dat', 'wb'))
    new_record = pymarc.Record(to_unicode=True, force_utf8=True)

    def process_xml(record):
        # map_xml callback: fold every parsed record into new_record
        new_record.leader = record.leader
        for field in record.get_fields():
            new_record.add_field(field)

    pymarc.map_xml(process_xml, 'test/utf8.xml')
    try:
        writer.write(new_record)
        writer.close()
    finally:
        # remove it
        os.remove('test/write-utf8-test.dat')
def test_writing_empty_record(self):
    """JSONWriter: an empty record serializes to a leader plus empty
    fields list; compared as parsed JSON."""
    expected = json.loads(r""" [ { "leader" : " 22 4500", "fields" : [] } ] """)
    file_handle = StringIO()
    try:
        writer = pymarc.JSONWriter(file_handle)
        record = pymarc.Record()
        writer.write(record)
        writer.close(close_fh=False)
        actual = json.loads(file_handle.getvalue())
        # assertEquals is a deprecated alias, removed in Python 3.12
        self.assertEqual(actual, expected)
    finally:
        file_handle.close()