def setUp(self):
    """Create the three Field fixtures stored on the test instance.

    ``self.field`` is a 245 field with subfields a/b, ``self.controlfield``
    is an 008 field built from ``data`` and ``self.subjectfield`` is a 650
    field with subfields a/v.
    """
    title_subfields = ['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey']
    subject_subfields = [
        'a', 'Python (Computer program language)',
        'v', 'Poetry.',
    ]

    self.field = Field(tag='245', indicators=[0, 1], subfields=title_subfields)
    self.controlfield = Field(tag='008', data='831227m19799999nyu ||| | ger ')
    self.subjectfield = Field(
        tag='650', indicators=[' ', '0'], subfields=subject_subfields)
class FieldTest(unittest.TestCase):
    """Unit tests for Field: construction, lookup, mutation and output."""

    def setUp(self):
        """Create the title, control and subject fields shared by the tests."""
        title_subfields = ["a", "Huckleberry Finn: ", "b", "An American Odyssey"]
        subject_subfields = [
            "a",
            "Python (Computer program language)",
            "v",
            "Poetry.",
        ]
        self.field = Field(tag="245", indicators=[0, 1], subfields=title_subfields)
        self.controlfield = Field(tag="008", data="831227m19799999nyu ||| | ger ")
        self.subjectfield = Field(
            tag="650", indicators=[" ", "0"], subfields=subject_subfields
        )

    def test_string(self):
        """str() of the data field shows tag, indicators and subfields."""
        expected = "=245 01$aHuckleberry Finn: $bAn American Odyssey"
        self.assertEqual(str(self.field), expected)

    def test_controlfield_string(self):
        """str() of the control field matches the expected escaped form."""
        # NOTE(review): the expected value has runs of backslashes where the
        # setUp literal shows single blanks -- presumably each backslash stands
        # for one blank; confirm the fixture's whitespace is intact.
        expected = r"=008 831227m19799999nyu\\\\\\\\\\\|||\|\ger\\"
        self.assertEqual(str(self.controlfield), expected)

    def test_indicators(self):
        """Integer indicators passed to the constructor read back as strings."""
        self.assertEqual(self.field.indicator1, "0")
        self.assertEqual(self.field.indicator2, "1")

    def test_subfields_created(self):
        """The flat subfields list holds two code/value pairs."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for a missing code."""
        self.assertEqual(self.field["a"], "Huckleberry Finn: ")
        self.assertEqual(self.field["z"], None)

    def test_subfields(self):
        """get_subfields() with one code returns a one-element list."""
        self.assertEqual(self.field.get_subfields("a"), ["Huckleberry Finn: "])
        self.assertEqual(
            self.subjectfield.get_subfields("a"),
            ["Python (Computer program language)"],
        )

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        title_values = self.field.get_subfields("a", "b")
        self.assertEqual(title_values, ["Huckleberry Finn: ", "An American Odyssey"])

        subject_values = self.subjectfield.get_subfields("a", "v")
        self.assertEqual(
            subject_values, ["Python (Computer program language)", "Poetry."]
        )

    def test_encode(self):
        """as_marc() with an explicit encoding must not raise."""
        self.field.as_marc(encoding="utf-8")

    def test_membership(self):
        """The ``in`` operator tests for a subfield code."""
        self.assertIn("a", self.field)
        self.assertNotIn("zzz", self.field)

    def test_iterator(self):
        """Iterating the field yields indexable (code, value) items."""
        joined = "".join(subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, "aHuckleberry Finn: bAn American Odyssey")

    def test_value(self):
        """value() returns the expected text for data and control fields."""
        self.assertEqual(
            self.field.value(), "Huckleberry Finn: An American Odyssey"
        )
        self.assertEqual(
            self.controlfield.value(), "831227m19799999nyu ||| | ger "
        )

    def test_non_integer_tag(self):
        """Constructing a Field with a non-integer tag must not raise."""
        Field(tag="3 0", indicators=[0, 1], subfields=["a", "foo"])

    def test_add_subfield(self):
        """add_subfield() appends by default and honours an explicit position."""
        field = Field(tag="245", indicators=[0, 1], subfields=["a", "foo"])
        steps = (
            (("a", "bar"), "=245 01$afoo$abar"),
            (("b", "baz", 0), "=245 01$bbaz$afoo$abar"),
            (("c", "qux", 2), "=245 01$bbaz$afoo$cqux$abar"),
            # a position past the end is expected to append
            (("z", "wat", 8), "=245 01$bbaz$afoo$cqux$abar$zwat"),
        )
        for arguments, expected in steps:
            field.add_subfield(*arguments)
            self.assertEqual(str(field), expected)

    def test_delete_subfield(self):
        """delete_subfield() returns the removed value, or None if absent."""
        field = Field(
            tag="200",
            indicators=[0, 1],
            subfields=["a", "My Title", "a", "Kinda Bogus Anyhow"],
        )
        expectations = (
            ("z", None),
            ("a", "My Title"),
            ("a", "Kinda Bogus Anyhow"),
        )
        for code, removed in expectations:
            self.assertEqual(field.delete_subfield(code), removed)
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """is_subject_field() is True for the 650 field, False for the 245."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() output does not include an added subfield 6."""
        self.subjectfield.add_subfield("6", "880-4")
        self.assertEqual(
            self.subjectfield.format_field(),
            "Python (Computer program language) -- Poetry.",
        )

        self.field.add_subfield("6", "880-1")
        self.assertEqual(
            self.field.format_field(), "Huckleberry Finn: An American Odyssey"
        )

    def test_tag_normalize(self):
        """A two-character tag is reported zero-padded to three characters."""
        padded = Field(tag="42", indicators=["", ""])
        self.assertEqual(padded.tag, "042")

    def test_alphatag(self):
        """An alphabetic tag is kept as given and is not a control field."""
        alpha = Field(tag="CAT", indicators=[0, 1], subfields=["a", "foo"])
        self.assertEqual(alpha.tag, "CAT")
        self.assertEqual(alpha["a"], "foo")
        self.assertEqual(alpha.is_control_field(), False)

    def test_setitem_no_key(self):
        """Assigning to a code that is not present raises KeyError."""
        try:
            self.field["h"] = "error"
        except KeyError:
            pass
        except Exception as exc:
            self.fail("Unexpected exception thrown: %s" % exc)
        else:
            self.fail("KeyError not thrown")

    def test_setitem_repeated_key(self):
        """Assigning to a code that occurs twice raises KeyError."""
        try:
            self.field.add_subfield("a", "bar")
            self.field["a"] = "error"
        except KeyError:
            pass
        except Exception as exc:
            self.fail("Unexpected exception thrown: %s" % exc)
        else:
            self.fail("KeyError not thrown")

    def test_iter_over_controlfield(self):
        """Iterating a control field must not raise AttributeError."""
        try:
            for _ in self.controlfield:
                pass
        except AttributeError as e:
            self.fail("Error during iteration: %s" % e)

    def test_setitem(self):
        """Assigning to an existing, unique code replaces its value."""
        self.field["a"] = "changed"
        self.assertEqual(self.field["a"], "changed")

    def test_delete_subfield_only_by_code(self):
        """delete_subfield() matches on the code, not on the value."""
        # passing a value must leave subfield b untouched
        self.field.delete_subfield("An American Odyssey")
        self.assertEqual(self.field["b"], "An American Odyssey")

        self.field.delete_subfield("b")
        self.assertIsNone(self.field["b"])

    def test_set_indicators_affects_str(self):
        """Indicator changes via list or attribute show up in str()."""
        self.field.indicators[0] = "9"
        self.field.indicator2 = "9"
        expected = "=245 99$aHuckleberry Finn: $bAn American Odyssey"
        self.assertEqual(str(self.field), expected)

    def test_set_indicators_affects_marc(self):
        """Indicator changes via list or attribute show up in as_marc()."""
        self.field.indicators[0] = "9"
        self.field.indicator2 = "9"
        expected = b"99\x1faHuckleberry Finn: \x1fbAn American Odyssey\x1e"
        self.assertEqual(self.field.as_marc("utf-8"), expected)
def test_add_subfield(self):
    """add_subfield() appends by default and honours an explicit position."""
    field = Field(tag="245", indicators=[0, 1], subfields=["a", "foo"])
    steps = (
        (("a", "bar"), "=245 01$afoo$abar"),
        (("b", "baz", 0), "=245 01$bbaz$afoo$abar"),
        (("c", "qux", 2), "=245 01$bbaz$afoo$cqux$abar"),
        # a position past the end is expected to append
        (("z", "wat", 8), "=245 01$bbaz$afoo$cqux$abar$zwat"),
    )
    for arguments, expected in steps:
        field.add_subfield(*arguments)
        self.assertEqual(str(field), expected)
def decode_marc(self, marc, to_unicode=True, force_utf8=False,
                hide_utf8_warnings=False, utf8_handling='strict',
                encoding='iso8859-1'):
    """
    Populate this object from a MARC record in transmission format.

    The Record constructor actually uses decode_marc() behind the scenes
    when you pass in a chunk of MARC data to it.

    Parameters:
        marc: the raw record; slices of it are decoded with ``.decode()``,
            so a bytes-like object is expected.
        to_unicode: when true, fields are built as ``Field`` with decoded
            text; otherwise as ``RawField`` with undecoded data.
        force_utf8: decode subfield data as UTF-8 even when leader
            position 9 is not ``'a'``.
        hide_utf8_warnings: forwarded to ``marc8_to_unicode``.
        utf8_handling: error handler passed to ``bytes.decode`` for UTF-8
            subfield data.
        encoding: codec for control fields, and for subfield data when it
            is neither UTF-8 nor ``'iso8859-1'`` (the latter goes through
            ``marc8_to_unicode``).

    Raises:
        RecordLeaderInvalid: the leader is shorter than ``LEADER_LEN``.
        BaseAddressNotFound: the base address is zero or negative.
        BaseAddressInvalid: the base address lies beyond the record.
        RecordDirectoryInvalid: the directory length is not a multiple of
            ``DIRECTORY_ENTRY_LEN``.
        NoFieldsFound: the directory contains no entries.
    """
    # extract record leader
    self.leader = marc[0:LEADER_LEN].decode('ascii')
    if len(self.leader) != LEADER_LEN:
        raise RecordLeaderInvalid

    # NOTE(review): this reads self.force_utf8 while the subfield decoding
    # below reads the force_utf8 argument -- confirm that is intended.
    if self.leader[9] == 'a' or self.force_utf8:
        encoding = 'utf-8'

    # extract the byte offset where the record data starts
    base_address = int(marc[12:17])
    if base_address <= 0:
        raise BaseAddressNotFound
    if base_address >= len(marc):
        raise BaseAddressInvalid

    # extract directory, base_address-1 is used since the
    # directory ends with an END_OF_FIELD byte
    directory = marc[LEADER_LEN:base_address - 1].decode('ascii')

    # determine the number of fields in record
    if len(directory) % DIRECTORY_ENTRY_LEN != 0:
        raise RecordDirectoryInvalid
    # floor division keeps the count an int (true division gives a float)
    field_total = len(directory) // DIRECTORY_ENTRY_LEN

    # loop invariants, computed once
    subfield_delimiter = SUBFIELD_INDICATOR.encode('ascii')
    decode_as_utf8 = self.leader[9] == 'a' or force_utf8

    # add fields to our record using directory offsets
    for field_count in range(field_total):
        entry_start = field_count * DIRECTORY_ENTRY_LEN
        entry = directory[entry_start:entry_start + DIRECTORY_ENTRY_LEN]
        entry_tag = entry[0:3]
        entry_length = int(entry[3:7])
        entry_offset = int(entry[7:12])
        data_start = base_address + entry_offset
        # the final byte of the entry is left out of the field data
        entry_data = marc[data_start:data_start + entry_length - 1]

        # assume controlfields are numeric; replicates ruby-marc behavior
        if entry_tag < '010' and entry_tag.isdigit():
            if to_unicode:
                field = Field(tag=entry_tag,
                              data=entry_data.decode(encoding))
            else:
                field = RawField(tag=entry_tag, data=entry_data)
        else:
            subfields = []
            subs = entry_data.split(subfield_delimiter)

            # The MARC spec requires there to be two indicators in a
            # field. However experience in the wild has shown that
            # indicators are sometimes missing, and sometimes there
            # are too many. Rather than throwing an exception because
            # we can't find what we want and rejecting the field, or
            # barfing on the whole record we'll try to use what we can
            # find. This means missing indicators will be recorded as
            # blank spaces, and any more than 2 are dropped on the floor.
            indicator_text = subs[0].decode('ascii')
            if len(indicator_text) == 0:
                logging.warning("missing indicators: %s", entry_data)
            elif len(indicator_text) == 1:
                logging.warning("only 1 indicator found: %s", entry_data)
            elif len(indicator_text) > 2:
                logging.warning("more than 2 indicators found: %s",
                                entry_data)
            # pad with blanks, then keep exactly two characters
            first_indicator, second_indicator = indicator_text.ljust(2)[:2]

            for subfield in subs[1:]:
                if len(subfield) == 0:
                    continue
                code = subfield[0:1].decode('ascii')
                data = subfield[1:]

                if to_unicode:
                    if decode_as_utf8:
                        data = data.decode('utf-8', utf8_handling)
                    elif encoding == 'iso8859-1':
                        data = marc8_to_unicode(data, hide_utf8_warnings)
                    else:
                        data = data.decode(encoding)
                subfields.append(code)
                subfields.append(data)

            field_class = Field if to_unicode else RawField
            field = field_class(
                tag=entry_tag,
                indicators=[first_indicator, second_indicator],
                subfields=subfields,
            )
        self.add_field(field)

    if field_total == 0:
        raise NoFieldsFound
def test_delete_subfield(self):
    """delete_subfield() returns the removed value, or None if absent."""
    field = Field(
        tag='200',
        indicators=[0, 1],
        subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'],
    )
    expectations = (
        ('z', None),
        ('a', 'My Title'),
        ('a', 'Kinda Bogus Anyhow'),
    )
    for code, removed in expectations:
        self.assertEqual(field.delete_subfield(code), removed)
    self.assertEqual(len(field.subfields), 0)
def test_add_subfield(self):
    """add_subfield() appends the new code/value after existing subfields."""
    field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
    field.add_subfield('a', 'bar')
    # assertEquals is a deprecated alias that newer unittest no longer has
    self.assertEqual(str(field), '=245 01$afoo$abar')
class FieldTest(unittest.TestCase):
    """Unit tests for Field: construction, lookup, mutation and output."""

    def setUp(self):
        """Create the title, control and subject fields shared by the tests."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey'],
        )
        self.controlfield = Field(
            tag='008',
            data='831227m19799999nyu ||| | ger ',
        )
        self.subjectfield = Field(
            tag='650',
            indicators=[' ', '0'],
            subfields=['a', 'Python (Computer program language)',
                       'v', 'Poetry.'],
        )

    def test_string(self):
        """str() of the data field shows tag, indicators and subfields."""
        self.assertEqual(
            str(self.field),
            '=245 01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of the control field matches the expected escaped form."""
        # NOTE(review): the expected value has runs of backslashes where the
        # setUp literal shows single blanks -- presumably each backslash stands
        # for one blank; confirm the fixture's whitespace is intact.
        self.assertEqual(
            str(self.controlfield),
            r'=008 831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Indicators read back equal to the integers given to the constructor."""
        # compare by value: identity against an int literal depends on
        # interpreter caching and triggers a SyntaxWarning
        self.assertEqual(self.field.indicator1, 0)
        self.assertEqual(self.field.indicator2, 1)

    def test_subfields_created(self):
        """The flat subfields list holds two code/value pairs."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for a missing code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertEqual(self.field['z'], None)

    def test_subfields(self):
        """get_subfields() with one code returns a one-element list."""
        self.assertEqual(self.field.get_subfields('a'),
                         ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """as_marc() must not raise."""
        self.field.as_marc()

    def test_iterator(self):
        """Iterating the field yields indexable (code, value) items."""
        joined = ''.join(subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() returns the expected text for data and control fields."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu ||| | ger ')

    def test_non_integer_tag(self):
        """Constructing a Field with a non-integer tag must not raise."""
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """add_subfield() appends the new code/value after existing ones."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245 01$afoo$abar')

    def test_delete_subfield(self):
        """delete_subfield() returns the removed value, or None if absent."""
        field = Field(tag='200', indicators=[0, 1],
                      subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'])
        self.assertEqual(field.delete_subfield('z'), None)
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """is_subject_field() is True for the 650 field, False for the 245."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() returns the expected text for both data fields."""
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn: An American Odyssey')

    def test_tag_normalize(self):
        """A two-character tag is reported zero-padded to three characters."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept as given and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)
def test_non_integer_tag(self):
    """Constructing a Field with a non-integer tag must not raise."""
    Field(
        tag="3 0",
        indicators=[0, 1],
        subfields=["a", "foo"],
    )
class FieldTest(unittest.TestCase):
    """Unit tests for Field: construction, lookup, mutation and output."""

    def setUp(self):
        """Create the title, control and subject fields shared by the tests."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey'])
        self.controlfield = Field(
            tag='008',
            data='831227m19799999nyu ||| | ger ')
        self.subjectfield = Field(
            tag='650',
            indicators=[' ', '0'],
            subfields=['a', 'Python (Computer program language)',
                       'v', 'Poetry.'])

    def test_string(self):
        """str() of the data field shows tag, indicators and subfields."""
        self.assertEqual(
            str(self.field),
            '=245 01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of the control field matches the expected escaped form."""
        # NOTE(review): the expected value has runs of backslashes where the
        # setUp literal shows single blanks -- presumably each backslash stands
        # for one blank; confirm the fixture's whitespace is intact.
        self.assertEqual(
            str(self.controlfield),
            r'=008 831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Integer indicators passed to the constructor read back as strings."""
        self.assertEqual(self.field.indicator1, '0')
        self.assertEqual(self.field.indicator2, '1')

    def test_subfields_created(self):
        """The flat subfields list holds two code/value pairs."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for a missing code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertEqual(self.field['z'], None)

    def test_subfields(self):
        """get_subfields() with one code returns a one-element list."""
        self.assertEqual(self.field.get_subfields('a'),
                         ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """as_marc() with an explicit encoding must not raise."""
        self.field.as_marc(encoding='utf-8')

    def test_membership(self):
        """The ``in`` operator tests for a subfield code."""
        self.assertIn('a', self.field)
        self.assertNotIn('zzz', self.field)

    def test_iterator(self):
        """Iterating the field yields indexable (code, value) items."""
        joined = ''.join(subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() returns the expected text for data and control fields."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu ||| | ger ')

    def test_non_integer_tag(self):
        """Constructing a Field with a non-integer tag must not raise."""
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """add_subfield() appends by default and honours an explicit position."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245 01$afoo$abar')
        field.add_subfield('b', 'baz', 0)
        self.assertEqual(str(field), '=245 01$bbaz$afoo$abar')
        field.add_subfield('c', 'qux', 2)
        self.assertEqual(str(field), '=245 01$bbaz$afoo$cqux$abar')
        # a position past the end is expected to append
        field.add_subfield('z', 'wat', 8)
        self.assertEqual(str(field), '=245 01$bbaz$afoo$cqux$abar$zwat')

    def test_delete_subfield(self):
        """delete_subfield() returns the removed value, or None if absent."""
        field = Field(tag='200', indicators=[0, 1],
                      subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'])
        self.assertEqual(field.delete_subfield('z'), None)
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """is_subject_field() is True for the 650 field, False for the 245."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() output does not include an added subfield 6."""
        self.subjectfield.add_subfield('6', '880-4')
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.field.add_subfield('6', '880-1')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn: An American Odyssey')

    def test_tag_normalize(self):
        """A two-character tag is reported zero-padded to three characters."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept as given and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)

    def test_setitem_no_key(self):
        """Assigning to a code that is not present raises KeyError."""
        with self.assertRaises(KeyError):
            self.field['h'] = 'error'

    def test_setitem_repeated_key(self):
        """Assigning to a code that occurs twice raises KeyError."""
        self.field.add_subfield('a', 'bar')
        # only the assignment is expected to raise, not the setup above
        with self.assertRaises(KeyError):
            self.field['a'] = 'error'

    def test_iter_over_controlfield(self):
        """Iterating a control field must not raise AttributeError."""
        try:
            for _ in self.controlfield:
                pass
        except AttributeError as e:
            self.fail('Error during iteration: %s' % e)

    def test_setitem(self):
        """Assigning to an existing, unique code replaces its value."""
        self.field['a'] = 'changed'
        self.assertEqual(self.field['a'], 'changed')

    def test_delete_subfield_only_by_code(self):
        """delete_subfield() matches on the code, not on the value."""
        # passing a value must leave subfield b untouched
        self.field.delete_subfield('An American Odyssey')
        self.assertEqual(self.field['b'], 'An American Odyssey')
        self.field.delete_subfield('b')
        self.assertIsNone(self.field['b'])

    def test_set_indicators_affects_str(self):
        """Indicator changes via list or attribute show up in str()."""
        self.field.indicators[0] = '9'
        self.field.indicator2 = '9'
        self.assertEqual(
            str(self.field),
            '=245 99$aHuckleberry Finn: $bAn American Odyssey')

    def test_set_indicators_affects_marc(self):
        """Indicator changes via list or attribute show up in as_marc()."""
        self.field.indicators[0] = '9'
        self.field.indicator2 = '9'
        self.assertEqual(
            self.field.as_marc('utf-8'),
            b'99\x1faHuckleberry Finn: \x1fbAn American Odyssey\x1e')
def test_alphatag(self):
    """An alphabetic tag is kept as given and is not a control field."""
    alpha_field = Field(
        tag='CAT',
        indicators=[0, 1],
        subfields=['a', 'foo'],
    )
    self.assertEqual(alpha_field.tag, 'CAT')
    self.assertEqual(alpha_field['a'], 'foo')
    self.assertEqual(alpha_field.is_control_field(), False)
def test_tag_normalize(self):
    """A two-character tag is reported zero-padded to three characters."""
    empty_indicators = ['', '']
    short_tag_field = Field(tag='42', indicators=empty_indicators)
    self.assertEqual(short_tag_field.tag, '042')
def test_add_subfield(self):
    """add_subfield() appends the new code/value after existing subfields."""
    field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
    field.add_subfield('a', 'bar')
    # assertEquals is a deprecated alias that newer unittest no longer has
    self.assertEqual(str(field), '=245 01$afoo$abar')
class FieldTest(unittest.TestCase):
    """Unit tests for Field: construction, lookup, mutation and output."""

    def setUp(self):
        """Create the title, control and subject fields shared by the tests."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey'])
        self.controlfield = Field(
            tag='008',
            data='831227m19799999nyu ||| | ger ')
        self.subjectfield = Field(
            tag='650',
            indicators=[' ', '0'],
            subfields=['a', 'Python (Computer program language)',
                       'v', 'Poetry.'])

    def test_string(self):
        """str() of the data field shows tag, indicators and subfields."""
        self.assertEqual(
            str(self.field),
            '=245 01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of the control field matches the expected escaped form."""
        # NOTE(review): the expected value has runs of backslashes where the
        # setUp literal shows single blanks -- presumably each backslash stands
        # for one blank; confirm the fixture's whitespace is intact.
        self.assertEqual(
            str(self.controlfield),
            r'=008 831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Indicators read back equal to the integers given to the constructor."""
        # compare by value: identity against an int literal depends on
        # interpreter caching and triggers a SyntaxWarning
        self.assertEqual(self.field.indicator1, 0)
        self.assertEqual(self.field.indicator2, 1)

    def test_subfields_created(self):
        """The flat subfields list holds two code/value pairs."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for a missing code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertEqual(self.field['z'], None)

    def test_subfields(self):
        """get_subfields() with one code returns a one-element list."""
        self.assertEqual(self.field.get_subfields('a'),
                         ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """as_marc() must not raise."""
        self.field.as_marc()

    def test_iterator(self):
        """Iterating the field yields indexable (code, value) items."""
        joined = ''.join(subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() returns the expected text for data and control fields."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu ||| | ger ')

    def test_non_integer_tag(self):
        """Constructing a Field with a non-integer tag must not raise."""
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """add_subfield() appends the new code/value after existing ones."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245 01$afoo$abar')

    def test_delete_subfield(self):
        """delete_subfield() returns the removed value, or None if absent."""
        field = Field(tag='200', indicators=[0, 1],
                      subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'])
        self.assertEqual(field.delete_subfield('z'), None)
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """is_subject_field() is True for the 650 field, False for the 245."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() returns the expected text for both data fields."""
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn: An American Odyssey')

    def test_tag_normalize(self):
        """A two-character tag is reported zero-padded to three characters."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept as given and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)
class FieldTest(unittest.TestCase):
    """Unit tests for Field: construction, lookup, mutation and output."""

    def setUp(self):
        """Create the title, control and subject fields shared by the tests."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey'])
        self.controlfield = Field(
            tag='008',
            data='831227m19799999nyu ||| | ger ')
        self.subjectfield = Field(
            tag='650',
            indicators=[' ', '0'],
            subfields=['a', 'Python (Computer program language)',
                       'v', 'Poetry.'])

    def test_string(self):
        """str() of the data field shows tag, indicators and subfields."""
        self.assertEqual(
            str(self.field),
            '=245 01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of the control field matches the expected escaped form."""
        # NOTE(review): the expected value has runs of backslashes where the
        # setUp literal shows single blanks -- presumably each backslash stands
        # for one blank; confirm the fixture's whitespace is intact.
        self.assertEqual(
            str(self.controlfield),
            r'=008 831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Integer indicators passed to the constructor read back as strings."""
        self.assertEqual(self.field.indicator1, '0')
        self.assertEqual(self.field.indicator2, '1')

    def test_subfields_created(self):
        """The flat subfields list holds two code/value pairs."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for a missing code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertEqual(self.field['z'], None)

    def test_subfields(self):
        """get_subfields() with one code returns a one-element list."""
        self.assertEqual(self.field.get_subfields('a'),
                         ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """as_marc() with an explicit encoding must not raise."""
        self.field.as_marc(encoding='utf-8')

    def test_membership(self):
        """The ``in`` operator tests for a subfield code."""
        self.assertIn('a', self.field)
        self.assertNotIn('zzz', self.field)

    def test_iterator(self):
        """Iterating the field yields indexable (code, value) items."""
        joined = ''.join(subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() returns the expected text for data and control fields."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu ||| | ger ')

    def test_non_integer_tag(self):
        """Constructing a Field with a non-integer tag must not raise."""
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """add_subfield() appends the new code/value after existing ones."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245 01$afoo$abar')

    def test_delete_subfield(self):
        """delete_subfield() returns the removed value, or None if absent."""
        field = Field(tag='200', indicators=[0, 1],
                      subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'])
        self.assertEqual(field.delete_subfield('z'), None)
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """is_subject_field() is True for the 650 field, False for the 245."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() output does not include an added subfield 6."""
        self.subjectfield.add_subfield('6', '880-4')
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.field.add_subfield('6', '880-1')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn: An American Odyssey')

    def test_tag_normalize(self):
        """A two-character tag is reported zero-padded to three characters."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept as given and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)

    def test_setitem_no_key(self):
        """Assigning to a code that is not present raises KeyError."""
        with self.assertRaises(KeyError):
            self.field['h'] = 'error'

    def test_setitem_repeated_key(self):
        """Assigning to a code that occurs twice raises KeyError."""
        self.field.add_subfield('a', 'bar')
        # only the assignment is expected to raise, not the setup above
        with self.assertRaises(KeyError):
            self.field['a'] = 'error'

    def test_iter_over_controlfield(self):
        """Iterating a control field must not raise AttributeError."""
        try:
            for _ in self.controlfield:
                pass
        except AttributeError as e:
            self.fail('Error during iteration: %s' % e)

    def test_setitem(self):
        """Assigning to an existing, unique code replaces its value."""
        self.field['a'] = 'changed'
        self.assertEqual(self.field['a'], 'changed')

    def test_delete_subfield_only_by_code(self):
        """delete_subfield() matches on the code, not on the value."""
        # passing a value must leave subfield b untouched
        self.field.delete_subfield('An American Odyssey')
        self.assertEqual(self.field['b'], 'An American Odyssey')
        self.field.delete_subfield('b')
        self.assertIsNone(self.field['b'])
def test_tag_normalize(self):
    """A two-character tag is reported zero-padded to three characters."""
    empty_indicators = ["", ""]
    short_tag_field = Field(tag="42", indicators=empty_indicators)
    self.assertEqual(short_tag_field.tag, "042")
def test_alphatag(self):
    """An alphabetic tag is kept as given and is not a control field."""
    alpha_field = Field(
        tag="CAT",
        indicators=[0, 1],
        subfields=["a", "foo"],
    )
    self.assertEqual(alpha_field.tag, "CAT")
    self.assertEqual(alpha_field["a"], "foo")
    self.assertEqual(alpha_field.is_control_field(), False)
def test_non_integer_tag(self):
    """Constructing a Field with a non-integer tag must not raise."""
    # the result is deliberately discarded: only construction is under test
    Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])
def test_add_subfield(self):
    """add_subfield() appends by default and honours an explicit position."""
    field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
    steps = (
        (('a', 'bar'), '=245 01$afoo$abar'),
        (('b', 'baz', 0), '=245 01$bbaz$afoo$abar'),
        (('c', 'qux', 2), '=245 01$bbaz$afoo$cqux$abar'),
        # a position past the end is expected to append
        (('z', 'wat', 8), '=245 01$bbaz$afoo$cqux$abar$zwat'),
    )
    for arguments, expected in steps:
        field.add_subfield(*arguments)
        self.assertEqual(str(field), expected)
def test_pub_date(self):
    """pub_date() is None on an empty record and '20170123' after a 269
    field carrying that value in subfield a has been added."""
    record = Record()
    self.assertEqual(record.pub_date(), None)

    date_field = Field('269', [' ', ' '], subfields=['a', '20170123'])
    record.add_field(date_field)
    self.assertEqual(record.pub_date(), '20170123')
def decode_marc(self, marc, to_unicode=True, force_utf8=False,
                hide_utf8_warnings=False, utf8_handling='ignore'):
    """
    Populate this object from a MARC record in transmission format.

    This variant slices and splits ``marc`` without decoding it, so the
    record is presumably passed in as text -- TODO confirm with callers.

    Parameters:
        marc: the raw record.
        to_unicode: when true, fields are built as ``Field`` and subfield
            data is converted; otherwise ``RawField`` is used and the
            data is stored as found.
        force_utf8: treat subfield data as UTF-8 even when leader
            position 9 is not ``'a'``.
        hide_utf8_warnings: forwarded to ``marc8_to_unicode``.
        utf8_handling: error handler passed to ``decode`` for UTF-8
            subfield data.

    Returns:
        This object, after its fields have been added.

    Raises:
        RecordLeaderInvalid: the leader is shorter than ``LEADER_LEN``.
        BaseAddressNotFound: the base address is zero or negative.
        BaseAddressInvalid: the base address lies beyond the record.
        RecordDirectoryInvalid: the directory length is not a multiple of
            ``DIRECTORY_ENTRY_LEN``.
        NoFieldsFound: the directory contains no entries.
    """
    # extract record leader
    self.leader = marc[0:LEADER_LEN]
    if len(self.leader) != LEADER_LEN:
        raise RecordLeaderInvalid

    # extract the offset where the record data starts
    base_address = int(marc[12:17])
    if base_address <= 0:
        raise BaseAddressNotFound
    if base_address >= len(marc):
        raise BaseAddressInvalid

    # the last position before base_address is excluded from the directory
    directory = marc[LEADER_LEN:base_address - 1]
    if len(directory) % DIRECTORY_ENTRY_LEN != 0:
        raise RecordDirectoryInvalid
    # floor division keeps the count an int (true division gives a float)
    field_total = len(directory) // DIRECTORY_ENTRY_LEN

    # loop invariant, computed once
    decode_as_utf8 = self.leader[9] == 'a' or force_utf8

    for field_count in range(field_total):
        entry_start = field_count * DIRECTORY_ENTRY_LEN
        entry = directory[entry_start:entry_start + DIRECTORY_ENTRY_LEN]
        entry_tag = entry[0:3]
        entry_length = int(entry[3:7])
        entry_offset = int(entry[7:12])
        data_start = base_address + entry_offset
        # the final position of the entry is left out of the field data
        entry_data = marc[data_start:data_start + entry_length - 1]

        # numeric tags below 010 are treated as control fields
        if entry_tag < '010' and entry_tag.isdigit():
            field_class = Field if to_unicode else RawField
            field = field_class(tag=entry_tag, data=entry_data)
        else:
            subfields = []
            subs = entry_data.split(SUBFIELD_INDICATOR)

            # Missing indicators are recorded as blanks and anything past
            # the second one is ignored; each anomaly is logged.
            indicator_text = subs[0]
            if len(indicator_text) == 0:
                logging.warning("missing indicators: %s", entry_data)
            elif len(indicator_text) == 1:
                logging.warning("only 1 indicator found: %s", entry_data)
            elif len(indicator_text) > 2:
                logging.warning("more than 2 indicators found: %s",
                                entry_data)
            first_indicator = indicator_text[0] if len(indicator_text) >= 1 else ' '
            second_indicator = indicator_text[1] if len(indicator_text) >= 2 else ' '

            for subfield in subs[1:]:
                if len(subfield) == 0:
                    continue
                code = subfield[0:1]
                data = subfield[1:]

                if to_unicode:
                    if decode_as_utf8:
                        # NOTE(review): encode-then-decode round trip kept
                        # as found; confirm it is still needed.
                        data = data.encode().decode('utf-8', utf8_handling)
                    else:
                        data = marc8_to_unicode(data, hide_utf8_warnings)
                subfields.append(code)
                subfields.append(data)

            field_class = Field if to_unicode else RawField
            field = field_class(
                tag=entry_tag,
                indicators=[first_indicator, second_indicator],
                subfields=subfields)
        self.add_field(field)

    if field_total == 0:
        raise NoFieldsFound
    return self
def test_delete_subfield(self):
    """delete_subfield() returns the removed value, or None if absent."""
    field = Field(tag='200', indicators=[0, 1],
                  subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'])
    # assertEquals is a deprecated alias that newer unittest no longer has
    self.assertEqual(field.delete_subfield('z'), None)
    self.assertEqual(field.delete_subfield('a'), 'My Title')
    self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
    self.assertEqual(len(field.subfields), 0)
def decode_marc(self, marc, to_unicode=False, force_utf8=False,
                hide_utf8_warnings=False, utf8_handling='strict'):
    """
    decode_marc() accepts a MARC record in transmission format as a
    string argument, and will populate the object based on the data
    found. The Record constructor actually uses decode_marc() behind
    the scenes when you pass in a chunk of MARC data to it.

    Parameters:
        marc: the raw record.
        to_unicode: when true, subfield data is converted; otherwise it
            is stored as found.
        force_utf8: decode subfield data as UTF-8 even when leader
            position 9 is not ``'a'``.
        hide_utf8_warnings: forwarded to ``marc8_to_unicode``.
        utf8_handling: error handler passed to ``decode`` for UTF-8
            subfield data.

    Raises:
        RecordLeaderInvalid: the leader is shorter than ``LEADER_LEN``.
        BaseAddressNotFound: the base address is zero or negative.
        BaseAddressInvalid: the base address lies beyond the record.
        RecordDirectoryInvalid: the directory length is not a multiple of
            ``DIRECTORY_ENTRY_LEN``.
        NoFieldsFound: the directory contains no entries.
    """
    # extract record leader
    self.leader = marc[0:LEADER_LEN]
    if len(self.leader) != LEADER_LEN:
        raise RecordLeaderInvalid

    # extract the byte offset where the record data starts
    base_address = int(marc[12:17])
    if base_address <= 0:
        raise BaseAddressNotFound
    if base_address >= len(marc):
        raise BaseAddressInvalid

    # extract directory, base_address-1 is used since the
    # directory ends with an END_OF_FIELD byte
    directory = marc[LEADER_LEN:base_address - 1]

    # determine the number of fields in record
    if len(directory) % DIRECTORY_ENTRY_LEN != 0:
        raise RecordDirectoryInvalid
    # floor division keeps the count an int on every Python version
    field_total = len(directory) // DIRECTORY_ENTRY_LEN

    # loop invariant, computed once
    decode_as_utf8 = self.leader[9] == 'a' or force_utf8

    # add fields to our record using directory offsets
    for field_count in range(field_total):
        entry_start = field_count * DIRECTORY_ENTRY_LEN
        entry = directory[entry_start:entry_start + DIRECTORY_ENTRY_LEN]
        entry_tag = entry[0:3]
        entry_length = int(entry[3:7])
        entry_offset = int(entry[7:12])
        data_start = base_address + entry_offset
        # the final position of the entry is left out of the field data
        entry_data = marc[data_start:data_start + entry_length - 1]

        # assume controlfields are numeric; replicates ruby-marc behavior
        if entry_tag < '010' and entry_tag.isdigit():
            field = Field(tag=entry_tag, data=entry_data)
        else:
            subfields = []
            subs = entry_data.split(SUBFIELD_INDICATOR)

            # make sure we've got the indicators and subfields we expected
            if len(subs) < 2 or len(subs[0]) != 2:
                # TODO: should we log a warning, or throw an exception
                # here? we are currently just moving forward
                first_indicator = second_indicator = ' '
            else:
                first_indicator = subs[0][0]
                second_indicator = subs[0][1]

            for subfield in subs[1:]:
                if len(subfield) == 0:
                    continue
                code = subfield[0]
                data = subfield[1:]

                if to_unicode:
                    if decode_as_utf8:
                        data = data.decode('utf-8', utf8_handling)
                    else:
                        data = marc8_to_unicode(data, hide_utf8_warnings)
                subfields.append(code)
                subfields.append(data)

            field = Field(
                tag=entry_tag,
                indicators=[first_indicator, second_indicator],
                subfields=subfields,
            )
        self.add_field(field)

    if field_total == 0:
        raise NoFieldsFound