Пример #1
0
    def setUp(self):
        """Create the title, control and subject fields shared by the tests."""
        title_subfields = ['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey']
        self.field = Field(tag='245', indicators=[0, 1], subfields=title_subfields)

        # A control field carries plain data instead of indicators/subfields.
        self.controlfield = Field(
            tag='008',
            data='831227m19799999nyu           ||| | ger  ',
        )

        subject_subfields = [
            'a', 'Python (Computer program language)',
            'v', 'Poetry.',
        ]
        self.subjectfield = Field(
            tag='650',
            indicators=[' ', '0'],
            subfields=subject_subfields,
        )
Пример #2
0
class FieldTest(unittest.TestCase):
    """Exercise ``Field`` construction, formatting and subfield handling."""

    def setUp(self):
        """Create the title (245), control (008) and subject (650) fixtures."""
        self.field = Field(
            tag="245",
            indicators=[0, 1],
            subfields=["a", "Huckleberry Finn: ", "b", "An American Odyssey"],
        )

        self.controlfield = Field(
            tag="008", data="831227m19799999nyu           ||| | ger  ")

        self.subjectfield = Field(
            tag="650",
            indicators=[" ", "0"],
            subfields=[
                "a", "Python (Computer program language)", "v", "Poetry."
            ],
        )

    def test_string(self):
        """str() of a data field shows tag, indicators and subfields."""
        self.assertEqual(str(self.field),
                         "=245  01$aHuckleberry Finn: $bAn American Odyssey")

    def test_controlfield_string(self):
        """str() of a control field shows blanks as backslashes."""
        self.assertEqual(str(self.controlfield),
                         r"=008  831227m19799999nyu\\\\\\\\\\\|||\|\ger\\")

    def test_indicators(self):
        """Integer indicators given to the constructor read back as strings."""
        self.assertEqual(self.field.indicator1, "0")
        self.assertEqual(self.field.indicator2, "1")

    def test_subfields_created(self):
        """The flat subfield list holds two code/value pairs (four items)."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for an absent code."""
        self.assertEqual(self.field["a"], "Huckleberry Finn: ")
        self.assertIsNone(self.field["z"])

    def test_subfields(self):
        """get_subfields() with one code returns the matching values."""
        self.assertEqual(self.field.get_subfields("a"), ["Huckleberry Finn: "])
        self.assertEqual(self.subjectfield.get_subfields("a"),
                         ["Python (Computer program language)"])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(
            self.field.get_subfields("a", "b"),
            ["Huckleberry Finn: ", "An American Odyssey"],
        )
        self.assertEqual(
            self.subjectfield.get_subfields("a", "v"),
            ["Python (Computer program language)", "Poetry."],
        )

    def test_encode(self):
        """Smoke test: serialising to MARC as UTF-8 must not raise."""
        self.field.as_marc(encoding="utf-8")

    def test_membership(self):
        """The ``in`` operator reports whether a subfield code is present."""
        self.assertIn("a", self.field)
        self.assertNotIn("zzz", self.field)

    def test_iterator(self):
        """Iterating a data field yields (code, value) pairs in order."""
        joined = "".join(
            subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, "aHuckleberry Finn: bAn American Odyssey")

    def test_value(self):
        """value() concatenates subfield values, or returns control data."""
        self.assertEqual(self.field.value(),
                         "Huckleberry Finn: An American Odyssey")
        self.assertEqual(self.controlfield.value(),
                         "831227m19799999nyu           ||| | ger  ")

    def test_non_integer_tag(self):
        """Constructing a field with a non-numeric tag must not raise."""
        Field(tag="3 0", indicators=[0, 1], subfields=["a", "foo"])

    def test_add_subfield(self):
        """add_subfield() appends by default; a third argument positions it."""
        field = Field(tag="245", indicators=[0, 1], subfields=["a", "foo"])
        field.add_subfield("a", "bar")
        self.assertEqual(str(field), "=245  01$afoo$abar")
        field.add_subfield("b", "baz", 0)
        self.assertEqual(str(field), "=245  01$bbaz$afoo$abar")
        field.add_subfield("c", "qux", 2)
        self.assertEqual(str(field), "=245  01$bbaz$afoo$cqux$abar")
        # A position past the end is expected to land at the end.
        field.add_subfield("z", "wat", 8)
        self.assertEqual(str(field), "=245  01$bbaz$afoo$cqux$abar$zwat")

    def test_delete_subfield(self):
        """delete_subfield() removes one match per call and returns its value."""
        field = Field(
            tag="200",
            indicators=[0, 1],
            subfields=["a", "My Title", "a", "Kinda Bogus Anyhow"],
        )
        self.assertIsNone(field.delete_subfield("z"))
        self.assertEqual(field.delete_subfield("a"), "My Title")
        self.assertEqual(field.delete_subfield("a"), "Kinda Bogus Anyhow")
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """Only the 650 fixture is reported as a subject field."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() output must not include an added $6 subfield."""
        self.subjectfield.add_subfield("6", "880-4")
        self.assertEqual(
            self.subjectfield.format_field(),
            "Python (Computer program language) -- Poetry.",
        )
        self.field.add_subfield("6", "880-1")
        self.assertEqual(self.field.format_field(),
                         "Huckleberry Finn:  An American Odyssey")

    def test_tag_normalize(self):
        """A two-character numeric tag is left-padded to three characters."""
        f = Field(tag="42", indicators=["", ""])
        self.assertEqual(f.tag, "042")

    def test_alphatag(self):
        """An alphabetic tag is kept verbatim and is not a control field."""
        f = Field(tag="CAT", indicators=[0, 1], subfields=["a", "foo"])
        self.assertEqual(f.tag, "CAT")
        self.assertEqual(f["a"], "foo")
        self.assertEqual(f.is_control_field(), False)

    def test_setitem_no_key(self):
        """Assigning to a subfield code that does not exist raises KeyError."""
        with self.assertRaises(KeyError):
            self.field["h"] = "error"

    def test_setitem_repeated_key(self):
        """Assigning to a code that occurs more than once raises KeyError."""
        # Kept outside the assertRaises block so that a KeyError raised by
        # the setup step cannot make the test pass by accident.
        self.field.add_subfield("a", "bar")
        with self.assertRaises(KeyError):
            self.field["a"] = "error"

    def test_iter_over_controlfield(self):
        """Iterating a control field must not raise AttributeError."""
        try:
            for _ in self.controlfield:
                pass
        except AttributeError as e:
            self.fail("Error during iteration: %s" % e)

    def test_setitem(self):
        """Assigning to an existing, unique subfield code replaces its value."""
        self.field["a"] = "changed"
        self.assertEqual(self.field["a"], "changed")

    def test_delete_subfield_only_by_code(self):
        """delete_subfield() matches on the code, never on the value."""
        self.field.delete_subfield("An American Odyssey")
        self.assertEqual(self.field["b"], "An American Odyssey")
        self.field.delete_subfield("b")
        self.assertIsNone(self.field["b"])

    def test_set_indicators_affects_str(self):
        """Indicator changes via the list or the attribute show up in str()."""
        self.field.indicators[0] = "9"
        self.field.indicator2 = "9"
        self.assertEqual(str(self.field),
                         "=245  99$aHuckleberry Finn: $bAn American Odyssey")

    def test_set_indicators_affects_marc(self):
        """Indicator changes show up in the serialised MARC bytes."""
        self.field.indicators[0] = "9"
        self.field.indicator2 = "9"
        self.assertEqual(
            self.field.as_marc("utf-8"),
            b"99\x1faHuckleberry Finn: \x1fbAn American Odyssey\x1e",
        )
Пример #3
0
 def test_add_subfield(self):
     """Check appending a subfield and inserting at explicit positions."""
     field = Field(tag="245", indicators=[0, 1], subfields=["a", "foo"])

     # Without a position the new subfield goes to the end.
     field.add_subfield("a", "bar")
     self.assertEqual(str(field), "=245  01$afoo$abar")

     # (code, value, position, expected rendering after the insertion)
     positioned_additions = [
         ("b", "baz", 0, "=245  01$bbaz$afoo$abar"),
         ("c", "qux", 2, "=245  01$bbaz$afoo$cqux$abar"),
         ("z", "wat", 8, "=245  01$bbaz$afoo$cqux$abar$zwat"),
     ]
     for code, value, position, expected in positioned_additions:
         field.add_subfield(code, value, position)
         self.assertEqual(str(field), expected)
Пример #4
0
    def decode_marc(self,
                    marc,
                    to_unicode=True,
                    force_utf8=False,
                    hide_utf8_warnings=False,
                    utf8_handling='strict',
                    encoding='iso8859-1'):
        """Populate this record from a MARC record in transmission format.

        The leader, the directory and every field listed in the directory
        are parsed out of ``marc``; each field is appended through
        ``self.add_field``.

        Args:
            marc: the raw record as a bytes object (it is sliced and
                ``.decode()``-d here).
            to_unicode: when true, data is decoded to text and stored in
                ``Field`` objects; otherwise the raw bytes are kept in
                ``RawField`` objects.
            force_utf8: decode subfield data as UTF-8 even when leader
                position 9 is not ``'a'``.
            hide_utf8_warnings: forwarded to ``marc8_to_unicode``.
            utf8_handling: error handler used when decoding UTF-8
                subfield data.
            encoding: codec for control-field data, and for subfield data
                that is neither UTF-8 nor ``'iso8859-1'`` (the latter is
                routed through ``marc8_to_unicode``).

        Raises:
            RecordLeaderInvalid: the leader is shorter than ``LEADER_LEN``.
            BaseAddressNotFound: the base address is zero or negative.
            BaseAddressInvalid: the base address lies at or past the end
                of ``marc``.
            RecordDirectoryInvalid: the directory length is not a multiple
                of ``DIRECTORY_ENTRY_LEN``.
            NoFieldsFound: the directory contains no entries.
        """
        # extract record leader
        self.leader = marc[0:LEADER_LEN].decode('ascii')
        if len(self.leader) != LEADER_LEN:
            raise RecordLeaderInvalid

        # NOTE(review): this reads the instance attribute ``self.force_utf8``
        # while subfield decoding below reads the ``force_utf8`` argument;
        # confirm callers always keep the two in sync.
        if self.leader[9] == 'a' or self.force_utf8:
            encoding = 'utf-8'

        # extract the byte offset where the record data starts
        base_address = int(marc[12:17])
        if base_address <= 0:
            raise BaseAddressNotFound
        if base_address >= len(marc):
            raise BaseAddressInvalid

        # extract directory, base_address-1 is used since the
        # directory ends with an END_OF_FIELD byte
        directory = marc[LEADER_LEN:base_address - 1].decode('ascii')

        # determine the number of fields in record
        if len(directory) % DIRECTORY_ENTRY_LEN != 0:
            raise RecordDirectoryInvalid
        # Floor division keeps the count an int; "/" would give a float
        # on Python 3.
        field_total = len(directory) // DIRECTORY_ENTRY_LEN

        # Loop-invariant values, computed once.
        subfield_delimiter = SUBFIELD_INDICATOR.encode('ascii')
        subfields_are_utf8 = self.leader[9] == 'a' or force_utf8

        # add fields to our record using directory offsets
        for field_index in range(field_total):
            entry_start = field_index * DIRECTORY_ENTRY_LEN
            entry = directory[entry_start:entry_start + DIRECTORY_ENTRY_LEN]
            entry_tag = entry[0:3]
            entry_length = int(entry[3:7])
            entry_offset = int(entry[7:12])
            data_start = base_address + entry_offset
            # The last byte of the field is dropped (presumably its
            # END_OF_FIELD terminator).
            entry_data = marc[data_start:data_start + entry_length - 1]

            # assume controlfields are numeric; replicates ruby-marc behavior
            if entry_tag < '010' and entry_tag.isdigit():
                if to_unicode:
                    field = Field(tag=entry_tag,
                                  data=entry_data.decode(encoding))
                else:
                    field = RawField(tag=entry_tag, data=entry_data)
            else:
                subs = entry_data.split(subfield_delimiter)

                # The MARC spec requires there to be two indicators in a
                # field. However experience in the wild has shown that
                # indicators are sometimes missing, and sometimes there
                # are too many. Rather than throwing an exception because
                # we can't find what we want and rejecting the field, or
                # barfing on the whole record we'll try to use what we can
                # find. This means missing indicators will be recorded as
                # blank spaces, and any more than 2 are dropped on the floor.
                indicator_text = subs[0].decode('ascii')
                indicator_count = len(indicator_text)
                if indicator_count == 0:
                    logging.warning("missing indicators: %s", entry_data)
                elif indicator_count == 1:
                    logging.warning("only 1 indicator found: %s", entry_data)
                elif indicator_count > 2:
                    logging.warning("more than 2 indicators found: %s",
                                    entry_data)
                # Pad with blanks, then keep exactly the first two.
                first_indicator, second_indicator = (indicator_text + '  ')[:2]

                subfields = []
                for subfield in subs[1:]:
                    if len(subfield) == 0:
                        continue
                    code = subfield[0:1].decode('ascii')
                    data = subfield[1:]

                    if to_unicode:
                        if subfields_are_utf8:
                            data = data.decode('utf-8', utf8_handling)
                        elif encoding == 'iso8859-1':
                            data = marc8_to_unicode(data, hide_utf8_warnings)
                        else:
                            data = data.decode(encoding)
                    subfields.append(code)
                    subfields.append(data)

                field_class = Field if to_unicode else RawField
                field = field_class(
                    tag=entry_tag,
                    indicators=[first_indicator, second_indicator],
                    subfields=subfields,
                )
            self.add_field(field)

        if field_total == 0:
            raise NoFieldsFound
Пример #5
0
 def test_delete_subfield(self):
     """Deleting by code returns the removed value, or None if absent."""
     field = Field(
         tag='200',
         indicators=[0, 1],
         subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'],
     )
     # (code to delete, value the call is expected to return)
     expected_removals = (
         ('z', None),
         ('a', 'My Title'),
         ('a', 'Kinda Bogus Anyhow'),
     )
     for code, removed_value in expected_removals:
         self.assertEqual(field.delete_subfield(code), removed_value)
     self.assertEqual(len(field.subfields), 0)
Пример #6
0
 def test_add_subfield(self):
     """A second 'a' subfield is appended after the existing one."""
     field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
     field.add_subfield('a', 'bar')
     # assertEquals is a deprecated alias that Python 3.12 removed.
     self.assertEqual(str(field), '=245  01$afoo$abar')
Пример #7
0
class FieldTest(unittest.TestCase):
    """Exercise ``Field`` construction, formatting and subfield handling."""

    def setUp(self):
        """Create the title (245), control (008) and subject (650) fixtures."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=[
                'a', 'Huckleberry Finn: ',
                'b', 'An American Odyssey',
            ],
        )

        self.controlfield = Field(
            tag='008',
            data='831227m19799999nyu           ||| | ger  ',
        )

        self.subjectfield = Field(
            tag='650',
            indicators=[' ', '0'],
            subfields=[
                'a', 'Python (Computer program language)',
                'v', 'Poetry.',
            ],
        )

    def test_string(self):
        """str() of a data field shows tag, indicators and subfields."""
        self.assertEqual(
            str(self.field),
            '=245  01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of a control field shows blanks as backslashes."""
        self.assertEqual(
            str(self.controlfield),
            r'=008  831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Indicators read back as the integers given to the constructor."""
        # Compared by value: "is 0" tests identity against a literal and
        # a bare assert is stripped under "python -O".
        self.assertEqual(self.field.indicator1, 0)
        self.assertEqual(self.field.indicator2, 1)

    def test_subfields_created(self):
        """The flat subfield list holds two code/value pairs (four items)."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for an absent code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertIsNone(self.field['z'])

    def test_subfields(self):
        """get_subfields() with one code returns the matching values."""
        self.assertEqual(self.field.get_subfields('a'),
                         ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """Smoke test: serialising to MARC must not raise."""
        self.field.as_marc()

    def test_iterator(self):
        """Iterating a data field yields (code, value) pairs in order."""
        joined = ''.join(
            subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() concatenates subfield values, or returns control data."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu           ||| | ger  ')

    def test_non_integer_tag(self):
        """Constructing a field with a non-numeric tag must not raise."""
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """A second 'a' subfield is appended after the existing one."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245  01$afoo$abar')

    def test_delete_subfield(self):
        """delete_subfield() removes one match per call and returns its value."""
        field = Field(
            tag='200',
            indicators=[0, 1],
            subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'],
        )
        self.assertIsNone(field.delete_subfield('z'))
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """Only the 650 fixture is reported as a subject field."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() renders the subject and title fixtures as text."""
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn:  An American Odyssey')

    def test_tag_normalize(self):
        """A two-character numeric tag is left-padded to three characters."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept verbatim and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)
Пример #8
0
 def test_non_integer_tag(self):
     """Smoke test: a tag that is not a plain integer must be accepted."""
     # The test passes as long as the constructor does not raise.
     Field(
         tag="3 0",
         indicators=[0, 1],
         subfields=["a", "foo"],
     )
Пример #9
0
class FieldTest(unittest.TestCase):
    """Exercise ``Field`` construction, formatting and subfield handling."""

    def setUp(self):
        """Create the title (245), control (008) and subject (650) fixtures."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey'])

        self.controlfield = Field(
            tag='008', data='831227m19799999nyu           ||| | ger  ')

        self.subjectfield = Field(tag='650',
                                  indicators=[' ', '0'],
                                  subfields=[
                                      'a',
                                      'Python (Computer program language)',
                                      'v', 'Poetry.'
                                  ])

    def test_string(self):
        """str() of a data field shows tag, indicators and subfields."""
        self.assertEqual(str(self.field),
                         '=245  01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of a control field shows blanks as backslashes."""
        self.assertEqual(str(self.controlfield),
                         r'=008  831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Integer indicators given to the constructor read back as strings."""
        self.assertEqual(self.field.indicator1, '0')
        self.assertEqual(self.field.indicator2, '1')

    def test_subfields_created(self):
        """The flat subfield list holds two code/value pairs (four items)."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for an absent code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertIsNone(self.field['z'])

    def test_subfields(self):
        """get_subfields() with one code returns the matching values."""
        self.assertEqual(self.field.get_subfields('a'), ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """Smoke test: serialising to MARC as UTF-8 must not raise."""
        self.field.as_marc(encoding='utf-8')

    def test_membership(self):
        """The ``in`` operator reports whether a subfield code is present."""
        self.assertIn('a', self.field)
        self.assertNotIn('zzz', self.field)

    def test_iterator(self):
        """Iterating a data field yields (code, value) pairs in order."""
        joined = ''.join(
            subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() concatenates subfield values, or returns control data."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu           ||| | ger  ')

    def test_non_integer_tag(self):
        """Constructing a field with a non-numeric tag must not raise."""
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """add_subfield() appends by default; a third argument positions it."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245  01$afoo$abar')
        field.add_subfield('b', 'baz', 0)
        self.assertEqual(str(field), '=245  01$bbaz$afoo$abar')
        field.add_subfield('c', 'qux', 2)
        self.assertEqual(str(field), '=245  01$bbaz$afoo$cqux$abar')
        # A position past the end is expected to land at the end.
        field.add_subfield('z', 'wat', 8)
        self.assertEqual(str(field), '=245  01$bbaz$afoo$cqux$abar$zwat')

    def test_delete_subfield(self):
        """delete_subfield() removes one match per call and returns its value."""
        field = Field(tag='200',
                      indicators=[0, 1],
                      subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'])
        self.assertIsNone(field.delete_subfield('z'))
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """Only the 650 fixture is reported as a subject field."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() output must not include an added $6 subfield."""
        self.subjectfield.add_subfield('6', '880-4')
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.field.add_subfield('6', '880-1')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn:  An American Odyssey')

    def test_tag_normalize(self):
        """A two-character numeric tag is left-padded to three characters."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept verbatim and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)

    def test_setitem_no_key(self):
        """Assigning to a subfield code that does not exist raises KeyError."""
        with self.assertRaises(KeyError):
            self.field['h'] = 'error'

    def test_setitem_repeated_key(self):
        """Assigning to a code that occurs more than once raises KeyError."""
        # Kept outside the assertRaises block so that a KeyError raised by
        # the setup step cannot make the test pass by accident.
        self.field.add_subfield('a', 'bar')
        with self.assertRaises(KeyError):
            self.field['a'] = 'error'

    def test_iter_over_controlfield(self):
        """Iterating a control field must not raise AttributeError."""
        try:
            for _ in self.controlfield:
                pass
        except AttributeError as e:
            self.fail('Error during iteration: %s' % e)

    def test_setitem(self):
        """Assigning to an existing, unique subfield code replaces its value."""
        self.field['a'] = 'changed'
        self.assertEqual(self.field['a'], 'changed')

    def test_delete_subfield_only_by_code(self):
        """delete_subfield() matches on the code, never on the value."""
        self.field.delete_subfield('An American Odyssey')
        self.assertEqual(self.field['b'], 'An American Odyssey')
        self.field.delete_subfield('b')
        self.assertIsNone(self.field['b'])

    def test_set_indicators_affects_str(self):
        """Indicator changes via the list or the attribute show up in str()."""
        self.field.indicators[0] = '9'
        self.field.indicator2 = '9'
        self.assertEqual(str(self.field),
                         '=245  99$aHuckleberry Finn: $bAn American Odyssey')

    def test_set_indicators_affects_marc(self):
        """Indicator changes show up in the serialised MARC bytes."""
        self.field.indicators[0] = '9'
        self.field.indicator2 = '9'
        self.assertEqual(
            self.field.as_marc('utf-8'),
            b'99\x1faHuckleberry Finn: \x1fbAn American Odyssey\x1e')
Пример #10
0
 def test_alphatag(self):
     """An alphabetic tag is stored as given and is not a control field."""
     alpha_field = Field(
         tag='CAT',
         indicators=[0, 1],
         subfields=['a', 'foo'],
     )
     self.assertEqual(alpha_field.tag, 'CAT')
     self.assertEqual(alpha_field['a'], 'foo')
     self.assertEqual(alpha_field.is_control_field(), False)
Пример #11
0
 def test_tag_normalize(self):
     """A two-character numeric tag is expected to read back zero-padded."""
     short_tag_field = Field(
         tag='42',
         indicators=['', ''],
     )
     self.assertEqual(short_tag_field.tag, '042')
Пример #12
0
 def test_add_subfield(self):
     """A second 'a' subfield is appended after the existing one."""
     field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
     field.add_subfield('a', 'bar')
     # assertEquals is a deprecated alias that Python 3.12 removed.
     self.assertEqual(str(field), '=245  01$afoo$abar')
Пример #13
0
class FieldTest(unittest.TestCase):
    """Exercise ``Field`` construction, formatting and subfield handling."""

    def setUp(self):
        """Create the title (245), control (008) and subject (650) fixtures."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=['a', 'Huckleberry Finn: ', 'b', 'An American Odyssey'])

        self.controlfield = Field(
            tag='008', data='831227m19799999nyu           ||| | ger  ')

        self.subjectfield = Field(tag='650',
                                  indicators=[' ', '0'],
                                  subfields=[
                                      'a',
                                      'Python (Computer program language)',
                                      'v', 'Poetry.'
                                  ])

    def test_string(self):
        """str() of a data field shows tag, indicators and subfields."""
        self.assertEqual(str(self.field),
                         '=245  01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of a control field shows blanks as backslashes."""
        self.assertEqual(str(self.controlfield),
                         r'=008  831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Indicators read back as the integers given to the constructor."""
        # Compared by value: "is 0" tests identity against a literal and
        # a bare assert is stripped under "python -O".
        self.assertEqual(self.field.indicator1, 0)
        self.assertEqual(self.field.indicator2, 1)

    def test_subfields_created(self):
        """The flat subfield list holds two code/value pairs (four items)."""
        self.assertEqual(len(self.field.subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for an absent code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertIsNone(self.field['z'])

    def test_subfields(self):
        """get_subfields() with one code returns the matching values."""
        self.assertEqual(self.field.get_subfields('a'), ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """Smoke test: serialising to MARC must not raise."""
        self.field.as_marc()

    def test_iterator(self):
        """Iterating a data field yields (code, value) pairs in order."""
        joined = ''.join(
            subfield[0] + subfield[1] for subfield in self.field)
        self.assertEqual(joined, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() concatenates subfield values, or returns control data."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu           ||| | ger  ')

    def test_non_integer_tag(self):
        """Constructing a field with a non-numeric tag must not raise."""
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """A second 'a' subfield is appended after the existing one."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245  01$afoo$abar')

    def test_delete_subfield(self):
        """delete_subfield() removes one match per call and returns its value."""
        field = Field(tag='200',
                      indicators=[0, 1],
                      subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'])
        self.assertIsNone(field.delete_subfield('z'))
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """Only the 650 fixture is reported as a subject field."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() renders the subject and title fixtures as text."""
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn:  An American Odyssey')

    def test_tag_normalize(self):
        """A two-character numeric tag is left-padded to three characters."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept verbatim and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)
Пример #14
0
class FieldTest(unittest.TestCase):
    """Unit tests for Field: construction, string form, subfield access and edits."""

    def setUp(self):
        """Create a title data field, a control field and a subject field."""
        self.field = Field(
            tag='245',
            indicators=[0, 1],
            subfields=[
                'a', 'Huckleberry Finn: ',
                'b', 'An American Odyssey',
            ],
        )

        self.controlfield = Field(
            tag='008',
            data='831227m19799999nyu           ||| | ger  ',
        )

        self.subjectfield = Field(
            tag='650',
            indicators=[' ', '0'],
            subfields=[
                'a', 'Python (Computer program language)',
                'v', 'Poetry.',
            ],
        )

    def test_string(self):
        """str() of a data field shows tag, indicators and $-prefixed subfields."""
        self.assertEqual(
            str(self.field),
            '=245  01$aHuckleberry Finn: $bAn American Odyssey')

    def test_controlfield_string(self):
        """str() of a control field shows each space in the data as a backslash."""
        self.assertEqual(
            str(self.controlfield),
            r'=008  831227m19799999nyu\\\\\\\\\\\|||\|\ger\\')

    def test_indicators(self):
        """Integer indicators passed to the constructor are exposed as strings."""
        self.assertEqual(self.field.indicator1, '0')
        self.assertEqual(self.field.indicator2, '1')

    def test_subfields_created(self):
        """The subfields list holds codes and values flat (2 subfields -> 4 items)."""
        subfields = self.field.subfields
        self.assertEqual(len(subfields), 4)

    def test_subfield_short(self):
        """Indexing by code returns the value, or None for an absent code."""
        self.assertEqual(self.field['a'], 'Huckleberry Finn: ')
        self.assertEqual(self.field['z'], None)

    def test_subfields(self):
        """get_subfields() with one code returns a list of matching values."""
        self.assertEqual(self.field.get_subfields('a'),
                         ['Huckleberry Finn: '])
        self.assertEqual(self.subjectfield.get_subfields('a'),
                         ['Python (Computer program language)'])

    def test_subfields_multi(self):
        """get_subfields() with several codes returns all matching values."""
        self.assertEqual(self.field.get_subfields('a', 'b'),
                         ['Huckleberry Finn: ', 'An American Odyssey'])
        self.assertEqual(self.subjectfield.get_subfields('a', 'v'),
                         ['Python (Computer program language)', 'Poetry.'])

    def test_encode(self):
        """as_marc() with an explicit encoding must not raise."""
        self.field.as_marc(encoding='utf-8')

    def test_membership(self):
        """The ``in`` operator tests for the presence of a subfield code."""
        self.assertTrue('a' in self.field)
        self.assertFalse('zzz' in self.field)

    def test_iterator(self):
        """Iterating a data field yields (code, value) pairs in order."""
        string = ""
        for subfield in self.field:
            string += subfield[0]
            string += subfield[1]
        self.assertEqual(string, 'aHuckleberry Finn: bAn American Odyssey')

    def test_value(self):
        """value() returns the field text without subfield codes."""
        self.assertEqual(self.field.value(),
                         'Huckleberry Finn: An American Odyssey')
        self.assertEqual(self.controlfield.value(),
                         '831227m19799999nyu           ||| | ger  ')

    def test_non_integer_tag(self):
        """Constructing a Field with a non-integer tag must not raise."""
        # The instance is not needed afterwards; returning normally is the
        # whole assertion.
        Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])

    def test_add_subfield(self):
        """add_subfield() adds a subfield after the existing ones."""
        field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])
        field.add_subfield('a', 'bar')
        self.assertEqual(str(field), '=245  01$afoo$abar')

    def test_delete_subfield(self):
        """delete_subfield() removes one subfield per call and returns its value."""
        field = Field(
            tag='200',
            indicators=[0, 1],
            subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'],
        )
        self.assertEqual(field.delete_subfield('z'), None)
        self.assertEqual(field.delete_subfield('a'), 'My Title')
        self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
        # Comparing the length directly reports the actual count on failure.
        self.assertEqual(len(field.subfields), 0)

    def test_is_subject_field(self):
        """Only the 650 fixture is reported as a subject field."""
        self.assertEqual(self.subjectfield.is_subject_field(), True)
        self.assertEqual(self.field.is_subject_field(), False)

    def test_format_field(self):
        """format_field() output is unchanged by an added subfield '6'."""
        self.subjectfield.add_subfield('6', '880-4')
        self.assertEqual(self.subjectfield.format_field(),
                         'Python (Computer program language) -- Poetry.')
        self.field.add_subfield('6', '880-1')
        self.assertEqual(self.field.format_field(),
                         'Huckleberry Finn:  An American Odyssey')

    def test_tag_normalize(self):
        """A two-character tag is expected to be exposed as '042'."""
        f = Field(tag='42', indicators=['', ''])
        self.assertEqual(f.tag, '042')

    def test_alphatag(self):
        """An alphabetic tag is kept as given and is not a control field."""
        f = Field(tag='CAT', indicators=[0, 1], subfields=['a', 'foo'])
        self.assertEqual(f.tag, 'CAT')
        self.assertEqual(f['a'], 'foo')
        self.assertEqual(f.is_control_field(), False)

    def test_setitem_no_key(self):
        """Assigning to a subfield code that is not present raises KeyError."""
        # assertRaises fails the test when nothing is raised; any other
        # exception type propagates and is reported by the test runner.
        with self.assertRaises(KeyError):
            self.field['h'] = 'error'

    def test_setitem_repeated_key(self):
        """Assigning to a code that occurs more than once raises KeyError."""
        # Setup stays outside the assertRaises block so that only the
        # assignment itself is allowed to raise the expected KeyError.
        self.field.add_subfield('a', 'bar')
        with self.assertRaises(KeyError):
            self.field['a'] = 'error'

    def test_iter_over_controlfield(self):
        """Iterating over a control field must not raise AttributeError."""
        try:
            list(self.controlfield)
        except AttributeError as e:
            self.fail('Error during iteration: %s' % e)

    def test_setitem(self):
        """Assigning to an existing, unique subfield code replaces its value."""
        self.field['a'] = 'changed'
        self.assertEqual(self.field['a'], 'changed')

    def test_delete_subfield_only_by_code(self):
        """delete_subfield() matches on the code, never on a subfield value."""
        # Passing a value instead of a code must leave the subfield intact.
        self.field.delete_subfield('An American Odyssey')
        self.assertEqual(self.field['b'], 'An American Odyssey')
        self.field.delete_subfield('b')
        self.assertTrue(self.field['b'] is None)
Пример #15
0
 def test_tag_normalize(self):
     """A two-character tag is expected to be exposed as "042"."""
     short_tag_field = Field(tag="42", indicators=["", ""])
     normalized_tag = short_tag_field.tag
     self.assertEqual(normalized_tag, "042")
Пример #16
0
 def test_alphatag(self):
     """An alphabetic tag is kept as given and the field is not a control field."""
     alpha_field = Field(
         tag="CAT",
         indicators=[0, 1],
         subfields=["a", "foo"],
     )
     self.assertEqual(alpha_field.tag, "CAT")
     self.assertEqual(alpha_field["a"], "foo")
     control_flag = alpha_field.is_control_field()
     self.assertEqual(control_flag, False)
Пример #17
0
 def test_non_integer_tag(self):
     """Constructing a Field with a non-integer tag must not raise."""
     # The instance is not needed afterwards: the test passes as long as
     # the constructor returns normally.
     Field(tag='3 0', indicators=[0, 1], subfields=['a', 'foo'])
Пример #18
0
 def test_alphatag(self):
     """An alphabetic tag is kept as given and the field is not a control field."""
     alpha_field = Field(
         tag='CAT',
         indicators=[0, 1],
         subfields=['a', 'foo'],
     )
     self.assertEqual(alpha_field.tag, 'CAT')
     self.assertEqual(alpha_field['a'], 'foo')
     control_flag = alpha_field.is_control_field()
     self.assertEqual(control_flag, False)
Пример #19
0
 def test_add_subfield(self):
     """add_subfield() appends by default and inserts at a given position.

     The optional third argument is the subfield position: 0 puts the new
     subfield first, 2 puts it third, and a position past the end (8)
     places it last.
     """
     field = Field(tag='245', indicators=[0, 1], subfields=['a', 'foo'])

     # str() is used rather than calling __str__() directly.
     field.add_subfield('a', 'bar')
     self.assertEqual(str(field), '=245  01$afoo$abar')

     field.add_subfield('b', 'baz', 0)
     self.assertEqual(str(field), '=245  01$bbaz$afoo$abar')

     field.add_subfield('c', 'qux', 2)
     self.assertEqual(str(field), '=245  01$bbaz$afoo$cqux$abar')

     field.add_subfield('z', 'wat', 8)
     self.assertEqual(str(field), '=245  01$bbaz$afoo$cqux$abar$zwat')
Пример #20
0
 def test_pub_date(self):
     """pub_date() is None on an empty record and reads 269$a once present."""
     rec = Record()
     self.assertEqual(rec.pub_date(), None)

     date_field = Field('269', [' ', ' '], subfields=['a', '20170123'])
     rec.add_field(date_field)
     self.assertEqual(rec.pub_date(), '20170123')
Пример #21
0
    def decode_marc(self,
                    marc,
                    to_unicode=True,
                    force_utf8=False,
                    hide_utf8_warnings=False,
                    utf8_handling='ignore'):
        """Populate this record from a MARC record in transmission format.

        The leader is stored on ``self.leader``, the directory is walked
        entry by entry, and one field per entry is passed to
        ``self.add_field``.

        Args:
            marc: The raw record. It is sliced, split and compared with text
                literals here, so it is presumably a ``str`` -- confirm
                against the callers.
            to_unicode: When true, fields are built as ``Field`` and subfield
                data is decoded; otherwise ``RawField`` is used and subfield
                data is left as found.
            force_utf8: Decode subfield data as UTF-8 even when position 9
                of the leader is not ``'a'``.
            hide_utf8_warnings: Passed through to ``marc8_to_unicode``.
            utf8_handling: Error-handling scheme passed to ``decode`` for
                UTF-8 data.

        Returns:
            This record (``self``).

        Raises:
            RecordLeaderInvalid: The input is shorter than the leader.
            BaseAddressNotFound: The base address is zero or negative.
            BaseAddressInvalid: The base address lies beyond the input.
            RecordDirectoryInvalid: The directory length is not a multiple
                of the directory entry length.
            NoFieldsFound: The directory contains no entries.
            ValueError: A numeric part of the leader or directory cannot be
                parsed by ``int``.
        """
        self.leader = marc[0:LEADER_LEN]
        if len(self.leader) != LEADER_LEN:
            raise RecordLeaderInvalid

        base_address = int(marc[12:17])
        if base_address <= 0:
            raise BaseAddressNotFound
        if base_address >= len(marc):
            raise BaseAddressInvalid

        # The directory sits between the leader and the base address; the
        # last position before the base address is left out of it.
        directory = marc[LEADER_LEN:base_address - 1]
        if len(directory) % DIRECTORY_ENTRY_LEN != 0:
            raise RecordDirectoryInvalid
        # Floor division keeps the count an integer; the modulo check above
        # guarantees the division is exact.
        field_total = len(directory) // DIRECTORY_ENTRY_LEN

        # Both choices are fixed for the whole record, so make them once.
        field_class = Field if to_unicode else RawField
        decode_as_utf8 = self.leader[9] == 'a' or force_utf8

        for field_index in range(field_total):
            entry_start = field_index * DIRECTORY_ENTRY_LEN
            entry = directory[entry_start:entry_start + DIRECTORY_ENTRY_LEN]
            entry_tag = entry[0:3]
            entry_length = int(entry[3:7])
            entry_offset = int(entry[7:12])
            field_start = base_address + entry_offset
            # The final position of the field is not part of its data.
            entry_data = marc[field_start:field_start + entry_length - 1]

            # Numeric tags below 010 are treated as control fields.
            if entry_tag < '010' and entry_tag.isdigit():
                field = field_class(tag=entry_tag, data=entry_data)
            else:
                subs = entry_data.split(SUBFIELD_INDICATOR)

                # Everything before the first subfield delimiter is the
                # indicator area; fall back to blanks for missing indicators.
                first_indicator = second_indicator = ' '
                indicator_count = len(subs[0])
                if indicator_count == 0:
                    logging.warning("missing indicators: %s", entry_data)
                elif indicator_count == 1:
                    logging.warning("only 1 indicator found: %s", entry_data)
                    first_indicator = subs[0][0]
                else:
                    if indicator_count > 2:
                        logging.warning("more than 2 indicators found: %s",
                                        entry_data)
                    first_indicator = subs[0][0]
                    second_indicator = subs[0][1]

                subfields = []
                for subfield in subs[1:]:
                    if not subfield:
                        continue
                    code = subfield[0:1]
                    data = subfield[1:]
                    if to_unicode:
                        if decode_as_utf8:
                            data = data.encode().decode('utf-8', utf8_handling)
                        else:
                            data = marc8_to_unicode(data, hide_utf8_warnings)
                    subfields.append(code)
                    subfields.append(data)

                field = field_class(
                    tag=entry_tag,
                    indicators=[first_indicator, second_indicator],
                    subfields=subfields)
            self.add_field(field)

        if field_total == 0:
            raise NoFieldsFound

        return self
Пример #22
0
 def test_delete_subfield(self):
     """delete_subfield() removes one subfield per call and returns its value.

     An unknown code yields None; repeated codes are removed in the order
     they appear, leaving the field empty afterwards.
     """
     field = Field(
         tag='200',
         indicators=[0, 1],
         subfields=['a', 'My Title', 'a', 'Kinda Bogus Anyhow'],
     )
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     self.assertEqual(field.delete_subfield('z'), None)
     self.assertEqual(field.delete_subfield('a'), 'My Title')
     self.assertEqual(field.delete_subfield('a'), 'Kinda Bogus Anyhow')
     # Comparing the length directly reports the actual count on failure.
     self.assertEqual(len(field.subfields), 0)
Пример #23
0
    def decode_marc(self,
                    marc,
                    to_unicode=False,
                    force_utf8=False,
                    hide_utf8_warnings=False,
                    utf8_handling='strict'):
        """
        decode_marc() accepts a MARC record in transmission format as a
        string argument, and will populate the object based on the data
        found. The Record constructor actually uses decode_marc() behind
        the scenes when you pass in a chunk of MARC data to it.

        Parameters:
            marc -- the raw record data to parse.
            to_unicode -- when true, subfield data is decoded; control
                field data is stored as found either way.
            force_utf8 -- decode subfield data as UTF-8 even when position
                9 of the leader is not 'a'.
            hide_utf8_warnings -- passed through to marc8_to_unicode().
            utf8_handling -- error-handling scheme passed to decode() for
                UTF-8 data.

        Raises RecordLeaderInvalid, BaseAddressNotFound, BaseAddressInvalid,
        RecordDirectoryInvalid or NoFieldsFound when the corresponding part
        of the record fails the checks below.

        A data field whose indicator area is not exactly two characters
        long (or that has no subfield delimiter at all) is still added, but
        with blank indicators and no subfields.
        """
        # extract record leader
        self.leader = marc[0:LEADER_LEN]
        if len(self.leader) != LEADER_LEN:
            raise RecordLeaderInvalid

        # extract the byte offset where the record data starts
        base_address = int(marc[12:17])
        if base_address <= 0:
            raise BaseAddressNotFound
        if base_address >= len(marc):
            raise BaseAddressInvalid

        # extract directory, base_address-1 is used since the
        # directory ends with an END_OF_FIELD byte
        directory = marc[LEADER_LEN:base_address - 1]

        # determine the number of fields in record
        if len(directory) % DIRECTORY_ENTRY_LEN != 0:
            raise RecordDirectoryInvalid
        field_total = len(directory) / DIRECTORY_ENTRY_LEN

        # add fields to our record using directory offsets
        field_count = 0
        while field_count < field_total:
            entry_start = field_count * DIRECTORY_ENTRY_LEN
            entry_end = entry_start + DIRECTORY_ENTRY_LEN
            entry = directory[entry_start:entry_end]
            # each directory entry is tag (3), field length (4), offset (5)
            entry_tag = entry[0:3]
            entry_length = int(entry[3:7])
            entry_offset = int(entry[7:12])
            # the last position of the field is left out of its data
            # (presumably the field terminator -- confirm)
            entry_data = marc[base_address + entry_offset:base_address +
                              entry_offset + entry_length - 1]

            # assume controlfields are numeric; replicates ruby-marc behavior
            if entry_tag < '010' and entry_tag.isdigit():
                field = Field(tag=entry_tag, data=entry_data)
            else:
                subfields = list()
                subs = entry_data.split(SUBFIELD_INDICATOR)

                # make sure we've got the indicators and subfields we expected
                if len(subs) < 2 or len(subs[0]) != 2:
                    # TODO: should we log a warning, or throw an exception here?
                    # we are currently just moving forward
                    # NOTE: subfields are only parsed in the else branch, so
                    # a field taking this path is created with none.
                    first_indicator = ' '
                    second_indicator = ' '
                else:
                    first_indicator = subs[0][0]
                    second_indicator = subs[0][1]
                    for subfield in subs[1:]:
                        # skip empty pieces left by adjacent delimiters
                        if len(subfield) == 0:
                            continue
                        code = subfield[0]
                        data = subfield[1:]

                        if to_unicode:
                            # leader position 9 == 'a' selects UTF-8 decoding;
                            # anything else goes through marc8_to_unicode()
                            if self.leader[9] == 'a' or force_utf8:
                                data = data.decode('utf-8', utf8_handling)
                            else:
                                data = marc8_to_unicode(
                                    data, hide_utf8_warnings)
                        # subfields is a flat list: code, data, code, data...
                        subfields.append(code)
                        subfields.append(data)
                field = Field(
                    tag=entry_tag,
                    indicators=[first_indicator, second_indicator],
                    subfields=subfields,
                )

            self.add_field(field)
            field_count += 1

        # an empty directory means there was nothing to add
        if field_count == 0:
            raise NoFieldsFound