Пример #1
0
 def __init__(self, data):
     """Parse MARC XML text and keep the wrapped record and its leader.

     ``data`` is parsed with ``etree.fromstring``.  When the parsed root is
     a MARC21 slim ``collection`` element, its first child is used in its
     place.  The resulting ``MarcXml`` object is stored on ``self.rec`` and
     the result of its ``leader()`` call on ``self.leader``.
     """
     element = etree.fromstring(data)
     is_collection = (
         element.tag == '{http://www.loc.gov/MARC21/slim}collection')
     if is_collection:
         # Only the first child of the collection is used.
         element = element[0]
     marc = MarcXml(element)
     self.rec = marc
     self.leader = marc.leader()
Пример #2
0
 def test_xml(self):
     """Compare ``read_edition`` output for each XML sample with stored JSON.

     For every name in ``xml_samples`` the input document is parsed, reduced
     to its first child when the root is not a record element but that child
     is, and passed through ``MarcXml`` and ``read_edition``.  When no
     expectation file exists yet, the current result is written out as the
     new expectation and the sample is skipped; otherwise the result must
     equal the stored JSON key by key and as a whole.

     Any failure prints the offending sample name and re-raises.
     """
     for i in xml_samples:
         try:
             expect_filename = 'test_data/xml_expect/' + i + '_marc.xml'
             path = 'test_data/xml_input/' + i + '_marc.xml'
             # Use a context manager so the input handle is closed promptly.
             with open(path) as xml_file:
                 element = etree.parse(xml_file).getroot()
             if element.tag != record_tag and element[0].tag == record_tag:
                 element = element[0]
             rec = MarcXml(element)
             edition_marc_xml = read_edition(rec)
             assert edition_marc_xml
             if os.path.exists(expect_filename):
                 with open(expect_filename) as expect_file:
                     j = simplejson.load(expect_file)
                 if not j:
                     print(expect_filename)
                 assert j
             else:
                 # No expectation recorded yet: store the current output.
                 # Closing the file guarantees the JSON is flushed to disk.
                 with open(expect_filename, 'w') as expect_file:
                     simplejson.dump(edition_marc_xml, expect_file, indent=2)
                 continue
             self.assertEqual(sorted(edition_marc_xml.keys()),
                              sorted(j.keys()))
             for k in edition_marc_xml.keys():
                 # repr() of the tuple replaces the deprecated backtick form.
                 print(repr((i, k, edition_marc_xml[k])))
                 self.assertEqual(edition_marc_xml[k], j[k])
             self.assertEqual(edition_marc_xml, j)
         except:
             # Bare except is deliberate: report the sample, then re-raise
             # whatever was raised without altering it.
             print('bad marc: %s' % (i,))
             raise
Пример #3
0
 def test_subjects_xml(self, item, expected):
     """Assert that ``read_subjects`` on one XML sample equals ``expected``.

     The sample is loaded from ``test_data/xml_input/<item>_marc.xml`` next
     to this module.  When the document root is not a record element but its
     first child is, that child is used as the record.
     """
     base_dir = os.path.dirname(__file__)
     xml_path = base_dir + '/test_data/xml_input/' + item + '_marc.xml'
     root = etree.parse(xml_path).getroot()
     wrapped = root.tag != record_tag and root[0].tag == record_tag
     record_element = root[0] if wrapped else root
     marc = MarcXml(record_element)
     assert read_subjects(marc) == expected
Пример #4
0
 def test_xml(self):
     """Compare ``read_edition`` output for each XML sample with stored JSON.

     For every name in ``xml_samples`` the input document under
     ``test_data`` is parsed, reduced to its first child when the root is a
     collection element holding a record, and passed through ``MarcXml`` and
     ``read_edition``.  When the expectation file is missing, a warning is
     printed, the current result is written as the new expectation and the
     sample is skipped; otherwise the result must equal the stored JSON key
     by key and as a whole.

     Any failure prints the offending sample name and re-raises.
     """
     for i in xml_samples:
         try:
             expect_filename = "%s/xml_expect/%s_marc.xml" % (test_data, i)
             path = "%s/xml_input/%s_marc.xml" % (test_data, i)
             # Use a context manager so the input handle is closed promptly.
             with open(path) as xml_file:
                 element = etree.parse(xml_file).getroot()
             # Handle MARC XML collection elements in our test_data expectations:
             if element.tag == collection_tag and element[
                     0].tag == record_tag:
                 element = element[0]
             rec = MarcXml(element)
             edition_marc_xml = read_edition(rec)
             assert edition_marc_xml
             if os.path.exists(expect_filename):
                 with open(expect_filename) as expect_file:
                     j = simplejson.load(expect_file)
                 assert j, "Unable to open test data: %s" % expect_filename
             else:
                 print("WARNING: test data %s not found, recreating it!" % expect_filename)
                 # Closing the file guarantees the JSON is flushed to disk.
                 with open(expect_filename, 'w') as expect_file:
                     simplejson.dump(edition_marc_xml, expect_file, indent=2)
                 continue
             self.assertEqual(sorted(edition_marc_xml.keys()),
                              sorted(j.keys()))
             for k in edition_marc_xml.keys():
                 self.assertEqual(edition_marc_xml[k], j[k])
             self.assertEqual(edition_marc_xml, j)
         except:
             # Bare except is deliberate: report the sample, then re-raise
             # whatever was raised without altering it.
             print('Bad MARC: %s' % (i,))
             raise
Пример #5
0
 def test_subjects_xml(self):
     """Yield a ``_test_subjects`` check for every entry of ``xml_samples``.

     Each entry is an ``(item, expect)`` pair.  The sample is parsed from
     ``test_data/<item>_marc.xml``; when the document root is not a record
     element but its first child is, that child is used.  The generator
     yields ``(self._test_subjects, MarcXml record, expect)`` tuples.
     """
     for item, expect in xml_samples:
         root = etree.parse('test_data/' + item + '_marc.xml').getroot()
         root_is_record = root.tag == record_tag
         if not root_is_record and root[0].tag == record_tag:
             root = root[0]
         marc = MarcXml(root)
         yield self._test_subjects, marc, expect
Пример #6
0
 def test_subjects_xml(self):
     """Yield a ``_test_subjects`` check for every entry of ``xml_samples``.

     Each entry is an ``(item, expect)`` pair.  The sample is parsed from
     ``test_data/xml_input/<item>_marc.xml`` next to this module; when the
     document root is not a record element but its first child is, that
     child is used.  The generator yields
     ``(self._test_subjects, MarcXml record, expect)`` tuples.
     """
     for item, expect in xml_samples:
         here = os.path.dirname(__file__)
         xml_path = here + '/test_data/xml_input/' + item + '_marc.xml'
         root = etree.parse(xml_path).getroot()
         root_is_record = root.tag == record_tag
         if not root_is_record and root[0].tag == record_tag:
             root = root[0]
         marc = MarcXml(root)
         yield self._test_subjects, marc, expect
Пример #7
0
            filename = 'test_data/' + item

            data = open(filename).read()
            if len(data) != int(data[:5]):
                data = data.decode('utf-8').encode('raw_unicode_escape')
            rec = MarcBinary(data)
            yield self._test_subjects, rec, expect


# Collect the read_subjects() result of every XML sample, then of every
# binary sample, into a single list.
subjects = []
for item, expect in xml_samples:
    filename = 'test_data/' + item + '_marc.xml'
    element = etree.parse(filename).getroot()
    # Use the first child when the root is not itself a record element.
    if element.tag != record_tag and element[0].tag == record_tag:
        element = element[0]
    rec = MarcXml(element)
    subjects.append(read_subjects(rec))

for item, expect in bin_samples:
    filename = 'test_data/' + item

    # Read through a context manager so the handle is closed promptly
    # instead of being left for garbage collection.
    with open(filename) as marc_file:
        data = marc_file.read()
    # The first five characters are parsed as a length; when it disagrees
    # with the actual data length the data is re-encoded before parsing.
    if len(data) != int(data[:5]):
        data = data.decode('utf-8').encode('raw_unicode_escape')
    rec = MarcBinary(data)
    subjects.append(read_subjects(rec))

all_subjects = defaultdict(lambda: defaultdict(int))
for a in subjects:
    for b, c in a.items():
        for d, e in c.items():
Пример #8
0
 def __init__(self, data):
     """Parse MARC XML text and keep the wrapped record and its leader.

     ``data`` is parsed with ``etree.fromstring`` and the resulting root is
     handed directly to ``MarcXml``.  That object is stored on ``self.rec``
     and the result of its ``leader()`` call on ``self.leader``.
     """
     marc = MarcXml(etree.fromstring(data))
     self.rec = marc
     self.leader = marc.leader()