def __init__(self, data):
    """Parse MARC XML ``data`` and keep the record and its leader.

    ``data`` is handed to ``etree.fromstring``.  When the parsed root is a
    MARC21 slim ``collection`` element, its first child is wrapped instead
    of the root itself.  The resulting ``MarcXml`` object is stored as
    ``self.rec`` and the value of its ``leader()`` call as ``self.leader``.
    """
    parsed = etree.fromstring(data)
    is_collection = parsed.tag == '{http://www.loc.gov/MARC21/slim}collection'
    # A collection is only a wrapper: use the first element it contains.
    record = MarcXml(parsed[0] if is_collection else parsed)
    self.rec = record
    self.leader = record.leader()
def test_xml(self):
    """Compare read_edition() output with the stored expectation per sample.

    For every name in ``xml_samples`` the file
    ``test_data/xml_input/<name>_marc.xml`` is parsed, wrapped in ``MarcXml``
    and passed to ``read_edition()``.  The result must be truthy and equal to
    the JSON stored in ``test_data/xml_expect/<name>_marc.xml``.

    If the expectation file does not exist, the current result is written
    there as the new expectation and the sample is not compared.  An
    expectation file that exists but loads as an empty value fails the test.

    Any failure prints the offending sample name before being re-raised.
    """
    for i in xml_samples:
        try:
            expect_filename = 'test_data/xml_expect/' + i + '_marc.xml'
            path = 'test_data/xml_input/' + i + '_marc.xml'
            # Parse by path so that no file handle is left open by the test.
            element = etree.parse(path).getroot()
            # The root may be a wrapper whose first child is the record.
            if element.tag != record_tag and element[0].tag == record_tag:
                element = element[0]
            rec = MarcXml(element)
            edition_marc_xml = read_edition(rec)
            assert edition_marc_xml

            if not os.path.exists(expect_filename):
                # No expectation recorded yet: store the current output.
                with open(expect_filename, 'w') as expect_file:
                    simplejson.dump(edition_marc_xml, expect_file, indent=2)
                continue

            with open(expect_filename) as expect_file:
                j = simplejson.load(expect_file)
            if not j:
                # Name the empty expectation file before failing.
                print(expect_filename)
            assert j

            self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
            for k in edition_marc_xml.keys():
                # Trace every compared field; repr() of the tuple matches the
                # output of the old backtick syntax.
                print(repr((i, k, edition_marc_xml[k])))
                self.assertEqual(edition_marc_xml[k], j[k])
            self.assertEqual(edition_marc_xml, j)
        except Exception:
            # Identify the sample that failed, then let the error propagate.
            print('bad marc: %s' % (i,))
            raise
def test_subjects_xml(self, item, expected):
    """Assert that read_subjects() returns ``expected`` for one XML sample.

    :param item: sample name; the input file is
        ``test_data/xml_input/<item>_marc.xml`` in the directory of this
        module.
    :param expected: value ``read_subjects()`` must return for the record.
    """
    # os.path.join keeps the path relative when dirname(__file__) is '' --
    # plain concatenation would produce '/test_data/...' at the filesystem
    # root in that case.
    filename = os.path.join(
        os.path.dirname(__file__), 'test_data', 'xml_input', item + '_marc.xml'
    )
    element = etree.parse(filename).getroot()
    # The root may be a wrapper whose first child is the record.
    if element.tag != record_tag and element[0].tag == record_tag:
        element = element[0]
    rec = MarcXml(element)
    assert read_subjects(rec) == expected
def test_xml(self):
    """Compare read_edition() output with the stored expectation per sample.

    For every name in ``xml_samples`` the file
    ``<test_data>/xml_input/<name>_marc.xml`` is parsed, wrapped in
    ``MarcXml`` and passed to ``read_edition()``.  The result must be truthy
    and equal to the JSON stored in ``<test_data>/xml_expect/<name>_marc.xml``.

    If the expectation file does not exist, a warning is printed, the current
    result is written there as the new expectation and the sample is not
    compared.  An expectation file that loads as an empty value fails the
    test.

    Any failure prints the offending sample name before being re-raised.
    """
    for i in xml_samples:
        try:
            expect_filename = "%s/xml_expect/%s_marc.xml" % (test_data, i)
            path = "%s/xml_input/%s_marc.xml" % (test_data, i)
            # Parse by path so that no file handle is left open by the test.
            element = etree.parse(path).getroot()
            # Handle MARC XML collection elements in our test_data expectations:
            if element.tag == collection_tag and element[0].tag == record_tag:
                element = element[0]
            rec = MarcXml(element)
            edition_marc_xml = read_edition(rec)
            assert edition_marc_xml

            if not os.path.exists(expect_filename):
                print("WARNING: test data %s not found, recreating it!" % expect_filename)
                with open(expect_filename, 'w') as expect_file:
                    simplejson.dump(edition_marc_xml, expect_file, indent=2)
                continue

            with open(expect_filename) as expect_file:
                j = simplejson.load(expect_file)
            assert j, "Unable to open test data: %s" % expect_filename

            self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
            for k in edition_marc_xml.keys():
                # Field-by-field comparison pinpoints the differing key.
                self.assertEqual(edition_marc_xml[k], j[k])
            self.assertEqual(edition_marc_xml, j)
        except Exception:
            # Identify the sample that failed, then let the error propagate.
            print('Bad MARC: %s' % (i,))
            raise
def test_subjects_xml(self):
    """Yield one subject check per entry of ``xml_samples``.

    Each entry is a ``(name, expectation)`` pair.  The file
    ``test_data/<name>_marc.xml`` is parsed and wrapped in ``MarcXml``; the
    generator then yields ``(self._test_subjects, record, expectation)``.
    """
    for sample_name, expected in xml_samples:
        xml_path = ''.join(('test_data/', sample_name, '_marc.xml'))
        root = etree.parse(xml_path).getroot()
        # The root may be a wrapper whose first child is the record.
        is_wrapper = root.tag != record_tag and root[0].tag == record_tag
        record = MarcXml(root[0] if is_wrapper else root)
        yield self._test_subjects, record, expected
def test_subjects_xml(self):
    """Yield one subject check per entry of ``xml_samples``.

    Each entry is an ``(item, expect)`` pair.  The file
    ``test_data/xml_input/<item>_marc.xml`` in the directory of this module
    is parsed and wrapped in ``MarcXml``; the generator then yields
    ``(self._test_subjects, rec, expect)``.
    """
    # os.path.join keeps the paths relative when dirname(__file__) is '' --
    # plain concatenation would produce '/test_data/...' at the filesystem
    # root in that case.
    input_dir = os.path.join(os.path.dirname(__file__), 'test_data', 'xml_input')
    for item, expect in xml_samples:
        filename = os.path.join(input_dir, item + '_marc.xml')
        element = etree.parse(filename).getroot()
        # The root may be a wrapper whose first child is the record.
        if element.tag != record_tag and element[0].tag == record_tag:
            element = element[0]
        rec = MarcXml(element)
        yield self._test_subjects, rec, expect
filename = 'test_data/' + item data = open(filename).read() if len(data) != int(data[:5]): data = data.decode('utf-8').encode('raw_unicode_escape') rec = MarcBinary(data) yield self._test_subjects, rec, expect subjects = [] for item, expect in xml_samples: filename = 'test_data/' + item + '_marc.xml' element = etree.parse(filename).getroot() if element.tag != record_tag and element[0].tag == record_tag: element = element[0] rec = MarcXml(element) subjects.append(read_subjects(rec)) for item, expect in bin_samples: filename = 'test_data/' + item data = open(filename).read() if len(data) != int(data[:5]): data = data.decode('utf-8').encode('raw_unicode_escape') rec = MarcBinary(data) subjects.append(read_subjects(rec)) all_subjects = defaultdict(lambda: defaultdict(int)) for a in subjects: for b, c in a.items(): for d, e in c.items():
def __init__(self, data):
    """Parse MARC XML ``data`` and keep the record and its leader.

    The root element produced by ``etree.fromstring(data)`` is wrapped in
    ``MarcXml`` and stored as ``self.rec``; the value of its ``leader()``
    call is stored as ``self.leader``.
    """
    record = MarcXml(etree.fromstring(data))
    self.rec = record
    self.leader = record.leader()