def readRecord(record):
    """Pull the OAI header fields out of one XML record and parse its MARC.

    ``None`` is returned (the record is skipped) when the OAI header carries
    ``status="deleted"`` or when marcalyx raises a ``TypeError`` while the
    MARC record is being built.

    On success a 3-tuple is returned:
        recordID: text of the first OAI ``identifier`` element in the record
        dateIssued: text of the ``datestamp`` element inside the OAI header
        marcRecord: marcalyx.Record built from the MARC ``record`` element
    """
    identifier = record.findtext('.//{}identifier'.format(OAI_NS))
    logger.info('Loading DOAB record {}'.format(identifier))

    header = record.find('.//{}header'.format(OAI_NS))
    if header.get('status') == 'deleted':
        logger.info('DOAB record flagged as deleted, skip.')
        return None

    datestamp = header.find('.//{}datestamp'.format(OAI_NS)).text

    logger.info('Parsing record with marcalyx')
    try:
        # The element lookup stays inside the try so a TypeError raised at
        # any point of building the record is handled the same way.
        marcNode = record.find('.//{}record'.format(MARC_NS))
        parsed = marcalyx.Record(marcNode)
    except TypeError as err:
        logger.error('Unable to parse MARCXML record {}'.format(identifier))
        logger.debug(err)
        return None

    title = parsed.titleStatement()
    logger.info('transforming {} into SFR data model'.format(title))
    return (identifier, datestamp, parsed)
def parseMARC(marcData):
    """Parse a MARCXML string into a marcalyx record.

    Args:
        marcData: MARCXML document as a string; it is UTF-8 encoded before
            being handed to ``etree.fromstring``.

    Returns:
        The ``marcalyx.Record`` built from the parsed root element.

    Raises:
        OCLCError: if the XML cannot be parsed (``etree.XMLSyntaxError``) or
            if marcalyx raises ``IndexError`` while building the record. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # Distinct local name so the function's own name is not shadowed.
        marcRoot = etree.fromstring(marcData.encode('utf-8'))
    except etree.XMLSyntaxError as err:
        logger.error('OCLC Catalog returned invalid XML')
        logger.debug(err)
        # Chain explicitly so the traceback shows the parser error as cause.
        raise OCLCError('Received invalid XML from OCLC service') from err

    try:
        record = marcalyx.Record(marcRoot)
    except IndexError as err:
        logger.error('marcalyx failed to parse entry for catalog entry')
        logger.debug(err)
        raise OCLCError('MARCXML could not be parsed by marcalyx') from err

    return record
def kindred():
    """Build a marcalyx.Record from the root of tests/xml/1027474578.xml."""
    xmlRoot = ET.parse('tests/xml/1027474578.xml').getroot()
    return marcalyx.Record(xmlRoot)
def tokio():
    """Build a marcalyx.Record from the root of tests/xml/26003.xml."""
    xmlRoot = ET.parse('tests/xml/26003.xml').getroot()
    return marcalyx.Record(xmlRoot)
def fissures():
    """Build a marcalyx.Record from the root of tests/xml/53998.xml."""
    xmlRoot = ET.parse('tests/xml/53998.xml').getroot()
    return marcalyx.Record(xmlRoot)
def xenophon():
    """Build a marcalyx.Record from the root of tests/xml/3863.xml."""
    xmlRoot = ET.parse('tests/xml/3863.xml').getroot()
    return marcalyx.Record(xmlRoot)
def russian():
    """Build a marcalyx.Record from the root of tests/xml/528635.xml."""
    xmlRoot = ET.parse('tests/xml/528635.xml').getroot()
    return marcalyx.Record(xmlRoot)
def marner():
    """Build a marcalyx.Record from the root of tests/xml/2971.xml."""
    xmlRoot = ET.parse('tests/xml/2971.xml').getroot()
    return marcalyx.Record(xmlRoot)
def wrinkle():
    """Build a marcalyx.Record from tests/xml/14026028.xml.

    Unlike the sibling loaders, the record is built from the first child of
    the document root rather than from the root itself.
    """
    xmlRoot = ET.parse('tests/xml/14026028.xml').getroot()
    firstChild = xmlRoot[0]
    return marcalyx.Record(firstChild)
def binti():
    """Build a marcalyx.Record from the root of tests/xml/973807354.xml."""
    xmlRoot = ET.parse('tests/xml/973807354.xml').getroot()
    return marcalyx.Record(xmlRoot)
def quilt():
    """Build a marcalyx.Record from the root of tests/xml/10705.xml."""
    xmlRoot = ET.parse('tests/xml/10705.xml').getroot()
    return marcalyx.Record(xmlRoot)
def ctrl():
    """Return the first '001' entry of the record in tests/xml/1027474578.xml.

    NOTE(review): presumably this is the MARC 001 control field - confirm
    against marcalyx's ``Record.__getitem__``.
    """
    xmlRoot = ET.parse('tests/xml/1027474578.xml').getroot()
    record = marcalyx.Record(xmlRoot)
    matches = record['001']
    return matches[0]