Пример #1
0
def readRecord(record):
    """Extract the header fields and MARC payload from a single XML record.

    Arguments:
        record: XML element for one record. It is queried with
            namespace-qualified ``find``/``findtext`` paths.

    Returns:
        A ``(recordID, dateIssued, marcRecord)`` tuple where
        recordID: The unique DOAB identifier for this record
        dateIssued: The datestamp text from the record header (when the
            record was last updated in DOAB)
        marcRecord: A marcalyx object containing parsed MARC data

        ``None`` is returned instead when the record has no header or
        datestamp, when the header is flagged as deleted, or when marcalyx
        raises ``TypeError`` while parsing the MARC data.
    """
    recordID = record.findtext('.//{}identifier'.format(OAI_NS))
    logger.info('Loading DOAB record {}'.format(recordID))

    recordHead = record.find('.//{}header'.format(OAI_NS))
    # find() returns None when nothing matches. Compare with ``is None``
    # rather than truthiness because a childless element is falsy.
    if recordHead is None:
        logger.error('DOAB record {} has no header, skip.'.format(recordID))
        return None

    if recordHead.get('status') == 'deleted':
        logger.info('DOAB record flagged as deleted, skip.')
        return None

    datestamp = recordHead.find('.//{}datestamp'.format(OAI_NS))
    if datestamp is None:
        logger.error(
            'DOAB record {} has no datestamp, skip.'.format(recordID))
        return None
    dateIssued = datestamp.text

    logger.info('Parsing record with marcalyx')
    try:
        marcRecord = marcalyx.Record(record.find(
            './/{}record'.format(MARC_NS)))
    except TypeError as err:
        logger.error('Unable to parse MARCXML record {}'.format(recordID))
        logger.debug(err)
        return None

    logger.info('transforming {} into SFR data model'.format(
        marcRecord.titleStatement()))

    return (recordID, dateIssued, marcRecord)
Пример #2
0
def parseMARC(marcData):
    """Parse raw MARCXML data into a marcalyx record that can be used
    to extract all metadata from the record.

    Arguments:
        marcData: MARCXML document as a string. It is encoded to UTF-8
            bytes before being handed to the XML parser.

    Returns:
        The marcalyx ``Record`` built from the parsed XML.

    Raises:
        OCLCError: if the XML parser raises ``etree.XMLSyntaxError`` or
            marcalyx raises ``IndexError`` while building the record. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # Named marcRoot so the local no longer shadows this function's name.
        marcRoot = etree.fromstring(marcData.encode('utf-8'))
    except etree.XMLSyntaxError as err:
        logger.error('OCLC Catalog returned invalid XML')
        logger.debug(err)
        raise OCLCError('Received invalid XML from OCLC service') from err

    try:
        record = marcalyx.Record(marcRoot)
    except IndexError as err:
        logger.error('marcalyx failed to parse entry for catalog entry')
        logger.debug(err)
        raise OCLCError('MARCXML could not be parsed by marcalyx') from err

    return record
Пример #3
0
def kindred():
    """Parse ``tests/xml/1027474578.xml`` and return its root element
    wrapped in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/1027474578.xml').getroot()
    return marcalyx.Record(document_root)
Пример #4
0
def tokio():
    """Parse ``tests/xml/26003.xml`` and return its root element wrapped
    in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/26003.xml').getroot()
    return marcalyx.Record(document_root)
Пример #5
0
def fissures():
    """Parse ``tests/xml/53998.xml`` and return its root element wrapped
    in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/53998.xml').getroot()
    return marcalyx.Record(document_root)
Пример #6
0
def xenophon():
    """Parse ``tests/xml/3863.xml`` and return its root element wrapped
    in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/3863.xml').getroot()
    return marcalyx.Record(document_root)
Пример #7
0
def russian():
    """Parse ``tests/xml/528635.xml`` and return its root element wrapped
    in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/528635.xml').getroot()
    return marcalyx.Record(document_root)
Пример #8
0
def marner():
    """Parse ``tests/xml/2971.xml`` and return its root element wrapped
    in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/2971.xml').getroot()
    return marcalyx.Record(document_root)
Пример #9
0
def wrinkle():
    """Parse ``tests/xml/14026028.xml`` and return the first child of its
    root element wrapped in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/14026028.xml').getroot()
    # Unlike the sibling loaders, the record is built from the root's first
    # child rather than the root itself.
    first_child = document_root[0]
    return marcalyx.Record(first_child)
Пример #10
0
def binti():
    """Parse ``tests/xml/973807354.xml`` and return its root element
    wrapped in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/973807354.xml').getroot()
    return marcalyx.Record(document_root)
Пример #11
0
def quilt():
    """Parse ``tests/xml/10705.xml`` and return its root element wrapped
    in a marcalyx ``Record``."""
    document_root = ET.parse('tests/xml/10705.xml').getroot()
    return marcalyx.Record(document_root)
Пример #12
0
def ctrl():
    """Parse ``tests/xml/1027474578.xml`` into a marcalyx ``Record`` and
    return the first item of its ``'001'`` lookup."""
    document_root = ET.parse('tests/xml/1027474578.xml').getroot()
    parsed_record = marcalyx.Record(document_root)
    # presumably '001' selects the MARC control-number field(s) -- confirm
    # against marcalyx's Record.__getitem__.
    matches = parsed_record['001']
    return matches[0]