示例#1
0
def translate(bytes_in, leader_says_marc8=False):
    """
    Decode an encoded MARC field value to an NFC-normalized unicode string.

    :param bytes_in: the encoded value (bytes).
    :param leader_says_marc8: when true, ``bytes_in`` is passed through
        ``mnemonics.read`` and then a ``MARC8ToUnicode`` converter;
        otherwise it is decoded as UTF-8.
    :return: the decoded text after ``normalize('NFC', ...)``.
    :raises UnicodeDecodeError: on the UTF-8 path, if ``bytes_in`` is not
        valid UTF-8.
    """
    if leader_says_marc8:
        # Build the converter only on the branch that actually uses it, so
        # the UTF-8 path does not pay for an object it never touches.
        marc8 = MARC8ToUnicode(quiet=True)
        data = marc8.translate(mnemonics.read(bytes_in))
    else:
        data = bytes_in.decode('utf-8')
    return normalize('NFC', data)
示例#2
0
def translate(bytes_in, leader_says_marc8=False):
    """
    Decode an encoded MARC field value to an NFC-normalized unicode string.

    When ``leader_says_marc8`` is true the value goes through
    ``mnemonics.read`` and a ``MARC8ToUnicode`` converter; otherwise it is
    decoded as UTF-8. If anything in that process raises, the offending
    input and the flag are printed for diagnosis and the original exception
    is re-raised unchanged.

    :param bytes_in: the encoded value (bytes).
    :param leader_says_marc8: selects the MARC8 path instead of UTF-8.
    :return: the decoded text after ``normalize('NFC', ...)``.
    """
    try:
        if leader_says_marc8:
            # Only the MARC8 branch needs a converter instance.
            marc8 = MARC8ToUnicode(quiet=True)
            data = marc8.translate(mnemonics.read(bytes_in))
        else:
            data = bytes_in.decode('utf-8')
        return normalize('NFC', data)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit pass straight through without
        # triggering the diagnostic output.
        print('translate error for:', repr(bytes_in))
        print('marc8:', leader_says_marc8)
        raise
示例#3
0
def translate(bytes_in, leader_says_marc8=False):
    """
    Converts a binary MARC field value to unicode str,
    from either MARC8 or UTF8 encoded bytes.

    The result is always passed through ``normalize('NFC', ...)``.

    :param bytes bytes_in: the encoded field value.
    :param bool leader_says_marc8: when true, decode via ``mnemonics.read``
        and a ``MARC8ToUnicode`` converter; otherwise decode as UTF-8.
    :rtype: str
    :raises AssertionError: if ``bytes_in`` is not ``bytes`` (only while
        assertions are enabled).
    :raises UnicodeDecodeError: on the UTF-8 path, for invalid UTF-8.
    """
    assert isinstance(bytes_in, bytes), 'bytes_in must be bytes'
    if leader_says_marc8:
        # The converter is only needed for MARC8 input, so it is created
        # here rather than unconditionally on every call.
        marc8 = MARC8ToUnicode(quiet=True)
        data = marc8.translate(mnemonics.read(bytes_in))
    else:
        data = bytes_in.decode('utf-8')
    return normalize('NFC', data)
示例#4
0
    def record_to_xml_node(self, record, quiet=False, namespace=False):
        """
        Convert a record object into a tree of XML elements.

        Every element tag is prefixed with ``marc:``. The root is
        ``marc:record``; it receives a ``marc:leader`` child whose text is
        ``record.leader``, followed by one ``marc:controlfield`` or
        ``marc:datafield`` child per field, in iteration order.

        :param record: iterable of fields that also exposes ``leader``.
            Each field provides ``tag`` and ``is_control_field()``; control
            fields provide ``data``, other fields provide a two-item
            ``indicators`` sequence and iterate as ``(code, value)`` pairs.
        :param quiet: forwarded to ``MARC8ToUnicode``.
        :param namespace: when true, the ``xmlns``, ``xmlns:xsi`` and
            ``xsi:schemaLocation`` attributes are set on the root element
            (default: False).
        :return: the root element.
        """
        # TODO: maybe should set g0 and g1 appropriately using 066 $a and $b?
        # NOTE(review): the ``marc:`` prefix is never bound with an
        # ``xmlns:marc`` attribute here, even when ``namespace`` is true --
        # confirm that whatever serializes or embeds this node declares it.
        prefix = 'marc:'

        marc8 = MARC8ToUnicode(quiet=quiet)

        def translate(data):
            # Text values pass through untouched; anything else is handed
            # to the MARC8 converter.
            if isinstance(data, six.text_type):
                return data
            return marc8.translate(data)

        root = ET.Element(prefix + 'record')
        if namespace:
            root.set('xmlns', MARC_XML_NS)
            root.set('xmlns:xsi', XSI_NS)
            root.set('xsi:schemaLocation', MARC_XML_SCHEMA)
        leader = ET.SubElement(root, prefix + 'leader')
        leader.text = record.leader
        for field in record:
            if field.is_control_field():
                control_field = ET.SubElement(root, prefix + 'controlfield')
                control_field.set('tag', field.tag)
                control_field.text = translate(field.data)
            else:
                data_field = ET.SubElement(root, prefix + 'datafield')
                data_field.set('tag', field.tag)
                data_field.set('ind1', field.indicators[0])
                data_field.set('ind2', field.indicators[1])
                for subfield in field:
                    data_subfield = ET.SubElement(data_field,
                                                  prefix + 'subfield')
                    data_subfield.set('code', subfield[0])
                    data_subfield.text = translate(subfield[1])

        return root
示例#5
0
文件: marcxml.py 项目: pejalptar/marc
def record_to_xml_node(record, quiet=False, namespace=False):
    """Convert a record object to a tree of XML elements.

    The root element is ``record``. It receives a ``leader`` child whose text
    is ``str(record.leader)``, followed by one ``controlfield`` or
    ``datafield`` child per field, in iteration order. Data fields get
    ``ind1``, ``ind2`` and ``tag`` attributes plus one ``subfield`` child per
    ``(code, value)`` pair.

    If you would like to include the marcxml namespace in the root tag set
    namespace to True.

    :param record: iterable of fields that also exposes ``leader``.
    :param quiet: forwarded to ``MARC8ToUnicode`` if a converter is needed.
    :param namespace: when true, set ``xmlns``, ``xmlns:xsi`` and
        ``xsi:schemaLocation`` on the root element.
    :return: the root element.
    """
    # TODO: maybe should set g0 and g1 appropriately using 066 $a and $b?
    # The converter is created on first use: records whose values are all
    # ``str`` never need one.
    marc8 = None

    def translate(data):
        """Return *data* unchanged if it is text, else MARC8-decode it."""
        nonlocal marc8
        if isinstance(data, str):
            return data
        if marc8 is None:
            marc8 = MARC8ToUnicode(quiet=quiet)
        return marc8.translate(data)

    root = ET.Element("record")
    if namespace:
        root.set("xmlns", MARC_XML_NS)
        root.set("xmlns:xsi", XSI_NS)
        root.set("xsi:schemaLocation", MARC_XML_SCHEMA)
    leader = ET.SubElement(root, "leader")
    leader.text = str(record.leader)
    for field in record:
        if field.is_control_field():
            control_field = ET.SubElement(root, "controlfield")
            control_field.set("tag", field.tag)
            control_field.text = translate(field.data)
        else:
            data_field = ET.SubElement(root, "datafield")
            data_field.set("ind1", field.indicators[0])
            data_field.set("ind2", field.indicators[1])
            data_field.set("tag", field.tag)
            for subfield in field:
                data_subfield = ET.SubElement(data_field, "subfield")
                data_subfield.set("code", subfield[0])
                data_subfield.text = translate(subfield[1])

    return root
示例#6
0
from pymarc import MARC8ToUnicode
from unicodedata import normalize

from openlibrary.catalog.marc import mnemonics
from openlibrary.catalog.marc.marc_base import MarcBase, MarcException, BadMARC

# Converter instance built once, when this module is imported.
# ``quiet=True`` is passed to the constructor -- presumably to suppress
# decoding-error output; confirm against pymarc's MARC8ToUnicode.
marc8 = MARC8ToUnicode(quiet=True)


class BadLength(MarcException):
    """MarcException subtype with no behavior of its own.

    NOTE(review): presumably signals a length problem in MARC data; the
    raise sites are outside this view -- confirm.
    """


def handle_wrapped_lines(_iter):
    """
    Handles wrapped MARC fields, which appear to be multiple
    fields with the same field number ending with ++
    Have not found an official spec which describe this.
    """
    cur_lines = []
    cur_tag = None
    maybe_wrap = False
    for t, l in _iter:
        if len(l) > 500 and l.endswith(b'++\x1e'):
            assert not cur_tag or cur_tag == t
            cur_tag = t
            cur_lines.append(l)
            continue
        if cur_lines:
            yield cur_tag, cur_lines[0][:-3] + b''.join(
                i[2:-3] for i in cur_lines[1:]) + l[2:]