示例#1
0
def loads_xml(bytestring, encoding=None, cls=ListDmrs, convert_legacy_prontype=True, **kwargs):
    """
    Build a DMRS graph from a single "<dmrs>...</dmrs>" XML element.
    ("<dmrslist>...</dmrslist>" is not handled yet.)

    bytestring: the XML as bytes; pass a str together with `encoding`
        and it is encoded before parsing
    cls: graph type to instantiate (ListDmrs by default), called as
        cls(**kwargs)
    convert_legacy_prontype: passed through to Node.from_xml

    Returns the populated graph.
    Raises PydmrsValueError (carrying the tag name) if a child element
    is neither a node nor a link.
    """
    raw = bytestring.encode(encoding) if encoding else bytestring
    root = ET.XML(raw)

    def int_attr(name):
        # Integer value of a root attribute, or None when it is absent
        if name not in root.attrib:
            return None
        return int(root.get(name))

    graph = cls(**kwargs)

    graph.cfrom = int_attr('cfrom')
    graph.cto = int_attr('cto')
    graph.surface = root.get('surface')
    graph.ident = int_attr('ident')
    # The top node can be named by a graph attribute, or by a link
    # starting at node 0 (handled in the loop below)
    top_id = int_attr('top')
    index_id = int_attr('index')

    for child in root:
        tag = child.tag
        if tag == 'node':
            graph.add_node(Node.from_xml(child, convert_legacy_prontype))
            continue
        if tag != 'link':
            raise PydmrsValueError(tag)

        link = Link.from_xml(child)
        if link.start == 0:
            # A link from node 0 marks the top; it replaces any
            # graph-level top attribute, on the assumption that both
            # never occur in the same graph
            top_id = link.end
        else:
            graph.add_link(link)

    # Resolve ids to nodes only after every node has been added;
    # a falsy id (None or 0) leaves the corresponding field untouched
    if top_id:
        graph.top = graph[top_id]
    if index_id:
        graph.index = graph[index_id]

    return graph
示例#2
0
def loads_xml(bytestring,
              encoding=None,
              cls=ListDmrs,
              convert_legacy_prontype=True,
              **kwargs):
    """
    Parse a single "<dmrs>...</dmrs>" XML element into a DMRS graph.
    To be updated for "<dmrslist>...</dmrslist>"...

    bytestring: the XML as bytes; to load from a string instead,
        specify encoding (the string is encoded before parsing)
    cls: graph type to produce (ListDmrs by default), instantiated
        as cls(**kwargs)
    convert_legacy_prontype: forwarded to Node.from_xml

    The top node may be given either as a 'top' attribute on the
    <dmrs> element or as a link whose start is 0; if both are
    present, the link wins.

    Returns the populated graph.
    Raises PydmrsValueError (with the tag name) for any child element
    other than 'node' or 'link'.
    """
    if encoding:
        bytestring = bytestring.encode(encoding)
    xml = ET.XML(bytestring)

    dmrs = cls(**kwargs)

    # Integer-valued attributes are optional; absent ones become None
    dmrs.cfrom = int(xml.get('cfrom')) if 'cfrom' in xml.attrib else None
    dmrs.cto = int(xml.get('cto')) if 'cto' in xml.attrib else None
    dmrs.surface = xml.get('surface')
    dmrs.ident = int(xml.get('ident')) if 'ident' in xml.attrib else None
    index_id = int(xml.get('index')) if 'index' in xml.attrib else None
    # Start from the graph-level top attribute (if any) rather than
    # discarding it; a link from node 0 may still override it below
    top_id = int(xml.get('top')) if 'top' in xml.attrib else None

    for elem in xml:
        if elem.tag == 'node':
            node = Node.from_xml(elem, convert_legacy_prontype)
            dmrs.add_node(node)

        elif elem.tag == 'link':
            link = Link.from_xml(elem)
            if link.start == 0:
                # A link from node 0 is not a real link: it marks the top
                top_id = link.end
            else:
                dmrs.add_link(link)
        else:
            raise PydmrsValueError(elem.tag)

    # Look the nodes up only once all of them have been added;
    # a falsy id (None or 0) means "not specified"
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]

    return dmrs