示例#1
0
    def _tree2etree(self, parent):
        """Recursively convert a tree into an ElementTree element.

        An element whose tag is ``parent.node`` is created.  Every child of
        ``parent`` that is itself a ``Tree`` is converted recursively and
        appended to it; any other child is unpacked as a ``(text, tag)``
        pair and becomes a subelement named ``tag`` whose text is ``text``.

        @param parent: the tree to convert
        @return: the newly built element for C{parent}
        """
        from nltk.parse import Tree

        node_elem = Element(parent.node)
        for item in parent:
            if not isinstance(item, Tree):
                # Leaf: a two-item (text, tag) sequence.
                leaf_text, leaf_tag = item
                leaf_elem = SubElement(node_elem, leaf_tag)
                leaf_elem.text = leaf_text
                continue
            # Subtree: build its element recursively and attach it.
            node_elem.append(self._tree2etree(item))
        return node_elem
示例#2
0
def add_default_fields(elem, default_fields):
    """Insert empty subelements for any default fields that are missing.

    Starting at C{elem} and continuing through all of its descendants
    (including subelements added by this function), each element's tag is
    looked up in C{default_fields}; every listed field for which
    C{find} returns C{None} is appended as a blank subelement.

    @param elem: toolbox data in an elementtree structure
    @type elem: ElementTree._ElementInterface
    @param default_fields: fields to add to each type of element and subelement
    @type default_fields: dictionary of tuples
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # that elements are visited in the same pre-order as a recursive walk.
    pending = [elem]
    while pending:
        node = pending.pop()
        for name in default_fields.get(node.tag, []):
            if node.find(name) is not None:
                continue
            SubElement(node, name)
        pending.extend(reversed(list(node)))
示例#3
0
def _append_char_coded_text(elem,  s,  char_code_pat):
    """Append C{s} to C{elem}, turning character-style-coded text into span elements.

    The string is scanned left to right for matches of C{char_code_pat}.
    Text between matches is appended with C{elem_append_string}; each match
    becomes a 'span' subelement whose attributes are looked up in
    C{char_code_attribs} by the captured style code and whose text is the
    captured styled text.

    @param elem: element corresponding to an MDF field. This is modified by the function.
        It may already have 'span' subelements corresponding to character styled text earlier in the MDF field.
    @type elem: C{ElementTree._ElementInterface}
    @param s: field contents possibly including parts coded with MDF character style codes
    @type s: C{String}
    @param char_code_pat: compiled regular expression describing the character styled text with the style
        code. It must have two sets of capturing parentheses. The first set captures the style code and the
        second the styled text.
    @type char_code_pat: compiled regular expression pattern
    """
    cursor = 0
    while True:
        match = char_code_pat.search(s, cursor)
        if match is None:
            break
        # Plain text preceding this styled run.
        elem_append_string(elem, s[cursor:match.start()])
        span = SubElement(elem, 'span', char_code_attribs[match.group(1)])
        span.text = match.group(2)
        cursor = match.end()
    # Whatever plain text remains after the last styled run.
    elem_append_string(elem, s[cursor:])
示例#4
0
def add_cv_field(entry):
    """Append a 'cv' subelement to C{entry} for each of its 'lx' children.

    The text of each new 'cv' element is the result of calling C{cv} on the
    text of the corresponding 'lx' element.  New elements are appended at
    the end of C{entry}, in the order the 'lx' children occur.

    @param entry: element whose direct 'lx' children are processed. This is
        modified by the function.
    """
    # Iterate over a snapshot of the 'lx' children: appending to entry while
    # iterating entry itself mutates the sequence being traversed.
    for lx_field in entry.findall('lx'):
        cv_field = SubElement(entry, 'cv')
        cv_field.text = cv(lx_field.text)