Пример #1
0
def fix_lists(dom):
    """
    Push change markup that wraps list items down into the items themselves.

    <ins> and <del> tags are not allowed within <ul> or <ol> tags, so any
    such tag that is the direct parent of an li is moved into the nearest
    li, so that the numbering isn't interrupted. The dom is modified in
    place.
    """
    _internalize_changes_markup(dom, {'li'})

    # Collect each <del> / <ins> element that is the direct parent of an li.
    wrappers = {'del': set(), 'ins': set()}
    for item in list(dom.getElementsByTagName('li')):
        container = item.parentNode
        kind = container.tagName
        if kind in wrappers:
            wrappers[kind].add(container)

    # ins > li becomes li > ins.
    for inserted in wrappers['ins']:
        distribute(inserted)

    # del > li becomes li.del-li > del.
    for deleted in wrappers['del']:
        # Remember the children before the wrapper goes away.
        former_children = list(deleted.childNodes)
        unwrap(deleted)
        for child in former_children:
            if child.nodeName != 'li':
                continue
            child.setAttribute('class', 'del-li')
            wrap_inner(child, 'del')
Пример #2
0
def fix_lists(dom):
    """
    Relocate <ins>/<del> wrappers found directly around list items.

    <ins> and <del> tags are not allowed within <ul> or <ol> tags. Each one
    that is the parent of an li is moved to the nearest li, so that the
    numbering isn't interrupted. The dom is modified in place.
    """
    _internalize_changes_markup(dom, {"li"})

    # Find all del > li and ins > li parents before touching the tree.
    li_parents = [li.parentNode for li in list(dom.getElementsByTagName("li"))]
    del_wrappers = {p for p in li_parents if p.tagName == "del"}
    ins_wrappers = {p for p in li_parents if p.tagName == "ins"}

    # Change ins > li into li > ins.
    for wrapper in ins_wrappers:
        distribute(wrapper)

    # Change del > li into li.del-li > del.
    for wrapper in del_wrappers:
        # Keep a handle on the children; the wrapper is about to be removed.
        orphans = list(wrapper.childNodes)
        unwrap(wrapper)
        for orphan in orphans:
            if orphan.nodeName == "li":
                orphan.setAttribute("class", "del-li")
                wrap_inner(orphan, "del")
Пример #3
0
def _internalize_changes_markup(dom, child_tag_names):
    """
    Merge an adjacent del/ins pair that each wrap one matching child tag.

    For every <del> immediately followed by an <ins>, where each has exactly
    one child and that child is an element whose tag is in
    `child_tag_names`, both children are unwrapped and the del/ins pair is
    wrapped in a single new element with the inserted child's tag name and
    attributes. The dom is modified in place.
    """
    # Delete tags are always ordered first.
    for del_tag in list(dom.getElementsByTagName('del')):
        ins_tag = del_tag.nextSibling

        # Both wrappers must hold exactly one child, and the sibling must
        # really be an <ins>.
        if len(del_tag.childNodes) != 1:
            continue
        if (ins_tag is None
                or len(ins_tag.childNodes) != 1
                or ins_tag.tagName != 'ins'):
            continue

        # The one child tag of `del_tag` should be in child_tag_names.
        old_child = del_tag.firstChild
        if not (is_element(old_child)
                and old_child.tagName in child_tag_names):
            continue

        # The one child tag of `ins_tag` should be in child_tag_names.
        new_child = ins_tag.firstChild
        if not (is_element(new_child)
                and new_child.tagName in child_tag_names):
            continue

        # Capture the inserted child's attributes before it is unwrapped.
        saved_attributes = dict(new_child.attributes.items())
        unwrap(old_child)
        unwrap(new_child)

        merged = wrap_nodes([del_tag, ins_tag], new_child.tagName)
        for name, value in saved_attributes.items():
            merged.setAttribute(name, value)
Пример #4
0
def _internalize_changes_markup(dom, child_tag_names):
    """
    Fold a del/ins pair of single wrapped children into one shared parent.

    A <del> qualifies when its next sibling is an <ins>, each of the two has
    exactly one child, and both children are elements whose tag names are in
    `child_tag_names`. For a qualifying pair the two children are unwrapped
    and the del/ins pair is wrapped in a new element that takes the inserted
    child's tag name and attributes. The dom is modified in place.
    """
    # Delete tags are always ordered first.
    for removed in list(dom.getElementsByTagName("del")):
        added = removed.nextSibling

        # Each wrapper must contain exactly one node.
        if len(removed.childNodes) != 1:
            continue
        if added is None or len(added.childNodes) != 1:
            continue
        if added.tagName != "ins":
            continue

        # The single child of the <del> must be one of the accepted tags.
        removed_child = removed.firstChild
        if not is_element(removed_child):
            continue
        if removed_child.tagName not in child_tag_names:
            continue

        # The single child of the <ins> must be one of the accepted tags.
        added_child = added.firstChild
        if not is_element(added_child):
            continue
        if added_child.tagName not in child_tag_names:
            continue

        # Copy the attributes now; the child is unwrapped just below.
        carried = {key: value for key, value in added_child.attributes.items()}
        for inner in (removed_child, added_child):
            unwrap(inner)

        replacement = wrap_nodes([removed, added], added_child.tagName)
        for key, value in carried.items():
            replacement.setAttribute(key, value)
Пример #5
0
def distribute(node):
    """
    Push the given element down one level in the tree.

    The node is unwrapped, and the contents of each of its element children
    are then wrapped in a new element with the node's tag name. Non-element
    children are not wrapped.
    """
    # Gather the element children first; unwrap() takes the node away.
    element_children = [child for child in node.childNodes if is_element(child)]
    unwrap(node)
    wrapper_name = node.tagName
    for child in element_children:
        wrap_inner(child, wrapper_name)
Пример #6
0
def distribute(node):
    """
    Replace an element with copies of itself inside each element child.

    Only children accepted by is_element() are affected: after the node is
    unwrapped, the contents of each such child are wrapped in a new element
    carrying the node's tag name.
    """
    targets = [
        candidate
        for candidate in node.childNodes
        if is_element(candidate)
    ]
    # The children were collected above because the node is removed here.
    unwrap(node)
    name = node.tagName
    for target in targets:
        wrap_inner(target, name)
Пример #7
0
def remove_nesting(dom, tag_name):
    """
    Unwrap items in the node list that have ancestors with the same tag.

    Every element named `tag_name` under `dom` is checked against its
    ancestors. The walk up the tree stops at `dom.documentElement`, so the
    document element itself is never treated as an enclosing tag. The dom is
    modified in place.
    """
    # Snapshot the matches before mutating the tree: unwrap() restructures
    # the document, and iterating a live node list while doing so could skip
    # nodes. The other tree-rewriting functions here snapshot the same way.
    for node in list(dom.getElementsByTagName(tag_name)):
        for ancestor in ancestors(node):
            # The ancestor sequence may include the node itself; ignore it.
            if ancestor is node:
                continue
            # Do not look at or beyond the document element.
            if ancestor is dom.documentElement:
                break
            if ancestor.tagName == tag_name:
                unwrap(node)
                break
Пример #8
0
def remove_nesting(dom, tag_name):
    """
    Unwrap items in the node list that have ancestors with the same tag.

    For each element named `tag_name`, its ancestors are scanned up to (but
    not including) `dom.documentElement`; if one of them has the same tag
    name, the element is unwrapped. The dom is modified in place.
    """
    # Take a snapshot of the matching elements first. unwrap() changes the
    # tree during the loop, so iterating a live node list directly could miss
    # elements; the sibling functions that rewrite the tree copy it as well.
    matches = list(dom.getElementsByTagName(tag_name))
    for node in matches:
        for ancestor in ancestors(node):
            # Skip the node itself if the ancestor sequence yields it.
            if ancestor is node:
                continue
            # Stop at the document element without comparing its tag.
            if ancestor is dom.documentElement:
                break
            if ancestor.tagName == tag_name:
                unwrap(node)
                break
Пример #9
0
def fix_lists(dom):
    """
    Move <ins>/<del> wrappers that surround list items into those items.

    <ins> and <del> tags are not allowed within <ul> or <ol> tags. Each one
    that is the direct parent of an li is moved to the nearest li, so that
    the numbering isn't interrupted. The dom is modified in place.
    """
    # Find all del > li and ins > li sets.
    ins_parents = set()
    del_parents = set()
    buckets = {'del': del_parents, 'ins': ins_parents}
    for li in list(dom.getElementsByTagName('li')):
        owner = li.parentNode
        bucket = buckets.get(owner.tagName)
        if bucket is not None:
            bucket.add(owner)

    # Change ins > li into li > ins.
    for ins_parent in ins_parents:
        distribute(ins_parent)

    # Change del > li into li.del-li > del.
    for del_parent in del_parents:
        # Hold on to the children; the wrapper is removed by unwrap().
        released = list(del_parent.childNodes)
        unwrap(del_parent)
        for entry in released:
            if entry.nodeName != 'li':
                continue
            entry.setAttribute('class', 'del-li')
            wrap_inner(entry, 'del')
Пример #10
0
def _strip_changes_old(node):
    """
    Strip change markup from the subtree, keeping the deleted side.

    Every <ins> element under `node` is passed to remove_node(), then every
    remaining <del> element is passed to unwrap(). The subtree is modified
    in place.
    """
    # Snapshot each node list before mutating the tree, as the other
    # tree-rewriting functions here do; iterating a live node list while
    # removing nodes from it could skip elements.
    for ins_node in list(node.getElementsByTagName('ins')):
        remove_node(ins_node)
    # Looked up only after the <ins> pass, so the tree is queried in its
    # post-removal state.
    for del_node in list(node.getElementsByTagName('del')):
        unwrap(del_node)
Пример #11
0
def _strip_changes_old(node):
    """
    Remove inserted content and unwrap deleted content under `node`.

    All <ins> elements are handed to remove_node() first; afterwards all
    <del> elements still found under `node` are handed to unwrap(). The
    subtree is modified in place.
    """
    # Copy the matches before editing the tree. The sibling functions that
    # rewrite the tree do the same, and it keeps the loop correct even if
    # the node list is live and shrinks as nodes are removed.
    inserted = list(node.getElementsByTagName('ins'))
    for ins_node in inserted:
        remove_node(ins_node)

    # Queried after the <ins> pass so it reflects the tree as it is now.
    deleted = list(node.getElementsByTagName('del'))
    for del_node in deleted:
        unwrap(del_node)