def fix_lists(dom):
    """Move ``<ins>``/``<del>`` markup that wraps ``<li>`` elements inside them.

    ``<ins>`` and ``<del>`` tags are not allowed directly within ``<ul>`` or
    ``<ol>`` tags. The change markup is pushed down into the nearest ``<li>``
    so that the list numbering isn't interrupted:

    * ``ins > li`` becomes ``li > ins``
    * ``del > li`` becomes ``li.del-li > del``

    The DOM is modified in place and nothing is returned.
    """
    # Let adjacent del/ins pairs that each wrap a single li be handled first.
    _internalize_changes_markup(dom, {'li'})

    # Find all del > li and ins > li sets.
    del_tags = set()
    ins_tags = set()
    for node in list(dom.getElementsByTagName('li')):
        parent = node.parentNode
        # Compare nodeName rather than tagName: every node type has a
        # nodeName, while tagName exists only on elements, so an li whose
        # parent is e.g. the document itself is skipped instead of raising.
        parent_name = parent.nodeName
        if parent_name == 'del':
            del_tags.add(parent)
        elif parent_name == 'ins':
            ins_tags.add(parent)

    # Change ins > li into li > ins.
    for ins_tag in ins_tags:
        distribute(ins_tag)

    # Change del > li into li.del-li > del.
    for del_tag in del_tags:
        # Snapshot the children before the del wrapper is taken away.
        children = list(del_tag.childNodes)
        unwrap(del_tag)
        for c in children:
            if c.nodeName == 'li':
                c.setAttribute('class', 'del-li')
                wrap_inner(c, 'del')
def fix_lists(dom):
    """Move ``<ins>``/``<del>`` markup that wraps ``<li>`` elements inside them.

    ``<ins>`` and ``<del>`` tags are not allowed directly within ``<ul>`` or
    ``<ol>`` tags. The change markup is pushed down into the nearest ``<li>``
    so that the list numbering isn't interrupted:

    * ``ins > li`` becomes ``li > ins``
    * ``del > li`` becomes ``li.del-li > del``

    The DOM is modified in place and nothing is returned.
    """
    # Let adjacent del/ins pairs that each wrap a single li be handled first.
    _internalize_changes_markup(dom, {"li"})

    # Find all del > li and ins > li sets.
    del_tags = set()
    ins_tags = set()
    for node in list(dom.getElementsByTagName("li")):
        parent = node.parentNode
        # Compare nodeName rather than tagName: every node type has a
        # nodeName, while tagName exists only on elements, so an li whose
        # parent is e.g. the document itself is skipped instead of raising.
        parent_name = parent.nodeName
        if parent_name == "del":
            del_tags.add(parent)
        elif parent_name == "ins":
            ins_tags.add(parent)

    # Change ins > li into li > ins.
    for ins_tag in ins_tags:
        distribute(ins_tag)

    # Change del > li into li.del-li > del.
    for del_tag in del_tags:
        # Snapshot the children before the del wrapper is taken away.
        children = list(del_tag.childNodes)
        unwrap(del_tag)
        for c in children:
            if c.nodeName == "li":
                c.setAttribute("class", "del-li")
                wrap_inner(c, "del")
def _internalize_changes_markup(dom, child_tag_names):
    """Merge adjacent ``<del>``/``<ins>`` pairs that each wrap one listed tag.

    A pair qualifies when a ``<del>`` is immediately followed by an ``<ins>``
    and each of them has exactly one child, that child being an element whose
    tag name is in ``child_tag_names``. For a qualifying pair, both child
    elements are passed to ``unwrap`` and the ``<del>``/``<ins>`` pair is
    handed to ``wrap_nodes`` with the inserted child's tag name; the node
    returned by ``wrap_nodes`` then receives the inserted child's attributes.
    """
    # Delete tags are always ordered first, so each pair is located from
    # its del element.
    for removed in list(dom.getElementsByTagName('del')):
        added = removed.nextSibling

        # The del must hold exactly one child ...
        if len(removed.childNodes) != 1:
            continue
        # ... and be directly followed by an ins that also holds exactly one.
        if added is None or len(added.childNodes) != 1 or added.tagName != 'ins':
            continue

        # The one child of the del should be one of `child_tag_names`.
        old_child = removed.firstChild
        if not (is_element(old_child) and old_child.tagName in child_tag_names):
            continue

        # The one child of the ins should be one of `child_tag_names`.
        new_child = added.firstChild
        if not (is_element(new_child) and new_child.tagName in child_tag_names):
            continue

        # Copy the attributes out before the inserted child is unwrapped.
        saved_attrs = dict(new_child.attributes.items())

        unwrap(old_child)
        unwrap(new_child)

        wrapper = wrap_nodes([removed, added], new_child.tagName)
        for attr_name in saved_attrs:
            wrapper.setAttribute(attr_name, saved_attrs[attr_name])
def _internalize_changes_markup(dom, child_tag_names):
    """Merge adjacent ``<del>``/``<ins>`` pairs that each wrap one listed tag.

    A pair qualifies when a ``<del>`` is immediately followed by an ``<ins>``
    and each of them has exactly one child, that child being an element whose
    tag name is in ``child_tag_names``. For a qualifying pair, both child
    elements are passed to ``unwrap`` and the ``<del>``/``<ins>`` pair is
    handed to ``wrap_nodes`` with the inserted child's tag name; the node
    returned by ``wrap_nodes`` then receives the inserted child's attributes.
    """
    # Delete tags are always ordered first, so each pair is located from
    # its del element.
    for removed in list(dom.getElementsByTagName("del")):
        added = removed.nextSibling

        # The del must hold exactly one child ...
        if len(removed.childNodes) != 1:
            continue
        # ... and be directly followed by an ins that also holds exactly one.
        if added is None or len(added.childNodes) != 1 or added.tagName != "ins":
            continue

        # The one child of the del should be one of `child_tag_names`.
        old_child = removed.firstChild
        if not (is_element(old_child) and old_child.tagName in child_tag_names):
            continue

        # The one child of the ins should be one of `child_tag_names`.
        new_child = added.firstChild
        if not (is_element(new_child) and new_child.tagName in child_tag_names):
            continue

        # Copy the attributes out before the inserted child is unwrapped.
        saved_attrs = dict(new_child.attributes.items())

        unwrap(old_child)
        unwrap(new_child)

        wrapper = wrap_nodes([removed, added], new_child.tagName)
        for attr_name in saved_attrs:
            wrapper.setAttribute(attr_name, saved_attrs[attr_name])
def distribute(node):
    """
    Push the given element down onto its element children.

    The node itself is unwrapped, and the contents of each of its former
    element children are then wrapped in a new element with the node's tag
    name. Non-element children are not wrapped.
    """
    # Collect the element children first; the node is taken out next.
    element_children = [child for child in node.childNodes if is_element(child)]
    unwrap(node)
    for child in element_children:
        wrap_inner(child, node.tagName)
def remove_nesting(dom, tag_name):
    """
    Unwrap every ``tag_name`` element that has an ancestor with the same tag.

    The ancestor walk for each element ends at the document element, which
    is never itself counted as an enclosing tag.
    """
    for candidate in dom.getElementsByTagName(tag_name):
        for parent in ancestors(candidate):
            # The element itself is skipped if it shows up in its own chain.
            if parent is not candidate:
                if parent is dom.documentElement:
                    # Reached the top without finding an enclosing tag.
                    break
                if parent.tagName == tag_name:
                    unwrap(candidate)
                    break
def fix_lists(dom):
    """Move ``<ins>``/``<del>`` markup that wraps ``<li>`` elements inside them.

    ``<ins>`` and ``<del>`` tags are not allowed directly within ``<ul>`` or
    ``<ol>`` tags. The change markup is pushed down into the nearest ``<li>``
    so that the list numbering isn't interrupted:

    * ``ins > li`` becomes ``li > ins``
    * ``del > li`` becomes ``li.del-li > del``

    The DOM is modified in place and nothing is returned.
    """
    # Find all del > li and ins > li sets.
    del_tags = set()
    ins_tags = set()
    for node in list(dom.getElementsByTagName('li')):
        parent = node.parentNode
        # Compare nodeName rather than tagName: every node type has a
        # nodeName, while tagName exists only on elements, so an li whose
        # parent is e.g. the document itself is skipped instead of raising.
        parent_name = parent.nodeName
        if parent_name == 'del':
            del_tags.add(parent)
        elif parent_name == 'ins':
            ins_tags.add(parent)

    # Change ins > li into li > ins.
    for ins_tag in ins_tags:
        distribute(ins_tag)

    # Change del > li into li.del-li > del.
    for del_tag in del_tags:
        # Snapshot the children before the del wrapper is taken away.
        children = list(del_tag.childNodes)
        unwrap(del_tag)
        for c in children:
            if c.nodeName == 'li':
                c.setAttribute('class', 'del-li')
                wrap_inner(c, 'del')
def _strip_changes_old(node):
    """Strip change markup below ``node``.

    Each ``<ins>`` element is passed to ``remove_node`` and, afterwards, each
    ``<del>`` element is passed to ``unwrap``.
    """
    # The ins pass runs to completion before the del elements are looked up.
    for tag, handle in (('ins', remove_node), ('del', unwrap)):
        for changed in node.getElementsByTagName(tag):
            handle(changed)