def commentSubstitutions(self, ElementTree, w3c_compat=False,
                         w3c_compat_substitutions=False,
                         w3c_compat_crazy_substitutions=False, **kwargs):
    """Expand marker comments in ``ElementTree`` in place.

    Two passes are made over ``ElementTree.iter()``:

    1. Link pass: after a ``begin-link`` comment a new ``a`` element is
       inserted. It takes over the comment's tail text, and every following
       node that shares the comment's parent is deep-copied into it.  When
       the matching ``end-link`` comment is reached, interactive content is
       stripped from the new element (via
       ``utils.removeInteractiveContentChildren``) and its ``href`` is set
       to its own text content.
    2. Basic substitutions: for each ``(comment, sub)`` pair, a
       ``begin-<comment>`` / ``end-<comment>`` range has its content
       replaced by a copy of ``sub``, and a lone ``<comment>`` marker is
       replaced by ``begin-<comment>``, a copy of ``sub`` and
       ``end-<comment>``.

    Nodes that were replaced are collected during the passes and removed
    from their parents at the very end.

    Parameters:
        ElementTree: tree (or element) exposing ``iter()``.
        w3c_compat, w3c_compat_substitutions: when either is true, the
            ``logo`` and ``copyright`` pairs are substituted as well.
        w3c_compat_crazy_substitutions: accepted but not used here.
        **kwargs: accepted but not used here.

    Raises:
        DifferentParentException: if an end marker does not have the same
            parent as its begin marker.
    """
    # Copy into a fresh tuple so the ``+=`` below can never extend the
    # module-level collection in place (which it would if that were a list).
    instance_basic_comment_subs = tuple(basic_comment_subs)

    # Add more basic substitutions in compat. mode
    if w3c_compat or w3c_compat_substitutions:
        instance_basic_comment_subs += ((logo, logo_sub),
                                        (copyright, copyright_sub))

    # Set of nodes to remove
    to_remove = set()

    # Link
    in_link = False
    for node in ElementTree.iter():
        if in_link:
            if node.tag is etree.Comment and \
               node.text.strip(utils.spaceCharacters) == u"end-link":
                if node.getparent() is not link_parent:
                    raise DifferentParentException(
                        u"begin-link and end-link have different parents")
                utils.removeInteractiveContentChildren(link)
                link.set(u"href", utils.textContent(link))
                in_link = False
            else:
                # Only direct siblings are copied; their descendants travel
                # along inside the deep copy.
                if node.getparent() is link_parent:
                    link.append(deepcopy(node))
                to_remove.add(node)
        elif node.tag is etree.Comment and \
                node.text.strip(utils.spaceCharacters) == u"begin-link":
            link_parent = node.getparent()
            in_link = True
            link = etree.Element(u"a")
            # The text following the comment becomes the link's leading text
            link.text = node.tail
            node.tail = None
            node.addnext(link)

    # Basic substitutions
    for comment, sub in instance_basic_comment_subs:
        begin_sub = u"begin-" + comment
        end_sub = u"end-" + comment
        in_sub = False
        for node in ElementTree.iter():
            if in_sub:
                if node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == end_sub:
                    if node.getparent() is not sub_parent:
                        # The arguments must be a tuple: ``%`` binds tighter
                        # than the comma.
                        raise DifferentParentException(
                            u"%s and %s have different parents"
                            % (begin_sub, end_sub))
                    in_sub = False
                else:
                    # Old content between the markers is discarded
                    to_remove.add(node)
            elif node.tag is etree.Comment:
                marker = node.text.strip(utils.spaceCharacters)
                if marker == begin_sub:
                    sub_parent = node.getparent()
                    in_sub = True
                    node.tail = None
                    node.addnext(deepcopy(sub))
                elif marker == comment:
                    # Expand the short form into begin/sub/end, keeping the
                    # original tail after the new end marker.
                    node.addprevious(etree.Comment(begin_sub))
                    node.addprevious(deepcopy(sub))
                    node.addprevious(etree.Comment(end_sub))
                    node.getprevious().tail = node.tail
                    to_remove.add(node)

    # Remove nodes
    for node in to_remove:
        node.getparent().remove(node)
def commentSubstitutions(self, ElementTree, w3c_compat=False,
                         w3c_compat_substitutions=False,
                         w3c_compat_crazy_substitutions=False, **kwargs):
    """Expand marker comments in ``ElementTree`` in place.

    Two passes are made over ``ElementTree.iter()``:

    1. Link pass: after a ``begin-link`` comment a new ``a`` element is
       inserted. It takes over the comment's tail text, and every following
       node that shares the comment's parent is deep-copied into it.  When
       the matching ``end-link`` comment is reached, interactive content is
       stripped from the new element (via
       ``utils.removeInteractiveContentChildren``) and its ``href`` is set
       to its own text content.
    2. Basic substitutions: for each ``(comment, sub)`` pair, a
       ``begin-<comment>`` / ``end-<comment>`` range has its content
       replaced by a copy of ``sub``, and a lone ``<comment>`` marker is
       replaced by ``begin-<comment>``, a copy of ``sub`` and
       ``end-<comment>``.

    Nodes that were replaced are collected during the passes and removed
    from their parents at the very end.

    Parameters:
        ElementTree: tree (or element) exposing ``iter()``.
        w3c_compat, w3c_compat_substitutions: when either is true, the
            ``logo`` and ``copyright`` pairs are substituted as well.
        w3c_compat_crazy_substitutions: accepted but not used here.
        **kwargs: accepted but not used here.

    Raises:
        DifferentParentException: if an end marker does not have the same
            parent as its begin marker.
    """
    # Copy into a fresh tuple so the ``+=`` below can never extend the
    # module-level collection in place (which it would if that were a list).
    instance_basic_comment_subs = tuple(basic_comment_subs)

    # Add more basic substitutions in compat. mode
    if w3c_compat or w3c_compat_substitutions:
        instance_basic_comment_subs += ((logo, logo_sub),
                                        (copyright, copyright_sub))

    # Set of nodes to remove
    to_remove = set()

    # Link
    in_link = False
    for node in ElementTree.iter():
        if in_link:
            if node.tag is etree.Comment and \
               node.text.strip(utils.spaceCharacters) == u"end-link":
                if node.getparent() is not link_parent:
                    raise DifferentParentException(
                        u"begin-link and end-link have different parents")
                utils.removeInteractiveContentChildren(link)
                link.set(u"href", utils.textContent(link))
                in_link = False
            else:
                # Only direct siblings are copied; their descendants travel
                # along inside the deep copy.
                if node.getparent() is link_parent:
                    link.append(deepcopy(node))
                to_remove.add(node)
        elif node.tag is etree.Comment and \
                node.text.strip(utils.spaceCharacters) == u"begin-link":
            link_parent = node.getparent()
            in_link = True
            link = etree.Element(u"a")
            # The text following the comment becomes the link's leading text
            link.text = node.tail
            node.tail = None
            node.addnext(link)

    # Basic substitutions
    for comment, sub in instance_basic_comment_subs:
        begin_sub = u"begin-" + comment
        end_sub = u"end-" + comment
        in_sub = False
        for node in ElementTree.iter():
            if in_sub:
                if node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == end_sub:
                    if node.getparent() is not sub_parent:
                        # The arguments must be a tuple: ``%`` binds tighter
                        # than the comma.
                        raise DifferentParentException(
                            u"%s and %s have different parents"
                            % (begin_sub, end_sub))
                    in_sub = False
                else:
                    # Old content between the markers is discarded
                    to_remove.add(node)
            elif node.tag is etree.Comment:
                marker = node.text.strip(utils.spaceCharacters)
                if marker == begin_sub:
                    sub_parent = node.getparent()
                    in_sub = True
                    node.tail = None
                    node.addnext(deepcopy(sub))
                elif marker == comment:
                    # Expand the short form into begin/sub/end, keeping the
                    # original tail after the new end marker.
                    node.addprevious(etree.Comment(begin_sub))
                    node.addprevious(deepcopy(sub))
                    node.addprevious(etree.Comment(end_sub))
                    node.getprevious().tail = node.tail
                    to_remove.add(node)

    # Remove nodes
    for node in to_remove:
        node.getparent().remove(node)
def commentSubstitutions(self, ElementTree, w3c_compat=False,
                         w3c_compat_substitutions=False,
                         w3c_compat_crazy_substitutions=False,
                         enable_woolly=False, **kwargs):
    """Expand marker comments in ``ElementTree`` in place.

    First, every ``begin-link`` / ``end-link`` comment pair is turned into
    an ``a`` element: the element is inserted after the ``begin-link``
    comment, takes over that comment's tail text, and receives deep copies
    of the following nodes that share the comment's parent.  At ``end-link``
    interactive content is stripped from it (via
    ``utils.removeInteractiveContentChildren``) and its ``href`` is set to
    its own text content.  The copied originals are removed at the end.

    Then ``utils.replaceComment`` is called once for every
    ``(comment, sub)`` pair.

    Parameters:
        ElementTree: tree (or element) exposing ``iter()``.
        w3c_compat, w3c_compat_substitutions: when either is true, ``logo``
            and ``copyright`` substitutions are added; the copyright year is
            ``self.pubdate`` formatted with ``time.strftime("%Y", ...)``.
        w3c_compat_crazy_substitutions: accepted but not used here.
        enable_woolly: when true, a second logo link is appended to the
            logo markup.
        **kwargs: forwarded to ``utils.replaceComment``.

    Raises:
        utils.DifferentParentException: if ``end-link`` does not have the
            same parent as its ``begin-link``.
    """
    # Copy into a fresh tuple so the ``+=`` below can never extend the
    # module-level collection in place (which it would if that were a list).
    instance_basic_comment_subs = tuple(basic_comment_subs)

    # Add more basic substitutions in compat. mode
    if w3c_compat or w3c_compat_substitutions:
        copyright_sub = etree.fromstring(
            '<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © %s <a href="http://www.w3.org/"><abbr title="World Wide Web Consortium">W3C</abbr></a><sup>®</sup> (<a href="http://www.csail.mit.edu/"><abbr title="Massachusetts Institute of Technology">MIT</abbr></a>, <a href="http://www.ercim.eu/"><abbr title="European Research Consortium for Informatics and Mathematics">ERCIM</abbr></a>, <a href="http://www.keio.ac.jp/">Keio</a>, <a href="http://ev.buaa.edu.cn/">Beihang</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>'
            % time.strftime("%Y", self.pubdate))

        logo_str = '<a href="http://www.w3.org/"><img height="48" width="72" alt="W3C" src="https://www.w3.org/Icons/w3c_home"/></a>'
        if enable_woolly:
            logo_str += '<a class="logo" href="https://www.w3.org/Style/Group/" rel="in-activity"><img alt="CSS WG" src="https://www.w3.org/Style/Woolly/woolly-icon"/></a>'
        logo_sub = etree.fromstring('<p>%s</p>' % logo_str)

        instance_basic_comment_subs += (("logo", logo_sub),
                                        ("copyright", copyright_sub))

    # Set of nodes to remove
    to_remove = set()

    # Link: ``link_parent`` doubles as the "inside a link range" flag
    link_parent = None
    link = None
    for node in ElementTree.iter():
        if link_parent is not None:
            if node.tag is etree.Comment and \
               node.text.strip(utils.spaceCharacters) == "end-link":
                if node.getparent() is not link_parent:
                    raise utils.DifferentParentException(
                        "begin-link and end-link have different parents")
                utils.removeInteractiveContentChildren(link)
                link.set("href", utils.textContent(link))
                link_parent = None
            else:
                # Only direct siblings are copied; their descendants travel
                # along inside the deep copy.
                if node.getparent() is link_parent:
                    link.append(deepcopy(node))
                to_remove.add(node)
        elif node.tag is etree.Comment and \
                node.text.strip(utils.spaceCharacters) == "begin-link":
            link_parent = node.getparent()
            link = etree.Element("a")
            # The text following the comment becomes the link's leading text
            link.text = node.tail
            node.tail = None
            node.addnext(link)

    # Basic substitutions
    for comment, sub in instance_basic_comment_subs:
        utils.replaceComment(ElementTree, comment, sub, **kwargs)

    # Remove nodes
    for node in to_remove:
        node.getparent().remove(node)
def buildToc(self, ElementTree, min_depth=2, max_depth=6, w3c_compat=False,
             w3c_compat_class_toc=False, **kwargs):
    """Number the section headers of ``ElementTree`` and populate ``self.toc``.

    The document outline is obtained from ``outliner.Outliner`` and walked
    depth-first.  For every section:

    * any existing ``span`` with class ``secno`` is removed from the header
      text element, whatever the section's depth;
    * if the section's depth lies within ``min_depth``..``max_depth``
      (both 1-based, while the outline depth is 0-based), the running
      section number is updated, an ``li`` is added at the matching level of
      ``self.toc`` (creating intermediate ``li``/``ol`` elements on demand),
      the header receives a generated ``id``, a ``secno`` span holding the
      dotted number is prepended, and a cleaned-up ``a`` copy of the header
      text is put into the TOC item.

    The classes ``no-num`` and ``no-toc`` on the header text element
    suppress numbering and the TOC entry respectively.

    Parameters:
        ElementTree: document handed to ``outliner.Outliner``.
        min_depth, max_depth: inclusive 1-based depth range to process.
        w3c_compat, w3c_compat_class_toc: when either is true, nested ``ol``
            elements created here get ``class="toc"``.
        **kwargs: forwarded to the outliner, ``self.indentNode`` and
            ``utils.generateID``.
    """
    # Build the outline of the document
    outline = outliner.Outliner(ElementTree, **kwargs).build(**kwargs)

    # Stack of (section, depth) pairs; reversed so that pop() yields the
    # sections in document order.
    pending = [(top_section, 0) for top_section in reversed(outline)]

    # Current section number, one entry per numbered level
    num = []

    while pending:
        section, depth = pending.pop()

        # Find the element carrying the header's text (if there is one)
        if section.header is None:
            header_text = None
        elif section.header.tag == u"hgroup":
            # Highest-ranked heading inside the hgroup; header_text is left
            # as None when no h1..h6 is found.
            for rank in range(1, 7):
                header_text = section.header.find(u".//h" + unicode(rank))
                if header_text is not None:
                    break
        else:
            header_text = section.header

        # Strip any previously generated number, regardless of depth
        if header_text is not None:
            for span in header_text.findall(u".//span"):
                if not utils.elementHasClass(span, u"secno"):
                    continue
                # Copy content, to prepare for the node being removed
                utils.copyContentForRemoval(span, text=False, children=False)
                # Safe while looping: findall() returned a list
                span.getparent().remove(span)

        # min/max_depth are 1 based, depth is 0 based
        if min_depth - 1 <= depth <= max_depth - 1:
            # Depth within the numbering/TOC
            corrected_depth = depth - min_depth + 1

            # Numbering: drop deeper levels when moving back up, open a new
            # level when moving down.
            if corrected_depth + 1 < len(num):
                del num[corrected_depth + 1:]
            elif corrected_depth == len(num):
                num.append(0)

            # Does this section consume a number?
            if header_text is None:
                counts = bool(section)
            else:
                counts = not utils.elementHasClass(header_text, u"no-num")
            if counts:
                num[-1] += 1

            # Does this section get a TOC item?
            if header_text is None:
                listed = bool(section)
            else:
                listed = not utils.elementHasClass(header_text, u"no-toc")
            if listed:
                # Descend to the ol for this depth, creating li/ol on demand
                toc_section = self.toc
                for level in range(corrected_depth):
                    try:
                        # If the final li has no children, or the last
                        # child isn't an ol element
                        if len(toc_section[-1]) == 0 or \
                           toc_section[-1][-1].tag != u"ol":
                            toc_section[-1].append(etree.Element(u"ol"))
                            self.indentNode(toc_section[-1][-1],
                                            (level + 1) * 2, **kwargs)
                            if w3c_compat or w3c_compat_class_toc:
                                toc_section[-1][-1].set(u"class", u"toc")
                    except IndexError:
                        # If the current ol has no li in it
                        toc_section.append(etree.Element(u"li"))
                        self.indentNode(toc_section[0],
                                        (level + 1) * 2 - 1, **kwargs)
                        toc_section[0].append(etree.Element(u"ol"))
                        self.indentNode(toc_section[0][0],
                                        (level + 1) * 2, **kwargs)
                        if w3c_compat or w3c_compat_class_toc:
                            toc_section[0][0].set(u"class", u"toc")
                    # Continue in the final ol of the final li
                    assert toc_section[-1].tag == u"li"
                    assert toc_section[-1][-1].tag == u"ol"
                    toc_section = toc_section[-1][-1]

                # Add the current item to the TOC
                item = etree.Element(u"li")
                toc_section.append(item)
                self.indentNode(item, (corrected_depth + 1) * 2 - 1, **kwargs)

            if header_text is not None:
                # The ID lives on the header itself (the hgroup, if any)
                header_id = utils.generateID(header_text, **kwargs)
                if header_text.get(u"id") is not None:
                    del header_text.attrib[u"id"]
                section.header.set(u"id", header_id)

                # Add number, if @class doesn't contain no-num
                if not utils.elementHasClass(header_text, u"no-num"):
                    header_text[0:0] = [etree.Element(u"span",
                                                      {u"class": u"secno"})]
                    secno = header_text[0]
                    # The header's leading text now follows the number
                    secno.tail = header_text.text
                    header_text.text = None
                    secno.text = u".".join(map(unicode, num))
                    secno.text += u" "

                # Add to TOC, if @class doesn't contain no-toc
                if not utils.elementHasClass(header_text, u"no-toc"):
                    link = deepcopy(header_text)
                    item.append(link)

                    # Make it link to the header
                    link.tag = u"a"
                    link.set(u"href", u"#" + header_id)

                    # Remove interactive content child elements
                    utils.removeInteractiveContentChildren(link)

                    # Remove other child elements
                    for element_name in remove_elements_from_toc:
                        for unwanted in link.findall(u".//" + element_name):
                            # Copy content, to prepare for the node being
                            # removed
                            utils.copyContentForRemoval(unwanted)
                            # Safe while looping: findall() returned a list
                            unwanted.getparent().remove(unwanted)

                    # Remove unwanted attributes
                    for descendant in link.iter(tag=etree.Element):
                        for attribute_name in remove_attributes_from_toc:
                            if descendant.get(attribute_name) is not None:
                                del descendant.attrib[attribute_name]

                    # We don't want the old tail
                    link.tail = None

                    # Check we haven't changed the content in all of that
                    assert utils.textContent(header_text) == \
                        utils.textContent(link)

        # Push subsections in reverse order (so the first one is handled
        # next), one level deeper.
        pending.extend([(child_section, depth + 1)
                        for child_section in reversed(section)])