def munge(document, template, linkrel, d, fullpath, ext, url, config):
    """
    Transform C{document} in place and move its content into C{template}.

    @param document: The parsed input document; it is modified in place.
    @param template: The parsed template which receives the title, the
        authors and the body content of C{document}.
    @param linkrel: Passed to C{fixRelativeLinks} along with the template.
    @param d: Passed to C{addPyListings} and C{addHTMLListings}.
    @param fullpath: Passed to C{addMtime} along with the template.
    @param ext: Passed to C{fixLinks}.
    @param url: Passed to C{fixAPI}.
    @param config: Accepted but not read by this function.

    @return: C{None}
    """
    # Template preparation.
    fixRelativeLinks(template, linkrel)
    addMtime(template, fullpath)

    # Document transformations, applied in this order.
    removeH1(document)
    fixAPI(document, url)
    fontifyPython(document)
    addPyListings(document, d)
    addHTMLListings(document, d)
    fixLinks(document, ext)
    tableOfContents = generateToC(document)
    putInToC(template, tableOfContents)
    footnotes(document)
    notes(document)

    # Insert the document into the template
    titleElement = domhelpers.findNodesNamed(document, 'title')[0]
    setTitle(template, titleElement.childNodes)

    authorPairs = []
    for link in domhelpers.findNodesNamed(document, 'link'):
        if link.getAttribute('rel', '') != 'author':
            continue
        authorPairs.append((link.getAttribute('title', ''),
                            link.getAttribute('href', '')))
    setAuthors(template, authorPairs)

    sourceBody = domhelpers.findNodesNamed(document, "body")[0]
    targetBody = domhelpers.findElementsWithAttribute(
        template, "class", "body")[0]
    targetBody.childNodes = sourceBody.childNodes
    targetBody.setAttribute("class", "content")
def check_80_columns(self, dom, filename):
    """
    Report C{pre} text and linked listing files wider than 80 columns.

    One error is reported through C{self._reportError} for every offending
    line.  A listing which cannot be located or read is reported as a bad
    href and skipped.

    @param dom: The parsed document to check.
    @param filename: The name of the file C{dom} came from; listing hrefs
        are resolved relative to its directory.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(dom, 'pre'):
        # the ps/pdf output is in a font that cuts off at 80 characters,
        # so this is enforced to make sure the interesting parts (which
        # are likely to be on the right-hand edge) stay on the printed
        # page.
        for line in domhelpers.gatherTextNodes(node, 1).split('\n'):
            if len(line.rstrip()) > 80:
                self._reportError(filename, node,
                                  'text wider than 80 columns in pre')
    for node in domhelpers.findNodesNamed(dom, 'a'):
        if not node.getAttribute('class', '').endswith('listing'):
            continue
        try:
            fn = os.path.dirname(filename)
            fn = os.path.join(fn, node.getAttribute('href'))
            # Close the listing as soon as it has been read instead of
            # leaving the handle to the garbage collector.
            with open(fn, 'r') as listing:
                lines = listing.readlines()
        except Exception:
            # Still best-effort for any ordinary failure, but no longer
            # swallows KeyboardInterrupt or SystemExit.
            self._reportError(filename, node,
                              'bad listing href: %r' %
                              node.getAttribute('href'))
            continue

        for line in lines:
            if len(line.rstrip()) > 80:
                self._reportError(filename, node,
                                  'listing wider than 80 columns')
def check_lists(self, dom, filename):
    """
    Report C{ul} and C{ol} nodes which have no child nodes or which have a
    child node that is not named C{li}.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: C{None}
    """
    unordered = domhelpers.findNodesNamed(dom, "ul")
    ordered = domhelpers.findNodesNamed(dom, "ol")
    for listNode in unordered + ordered:
        if not listNode.childNodes:
            self._reportError(filename, listNode, "empty list")
        for item in listNode.childNodes:
            if item.nodeName == "li":
                continue
            # One report per offending child.
            self._reportError(filename, listNode,
                              "only list items allowed in lists")
def templateMutate(self, document, parentCount=0):
    """
    Wrap C{document} in a copy of C{self.metaTemplate}, if one is set.

    The copy has its C{href}, C{src} and C{action} attributes prefixed with
    C{parentCount} repetitions of C{'../'}, its title children replaced by
    those of C{document}, and the element whose C{class} is C{__BODY__}
    replaced by the children of the C{body} of C{document}.

    As a side effect the pretty-printed result is written to
    C{garbage.html} in the current working directory.

    @param document: The document providing the title and body content.
    @param parentCount: How many C{'../'} segments to prepend to link-like
        attributes of the template copy.

    @return: The populated template copy, or C{document} itself when
        C{self.metaTemplate} is false.
    """
    if not self.metaTemplate:
        return document

    newDoc = self.metaTemplate.cloneNode(1)
    if parentCount:
        dotdot = parentCount * '../'
        for ddname in 'href', 'src', 'action':
            for node in domhelpers.findElementsWithAttribute(
                    newDoc, ddname):
                node.setAttribute(ddname,
                                  dotdot + node.getAttribute(ddname))

    ttl = domhelpers.findNodesNamed(newDoc, "title")[0]
    ttl2 = domhelpers.findNodesNamed(document, "title")[0]
    ttl.childNodes[:] = []
    for n in ttl2.childNodes:
        ttl.appendChild(n)

    body = domhelpers.findElementsWithAttribute(
        newDoc, "class", "__BODY__")[0]
    body2 = domhelpers.findNodesNamed(document, "body")[0]
    ndx = body.parentNode.childNodes.index(body)
    # Splice the document's body children in place of the placeholder.
    body.parentNode.childNodes[ndx:ndx + 1] = body2.childNodes
    for n in body2.childNodes:
        n.parentNode = body.parentNode

    # NOTE(review): this looks like leftover debugging output -- confirm
    # whether it is still wanted.  The handle is now closed promptly
    # instead of being leaked.
    with open("garbage.html", "wb") as f:
        f.write(newDoc.toprettyxml())
    return newDoc
def check_80_columns(self, dom, filename):
    """
    Report C{pre} text and linked listing files wider than 80 columns.

    One error is reported through C{self._reportError} for every offending
    line.  A listing which cannot be located or read is reported as a bad
    href and skipped.

    @param dom: The parsed document to check.
    @param filename: The name of the file C{dom} came from; listing hrefs
        are resolved relative to its directory.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(dom, 'pre'):
        # the ps/pdf output is in a font that cuts off at 80 characters,
        # so this is enforced to make sure the interesting parts (which
        # are likely to be on the right-hand edge) stay on the printed
        # page.
        for line in domhelpers.gatherTextNodes(node, 1).split('\n'):
            if len(line.rstrip()) > 80:
                self._reportError(filename, node,
                                  'text wider than 80 columns in pre')
    for node in domhelpers.findNodesNamed(dom, 'a'):
        if not node.getAttribute('class').endswith('listing'):
            continue
        try:
            fn = os.path.dirname(filename)
            fn = os.path.join(fn, node.getAttribute('href'))
            # Close the listing as soon as it has been read instead of
            # leaving the handle to the garbage collector.
            with open(fn, 'r') as listing:
                lines = listing.readlines()
        except Exception:
            # Still best-effort for any ordinary failure, but no longer
            # swallows KeyboardInterrupt or SystemExit.
            self._reportError(
                filename, node,
                'bad listing href: %r' % node.getAttribute('href'))
            continue

        for line in lines:
            if len(line.rstrip()) > 80:
                self._reportError(filename, node,
                                  'listing wider than 80 columns')
def makeBook(dom, d):
    """
    Replace the body of C{dom} with a book assembled from its table of
    contents.

    The new body starts with the first C{h1} of C{dom}.  Every C{li} found
    under the first element whose C{class} is C{toc} then contributes
    either a heading (when its first child carries non-empty text) or the
    content returned by C{lowerDocument} for the C{href} of its second
    child.  Entries whose C{class} is C{tocignore} are skipped.

    @param dom: The document holding the table of contents; it is modified
        in place.
    @param d: Passed to C{lowerDocument} for each linked entry.

    @return: C{None}
    """
    book = microdom.Element('body')
    book.appendChild(domhelpers.findNodesNamed(dom, 'h1')[0])
    tocElement = domhelpers.findElementsWithAttribute(dom, 'class', 'toc')[0]
    for entry in domhelpers.findNodesNamed(tocElement, 'li'):
        ignored = (entry.hasAttribute('class') and
                   entry.getAttribute('class') == 'tocignore')
        if ignored:
            continue

        # The nesting depth is the number of enclosing ol/ul elements.
        depth = 0
        for ancestor in domhelpers.getParents(entry):
            if (hasattr(ancestor, 'tagName') and
                    ancestor.tagName in ('ol', 'ul')):
                depth += 1

        leadingText = entry.childNodes[0].data
        if leadingText != '':
            caption = microdom.Text(leadingText)
            heading = microdom.Element('h' + str(depth))
            heading.appendChild(caption)
            book.appendChild(heading)
        else:
            link = entry.childNodes[1]
            chapter = lowerDocument(link.getAttribute('href'), d, depth)
            for child in chapter.childNodes:
                book.appendChild(child)

    origBody = domhelpers.findNodesNamed(dom, 'body')[0]
    origBody.parentNode.replaceChild(book, origBody)
def check_lists(self, dom, filename):
    """
    Report C{ul} and C{ol} nodes which have no child nodes or which have a
    child node that is not named C{li}.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: C{None}
    """
    listNodes = domhelpers.findNodesNamed(dom, 'ul')
    listNodes = listNodes + domhelpers.findNodesNamed(dom, 'ol')
    for listNode in listNodes:
        if not listNode.childNodes:
            self._reportError(filename, listNode, 'empty list')
        for item in listNode.childNodes:
            if item.nodeName == 'li':
                continue
            # One report per offending child.
            self._reportError(filename, listNode,
                              'only list items allowed in lists')
def check_lists(self, dom, filename):
    """
    Report C{ul} and C{ol} nodes which have no child nodes or which have a
    child node that is not named C{li}.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: C{None}
    """
    bulleted = domhelpers.findNodesNamed(dom, 'ul')
    numbered = domhelpers.findNodesNamed(dom, 'ol')
    for candidate in bulleted + numbered:
        if not candidate.childNodes:
            self._reportError(filename, candidate, 'empty list')
        for child in candidate.childNodes:
            if child.nodeName == 'li':
                continue
            # One report per offending child.
            self._reportError(filename, candidate,
                              'only list items allowed in lists')
def getTitleLink(url):
    """
    Fetch C{http://moshez.org/discuss/rss} and extract the link and title
    text of its first C{item} node.

    @param url: Accepted but not used; the feed address is fixed.

    @return: The object returned by C{client.getPage}, with callbacks added
        whose final result is a two-tuple of (link text, title text).
    """
    def firstItem(feed):
        return domhelpers.findNodesNamed(feed, 'item')[0]

    def linkAndTitle(item):
        linkText = domhelpers.getNodeText(
            domhelpers.findNodesNamed(item, 'link')[0])
        titleText = domhelpers.getNodeText(
            domhelpers.findNodesNamed(item, 'title')[0])
        return (linkText, titleText)

    d = client.getPage("http://moshez.org/discuss/rss")
    d.addCallback(microdom.parseString)
    d.addCallback(firstItem)
    d.addCallback(linkAndTitle)
    return d
def check_title(self, dom, filename):
    """
    Check that the document has exactly one C{title}, exactly one C{h1},
    and that the two carry the same text.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: The result of C{self._reportError} when the title or h1 count
        is wrong, otherwise C{None}.
    """
    root = dom.documentElement

    titles = domhelpers.findNodesNamed(dom, 'title')
    if len(titles) != 1:
        return self._reportError(filename, root, 'not exactly one title')

    headings = domhelpers.findNodesNamed(dom, 'h1')
    if len(headings) != 1:
        return self._reportError(filename, root, 'not exactly one h1')

    heading = headings[0]
    headingText = domhelpers.getNodeText(heading)
    titleText = domhelpers.getNodeText(titles[0])
    if headingText != titleText:
        self._reportError(filename, heading, 'title and h1 text differ')
def check_title(self, dom, filename):
    """
    Check that the document has exactly one C{title}, exactly one C{h1},
    and that the two carry the same text.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: The result of C{self._reportError} when the title or h1 count
        is wrong, otherwise C{None}.
    """
    documentElement = dom.documentElement

    titleNodes = domhelpers.findNodesNamed(dom, 'title')
    if len(titleNodes) != 1:
        return self._reportError(filename, documentElement,
                                 'not exactly one title')

    h1Nodes = domhelpers.findNodesNamed(dom, 'h1')
    if len(h1Nodes) != 1:
        return self._reportError(filename, documentElement,
                                 'not exactly one h1')

    h1Text = domhelpers.getNodeText(h1Nodes[0])
    if h1Text != domhelpers.getNodeText(titleNodes[0]):
        self._reportError(filename, h1Nodes[0], 'title and h1 text differ')
def extractRSSFeeds(dom):
    """
    Yield a (title, url) pair for every C{bookmark} node in C{dom}.

    The title is the text of the first C{title} node found under the
    bookmark, or C{None} when there is none.  The url is the unquoted
    C{href} attribute of the bookmark.

    @param dom: The parsed bookmarks document.
    """
    for entry in domhelpers.findNodesNamed(dom, "bookmark"):
        titleNodes = domhelpers.findNodesNamed(entry, "title")
        title = None
        if titleNodes:
            title = domhelpers.getNodeText(titleNodes[0])
        yield title, urllib.unquote(entry.getAttribute("href"))
def findRSSFolder(dom, folderName=None):
    """
    Find the first C{folder} node which has a C{title} node whose text
    equals C{folderName}.

    @param dom: The parsed document to search.
    @param folderName: The title to look for.  C{None} means C{"RSS"}; the
        empty string selects C{dom} itself.

    @return: The matching folder node, C{dom} when C{folderName} is the
        empty string, or C{None} when nothing matches.
    """
    wanted = "RSS" if folderName is None else folderName
    if wanted == "":
        return dom

    for candidate in domhelpers.findNodesNamed(dom, "folder"):
        for titleNode in domhelpers.findNodesNamed(candidate, "title"):
            if domhelpers.getNodeText(titleNode) == wanted:
                return candidate
    return None
def numberDocument(document, chapterNumber):
    """
    Prefix every C{h2} node of the document with a section label.

    The label is a text node of the form C{"<chapter>.<section> "}, where
    sections are counted from 1 in the order the C{h2} nodes are found.
    This is probably intended to interact in a rather specific way with
    L{getSectionNumber}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content
        to be presented.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
        document.

    @return: C{None}
    """
    headings = domhelpers.findNodesNamed(document, "h2")
    for sectionNumber, heading in enumerate(headings, 1):
        label = dom.Text()
        label.data = "%s.%d " % (chapterNumber, sectionNumber)
        heading.insertBefore(label, heading.firstChild)
def getDescription(self):
    """
    Return the text of the first C{description} node in C{self.dom}, or
    C{None} when there is no such node.
    """
    # http://purl.org/dc/elements/1.1/ description
    matches = domhelpers.findNodesNamed(self.dom, 'description')
    if not matches:
        return None
    return domhelpers.getNodeText(matches[0])
def setTitle(template, title, chapterNumber):
    """
    Append the title, optionally preceded by the chapter number, to the
    template.

    Two targets are updated when present: the first C{title} node and the
    first node whose C{class} attribute is C{title}.  The chapter number is
    only added when C{numberer.getNumberSections()} and C{chapterNumber}
    are both true.

    @type template: A DOM Node or Document
    @param template: The output template to update.

    @type title: C{list} of DOM Nodes
    @param title: Nodes from the input document defining its title.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
        document; any false value omits it.

    @return: C{None}
    """
    byTag = domhelpers.findNodesNamed(template, "title")
    byClass = domhelpers.findElementsWithAttribute(template, "class", 'title')
    for matches in (byTag, byClass):
        if not matches:
            continue
        children = matches[0].childNodes
        if numberer.getNumberSections() and chapterNumber:
            children.append(microdom.Text('%s. ' % chapterNumber))
        children.extend(title)
def footnotes(document):
    """
    Move every footnote to a list at the end of the body and leave a
    numbered link in its place.

    A footnote is any node whose C{class} attribute is C{footnote}.  Each
    one is replaced by a link whose C{title} is the whitespace-normalised
    footnote text; the footnote itself is wrapped in a named anchor inside
    a C{li}.  A C{Footnotes} heading and the C{ol} of footnotes are
    appended to the first C{body} node.  Nothing happens when the document
    has no footnotes.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content
        to be presented.

    @return: C{None}
    """
    found = domhelpers.findElementsWithAttribute(document, "class",
                                                 "footnote")
    if not found:
        return

    listing = microdom.Element("ol")
    for number, note in enumerate(found, 1):
        marker = microdom.parseString(
            '<a href="#footnote-%(id)d">'
            "<super>%(id)d</super></a>" % {"id": number}).documentElement
        summary = " ".join(domhelpers.getNodeText(note).split())
        marker.setAttribute("title", summary)

        anchor = microdom.Element(
            "a", attributes={"name": "footnote-%d" % number})
        anchor.childNodes = [note]
        item = microdom.Element("li")
        item.childNodes = [anchor]
        listing.childNodes.append(item)

        # Swap the footnote for its link only after it has been filed.
        note.parentNode.replaceChild(marker, note)

    body = domhelpers.findNodesNamed(document, "body")[0]
    heading = microdom.parseString("<h2>Footnotes</h2>").documentElement
    body.childNodes.append(heading)
    body.childNodes.append(listing)
def testAwfulTagSoup(self):
    """
    Parse a deliberately malformed document in lenient mode and check that
    exactly one C{blink} node is found in it.
    """
    s = """ <html> <head><title> I send you this message to have your advice!!!!</titl e </headd> <body bgcolor alink hlink vlink> <h1><BLINK>SALE</blINK> TWENTY MILLION EMAILS & FUR COAT NOW FREE WITH `ENLARGER'</h1> YES THIS WONDERFUL AWFER IS NOW HERER!!! <script LANGUAGE="javascript"> function give_answers() { if (score < 70) { alert("I hate you"); }} </script><a href=/foo.com/lalal name=foo>lalal</a> </body> </HTML> """
    d = microdom.parseString(s, beExtremelyLenient=1)
    l = domhelpers.findNodesNamed(d.documentElement, 'blink')
    # assertEquals is a deprecated alias; assertEqual is the supported name.
    self.assertEqual(len(l), 1)
def test_awfulTagSoup(self):
    """
    Parse a deliberately malformed document in lenient mode and check that
    exactly one C{blink} node is found in it.
    """
    soup = """ <html> <head><title> I send you this message to have your advice!!!!</titl e </headd> <body bgcolor alink hlink vlink> <h1><BLINK>SALE</blINK> TWENTY MILLION EMAILS & FUR COAT NOW FREE WITH `ENLARGER'</h1> YES THIS WONDERFUL AWFER IS NOW HERER!!! <script LANGUAGE="javascript"> function give_answers() { if (score < 70) { alert("I hate you"); }} </script><a href=/foo.com/lalal name=foo>lalal</a> </body> </HTML> """
    document = microdom.parseString(soup, beExtremelyLenient=1)
    blinks = domhelpers.findNodesNamed(document.documentElement, "blink")
    blinkCount = len(blinks)
    self.assertEqual(blinkCount, 1)
def check_pre_py_listing(self, dom, filename):
    """
    Report C{pre} nodes of class C{python} whose text is not valid Python.

    The text is unescaped, stripped of blank lines and of common leading
    spaces, and handed to C{parser.suite}.  If that raises C{SyntaxError}
    the check is retried with every C{...} turned into a string literal; a
    second failure is reported through C{self._reportError}.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(dom, 'pre'):
        if node.getAttribute('class') != 'python':
            continue
        try:
            text = domhelpers.getNodeText(node)
            # Fix < and >: turn the escaped forms back into the real
            # characters (replacing '>' with '>' did nothing).
            text = text.replace('&gt;', '>').replace('&lt;', '<')
            # Strip blank lines.  A real list is needed because it is
            # traversed more than once below.
            lines = [line for line in
                     [raw.rstrip() for raw in text.split('\n')]
                     if line]
            # Strip leading space.  The "lines and" guard stops an empty
            # block from looping forever.
            while lines and not [1 for line in lines
                                 if line[:1] not in ('', ' ')]:
                lines = [line[1:] for line in lines]
            text = '\n'.join(lines) + '\n'
            try:
                parser.suite(text)
            except SyntaxError:
                # Pretend the "..." idiom is syntactically valid
                text = text.replace("...", "'...'")
                parser.suite(text)
        except SyntaxError as e:
            self._reportError(filename, node,
                              'invalid python code:' + str(e))
def setAuthors(template, authors):
    """
    Add author information to the template document.

    A C{span} of author links is appended to every node whose C{class}
    attribute is C{authors}, and one C{link rel="author"} node per author
    is appended to the first C{head} node.

    @param template: The output template to update.
    @param authors: A list of (name, href) two-tuples of strings.

    @return: C{None}
    """
    from xml.sax.saxutils import escape

    def attributeValue(value):
        # Escape for use inside a double-quoted attribute.
        return escape(value, {'"': '&quot;'})

    # First, similarly to setTitle, insert text into an <div class="authors">
    text = ''
    lastIndex = len(authors) - 1
    for index, (name, href) in enumerate(authors):
        # Names and hrefs are escaped so that characters such as & or <
        # cannot break or alter the generated markup.
        anchor = '<a href="%s">%s</a>' % (attributeValue(href), escape(name))
        # The last author is found by position so that a duplicated entry
        # earlier in the list is not mistaken for it.
        if index == lastIndex:
            if len(authors) == 1:
                text = anchor
            else:
                text += 'and ' + anchor
        else:
            text += anchor + ','

    childNodes = microdom.parseString('<span>' + text + '</span>').childNodes

    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):
        node.childNodes.extend(childNodes)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    authors = [microdom.parseString('<link rel="author" href="%s" title="%s"/>'
                                    % (attributeValue(href),
                                       attributeValue(name))).childNodes[0]
               for name, href in authors]
    head.childNodes.extend(authors)
def setTitle(template, title, chapterNumber):
    """
    Append the title, optionally preceded by the chapter number, to the
    template.

    Two targets are updated when present: the first C{title} node and the
    first node whose C{class} attribute is C{title}.  The chapter number is
    only added when C{numberer.getNumberSections()} and C{chapterNumber}
    are both true.

    @type template: A DOM Node or Document
    @param template: The output template to update.

    @type title: C{list} of DOM Nodes
    @param title: Nodes from the input document defining its title.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
        document; any false value omits it.

    @return: C{None}
    """
    titleTags = domhelpers.findNodesNamed(template, "title")
    titleClassed = domhelpers.findElementsWithAttribute(
        template, "class", "title")
    for candidates in (titleTags, titleClassed):
        if not candidates:
            continue
        target = candidates[0]
        if numberer.getNumberSections() and chapterNumber:
            prefix = microdom.Text("%s. " % chapterNumber)
            target.childNodes.append(prefix)
        target.childNodes.extend(title)
def getFirstAncestorWithSectionHeader(entry):
    """
    Walk the nodes returned by C{domhelpers.getParents(entry)}, skipping
    the first one, and return the C{h2} nodes found under the first of them
    that has any.

    @param entry: The node from which to begin traversal.

    @return: A non-empty list of C{h2} nodes, or an empty list when no
        visited node has one.
    """
    ancestors = domhelpers.getParents(entry)[1:]
    for ancestor in ancestors:
        found = domhelpers.findNodesNamed(ancestor, "h2")
        if found:
            return found
    return []
def setTitle(template, title, chapterNumber):
    """
    Append the title, optionally preceded by the chapter number, to the
    template.

    Two targets are updated when present: the first C{title} node and the
    first node whose C{class} attribute is C{title}.  The chapter number is
    only added when C{numberer.getNumberSections()} and C{chapterNumber}
    are both true.

    @param template: The output template to update.
    @param title: A list of nodes from the input document defining its
        title.
    @param chapterNumber: The chapter number to show; any false value
        omits it.

    @return: C{None}
    """
    tagMatches = domhelpers.findNodesNamed(template, "title")
    classMatches = domhelpers.findElementsWithAttribute(
        template, "class", 'title')
    for found in (tagMatches, classMatches):
        if not found:
            continue
        destination = found[0].childNodes
        if numberer.getNumberSections() and chapterNumber:
            destination.append(microdom.Text('%s. ' % chapterNumber))
        destination.extend(title)
def render_GET(self, request):
    """
    Fetch the feed for the requested user and write an HTML list of its
    items to the request.

    The feed address is C{urlTemplate} filled in with the first C{user}
    request argument.  Each C{item} contributes one list entry linking its
    C{link} text with its C{title} text.  Any failure along the way is
    written to the request as an C{Error: ...} message.

    @param request: The request being served.

    @return: C{server.NOT_DONE_YET}; the response is completed from the
        callbacks.
    """
    url = urlTemplate % request.args['user'][0]

    def findItems(feed):
        return domhelpers.findNodesNamed(feed, 'item')

    def pairTitlesWithLinks(items):
        titles = [domhelpers.findNodesNamed(item, 'title')[0]
                  for item in items]
        links = [domhelpers.findNodesNamed(item, 'link')[0]
                 for item in items]
        return zip(titles, links)

    def renderPage(pairs):
        rows = ['<li><a href="%s">%s</a></li>' %
                (domhelpers.getNodeText(link), domhelpers.getNodeText(title))
                for (title, link) in pairs]
        return ('<html><head></head><body><ul>%s</ul></body></html>' %
                '\n'.join(rows))

    def deliver(page):
        return (request.write(page), request.finish())

    def reportFailure(error):
        return (request.write('Error: %s' % error), request.finish())

    d = client.getPage(url)
    d.addCallback(microdom.parseString)
    d.addCallback(findItems)
    d.addCallback(pairTitlesWithLinks)
    d.addCallback(renderPage)
    d.addCallback(deliver)
    d.addErrback(reportFailure)
    return server.NOT_DONE_YET
def lowerDocument(href, d, nodeLevel):
    """
    Load a document and push all of its headings down by C{nodeLevel}.

    @param href: The file name of the document, relative to C{d}.
    @param d: The directory containing the document.
    @param nodeLevel: The number added to the level of every C{h1}-C{h6}
        heading found in the document body.

    @return: The first C{body} node of the parsed document, with its
        heading tag names rewritten.
    """
    # Close the file once it has been parsed instead of leaking the handle.
    with open(os.path.join(d, href)) as source:
        newNode = microdom.parse(source)
    newNode = domhelpers.findNodesNamed(newNode, 'body')[0]
    headers = domhelpers.findElements(
        newNode,
        lambda x: len(x.tagName) == 2 and x.tagName[0] == 'h' and
        x.tagName[1] in '123456')
    for header in headers:
        header.tagName = 'h' + str(int(header.tagName[1]) + nodeLevel)
    return newNode
def render_GET(self, request):
    """
    Fetch the feed for the requested user and write an HTML list of its
    items to the request.

    The feed address is C{urlTemplate} filled in with the first C{user}
    request argument.  Each C{item} contributes one list entry linking its
    C{link} text with its C{title} text.  Any failure along the way is
    written to the request as an C{Error: ...} message.

    @param request: The request being served.

    @return: C{server.NOT_DONE_YET}; the response is completed from the
        callbacks.
    """
    url = urlTemplate % request.args['user'][0]

    def collectItems(tree):
        return domhelpers.findNodesNamed(tree, 'item')

    def titleLinkPairs(itms):
        titleNodes = [domhelpers.findNodesNamed(x, 'title')[0] for x in itms]
        linkNodes = [domhelpers.findNodesNamed(x, 'link')[0] for x in itms]
        return zip(titleNodes, linkNodes)

    def buildHTML(itms):
        entries = ['<li><a href="%s">%s</a></li>' % (
            domhelpers.getNodeText(link), domhelpers.getNodeText(title))
            for (title, link) in itms]
        return ('<html><head></head><body><ul>%s</ul></body></html>' %
                '\n'.join(entries))

    def sendPage(s):
        return (request.write(s), request.finish())

    def sendError(e):
        return (request.write('Error: %s' % e), request.finish())

    deferred = client.getPage(url)
    deferred.addCallback(microdom.parseString)
    deferred.addCallback(collectItems)
    deferred.addCallback(titleLinkPairs)
    deferred.addCallback(buildHTML)
    deferred.addCallback(sendPage)
    deferred.addErrback(sendError)
    return server.NOT_DONE_YET
def check_texturl_matches_href(self, dom, filename):
    """
    Report links whose visible text is a URL different from their C{href}.

    Only C{a} nodes with an C{href} attribute are considered.  The text
    counts as a URL when C{urlparse} finds a scheme in it and it contains
    no space.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: C{None}
    """
    for anchor in domhelpers.findNodesNamed(dom, "a"):
        if not anchor.hasAttribute("href"):
            continue
        label = domhelpers.getNodeText(anchor)
        scheme = urlparse.urlparse(label)[0]
        if not scheme or " " in label:
            # The link text is not itself a URL.
            continue
        if label != anchor.getAttribute("href"):
            self._reportError(filename, anchor,
                              "link text does not match href")
def testDeferredModel(self):
    """
    Render C{_TestPage} through a fake channel and check that the first
    child of every C{span} in the output carries C{'The Result'}.
    """
    channel = FakeHTTPChannel()
    channel.site = FakeSite(_TestPage())
    request = channel.makeFakeRequest('/')

    # Spin the reactor until the request has been completed.
    while not request.finished:
        reactor.iterate()

    dom = microdom.parseXMLString(request.written.getvalue())
    spanElems = domhelpers.findNodesNamed(dom, 'span')
    for spanElem in spanElems:
        # failUnlessEqual is a deprecated alias; assertEqual is the
        # supported name.
        self.assertEqual('The Result', spanElem.childNodes[0].data)
def check_texturl_matches_href(self, dom, filename):
    """
    Report links whose visible text is a URL different from their C{href}.

    Only C{a} nodes with an C{href} attribute are considered.  The text
    counts as a URL when C{urlparse} finds a scheme in it and it contains
    no space.

    @param dom: The parsed document to check.
    @param filename: The file name passed through to C{self._reportError}.

    @return: C{None}
    """
    for link in domhelpers.findNodesNamed(dom, 'a'):
        if not link.hasAttribute('href'):
            continue
        shown = domhelpers.getNodeText(link)
        protocol = urlparse.urlparse(shown)[0]
        looksLikeURL = protocol and ' ' not in shown
        if looksLikeURL and shown != link.getAttribute('href'):
            self._reportError(filename, link,
                              'link text does not match href')
def setAuthors(template, authors):
    """
    Add author information to the template document.

    Names and contact information for authors are added to each node with a
    C{class} attribute set to C{authors} and to the template head as C{link}
    nodes.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of
        the version information.

    @type authors: C{list} of two-tuples of C{str}
    @param authors: List of names and contact information for the authors
        of the input document.

    @return: C{None}
    """
    from xml.sax.saxutils import escape

    lastIndex = len(authors) - 1
    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):
        # First, similarly to setTitle, insert text into an <div
        # class="authors">
        container = dom.Element('span')
        # The last author is found by position so that a duplicated entry
        # earlier in the list is not mistaken for it.
        for index, (name, href) in enumerate(authors):
            anchor = dom.Element('a')
            anchor.setAttribute('href', href)
            anchorText = dom.Text()
            anchorText.data = name
            anchor.appendChild(anchorText)
            if index == lastIndex:
                if len(authors) == 1:
                    container.appendChild(anchor)
                else:
                    andText = dom.Text()
                    andText.data = 'and '
                    container.appendChild(andText)
                    container.appendChild(anchor)
            else:
                container.appendChild(anchor)
                commaText = dom.Text()
                commaText.data = ', '
                container.appendChild(commaText)

        node.appendChild(container)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    # Escape the values so that &, <, > or " in a name or href cannot
    # break the markup being parsed.
    quoteEntity = {'"': '&quot;'}
    authors = [
        dom.parseString('<link rel="author" href="%s" title="%s"/>' %
                        (escape(href, quoteEntity),
                         escape(name, quoteEntity))).childNodes[0]
        for name, href in authors
    ]
    head.childNodes.extend(authors)
def setAuthors(template, authors):
    """
    Add author information to the template document.

    Names and contact information for authors are added to each node with a
    C{class} attribute set to C{authors} and to the template head as C{link}
    nodes.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of
        the version information.

    @type authors: C{list} of two-tuples of C{str}
    @param authors: List of names and contact information for the authors
        of the input document.

    @return: C{None}
    """
    from xml.sax.saxutils import escape

    lastIndex = len(authors) - 1
    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):
        # First, similarly to setTitle, insert text into an <div
        # class="authors">
        container = dom.Element('span')
        # The last author is found by position so that a duplicated entry
        # earlier in the list is not mistaken for it.
        for index, (name, href) in enumerate(authors):
            anchor = dom.Element('a')
            anchor.setAttribute('href', href)
            anchorText = dom.Text()
            anchorText.data = name
            anchor.appendChild(anchorText)
            if index == lastIndex:
                if len(authors) == 1:
                    container.appendChild(anchor)
                else:
                    andText = dom.Text()
                    andText.data = 'and '
                    container.appendChild(andText)
                    container.appendChild(anchor)
            else:
                container.appendChild(anchor)
                commaText = dom.Text()
                commaText.data = ', '
                container.appendChild(commaText)

        node.appendChild(container)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    # Escape the values so that &, <, > or " in a name or href cannot
    # break the markup being parsed.
    quoteEntity = {'"': '&quot;'}
    authors = [dom.parseString('<link rel="author" href="%s" title="%s"/>'
                               % (escape(href, quoteEntity),
                                  escape(name, quoteEntity))).childNodes[0]
               for name, href in authors]
    head.childNodes.extend(authors)
def check_a_py_listing(self, dom, filename):
    """
    Report C{py-listing} links whose source file still shows licence
    boilerplate after the lines hidden by C{skipLines}.

    @param dom: The parsed document to check.
    @param filename: The name of the file C{dom} came from; listing hrefs
        are resolved relative to its directory.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(dom, 'a'):
        if node.getAttribute('class') != 'py-listing':
            continue
        fn = os.path.join(os.path.dirname(filename),
                          node.getAttribute('href'))
        # Close the listing once read instead of leaking the handle.
        with open(fn) as listing:
            lines = listing.readlines()
        # Parse the attribute once; it is needed for both the slice and
        # the suggested value.
        skipLines = int(node.getAttribute('skipLines', 0))
        for num, line in enumerate(lines[skipLines:]):
            if '59 Temple Place, Suite 330, Boston' in line:
                self._reportError(
                    filename, node,
                    'included source file %s has licence boilerplate.'
                    ' Use skipLines="%d".'
                    % (fn, skipLines + num + 1))
def testDeferredModel(self):
    """
    Render C{_TestPage} through a fake channel and check that the first
    child of every C{span} in the output carries C{'The Result'}.
    """
    # Test that multiple uses of a deferred model work correctly.
    channel = FakeHTTPChannel()
    channel.site = FakeSite(_TestPage())
    request = channel.makeFakeRequest('/')

    # Spin the reactor until the request has been completed.
    while not request.finished:
        reactor.iterate()

    dom = microdom.parseXMLString(request.written.getvalue())
    spanElems = domhelpers.findNodesNamed(dom, 'span')
    for spanElem in spanElems:
        # failUnlessEqual is a deprecated alias; assertEqual is the
        # supported name.
        self.assertEqual('The Result', spanElem.childNodes[0].data)
def render_GET(self, request):
    """
    Get an xml feed from LiveJournal and construct a new HTML page using
    the 'title' and 'link' parsed from the xml document.

    The feed address is C{urlTemplate} filled in with the first C{user}
    request argument and is fetched with a 30 second timeout.  Any failure
    along the way is written to the request as an C{Error: ...} message.

    @param request: The request being served.

    @return: C{server.NOT_DONE_YET}; the response is completed from the
        callbacks.
    """
    url = urlTemplate % request.args['user'][0]

    def extractItems(tree):
        return domhelpers.findNodesNamed(tree, 'item')

    def zipTitlesAndLinks(itms):
        titleNodes = [domhelpers.findNodesNamed(x, 'title')[0] for x in itms]
        linkNodes = [domhelpers.findNodesNamed(x, 'link')[0] for x in itms]
        return zip(titleNodes, linkNodes)

    def formatPage(itms):
        listItems = ['<li><a href="%s">%s</a></li>' % (
            domhelpers.getNodeText(link), domhelpers.getNodeText(title))
            for (title, link) in itms]
        return ('<html><head></head><body><ul>%s</ul></body></html>' %
                '\n'.join(listItems))

    def writePage(s):
        return (request.write(s), request.finish())

    def writeError(e):
        return (request.write('Error: %s' % e), request.finish())

    pending = client.getPage(url, timeout=30)
    pending.addCallback(microdom.parseString)
    pending.addCallback(extractItems)
    pending.addCallback(zipTitlesAndLinks)
    pending.addCallback(formatPage)
    pending.addCallback(writePage)
    pending.addErrback(writeError)
    return server.NOT_DONE_YET
def check_80_columns(self, dom, filename):
    """
    Report C{pre} text and linked listing files wider than 80 columns.

    One error is reported through C{self._reportError} for every offending
    line.  A listing which cannot be located or read is reported as a bad
    href and skipped.

    @param dom: The parsed document to check.
    @param filename: The name of the file C{dom} came from; listing hrefs
        are resolved relative to its directory.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(dom, 'pre'):
        for line in domhelpers.getNodeText(node).split('\n'):
            if len(line.rstrip()) > 80:
                self._reportError(filename, node,
                                  'text wider than 80 columns in pre')
    for node in domhelpers.findNodesNamed(dom, 'a'):
        if not node.getAttribute('class', '').endswith('listing'):
            continue
        try:
            fn = os.path.dirname(filename)
            fn = os.path.join(fn, node.getAttribute('href'))
            # Close the listing as soon as it has been read instead of
            # leaving the handle to the garbage collector.
            with open(fn, 'r') as listing:
                lines = listing.readlines()
        except Exception:
            # Still best-effort for any ordinary failure, but no longer
            # swallows KeyboardInterrupt or SystemExit.
            self._reportError(filename, node,
                              'bad listing href: %r' %
                              node.getAttribute('href'))
            continue

        for line in lines:
            if len(line.rstrip()) > 80:
                self._reportError(filename, node,
                                  'listing wider than 80 columns')
def templateMutate(self, document, parentCount=0):
    """
    Wrap C{document} in a copy of C{self.metaTemplate}, if one is set.

    The copy has its C{href}, C{src} and C{action} attributes prefixed with
    C{parentCount} repetitions of C{'../'}, its title children replaced by
    those of C{document}, and the element whose C{class} is C{__BODY__}
    replaced by the children of the C{body} of C{document}.

    As a side effect the pretty-printed result is written to
    C{garbage.html} in the current working directory.

    @param document: The document providing the title and body content.
    @param parentCount: How many C{'../'} segments to prepend to link-like
        attributes of the template copy.

    @return: The populated template copy, or C{document} itself when
        C{self.metaTemplate} is false.
    """
    if not self.metaTemplate:
        return document

    newDoc = self.metaTemplate.cloneNode(1)
    if parentCount:
        dotdot = parentCount * '../'
        for ddname in 'href', 'src', 'action':
            for node in domhelpers.findElementsWithAttribute(newDoc, ddname):
                node.setAttribute(ddname,
                                  dotdot + node.getAttribute(ddname))

    ttl = domhelpers.findNodesNamed(newDoc, "title")[0]
    ttl2 = domhelpers.findNodesNamed(document, "title")[0]
    ttl.childNodes[:] = []
    for n in ttl2.childNodes:
        ttl.appendChild(n)

    body = domhelpers.findElementsWithAttribute(newDoc, "class",
                                                "__BODY__")[0]
    body2 = domhelpers.findNodesNamed(document, "body")[0]
    ndx = body.parentNode.childNodes.index(body)
    # Splice the document's body children in place of the placeholder.
    body.parentNode.childNodes[ndx:ndx+1] = body2.childNodes
    for n in body2.childNodes:
        n.parentNode = body.parentNode

    # NOTE(review): this looks like leftover debugging output -- confirm
    # whether it is still wanted.  The handle is now closed promptly
    # instead of being leaked.
    with open("garbage.html", "wb") as f:
        f.write(newDoc.toprettyxml())
    return newDoc
def munge(document, template, linkrel, dir, fullpath, ext, url, config,
          outfileGenerator=getOutputFileName):
    """
    Transform C{document} in place and move its content into C{template}.

    @param document: The parsed input document; it is modified in place.
    @param template: The parsed template which receives the title, the
        authors and the body content of C{document}.
    @param linkrel: Passed to C{fixRelativeLinks} along with the template.
    @param dir: Passed to the listing helpers (C{addPyListings},
        C{addHTMLListings}, C{addPlainListings}).
    @param fullpath: The path of the input file; used for the modification
        time, the chapter number, the index reference and the output name.
    @param ext: Passed to C{fixLinks} and to C{outfileGenerator}.
    @param url: Passed to C{fixAPI}.
    @param config: A mapping read for C{noapi} (a true value skips
        C{fixAPI}) and C{version} (defaults to the empty string).
    @param outfileGenerator: Called with the last component of C{fullpath}
        and C{ext}; its result is passed to C{index}.

    @return: C{None}
    """
    # Template preparation.
    fixRelativeLinks(template, linkrel)
    addMtime(template, fullpath)

    # Document transformations, applied in this order.
    removeH1(document)
    apiLinksDisabled = config.get('noapi', False)
    if not apiLinksDisabled:
        fixAPI(document, url)
    fontifyPython(document)
    fixLinks(document, ext)
    addPyListings(document, dir)
    addHTMLListings(document, dir)
    addPlainListings(document, dir)
    tableOfContents = generateToC(document)
    putInToC(template, tableOfContents)
    footnotes(document)
    notes(document)

    setIndexLink(template, indexer.getIndexFilename())
    setVersion(template, config.get('version', ''))

    # Insert the document into the template
    chapterNumber = htmlbook.getNumber(fullpath)
    titleElement = domhelpers.findNodesNamed(document, 'title')[0]
    setTitle(template, titleElement.childNodes, chapterNumber)
    if numberer.getNumberSections() and chapterNumber:
        numberDocument(document, chapterNumber)

    outputName = outfileGenerator(os.path.split(fullpath)[1], ext)
    reference = htmlbook.getReference(fullpath)
    index(document, outputName, reference)

    authorPairs = []
    for link in domhelpers.findNodesNamed(document, 'link'):
        if link.getAttribute('rel', '') != 'author':
            continue
        authorPairs.append((link.getAttribute('title', ''),
                            link.getAttribute('href', '')))
    setAuthors(template, authorPairs)

    sourceBody = domhelpers.findNodesNamed(document, "body")[0]
    targetBody = domhelpers.findElementsWithAttribute(
        template, "class", "body")[0]
    targetBody.childNodes = sourceBody.childNodes
    targetBody.setAttribute("class", "content")
def check_a_py_listing(self, dom, filename):
    """
    Report C{py-listing} links whose source file still shows licence
    boilerplate after the lines hidden by C{skipLines}.

    @param dom: The parsed document to check.
    @param filename: The name of the file C{dom} came from; listing hrefs
        are resolved relative to its directory.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(dom, 'a'):
        if node.getAttribute('class') != 'py-listing':
            continue
        fn = os.path.join(os.path.dirname(filename),
                          node.getAttribute('href'))
        # Close the listing once read instead of leaking the handle.
        with open(fn) as listing:
            lines = listing.readlines()
        # Parse the attribute once; it is needed for both the slice and
        # the suggested value.
        skipLines = int(node.getAttribute('skipLines', 0))
        for num, line in enumerate(lines[skipLines:]):
            if '59 Temple Place, Suite 330, Boston' in line:
                self._reportError(
                    filename, node,
                    'included source file %s has licence boilerplate.'
                    ' Use skipLines="%d".' %
                    (fn, skipLines + num + 1))
def splitIntoSlides(document):
    """
    Split the children of the first C{body} node into slides at each C{h2}.

    Every C{h2} element starts a new slide and provides its title; the
    heading itself is not part of any slide.  Content before the first
    C{h2} is filed under the title C{'(unset)'}.  A slide which is empty
    when the next C{h2} is reached is dropped, but the final slide is
    always included.

    @param document: The parsed input document.

    @return: A list of (title, list of nodes) two-tuples.
    """
    body = domhelpers.findNodesNamed(document, 'body')[0]
    slides = []
    current = []
    heading = '(unset)'
    for node in body.childNodes:
        startsSlide = isinstance(node, dom.Element) and node.tagName == 'h2'
        if not startsSlide:
            current.append(node)
            continue
        if current:
            slides.append((heading, current))
            current = []
        heading = domhelpers.getNodeText(node)
    slides.append((heading, current))
    return slides
def splitIntoSlides(document):
    """
    Split the children of the first C{body} node into slides at each C{h2}.

    Every C{h2} element starts a new slide and provides its title; the
    heading itself is not part of any slide.  Content before the first
    C{h2} is filed under the title C{'(unset)'}.  A slide which is empty
    when the next C{h2} is reached is dropped, but the final slide is
    always included.

    @param document: The parsed input document.

    @return: A list of (title, list of nodes) two-tuples.
    """
    body = domhelpers.findNodesNamed(document, 'body')[0]
    slides = []
    pending = []
    currentTitle = '(unset)'
    for node in body.childNodes:
        isHeading = (isinstance(node, microdom.Element) and
                     node.tagName == 'h2')
        if not isHeading:
            pending.append(node)
            continue
        if pending:
            slides.append((currentTitle, pending))
            pending = []
        currentTitle = domhelpers.getNodeText(node)
    slides.append((currentTitle, pending))
    return slides
def testCaseSensitiveSoonCloser(self):
    """
    Parse upper-case markup in lenient mode and check that the text
    gathered from the first C{a} node does not contain C{'insane'}, which
    appears only in a later paragraph.
    """
    s = """ <HTML><BODY> <P ALIGN="CENTER"> <A HREF="http://www.apache.org/"><IMG SRC="/icons/apache_pb.gif"></A> </P> <P> This is an insane set of text nodes that should NOT be gathered under the A tag above. </P> </BODY></HTML> """
    d = microdom.parseString(s, beExtremelyLenient=1)
    l = domhelpers.findNodesNamed(d.documentElement, 'a')
    n = domhelpers.gatherTextNodes(l[0],1).replace(' ',' ')
    # assertEquals is a deprecated alias; assertEqual is the supported name.
    self.assertEqual(n.find('insane'), -1)
def getFirstAncestorWithSectionHeader(entry):
    """
    Walk the nodes returned by C{domhelpers.getParents(entry)}, skipping
    the first one, and return the C{h2} nodes found under the first of them
    that has any.

    @type entry: A DOM Node
    @param entry: The node from which to begin traversal.

    @rtype: C{list} of DOM Nodes
    @return: The C{h2} nodes of the selected node, or an empty list when no
        visited node has one.
    """
    candidates = domhelpers.getParents(entry)[1:]
    for candidate in candidates:
        sectionHeaders = domhelpers.findNodesNamed(candidate, "h2")
        if sectionHeaders:
            return sectionHeaders
    return []
def removeH1(document):
    """
    Replace all C{h1} nodes in the given document with empty C{span} nodes.

    C{h1} nodes mark up document sections and the output template is given
    an opportunity to present this information in a different way.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content
        to be presented.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(document, 'h1'):
        # Each heading gets its own span.  Re-using a single span for
        # several headings would insert the same node more than once, and
        # a node can only have one place in the tree.
        node.parentNode.replaceChild(dom.Element('span'), node)
def setAuthors(template, authors):
    """
    Add author information to the template document.

    Names and contact information for authors are added to each node with a
    C{class} attribute set to C{authors} and to the template head as C{link}
    nodes.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of
        the version information.

    @type authors: C{list} of two-tuples of C{str}
    @param authors: List of names and contact information for the authors
        of the input document.

    @return: C{None}
    """
    from xml.sax.saxutils import escape

    def attributeValue(value):
        # Escape for use inside a double-quoted attribute.
        return escape(value, {'"': '&quot;'})

    # First, similarly to setTitle, insert text into an <div class="authors">
    text = ''
    lastIndex = len(authors) - 1
    for index, (name, href) in enumerate(authors):
        # Names and hrefs are escaped so that characters such as & or <
        # cannot break or alter the generated markup.
        anchor = '<a href="%s">%s</a>' % (attributeValue(href), escape(name))
        # The last author is found by position so that a duplicated entry
        # earlier in the list is not mistaken for it.
        if index == lastIndex:
            if len(authors) == 1:
                text = anchor
            else:
                text += 'and ' + anchor
        else:
            text += anchor + ','

    childNodes = microdom.parseString('<span>' + text + '</span>').childNodes

    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):
        node.childNodes.extend(childNodes)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    authors = [
        microdom.parseString('<link rel="author" href="%s" title="%s"/>' %
                             (attributeValue(href),
                              attributeValue(name))).childNodes[0]
        for name, href in authors
    ]
    head.childNodes.extend(authors)
def test_caseSensitiveSoonCloser(self):
    """
    Parse upper-case markup in lenient mode and check that the text
    gathered from the first C{a} node does not contain C{"insane"}, which
    appears only in a later paragraph.
    """
    markup = """ <HTML><BODY> <P ALIGN="CENTER"> <A HREF="http://www.apache.org/"><IMG SRC="/icons/apache_pb.gif"></A> </P> <P> This is an insane set of text nodes that should NOT be gathered under the A tag above. </P> </BODY></HTML> """
    document = microdom.parseString(markup, beExtremelyLenient=1)
    anchors = domhelpers.findNodesNamed(document.documentElement, "a")
    gathered = domhelpers.gatherTextNodes(anchors[0], 1)
    gathered = gathered.replace(" ", " ")
    position = gathered.find("insane")
    self.assertEqual(position, -1)
def numberDocument(document, chapterNumber):
    """
    Prefix every C{h2} node of the document with a section label.

    The label is a text node of the form C{"<chapter>.<section> "}, where
    sections are counted from 1 in the order the C{h2} nodes are found.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content
        to be presented.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
        document.

    @return: C{None}
    """
    headings = domhelpers.findNodesNamed(document, "h2")
    for sectionNumber, heading in enumerate(headings, 1):
        label = microdom.Text("%s.%d " % (chapterNumber, sectionNumber))
        # Build a new child list rather than editing the existing one.
        heading.childNodes = [label] + heading.childNodes
def footnotes(document):
    """
    Move every footnote to a list at the end of the body and leave a
    numbered link in its place.

    A footnote is any node whose C{class} attribute is C{footnote}.  Each
    one is replaced by a link whose C{title} is the whitespace-normalised
    footnote text; the footnote itself is wrapped in a named anchor inside
    a C{li}.  A C{Footnotes} heading and the C{ol} of footnotes are
    appended to the first C{body} node.  Nothing happens when the document
    has no footnotes.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content
        to be presented.

    @return: C{None}
    """
    found = domhelpers.findElementsWithAttribute(document, "class",
                                                 "footnote")
    if not found:
        return

    listing = dom.Element('ol')
    for number, note in enumerate(found, 1):
        marker = dom.parseString(
            '<a href="#footnote-%(id)d">'
            '<super>%(id)d</super></a>' % {'id': number}).documentElement
        summary = ' '.join(domhelpers.getNodeText(note).split())
        marker.setAttribute('title', summary)

        anchor = dom.Element('a')
        anchor.setAttribute('name', 'footnote-%d' % (number, ))
        anchor.childNodes = [note]
        item = dom.Element('li')
        item.childNodes = [anchor]
        listing.childNodes.append(item)

        # Swap the footnote for its link only after it has been filed.
        note.parentNode.replaceChild(marker, note)

    body = domhelpers.findNodesNamed(document, "body")[0]
    heading = dom.parseString('<h2>Footnotes</h2>').documentElement
    body.childNodes.append(heading)
    body.childNodes.append(listing)
def setTitle(template, title, chapterNumber):
    """
    Copy the title, optionally preceded by the chapter number, into the
    template document.

    Clones of the title nodes are appended to the first C{title} tag and to
    the first tag whose C{class} attribute is C{title}; either target is
    skipped when the template has no such tag.  When section numbering is
    enabled and a chapter number is given, a text node holding
    C{<chapterNumber>. } is inserted at the front of C{title} itself, so the
    list passed in is modified in place.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of
        the title information.

    @type title: C{list} of DOM Nodes
    @param title: Nodes from the input document defining its title.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
        document.  Any C{False} value causes it to be omitted.

    @return: C{None}
    """
    if numberer.getNumberSections() and chapterNumber:
        prefix = dom.Text()
        # Cloning below needs the node to have an owner document; see Python
        # issue 4851.
        prefix.ownerDocument = template.ownerDocument
        prefix.data = '%s. ' % (chapterNumber,)
        title.insert(0, prefix)

    # Both lookups happen before anything is appended to the template.
    candidates = (
        domhelpers.findNodesNamed(template, "title"),
        domhelpers.findElementsWithAttribute(template, "class", 'title'),
    )
    for matches in candidates:
        if not matches:
            continue
        destination = matches[0]
        for piece in title:
            destination.appendChild(piece.cloneNode(True))
def test_findNodesNamed(self):
    """
    C{findNodesNamed} returns both C{foo} elements of a document that also
    contains a C{bar} element.
    """
    document = microdom.parseString('<doc><foo/><bar/><foo>a</foo></doc>')
    expected = 2
    actual = len(domhelpers.findNodesNamed(document, 'foo'))
    assert actual == expected, 'expected %d, got %d' % (expected, actual)
def munge(document, template, linkrel, dir, fullpath, ext, url, config,
          outfileGenerator=getOutputFileName):
    """
    Mutate C{template} until it resembles C{document}.

    Both C{document} and C{template} are modified in place: the document is
    processed by a fixed sequence of transformations and its title, authors
    and body are then transferred into the template.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to
        be presented.

    @type template: A DOM Node or Document
    @param template: The template document which defines the desired
        presentation format of the content.

    @type linkrel: C{str}
    @param linkrel: A prefix to apply to all relative links in C{src} or
        C{href} attributes in the input document when generating the output
        document.

    @type dir: C{str}
    @param dir: The directory in which to search for source listing files.

    @type fullpath: C{str}
    @param fullpath: The file name which contained the input document.

    @type ext: C{str}
    @param ext: The extension to use when selecting an output file name.
        This replaces the extension of the input file name.

    @type url: C{str}
    @param url: A string which will be interpolated with the fully qualified
        Python name of any API reference encountered in the input document,
        the result of which will be used as a link to API documentation for
        that name in the output document.

    @type config: C{dict}
    @param config: Further specification of the desired form of the output.
        Valid keys in this dictionary::

            noapi: If present and set to a True value, links to API
                   documentation will not be generated.

            version: A string which will be included in the output to
                     indicate the version of this documentation.

    @type outfileGenerator: Callable of C{str}, C{str} returning C{str}
    @param outfileGenerator: Output filename factory.  This is invoked with
        the input filename and C{ext} and the output document is serialized
        to the file with the name returned.

    @return: C{None}
    """
    fixRelativeLinks(template, linkrel)
    addMtime(template, fullpath)
    removeH1(document)
    # API links are only generated when the "noapi" option is absent or false.
    if not config.get('noapi', False):
        fixAPI(document, url)
    fontifyPython(document)
    fixLinks(document, ext)
    addPyListings(document, dir)
    addHTMLListings(document, dir)
    addPlainListings(document, dir)
    putInToC(template, generateToC(document))
    footnotes(document)
    notes(document)

    setIndexLink(template, indexer.getIndexFilename())
    # A missing "version" option is passed on as an empty string.
    setVersion(template, config.get('version', ''))

    # Insert the document into the template
    chapterNumber = htmlbook.getNumber(fullpath)
    # Raises IndexError if the document has no title node.
    title = domhelpers.findNodesNamed(document, 'title')[0].childNodes
    setTitle(template, title, chapterNumber)
    # Sections are numbered only when numbering is enabled and this document
    # has a (true) chapter number.
    if numberer.getNumberSections() and chapterNumber:
        numberDocument(document, chapterNumber)
    # The index is given the output name derived from the input file's base
    # name and the requested extension.
    index(document, outfileGenerator(os.path.split(fullpath)[1], ext),
          htmlbook.getReference(fullpath))

    # Authors come from <link rel="author"> nodes, as (title, href) pairs;
    # an attribute with a false value is replaced by an empty string.
    authors = domhelpers.findNodesNamed(document, 'link')
    authors = [(node.getAttribute('title') or '',
                node.getAttribute('href') or '')
               for node in authors
               if node.getAttribute('rel') == 'author']
    setAuthors(template, authors)

    # Hand the document body's children to the template node whose class is
    # "body", then relabel that node's class as "content".
    body = domhelpers.findNodesNamed(document, "body")[0]
    tmplbody = domhelpers.findElementsWithAttribute(template, "class",
                                                    "body")[0]
    tmplbody.childNodes = body.childNodes
    tmplbody.setAttribute("class", "content")
import re

if __name__ == '__main__':
    # Build a JavaScript file that assigns, for each schema listed below, a
    # mapping from simple-type name to [value, translatable value] pairs
    # taken from the schema's xs:enumeration nodes.
    files = {
        'lomVocab': Path('exe') / 'webui' / 'schemas' / 'scorm2004' /
        'common' / 'vocabValues.xsd',
        'lomesVocab': Path('exe') / 'webui' / 'schemas' / 'scorm2004' /
        'vocab' / 'lomesvocab.xsd'
    }
    response = ''
    # NOTE(review): vocab is created once, before the loop, so entries read
    # from a file processed earlier are still present when a later file's
    # mapping is serialised -- confirm this accumulation is intended.
    vocab = {}
    for varname, f in files.items():
        document = parseString(f.bytes(), escapeAttributes=0)
        nodes = findNodesNamed(document, 'xs:simpletype')
        for node in nodes:
            name = node.getAttribute('name', str())
            enumerations = findNodesNamed(node, 'xs:enumeration')
            vocab[name] = []
            for enumeration in enumerations:
                vocab[name].append([
                    enumeration.getAttribute('value'),
                    '_(%s)' % enumeration.getAttribute('value')
                ])
        # NOTE(review): interpolating the encoded JSON with %s and passing
        # the result to write_bytes() below suggests this script targets
        # Python 2 -- confirm before running it on another version.
        response += '%s = %s;\n\n' % (varname, json.dumps(
            vocab, indent=4).encode('utf-8'))
    outfile = Path('exe') / 'jsui' / 'scripts' / 'lomvocab.js'
    # Turn the JSON string "_(value)" into the call form _("value").  The
    # patterns are raw strings so that \( and \) reach the regex engine
    # without relying on Python passing unknown escapes through.  The second
    # substitution applies to every )" in the text, not only those produced
    # by the '_(%s)' wrapper above.
    response = re.sub(r'"_\(', '_("', response)
    response = re.sub(r'\)"', '")', response)
    outfile.write_bytes(response)
def test_findNodesNamed(self):
    """
    C{findNodesNamed} finds exactly two C{foo} elements in a document
    parsed by C{self.dom}.
    """
    parsed = self.dom.parseString('<doc><foo/><bar/><foo>a</foo></doc>')
    matches = domhelpers.findNodesNamed(parsed, 'foo')
    self.assertEqual(len(matches), 2)
def check_style(self, dom, filename):
    """
    Report each C{style} node in C{dom} whose text is not empty.

    @param dom: The DOM tree to inspect.
    @param filename: The file name passed along with each reported error.

    @return: C{None}
    """
    for styleNode in domhelpers.findNodesNamed(dom, 'style'):
        if domhelpers.getNodeText(styleNode) == '':
            continue
        self._reportError(filename, styleNode, 'hand hacked style')
def check(_):
    """
    Assert that every C{span} element in the data written to C{request}
    starts with a child whose data is C{'The Result'}.

    @param _: Unused.
    """
    dom = microdom.parseXMLString(request.written.getvalue())
    spanElems = domhelpers.findNodesNamed(dom, 'span')
    for spanElem in spanElems:
        # assertEqual replaces the deprecated failUnlessEqual alias.
        self.assertEqual('The Result', spanElem.childNodes[0].data)
def check_anchor_in_heading(self, dom, filename):
    """
    Report each heading (C{h1} through C{h6}) in C{dom} that contains an
    C{a} node.

    @param dom: The DOM tree to inspect.
    @param filename: The file name passed along with each reported error.

    @return: C{None}
    """
    for level in range(1, 7):
        for heading in domhelpers.findNodesNamed(dom, 'h%d' % level):
            if not domhelpers.findNodesNamed(heading, 'a'):
                continue
            self._reportError(filename, heading, 'anchor in heading')