def __tidy(self, content):
    """Clean up a markup string with Tidy and return the text plus the DOM.

    The Tidy instance is configured with attribute/content/smart
    indentation switched off, body-only printing on, a wrap length of 0,
    XHTML output off and numeric entities on.

    Returns a 2-tuple: the tidied output decoded as UTF-8, and the object
    returned by ``Tidy.parseDOM``.
    """
    cleaner = Tidy()
    # (setter, value) pairs, applied in the listed order.
    settings = (
        (cleaner.setIndentAttributes, False),
        (cleaner.setIndentContent, False),
        (cleaner.setPrintBodyOnly, True),
        (cleaner.setSmartIndent, False),
        (cleaner.setWraplen, 0),
        (cleaner.setXHTML, False),
        (cleaner.setNumEntities, True),
    )
    for apply_setting, value in settings:
        apply_setting(value)

    sink = ByteArrayOutputStream()
    # NOTE(review): getBytes() without a charset uses the platform default
    # encoding, while the output below is decoded as UTF-8 -- confirm this
    # is intended for non-ASCII input.
    raw = String(content).getBytes()
    dom = cleaner.parseDOM(ByteArrayInputStream(raw), sink)
    return sink.toString("UTF-8"), dom
def __tidy(self, content):
    """Return *content* after a pass through Tidy, as a UTF-8 decoded string.

    Tidy is configured for body-only XHTML output with numeric entities,
    no attribute/content/smart indentation and a wrap length of 0.
    Warnings are turned off and quiet mode is turned on.
    """
    cleaner = Tidy()

    # Layout options.
    cleaner.setIndentAttributes(False)
    cleaner.setIndentContent(False)
    cleaner.setPrintBodyOnly(True)
    cleaner.setSmartIndent(False)
    cleaner.setWraplen(0)

    # Output flavour.
    cleaner.setXHTML(True)
    cleaner.setNumEntities(True)

    # Diagnostics.
    cleaner.setShowWarnings(False)
    cleaner.setQuiet(True)

    sink = ByteArrayOutputStream()
    source = IOUtils.toInputStream(content, "UTF-8")
    cleaner.parse(source, sink)
    return sink.toString("UTF-8")
def __getContent(self, oid): slash = oid.rfind("/") pid = os.path.splitext(oid[slash+1:])[0] + ".htm" payload = Services.storage.getObject(oid).getPayload(pid) tidy = Tidy() tidy.setIndentAttributes(False) tidy.setIndentContent(False) tidy.setPrintBodyOnly(True) tidy.setSmartIndent(False) tidy.setWraplen(0) tidy.setXHTML(False) tidy.setNumEntities(True) out = ByteArrayOutputStream() try: doc = tidy.parseDOM(payload.getInputStream(), out) content = out.toString("UTF-8") content = self.__processMedia(oid, doc, content) #print "[\n%s\n]" % content except Exception, e: print " * blog.py: Failed to get content: %s" % e.getMessage()