def sanitise(self, text, markdown=True):
    """Strip unsafe markup from ``text`` and return the cleaned result.

    The text is parsed with PyQuery, then:

    * every ``<a>`` whose ``href`` uses the ``javascript:`` scheme is
      replaced by its own text content;
    * every element matching a selector in ``UNCLEAN_TAGS`` is removed;
    * every attribute named in ``UNCLEAN_ATTRS`` is stripped from the
      elements that carry it.

    Args:
        text: The markup to clean.
        markdown: When true (the default), ``text`` is first passed through
            ``md()`` and the cleaned HTML is passed through
            ``HTML2Text().handle()`` before being returned (presumably a
            Markdown -> HTML -> Markdown round trip; confirm against the
            definitions of ``md`` and ``HTML2Text``).

    Returns:
        The cleaned markup: the output of ``HTML2Text().handle()`` when
        ``markdown`` is true, otherwise the document's outer HTML.
    """
    if markdown:
        text = md(text)

    dom = PyQuery(text)

    # The scheme is checked in Python instead of with the CSS selector
    # a[href^="javascript:"], which is a case-sensitive exact-prefix match.
    # Browsers ignore case, leading whitespace/control characters and
    # embedded tabs/newlines when reading a URL scheme, so hrefs such as
    # "JavaScript:..." or " java\tscript:..." would otherwise get through.
    for element in dom.find('a[href]'):
        href = element.get('href') or ''
        normalised = ''.join(ch for ch in href if ch > ' ').lower()
        if normalised.startswith('javascript:'):
            link = PyQuery(element)
            link.replaceWith(link.text())

    for tag in UNCLEAN_TAGS:
        dom.find(tag).remove()

    for attr in UNCLEAN_ATTRS:
        dom.find('[%s]' % attr).removeAttr(attr)

    text = dom.outerHtml()

    if markdown:
        converter = HTML2Text()
        text = converter.handle(text)

    return text
def sanitise(text, markdown=False):
    """Strip unsafe markup from ``text`` and return the cleaned result.

    The text is parsed with PyQuery, then:

    * every ``<a>`` whose ``href`` uses the ``javascript:`` scheme is
      replaced by its own text content;
    * every element matching a selector in ``UNCLEAN_TAGS`` is removed;
    * every attribute named in ``UNCLEAN_ATTRS`` is stripped from the
      elements that carry it.

    Args:
        text: The markup to clean.
        markdown: When true, ``text`` is first passed through ``md()`` and
            the cleaned HTML is passed through ``HTML2Text().handle()``
            before being returned (presumably a Markdown -> HTML ->
            Markdown round trip; confirm against the definitions of ``md``
            and ``HTML2Text``). Defaults to false.

    Returns:
        The cleaned markup: the output of ``HTML2Text().handle()`` when
        ``markdown`` is true, otherwise the document's outer HTML.
    """
    if markdown:
        text = md(text)

    dom = PyQuery(text)

    # The scheme is checked in Python instead of with the CSS selector
    # a[href^="javascript:"], which is a case-sensitive exact-prefix match.
    # Browsers ignore case, leading whitespace/control characters and
    # embedded tabs/newlines when reading a URL scheme, so hrefs such as
    # "JavaScript:..." or " java\tscript:..." would otherwise get through.
    for element in dom.find('a[href]'):
        href = element.get('href') or ''
        normalised = ''.join(ch for ch in href if ch > ' ').lower()
        if normalised.startswith('javascript:'):
            link = PyQuery(element)
            link.replaceWith(link.text())

    for tag in UNCLEAN_TAGS:
        dom.find(tag).remove()

    for attr in UNCLEAN_ATTRS:
        dom.find('[%s]' % attr).removeAttr(attr)

    text = dom.outerHtml()

    if markdown:
        converter = HTML2Text()
        text = converter.handle(text)

    return text