Пример #1
0
	def sanitise(self, text, markdown=True):
		"""Return ``text`` with unsafe links, tags and attributes stripped.

		Steps, in order:

		1. If ``markdown`` is true, ``text`` is first passed through ``md()``
		   (presumably Markdown -> HTML; confirm against the import).
		2. The markup is parsed with ``PyQuery``.
		3. Every ``<a>`` whose ``href`` uses the ``javascript:`` scheme is
		   replaced by its own text content.
		4. Every element matching a selector in ``UNCLEAN_TAGS`` is removed.
		5. Every attribute named in ``UNCLEAN_ATTRS`` is removed.
		6. The tree is serialised with ``outerHtml()``; if ``markdown`` is
		   true the HTML is converted back with ``HTML2Text().handle()``.

		:param text: markup to clean.
		:param markdown: when true, run ``md()`` on the input and
			``HTML2Text`` on the output.
		:returns: the cleaned text.
		"""
		if markdown:
			text = md(text)

		dom = PyQuery(text)

		# NOTE(review): ``find`` appears to search descendants only, so an
		# offending element that is itself the parsed root would not be
		# matched by any of the passes below -- confirm against callers.

		# Browsers ignore letter case, leading whitespace/control characters
		# and embedded tab/newline characters when resolving a URL scheme, so
		# an exact ``href^="javascript:"`` selector is trivially bypassed
		# (``JavaScript:``, `` javascript:``, ``java\tscript:``). Normalise the
		# href the same way before testing the scheme.
		leading_junk = ''.join(chr(code) for code in range(0x21))
		for node in dom.find('a[href]'):
			link = PyQuery(node)
			href = link.attr('href') or ''
			for ignored in '\t\n\r':
				href = href.replace(ignored, '')
			if href.lstrip(leading_junk).lower().startswith('javascript:'):
				link.replaceWith(link.text())

		for obj in UNCLEAN_TAGS:
			dom.find(obj).remove()

		for attr in UNCLEAN_ATTRS:
			dom.find('[%s]' % attr).removeAttr(attr)

		text = dom.outerHtml()
		if markdown:
			# Separate name: this is a converter, not the parsed document.
			converter = HTML2Text()
			text = converter.handle(text)

		return text
Пример #2
0
def sanitise(text, markdown=False):
    """Return ``text`` with unsafe links, tags and attributes stripped.

    Steps, in order:

    1. If ``markdown`` is true, ``text`` is first passed through ``md()``
       (presumably Markdown -> HTML; confirm against the import).
    2. The markup is parsed with ``PyQuery``.
    3. Every ``<a>`` whose ``href`` uses the ``javascript:`` scheme is
       replaced by its own text content.
    4. Every element matching a selector in ``UNCLEAN_TAGS`` is removed.
    5. Every attribute named in ``UNCLEAN_ATTRS`` is removed.
    6. The tree is serialised with ``outerHtml()``; if ``markdown`` is true
       the HTML is converted back with ``HTML2Text().handle()``.

    :param text: markup to clean.
    :param markdown: when true, run ``md()`` on the input and ``HTML2Text``
        on the output.
    :returns: the cleaned text.
    """
    if markdown:
        text = md(text)

    dom = PyQuery(text)

    # NOTE(review): ``find`` appears to search descendants only, so an
    # offending element that is itself the parsed root would not be matched
    # by any of the passes below -- confirm against callers.

    # Browsers ignore letter case, leading whitespace/control characters and
    # embedded tab/newline characters when resolving a URL scheme, so an
    # exact ``href^="javascript:"`` selector is trivially bypassed
    # (``JavaScript:``, `` javascript:``, ``java\tscript:``). Normalise the
    # href the same way before testing the scheme.
    leading_junk = ''.join(chr(code) for code in range(0x21))
    for node in dom.find('a[href]'):
        link = PyQuery(node)
        href = link.attr('href') or ''
        for ignored in '\t\n\r':
            href = href.replace(ignored, '')
        if href.lstrip(leading_junk).lower().startswith('javascript:'):
            link.replaceWith(link.text())

    for obj in UNCLEAN_TAGS:
        dom.find(obj).remove()

    for attr in UNCLEAN_ATTRS:
        dom.find('[%s]' % attr).removeAttr(attr)

    text = dom.outerHtml()
    if markdown:
        # Separate name: this is a converter, not the parsed document.
        converter = HTML2Text()
        text = converter.handle(text)

    return text