def drop_ignore_trees(html):
    """Strip ignored tag trees and comment sections from ``html``.

    Every tag listed in ``defs.ignore_tags`` is passed, in order, to
    ``drop_tree`` together with the markup produced so far.  The result
    is then parsed with ``fromstring`` and every element matching the CSS
    selector ``#comments,.comments`` is removed from the parsed tree.

    Returns the re-serialised tree as ``unicode(tostring(...))``.
    """
    cleaned = html
    for ignored_tag in defs.ignore_tags:
        cleaned = drop_tree(cleaned, ignored_tag)

    tree = fromstring(cleaned)

    # Remove blog comment sections (elements with id or class "comments").
    comment_nodes = tree.cssselect("#comments,.comments")
    for node in comment_nodes:
        node.drop_tree()

    serialized = tostring(tree)
    return unicode(serialized)
def a_droped_text(self):
    """Return the text content of ``self.body`` after dropping trees.

    ``drop_tree`` is applied to ``self.body`` twice: first with ``'a'``,
    then with the two-character pattern backslash + ``s``.  If the result
    is non-empty (``len(...) > 0``) it is passed to ``text_content`` and
    that value is returned; otherwise an empty unicode string is
    returned.
    """
    # Raw string: '\s' is not a recognised escape sequence, so the plain
    # literal only works by accident and warns on newer interpreters.
    # r'\s' has exactly the same value (backslash followed by 's').
    # NOTE(review): how drop_tree interprets this second argument is not
    # visible here -- presumably a pattern; confirm against drop_tree.
    droped = drop_tree(drop_tree(self.body, 'a'), r'\s')
    if len(droped) > 0:
        return text_content(droped)
    return u''