def tag_attr_add(soup, attrspec):
    '''Set html attribute(s) on the root tag of a passed html fragment.

    attrspec is a "|"-separated list of "name=value" pairs. Each pair is
    split on its first "=" only, so a value may itself contain "=".
    A falsy fragment yields an empty string without being parsed;
    otherwise the lxml_tostring() output is returned through mark_safe().'''
    if not soup:
        return ''
    root = lxml_soup(soup)
    pairs = (spec.split('=', 1) for spec in attrspec.split('|'))
    for name, value in pairs:
        # A spec without "=" fails to unpack here and raises ValueError.
        root.attrib[name] = value
    return mark_safe(lxml_tostring(root))
def tag_attr_add(soup, attrspec):
    '''Add html attribute(s) to the root tag of a passed html fragment.

    Attributes are given in "{k1}={v1}|{k2}={v2}|..." form, for example:
    "class=pull-right clear|title=Some floater element"
    Only the first "=" of each entry separates name from value.
    A falsy fragment is answered with an empty string; otherwise the
    lxml_tostring() output is returned through mark_safe().'''
    if not soup:
        return ''
    root = lxml_soup(soup)
    attributes = root.attrib
    for assignment in attrspec.split('|'):
        # Entries lacking "=" make this unpacking raise ValueError.
        name, value = assignment.split('=', 1)
        attributes[name] = value
    return mark_safe(lxml_tostring(root))
def prettyhtml_nostyle(soup, autoescape=None):
    '''Clean up an html fragment and strip style-affecting attributes.

    Described by its author as doing what "prettyhtml" does, plus removing
    every attribute (classes, width, size, etc) whose name is not listed
    in nostyle_allowed_attrs, on every element of the fragment.
    A falsy fragment yields an empty string. The lxml_tostring() output is
    escaped when autoescape is truthy and that output is not SafeData;
    in every other case it is returned through mark_safe().'''
    if not soup:
        return ''
    root = lxml_soup(soup)
    for element in root.iter():
        attributes = element.attrib
        # Collect the names to drop first, then delete them.
        unwanted = [
            name for name in attributes.keys()
            if name not in nostyle_allowed_attrs
        ]
        for name in unwanted:
            del attributes[name]
    html = lxml_tostring(root)
    if autoescape and not isinstance(html, SafeData):
        return escape(html)
    return mark_safe(html)
def tag_pick(soup, xpaths):
    '''Pick subset from passed html fragment by xpath.

    xpaths holds one or more XPath expressions separated by "||". They are
    tried in order, and the first result of the first expression that
    matches anything is returned via lxml_tostring(). When no expression
    matches, the fragment is returned unchanged. A falsy fragment yields
    an empty string without being parsed.'''
    if not soup:
        return ''
    # Parse once; every alternative is evaluated against the same tree.
    root = lxml_soup(soup)
    for xpath in xpaths.split(u'||'):
        match = root.xpath(xpath)
        if match:
            return lxml_tostring(match[0])
    return soup
def tag_pick_text(soup):
    '''Strip all tags from passed html fragment, returning only the text
    they contain.

    A falsy fragment yields an empty string without being parsed; any
    other input is parsed with lxml_soup() and its text_content() is
    returned as-is (not marked safe).'''
    if not soup:
        return ''
    return lxml_soup(soup).text_content()
def tag_pick(soup, xpaths):
    '''Pick a part of an html fragment using xpath expression(s).

    xpaths is a "||"-separated list of alternative XPath expressions,
    evaluated in order. The first item matched by the first expression
    that matches anything is returned via lxml_tostring(); if nothing
    matches, the original fragment is handed back untouched. A falsy
    fragment yields an empty string without being parsed.'''
    if not soup:
        return ''
    # The parsed tree does not depend on the xpath, so build it only once.
    tree = lxml_soup(soup)
    for expression in xpaths.split(u'||'):
        found = tree.xpath(expression)
        if found:
            return lxml_tostring(found[0])
    return soup
def tag_pick_text(soup):
    '''Strip all tags from an html fragment, returning only its text.

    The text is escaped before being passed to mark_safe(), so the value
    can be emitted into html verbatim. A falsy fragment yields an empty
    string without being parsed.'''
    if not soup:
        return ''
    text = lxml_soup(soup).text_content()
    # text_content() hands back decoded text ("&lt;" comes back as "<"),
    # so it must be escaped again before it may be declared safe markup.
    return mark_safe(escape(text))