Пример #1
0
def strip_tags(text, valid_tags=None):
    """Strip markup from ``text``, keeping only whitelisted tags and attributes.

    HTML comments are removed entirely.  Tags whose name is not a key of
    ``valid_tags`` are hidden: their own markup is dropped while their
    contents are kept.  For whitelisted tags, only attributes accepted by
    the matching ``valid_tags`` entry survive, and every ``javascript:``
    occurrence is removed from the surviving attribute values.

    :param text: markup to clean.
    :param valid_tags: mapping of tag name -> allowed attribute names.
        The allowed names are checked with ``in``, so any container works.
        NOTE: when the entry is a plain string (as in the example below) the
        check is a *substring* test, e.g. ``'ref' in 'href'`` is true; pass a
        list/tuple/set for exact matching.  Defaults to no allowed tags.
    :returns: the cleaned markup, decoded from UTF-8.

    example::

        >>> strip_tags('this <a href="">xxx</a>')
        u'this xxx'

        >>> strip_tags('this <a href="">xxx</a>',{'a':'href'})
        u'this <a href="">xxx</a>'

    NOTE(review): removing ``javascript:`` is a best-effort filter, not a
    complete XSS defence (other schemes and obfuscations with embedded
    whitespace or entities are not handled).
    """
    import re

    from app.BeautifulSoup import BeautifulSoup, Comment

    if valid_tags is None:
        valid_tags = {}

    # Case-insensitive: scheme names are not case-sensitive, so
    # ``JavaScript:`` must be treated like ``javascript:``.
    js_scheme = re.compile(r'javascript:', re.IGNORECASE)

    def scrub(val):
        """Return ``val`` with every ``javascript:`` occurrence removed."""
        # Some parser versions may report valueless attributes
        # (``<input disabled>``) as None; there is nothing to scrub then.
        if val is None:
            return val
        # Repeat until the value is stable: a single pass over
        # ``javajavascript:script:`` would splice a fresh ``javascript:``
        # together out of the leftovers.
        previous = None
        while previous != val:
            previous = val
            val = js_scheme.sub('', val)
        return val

    soup = BeautifulSoup(text)
    # Drop comments completely (they may carry conditional markup/scripts).
    for comment in soup.findAll(text=lambda node: isinstance(node, Comment)):
        comment.extract()
    for tag in soup.findAll(True):
        if tag.name in valid_tags:
            valid_attrs = valid_tags[tag.name]
            # Assumes the BeautifulSoup 3 API, where ``tag.attrs`` is a list
            # of ``(name, value)`` pairs.
            tag.attrs = [(attr, scrub(val))
                         for attr, val in tag.attrs if attr in valid_attrs]
        else:
            # Hidden tags render their children but not their own markup.
            tag.hidden = True
    return soup.renderContents().decode('utf8')