# -*- coding: utf-8 -*- import re import cgi from talon.quotations import (register_xpath_extensions, extract_from_html, extract_from_plain) # noqa register_xpath_extensions() from HTMLParser import HTMLParser # http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python class MLStripper(HTMLParser): strippedTags = ["title", "script", "style"] def __init__(self): self.reset() self.fed = [] self.strip_tag_contents_mode = False def handle_starttag(self, tag, attrs): # Strip the contents of a tag when it's # in strippedTags. We can do this because # HTMLParser won't try to parse the inner # contents of a tag. if tag.lower() in MLStripper.strippedTags: self.strip_tag_contents_mode = True def handle_endtag(self, tag): self.strip_tag_contents_mode = False
def init(path_to_models=None):
    """Run the package's one-time setup.

    Always calls ``register_xpath_extensions()``. When the module-level
    ``ML_ENABLED`` flag is truthy, additionally calls
    ``signature.initialize(path_to_models)``.

    :param path_to_models: forwarded unchanged to ``signature.initialize``;
        defaults to ``None``. Presumably the location of the trained
        signature models -- confirm against ``signature.initialize``.
    :returns: ``None``.
    """
    register_xpath_extensions()

    # Signature initialisation is only performed when ML support is on.
    if not ML_ENABLED:
        return
    signature.initialize(path_to_models)
# -*- coding: utf-8 -*- import re import cgi from HTMLParser import HTMLParser, HTMLParseError from talon.quotations import (register_xpath_extensions, extract_from_html, extract_from_plain) # noqa register_xpath_extensions() from inbox.log import get_logger __all__ = ['strip_tags', 'plaintext2html', 'extract_from_html', 'extract_from_plain'] # http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python class MLStripper(HTMLParser): strippedTags = ["title", "script", "style"] def __init__(self): self.reset() self.fed = [] self.strip_tag_contents_mode = False def handle_starttag(self, tag, attrs): # Strip the contents of a tag when it's # in strippedTags. We can do this because # HTMLParser won't try to parse the inner # contents of a tag. if tag.lower() in MLStripper.strippedTags: self.strip_tag_contents_mode = True
def init():
    """Run the package's one-time setup.

    The only step is a call to ``register_xpath_extensions()``.

    :returns: ``None``.
    """
    register_xpath_extensions()
def init():
    """Run the package's one-time setup.

    Always calls ``register_xpath_extensions()``. When the module-level
    ``ML_ENABLED`` flag is truthy, additionally calls
    ``signature.initialize()`` with no arguments.

    :returns: ``None``.
    """
    register_xpath_extensions()

    # Signature initialisation is only performed when ML support is on.
    if not ML_ENABLED:
        return
    signature.initialize()
def init():
    """Run the package's one-time setup.

    Performs two unconditional steps, in this order:

    1. ``register_xpath_extensions()``
    2. ``signature.initialize()``

    :returns: ``None``.
    """
    register_xpath_extensions()
    signature.initialize()