示例#1
0
def strip_html(value):
    '''
    Return ``value`` with everything that looks like an HTML tag removed.

    The input is first passed through ``util.compact_html``; every
    non-greedy ``<...>`` match in the result is then deleted.

    >>> strip_html('a<b>b</b>c')
    'abc'
    '''
    compacted = util.compact_html(value)
    # '.' is used without re.DOTALL, so a single match never spans a newline.
    tag_pattern = re.compile('<.+?>')
    return tag_pattern.sub('', compacted)
示例#2
0
 def filter(self, value):
     '''
     Strip HTML tags from ``value``; ``None`` is passed through unchanged.

     Any other value is run through ``util.compact_html`` and every
     non-greedy ``<...>`` match in the result is deleted.

     >>> f = StripHtmlFilter()
     >>> f.filter(None) is None
     True
     >>> f.filter('a<b>b</b>c')
     'abc'
     '''
     if value is not None:
         compacted = util.compact_html(value)
         return re.sub('<.+?>', '', compacted)
     # Nothing to clean: hand the missing value straight back.
     return None
示例#3
0
def extract_from_url(url, rule, charset=None, fmt='yaml'):
    '''
    Fetch ``url`` and run ``extract`` on its compacted HTML.

    :param url: address handed to ``util.curl``.
    :param rule: forwarded unchanged to ``extract``.
    :param charset: forwarded to ``util.curl`` as ``charset``.
    :param fmt: forwarded to ``extract`` as ``fmt``.
    :return: whatever ``extract`` returns.
    '''
    compacted = util.compact_html(util.curl(url, charset=charset))
    return extract(compacted, rule, fmt=fmt)
示例#4
0
文件: main.py 项目: rabywan/textminer
def extract_from_url(url, rule, fmt='yaml', timeout=None):
    '''
    Download ``url`` with ``requests`` and run ``extract`` on the compacted HTML.

    :param url: address handed to ``requests.get``.
    :param rule: forwarded unchanged to ``extract``.
    :param fmt: forwarded to ``extract`` as ``fmt``.
    :param timeout: forwarded to ``requests.get``. The default ``None``
        keeps the previous behaviour (no time limit); pass a number of
        seconds, or a ``(connect, read)`` tuple, so an unresponsive
        server cannot block the caller indefinitely.
    :return: whatever ``extract`` returns.

    Exceptions raised by ``requests.get`` are not caught here.
    '''
    from textminer import util
    response = requests.get(url, timeout=timeout)
    # .text is the body decoded by requests using the encoding it detected.
    html = util.compact_html(response.text)
    return extract(html, rule, fmt=fmt)
示例#5
0
def extract_from_url(url, rule, charset=None, fmt='yaml'):
    '''
    Retrieve the page at ``url`` and apply ``rule`` to it via ``extract``.

    The page is obtained with ``util.curl`` (``charset`` is passed along),
    run through ``util.compact_html``, and the result is given to
    ``extract`` together with ``rule`` and ``fmt``; the value ``extract``
    produces is returned as-is.
    '''
    page = util.curl(url, charset=charset)
    return extract(util.compact_html(page), rule, fmt=fmt)