Exemplo n.º 1
0
    def handle_data(self, data):
        """
        Record the text found inside a tag as a new text node.

        Text that is empty once spaces and newlines are removed is ignored.

        :param data: text content reported by the parser
        :return: None
        """
        # Nothing but spaces/newlines: there is no text worth recording.
        if not data.replace(" ", "").replace("\n", ""):
            return

        # Let the base parser process the text first.
        HTMLParser.handle_data(self, data)

        # Create the text node and fill in its tag, parent and content.
        text_node = wn.TextNode()
        text_node.tag = "text"
        text_node.father = self.now
        text_node.content = data
        text_node.index = text_node.father.index + 1
        # The path extends the parent's path with this node's 1-based
        # position among the parent's children.
        position = len(text_node.father.children) + 1
        text_node.path = text_node.father.path + "-" + "{0}[{1}]".format("text", position)
        # Mark the node with its special type.
        text_node.type = "text"

        # Register the node as a child of the node held in self.now ...
        self.now.children.append(text_node)
        # ... and keep that node's own content attribute complete.
        self.now.content += data
Exemplo n.º 2
0
 def _strip_tags(self, html):
     result = []
     parser = HTMLParser()
     parser.handle_data = result.append
     parser.feed(html)
     parser.close()
     return ''.join(result)
Exemplo n.º 3
0
 def _strip_tags(self, html):
     result = []
     parser = HTMLParser()
     parser.handle_data = result.append
     parser.feed(html)
     parser.close()
     return ''.join(result)
Exemplo n.º 4
0
def strip_html(text):
    """Return ``text`` with its HTML markup removed.

    :param text: HTML string to clean, or ``None``
    :return: the concatenated text runs reported by the parser; an empty
        string when ``text`` is ``None``
    """
    if text is None:
        return ''
    parts = []
    parser = HTMLParser()
    # Collect every text run the parser reports.
    parser.handle_data = parts.append
    parser.feed(text)
    # feed() may hold back trailing input (for example text ending in an
    # unterminated "&...") while waiting for more data; close() forces the
    # parser to process whatever is still buffered.
    parser.close()
    return ''.join(parts)
Exemplo n.º 5
0
def strip_tags(html):
    """Return the text of ``html`` (trimmed first) with markup removed."""
    collected = []

    class _TextOnly(HTMLParser):
        # Keeps each text run reported by the parser.
        def handle_data(self, data):
            collected.append(data)

    extractor = _TextOnly()
    # One strip() already removes surrounding newlines along with the rest
    # of the surrounding whitespace.
    extractor.feed(html.strip())
    extractor.close()
    return "".join(collected)
Exemplo n.º 6
0
 def strip_tags(self, htmlStr):
     """Return the text of ``htmlStr`` (trimmed first) without its tags."""
     # A single strip() also takes care of surrounding newlines.
     trimmed = htmlStr.strip()
     fragments = []
     tag_filter = HTMLParser()
     # Every text run the parser reports is appended to ``fragments``.
     tag_filter.handle_data = fragments.append
     tag_filter.feed(trimmed)
     tag_filter.close()
     return ''.join(fragments)
Exemplo n.º 7
0
def strip_tags(html):
    """Return the text of ``html`` (trimmed first) with markup removed."""
    from html.parser import HTMLParser

    text_runs = []

    class _Collector(HTMLParser):
        # Stores each text run reported by the parser.
        def handle_data(self, data):
            text_runs.append(data)

    collector = _Collector()
    # strip() alone already drops surrounding newlines.
    collector.feed(html.strip())
    collector.close()
    return ''.join(text_runs)
Exemplo n.º 8
0
def strip_tags(html):
    """
    Filter the HTML tags out of ``html``.

    Returns the list of text runs reported by the parser (not a joined
    string).
    """
    text_runs = []

    class _Collector(HTMLParser):
        # Stores each text run reported by the parser.
        def handle_data(self, data):
            text_runs.append(data)

    collector = _Collector()
    collector.feed(html.strip())
    collector.close()
    return text_runs
Exemplo n.º 9
0
def strip_tags(html):
    """Remove the HTML tags from a string and return the remaining text."""
    from html.parser import HTMLParser

    fragments = []
    extractor = HTMLParser()
    # Each text run the parser reports goes straight into ``fragments``.
    extractor.handle_data = fragments.append
    extractor.feed(html)
    extractor.close()
    text = ''.join(fragments)
    return text
Exemplo n.º 10
0
def strip_tags(html):
    """Return the text of ``html`` without its tags.

    Falsy input (``None``, empty string) yields an empty string.
    """
    if not html:
        return ''

    pieces = []
    extractor = HTMLParser()
    # Each text run the parser reports is appended to ``pieces``.
    extractor.handle_data = pieces.append
    # strip() alone already removes surrounding newlines.
    extractor.feed(html.strip())
    extractor.close()
    return "".join(pieces)
Exemplo n.º 11
0
def html_strip(html):
    """Return ``html`` as plain text without tags or ``# > - *`` characters.

    The markup is parsed first and the ``#``, ``>``, ``-`` and ``*``
    characters are removed from the extracted text afterwards. Removing
    ``>`` before parsing would delete the closing delimiter of every tag,
    so the parser could no longer recognise the markup.

    :param html: string that may contain HTML tags
    :return: the extracted text with those four characters removed and
        surrounding whitespace trimmed
    """
    result = []
    parse = HTMLParser()
    # Collect every text run the parser reports.
    parse.handle_data = result.append
    parse.feed(html)
    parse.close()

    text = "".join(result)
    for unwanted in ("#", ">", "-", "*"):
        text = text.replace(unwanted, '')
    return text.strip()
Exemplo n.º 12
0
def strip_tags(html):
    """Filter the HTML tags out of ``html`` and return the remaining text.

    For example, ``strip_tags("<font color=red>hello</font>")`` gives
    ``"hello"``.
    """
    from html.parser import HTMLParser

    text_runs = []
    tag_filter = HTMLParser()
    # Every text run the parser reports is appended to ``text_runs``.
    tag_filter.handle_data = text_runs.append
    # strip() alone already removes surrounding newlines.
    tag_filter.feed(html.strip())
    tag_filter.close()
    return ''.join(text_runs)
Exemplo n.º 13
0
    def strip_tags(self, htmlStr):
        """
        Filter HTML tags out of a string using HTMLParser.

        The untouched input is also stored on ``self.htmlStr``.

        :param htmlStr: HTML string to clean
        :return: the text of ``htmlStr`` (trimmed first) without tags
        """
        self.htmlStr = htmlStr

        fragments = []

        class _Collector(HTMLParser):
            # Stores each text run reported by the parser.
            def handle_data(self, data):
                fragments.append(data)

        collector = _Collector()
        # strip() alone already removes surrounding newlines.
        collector.feed(htmlStr.strip())
        collector.close()
        return ''.join(fragments)
Exemplo n.º 14
0
    def strip_tags_parser(self, html):
        """
        Remove the HTML tags from a text, using HTMLParser.

        The text runs found between tags are joined with ``$``.
        Example: ``strip_tags_parser("<font color=red>hello</font>")``
        gives ``"hello"``.

        :return: String
        """
        from html.parser import HTMLParser

        # A single strip() trims newlines, tabs and spaces from both ends.
        trimmed = html.strip()

        segments = []
        extractor = HTMLParser()
        # Every text run the parser reports is appended to ``segments``.
        extractor.handle_data = segments.append
        extractor.feed(trimmed)
        extractor.close()
        return '$'.join(segments)
Exemplo n.º 15
0
def fetch_words_from_source(source):
    """Return the text content of the markup in ``source``.

    :param source: markup passed to ``PyQuery`` to build the document
    :return: the text obtained from the document's ``text()`` method
    """
    document = PyQuery(source)
    return document.text()
Exemplo n.º 16
0
 def handle_data(self, data):
     """Forward ``data`` to the base parser, then print it."""
     HTMLParser.handle_data(self, data)
     message = 'data is ... %s ' % data
     print(message)