def handle_data(self, data):
    """Record a non-blank text run as a text node under ``self.now``.

    Runs consisting only of spaces and newlines are ignored.

    :param data: text found between tags
    :return: None
    """
    # Nothing to record when only spaces/newlines remain.
    visible = data.replace(" ", "").replace("\n", "")
    if not visible:
        return

    # Let the base parser handle the text first.
    HTMLParser.handle_data(self, data)

    # Build the text node and attach its tag, parent and content.
    node = wn.TextNode()
    node.tag = "text"
    node.father = self.now
    node.content = data
    node.index = node.father.index + 1

    # Path is the parent's path plus a 1-based position among its children.
    prefix = node.father.path + "-"
    label = "{0}[{1}]".format("text", len(node.father.children) + 1)
    node.path = prefix + label

    # Mark the node with its special type.
    node.type = "text"

    # Register the node with its parent and keep the parent's full
    # content attribute up to date.
    self.now.children.append(node)
    self.now.content += data
def _strip_tags(self, html): result = [] parser = HTMLParser() parser.handle_data = result.append parser.feed(html) parser.close() return ''.join(result)
def _strip_tags(self, html): result = [] parser = HTMLParser() parser.handle_data = result.append parser.feed(html) parser.close() return ''.join(result)
def strip_html(text):
    """Return *text* with all HTML markup removed.

    :param text: HTML string, or ``None``
    :return: the concatenated text content; ``''`` when *text* is ``None``
    """
    if text is None:
        return ''
    parts = []
    parser = HTMLParser()
    # Collect every text chunk the parser reports.
    parser.handle_data = parts.append
    parser.feed(text)
    # close() forces processing of data the parser is still buffering
    # (e.g. trailing text ending in an unterminated '&' reference);
    # without it that text would be silently dropped.
    parser.close()
    return ''.join(parts)
def strip_tags(html):
    """Trim surrounding whitespace from *html* and return its text content."""
    trimmed = html.strip().strip("\n")
    pieces = []
    extractor = HTMLParser()
    extractor.handle_data = pieces.append
    extractor.feed(trimmed)
    extractor.close()
    return "".join(pieces)
def strip_tags(self, htmlStr):
    """Return the text of *htmlStr* with tags removed.

    Leading and trailing whitespace is trimmed before parsing.
    """
    cleaned = htmlStr.strip().strip("\n")
    text_runs = []
    reader = HTMLParser()
    reader.handle_data = text_runs.append
    reader.feed(cleaned)
    reader.close()
    return ''.join(text_runs)
def strip_tags(html):
    """Strip HTML markup from *html* and return the remaining text."""
    from html.parser import HTMLParser

    fragments = []
    extractor = HTMLParser()
    extractor.handle_data = fragments.append
    # Surrounding whitespace is discarded before the markup is parsed.
    extractor.feed(html.strip().strip("\n"))
    extractor.close()
    return ''.join(fragments)
def strip_tags(html):
    """Filter the HTML tags out of *html*.

    Returns the list of text chunks reported by the parser (the chunks
    are not joined into a single string).
    """
    chunks = []
    extractor = HTMLParser()
    extractor.handle_data = chunks.append
    extractor.feed(html.strip())
    extractor.close()
    return chunks
def strip_tags(html):
    """Remove HTML tags from a string and return the plain text."""
    from html.parser import HTMLParser

    text_parts = []
    tag_stripper = HTMLParser()
    tag_stripper.handle_data = text_parts.append
    tag_stripper.feed(html)
    tag_stripper.close()
    plain = ''.join(text_parts)
    return plain
def strip_tags(html):
    """Return the text content of *html*; falsy input yields ``''``."""
    # Guard: None, empty string, etc. have nothing to parse.
    if not html:
        return ''

    trimmed = html.strip().strip("\n")
    pieces = []
    extractor = HTMLParser()
    extractor.handle_data = pieces.append
    extractor.feed(trimmed)
    extractor.close()
    return "".join(pieces)
def html_strip(html):
    """Return the text of *html* without tags and without ``# > - *``.

    The markup is parsed first and the characters ``#``, ``>``, ``-`` and
    ``*`` are removed from the extracted text afterwards. Deleting ``>``
    (or ``-``) before parsing, as was done previously, destroys the tag
    and comment delimiters, so the parser could no longer recognise the
    markup it was supposed to strip.

    :param html: string that may contain HTML tags and markdown symbols
    :return: cleaned text with surrounding whitespace trimmed
    """
    result = []
    parse = HTMLParser()
    parse.handle_data = result.append
    parse.feed(html)
    parse.close()

    # One pass that deletes all four markdown symbols.
    text = "".join(result).translate(str.maketrans("", "", "#>-*"))
    return text.strip()
def strip_tags(html):
    """Filter HTML tags out of a string.

    Example: ``strip_tags("<font color=red>hello</font>")`` gives
    ``"hello"``.
    """
    from html.parser import HTMLParser

    trimmed = html.strip().strip("\n")
    text_chunks = []
    extractor = HTMLParser()
    extractor.handle_data = text_chunks.append
    extractor.feed(trimmed)
    extractor.close()
    return ''.join(text_chunks)
def strip_tags(self, htmlStr):
    """Filter HTML tags out of *htmlStr* using HTMLParser.

    The untrimmed input is also stored on ``self.htmlStr``.

    :param htmlStr: HTML string to clean
    :return: text content with tags removed
    """
    self.htmlStr = htmlStr

    cleaned = htmlStr.strip().strip("\n")
    text_runs = []
    extractor = HTMLParser()
    extractor.handle_data = text_runs.append
    extractor.feed(cleaned)
    extractor.close()
    return ''.join(text_runs)
def strip_tags_parser(self, html):
    """Remove the HTML tags from a text, using HTMLParser.

    The text chunks found between tags are joined with ``'$'``.

    Example: ``strip_tags("<font color=red>hello</font>")``

    :return: String
    """
    from html.parser import HTMLParser

    # Trim newlines, tabs and spaces in turn, then any other whitespace.
    for chars in ('\n', '\t', ' ', None):
        html = html.strip(chars)

    segments = []
    extractor = HTMLParser()
    extractor.handle_data = segments.append
    extractor.feed(html)
    extractor.close()
    return '$'.join(segments)
def fetch_words_from_source(source):
    """Return the text content of the HTML in *source*.

    :param source: HTML markup accepted by ``PyQuery``
    :return: the text of the parsed document, as given by ``PyQuery.text()``
    """
    # NOTE(review): the previous body called HTMLParser.handle_data(pq)
    # without its ``data`` argument and then read an undefined name ``tag``,
    # so it could never return. Returning the document text is the assumed
    # intent -- confirm against callers.
    pq = PyQuery(source)
    return pq.text()
def handle_data(self, data):
    """Forward *data* to the base parser, then print it."""
    HTMLParser.handle_data(self, data)
    message = 'data is ... %s ' % data
    print(message)