def handle_endtag(self, tag):
    """Handle a closing tag ``</xxx>`` and step the cursor up one level.

    Per the original author's note, this callback is also triggered for
    directly-closed tags such as ``meta``, ``hr`` and ``br``; any tag
    found in ``startendtag_list`` is therefore ignored here.

    When ``tag`` matches the tag of the current node (``self.now``), the
    node is appended to its parent's ``children`` list, a lone
    ``wn.TextNode`` child is folded into the node's ``content``, and
    ``self.now`` is moved to the parent.

    :param tag: name of the tag being closed
    :return: None
    """
    # Let the parent class process the end tag first.
    HTMLParser.handle_endtag(self, tag)

    # Skip directly-closed tags, and ignore a closing tag that does not
    # belong to the node currently being built.
    if tag in startendtag_list or self.now.tag != tag:
        return

    node = self.now

    # A node that reaches its closing tag is expected to have a parent:
    # register the finished node in the parent's list of children.
    node.father.children.append(node)

    # If the only child is a TextNode, keep its text as this node's
    # content and drop the now redundant child node.
    kids = node.children
    if len(kids) == 1 and isinstance(kids[0], wn.TextNode):
        node.content = kids[0].content
        node.children = []

    # Move one level up: the parent becomes the current node.
    self.now = node.father
def handle_endtag(self, tag):
    """Handle a closing tag and switch ``self.collect`` back on if needed.

    After delegating to ``HTMLParser.handle_endtag``, the tag name is
    lowercased and looked up in ``self.strip_tags``; on a match
    ``self.collect`` is set to ``True``. Other tags leave the flag
    untouched.

    :param tag: name of the tag being closed
    :return: None
    """
    HTMLParser.handle_endtag(self, tag)

    lowered = tag.lower()
    if lowered not in self.strip_tags:
        return

    # NOTE(review): presumably this resumes text collection once a
    # stripped element has been closed; confirm against handle_starttag.
    self.collect = True
def handle_endtag(self, tag):
    """Handle a closing tag by printing it in ``</tag>`` form.

    Delegates to ``HTMLParser.handle_endtag`` first, then writes the
    closing-tag markup for ``tag`` to standard output via ``print``.

    :param tag: name of the tag being closed
    :return: None
    """
    HTMLParser.handle_endtag(self, tag)
    closing_markup = ''.join(('</', tag, '>'))
    print(closing_markup)