def handle_startendtag(self, tag, attrs):
    """Handle a self-closing tag such as ``<br/>``.

    Creates a node for the tag, attaches it as a child of the node that is
    currently being processed (``self.now``) and copies the tag's
    attributes onto it.

    :param tag: name of the self-closing tag.
    :param attrs: iterable of ``(name, value)`` pairs for the tag.
    :return: None
    """
    # NOTE(review): the standard-library default implementation of
    # handle_startendtag calls self.handle_starttag() followed by
    # self.handle_endtag(). Confirm those overrides do not also create a
    # node for this tag, otherwise it would be recorded twice.
    HTMLParser.handle_startendtag(self, tag, attrs)

    # Choose the specialised node class for the tag; every other tag gets a
    # generic WebNode. Resolving the class first means exactly one node is
    # instantiated, not a WebNode that is immediately discarded.
    node_classes = {
        "br": wn.BrNode,
        "hr": wn.HrNode,
        "meta": wn.MetaNode,
        "img": wn.ImgNode,
    }
    node = node_classes.get(tag, wn.WebNode)()

    # A self-closing tag is expected to always have a parent node.
    parent = self.now
    node.tag = tag
    node.father = parent
    node.index = parent.index + 1
    # Path segment is "<tag>[<1-based position among the parent's children>]".
    position = len(parent.children) + 1
    node.path = parent.path + "-" + "{0}[{1}]".format(tag, position)

    # Copy the tag's attributes onto the node.
    for name, value in attrs:
        node.attr[name] = value

    # Mark the node as originating from a self-closing tag.
    node.type = "startendtag"

    # Register the node in its parent's child list.
    parent.children.append(node)
def handle_startendtag(self, tag, attrs):
    """Handle a self-closing tag by delegating to ``HTMLParser``.

    :param tag: name of the self-closing tag.
    :param attrs: attribute pairs for the tag, passed through unchanged.
    :return: None
    """
    # NOTE(review): a definition with the same name appears earlier in this
    # file; if both live in the same scope, this later one takes effect.
    # Verify which one is intended.
    HTMLParser.handle_startendtag(self, tag, attrs)