def __drawTree__(self, treeroot, graph, rootId, names):
    """Add ``treeroot`` and all of its descendants to ``graph``.

    One vertex is added per tree node, in depth-first pre-order. The tag
    of each node (as returned by ``util.getTag``) is appended to ``names``,
    so the index of a tag in ``names`` is the id used for its vertex.

    Args:
        treeroot: Node to add; iterating it must yield its children.
        graph: Object providing ``add_vertices`` and ``add_edges``.
        rootId: Vertex id of the parent of ``treeroot``; a negative value
            means there is no parent and no edge is added.
        names: List of vertex labels, extended in place.
    """
    # The new vertex takes the next free slot in the label list.
    vertexId = len(names)
    graph.add_vertices(1)
    names.append(util.getTag(treeroot))

    hasParent = rootId >= 0
    if hasParent:
        graph.add_edges([(rootId, vertexId)])

    # Snapshot the children before descending into them.
    for subtree in list(treeroot):
        self.__drawTree__(subtree, graph, vertexId, names)
def handle(self, xmltree, xmlnode, dstree, dsnode):
    """Dispatch ``xmlnode`` to the handler method matching its tag.

    Nodes whose ``IGNORE`` attribute equals ``IGNORE_TRUE`` are skipped.
    When ``self.peudodsnode`` is set and the parent of ``xmlnode`` is not a
    ``p`` element, ``self.peudoTrigger`` is set to ``True`` and
    ``self.peudodsnode`` replaces ``dsnode`` as the node passed on.

    Args:
        xmltree: Not used by this method.
        xmlnode: Element to process; must provide ``get`` and ``getparent``.
        dstree: Passed through unchanged to the tag handler.
        dsnode: Current destination node, passed to the tag handler.

    Returns:
        The result of ``self.__handle_<tag>__(xmlnode, dstree, dsnode)``
        for a supported tag. For ``tr`` and for any unsupported tag, the
        (possibly substituted) ``dsnode`` is returned unchanged.
    """
    # Determine whether to ignore
    if xmlnode.get(IGNORE) == IGNORE_TRUE:
        return dsnode

    parent = xmlnode.getparent()
    if parent is not None:
        parentTag = util.getTag(parent)
        if self.peudodsnode is not None and parentTag != 'p':
            self.peudoTrigger = True
            dsnode = self.peudodsnode

    # Process tag
    tag = util.getTag(xmlnode)

    # Tags that have a dedicated ``__handle_<tag>__`` method. A tuple is
    # used so membership relies on ``==`` only, like the comparisons it
    # replaces.
    handledTags = (
        'em', 'html', 'title', 'abbr', 'acronym', 'address', 'blockquote',
        'br', 'cite', 'dfn', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'kbd', 'p',
        'pre', 'q', 'samp', 'span', 'strong', 'var', 'a', 'dl', 'dt', 'dd',
        'ol', 'ul', 'li', 'b', 'big', 'i', 'small', 'sub', 'sup', 'tt',
        'del', 'ins', 'caption', 'table', 'img',
    )
    if tag in handledTags:
        # Names ending in two underscores are not name-mangled, so this is
        # the same attribute a literal ``self.__handle_<tag>__`` resolves to.
        handler = getattr(self, '__handle_%s__' % tag)
        return handler(xmlnode, dstree, dsnode)

    if tag == 'tr':
        # Ignore subtree: flag every element below (and including) this
        # row so that later calls for those nodes return immediately.
        xmlchildtree = etree.ElementTree(xmlnode)
        for xmlchildnode in xmlchildtree.iter(tag=etree.Element):
            xmlchildnode.set(IGNORE, IGNORE_TRUE)

    return dsnode
def isIndent(self, xmlnode):
    """Return ``True`` when the tag of ``xmlnode`` is ``blockquote``.

    The tag is obtained through ``util.getTag``.
    """
    return util.getTag(xmlnode) == 'blockquote'
def getLevel(node):
    """Return the tag of ``node`` as reported by ``util.getTag``.

    NOTE(review): despite the name, no numeric level is computed here; the
    tag itself is what callers receive.
    """
    nodeTag = util.getTag(node)
    return nodeTag