def StripElements(node, stripElements, stripState=0):
    """
    Recursively removes whitespace-only text nodes from the tree rooted at
    node.  The tree is modified in place; nothing is returned.

    node is the DOM node to process.  Document and element nodes are
    traversed, text nodes are candidates for removal, and every other node
    type is left untouched.

    stripElements is a sequence of (namespaceURI, localName, strip)
    triples.  The first triple that matches an element -- by its exact
    name, by (namespaceURI, '*') or by (EMPTY_NAMESPACE, '*') -- sets the
    strip state used for that element's children.

    stripState is the state inherited from the parent: a true value means
    whitespace-only text nodes are removed, a false value means they are
    kept.

    An xml:space attribute on an element takes precedence over
    stripElements: 'preserve' turns stripping off, any other non-empty
    value turns it on.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        # Walk a snapshot of the children: a text child removes itself from
        # node.childNodes, and mutating a live list while iterating over it
        # would skip the sibling that follows every removed node.
        for child in list(node.childNodes):
            StripElements(child, stripElements, stripState)
    elif node.nodeType == Node.ELEMENT_NODE:
        # See if we need to change the strip state
        xmlSpace = node.getAttributeNS(XML_NAMESPACE, 'space')
        if xmlSpace == 'preserve':
            # Force the state to preserve
            stripState = 0
        elif xmlSpace:
            # Force to strip
            stripState = 1
        else:
            # See if it is a preserve or strip element
            candidates = [(node.namespaceURI, node.localName),
                          (EMPTY_NAMESPACE, '*'),
                          (node.namespaceURI, '*')]
            for (uri, local, strip) in stripElements:
                if (uri, local) in candidates:
                    stripState = strip
                    break
        # Same snapshot reasoning as for the document node above.
        for child in list(node.childNodes):
            StripElements(child, stripElements, stripState)
    elif node.nodeType == Node.TEXT_NODE:
        if stripState and IsXmlSpace(node.data):
            # Whitespace-only text in a stripping context: drop it
            node.parentNode.removeChild(node)
def characters(self, data):
    """
    Handles character data reported for the element on top of the state
    stack.

    The text is looked up in the current validation table (falling back to
    its ContentInfo.ELSE entry when present).  If text is allowed, the
    validation state advances and an XsltText node holding data is
    appended to the current node.  If it is not allowed, whitespace-only
    data is silently dropped provided ContentInfo.EMPTY is not a key of
    the validation table; anything else raises XsltParserException with
    Error.ILLEGAL_TEXT_CHILD_PARSE.
    """
    current = self._state_stack[-1]

    # verify that the current element can have text children
    table = current.validation
    text_token = ContentInfo.TEXT_NODE
    successor = table.get(text_token)
    if successor is None and ContentInfo.ELSE in table:
        successor = table[ContentInfo.ELSE].get(text_token)

    if successor is not None:
        # text is legal here: update validation and record the node
        current.validation = successor
        text_node = StylesheetTree.XsltText(self._ownerDoc,
                                            self._locator.getSystemId(),
                                            data)
        current.node.appendChild(text_node)
        return

    # If the parent can have element children, but not text nodes,
    # ignore pure whitespace nodes.  This clarification is from
    # XSLT 2.0 [3.4] Whitespace Stripping.
    # e.g. xsl:stylesheet, xsl:apply-templates, xsl:choose
    if ContentInfo.EMPTY in table or not IsXmlSpace(data):
        raise XsltParserException(Error.ILLEGAL_TEXT_CHILD_PARSE,
                                  self._locator,
                                  repr(Truncate(data, 10)),
                                  current.node.nodeName)
    return
def __completeTextNode(self):
    """
    Flushes any buffered text to the SAX handler and empties the buffer.

    Whitespace-only text is delivered through ignorableWhitespace(); all
    other text goes through characters().  Nothing happens when the buffer
    is empty.
    """
    #FIXME: This does not allow multiple root nodes, which is required to be supported
    if not self.__currText:
        return
    if IsXmlSpace(self.__currText):
        deliver = self.__saxHandler.ignorableWhitespace
    else:
        deliver = self.__saxHandler.characters
    deliver(self.__currText)
    self.__currText = u''
    return
def _TryEntityAsDocumentEntity(entity):
    """
    Makes an entity with a single top-level element look the way a document
    entity would have been parsed: its top-level whitespace-only text nodes
    are removed.  Entities with zero or several top-level elements are left
    untouched.
    """
    top_level_elements = [child for child in entity.childNodes
                          if child.nodeType == Node.ELEMENT_NODE]
    if len(top_level_elements) != 1:
        return

    # OK to strip WS-only text nodes.  Collect them first so the child
    # list is not modified while it is being scanned.
    blanks = [child for child in entity.childNodes
              if child.nodeType == Node.TEXT_NODE and IsXmlSpace(child.data)]
    for blank in blanks:
        entity.removeChild(blank)
    return
def _visit_text(self, context, node, preserveSpace):
    """
    Sends the data of a text node to the current (top-most) writer.

    Whitespace-only text is skipped unless preserveSpace is true.
    """
    if not preserveSpace and IsXmlSpace(node.data):
        # strippable whitespace: nothing to write
        return
    current_writer = self.writers[-1]
    current_writer.text(node.data)
    return
def NodeCompare(node1, node2, ignoreWhitespace=0, ignoreComments=0,
                ignoreNsDecls=0):
    """
    A function that compares two XML DOM nodes by traversing their
    attributes and descendants recursively until a mismatch is found.
    Differences are reported through __ReportError (and, for an attribute
    local-name mismatch, by printing both attribute key lists to stdout).

    Returns 1 if the nodes compare equal; otherwise returns the result of
    __ReportError for the first mismatch (or 0 when a nested comparison
    already failed).

    ignoreWhitespace controls whether whitespace-only text nodes are
    treated as equal to each other regardless of their exact content.

    ignoreComments controls whether comment nodes are ignored.

    ignoreNsDecls controls whether namespace declarations are ignored.

    Documents and document fragments with a different number of children
    are reported as a 'childNodes' mismatch, the same way elements are.
    """
    if node1.nodeType != node2.nodeType:
        return __ReportError(node1, node2, 'nodeType')

    # -- Document Nodes --------------------------------------
    if node1.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
        if ignoreComments:
            children1 = [c for c in node1.childNodes
                         if c.nodeType != Node.COMMENT_NODE]
            children2 = [c for c in node2.childNodes
                         if c.nodeType != Node.COMMENT_NODE]
        else:
            children1 = node1.childNodes
            children2 = node2.childNodes

        # Pairing lists of unequal length would hand None to the recursive
        # call and fail on None.nodeType, so report the mismatch up front.
        if len(children1) != len(children2):
            return __ReportError(node1, node2, 'childNodes')

        for child1, child2 in zip(children1, children2):
            if not NodeCompare(child1, child2, ignoreWhitespace,
                               ignoreComments, ignoreNsDecls):
                return 0

    # -- Document Type Nodes ---------------------------------
    elif node1.nodeType == Node.DOCUMENT_TYPE_NODE:
        if node1.name != node2.name:
            return __ReportError(node1, node2, 'name')

        if node1.publicId != node2.publicId:
            return __ReportError(node1, node2, 'publicId')

        if node1.systemId != node2.systemId:
            return __ReportError(node1, node2, 'systemId')

        if node1.internalSubset != node2.internalSubset:
            return __ReportError(node1, node2, 'internalSubset')

        # Only the number of entities/notations is compared, not their
        # content.
        if len(node1.entities) != len(node2.entities):
            return __ReportError(node1, node2, 'entities')

        if len(node1.notations) != len(node2.notations):
            return __ReportError(node1, node2, 'notations')

    # -- Element Nodes ---------------------------------------
    elif node1.nodeType == Node.ELEMENT_NODE:
        if node1.localName != node2.localName:
            return __ReportError(node1, node2, 'localName')

        if node1.namespaceURI != node2.namespaceURI:
            return __ReportError(node1, node2, 'namespaceURI')

        # Compare attributes
        attrs1 = node1.attributes.values()
        attrs2 = node2.attributes.values()

        if ignoreNsDecls:
            # Remove XML Namespace declarations
            attrs1 = [a for a in attrs1
                      if a.namespaceURI != XMLNS_NAMESPACE]
            attrs2 = [a for a in attrs2
                      if a.namespaceURI != XMLNS_NAMESPACE]

        if len(attrs1) != len(attrs2):
            return __ReportError(node1, node2, 'attributes')

        # Sort the attributes by qualified name so that document order of
        # the attributes does not matter
        attrs1 = sorted(attrs1, key=lambda a: a.name)
        attrs2 = sorted(attrs2, key=lambda a: a.name)

        # NOTE(review): only attribute names are compared here, attribute
        # values are not -- confirm whether that is intentional.
        for attr1, attr2 in zip(attrs1, attrs2):
            if attr1.localName != attr2.localName:
                print(node1.attributes.keys())
                print(node2.attributes.keys())
                return __ReportError(attr1, attr2, 'localName')

            if attr1.namespaceURI != attr2.namespaceURI:
                return __ReportError(attr1, attr2, 'namespaceURI')

        # Compare children
        if ignoreComments:
            # Remove comment nodes
            children1 = [c for c in node1.childNodes
                         if c.nodeType != Node.COMMENT_NODE]
            children2 = [c for c in node2.childNodes
                         if c.nodeType != Node.COMMENT_NODE]
        else:
            children1 = node1.childNodes
            children2 = node2.childNodes

        if len(children1) != len(children2):
            return __ReportError(node1, node2, 'childNodes')

        for child1, child2 in zip(children1, children2):
            if not NodeCompare(child1, child2, ignoreWhitespace,
                               ignoreComments, ignoreNsDecls):
                return 0

    # -- Text Nodes ------------------------------------------
    elif node1.nodeType == Node.TEXT_NODE:
        text1 = node1.data
        text2 = node2.data

        if ignoreWhitespace:
            # Whitespace-only text collapses to None so that any two such
            # nodes compare equal
            if IsXmlSpace(text1):
                text1 = None
            if IsXmlSpace(text2):
                text2 = None

        if text1 != text2:
            return __ReportError(node1, node2, 'data')

    # -- Comment Nodes ---------------------------------------
    elif node1.nodeType == Node.COMMENT_NODE:
        if node1.data != node2.data:
            return __ReportError(node1, node2, 'data')

    # -- Processing Instruction Nodes ------------------------
    elif node1.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        if node1.target != node2.target:
            return __ReportError(node1, node2, 'target')

        if node1.data != node2.data:
            return __ReportError(node1, node2, 'data')

    # All tests pass, they are the same
    return 1
def text(self, *args, **kw):
    """
    Records a pending 'text' call on the stack, together with its
    arguments.

    If the first positional argument is not whitespace-only, the output
    cannot be HTML/XHTML, so the writer is finalized with _XML_METHOD.
    """
    pending_call = ('text', args, kw)
    self._stack.append(pending_call)
    if IsXmlSpace(args[0]):
        # whitespace only: the output method is still undecided
        return
    # Non-whitespace characters, cannot be HTML/XHTML
    self._finalize(_XML_METHOD)
    return