def FindLocalHREF(self, href, elt, headers=1):
    '''Find the element that a local ("#id") HREF points to.

    Parameters:
        href -- the reference string; must start with "#"
        elt -- the referring element, used only for error backtraces
        headers -- if true, also search the header elements

    Returns the element whose ID equals the fragment after the "#".
    Every ID met during the search is stored in self.id_cache, so
    later lookups for those IDs return immediately.

    Raises EvaluateException if href is not a local reference or no
    element carries the requested ID.
    '''
    from collections import deque

    # Slice rather than index so an empty string is reported as a bad
    # HREF instead of escaping as an IndexError.
    if href[:1] != '#':
        raise EvaluateException(
            'Absolute HREF ("%s") not implemented' % href,
            self.Backtrace(elt))
    frag = href[1:]

    # Already found?  Compare against None explicitly: a cached node
    # must count as a hit even if its class evaluates as false.
    cached = self.id_cache.get(frag)
    if cached is not None:
        return cached

    # Breadth-first search, in the data first: multi-ref targets are
    # most likely to sit shallow in the data area.  A FIFO queue is
    # required for that order; popping from the tail of a list would
    # walk depth-first and start with the headers.
    pending = deque(self.data_elements)
    pending.append(self.body_root)
    if headers:
        pending.extend(self.header_elements)
    while pending:
        node = pending.popleft()
        if node.nodeType == _Node.ELEMENT_NODE:
            nodeid = _find_id(node)
            if nodeid:
                self.id_cache[nodeid] = node
                if nodeid == frag:
                    return node
        pending.extend(_children(node))
    raise EvaluateException('''Can't find node for HREF "%s"''' % href,
                            self.Backtrace(elt))
def parse(self, elt, ps):
    '''Parse an array element into a list.

    Parameters:
        elt -- the array element (or an element whose HREF points to it)
        ps -- the parsing context

    Returns Nilled if the element is nilled.  Otherwise returns a list:
    (position, item) tuples when self.sparse is set, or plain items with
    self.fill inserted for every skipped position.

    Raises EvaluateException if the element has both content and an
    HREF, or if it carries no array type while self.undeclared is False.
    '''
    target = _find_href(elt)
    if target:
        if _children(elt):
            raise EvaluateException('Array has content and HREF',
                                    ps.Backtrace(elt))
        elt = ps.FindLocalHREF(target, elt)

    if self.nilled(elt, ps):
        return Nilled

    arraytype = _find_arraytype(elt)
    if not arraytype and self.undeclared is False:
        raise EvaluateException('Array expected', ps.Backtrace(elt))

    # XXX should check the type, but parsing that is hairy; the lookup
    # is still performed and its result ignored.
    _find_type(elt)

    offset = self.parse_offset(elt, ps)
    items = []

    # A dense array that starts at a non-zero offset is padded up front.
    if offset and not self.sparse:
        while len(items) < offset:
            items.append(self.fill)

    for child in _child_elements(elt):
        item = self.ofwhat.parse(child, ps)
        position = self.parse_position(child, ps) or offset
        if self.sparse:
            items.append((position, item))
        else:
            # Pad any gap between the running offset and this position.
            while offset < position:
                items.append(self.fill)
                offset += 1
            items.append(item)
        offset += 1
    return items
def _check_for_pi_nodes(self, list, inheader): '''Raise an exception if any of the list descendants are PI nodes. ''' list = list[:] while list: elt = list.pop() t = elt.nodeType if t == _Node.PROCESSING_INSTRUCTION_NODE: raise ParseException('Found processing instruction "<?' + \ elt.nodeName + '...>"', inheader, elt.parentNode, self.dom) elif t == _Node.DOCUMENT_TYPE_NODE: raise ParseException('Found DTD', inheader, elt.parentNode, self.dom) list += _children(elt)
def _check_for_legal_children(self, name, elt, mustqualify=1):
    '''Verify that elt contains only elements, comments and blank text.

    Parameters:
        name -- the name of the parent, used in error messages; the
            value "Header" marks the error as being in the header
        elt -- the parent node whose direct children are checked
        mustqualify -- if true, child elements need a namespace URI

    Raises ParseException for any other kind of child, for text that
    is not whitespace-only, and (when mustqualify is true) for child
    elements without a namespace URI.
    '''
    inheader = name == "Header"
    for child in _children(elt):
        kind = child.nodeType
        if kind == _Node.COMMENT_NODE:
            continue
        if kind == _Node.ELEMENT_NODE:
            if mustqualify and not child.namespaceURI:
                raise ParseException(
                    'Unqualified element "' + child.nodeName + '" in ' + name,
                    inheader, elt, self.dom)
            continue
        # Whitespace-only text between elements is harmless.
        if kind == _Node.TEXT_NODE and child.nodeValue.strip() == "":
            continue
        raise ParseException("Non-element child in " + name,
                             inheader, elt, self.dom)
def parse(self, elt, ps):
    '''Parse a complex-type element into a dictionary of its parts.

    Parameters:
        elt -- the DOM element to parse
        ps -- the parsing context; used here for Backtrace() and
            FindLocalHREF()

    Returns:
        Nilled if self.nilled() reports the element as nilled; the
        result of the substitute type's parse() when delegating; or
        the dictionary of parsed values when self.pyclass is not set.

    Raises:
        EvaluateException -- wrong type on a non-TypeDefinition, an
            element with both content and an HREF, an out-of-order
            child when self.inorder is True, a missing required
            element, or an <any> count outside its occurrence bounds.
        TypeError -- if calling self.pyclass() raises.
    '''
    debug = self.logger.debugOn()
    debug and self.logger.debug('parse')
    xtype = self.checkname(elt, ps)
    # A type other than self.type or (None, None) is either an error or,
    # for a TypeDefinition, a reason to hand over to the substitute type.
    if self.type and xtype not in [self.type, (None, None)]:
        if not isinstance(self, TypeDefinition):
            raise EvaluateException(\
                'ComplexType for %s has wrong type(%s), looking for %s' %
                (self.pname, self.checktype(elt,ps), self.type),
                ps.Backtrace(elt))
        else:
            # TODO: maybe change MRO to handle this
            debug and self.logger.debug('delegate to substitute type')
            what = TypeDefinition.getSubstituteType(self, elt, ps)
            return what.parse(elt, ps)
    href = _find_href(elt)
    if href:
        # An element that refers elsewhere must not carry content itself.
        if _children(elt):
            raise EvaluateException('Struct has content and HREF',
                                    ps.Backtrace(elt))
        elt = ps.FindLocalHREF(href, elt)
    c = _child_elements(elt)
    # NOTE(review): count is not read again in the visible code.
    count = len(c)
    if self.nilled(elt, ps):
        return Nilled
    # Create the object.
    v = {}
    # parse all attributes contained in attribute_typecode_dict (user-defined attributes),
    # the values (if not None) will be keyed in self.attributes dictionary.
    attributes = self.parse_attributes(elt, ps)
    if attributes:
        v[self.attrs_aname] = attributes
    # MIXED content: store the simple value under self.mixed_aname.
    if self.mixed is True:
        v[self.mixed_aname] = self.simple_value(elt, ps, mixed=True)
    # Clone list of kids (we null it out as we process)
    c, crange = c[:], range(len(c))
    # Loop over all items we're expecting
    if debug:
        self.logger.debug("ofwhat: %s", str(self.ofwhat))
    any = None
    for i, what in [(i, self.ofwhat[i]) for i in range(len(self.ofwhat))]:
        # retrieve typecode if it is hidden
        if callable(what):
            what = what()
        # Loop over all available kids
        if debug:
            self.logger.debug("what: (%s,%s)", what.nspname, what.pname)
        # The candidate list is built before the loop starts, so nulling
        # out c[j] below does not disturb the iteration.
        for j, c_elt in [(j, c[j]) for j in crange if c[j]]:
            # Parse value, and mark this one done.
            if debug:
                self.logger.debug("child node: (%s,%s)", c_elt.namespaceURI, c_elt.tagName)
            match = False
            if what.name_match(c_elt):
                match = True
                value = what.parse(c_elt, ps)
            else:
                # substitutionGroup head must be a global element declaration
                # if successful delegate to matching GED
                subwhat = _get_substitute_element(what, c_elt, ps)
                if subwhat:
                    match = True
                    value = subwhat.parse(c_elt, ps)
                if debug:
                    self.logger.debug("substitutionGroup: %s", subwhat)
            if match:
                if what.maxOccurs > 1:
                    # Repeating element: collect into a list and keep
                    # scanning the remaining children.
                    if v.has_key(what.aname):
                        v[what.aname].append(value)
                    else:
                        v[what.aname] = [value]
                    c[j] = None
                    continue
                else:
                    v[what.aname] = value
                # Single occurrence: consume the child and stop scanning;
                # the break also skips the for-else clause below.
                c[j] = None
                break
            if debug:
                self.logger.debug("no element (%s,%s)", what.nspname, what.pname)
            # No match; if it was supposed to be here, that's an error.
            if self.inorder is True and i == j:
                raise EvaluateException('Out of order complexType',
                                        ps.Backtrace(c_elt))
        else:
            # Reached when the scan ended without a break, i.e. no
            # single-occurrence match consumed a child.
            # only supporting 1 <any> declaration in content.
            if isinstance(what, AnyElement):
                any = what
            elif hasattr(what, 'default'):
                v[what.aname] = what.default
            elif what.minOccurs > 0 and not v.has_key(what.aname):
                raise EvaluateException('Element "' + what.aname + \
                    '" missing from complexType', ps.Backtrace(elt))
    # Look for wildcards and unprocessed children
    # XXX Stick all this stuff in "any", hope for no collisions
    if any is not None:
        occurs = 0
        v[any.aname] = []
        # Every child not consumed above is handed to the wildcard.
        for j, c_elt in [(j, c[j]) for j in crange if c[j]]:
            value = any.parse(c_elt, ps)
            if any.maxOccurs == UNBOUNDED or any.maxOccurs > 1:
                v[any.aname].append(value)
            else:
                v[any.aname] = value
            occurs += 1
        # No such thing as nillable <any>
        if any.maxOccurs == 1 and occurs == 0:
            v[any.aname] = None
        elif occurs < any.minOccurs or (any.maxOccurs != UNBOUNDED and any.maxOccurs < occurs):
            raise EvaluateException(
                'occurances of <any> elements(#%d) bound by (%d,%s)' % (occurs, any.minOccurs, str(any.maxOccurs)),
                ps.Backtrace(elt))
    if not self.pyclass:
        return v
    # type definition must be informed of element tag (nspname,pname),
    # element declaration is initialized with a tag.
    # NOTE(review): in the visible code pyobj is constructed but neither
    # populated from v nor returned, so this path returns None -- the
    # rest of the function may be missing here; confirm.
    try:
        pyobj = self.pyclass()
    except Exception, e:
        raise TypeError("Constructing element (%s,%s) with pyclass(%s), %s" \
            %(self.nspname, self.pname, self.pyclass.__name__, str(e)))
def parse(self, elt, ps):
    '''Parse a complex-type element into a dictionary of its parts.

    Parameters:
        elt -- the DOM element to parse
        ps -- the parsing context; used here for Backtrace() and
            FindLocalHREF()

    Returns:
        Nilled if self.nilled() reports the element as nilled; the
        result of the substitute type's parse() when delegating; or
        the dictionary of parsed values when self.pyclass is not set.

    Raises:
        EvaluateException -- wrong type on a non-TypeDefinition, an
            element with both content and an HREF, an out-of-order
            child when self.inorder is True, a missing required
            element, or an <any> count outside its occurrence bounds.
        TypeError -- if calling self.pyclass() raises.
    '''
    debug = self.logger.debugOn()
    debug and self.logger.debug('parse')
    xtype = self.checkname(elt, ps)
    # A type other than self.type or (None, None) is either an error or,
    # for a TypeDefinition, a reason to hand over to the substitute type.
    if self.type and xtype not in [ self.type, (None,None) ]:
        if not isinstance(self, TypeDefinition):
            raise EvaluateException(\
                'ComplexType for %s has wrong type(%s), looking for %s' %
                (self.pname, self.checktype(elt,ps), self.type),
                ps.Backtrace(elt))
        else:
            # TODO: maybe change MRO to handle this
            debug and self.logger.debug('delegate to substitute type')
            what = TypeDefinition.getSubstituteType(self, elt, ps)
            return what.parse(elt, ps)
    href = _find_href(elt)
    if href:
        # An element that refers elsewhere must not carry content itself.
        if _children(elt):
            raise EvaluateException('Struct has content and HREF',
                                    ps.Backtrace(elt))
        elt = ps.FindLocalHREF(href, elt)
    c = _child_elements(elt)
    # NOTE(review): count is not read again in the visible code.
    count = len(c)
    if self.nilled(elt, ps):
        return Nilled
    # Create the object.
    v = {}
    # parse all attributes contained in attribute_typecode_dict (user-defined attributes),
    # the values (if not None) will be keyed in self.attributes dictionary.
    attributes = self.parse_attributes(elt, ps)
    if attributes:
        v[self.attrs_aname] = attributes
    # MIXED content: store the simple value under self.mixed_aname.
    if self.mixed is True:
        v[self.mixed_aname] = self.simple_value(elt,ps, mixed=True)
    # Clone list of kids (we null it out as we process)
    c, crange = c[:], range(len(c))
    # Loop over all items we're expecting
    if debug:
        self.logger.debug("ofwhat: %s",str(self.ofwhat))
    any = None
    for i,what in [ (i, self.ofwhat[i]) for i in range(len(self.ofwhat)) ]:
        # retrieve typecode if it is hidden
        if callable(what):
            what = what()
        # Loop over all available kids
        if debug:
            self.logger.debug("what: (%s,%s)", what.nspname, what.pname)
        # The candidate list is built before the loop starts, so nulling
        # out c[j] below does not disturb the iteration.
        for j,c_elt in [ (j, c[j]) for j in crange if c[j] ]:
            # Parse value, and mark this one done.
            if debug:
                self.logger.debug("child node: (%s,%s)", c_elt.namespaceURI, c_elt.tagName)
            match = False
            if what.name_match(c_elt):
                match = True
                value = what.parse(c_elt, ps)
            else:
                # substitutionGroup head must be a global element declaration
                # if successful delegate to matching GED
                subwhat = _get_substitute_element(what, c_elt, ps)
                if subwhat:
                    match = True
                    value = subwhat.parse(c_elt, ps)
                if debug:
                    self.logger.debug("substitutionGroup: %s", subwhat)
            if match:
                if what.maxOccurs > 1:
                    # Repeating element: collect into a list and keep
                    # scanning the remaining children.
                    if v.has_key(what.aname):
                        v[what.aname].append(value)
                    else:
                        v[what.aname] = [value]
                    c[j] = None
                    continue
                else:
                    v[what.aname] = value
                # Single occurrence: consume the child and stop scanning;
                # the break also skips the for-else clause below.
                c[j] = None
                break
            if debug:
                self.logger.debug("no element (%s,%s)", what.nspname, what.pname)
            # No match; if it was supposed to be here, that's an error.
            if self.inorder is True and i == j:
                raise EvaluateException('Out of order complexType',
                                        ps.Backtrace(c_elt))
        else:
            # Reached when the scan ended without a break, i.e. no
            # single-occurrence match consumed a child.
            # only supporting 1 <any> declaration in content.
            if isinstance(what,AnyElement):
                any = what
            elif hasattr(what, 'default'):
                v[what.aname] = what.default
            elif what.minOccurs > 0 and not v.has_key(what.aname):
                raise EvaluateException('Element "' + what.aname + \
                    '" missing from complexType', ps.Backtrace(elt))
    # Look for wildcards and unprocessed children
    # XXX Stick all this stuff in "any", hope for no collisions
    if any is not None:
        occurs = 0
        v[any.aname] = []
        # Every child not consumed above is handed to the wildcard.
        for j,c_elt in [ (j, c[j]) for j in crange if c[j] ]:
            value = any.parse(c_elt, ps)
            if any.maxOccurs == UNBOUNDED or any.maxOccurs > 1:
                v[any.aname].append(value)
            else:
                v[any.aname] = value
            occurs += 1
        # No such thing as nillable <any>
        if any.maxOccurs == 1 and occurs == 0:
            v[any.aname] = None
        elif occurs < any.minOccurs or (any.maxOccurs!=UNBOUNDED and any.maxOccurs<occurs):
            raise EvaluateException('occurances of <any> elements(#%d) bound by (%d,%s)' %(
                occurs, any.minOccurs,str(any.maxOccurs)), ps.Backtrace(elt))
    if not self.pyclass:
        return v
    # type definition must be informed of element tag (nspname,pname),
    # element declaration is initialized with a tag.
    # NOTE(review): in the visible code pyobj is constructed but neither
    # populated from v nor returned, so this path returns None -- the
    # rest of the function may be missing here; confirm.
    try:
        pyobj = self.pyclass()
    except Exception, e:
        raise TypeError("Constructing element (%s,%s) with pyclass(%s), %s" \
            %(self.nspname, self.pname, self.pyclass.__name__, str(e)))
class ParsedSoap:
    '''A Parsed SOAP object.
        Convert the text to a DOM tree and parse SOAP elements.
        Instance data:
            reader -- the DOM reader
            dom -- the DOM object
            ns_cache -- dictionary (by id(node)) of namespace dictionaries
            id_cache -- dictionary (by XML ID attr) of elements
            envelope -- the node holding the SOAP Envelope
            header -- the node holding the SOAP Header (or None)
            body -- the node holding the SOAP Body
            body_root -- the serialization root in the SOAP Body
            data_elements -- list of non-root elements in the SOAP Body
            trailer_elements -- list of elements following the SOAP body
                (only set when trailers were requested)
    '''
    defaultReaderClass = DefaultReader

    def __init__(self, input, readerclass=None, keepdom=False,
                 trailers=False, resolver=None, envelope=True, **kw):
        '''Initialize.
        Keyword arguments:
            trailers -- allow trailer elements (default is False)
            resolver -- function (bound method) to resolve URI's
            readerclass -- factory class to create a reader
            keepdom -- do not release the DOM
            envelope -- look for a SOAP envelope; when False the single
                document element becomes body_root and no further
                SOAP structure is examined.
        Any other keyword arguments are accepted and ignored.
        Raises ParseException when the document does not have the
        expected SOAP structure; errors raised by the reader while
        building the DOM propagate unchanged.
        '''
        self.readerclass = readerclass
        self.keepdom = keepdom
        if not self.readerclass:
            self.readerclass = self.defaultReaderClass

        # Reader failures are deliberately not wrapped in a
        # ParseException: there is no way to tell here whether the
        # problem lies in the header, so the original error propagates.
        self.reader = self.readerclass()
        if type(input) in _stringtypes:
            self.dom = self.reader.fromString(input)
        else:
            self.dom = self.reader.fromStream(input)

        self.ns_cache = {
            id(self.dom): {
                'xml': XMLNS.XML,
                'xmlns': XMLNS.BASE,
                '': ''
            }
        }
        self.trailers, self.resolver, self.id_cache = trailers, resolver, {}

        # Exactly one child element
        c = [E for E in _children(self.dom)
             if E.nodeType == _Node.ELEMENT_NODE]
        if not c:
            raise ParseException("Document has no Envelope", 0)
        if len(c) != 1:
            raise ParseException("Document has extra child elements", 0)

        if envelope is False:
            self.body_root = c[0]
            return

        # And that one child must be the Envelope
        elt = c[0]
        if elt.localName != "Envelope" \
                or elt.namespaceURI != SOAP.ENV:
            raise ParseException('Document has "' + elt.localName + \
                '" element, not Envelope', 0)
        self._check_for_legal_children("Envelope", elt)
        for a in _attrs(elt):
            name = a.nodeName
            if name.find(":") == -1 and name not in ["xmlns", "id"]:
                raise ParseException('Unqualified attribute "' + \
                    name + '" in Envelope', 0)
        self.envelope = elt
        if not _valid_encoding(self.envelope):
            raise ParseException("Envelope has invalid encoding", 0)

        # Get Envelope's child elements.
        c = [E for E in _children(self.envelope)
             if E.nodeType == _Node.ELEMENT_NODE]
        if not c:
            raise ParseException("Envelope is empty (no Body)", 0)

        # Envelope's first child might be the header; if so, nip it off.
        elt = c[0]
        if elt.localName == "Header" \
                and elt.namespaceURI == SOAP.ENV:
            self._check_for_legal_children("Header", elt)
            self._check_for_pi_nodes(_children(elt), 1)
            self.header = c.pop(0)
            self.header_elements = _child_elements(self.header)
        else:
            self.header, self.header_elements = None, []

        # Now the first child must be the body
        if not c:
            raise ParseException("Envelope has header but no Body", 0)
        elt = c.pop(0)
        if elt.localName != "Body" \
                or elt.namespaceURI != SOAP.ENV:
            if self.header:
                raise ParseException('Header followed by "' + \
                    elt.localName + \
                    '" element, not Body', 0, elt, self.dom)
            else:
                raise ParseException('Document has "' + \
                    elt.localName + \
                    '" element, not Body', 0, elt, self.dom)
        self._check_for_legal_children("Body", elt, 0)
        self._check_for_pi_nodes(_children(elt), 0)
        self.body = elt
        if not _valid_encoding(self.body):
            raise ParseException("Body has invalid encoding", 0)

        # Trailer elements.
        if not self.trailers:
            if c:
                raise ParseException("Element found after Body",
                                     0, elt, self.dom)
            # Don't set self.trailer_elements = []; if user didn't ask
            # for trailers we *want* to throw an exception.
        else:
            self.trailer_elements = c
            for elt in self.trailer_elements:
                if not elt.namespaceURI:
                    raise ParseException('Unqualified trailer element',
                                         0, elt, self.dom)

        # Find the serialization root.  Body children fall into three
        # groups: root="1" (the root), root="0" (explicitly not the
        # root, so never a candidate) and no root attribute (maybe).
        self.body_root, maybe = None, []
        for elt in _child_elements(self.body):
            root = _find_root(elt)
            if root == "1":
                if self.body_root is not None:
                    raise ParseException("Multiple seralization roots found",
                                         0, elt, self.dom)
                self.body_root = elt
            elif root == "0":
                pass
            elif not root:
                maybe.append(elt)
            else:
                raise ParseException('Illegal value for root attribute',
                                     0, elt, self.dom)

        # If we didn't find a root, get the first one that didn't
        # say "not me", unless they all said "not me."
        if self.body_root is None:
            if maybe:
                self.body_root = maybe[0]
            else:
                raise ParseException('No serialization root found',
                                     0, self.body, self.dom)
        if not _valid_encoding(self.body_root):
            # Report the root itself; the loop variable from the scan
            # above would point at the last Body child instead.
            raise ParseException("Invalid encoding",
                                 0, self.body_root, self.dom)

        # Now get all the non-roots (in order!).
        rootid = id(self.body_root)
        self.data_elements = [E for E in _child_elements(self.body)
                              if id(E) != rootid]
        self._check_for_pi_nodes(self.data_elements, 0)