def startElementNS(self, name, qname, attrs):
    """Handle the start tag of an element (SAX namespace-aware callback).

    Pushes the current parser frame onto ``self._stack``, refreshes the
    base URI, language and datatype from the ``xml:base``, ``xml:lang``
    and ``rdf:datatype`` attributes, then dispatches on ``self._state``.

    name  -- pair (namespace URI or a false value, local name) of the element.
    qname -- qualified name; only forwarded to the literal handlers and
             quoted in error messages.
    attrs -- attribute mapping keyed by (namespace URI, local name) pairs.

    Raises BadSyntax for an invalid predicate URI, a bad nodeID, or an
    element found where no value is allowed; ValueError for an ID
    attribute on a property element; SyntaxError for an unknown
    parseType; RuntimeError if the parser is in an unknown state.
    """
    if self._state != STATE_LITERAL:
        self.flush()
    self.bnode = None

    tagURI = (name[0] or "") + name[1]

    if verbosity() > 80:
        indent = ". " * len(self._stack)
        if not attrs:
            progress(indent+'# State was', self._state, ', start tag: <' + tagURI + '>')
        else:
            # The loop variables must not be called `name`: the element
            # name is still needed below by the STATE_NOVALUE error
            # message and the STATE_LITERAL handlers.
            trace = '# State =%s, start tag= <%s ' % (self._state, tagURI)
            for attrName, attrValue in attrs.items():
                trace = trace + " " + repr(attrName) + '=' + '"' + repr(attrValue) + '"'
            progress(indent + trace + '>')

    # Saved frame layout:
    #   [state, context, predicate, subject, delayedStatement, base]
    self._stack.append([self._state, self._context, self._predicate,
                        self._subject, self._delayedStatement, self._base])

    self._delayedStatement = None

    self._base = uripath.join(self._base, attrs.get((XML_NS_URI, "base"), self._base))
    hashPos = self._base.find("#")
    if hashPos >= 0:
        self._base = self._base[:hashPos]  # See rdf-tests/rdfcore/xmlbase/test013.rdf

    try:
        # If relative, make absolute. Not needed for standard.
        # Needed for portable RDF generated with --rdf=z
        tagURI = uripath.join(self._base, tagURI)
    except ValueError:
        pass

    self._language = attrs.get((XML_NS_URI, "lang"), None)

    datatype = attrs.get((RDF_NS_URI, "datatype"), None)
    if datatype is not None:
        self._datatype = self.sink.newSymbol(self.uriref(datatype))
    else:
        self._datatype = None

    if self._state == STATE_OUTERMOST:
        if tagURI == RDF_NS_URI + "RDF":
            self._state = STATE_NO_SUBJECT
        else:
            if "R" not in self.flags:
                self._state = STATE_NOT_RDF  # Ignore random XML without rdf:RDF
            else:
                self._nodeElement(tagURI, attrs)  # Parse it as RDF.
            # http://www.w3.org/2000/10/rdf-tests/rdfcore/rdf-element-not-mandatory/test001.rdf

    elif self._state == STATE_NOT_RDF:
        if tagURI == RDF_NS_URI + "RDF" and "T" in self.flags:
            self._state = STATE_NO_SUBJECT
        else:
            pass  # Ignore embedded RDF

    elif self._state == STATE_NO_SUBJECT:  # MS1.0 6.2 obj :: description | container
        self._nodeElement(tagURI, attrs)

    elif self._state == STATE_DESCRIPTION:  # Expect predicate (property) PropertyElt
        # propertyElt #MS1.0 6.12
        # http://www.w3.org/2000/03/rdf-tracking/#rdf-containers-syntax-ambiguity
        if tagURI == RDF_NS_URI + "li":
            # rdf:li is numbered per enclosing node: rdf:_1, rdf:_2, ...
            item = self._items[-1] + 1
            self._predicate = self.sink.newSymbol("%s_%s" % (RDF_NS_URI, item))
            self._items[-1] = item
        else:
            if tagURI in propertyElementExceptions:
                raise BadSyntax(sys.exc_info(), 'Invalid predicate URI: %s' % tagURI)
            self._predicate = self.sink.newSymbol(tagURI)

        self._state = STATE_VALUE  # May be looking for value but see parse type
        self.testdata = ""  # Flush value data

        properties = []
        gotSubject = 0
        haveResource = 0
        haveParseType = 0
        haveExtras = 0
        for attrKey, value in attrs.items():
            ns, ln = attrKey
            if ln == "ID":
                print("# Warning: ID=%s on statement ignored" % value)  # I consider these a bug
                raise ValueError("ID attribute? Reification not supported.")
            elif ln == "parseType":
                haveParseType = 1
                if value == "Resource":
                    c = self._context
                    s = self._subject
                    self.idAboutAttr(attrs)  # @@ not according to current syntax
                    self.sink.makeStatement((c, self._predicate, s, self._subject),
                                            why=self._reason2)
                    self._state = STATE_DESCRIPTION  # Nest description

                elif value == "Quote":
                    c = self._context
                    s = self._subject
                    self.idAboutAttr(attrs)  # set subject and context for nested description
                    # Forget anonymous genid - context is subject
                    self._subject = self.sink.newFormula()
                    if self._predicate is self.merge:  # magic :-(
                        # St C P S retrofit subject of outer level!
                        self._stack[-1][3] = self._subject
                        self._delayedStatement = 1  # flag
                    else:
                        self._delayedStatement = c, self._predicate, s, self._subject
                    self._context = self._subject
                    self._subject = None
                    self._state = STATE_NO_SUBJECT  # Inside quote, there is no subject

                elif value == "Collection" or value.endswith(":collection"):
                    # ":collection" suffix: is this a daml:collection qname?
                    self._state = STATE_LIST  # Linked list of obj's

                elif value == "Literal" or "S" in self.flags:
                    # Strictly, other types are literal SYN#7.2.20
                    self._state = STATE_LITERAL  # That's an XML subtree not a string
                    self._litDepth = 1
                    self.LiteralNS = [{}]
                    self.testdata = ''
                    self._datatype = self.sink.newSymbol(
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
                    if XMLLiteralsAsDomTrees:
                        self.domDocument = self.domImplementation.createDocument(
                            'http://www.w3.org/1999/02/22-rdf-syntax-ns', 'envelope', None)
                        self.domElement = self.domDocument.documentElement
                else:
                    raise SyntaxError("Unknown parse type '%s'" % value)

            elif ln == "nodeID":
                # NOTE(review): assert is stripped under -O; this is really
                # an input-validation check.
                assert not gotSubject
                if not isXML.isNCName(value):
                    raise BadSyntax(sys.exc_info(), 'A nodeID must be a NCName %s' % value)
                obj = self._nodeIDs.get(value, None)
                if obj is None:
                    obj = self.newBlankNode()
                    self._nodeIDs[value] = obj
                self.sink.makeStatement((self._context,
                                         self._predicate,
                                         self._subject,
                                         obj), why=self._reason2)
                self._state = STATE_NOVALUE  # NOT looking for value
                self._subject = obj
                gotSubject = 1

            elif ln == "resource":
                haveResource = 1
                assert not gotSubject
                target = self.sink.newSymbol(self.uriref(value))
                self.sink.makeStatement((self._context,
                                         self._predicate,
                                         self._subject,
                                         target), why=self._reason2)
                self._state = STATE_NOVALUE  # NOT looking for value
                self._subject = target
                gotSubject = 1

            elif ln == "datatype":
                pass  # Already set
            elif ns == XML_NS_URI or ln[:3] == "xml":  # Ignore (lang is already done)
                pass  # see rdf-tests/rdfcore/unrecognised-xml-attributes/test002.rdf
            else:
                haveExtras = 1
                properties.append((ns, ln, value))  # wait till subject is clear

        assert haveResource + haveParseType <= 1
        assert haveParseType + haveExtras <= 1

        if not gotSubject and properties:
            # Property attributes with no explicit object: hang them on a
            # fresh blank node that becomes the object of this property.
            obj = self.newBlankNode()
            self.sink.makeStatement((self._context,
                                     self._predicate,
                                     self._subject,
                                     obj), why=self._reason2)
            self._state = STATE_NOVALUE  # NOT looking for value
            self._subject = obj

        for propNs, propName, propValue in properties:
            self._propertyAttr(propNs, propName, propValue)

    elif self._state == STATE_LIST:  # damlCollection :: objs - make list
        # Subject and predicate are set and dangling.
        c = self._context
        s = self._subject  # The tail of the list so far
        p = self._predicate
        pair = self.newBlankNode()  # The new pair
        self.sink.makeStatement((c, p, s, pair), why=self._reason2)  # Link in new pair
        self.idAboutAttr(attrs)  # set subject (the next item) and context
        if tagURI != RDF_NS_URI + "Description":
            self.sink.makeStatement((c,
                                     self.sink.newSymbol(RDF_NS_URI + "type"),
                                     self._subject,
                                     self.sink.newSymbol(tagURI)),
                                    why=self._reason2)

        self.sink.makeStatement((c,
                                 self.sink.newSymbol(List_NS + "first"),
                                 pair,
                                 self._subject), why=self._reason2)  # new item
        if "S" in self.flags:  # Strictly to spec
            # NOTE(review): every other statement here is
            # (context, predicate, subject, object); this one has List in
            # the subject slot and the item in the object slot - confirm
            # that is intended.
            self.sink.makeStatement((c,
                                     self.sink.newSymbol(RDF_NS_URI + "type"),
                                     self.sink.newSymbol(List_NS + "List"),
                                     self._subject), why=self._reason2)  # new item

        # Patch the saved frame: its predicate becomes the dangling "rest"
        # link (#@check) and its subject tracks the tail of the growing list.
        self._stack[-1][2] = self.sink.newSymbol(List_NS + "rest")
        self._stack[-1][3] = pair

    elif self._state == STATE_VALUE:  # Value :: Obj in this case #MS1.0 6.17 6.2
        c = self._context
        p = self._predicate
        s = self._subject
        self._nodeElement(tagURI, attrs)  # Parse the object thing's attributes
        self.sink.makeStatement((c, p, s, self._subject), why=self._reason2)

        self._stack[-1][0] = STATE_NOVALUE  # When we return, cannot have literal now

    elif self._state == STATE_NOVALUE:
        stackDump = "".join(repr(frame) + "\n" for frame in self._stack)
        raise BadSyntax(sys.exc_info(), """Expected no value, found name=%s; qname=%s, attrs=%s in nested context:\n%s""" % (name, qname, attrs, stackDump))

    elif self._state == STATE_LITERAL:
        self._litDepth = self._litDepth + 1
        if XMLLiteralsAsDomTrees:
            self.literal_element_start_DOM(name, qname, attrs)
        else:
            self.literal_element_start(name, qname, attrs)
        # @@ need to capture the literal
    else:
        raise RuntimeError("Unknown state in RDF parser", self._stack)  # Unknown state
def idAboutAttr(self, attrs):  # MS1.0 6.5 also proprAttr 6.10
    """Set up the subject (and maybe the context) from node attributes.

    Resets ``self._subject``, switches the parser to STATE_DESCRIPTION and
    opens a new rdf:li counter on ``self._items``.  Attributes in the RDF
    namespace (or with no namespace) that identify the node (ID, about,
    nodeID) set the subject; if none does, a new blank node is used.  All
    remaining attributes are treated as property attributes and emitted
    as statements about the subject through ``self.sink``.

    attrs -- attribute mapping keyed by (namespace URI, local name) pairs.

    Raises BadSyntax for malformed or duplicate IDs, a second subject
    attribute, a bad nodeID or bagID, unsupported aboutEachPrefix, a
    forbidden RDF attribute name, or a property attribute without a
    namespace when the "L" flag is not set.
    """
    self._subject = None
    self._state = STATE_DESCRIPTION
    self._items.append(0)
    properties = []

    for attrKey, value in attrs.items():
        ns, ln = attrKey
        # A kludge that forced ID/about/bagID/type attributes from other
        # namespaces into the RDF namespace was removed in 2010 because it
        # creaked with sioc:about (timbl 2010-07-19).
        # Allowed as per dajobe: ID, bagID, about, resource, parseType or type
        if ns == RDF_NS_URI or ns is None:
            # Opinions vary sometimes none but RDF_NS is common :-(
            if ln == "ID":
                if not isXML.isName(value):
                    raise BadSyntax(sys.exc_info(), 'An ID must be a Name %s' % value)
                if (self._base, value) in self._usedIDs:
                    raise BadSyntax(sys.exc_info(),
                                    "Two elements cannot have the same ID, %s" % value)
                self._usedIDs.add((self._base, value))
                if self._subject:
                    print("# oops - subject already %s" % (self._subject,))
                    raise BadSyntax(sys.exc_info(), ">1 subject")
                self._subject = self.sink.newSymbol(self.uriref("#" + value))

            elif ln == "about":
                if self._subject:
                    raise BadSyntax(sys.exc_info(), "Subject already defined to be %s, can't have attribute about='%s'" % (repr(self._subject), value))
                self._subject = self.sink.newSymbol(self.uriref(value))

            elif ln == "nodeID":
                if self._subject:
                    raise BadSyntax(sys.exc_info(), "Subject already defined to be %s, can't have attribute nodeID='%s'" % (repr(self._subject), value))
                if not isXML.isNCName(value):
                    raise BadSyntax(sys.exc_info(), 'A nodeID must be a NCName %s' % value)
                node = self._nodeIDs.get(value, None)
                if node is None:
                    node = self.newBlankNode()
                    self._nodeIDs[value] = node
                self._subject = node

            elif ln == "aboutEachPrefix":
                if value == " ":  # OK - a trick to make NO subject
                    self._subject = None
                else:
                    # can't do about each prefix yet
                    raise BadSyntax(sys.exc_info(),
                                    "aboutEachPrefix is not supported: '%s'" % value)

            elif ln == "bagID":
                if not isXML.isName(value):
                    raise BadSyntax(sys.exc_info(), 'A bagID must be a Name %s' % value)
                # @@dwc: this is broken, no?  @@ non-ascii
                self._context = FORMULA, self.uriref("#" + value)

            elif ln == "parseType":
                pass  # later - object-related
            elif ln == "resource":
                pass  # later
            elif ln == "datatype":
                pass  # later
            elif RDF_NS_URI + ln in propertyAttributeExceptions:
                # Parenthesised: '%' binds tighter than '+', so without the
                # parentheses only the namespace would be formatted in.
                raise BadSyntax(sys.exc_info(),
                                "%s is not a valid attribute named here" % (RDF_NS_URI + ln))
            else:
                if not ns:
                    if "L" not in self.flags:  # assume local?
                        raise BadSyntax(sys.exc_info(),
                                        "No namespace on property attribute %s" % ln)
                    properties.append((self._thisDoc + "#" + ln, value))
                else:
                    properties.append((RDF_NS_URI + ln, value))  # If no uri, syntax error @@

        elif ns == XML_NS_URI:
            pass  # lang already done, others ignored
        else:  # Property attribute propAttr #MS1.0 6.10
            properties.append((ns + ln, value))

    if self._subject is None:
        self._subject = self.newBlankNode()

    for pred, obj in properties:
        if pred == RDF_NS_URI + "type":
            # An rdf:type attribute value is a URI reference, not a literal.
            self.sink.makeStatement((self._context,
                                     self.sink.newSymbol(pred),
                                     self._subject,
                                     self.sink.newSymbol(self.uriref(obj))),
                                    why=self._reason2)
        else:
            dt = self._datatype
            # The language tag is only passed when there is no datatype.
            if dt is None:
                lang = self._language
            else:
                lang = None
            self.sink.makeStatement((self._context,
                                     self.sink.newSymbol(pred),
                                     self._subject,
                                     self.sink.newLiteral(obj, dt, lang)),
                                    why=self._reason2)