def addToSchema(self, stmts):
    """Update the schema indexes and inferred type statements for `stmts`.

    For every statement this:

    * records the "rdf1" entailment (the predicate is an rdf:Property) and,
      for resource objects, the "rdfs4b" entailment (the object is an
      rdfs:Resource);
    * applies "rdfs2"/"rdfs3": infers rdf:type statements for the subject
      and (resource) object from the domains and ranges registered for the
      predicate and its super-properties;
    * updates the sub/super property or sub/super type tables when the
      predicate is one of `self.subPropPreds` / `self.subClassPreds`,
      forwards it to `_addTypeStatement` when it is one of
      `self.typePreds`, and otherwise registers the predicate and handles
      rdfs:domain / rdfs:range declarations.

    Inferred types are reference counted in `self.inferences`, keyed by
    `(resource, type)`; the type statement is only added when the count
    goes from nothing (missing or zero) to one.

    Sub-properties of rdf:type, rdfs:subClassOf and rdfs:subPropertyOf may
    be declared, but they only take effect on the next call to this method
    and cannot be removed consistently, so they should be declared in the
    initial schemas.

    :param stmts: iterable of statements exposing `subject`, `predicate`,
        `object` and `objectType`.
    :returns: None; `self` is updated in place.
    """
    self._beginTxn()

    rdfType = RDF_MS_BASE+u'type'

    def inferType(resource, inferredType):
        # Bump the reference count for this inferred type, adding the
        # rdf:type statement the first time it is inferred.
        key = (resource, inferredType)
        if self.inferences.get(key):
            self.inferences[key] += 1
        else:
            self._addTypeStatement(Statement(resource, rdfType,
                                inferredType, OBJECT_TYPE_RESOURCE))
            self.inferences[key] = 1

    propsChanged = False
    typesChanged = False

    for stmt in stmts:
        #handle "rdf1" entailment rule in the RDF Semantics spec
        self._addEntailment(Statement(stmt.predicate, rdfType,
                        RDF_MS_BASE+u'Property', OBJECT_TYPE_RESOURCE))

        objectIsResource = stmt.objectType == OBJECT_TYPE_RESOURCE

        #"rdfs4b" entailment rule (rdfs4a isn't necessary to make explicit,
        #we just make this one explicit to guarantee that the object is the
        #subject of at least one statement)
        if objectIsResource:
            self.entailments.addStatement(Statement(stmt.object, rdfType,
                        RDF_SCHEMA_BASE+u'Resource', OBJECT_TYPE_RESOURCE))

        #"rdfs2" and "rdfs3" entailment rules: infer types from the domain
        #and range of the predicate and of each of its super-properties
        for predicate in self.currentSuperProperties.get(stmt.predicate,
                                                         [stmt.predicate]):
            for domainType in self.domains.get(predicate, []):
                inferType(stmt.subject, domainType)
            if objectIsResource:
                for rangeType in self.ranges.get(predicate, []):
                    inferType(stmt.object, rangeType)
        #todo: we could do a consistency check here to make sure the object
        #type conforms

        #the subclass and subproperty rules ("rdfs5" - "rdfs11") are handled
        #dynamically except for "rdfs8":
        #uuu rdf:type rdfs:Class -> uuu rdfs:subClassOf rdfs:Resource
        #which isn't needed
        if stmt.predicate in self.subPropPreds:
            self.currentSubProperties.setdefault(
                        stmt.object, [stmt.object]).append(stmt.subject)
            #add this subproperty if this is the only reference to it so far
            self.currentSubProperties.setdefault(stmt.subject, [stmt.subject])

            self.currentSuperProperties.setdefault(
                        stmt.subject, [stmt.subject]).append(stmt.object)
            #add this superproperty if this is the only reference to it so far
            self.currentSuperProperties.setdefault(stmt.object, [stmt.object])

            #if any superproperties are subject of any domain or range
            #statements deduce new type statements from existing statements
            for predicate in self.currentSuperProperties[stmt.object]:
                domainTypes = self.domains.get(predicate, [])
                rangeTypes = self.ranges.get(predicate, [])
                if not (rangeTypes or domainTypes):
                    continue
                for targetStmt in self.model.getStatements(predicate=predicate):
                    for domainType in domainTypes:
                        inferType(targetStmt.subject, domainType)
                    #range types only apply to resources; test the existing
                    #statement's object, not the subPropertyOf statement's
                    if targetStmt.objectType != OBJECT_TYPE_RESOURCE:
                        continue
                    for rangeType in rangeTypes:
                        inferType(targetStmt.object, rangeType)
            propsChanged = True
        elif stmt.predicate in self.subClassPreds:
            self.currentSubTypes.setdefault(
                        stmt.object, [stmt.object]).append(stmt.subject)
            #add this subclass if this is the only reference to it so far
            self.currentSubTypes.setdefault(stmt.subject, [stmt.subject])

            self.currentSuperTypes.setdefault(
                        stmt.subject, [stmt.subject]).append(stmt.object)
            #add this superclass if this is the only reference to it so far
            self.currentSuperTypes.setdefault(stmt.object, [stmt.object])
            typesChanged = True
        elif stmt.predicate in self.typePreds:
            self._addTypeStatement(stmt, addStmt=False)
        else:
            self.currentSubProperties.setdefault(stmt.predicate,
                                                 [stmt.predicate])
            self.currentSuperProperties.setdefault(stmt.predicate,
                                                   [stmt.predicate])

            #if we're adding a domain or range statement infer type for
            #resources that already have statements with that predicate
            #or one of its subproperties
            if self.isCompatibleProperty(stmt.predicate,
                                         RDF_SCHEMA_BASE+u'domain'):
                self.domains.setdefault(stmt.subject, []).append(stmt.object)
                #a property with no recorded subproperties still counts as
                #its own subproperty
                for predicate in self.currentSubProperties.get(
                                        stmt.subject, [stmt.subject]):
                    for targetStmt in self.model.getStatements(
                                                    predicate=predicate):
                        inferType(targetStmt.subject, stmt.object)

            if self.isCompatibleProperty(stmt.predicate,
                                         RDF_SCHEMA_BASE+u'range'):
                self.ranges.setdefault(stmt.subject, []).append(stmt.object)
                for predicate in self.currentSubProperties.get(
                                        stmt.subject, [stmt.subject]):
                    for targetStmt in self.model.getStatements(
                                predicate=predicate,
                                objecttype=OBJECT_TYPE_RESOURCE):
                        inferType(targetStmt.object, stmt.object)

    if typesChanged:
        self.currentSubTypes = getTransitiveClosure(self.currentSubTypes)
        #XXX if self.saveSubtypes:
        #    self.saveSubtypes() XXX
        if self.autocommit:
            self.subtypes = self.currentSubTypes

    if propsChanged:
        self.currentSubProperties = getTransitiveClosure(
                                            self.currentSubProperties)
        if self.autocommit:
            self.subproperties = self.currentSubProperties

        #just in case a subproperty of any of these were added
        self.subClassPreds = self.currentSubProperties[self.SUBCLASSOF]
        self.subPropPreds = self.currentSubProperties[self.SUBPROPOF]
        self.typePreds = self.currentSubProperties[rdfType]
def _findJoinPreds(self, root, joins):
    """Sort the filter predicates of `joins` into aliases, simple joins and
    complex joins, merging joins that are aliases of each other.

    The method works in four passes:

    1. Walk the filters below each join. An `Eq` whose two sides are both
       simple join references is either an alias (neither side names a
       property) or a simple-join candidate; every other filter argument
       makes its filter a complex-join candidate.
    2. Merge joins that alias each other into one join and rename
       the `Label`, `Join` and `Project` ops that referred to the merged
       names.
    3. Resolve the simple-join candidates against the renamed joins.
    4. Inspect the remaining candidate filters for references to other
       joins, collecting half-simple and complex joins.

    The op tree is modified in place (predicates and filters are detached
    by clearing `parent`, labels are replaced by `Project` ops).

    :param root: op that is walked together with `joins` when renaming.
    :param joins: list of join ops; the aliasing pass assumes it is in
        document order.
    :returns: tuple `(joins, simpleJoins, complexJoins)` where `joins` is
        the input list minus merged joins, `simpleJoins` holds
        `(leftname, leftprop, rightname, rightprop, filter, maybe)` tuples
        or whatever `_makeHalfSimpleJoin` returns, and `complexJoins` holds
        `(filter, set_of_join_names, maybe)` tuples.
    :raises QueryException: for MAYBE on an alias or on a predicate that is
        not a join condition, and for a filter that only refers to a
        different join (not implemented).
    """
    joinsByName = {}
    simpleJoinCandidates = []
    complexJoinCandidates = []
    aliases = {}
    for join in joins:
        # for each filter
        # if Eq and both sides are simple references (?label or id or ?ref.id)
        # add to aliases and remove filter from join (we'll join together later)
        aliases.setdefault(join.name, [])
        joinsByName[join.name] = join
        # only descend into this join itself, not into nested ResourceSetOps
        for filter in join.depthfirst(descendPredicate=lambda op: op is join or not isinstance(op, ResourceSetOp)):
            if not isinstance(filter, Filter):
                continue
            for arg in filter.args:
                if isinstance(arg, Eq):
                    # each call is unpacked as a (name, property) pair; a
                    # false name means that side is not a simple reference
                    leftname, leftprop = getNameIfSimpleJoinRef(arg.left, join.name, filter)
                    rightname, rightprop = getNameIfSimpleJoinRef(arg.right, join.name, filter)
                    # if both sides are either a project or label
                    if leftname and rightname:
                        if not leftprop and not rightprop: # None or 0 (SUBJECT)
                            # neither side names a property, so it's an alias
                            if arg.maybe:
                                raise QueryException("maybe on an aliasing join not allowed", arg)
                            if leftname != rightname:
                                # record the alias in both directions
                                aliases.setdefault(leftname, []).append(rightname)
                                aliases.setdefault(rightname, []).append(leftname)
                            # remove this predicate from the filter
                            arg.parent = None
                            continue
                        else:
                            # expressions like ?foo = ?bar.prop or ?a.prop = ?b.prop
                            candidate = (join.name, arg, leftname, leftprop, rightname, rightprop)
                            simpleJoinCandidates.append(candidate)
                            continue
                    # elif leftname or rightname:
                    # XXX support cases where one side is a complex expression
                # not handled above: the filter may contain a complex join
                complexJoinCandidates.append(filter)
            if not filter.args:
                # we must have removed all its predicates so
                # remove the filter and its join condition
                join.removeArg(filter.parent)

    # combine together joins that are just aliases of each other
    # assumes join list is in doc order so that nested joins get subsumed by outermost join
    aliases = getTransitiveClosure(aliases)
    renamed = {}  # maps an aliased name to the name of the join it was merged into
    removed = []  # joins whose children were moved into another join
    for j in joins:
        for name in aliases[j.name]:
            if name == j.name:
                continue
            renamed[name] = j.name
            ja = joinsByName.get(name)
            if not ja:
                # the alias is only a label, there is no join of that name
                continue
            # move the aliased join's children into j and detach it
            for child in ja.args:
                j.appendArg(child)
            ja.name = j.name
            self.prepareJoinMove(ja)
            ja.parent = None
            removed.append(ja)

    joins = [j for j in joins if j not in removed]
    import itertools
    # rewrite references to merged names, under root and each surviving join
    for join in itertools.chain((root,), joins):
        for child in join.depthfirst(descendPredicate=lambda op: op is join or not isinstance(op, ResourceSetOp)):
            if isinstance(child, (Label, Join)):
                if child.name in renamed:
                    child.name = renamed[child.name]
            elif isinstance(child, Project) and child.varref:
                if child.varref in renamed:
                    child.varref = renamed[child.varref]

    assert len(set([j.name for j in joins])) == len(joins), "join names not unique"

    # now that we figured out all the aliases, we can look for join predicates
    simpleJoins = []
    for joinname, pred, leftname, leftprop, rightname, rightprop in simpleJoinCandidates:
        leftname = renamed.get(leftname, leftname)
        rightname = renamed.get(rightname, rightname)
        filter = pred.parent
        if leftname == rightname:
            # both references point to same join, so not a join predicate after all
            if pred.maybe:
                raise QueryException("MAYBE can not be used on a filter that is not a join condition", pred)
            if leftprop == rightprop:
                # identity (a=a), so remove predicate
                # XXX add user warning
                pred.parent = None
                if not filter.args:
                    # the filter is now empty, detach its parent as well
                    filter.parent.parent = None
            else:
                # expression operates on more than one project, need to
                # execute after both Projects have been retrieved
                filter.complexPredicates = True
        else:
            simpleJoins.append((leftname, leftprop, rightname, rightprop, filter, pred.maybe))
            # pred is just a Projects so its already handled by another
            # filter predicate (see makeJoinExpr() when skipRoot = True)
            # , so remove this one
            if not pred.siblings:
                filter.parent.parent = None
            else:
                pred.parent = None

    # xxx handle case like { ?bar ?foo.prop = func() or func(?foo) }
    # aren't these complex (cross) joins but rather misplaced filters
    # that belong in the ?foo join?

    # check if the remaining filter predicates are complex joins
    complexJoins = []
    projectPreds = []
    for filter in complexJoinCandidates:
        if not filter.parent:
            # the filter was detached by an earlier pass
            continue
        join = filter.parent.parent
        # if the filter predicates have reference to another join, its a complex join
        # and while we're at it, collect Project predicates for maybe analysis and fixup
        for pred in filter.args:
            joinrefs = {}  # join name -> list of ops in pred that refer to it
            for label in pred.depthfirst():
                # check if the filter has reference to a different join
                if isinstance(label, Label):
                    joinrefs.setdefault(label.name, []).append(label)
                    if label.name == join.name:
                        # to reduce the number of equivalent ops
                        # replace this with Project(SUBJECT)
                        newchild = Project(SUBJECT, label.name, maybe=label.maybe)
                        label.parent.replaceArg(label, newchild)
                elif isinstance(label, Project):
                    if not label.varref:
                        # an unqualified project refers to the enclosing join
                        label.varref = join.name
                    joinrefs.setdefault(label.varref, []).append(label)
                    propertyname = None
                    if isinstance(label.name, int):
                        # integer names are positions; only PROPERTY is
                        # mapped to a property name here
                        if label.name == PROPERTY:
                            propertyname = filter.labelFromPosition(OBJECT)
                    else:
                        propertyname = label.name
                    if propertyname:
                        projectPreds.append((label.varref or join.name, propertyname, label.maybe, pred))

            if len(joinrefs) == 2 and isinstance(pred, Eq):
                (leftname, leftops), (rightname, rightops) = joinrefs.items()
                simple = self._makeHalfSimpleJoin(pred, join.name, leftname, leftops, rightname)
                if not simple:
                    # try reverse order
                    simple = self._makeHalfSimpleJoin(pred, join.name, rightname, rightops, leftname)
                if simple:
                    simpleJoins.append(simple)
                else:
                    complexJoins.append((filter, set([leftname, rightname]), pred.maybe))
            elif len(joinrefs) > 1:
                complexJoins.append((filter, set(joinrefs.keys()), pred.maybe))
            else:
                if pred.maybe and not isinstance(pred, Project):
                    raise QueryException("MAYBE can not be used on a filter that is not a join condition", pred)
                # if there's only 1 joinref but it not referencing the join
                # that its part of, move the filter to the join that is referencing
                if len(joinrefs) == 1 and iter(joinrefs).next() != join.name:
                    # XXX implement this
                    raise QueryException("Filters that refer to a different filter set are not yet implemented.")

    _fixMaybeFilters(projectPreds)
    return joins, simpleJoins, complexJoins
if stmt.predicate in self.subClassPreds: try: self.currentSubTypes[stmt.object].remove(stmt.subject) self.currentSuperTypes[stmt.subject].remove(stmt.object) except KeyError, ValueError: pass#todo warn if not found typesChanged = True if typesChanged: newsubtypes = {} for k, v in self.currentSuperTypes.items(): for supertype in v: newsubtypes.setdefault(supertype, []).append(k) self.currentSubTypes = getTransitiveClosure(newsubtypes) if self.autocommit: self.subtypes = self.currentSubTypes if propsChanged: newsubprops = {} for k, v in self.currentSuperProperties.items(): for superprop in v: newsubprops.setdefault(superprop, []).append(k) self.currentSubProperties = getTransitiveClosure(newsubprops) if self.autocommit: self.subproperties = self.currentSubProperties #just in case a subproperty of any of these were removed self.subClassPreds = self.currentSubProperties[self.SUBCLASSOF]