def __init__(self, nsmap: PrefixMap, shape: Shape, shape_interpreter):
    """ Compile a shape into a predicate map and a regular expression

    :param nsmap: prefix map; its ``uri_for`` is used to key the predicate map
    :param shape: shape to compile
    :param shape_interpreter: stored as ``containing_schema``
    """
    self._shape = shape
    self.containing_schema = shape_interpreter
    wrapped_shape = PyxbWrapper(shape)
    self.tripleconstraints = self.depth_first_triples(wrapped_shape)

    # predmap: predicate URI -> list of tokens. A triple constraint token is
    # its position in self.tripleconstraints followed by a single space.
    self.predmap = {}
    for position, constraint in enumerate(self.tripleconstraints):
        predicate_uri = nsmap.uri_for(constraint.predicate)
        self.predmap.setdefault(predicate_uri, []).append('%d ' % position)

    self._triple_number = 0
    self._pattern = r''.join(self.depth_first_pattern(wrapped_shape))

    # Generate an additional match for every extra predicate.
    # Extra patterns are recognized by the leading '0'. Numbering starts one
    # past the number of triple constraints.
    first_extra = len(self.tripleconstraints) + 1
    for extra_num, extra in enumerate(self._shape.extra, start=first_extra):
        token = '0%d ' % extra_num
        self.predmap.setdefault(nsmap.uri_for(extra.ref), []).append(token)
        self._pattern += "(" + token + ")*"
    self.matchpattern = re.compile(self._pattern)
class ShapeInterpreter:
    """ Evaluate the shapes of a schema against the triples of a graph.

    Shapes are compiled lazily (see ``i_shape``) and the outcome of every
    (triple, triple constraint) evaluation is memoized in ``_triple_results``
    until a new graph is assigned.
    """

    def __init__(self, schema: Schema, schema_dom, g: Graph = None):
        """ Schema interpreter
        :param schema: definition
        :param schema_dom: equivalent in dom to get the namespace map
        :param g: graph to evaluate; may also be assigned later through ``graph``
        """
        self.schema = schema
        self._nsmap = PrefixMap(schema, schema_dom)
        self._default_namespace = schema.default_namespace if schema.default_namespace else ''
        self._shapes = {self._nsmap.uri_for(sh.label): sh for sh in schema.shape}
        self._compiled_shapes = {}
        self._triple_results = {}
        self._graph = g

    @property
    def graph(self):
        """ The graph currently being evaluated """
        return self._graph

    @graph.setter
    def graph(self, g: Graph):
        self._graph = g
        # Memoized results refer to the previous graph
        self._triple_results = {}

    def _i_shape_subject(self, subj, cs: CompiledShape) -> bool:
        """ Evaluate the shape against the set of triples with subject subj.
        :param subj: subject to evaluate
        :param cs: compiled shape to use in interpretation
        :return: True if some ordering of the subject's predicate/objects passes the shape
        """
        # Everything passes an empty shape
        if len(cs.tripleconstraints) == 0:
            return True

        # Split the predicate/objects of subj into those whose predicate is known to the shape and
        # those that aren't. Unknown predicates fail a closed shape and are ignored by an open one.
        predicate_objects = list(self.graph.predicate_objects(subject=subj))
        unmatched_predicate_objects = [po for po in predicate_objects if str(po[0]) not in cs.predmap]
        if unmatched_predicate_objects and cs.closed:
            return False
        target_predicate_objects = [po for po in predicate_objects if str(po[0]) in cs.predmap]

        # Small sets are de-duplicated up front; larger ones are consumed lazily
        if len(target_predicate_objects) > 4:
            pred_obj_permutations = permutations(target_predicate_objects)
        else:
            pred_obj_permutations = list(set(permutations(target_predicate_objects)))

        # For each permutation, create a cross product of all candidate tripleconstraints with the same
        # predicate as that in the permutation.  The string equivalent of each combination is matched
        # against the compiled shape match pattern.  A combination that matches is a candidate: the
        # permutation passes if every predicate/object passes the triple constraint paired with it.
        # The first passing candidate decides the outcome, so the remaining ones are not evaluated.
        for pred_obj_permutation in pred_obj_permutations:
            tc_idx_choices = [cs.predmap[str(pred)] for (pred, _) in pred_obj_permutation]
            for tc_idx_combo in product(*tc_idx_choices):
                if cs.matchpattern.fullmatch(''.join(tc_idx_combo)) and \
                        self.i_tc_idx_entry(subj, cs, tc_idx_combo, pred_obj_permutation):
                    return True
        return False

    def i_tc_idx_entry(self, subj, cs: CompiledShape, tc_idx_list: list, pred_obj_permutation: list) -> bool:
        """ Interpret the set of triple constraints in the particular predicate/object list and
        triple_constraint indices
        :param subj: Subject being tested
        :param cs: compiled shape
        :param tc_idx_list: one token per entry of pred_obj_permutation; tokens with a leading '0'
            and a non-zero value denote an "extra" predicate
        :param pred_obj_permutation: ordered (predicate, object) pairs
        :return: True if every pair passes the entry it is matched with
        """
        # TODO: Clean this up
        for i, idx in enumerate(tc_idx_list):
            pred, obj = pred_obj_permutation[i][0], pred_obj_permutation[i][1]
            if int(idx) != 0 and idx.startswith('0'):
                # Extra: The permutation that matched this can't have matched any other triple.
                for tripc_num in cs.predmap[str(pred)]:
                    if int(tripc_num) == 0 or not tripc_num.startswith('0'):
                        if self.i_tripleconstraint(Triple(subj, pred, obj), cs.tripleconstraints[int(tripc_num)]):
                            return False
            elif not self.i_tripleconstraint(Triple(subj, pred, obj), cs.tripleconstraints[int(idx)]):
                return False
        return True

    def i_shape(self, subj: URIRef, shape: ShapeLabel) -> bool:
        """ Interpret subject subj and shape in the graph
        :param subj: subject of interpretation.  If false-y, every subject in the graph is tried
            and the first one that passes makes the evaluation succeed
        :param shape: name of shape
        :return: success indicator
        :raises SchemaException: if shape isn't defined in the schema
        """
        assert self.graph is not None, "Graph must be supplied"
        if str(shape) not in self._shapes:
            raise SchemaException("Unresolved shape reference: %s" % str(shape))
        if shape not in self._compiled_shapes:
            self._compiled_shapes[shape] = CompiledShape(self._nsmap, self._shapes[str(shape)], self)
        cs = self._compiled_shapes[shape]
        if subj:
            return self._i_shape_subject(subj, cs)
        return any(self._i_shape_subject(s, cs) for s in self.graph.subjects())

    def i_tripleconstraint(self, t: Triple, c: TripleConstraint) -> bool:
        """ Evaluate triple t against triple constraint c.  Results are memoized per (t, c).
        :param t: triple to test
        :param c: constraint to apply
        :return: True if t satisfies c, inverted when c.negated is set
        """
        k = (t, c)
        if k in self._triple_results:
            return self._triple_results[k]
        if URIRef(self._nsmap.uri_for(c.predicate)) == t.p:
            rslt = (not c.objectConstraint or self.i_so_constraint(c.objectConstraint, t.o)) and \
                   (not c.subjectConstraint or self.i_so_constraint(c.subjectConstraint, t.s)) and \
                   (not c.object or self.i_so(c.object, t.o)) and \
                   (not c.subject or self.i_so(c.subject, t.s)) and \
                   (not c.objectShape or self.i_so_shape(c.objectShape, t.o)) and \
                   (not c.subjectShape or self.i_so_shape(c.subjectShape, t.s)) and \
                   (not c.objectType or self.i_so_type(c.objectType, t.o)) and \
                   (not c.subjectType or self.i_so_type(c.subjectType, t.s)) and \
                   (not c.datatype or self.i_datatype(c.datatype, t.o)) and \
                   (not c.valueClass or self.i_value_class(c.valueClass, t.o))
        else:
            rslt = False
        rval = rslt if c.negated is None or not c.negated else not rslt
        self._triple_results[k] = rval
        return rval

    def i_so_constraint(self, c: TripleConstraintValueClass, o: RDFTerm) -> bool:
        """ Test o against the facet and value set (when present) of c """
        rslt = (not c.facet or self.i_facet(c.facet, o)) and \
               (not c.valueSet or self.i_value_set(c.valueSet, o))
        return rslt

    def i_so(self, c: IRI, o: RDFTerm) -> bool:
        """ True if o is an IRI whose string form equals that of c """
        return o.is_iri and str(o.iri) == str(c)

    def i_so_shape(self, c: ShapeLabel, o: RDFTerm) -> bool:
        """ Literals never match a shape reference; anything else is evaluated against shape c """
        return False if o.is_literal else self.i_shape(o.val, c)

    @staticmethod
    def i_so_type(c: NodeType, o: RDFTerm) -> bool:
        """ True if the kind of node o matches node type c """
        return (c == NodeType.LITERAL and o.is_literal) or (c == NodeType.IRI and o.is_iri) or \
               (c == NodeType.BNODE and o.is_bnode) or (c == NodeType.NONLITERAL and not o.is_literal)

    def i_datatype(self, c: IRI, o: RDFTerm) -> bool:
        """ True if o is a literal whose datatype is c, or an untyped literal when c is xsd:string """
        # TODO: Flesh this out
        # NOTE(review): the second clause compares uri_for(c) with the QName 'xsd:string'; if uri_for
        # returns an expanded URI this can never be true -- confirm against PrefixMap.uri_for
        return o.is_literal and \
            ((str(o.literal.datatype) == self._nsmap.uri_for(c)) or
             (not(o.literal.datatype) and self._nsmap.uri_for(c) == 'xsd:string'))

    def i_value_class(self, c: ValueClassLabel, o) -> bool:
        """ Value class references are not evaluated yet -- always passes """
        return True

    def i_object(self, c: IRI, o: RDFTerm) -> bool:
        """ True if o is a URIRef equal to the URI of c """
        return isinstance(o, URIRef) and URIRef(self._nsmap.uri_for(c)) == o

    @staticmethod
    def i_facet(fct: XSFacet, o: RDFTerm) -> bool:
        """ Test o against every facet in fct
        :param fct: iterable of facets; each carries exactly one constraint
        :param o: term being tested
        :return: True if all of the facets pass
        """
        for f in fct:
            if f.pattern:
                rslt = bool(re.fullmatch(f.pattern, str(o.val)))
            elif f.not_:
                # NOTE(review): evaluated as a positive match, exactly like pattern -- confirm
                # whether a "not" facet is meant to invert the result
                rslt = bool(re.fullmatch(f.not_, str(o.val)))
            elif f.minLength:
                # minLength is a lower bound on the length of the lexical form
                rslt = len(str(o.val)) >= f.minLength
            elif f.maxLength:
                # maxLength is an upper bound on the length of the lexical form
                rslt = len(str(o.val)) <= f.maxLength
            elif f.length:
                rslt = len(str(o.val)) == f.length
            elif f.minValue:
                # The bound gets its own name: the facet itself is still needed for its "open" flag
                # NOTE(review): _coerce_numtype parses str(f.minValue) -- confirm that this yields
                # the numeric lexical form of the bound
                v = ShapeInterpreter._coerce_numtype(o, o.literal.value) if o.is_literal else None
                bound = ShapeInterpreter._coerce_numtype(o, f.minValue)
                if v is None or bound is None:
                    rslt = False
                else:
                    rslt = bound < v if f.minValue.open else bound <= v
            elif f.maxValue:
                v = ShapeInterpreter._coerce_numtype(o, o.literal.value) if o.is_literal else None
                bound = ShapeInterpreter._coerce_numtype(o, f.maxValue)
                if v is None or bound is None:
                    rslt = False
                else:
                    rslt = bound > v if f.maxValue.open else bound >= v
            elif f.totalDigits:
                rslt = bool(test_numeric_facet(o.literal.value, total_digits=f.totalDigits))
            elif f.fractionDigits:
                rslt = bool(test_numeric_facet(o.literal.value, fraction_digits=f.fractionDigits))
            else:
                assert False, "Unhandled facet"
            if not rslt:
                return False
        return True

    # Literal datatypes that take part in numeric comparison and the python type used to compare them
    dtlist = {XSD.integer: int, XSD.decimal: float, XSD.double: float}

    @staticmethod
    def _coerce_numtype(o: RDFTerm, v):
        """ Convert v to the python numeric type that corresponds to the datatype of literal o
        :param o: term whose literal datatype selects the conversion
        :param v: value to convert; its string form is parsed
        :return: converted number, or None if o isn't a literal with a datatype in dtlist
        """
        if not o.is_literal or o.literal.datatype not in ShapeInterpreter.dtlist:
            return None
        return ShapeInterpreter.dtlist[o.literal.datatype](str(v))

    def i_value_set(self, vs: ValueSet, o: RDFTerm) -> bool:
        """ Test o against the elements of value set vs
        :param vs: value set
        :param o: term being tested
        :return: True if o passes every element of vs
        """
        # NOTE(review): o has to pass *all* elements; a value set normally means "one of" -- confirm
        for vse in PyxbChoice(vs).elements:
            if vse.iriRange:
                rslt = self.i_iri_range(vse.iriRange, o)
            elif vse.rdfLiteral:
                rslt = self.i_rdf_literal(vse.rdfLiteral, o)
            elif vse.integer:
                rslt = self.i_integer(vse.integer, o)
            elif vse.decimal:
                rslt = self.i_decimal(vse.decimal, o)
            elif vse.double:
                rslt = self.i_double(vse.double, o)
            elif vse.boolean:
                rslt = self.i_boolean(vse.boolean, o)
            else:
                assert False, "Unknown vse type"
            if not rslt:
                return False
        return True

    def i_iri_range(self, ir: IRIRange, o: RDFTerm):
        """ True if o passes the stem of ir and none of its exclusions """
        return self.i_iri_stem(ir, o) and not any(self.i_iri_stem(ex, o) for ex in ir.exclusion)

    @staticmethod
    def i_iri_stem(ist: IRIStem, o: RDFTerm):
        """ Compare an IRI with ist.base -- by prefix if ist.stem is set, exactly otherwise """
        # NOTE(review): anything that isn't an IRI passes -- confirm that this is intended
        return not o.is_iri or (str(ist.base) == str(o.iri) if not ist.stem else str(o.iri).startswith(ist.base))

    def i_rdf_literal(self, rdfl: RDFLiteral, o):
        """ True if o is a literal equal to rdfl (value, language tag and datatype) """
        if not o.is_literal:
            return False
        lit = Literal(rdfl.value(), lang=rdfl.langtag,
                      datatype=URIRef(self._nsmap.uri_for(rdfl.datatype)) if rdfl.datatype else None)
        return lit == o.literal

    @staticmethod
    def i_integer(intv, o):
        """ True if o is a literal equal to intv typed as xsd:integer """
        if not o.is_literal:
            return False
        lit = Literal(intv, datatype=XSD.integer)
        return lit == o.literal

    @staticmethod
    def i_decimal(decv, o):
        """ True if o is a literal equal to decv typed as xsd:decimal """
        if not o.is_literal:
            return False
        lit = Literal(decv, datatype=XSD.decimal)
        return lit == o.literal

    @staticmethod
    def i_double(doubv, o):
        """ True if o is a literal equal to doubv typed as xsd:double """
        if not o.is_literal:
            return False
        lit = Literal(doubv, datatype=XSD.double)
        return lit == o.literal

    @staticmethod
    def i_boolean(boolv, o):
        """ True if o is a literal equal to boolv typed as xsd:boolean """
        if not o.is_literal:
            return False
        lit = Literal(boolv, datatype=XSD.boolean)
        return lit == o.literal
class ShExSchema:
    """ ShEx XML Schema to JSON wrapper

    The converted schema is available as a plain dictionary in ``json``.

    NOTE(review): a second definition of a class with this name appears later in this file; if both
    are at module level the later one replaces this one -- confirm which is meant to be kept.
    """

    def __init__(self, dom_schema):
        """ Constructor - convert the supplied schema to json
        :param dom_schema: DOM document to convert
        """
        self.schema = CreateFromDOM(dom_schema)
        self.json = dict(type="schema")
        self._prefixmap = PrefixMap(self.schema, dom_schema)
        self._exclude_prefixes = self.schema.exclude_prefixes.split(' ') + ['xml', 'xmlns']
        self.shex_schema()

    def shex_schema(self):
        """ <code>xs:Element name="Schema" type="shex:Schema</code>

        Populate ``self.json`` with the prefixes, start actions, shapes, value classes and start
        shape of the schema.  Entries that are absent from the schema are omitted.
        """
        self.json["prefixes"] = {prefix: url for prefix, url in self._prefixmap.namespaces().items()
                                 if prefix is not None and url and prefix not in self._exclude_prefixes}
        if self.schema.startActions:
            self.json["startActs"] = self.shex_semantic_actions(self.schema.startActions)
        if self.schema.shape:
            self.json["shapes"] = {self._uri(s.label): self.shex_shape(s) for s in self.schema.shape}
        if self.schema.valueClass:
            self.json["valueClasses"] = \
                {self.shex_iri(vc.definition.valueClassLabel if vc.definition else vc.external.ref):
                    self.shex_value_class_definition(vc) for vc in self.schema.valueClass}
        if self.schema.start:
            self.json["start"] = self._uri(self.schema.start)

    def shex_shape(self, shape: Shape) -> dict:
        """ <code>xs:complexType name="shape"</code>
        :param shape: XML Shape
        :return: S-JSON Shape Entry
        """
        rval = dict(type="shape")
        w_shape = PyxbWrapper(shape)
        self.shex_annotations_and_actions(rval, w_shape)
        # Two passes so that "expression" precedes "inherit" / "extra" in the result
        for e in w_shape.elements:
            self.shex_expression_choice(rval, e)
        for e in w_shape.elements:
            if e.type == "import_":
                rval.setdefault("inherit", []).append(self.shex_shape_ref(e.value.node))
            elif e.type == "extra":
                rval.setdefault(e.type, []).append(self._uri(e.value.node.ref))
        # shape.label is the dictionary key in the Schema container
        if shape.virtual:
            rval["virtual"] = shape.virtual
        if shape.closed:
            rval["closed"] = shape.closed
        return rval

    @staticmethod
    def _typed_expression(typ: str, val: dict) -> dict:
        """ Record typ as the "type" of val and return val """
        val["type"] = typ
        return val

    def shex_expression_choice(self, target: dict, e: PyxbWrapper.PyxbElement) -> dict:
        """ <code>xs:group name="ExpressionChoice"</code>
        :param target: target type with ExpressionChoice mixin
        :param e: Wrapper for ExpressionChoice element
        :return: target
        """
        if e.type in ["someOf", "group"]:
            expr = self.shex_shape_constraint(e.value.node)
        elif e.type == "tripleConstraint":
            expr = self.shex_triple_constraint(e.value.node)
        elif e.type == "include":
            expr = dict(include=self._uri(e.value.node.ref))
        else:
            # Not an expression (e.g. annotation) -- handled by the caller
            expr = None
        if expr:
            target["expression"] = self._typed_expression(e.type, expr)
        return target

    def shex_annotations_and_actions(self, target: dict, ew: PyxbWrapper):
        """ <code>xs:group name="AnnotationsAndActions</code>
        :param target: dictionary using the group
        :param ew: xml element that contains the group
        """
        for e in ew.elements:
            if e.type == "actions":
                target["semActs"] = self.shex_semantic_actions(e.value.node)
            elif e.type == "annotation":
                target.setdefault("annotations", []).append(self.shex_annotation(e.value.node))

    def shex_shape_constraint(self, sc: ShapeConstraint) -> dict:
        """ <code>xs:complexType name="ShapeConstraint"</code>
        :param sc: A complete shape constraint
        :return: S-JSON expression
        """
        rval = dict()
        sc_wrapper = PyxbWrapper(sc)
        for e in sc_wrapper.elements:
            entry = self.shex_expression_choice({}, e)
            if "expression" in entry:
                rval.setdefault("expressions", []).append(entry["expression"])
        self.shex_annotations_and_actions(rval, sc_wrapper)
        self.shex_cardinality(rval, sc_wrapper)
        return rval

    def shex_triple_constraint(self, tc: TripleConstraint) -> dict:
        """ <code>xs:complexType name="TripleConstraint"</code>
        :param tc: TripleConstraint to process
        :return: SJson equivalent
        """
        assert not ((tc.objectConstraint or tc.object or tc.objectShape or tc.objectType) and
                    (tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType)), \
            "Cannot mix subject and object constraints"
        tc_dict = dict(type="tripleConstraint", predicate=self.shex_iri(tc.predicate))
        if tc.valueClass:
            # Reference to a named value class: there is no inline value to emit
            tc_dict["valueClassRef"] = self.shex_value_class_label(tc.valueClass)
            vc_dict = None
        else:
            vc_dict = dict(type="valueClass")
            if tc.objectConstraint:
                self.shex_triple_constraint_value_class(vc_dict, tc.objectConstraint)
            if tc.object:
                vc_dict["values"] = [self.shex_iri(tc.object)]
            if tc.objectShape:
                vc_dict["reference"] = self.shex_shape_label(tc.objectShape)
            if tc.objectType:
                vc_dict["nodeKind"] = self.shex_node_type(tc.objectType)
            if tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType or tc.inverse:
                tc_dict["inverse"] = True
            if tc.subjectConstraint:
                self.shex_triple_constraint_value_class(vc_dict, tc.subjectConstraint)
            if tc.subject:
                vc_dict["values"] = [self.shex_iri(tc.subject)]
            if tc.subjectShape:
                vc_dict["reference"] = self.shex_shape_label(tc.subjectShape)
            if tc.subjectType:
                vc_dict["nodeKind"] = self.shex_node_type(tc.subjectType)
            if tc.datatype:
                vc_dict["datatype"] = self._uri(tc.datatype)
        # Negation, annotations, actions and cardinality are read from the triple constraint itself,
        # so they are emitted for referenced and inline value classes alike
        if tc.negated:
            tc_dict["negated"] = tc.negated
        tc_wrapper = PyxbWrapper(tc)
        self.shex_annotations_and_actions(tc_dict, tc_wrapper)
        self.shex_cardinality(tc_dict, tc_wrapper)
        if vc_dict is not None:
            # Added last so that "value" stays the final key of the entry
            tc_dict["value"] = vc_dict
        return tc_dict

    @staticmethod
    def shex_node_type(nt: NodeType):
        """ Lower case string form of nt """
        return str(nt).lower()

    def shex_annotation(self, annot: Annotation) -> list:
        """ <code>xs:complexType name="Annotation"</code>
        :param annot: Annotation
        :return: S-JSON equivalent
        """
        rval = [self._uri(annot.iri)] if annot.iri else []
        if annot.literal:
            rval.append(self.shex_rdf_literal(annot.literal))
        else:
            rval.append(self.shex_iri_ref(annot.iriref))
        return rval

    def shex_semantic_actions(self, acts: SemanticActions) -> list:
        """ <code>xs:complexType name="SemanticActions"</code>
        :param acts: actions
        :return: list of actions
        """
        return [self.shex_semantic_action(a) for a in acts.action]

    def shex_semantic_action(self, act: SemanticAction) -> dict:
        """ <code>xs:complexType name="SemanticAction"</code>
        :param act: action
        :return: S-JSON representation
        """
        # TODO: validating
        rval = {}
        if act.productionName:
            rval['name'] = self._uri(act.productionName.ref)
        if act.codeDecl:
            rval['contents'] = self.shex_code_decl(act.codeDecl)
        return rval

    @staticmethod
    def shex_code_decl(cd: CodeDecl):
        """ <code>xs:complexType name="CodeDecl" mixed="true"</code>
        :param cd: code declaration
        :return: mixed content of cd as produced by PyxbWrapper.mixed_content
        """
        return PyxbWrapper.mixed_content(cd)

    def shex_value_class_definition(self, vcd: ValueClassDefinition) -> dict:
        """ <code>xs:complexType name="ValueClassDefinition"</code>
        :param vcd: external reference or inline definition of a value class
        :return: S-JSON value class
        """
        rval = dict(type="valueClass")
        if vcd.external:
            rval["external"] = self.shex_value_class_ref(vcd.external)
        else:
            self.shex_inline_value_class_definition(rval, vcd.definition)
            if vcd.definition.actions:
                rval["semActs"] = self.shex_semantic_actions(vcd.definition.actions)
        return rval

    def shex_inline_value_class_definition(self, vc: dict, ivcd: InlineValueClassDefinition) -> None:
        """ <code>xs:complexType name="InlineValueClassDefinition"</code>
        :param vc: dictionary to record the actual elements
        :param ivcd: definition whose elements are recorded in vc
        :raises AssertionError: if an element of an unknown type is encountered
        """
        # valueClassLabel becomes the identity
        vcd_wrapper = PyxbWrapper(ivcd)
        for e in vcd_wrapper.elements:
            if e.type == "nodetype":
                vc["nodeKind"] = self.shex_node_type(e.value.node)
            elif e.type == "datatype":
                vc[e.type] = self._uri(e.value.node)
            elif e.type == "facet":
                self.shex_xs_facet(vc, e.value.node)
            elif e.type == "or_":
                vc["reference"] = self.shex_group_shape_constr(e.value.node)
            elif e.type == "valueSet":
                vc["values"] = self.shex_value_set(e.value.node)
            else:
                # Raised explicitly so that the check survives python -O
                raise AssertionError("Unknown ValueClassExpression choice entry: %s" % e.type)

    def shex_group_shape_constr(self, gsc: GroupShapeConstr) -> dict:
        """ <code>xs:complexType name="GroupShapeConstr"</code>
        :param gsc: disjunction of shape references with optional string facets
        :return: S-JSON "or" entry
        """
        rval = dict(type="or", disjuncts=[self.shex_shape_ref(d) for d in gsc.disjunct])
        if gsc.stringFacet:
            for e in gsc.stringFacet:
                self.shex_xs_facet(rval, e)
        return rval

    # noinspection PyTypeChecker
    def shex_triple_constraint_value_class(self, vc: dict, tcvc: TripleConstraintValueClass) -> None:
        """ Record the elements of tcvc in vc (same processing as an inline value class definition) """
        return self.shex_inline_value_class_definition(vc, tcvc)

    def shex_value_class_label(self, l: ValueClassLabel) -> str:
        """ <code>xs:simpleType name="ValueClassLabel"</code>
        :param l: label
        :return: URI of the label
        """
        return self.shex_iri(l)

    def shex_value_class_ref(self, lr: ValueClassRef) -> str:
        """ <code>xs:complexType name="ValueClassRef"</code>
        :param lr: reference
        :return: URI of the referenced label
        """
        return self.shex_value_class_label(lr.ref)

    def shex_shape_label(self, sl: ShapeLabel) -> str:
        """ <code>xs:simpleType name="ShapeLabel"</code>
        :param sl: label
        :return: URI of the label
        """
        return self.shex_iri(sl)

    def shex_shape_ref(self, sr: ShapeRef) -> str:
        """ <code>xs:complexType name="ShapeRef"</code>
        :param sr: reference
        :return: URI of the referenced label
        """
        return self.shex_shape_label(sr.ref)

    @staticmethod
    def shex_code_label(cl: ProductionName) -> str:
        """ <code>xs:complexType name="CodeLabel"</code>
        :param cl: production name
        :return: value of the reference of cl
        """
        return cl.ref.value()

    @staticmethod
    def _normalize_value(v):
        """ Numeric value of v: int if v.integer is set, otherwise float of v.double or v.decimal """
        return int(v.integer) if v.integer is not None else \
            float(v.double) if v.double is not None else float(v.decimal)

    @staticmethod
    def shex_xs_facet(target: dict, f: XSFacet):
        """ <code>xs:complexType name="XSFacet"</code>
        :param target: target dictionary (ValueClass)
        :param f: facet to transform
        :raises AssertionError: if none of the known facets is set on f
        """
        if f.pattern:
            target["pattern"] = f.pattern
        elif f.not_:
            target["negated"] = True
        elif f.minLength:
            target["minlength"] = f.minLength
        elif f.maxLength:
            target["maxlength"] = f.maxLength
        elif f.length:
            target["length"] = f.length
        elif f.minValue:
            if f.minValue.open:
                target["minexclusive"] = ShExSchema._normalize_value(f.minValue)
            else:
                target["mininclusive"] = ShExSchema._normalize_value(f.minValue)
        elif f.maxValue:
            if f.maxValue.open:
                target["maxexclusive"] = ShExSchema._normalize_value(f.maxValue)
            else:
                target["maxinclusive"] = ShExSchema._normalize_value(f.maxValue)
        elif f.totalDigits:
            target["totaldigits"] = f.totalDigits
        elif f.fractionDigits:
            target["fractiondigits"] = f.fractionDigits
        else:
            # Raised explicitly so that the check survives python -O
            raise AssertionError("Unknown facet %s" % f)

    # shex_endpoint is covered in the xs_facet logic above

    # noinspection PyTypeChecker
    @staticmethod
    def shex_string_facet(target: dict, sf: StringFacet):
        """ Record string facet sf in target """
        ShExSchema.shex_xs_facet(target, sf)

    # noinspection PyTypeChecker
    @staticmethod
    def shex_numeric_facet(target: dict, nf: NumericFacet):
        """ Record numeric facet nf in target """
        ShExSchema.shex_xs_facet(target, nf)

    def shex_value_set(self, vs: ValueSet) -> list:
        """ Convert a value set into a list of S-JSON values

        Only the first non-empty kind of member (IRI ranges, RDF literals, integers, decimals,
        doubles, booleans -- in that order) is emitted.
        :param vs: value set
        :return: list of converted values
        :raises AssertionError: if vs has no members of any known kind
        """
        if vs.iriRange:
            return [self.shex_iri_range(e) for e in vs.iriRange]
        elif vs.rdfLiteral:
            return [self.shex_rdf_literal(e) for e in vs.rdfLiteral]
        elif vs.integer:
            return ['"%i"^^%s' % (e, XSD.integer) for e in vs.integer]
        elif vs.decimal:
            # %s, not %d: an integer conversion would drop the fractional part
            return ['"%s"^^%s' % (e, XSD.decimal) for e in vs.decimal]
        elif vs.double:
            return ['"%e"^^%s' % (e, XSD.double) for e in vs.double]
        elif vs.boolean:
            return ['"%s"^^%s' % (e, XSD.boolean) for e in vs.boolean]
        else:
            # Raised explicitly so that the check survives python -O
            raise AssertionError("Unknown ValueSet type")

    def shex_iri_stem(self, ist: IRIStem) -> object:
        """ Convert an IRI stem
        :param ist: stem
        :return: the plain IRI if ist has a base and isn't a stem, otherwise a dictionary whose
            "stem" is the base IRI or, when there is no base, a wildcard
        """
        if ist.base and not ist.stem:
            return self.shex_iri(ist.base)
        else:
            return dict(stem=self.shex_iri(ist.base)) if ist.base else dict(stem=dict(type="wildcard"))

    def shex_iri_range(self, irir: IRIRange) -> object:
        """ Convert an IRI range
        :param irir: range
        :return: the plain IRI if irir is just a base, otherwise a "stemRange" dictionary
        """
        def add_stem_type(d: dict, v: IRIStem):
            if v.stem:
                d["type"] = "stem"
            return d

        # If just a base, return the IRI
        if irir.base and not irir.stem and not irir.exclusion:
            return self.shex_iri(irir.base)
        rval = dict(type="stemRange")
        # NOTE(review): a base that isn't a stem but has exclusions makes shex_iri_stem return a
        # plain IRI rather than a dictionary here -- confirm that the schema rules this out
        rval.update(self.shex_iri_stem(irir))
        if irir.exclusion:
            rval["exclusions"] = [add_stem_type(self.shex_iri_stem(e), e) for e in irir.exclusion]
        return rval

    def shex_rdf_literal(self, lit: RDFLiteral) -> str:
        """ Quoted value of lit followed by its datatype (^^) and / or language tag (@) if present """
        rval = '"' + lit.value() + '"'
        if lit.datatype:
            rval += '^^' + self.shex_iri(lit.datatype)
        if lit.langtag:
            rval += '@' + lit.langtag
        return rval

    def shex_iri(self, iri: IRI) -> str:
        """ Complete URI for the string form of iri """
        return self._uri(str(iri))

    def shex_iri_ref(self, ref: IRIRef) -> str:
        """ Complete URI for the target of ref """
        return self.shex_iri(ref.ref)

    def shex_prefixed_name(self, pn: PrefixedName) -> str:
        """ Complete URI for the string form of pn """
        return self._uri(str(pn))

    @staticmethod
    def shex_cardinality(target: dict, card: PyxbWrapper):
        """ Record the cardinality of card.node in target
        :param target: dictionary that receives "min" and "max"
        :param card: wrapper whose node carries min and max; both default to 1 when absent

        Nothing is recorded for the default cardinality of exactly one.
        """
        minv = card.node.min if card.node.min is not None else 1
        maxv = card.node.max if card.node.max is not None else 1
        if minv == maxv:
            if minv != 1:
                # TODO: Fix comparison tests so we can substitute length here
                # target["length"] = minv
                target["min"] = minv
                target["max"] = maxv
        else:
            target["min"] = minv
            target["max"] = '*' if maxv == "unbounded" else maxv

    def _uri(self, element):
        """ Map element into a complete URI
        :param element: URI or QNAME
        :return: URI
        """
        return self._prefixmap.uri_for(PyxbWrapper.proc_unicode(element))
class ShExSchema: """ ShEx XML Schema to JSON wrapper """ def __init__(self, dom_schema): """ Constructor - convert the supplied schema to json :param dom_schema: DOM document to convert """ self.schema = CreateFromDOM(dom_schema) self.json = dict(type="schema") self._prefixmap = PrefixMap(self.schema, dom_schema) self._exclude_prefixes = self.schema.exclude_prefixes.split(' ') + [ 'xml', 'xmlns' ] self.shex_schema() def shex_schema(self): """ <code>xs:Element name="Schema" type="shex:Schema</code> """ self.json["prefixes"] = { prefix: url for prefix, url in self._prefixmap.namespaces().items() if prefix is not None and url and prefix not in self._exclude_prefixes } if self.schema.startActions: self.json["startActs"] = self.shex_semantic_actions( self.schema.startActions) if self.schema.shape: self.json["shapes"] = { self._uri(s.label): self.shex_shape(s) for s in self.schema.shape } if self.schema.valueClass: self.json["valueClasses"] = \ {self.shex_iri(vc.definition.valueClassLabel if vc.definition else vc.external.ref): self.shex_value_class_definition(vc) for vc in self.schema.valueClass} if self.schema.start: self.json["start"] = self._uri(self.schema.start) def shex_shape(self, shape: Shape) -> dict: """ <code>xs:complexType name="shape"</code> :param shape: XML Shape :return: S-JSON Shape Entry """ rval = dict(type="shape") w_shape = PyxbWrapper(shape) self.shex_annotations_and_actions(rval, w_shape) [self.shex_expression_choice(rval, e) for e in w_shape.elements] for e in w_shape.elements: if e.type == "import_": rval.setdefault("inherit", []).append(self.shex_shape_ref(e.value.node)) elif e.type == "extra": rval.setdefault(e.type, []).append(self._uri(e.value.node.ref)) # shape.label is the dictionary key in the Schema container if shape.virtual: rval["virtual"] = shape.virtual if shape.closed: rval["closed"] = shape.closed return rval @staticmethod def _typed_expression(typ: str, val: dict) -> dict: val["type"] = typ return val def 
shex_expression_choice(self, target: dict, e: PyxbWrapper.PyxbElement) -> dict: """ <code>xs:group name="ExpressionChoice"</code> :param target: target type with ExpressionChoice mixin :param e: Wrapper for ExpressionChoice element :return: target """ if e.type in ["someOf", "group"]: expr = self.shex_shape_constraint(e.value.node) elif e.type == "tripleConstraint": expr = self.shex_triple_constraint(e.value.node) elif e.type == "include": expr = dict(include=self._uri(e.value.node.ref)) else: expr = None if expr: target["expression"] = self._typed_expression(e.type, expr) return target def shex_annotations_and_actions(self, target: dict, ew: PyxbWrapper): """ <code>xs:group name="AnnotationsAndActions</code> :param target: dictionary using the group :param ew: xml element that contains the group """ for e in ew.elements: if e.type == "actions": target["semActs"] = self.shex_semantic_actions(e.value.node) elif e.type == "annotation": target.setdefault("annotations", []).append( self.shex_annotation(e.value.node)) def shex_shape_constraint(self, sc: ShapeConstraint) -> dict: """ <code>xs:complexType name="ShapeConstraint"</code> :param sc: A complete shape constraint :return: S-JSON expression """ rval = dict() sc_wrapper = PyxbWrapper(sc) for e in sc_wrapper.elements: entry = self.shex_expression_choice({}, e) if "expression" in entry: rval.setdefault("expressions", []).append(entry["expression"]) self.shex_annotations_and_actions(rval, sc_wrapper) self.shex_cardinality(rval, sc_wrapper) return rval def shex_triple_constraint(self, tc: TripleConstraint) -> dict: """ <code>xs:complexType name="TripleConstraint"</code> :param tc: TripleConstraint to process :return: SJson equivalent """ assert not ((tc.objectConstraint or tc.object or tc.objectShape or tc.objectType) and (tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType)), \ "Cannot mix subject and object constraints" tc_dict = dict(type="tripleConstraint", 
predicate=self.shex_iri(tc.predicate)) if tc.valueClass: tc_dict["valueClassRef"] = self.shex_value_class_label( tc.valueClass) else: vc_dict = dict(type="valueClass") if tc.objectConstraint: self.shex_triple_constraint_value_class( vc_dict, tc.objectConstraint) if tc.object: vc_dict["values"] = [self.shex_iri(tc.object)] if tc.objectShape: vc_dict["reference"] = self.shex_shape_label(tc.objectShape) if tc.objectType: vc_dict["nodeKind"] = self.shex_node_type(tc.objectType) if tc.subjectConstraint or tc.subject or tc.subjectShape or tc.subjectType or tc.inverse: tc_dict["inverse"] = True if tc.subjectConstraint: self.shex_triple_constraint_value_class( vc_dict, tc.subjectConstraint) if tc.subject: vc_dict["values"] = [self.shex_iri(tc.subject)] if tc.subjectShape: vc_dict["reference"] = self.shex_shape_label(tc.subjectShape) if tc.subjectType: vc_dict["nodeKind"] = self.shex_node_type(tc.subjectType) if tc.datatype: vc_dict["datatype"] = self._uri(tc.datatype) if tc.negated: tc_dict["negated"] = tc.negated tc_wrapper = PyxbWrapper(tc) self.shex_annotations_and_actions(tc_dict, tc_wrapper) self.shex_cardinality(tc_dict, tc_wrapper) tc_dict["value"] = vc_dict return tc_dict @staticmethod def shex_node_type(nt: NodeType): return str(nt).lower() def shex_annotation(self, annot: Annotation) -> list: """ <code>xs:complexType name="Annotation"</code> :param annot: Annotation :return: S-JSON equivalent """ rval = [self._uri(annot.iri)] if annot.iri else [] if annot.literal: rval.append(self.shex_rdf_literal(annot.literal)) else: rval.append(self.shex_iri_ref(annot.iriref)) return rval def shex_semantic_actions(self, acts: SemanticActions) -> list: """ <code>xs:complexType name="SemanticActions"</code> :param acts: actions :return: list of actions """ return [self.shex_semantic_action(a) for a in acts.action] def shex_semantic_action(self, act: SemanticAction) -> dict: """ <code>xs:complexType name="SemanticAction"</code> :param act: action :return: S-JSON representation 
""" # TODO: validating rval = {} if act.productionName: rval['name'] = self._uri(act.productionName.ref) if act.codeDecl: rval['contents'] = self.shex_code_decl(act.codeDecl) return rval @staticmethod def shex_code_decl(cd: CodeDecl): """ <code>xs:complexType name="CodeDecl" mixed="true"</code> :param cd: :return: """ return PyxbWrapper.mixed_content(cd) def shex_value_class_definition(self, vcd: ValueClassDefinition) -> dict: """ <code>xs:complexType name="ValueClassDefinition"</code> :param vcd: :return: """ rval = dict(type="valueClass") if vcd.external: rval["external"] = self.shex_value_class_ref(vcd.external) else: self.shex_inline_value_class_definition(rval, vcd.definition) if vcd.definition.actions: rval["semActs"] = self.shex_semantic_actions( vcd.definition.actions) return rval def shex_inline_value_class_definition( self, vc: dict, ivcd: InlineValueClassDefinition) -> list: """ <code>xs:complexType name="InlineValueClassDefinition"</code> :param vc: dictionary to record the actual elements :param ivcd: :return: """ # valueClassLabel becomes the identity vcd_wrapper = PyxbWrapper(ivcd) for e in vcd_wrapper.elements: if e.type == "nodetype": vc["nodeKind"] = self.shex_node_type(e.value.node) elif e.type == "datatype": vc[e.type] = self._uri(e.value.node) elif e.type == "facet": self.shex_xs_facet(vc, e.value.node) elif e.type == "or_": vc["reference"] = self.shex_group_shape_constr(e.value.node) elif e.type == "valueSet": vc["values"] = self.shex_value_set(e.value.node) else: assert False, "Unknown ValueClassExpression choice entry: %s" % e.type def shex_group_shape_constr(self, gsc: GroupShapeConstr) -> dict: """ <code>xs:complexType name="GroupShapeConstr"</code> :param gsc: :return: """ rval = dict(type="or", disjuncts=[self.shex_shape_ref(d) for d in gsc.disjunct]) if gsc.stringFacet: [self.shex_xs_facet(rval, e) for e in gsc.stringFacet] return rval # noinspection PyTypeChecker def shex_triple_constraint_value_class( self, vc: dict, tcvc: 
TripleConstraintValueClass) -> (dict, dict): return self.shex_inline_value_class_definition(vc, tcvc) def shex_value_class_label(self, l: ValueClassLabel) -> str: """ <code>xs:simpleType name="ValueClassLabel"</code> :param l: :return: """ return self.shex_iri(l) def shex_value_class_ref(self, lr: ValueClassRef) -> str: """ <code>xs:complexType name="ValueClassRef"</code> :param lr: :return: """ return self.shex_value_class_label(lr.ref) def shex_shape_label(self, sl: ShapeLabel) -> str: """ <code>xs:simpleType name="ShapeLabel"</code> :param sl: :return: """ return self.shex_iri(sl) def shex_shape_ref(self, sr: ShapeRef) -> str: """ <code>xs:complexType name="ShapeRef"</code> :param sr: :return: """ return self.shex_shape_label(sr.ref) @staticmethod def shex_code_label(cl: ProductionName) -> str: """ <code>xs:complexType name="CodeLabel"</code> :param cl: :return: """ return cl.ref.value() @staticmethod def _normalize_value(v): return int(v.integer) if v.integer is not None else \ float(v.double) if v.double is not None else float(v.decimal) @staticmethod def shex_xs_facet(target: dict, f: XSFacet): """ <code>xs:complexType name="XSFacet"</code> :param target: target dictionary (ValueClass) :param f: facet to transform """ if f.pattern: target["pattern"] = f.pattern elif f.not_: target["negated"] = True elif f.minLength: target["minlength"] = f.minLength elif f.maxLength: target["maxlength"] = f.maxLength elif f.length: target["length"] = f.length elif f.minValue: if f.minValue.open: target["minexclusive"] = ShExSchema._normalize_value( f.minValue) else: target["mininclusive"] = ShExSchema._normalize_value( f.minValue) elif f.maxValue: if f.maxValue.open: target["maxexclusive"] = ShExSchema._normalize_value( f.maxValue) else: target["maxinclusive"] = ShExSchema._normalize_value( f.maxValue) elif f.totalDigits: target["totaldigits"] = f.totalDigits elif f.fractionDigits: target["fractiondigits"] = f.fractionDigits else: assert False, "Unknown facet %s" % f # 
def shex_value_set(self, vs: ValueSet) -> list:
    """ Transform a ValueSet into the list of its members.

    The member kinds are probed in a fixed order (iriRange, rdfLiteral,
    integer, decimal, double, boolean) and only the first non-empty kind is
    returned.  IRI ranges and RDF literals are delegated to their own
    transforms; the remaining kinds are rendered as ``"lexical"^^datatype``
    strings.

    :param vs: value set to transform
    :return: list of transformed members
    :raises AssertionError: if ``vs`` has no members of any known kind
    """
    if vs.iriRange:
        return [self.shex_iri_range(e) for e in vs.iriRange]
    elif vs.rdfLiteral:
        return [self.shex_rdf_literal(e) for e in vs.rdfLiteral]
    elif vs.integer:
        return ['"%i"^^%s' % (e, XSD.integer) for e in vs.integer]
    elif vs.decimal:
        # %s keeps the fractional part; %d would truncate 1.5 to "1"
        return ['"%s"^^%s' % (e, XSD.decimal) for e in vs.decimal]
    elif vs.double:
        return ['"%e"^^%s' % (e, XSD.double) for e in vs.double]
    elif vs.boolean:
        return ['"%s"^^%s' % (e, XSD.boolean) for e in vs.boolean]
    else:
        # Raised explicitly so the check survives ``python -O``, which strips
        # ``assert`` statements; the exception type is unchanged.
        raise AssertionError("Unknown ValueSet type")
def _uri(self, element):
    """ Resolve an element to a complete URI using this instance's prefix map

    :param element: URI or QNAME
    :return: URI
    """
    normalized = PyxbWrapper.proc_unicode(element)
    return self._prefixmap.uri_for(normalized)