def directed_predicates_in_expression(expression: ShExJ.shapeExpr, cntxt: Context) -> Dict[IRIREF, PredDirection]:
    """ Collect the predicates used by the triple constraints of ``expression`` along with the direction(s)
    in which each one is evaluated.

    :param expression: Shape expression to scan
    :param cntxt: Context whose ``visit_shapes`` / ``visit_triple_expressions`` drive the traversal
    :return: Map from predicate to the ``PredDirection`` accumulated for it
    """
    def record_direction(found: Dict[IRIREF, PredDirection], tc: ShExJ.TripleConstraint, _: Context) -> None:
        # Triple expression visitor -- anything that is not a TripleConstraint is ignored
        if not isinstance(tc, ShExJ.TripleConstraint):
            return
        direction = found.setdefault(tc.predicate, PredDirection())
        # A missing (None) or false ``inverse`` both count as "not inverse"
        direction.dir(not tc.inverse)

    def scan_shape(found: Dict[IRIREF, PredDirection], shape_expr: ShExJ.shapeExpr, cntxt_: Context) -> None:
        # Shape visitor -- only Shapes that carry a triple expression contribute predicates
        if isinstance(shape_expr, ShExJ.Shape) and shape_expr.expression is not None:
            cntxt_.visit_triple_expressions(shape_expr.expression, record_direction, found)

    result = {}
    # TODO: follow_inner_shapes as True probably goes too far, but we definitely need to cross shape/triplecons
    cntxt.visit_shapes(expression, scan_shape, result, follow_inner_shapes=False)
    return result
def test_example_2(self):
    """ Visiting the first shape of the ``shex_2`` schema must reach S1 and then S2 """
    schema, _ = setup_test(shex_2, None)
    cntxt = Context(None, schema)
    shapes_visited = []
    # visit_shape receives shapes_visited as the accumulator argument of the traversal
    cntxt.visit_shapes(schema.shapes[0], visit_shape, shapes_visited)
    expected = ["http://schema.example/S1", "http://schema.example/S2"]
    self.assertEqual(expected, shapes_visited)
def nodeSatisfiesDataType(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, c: DebugContext) -> bool:
    """ `5.4.3 Datatype Constraints <http://shex.io/shex-semantics/#datatype>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n is an Literal with the datatype v and, if v is in
    the set of SPARQL operand data types[sparql11-query], an XML schema string with a value of the lexical form of
    n can be cast to the target type v per XPath Functions 3.1 section 19 Casting[xpath-functions].
    Only datatypes supported by SPARQL MUST be tested but ShEx extensions MAY add support for other datatypes.

    :param cntxt: Evaluation context -- receives the failure reason when the test fails
    :param n: Node to test
    :param nc: Node constraint; a ``datatype`` of None means there is nothing to test
    :param c: Debug context
    :return: True if n satisfies the datatype constraint (or there is none)
    """
    if nc.datatype is None:
        return True
    if c.debug:
        print(f" Datatype: {nc.datatype}")
    if not isinstance(n, Literal):
        # f-strings are used so the message can be built even when a component is not a plain str
        cntxt.fail_reason = f"Datatype constraint {nc.datatype} does not match " \
                            f"{type(n).__name__} {cntxt.n3_mapper.n3(n)}"
        cntxt.dump_bnode(n)
        return False
    actual_datatype = _datatype(n)
    if actual_datatype == str(nc.datatype) or \
            (is_sparql_operand_datatype(nc.datatype) and can_cast_to(n, nc.datatype)):
        return True
    cntxt.fail_reason = f"Datatype mismatch - expected: {nc.datatype} actual: {actual_datatype}"
    return False
def _fail_triples(cntxt: Context, T: RDFGraph) -> None:
    """ Report the triples in T as failure reasons on cntxt.

    At most the first five triples (in sorted order) are listed; a trailing " ... " marks that the
    list was truncated.  Nothing is reported for an empty T.

    :param cntxt: Evaluation context whose ``fail_reason`` is assigned once per reported line
    :param T: Triples to report
    """
    max_listed = 5
    tlist = sorted(T)
    if not tlist:
        return
    # NOTE(review): fail_reason is assigned repeatedly, so it is presumably an accumulating setter -- confirm
    cntxt.fail_reason = "Triples:"
    for t in tlist[:max_listed]:
        cntxt.fail_reason = " " + cntxt.n3_mapper.n3(t)
    if len(tlist) > max_listed:
        cntxt.fail_reason = " ... "
def matchesTripleExprLabel(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, c: DebugContext) -> bool:
    """ Resolve the triple expression labelled ``expr`` and evaluate T against it with matchesCardinality.

    :param cntxt: Evaluation context used to look up the label; receives the failure reason
    :param T: Triples to be matched
    :param expr: Label of the triple expression
    :param c: Debug context
    :return: Result of matchesCardinality, or False if the label cannot be resolved
    """
    if c.debug:
        print(f" {expr}")
    referenced = cntxt.tripleExprFor(expr)
    if not referenced:
        cntxt.fail_reason = f"{expr}: Labeled triple expression not found"
        return False
    return matchesCardinality(cntxt, T, referenced)
def matchesTripleExprRef(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, _: DebugContext) -> bool:
    """
    expr is an tripleExprRef and satisfies(value, tripleExprWithId(tripleExprRef), G, m).
    The tripleExprWithId function is defined in Triple Expression Reference Requirement below.

    :param cntxt: Evaluation context used to resolve the reference; receives the failure reason
    :param T: Triples to be matched
    :param expr: Label of the referenced triple expression
    :param _: Debug context (unused)
    :return: True if every triple in T matches the referenced expression, False if the reference is unknown
    """
    # Keep the label in its own name so it is still available for the failure message
    referenced = cntxt.tripleExprFor(expr)
    if referenced is None:
        cntxt.fail_reason = f"{expr}: Reference not found"
        return False
    return all(matchesTripleConstraint(cntxt, t, referenced) for t in T)
def nodeSatisfiesStringFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema String Facet Constraints <http://shex.io/shex-semantics/#xs-string>`_

    String facet constraints apply to the lexical form of the RDF Literals and IRIs and blank node
    identifiers (see note below regarding access to blank node identifiers).

    :param cntxt: Evaluation context -- receives the failure reason when a facet is violated
    :param n: Node to test
    :param nc: Node constraint carrying the optional length / minlength / maxlength / pattern (+ flags) facets
    :param _c: Debug context (unused)
    :return: True if every facet that is present is satisfied (or none is present)
    """
    if nc.length is None and nc.minlength is None and nc.maxlength is None and nc.pattern is None:
        return True

    # Let lex =
    #
    #  * if the value n is an RDF Literal, the lexical form of the literal (see[rdf11-concepts] section 3.3 Literals).
    #  * if the value n is an IRI, the IRI string (see[rdf11-concepts] section 3.2 IRIs).
    #  * if the value n is a blank node, the blank node identifier (see[rdf11-concepts] section 3.4 Blank Nodes).
    lex = str(n)
    # Let len = the number of unicode codepoints in lex
    lex_len = len(lex)

    # For a node n and constraint value v, nodeSatisfies(n, v):
    #
    #  * for "length" constraints, v = len,
    #  * for "minlength" constraints, v >= len,
    #  * for "maxlength" constraints, v <= len,
    #  * for "pattern" constraints, v is unescaped into a valid XPath 3.1 regular expression[xpath-functions-31]
    #    re and invoking fn:matches(lex, re) returns fn:true. If the flags parameter is present, it is passed
    #    as a third argument to fn:matches. The pattern may have XPath 3.1 regular expression escape sequences
    #    per the modified production [10] in section 5.6.1.1 as well as numeric escape sequences of the
    #    form 'u' HEX HEX HEX HEX or 'U' HEX HEX HEX HEX HEX HEX HEX HEX. Unescaping replaces numeric escape
    #    sequences with the corresponding unicode codepoint
    # TODO: Figure out whether we need to connect this to the lxml exslt functions
    # TODO: Map flags if not
    # The facets are checked in a fixed order and the first violation is the one reported
    if nc.length is not None and lex_len != nc.length:
        cntxt.fail_reason = f"String length mismatch - expected: {nc.length} actual: {lex_len}"
        return False
    if nc.minlength is not None and lex_len < nc.minlength:
        cntxt.fail_reason = f"String length violation - minimum: {nc.minlength} actual: {lex_len}"
        return False
    if nc.maxlength is not None and lex_len > nc.maxlength:
        cntxt.fail_reason = f"String length violation - maximum: {nc.maxlength} actual: {lex_len}"
        return False
    if nc.pattern is not None and not pattern_match(nc.pattern, nc.flags, lex):
        cntxt.fail_reason = f"Pattern match failure - pattern: {nc.pattern} flags:{nc.flags} string: {lex}"
        return False
    return True
def matchesCardinality(cntxt: Context, T: RDFGraph, expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext, extras: Optional[Set[URIRef]] = None) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: Evaluation context -- receives the failure reasons
    :param T: Triples to be matched
    :param expr: Expression whose ``min`` / ``max`` give the cardinality (each defaults to 1 when None)
    :param c: Debug context
    :param extras: Predicates whose triples are left out of the upper-bound check for a triple constraint
    :return: True if T satisfies expr under its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = 1 if expr.min is None else expr.min
    max_ = 1 if expr.max is None else expr.max
    cardinality_text = f"{{{min_},{'*' if max_ == -1 else max_}}}"
    n_triples = len(T)

    if c.debug and (min_ != 0 or n_triples != 0):
        print(f"{cardinality_text} matching {n_triples} triples")
    if min_ == 0 and n_triples == 0:
        return True

    if not isinstance(expr, ShExJ.TripleConstraint):
        # Grouping expression: succeed on the first partition whose every part matches
        if any(all(matchesExpr(cntxt, part, expr) for part in partition)
               for partition in _partitions(T, min_, max_)):
            return True
        if min_ != 1 or max_ != 1:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {n_triples} triples cannot be partitioned into {cardinality_text} passing groups"
        return False

    # Triple constraint: lower bound first
    if n_triples < min_:
        if n_triples > 0:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {n_triples} triples less than {cardinality_text}"
        else:
            cntxt.fail_reason = f" No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
        return False

    # Don't include extras in the cardinality check -- must_match is the set of things NOT consumed in extra
    must_match = RDFGraph([t for t in T if t.p not in extras]) if extras else T
    if 0 <= max_ < len(must_match):
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f" {n_triples} triples exceeds max {cardinality_text}"
        return False
    if len(must_match):
        return all(matchesTripleConstraint(cntxt, t, expr) for t in must_match)
    return any(matchesTripleConstraint(cntxt, t, expr) for t in T)
def predicates_in_tripleexpr(expression: ShExJ.tripleExpr, cntxt: Context) -> Set[IRIREF]:
    """ Return the set of predicates of every TripleConstraint visited within ``expression``

    :param expression: Triple expression to scan
    :param cntxt: Context whose ``visit_triple_expressions`` drives the traversal
    :return: Predicates found
    """
    def collect(found: Set[IRIREF], visited: ShExJ.tripleExpr, _cntxt: Context) -> None:
        if isinstance(visited, ShExJ.TripleConstraint):
            found.add(visited.predicate)

    result = set()
    cntxt.visit_triple_expressions(expression, collect, result)
    return result
def nodeSatisfiesNumericFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema Numeric Facet Constraints <http://shex.io/shex-semantics/#xs-numeric>`_

    Numeric facet constraints apply to the numeric value of RDF Literals with datatypes listed in SPARQL 1.1
    Operand Data Types[sparql11-query]. Numeric constraints on non-numeric values fail. totaldigits and
    fractiondigits constraints on values not derived from xsd:decimal fail.

    :param cntxt: Evaluation context -- receives the failure reason when a facet is violated
    :param n: Node to test
    :param nc: Node constraint carrying the optional min/max inclusive/exclusive, totaldigits and
        fractiondigits facets
    :param _c: Debug context (unused)
    :return: True if every facet that is present is satisfied (or none is present)
    """
    facets = (nc.mininclusive, nc.minexclusive, nc.maxinclusive, nc.maxexclusive, nc.totaldigits, nc.fractiondigits)
    if all(facet is None for facet in facets):
        return True

    if not is_numeric(n):
        cntxt.fail_reason = f"Numeric test on non-number: {n}"
        return False
    v = n.value
    if not isinstance(v, numbers.Number):
        cntxt.fail_reason = f"Numeric test on non-number: {v}"
        return False

    # The facets are checked in a fixed order and the first violation is the one reported.  Each test is the
    # negation of the corresponding success condition so that unordered values (e.g. NaN) are rejected.
    if nc.mininclusive is not None and not v >= nc.mininclusive:
        cntxt.fail_reason = f"Numeric value violation - minimum inclusive: {nc.mininclusive} actual: {v}"
        return False
    if nc.minexclusive is not None and not v > nc.minexclusive:
        cntxt.fail_reason = f"Numeric value violation - minimum exclusive {nc.minexclusive} actual: {v}"
        return False
    if nc.maxinclusive is not None and not v <= nc.maxinclusive:
        cntxt.fail_reason = f"Numeric value violation - maximum inclusive: {nc.maxinclusive} actual : {v}"
        return False
    if nc.maxexclusive is not None and not v < nc.maxexclusive:
        cntxt.fail_reason = f"Numeric value violation - maximum exclusive: {nc.maxexclusive} actual: {v}"
        return False
    if nc.totaldigits is not None:
        n_total = total_digits(n)
        if n_total is None or n_total > nc.totaldigits:
            cntxt.fail_reason = f"Numeric value violation - max total digits: {nc.totaldigits} value: {v}"
            return False
    if nc.fractiondigits is not None:
        # Must look at the fraction digits here (not the total digits) to mirror the pass condition
        n_fraction = fraction_digits(n)
        if n_fraction is None or n_fraction > nc.fractiondigits:
            cntxt.fail_reason = f"Numeric value violation - max fractional digits: {nc.fractiondigits} value: {v}"
            return False
    return True
def satisfiesExternal(cntxt: Context, n: Node, se: ShExJ.ShapeExternal, c: DebugContext) -> bool:
    """ Se is a ShapeExternal and implementation-specific mechanisms not defined in this specification
    indicate success.

    Here the external shape is looked up through ``cntxt.external_shape_for`` and n is tested against it.

    :param cntxt: Evaluation context -- resolves the external shape and receives the failure reason
    :param n: Node to test
    :param se: External shape reference
    :param c: Debug context
    :return: Result of ``satisfies`` on the resolved shape, or False if it cannot be resolved
    """
    if c.debug:
        print(f"id: {se.id}")
    resolved = cntxt.external_shape_for(se.id)
    if not resolved:
        cntxt.fail_reason = f"{se.id}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, resolved)
def triple_constraints_in_expression(expression: ShExJ.shapeExpr, cntxt: Context) -> List[ShExJ.TripleConstraint]:
    """ Return, in visiting order, every TripleConstraint visited within ``expression``

    :param expression: Expression handed to ``cntxt.visit_triple_expressions``
    :param cntxt: Context that drives the traversal
    :return: Triple constraints found
    """
    def collect(found: List[ShExJ.TripleConstraint], visited: ShExJ.TripleConstraint, _: Context) -> None:
        if isinstance(visited, ShExJ.TripleConstraint):
            found.append(visited)

    constraints = []
    cntxt.visit_triple_expressions(expression, collect, constraints)
    return constraints
def isValid(cntxt: Context, m: FixedShapeMap) -> Tuple[bool, List[str]]:
    """`5.2 Validation Definition <http://shex.io/shex-semantics/#validation>`_

    The expression isValid(G, m) indicates that for every nodeSelector/shapeLabel pair (n, s) in m, s has a
    corresponding shape expression se and satisfies(n, se, G, m). satisfies is defined below for each form
    of shape expression

    :param cntxt: evaluation context - includes graph and schema
    :param m: list of NodeShape pairs to test
    :return: Success/failure indicator and, if fail, a list of failure reasons
    """
    if not cntxt.is_valid:
        return False, cntxt.error_list

    for nodeshapepair in m:
        n = nodeshapepair.nodeSelector
        if not isinstance_(n, Node):
            # n may be any object here, so it is formatted rather than concatenated
            return False, [f"{n}:Triple patterns are not implemented"]

        # The focus must appear as a subject or as an object in the graph.
        # The third test below is because the spec asserts that completely empty graphs pass in certain
        # circumstances
        if not (next(cntxt.graph.predicate_objects(n), None) or
                next(cntxt.graph.subject_predicates(n), None) or
                not next(cntxt.graph.triples((None, None, None)), None)):
            return False, [f"Focus: {n} not in graph"]

        label = nodeshapepair.shapeLabel
        use_start = label is None or label is START
        s = cntxt.shapeExprFor(START if use_start else label)
        cntxt.current_node = ParseNode(satisfies, s, n, cntxt)
        if not s:
            if use_start:
                cntxt.fail_reason = "START node is not specified or is invalid"
            else:
                cntxt.fail_reason = f"Shape: {label} not found in Schema"
            return False, cntxt.process_reasons()

        if not satisfies(cntxt, n, s):
            cntxt.current_node.result = False
            return False, cntxt.process_reasons()
        cntxt.current_node.result = True
    return True, []
def wrapper(cntxt: Context, T: RDFGraph, expr: JSGObject) -> bool:
    """ Call the wrapped function ``f`` on (cntxt, T, expr) with parse-tree bookkeeping and debug tracing.

    A new ParseNode is attached beneath the current one for the duration of the call, the debug context is
    pushed / popped around it, and the result is stored on the node before the previous node is restored.

    :return: whatever ``f`` returned
    """
    caller_node = cntxt.current_node
    cntxt.current_node = ParseNode(f, expr, T, cntxt)
    caller_node.nodes.append(cntxt.current_node)

    debug_cntxt = cntxt.debug_context
    debug_cntxt.splus()
    if debug_cntxt.debug:
        entry_text = debug_cntxt.i(0, f'--> {f.__name__} {debug_cntxt.d()}')
        debug_cntxt.print(entry_text, not newline)
    rval = f(cntxt, T, expr, debug_cntxt)
    if debug_cntxt.debug:
        exit_text = debug_cntxt.i(0, f'<-- {f.__name__} {debug_cntxt.d()} {rval}')
        debug_cntxt.print(exit_text)
    debug_cntxt.sminus()

    cntxt.current_node.result = rval
    cntxt.current_node = caller_node
    return rval
def evaluate(g: Graph,
             schema: Union[str, ShExJ.Schema],
             focus: Optional[Union[str, URIRef, IRIREF]],
             start: Optional[Union[str, URIRef, IRIREF, START, START_TYPE]] = None,
             debug_trace: bool = False) -> Tuple[bool, Optional[str]]:
    """ Evaluate focus node `focus` in graph `g` against shape `start` in ShEx schema `schema`

    :param g: Graph containing RDF
    :param schema: ShEx Schema -- if str, it will be parsed
    :param focus: focus node in g.  Anything that is not already a URIRef is converted with ``URIRef(str(focus))``
    :param start: Starting shape.  If omitted, the Schema start shape is used
    :param debug_trace: Turn on debug tracing
    :return: (success indicator, failure reasons joined with newlines -- or an error message when the schema
        cannot be parsed or no starting shape is available)
    """
    if isinstance(schema, str):
        schema = SchemaLoader().loads(schema)
    if schema is None:
        return False, "Error parsing schema"

    if not isinstance(focus, URIRef):
        focus = URIRef(str(focus))

    if start is None:
        # Fall back on the schema's own start shape
        if not schema.start:
            return False, "No starting shape"
        start = str(schema.start)
    if not (isinstance(start, IRIREF) or start is START or start is START_TYPE):
        start = IRIREF(str(start))

    cntxt = Context(g, schema)
    cntxt.debug_context.debug = debug_trace
    shape_map = FixedShapeMap()
    shape_map.add(ShapeAssociation(focus, start))
    test_result, reasons = isValid(cntxt, shape_map)
    return test_result, '\n'.join(reasons)
def matchesTripleConstraint(cntxt: Context, t: RDFTriple, expr: ShExJ.TripleConstraint, c: DebugContext) -> bool:
    """
    expr is a TripleConstraint and:

    * t is a triple
    * t's predicate equals expr's predicate. Let value be t's subject if inverse is true, else t's object.
    * if inverse is true, t is in arcsIn, else t is in arcsOut.

    :param cntxt: Evaluation context -- receives the failure reason on a predicate mismatch
    :param t: Triple to test
    :param expr: Triple constraint to test it against
    :param c: Debug context
    :return: True if the predicates match and the value satisfies ``expr.valueExpr`` (when there is one)
    """
    # Imported here rather than at module level -- presumably to avoid a circular import (TODO confirm)
    from pyshex.shape_expressions_language.p5_3_shape_expressions import satisfies

    if c.debug:
        # t is a triple object, not a str, so it has to be formatted rather than concatenated
        print(c.i(1, f" triple: {t}"))
        print(c.i(1, '', expr._as_json_dumps().split('\n')))

    if not uriref_matches_iriref(t.p, expr.predicate):
        cntxt.fail_reason = f"Predicate mismatch: {t.p} ≠ {expr.predicate}"
        return False
    value = t.s if expr.inverse else t.o
    return expr.valueExpr is None or satisfies(cntxt, value, expr.valueExpr)
def matchesCardinality(cntxt: Context, T: RDFGraph, expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: Evaluation context -- receives the failure reasons
    :param T: Triples to be matched
    :param expr: Expression whose ``min`` / ``max`` give the cardinality (each defaults to 1 when None)
    :param c: Debug context
    :return: True if T satisfies expr under its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = expr.min if expr.min is not None else 1
    max_ = expr.max if expr.max is not None else 1
    # Renders as "{min,max}" with "*" standing for an unbounded (-1) maximum
    cardinality_text = f"{{{min_},{'*' if max_ == -1 else max_}}}"
    n_triples = len(T)

    if c.debug and (min_ != 0 or n_triples != 0):
        print(f"{cardinality_text} matching {n_triples} triples")
    if min_ == 0 and n_triples == 0:
        return True

    if isinstance(expr, ShExJ.TripleConstraint):
        if n_triples < min_:
            if n_triples > 0:
                _fail_triples(cntxt, T)
                cntxt.fail_reason = f" {n_triples} triples less than {cardinality_text}"
            else:
                cntxt.fail_reason = f" No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
            return False
        if 0 <= max_ < n_triples:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {n_triples} triples exceeds max {cardinality_text}"
            return False
        return all(matchesTripleConstraint(cntxt, t, expr) for t in T)

    # Grouping expression: succeed on the first partition whose every part matches
    for partition in _partitions(T, min_, max_):
        if all(matchesExpr(cntxt, part, expr) for part in partition):
            return True
    if min_ != 1 or max_ != 1:
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f" {n_triples} triples cannot be partitioned into {cardinality_text} passing groups"
    return False
def extern_shape_for(self, ref: ShExJ.IRIREF) -> Optional[ShExJ.Shape]:
    """ Look ``ref`` up in each external schema listed in ``self.externs``, in order.

    :param ref: Identifier of the shape being sought
    :return: The first matching entry of an external schema's id map, or None if no schema has one
    """
    for extern in self.externs:
        extern_schema = self.owner.schema_loader.load(extern)
        if not extern_schema:
            # Schema could not be loaded -- move on to the next one
            continue
        id_map = Context(None, extern_schema).schema_id_map
        if ref in id_map:
            return id_map[ref]
    return None
def satisfiesShapeExprRef(cntxt: Context, n: Node, se: ShExJ.shapeExprLabel, c: DebugContext) -> bool:
    """ Se is a shapeExprRef and there exists in the schema a shape expression se2 with that id
    and satisfies(n, se2, G, m).

    :param cntxt: Evaluation context -- supplies the schema and receives the failure reason
    :param n: Node to test
    :param se: Identifier of the referenced shape expression
    :param c: Debug context
    :return: Result of ``satisfies`` on the first schema shape whose id equals se, or False if there is none
    """
    if c.debug:
        print(f"id: {se}")
    referenced = next((shape for shape in cntxt.schema.shapes if shape.id == se), None)
    if referenced is None:
        cntxt.fail_reason = f"{se}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, referenced)
def wrapper(cntxt: Context, n: Node, expr: JSGObject) -> bool:
    """ Call the wrapped function ``f`` on (cntxt, n, expr) with parse-tree bookkeeping and debug tracing.

    A new ParseNode is attached beneath the current one for the duration of the call, the debug context is
    pushed / popped around it, and the result is recorded on the node before the previous node is restored.
    Tracing is suppressed for expressions for which ``skip_trace(expr)`` is true.

    :return: whatever ``f`` returned
    """
    parent_parse_node = cntxt.current_node
    cntxt.current_node = ParseNode(f, expr, n, cntxt)
    parent_parse_node.nodes.append(cntxt.current_node)

    c = cntxt.debug_context
    c.splus()
    if c.debug and not skip_trace(expr):
        # f-strings: c.d() and rval are not guaranteed to be strings (rval is a bool)
        c.print(c.i(0, f'--> {f.__name__} {c.d()} node: {cntxt.n3_mapper.n3(n)}'), not newline)
    rval = f(cntxt, n, expr, c)
    if c.debug and not skip_trace(expr):
        c.print(c.i(0, f'<-- {f.__name__} {c.d()} node: {cntxt.n3_mapper.n3(n)}:{rval}'))
    c.sminus()

    cntxt.current_node.set_result(rval)
    cntxt.current_node = parent_parse_node
    return rval
def nodeSatisfiesValues(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 Values Constraint <http://shex.io/shex-semantics/#values>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n matches some valueSetValue vsv in v.

    :param cntxt: Evaluation context -- receives the failure reason
    :param n: Node to test
    :param nc: Node constraint; a ``values`` of None means there is nothing to test
    :param _c: Debug context (unused)
    :return: True if n matches at least one member of the value set (or there is no value set)
    """
    if nc.values is None:
        return True
    for vsv in nc.values:
        if _nodeSatisfiesValue(cntxt, n, vsv):
            return True

    # Only the first 60 characters of the serialized constraint go into the message
    constraint_text = as_json(cntxt.type_last(nc), indent=None)[:60]
    cntxt.fail_reason = "".join(
        ("Node: ", cntxt.n3_mapper.n3(n), " not in value set:\n\t", constraint_text, "..."))
    return False
def nodeSatisfiesNodeKind(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, c: DebugContext) -> bool:
    """ `5.4.2 Node Kind Constraints <http://shex.io/shex-semantics/#nodeKind>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if:

    * v = "iri" and n is an IRI.
    * v = "bnode" and n is a blank node.
    * v = "literal" and n is a Literal.
    * v = "nonliteral" and n is an IRI or blank node.

    :param cntxt: Evaluation context -- receives the failure reason
    :param n: Node to test
    :param nc: Node constraint; a ``nodeKind`` of None means there is nothing to test
    :param c: Debug context
    :return: True if n is of the required kind (or no kind is required)
    """
    kind = nc.nodeKind
    if kind is None:
        return True
    if c.debug:
        print(f" Kind: {kind}")

    # Node kind name -> the node class(es) that satisfy it
    acceptable = (
        ('iri', URIRef),
        ('bnode', BNode),
        ('literal', Literal),
        ('nonliteral', (URIRef, BNode)),
    )
    for kind_name, node_types in acceptable:
        if kind == kind_name and isinstance(n, node_types):
            return True
    cntxt.fail_reason = f"Node kind mismatch have: {type(n).__name__} expected: {nc.nodeKind}"
    return False
def satisfiesShape(cntxt: Context, n: Node, S: ShExJ.Shape, c: DebugContext) -> bool:
    """ `5.5.2 Semantics <http://shex.io/shex-semantics/#triple-expressions-semantics>`_

    For a node `n`, shape `S`, graph `G`, and shapeMap `m`, `satisfies(n, S, G, m)` if and only if:

    * `neigh(G, n)` can be partitioned into two sets matched and remainder such that
      `matches(matched, expression, m)`. If expression is absent, remainder = `neigh(G, n)`.

    :param n: focus node
    :param S: Shape to be satisfied
    :param cntxt: Evaluation context
    :param c: Debug context
    :return: true iff `satisfies(n, S, cntxt)`
    """
    # Recursion detection.  If start_evaluating returns a boolean value, this is the assumed result of the shape
    # evaluation.  If it returns None, then an initial evaluation is needed
    rslt = cntxt.start_evaluating(n, S)

    if rslt is None:
        cntxt.evaluate_stack.append((n, S.id))
        predicates = directed_predicates_in_expression(S, cntxt)
        matchables = RDFGraph()

        # Note: The code below does an "over-slurp" for the sake of expediency.  If you are interested in
        #       getting EXACTLY the needed triples, set cntxt.over_slurp to false
        if isinstance(cntxt.graph, SlurpyGraph) and cntxt.over_slurp:
            with slurper(cntxt, n, S) as g:
                _ = g.triples((n, None, None))

        # Gather every triple touching n through one of the shape's predicates, in the direction(s) used
        for predicate, direction in predicates.items():
            with slurper(cntxt, n, S) as g:
                matchables.add_triples(g.triples((n if direction.is_fwd else None,
                                                  iriref_to_uriref(predicate),
                                                  n if direction.is_rev else None)))

        if c.debug:
            print(c.i(1, "predicates:", sorted(cntxt.n3_mapper.n3(p) for p in predicates.keys())))
            print(c.i(1, "matchables:", sorted(cntxt.n3_mapper.n3(m) for m in matchables)))
            print()

        if S.closed:
            # TODO: Is this working correctly on reverse items?
            non_matchables = RDFGraph([t for t in arcsOut(cntxt.graph, n) if t not in matchables])
            if len(non_matchables):
                cntxt.fail_reason = "Unmatched triples in CLOSED shape:"
                # Triples are not strings -- format them rather than concatenating
                cntxt.fail_reason = '\n'.join(f"\t{t}" for t in non_matchables)
                if c.debug:
                    print(c.i(0,
                              f"<--- Satisfies shape {c.d()} FAIL - "
                              f"{len(non_matchables)} non-matching triples on a closed shape"))
                    print(c.i(1, "", list(non_matchables)))
                    print()
                # NOTE(review): this early exit skips done_evaluating and the evaluate_stack pop below --
                # confirm that is intended
                return False

        # Evaluate the actual expression.  Start assuming everything matches...
        if S.expression:
            if matches(cntxt, matchables, S.expression):
                rslt = True
            else:
                extras = {iriref_to_uriref(e) for e in S.extra} if S.extra is not None else set()
                if len(extras):
                    permutable_matchables = RDFGraph([t for t in matchables if t.p in extras])
                    non_permutable_matchables = RDFGraph([t for t in matchables if t not in permutable_matchables])
                    if c.debug:
                        print(c.i(1, "Complete match failed -- evaluating extras", list(extras)))
                    # Retry with each subset of the EXTRA triples put back alongside the non-extra ones
                    for matched, _remainder in partition_2(permutable_matchables):
                        permutation = non_permutable_matchables.union(matched)
                        if matches(cntxt, permutation, S.expression):
                            rslt = True
                            break
                rslt = rslt or False
        else:
            rslt = True         # Empty shape

        # If an assumption was made and the result doesn't match the assumption, switch directions and try again
        done, consistent = cntxt.done_evaluating(n, S, rslt)
        if not done:
            rslt = satisfiesShape(cntxt, n, S)
        rslt = rslt and consistent

        cntxt.evaluate_stack.pop()
    return rslt
def eval_entry(self, entry_name: str) -> bool:
    """ Run the manifest entry (or entries) registered under ``entry_name`` and record the outcome.

    An entry is skipped -- and True returned -- while the START_AFTER entry has not been reached, when it
    carries a trait in ``skip_traits`` or is listed in ``self.expected_failures`` (unless TEST_SKIPS_ONLY is
    set, in which case only those entries are run), or when its data file cannot be loaded.  A missing schema
    or an unresolvable focus returns False.  Otherwise the entry is validated with ``isValid`` and the outcome
    is recorded through ``self.pass_`` / ``self.fail``.

    NOTE(review): the original indentation of this method was lost; the nesting below -- in particular the
    final ``return`` sitting inside the ``for`` loop -- is a reconstruction and should be confirmed.

    :param entry_name: key into ``self.mfst.entries``
    :return: True if the entry passed or was skipped, False if it failed
    """
    mes = self.mfst.entries[entry_name]
    for me in mes:  # There can be more than one entry per name...
        # Determine the start point
        if not self.started:
            if not me.name.startswith(START_AFTER):
                self.start_skipped += 1
                return True
            else:
                self.started = True
                if VERBOSE:
                    print(
                        f"STARTED - Skipped {self.start_skipped} entries")

        # Determine whether this entry should be skipped
        should_skip = False

        # Skip
        skipped_traits = list(me.traits.intersection(skip_traits))
        if skipped_traits:
            if VERBOSE:
                print(
                    f"Skipping {me.name} ({', '.join([self.URIname(t) for t in me.traits])}) - Skipped trait"
                )
            # Tally the skip under the first offending trait, abbreviated with the sht: prefix
            key = str(skipped_traits[0]).replace(str(SHT), 'sht:')
            if key not in self.skip_reasons:
                self.skip_reasons[key] = 0
            self.skip_reasons[key] = self.skip_reasons[key] + 1
            self.skip(me.name)
            should_skip = True
        elif me.name in self.expected_failures:
            if VERBOSE:
                print(
                    f"Skipping {me.name} ({', '.join([self.URIname(t) for t in me.traits])})"
                    f" - {self.expected_failures[me.name]}")
            # Tally the skip under the recorded expected-failure reason
            key = self.expected_failures[me.name]
            if key not in self.skip_reasons:
                self.skip_reasons[key] = 0
            self.skip_reasons[key] = self.skip_reasons[key] + 1
            self.skip(me.name)
            should_skip = True
        # Normal mode runs only the non-skipped entries; TEST_SKIPS_ONLY mode runs only the skipped ones
        if should_skip and not TEST_SKIPS_ONLY:
            return True
        if TEST_SKIPS_ONLY and not should_skip:
            return True

        # Validate the entry
        if VERBOSE:
            shex_uri = self.mfst.schema_loader.location_rewrite(
                me.schema_uri)
            data_uri = self.mfst.data_redirector.uri_for(me.data_uri) \
                if self.mfst.data_redirector else me.data_uri
            print(
                f"Testing {me.name} ({'P' if me.should_pass else 'F'}): {shex_uri} - {data_uri}"
            )
        g, s = me.data_graph(), me.shex_schema()
        if g is None and me.data_uri:
            print("\t ERROR: Unable to load data file")
            print(f"\t TRAITS: ({','.join(me.traits)})")
            self.skip(me.name)
            return True
        if not s:
            print(f"\t ERROR: Unable to load schema {me.schema_uri}")
            print(f"\t TRAITS: ({','.join(me.traits)})")
            self.nskipped += 1
            self.skip(me.name)
            return False
        cntxt = Context(g, s, me.extern_shape_for,
                        base_namespace=BASE_FILE_LOC)
        cntxt.debug_context.debug = DEBUG
        map_ = FixedShapeMap()
        focus = self.mfst.data_uri(me.focus)
        if not focus:
            print("\t***** FAIL *****")
            print(f"\tFocus: {me.focus} not in schema")
            print(f"\t TRAITS: ({','.join(me.traits)})")
            self.fail(me.name)
            return False
        # if ':' not in focus:
        #     focus = "file://" + focus
        map_.add(
            ShapeAssociation(
                focus, ShExJ.IRIREF(me.shape) if me.shape else START))

        #################################
        #  Actual validation occurs here
        #################################
        rslt = isValid(cntxt, map_)

        # NOTE(review): an entry that is expected to fail is counted as passing whatever the validation
        # result was -- confirm this leniency is intended
        test_result, reasons = rslt[0] or not me.should_pass, rslt[1]

        # Analyze the result
        if not VERBOSE and not test_result:
            print(
                f"Failed {me.name} ({'P' if me.should_pass else 'F'}): {me.schema_uri} - {me.data_uri}"
            )
            print(f"\t TRAITS: ({','.join(me.traits)})")
        if test_result:
            self.pass_(me.name)
        else:
            if VERBOSE:
                print("\t**** FAIL *****")
                print(f"\t TRAITS: ({','.join(me.traits)})")
                for reason in reasons:
                    print(f"\t{reason}")
            self.fail(me.name)
        return test_result
def triple_expr_finder(predicates: Dict[IRIREF, PredDirection], expr: ShExJ.shapeExpr, cntxt_: Context) -> None:
    """ Shape visitor: run ``predicate_finder`` over the triple expression of ``expr``, accumulating into
    ``predicates``.  Anything that is not a Shape, or has no expression, is ignored.
    """
    if not isinstance(expr, ShExJ.Shape):
        return
    if expr.expression is None:
        return
    cntxt_.visit_triple_expressions(expr.expression, predicate_finder, predicates)
def setup_context(shex_str: str, rdf_str: Optional[str]) -> Context:
    """ Build an evaluation Context from the schema and graph produced by ``setup_test``.

    When ``setup_test`` supplies no graph, a fresh Graph parsed from ``rdf_header`` is used instead.

    :param shex_str: ShEx schema text handed to ``setup_test``
    :param rdf_str: RDF text handed to ``setup_test`` (may be None)
    :return: Context over the graph and schema
    """
    schema, graph = setup_test(shex_str, rdf_str)
    if graph is None:
        graph = Graph()
        graph.parse(rdf_header)
    return Context(graph, schema)
def evaluate(self,
             rdf: Optional[Union[str, Graph]] = None,
             shex: Optional[Union[str, ShExJ.Schema]] = None,
             focus: Optional[URIPARM] = None,
             start: STARTPARM = None,
             rdf_format: Optional[str] = None,
             debug: Optional[bool] = None,
             debug_slurps: Optional[bool] = None,
             over_slurp: Optional[bool] = None,
             output_sink: Optional[Callable[[EvaluationResult], bool]] = None) -> List[EvaluationResult]:
    """ Validate every focus node against every start shape and report one EvaluationResult per pair.

    If any of rdf, shex, focus or start is supplied, a temporary ShExEvaluator is built from the supplied
    values, falling back on this evaluator's settings for the ones that are omitted; otherwise this evaluator
    is used as is.

    :param rdf: RDF source or graph -- overrides ``self.g``
    :param shex: ShEx schema -- overrides ``self._schema``
    :param focus: focus node(s) -- overrides ``self.focus``
    :param start: start shape(s) -- overrides ``self.start`` (START is used if neither is set)
    :param rdf_format: format of rdf -- overrides ``self.rdf_format``
    :param debug: debug tracing switch -- overrides ``self.debug``
    :param debug_slurps: slurp tracing switch -- overrides ``self.debug_slurps``
    :param over_slurp: over-slurp switch -- overrides ``self.over_slurp``
    :param output_sink: receiver of each result -- overrides ``self.output_sink``.  Processing stops as soon
        as it returns a false value.  When no sink is available the results are collected in
        ``self.eval_result``
    :return: ``self.eval_result`` -- populated only when the default collecting sink is in use
    """
    if rdf is not None or shex is not None or focus is not None or start is not None:
        evaluator = ShExEvaluator(rdf=rdf if rdf is not None else self.g,
                                  schema=shex if shex is not None else self._schema,
                                  focus=focus if focus is not None else self.focus,
                                  start=start if start is not None else self.start if self.start else START,
                                  rdf_format=rdf_format if rdf_format is not None else self.rdf_format,
                                  output_sink=output_sink if output_sink is not None else self.output_sink)
    else:
        evaluator = self

    self.eval_result = []
    if evaluator.output_sink is None:
        def sink(e: EvaluationResult) -> bool:
            self.eval_result.append(e)
            return True
        evaluator.output_sink = sink

    processing = True
    self.nerrors = 0
    self.nnodes = 0

    if START in evaluator.start and evaluator._schema.start is None:
        self.nerrors += 1
        evaluator.output_sink(EvaluationResult(False, None, None, 'START node is not specified'))
        return self.eval_result

    # Experimental -- xfer all ShEx namespaces to g
    if self.pfx and evaluator.g is not None:
        self.pfx.add_bindings(evaluator.g)

    cntxt = Context(evaluator.g, evaluator._schema)
    cntxt.debug_context.debug = debug if debug is not None else self.debug
    cntxt.debug_context.trace_slurps = debug_slurps if debug_slurps is not None else self.debug_slurps
    # Honour the over_slurp argument when given, following the same pattern as debug and debug_slurps
    cntxt.over_slurp = over_slurp if over_slurp is not None else self.over_slurp

    for focus in evaluator.foci:
        self.nnodes += 1
        # Expand the requested starts into concrete start shapes for this focus
        start_list: List[Union[URIRef, START]] = []
        for start in evaluator.start:
            if start is START:
                start_list.append(evaluator._schema.start)
            elif isinstance(start, START_TYPE):
                start_list += list(evaluator.g.objects(focus, start.start_predicate))
            else:
                start_list.append(start)

        if start_list:
            for start_node in start_list:
                map_ = FixedShapeMap()
                map_.add(ShapeAssociation(focus, start_node))
                cntxt.reset()
                success, fail_reasons = isValid(cntxt, map_)
                if not success:
                    self.nerrors += 1
                if not evaluator.output_sink(EvaluationResult(success, focus, start_node,
                                                              '\n'.join(fail_reasons) if not success else '')):
                    processing = False
                    break
        else:
            self.nerrors += 1
            evaluator.output_sink(EvaluationResult(False, focus, None, "No start node located"))
        if not processing:
            break
    return self.eval_result
def triple_expr_finder(predicates: List[URIRef], expr: ShExJ.shapeExpr, cntxt: Context) -> None:
    """ Shape visitor: run ``predicate_finder`` over the triple expression of ``expr``, accumulating into
    ``predicates``.  Anything that is not a Shape, or has no expression, is ignored.
    """
    if not isinstance(expr, ShExJ.Shape):
        return
    if expr.expression is None:
        return
    cntxt.visit_triple_expressions(expr.expression, predicate_finder, predicates)