def run_test(self, manifest_uri: str, num_entries: Optional[int] = None, verbose: bool = True,
             debug: bool = False, stop_on_fail: bool = False, debug_slurps: bool = False,
             save_graph_dir: Optional[str] = None) -> List[EvaluationResult]:
    """ Run the test identified by manifest_uri

    Every case in the manifest supplies a SPARQL endpoint, a ShEx schema URL and a SPARQL query.
    The query is run against the endpoint to obtain the focus nodes, and each focus node is then
    evaluated against the schema using a fresh slurper graph bound to the same endpoint.

    :param manifest_uri: uri of manifest
    :param num_entries: number of focus nodes to test per manifest case. ``None`` means all of them
    :param verbose: True means print each manifest case before running it
    :param debug: debug setting for shex evaluator
    :param stop_on_fail: True means stop the whole run at the first focus node that fails
    :param debug_slurps: True means emit SPARQL_slurper statistics
    :param save_graph_dir: If present, save the final graph of each focus node in this directory
    :return: The evaluation results accumulated up to the point where the run ended
    """
    manifest = loads(self.fetch_uri(manifest_uri))
    rval: List[EvaluationResult] = []
    for case in manifest:
        if verbose:
            print(case._as_json_dumps())
        # The manifest wraps the endpoint and the query in fixed decorations -- strip them off
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        shex = self.fetch_uri(case.schemaURL)
        evaluator = ShExEvaluator(schema=shex, debug=debug)
        prefixes = PrefixLibrary(shex, SKOS=SKOS)
        sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "")
        dfs: List[str] = self.get_sparql_dataframe(sparql_endpoint, sparql_query)
        dfs_slice = dfs[:num_entries] if num_entries is not None else dfs
        for df in dfs_slice:
            # A new slurper per focus node, so a saved graph holds only that node's triples
            slurper = SlurpyGraphWithAgent(sparql_endpoint)
            prefixes.add_bindings(slurper)
            print(f"Evaluating: {df}")
            results = evaluator.evaluate(rdf=slurper, focus=df, debug=debug, debug_slurps=debug_slurps,
                                         over_slurp=False)
            rval += results
            if save_graph_dir:
                # [-1] rather than [1]: a focus without a '/' is used whole instead of raising IndexError
                element_name = df.rsplit('/', 1)[-1]
                file_name = os.path.join(save_graph_dir, element_name + '.ttl')
                print(f"Writing: {file_name}")
                slurper.serialize(file_name, format="turtle")
            if stop_on_fail and not all(r.result for r in results):
                # Return rather than break: a break would only leave the focus loop and the
                # remaining manifest cases would still be evaluated
                return rval
    return rval
class ShExEvaluator:
    """ Shape Expressions Evaluator

    Holds an RDF graph, a ShEx schema, a set of focus nodes and a set of start shapes, and evaluates
    each focus node against each start shape.  Every setting can be supplied to the constructor or
    overridden on an individual call to :meth:`evaluate`.
    """

    def __init__(self,
                 rdf: Optional[Union[str, Graph]] = None,
                 schema: Optional[Union[str, ShExJ.Schema]] = None,
                 focus: Optional[URIPARM] = None,
                 start: STARTPARM = None,
                 rdf_format: str = "turtle",
                 debug: bool = False,
                 debug_slurps: bool = False,
                 over_slurp: Optional[bool] = None,
                 output_sink: Optional[Callable[[EvaluationResult], bool]] = None) -> None:
        """ Evaluator constructor.  All of the parameters below can be set in the constructor or at runtime

        :param rdf: RDF string, file name, URL or Graph for evaluation.
        :param schema: ShEx Schema to evaluate. Can be ShExC, ShExJ or a pre-parsed schema
        :param focus: focus node(s).  If absent, all non-BNode subjects in the graph are evaluated
        :param start: start node(s). If absent, the START node in the schema is used
        :param rdf_format: format for RDF. Default: "Turtle"
        :param debug: emit semi-helpful debug information
        :param debug_slurps: debug graph fetch calls
        :param over_slurp: Controls whether SPARQL slurper does exact or over slurps
        :param output_sink: Function for accepting evaluation results and returns whether to keep evaluating
        """
        self.pfx: Optional[PrefixLibrary] = None
        # rdf_format must be in place before rdf is assigned -- the rdf setter parses with it
        self.rdf_format = rdf_format
        self.g = None
        self.rdf = rdf
        self._schema = None
        self.schema = schema
        self._focus = None
        self.focus = focus
        self.start = start
        self.debug = debug
        self.debug_slurps = debug_slurps
        self.over_slurp = over_slurp
        self.output_sink = output_sink
        self.nerrors = 0            # number of failed evaluations in the most recent evaluate() call
        self.nnodes = 0             # number of focus nodes visited in the most recent evaluate() call
        self.eval_result = []       # results collected by the default sink

    @property
    def rdf(self) -> str:
        """
        :return: The rendering of whatever RDF is currently being evaluated
        """
        rendering = self.g.serialize(format=self.rdf_format)
        # Older rdflib releases return bytes here, newer ones return str -- accept either
        return rendering.decode() if isinstance(rendering, bytes) else rendering

    @rdf.setter
    def rdf(self, rdf: Optional[Union[str, Graph]]) -> None:
        """ Set the RDF DataSet to be evaluated.  If ``rdf`` is a string, the presence of a return is the
        indicator that it is text instead of a location.  A string without a return is treated as a
        location if it contains a ':' and as a source (file) name otherwise.  Anything that is neither a
        Graph nor a string results in an empty Graph.

        :param rdf: File name, URL, representation of rdflib Graph
        """
        if isinstance(rdf, Graph):
            self.g = rdf
        else:
            self.g = Graph()
            if isinstance(rdf, str):
                if '\n' in rdf or '\r' in rdf:
                    self.g.parse(data=rdf, format=self.rdf_format)
                elif ':' in rdf:
                    self.g.parse(location=rdf, format=self.rdf_format)
                else:
                    self.g.parse(source=rdf, format=self.rdf_format)

    @property
    def schema(self) -> Optional[str]:
        """
        :return: The ShExC representation of the schema if one is supplied
        """
        return str(ShExC(self._schema)) if self._schema else None

    @schema.setter
    def schema(self, shex: Optional[Union[str, ShExJ.Schema]]) -> None:
        """ Set the schema to be used.  Schema can either be a ShExC or ShExJ string or a pre-parsed schema.
        The prefix library is rebuilt from the schema text whenever a string is supplied and is cleared
        otherwise.  Assigning ``None`` clears the prefix library but leaves any existing schema in place.

        :param shex: Schema
        :raises ValueError: if the loader does not produce a schema
        """
        self.pfx = None
        if shex is not None:
            if isinstance(shex, ShExJ.Schema):
                self._schema = shex
            else:
                shext = shex.strip()
                loader = SchemaLoader()
                # A line break or a leading '#', '<', '_' or ':' marks schema text; anything else is a location
                if ('\n' in shex or '\r' in shex) or shext[0] in '#<_: ':
                    self._schema = loader.loads(shex)
                else:
                    self._schema = loader.load(shex) if isinstance(shex, str) else shex
                if self._schema is None:
                    raise ValueError("Unable to parse shex file")
                self.pfx = PrefixLibrary(loader.schema_text)

    @property
    def focus(self) -> Optional[List[URIRef]]:
        """
        :return: The list of focus nodes (if any)
        """
        return self._focus

    @property
    def foci(self) -> List[URIRef]:
        """
        :return: The current set of focus nodes -- the explicit focus list if there is one, otherwise the
                 sorted URIRef subjects of the graph
        """
        return self._focus if self._focus else sorted([s for s in set(self.g.subjects()) if isinstance(s, URIRef)])

    @focus.setter
    def focus(self, focus: Optional[URIPARM]) -> None:
        """ Set the focus node(s).  If no focus node is specified, the evaluation will occur for all non-BNode
        graph subjects.  Otherwise it can be a string, a URIRef or a list of string/URIRef combinations

        :param focus: None if focus should be all URIRefs in the graph otherwise a URI or list of URI's
        """
        self._focus = normalize_uriparm(focus) if focus else None

    @property
    def start(self) -> STARTPARM:
        """
        :return: The schema start node(s)
        """
        return self._start

    @start.setter
    def start(self, start: STARTPARM) -> None:
        """ Set the start node(s).  A missing or empty value selects the schema's own START

        :param start: start node(s) or None
        """
        self._start = normalize_startparm(start) if start else [START]

    def evaluate(self,
                 rdf: Optional[Union[str, Graph]] = None,
                 shex: Optional[Union[str, ShExJ.Schema]] = None,
                 focus: Optional[URIPARM] = None,
                 start: STARTPARM = None,
                 rdf_format: Optional[str] = None,
                 debug: Optional[bool] = None,
                 debug_slurps: Optional[bool] = None,
                 over_slurp: Optional[bool] = None,
                 output_sink: Optional[Callable[[EvaluationResult], bool]] = None) -> List[EvaluationResult]:
        """ Evaluate every focus node against every start shape.

        If any of ``rdf``, ``shex``, ``focus`` or ``start`` is supplied, the evaluation is carried out by a
        temporary evaluator that combines the supplied values with this evaluator's settings.  Otherwise
        this evaluator is used as is, and ``rdf_format`` and ``output_sink`` are not consulted.  The error
        and node counters and the result list are always recorded on this evaluator.

        :param rdf: RDF string, file name, URL or Graph to use in place of the current graph
        :param shex: schema to use in place of the current schema
        :param focus: focus node(s) to use in place of the current focus
        :param start: start node(s) to use in place of the current start
        :param rdf_format: RDF format to use in place of the current format
        :param debug: debug setting for this call.  Default: the evaluator's setting
        :param debug_slurps: slurp tracing setting for this call.  Default: the evaluator's setting
        :param over_slurp: over slurp setting for this call.  Default: the evaluator's setting
        :param output_sink: function that receives each result and returns whether to keep evaluating
        :return: the results gathered by the default sink.  The list is empty when a caller supplied sink
                 consumes the results instead
        """
        if rdf is not None or shex is not None or focus is not None or start is not None:
            evaluator = ShExEvaluator(
                rdf=rdf if rdf is not None else self.g,
                schema=shex if shex is not None else self._schema,
                focus=focus if focus is not None else self.focus,
                start=start if start is not None else self.start if self.start else START,
                rdf_format=rdf_format if rdf_format is not None else self.rdf_format,
                output_sink=output_sink if output_sink is not None else self.output_sink)
        else:
            evaluator = self

        self.eval_result = []
        if evaluator.output_sink is None:
            # No sink anywhere -- collect the results on this evaluator and never ask to stop
            def sink(e: EvaluationResult) -> bool:
                self.eval_result.append(e)
                return True
            evaluator.output_sink = sink

        processing = True
        self.nerrors = 0
        self.nnodes = 0

        if START in evaluator.start and evaluator._schema.start is None:
            self.nerrors += 1
            evaluator.output_sink(EvaluationResult(False, None, None, 'START node is not specified'))
            return self.eval_result

        # Experimental -- xfer all ShEx namespaces to g
        if self.pfx and evaluator.g is not None:
            self.pfx.add_bindings(evaluator.g)

        cntxt = Context(evaluator.g, evaluator._schema)
        cntxt.debug_context.debug = debug if debug is not None else self.debug
        cntxt.debug_context.trace_slurps = debug_slurps if debug_slurps is not None else self.debug_slurps
        # Honor the per-call argument; fall back to the evaluator's setting only when it is absent
        cntxt.over_slurp = over_slurp if over_slurp is not None else self.over_slurp

        for focus in evaluator.foci:
            self.nnodes += 1
            # Resolve the start specifications into concrete shapes for this focus node
            start_list: List[Union[URIRef, START]] = []
            for start in evaluator.start:
                if start is START:
                    start_list.append(evaluator._schema.start)
                elif isinstance(start, START_TYPE):
                    start_list += list(evaluator.g.objects(focus, start.start_predicate))
                else:
                    start_list.append(start)
            if start_list:
                for start_node in start_list:
                    map_ = FixedShapeMap()
                    map_.add(ShapeAssociation(focus, start_node))
                    cntxt.reset()
                    success, fail_reasons = isValid(cntxt, map_)
                    if not success:
                        self.nerrors += 1
                    if not evaluator.output_sink(
                            EvaluationResult(success, focus, start_node,
                                             '\n'.join(fail_reasons) if not success else '')):
                        # The sink asked to stop -- abandon the remaining shapes and focus nodes
                        processing = False
                        break
            else:
                self.nerrors += 1
                evaluator.output_sink(EvaluationResult(False, focus, None, "No start node located"))
            if not processing:
                break
        return self.eval_result
def test_basics(self): """ Test basic PrefixLibrary functions: an empty library bound to a Graph, a library built from Turtle @prefix text that is followed by trailing non-prefix text, a library built from a ShExC schema plus keyword prefixes, a library built from keywords alone, and binding known_prefixes to a Graph. Iterating a library is expected to yield (upper-cased prefix, Namespace) pairs in declaration order, with the keyword prefixes after the parsed ones. """ pl = PrefixLibrary() print(str(pl)) g = Graph() pl.add_bindings(g) self.assertEqual("""@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xml: <http://www.w3.org/XML/1998/namespace> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .""", g.serialize(format="turtle").decode().strip()) pl = PrefixLibrary("""@prefix owl: <http://www.w3.org/2002/07/owl#> . @prefix wikibase: <http://wikiba.se/ontology-beta#> . @prefix wds: <http://www.wikidata.org/entity/statement/> . @prefix wdata: <https://www.wikidata.org/wiki/Special:EntityData/> . @prefix skos: <http://www.w3.org/2004/02/skos/core#> . @prefix schema: <http://schema.org/> . @prefix cc: <http://creativecommons.org/ns#> . @prefix geo: <http://www.opengis.net/ont/geosparql#> . @prefix prov: <http://www.w3.org/ns/prov#> . @prefix wdref: <http://www.wikidata.org/reference/> . @prefix wdv: <http://www.wikidata.org/value/> . @prefix wd: <http://www.wikidata.org/entity/> . @prefix wdt: <http://www.wikidata.org/prop/direct/> . @prefix wdtn: <http://www.wikidata.org/prop/direct-normalized/> . @prefix p: <http://www.wikidata.org/prop/> . @prefix ps: <http://www.wikidata.org/prop/statement/> . @prefix psv: <http://www.wikidata.org/prop/statement/value/> . @prefix psn: <http://www.wikidata.org/prop/statement/value-normalized/> . @prefix pq: <http://www.wikidata.org/prop/qualifier/> . @prefix pqv: <http://www.wikidata.org/prop/qualifier/value/> . @prefix pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/> . @prefix pr: <http://www.wikidata.org/prop/reference/> . @prefix prv: <http://www.wikidata.org/prop/reference/value/> . @prefix prn: <http://www.wikidata.org/prop/reference/value-normalized/> . @prefix wdno: <http://www.wikidata.org/prop/novalue/> . 
and some junk""") self.assertEqual( [('OWL', Namespace('http://www.w3.org/2002/07/owl#')), ('WIKIBASE', Namespace('http://wikiba.se/ontology-beta#')), ('WDS', Namespace('http://www.wikidata.org/entity/statement/')), ('WDATA', Namespace('https://www.wikidata.org/wiki/Special:EntityData/')), ('SKOS', Namespace('http://www.w3.org/2004/02/skos/core#')), ('SCHEMA', Namespace('http://schema.org/')), ('CC', Namespace('http://creativecommons.org/ns#')), ('GEO', Namespace('http://www.opengis.net/ont/geosparql#')), ('PROV', Namespace('http://www.w3.org/ns/prov#')), ('WDREF', Namespace('http://www.wikidata.org/reference/')), ('WDV', Namespace('http://www.wikidata.org/value/')), ('WD', Namespace('http://www.wikidata.org/entity/')), ('WDT', Namespace('http://www.wikidata.org/prop/direct/')), ('WDTN', Namespace('http://www.wikidata.org/prop/direct-normalized/')), ('P', Namespace('http://www.wikidata.org/prop/')), ('PS', Namespace('http://www.wikidata.org/prop/statement/')), ('PSV', Namespace('http://www.wikidata.org/prop/statement/value/')), ('PSN', Namespace('http://www.wikidata.org/prop/statement/value-normalized/')), ('PQ', Namespace('http://www.wikidata.org/prop/qualifier/')), ('PQV', Namespace('http://www.wikidata.org/prop/qualifier/value/')), ('PQN', Namespace('http://www.wikidata.org/prop/qualifier/value-normalized/')), ('PR', Namespace('http://www.wikidata.org/prop/reference/')), ('PRV', Namespace('http://www.wikidata.org/prop/reference/value/')), ('PRN', Namespace('http://www.wikidata.org/prop/reference/value-normalized/')), ('WDNO', Namespace('http://www.wikidata.org/prop/novalue/'))], [e for e in pl] ) pl = PrefixLibrary(""" PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX prov: <http://www.w3.org/ns/prov#> PREFIX p: <http://www.wikidata.org/prop/> PREFIX pr: <http://www.wikidata.org/prop/reference/> PREFIX prv: <http://www.wikidata.org/prop/reference/value/> PREFIX pv: <http://www.wikidata.org/prop/value/> PREFIX ps: <http://www.wikidata.org/prop/statement/> 
PREFIX gw: <http://genewiki.shape/> start = @gw:cancer gw:cancer { p:P1748 { prov:wasDerivedFrom @<reference> }+ } <reference> { pr:P248 IRI ; pr:P813 xsd:dateTime ; pr:P699 LITERAL }""", foaf=known_prefixes.FOAF, owl=known_prefixes.OWL, rdfs=standard_prefixes.RDFS) self.assertEqual( [('XSD', Namespace('http://www.w3.org/2001/XMLSchema#')), ('PROV', Namespace('http://www.w3.org/ns/prov#')), ('P', Namespace('http://www.wikidata.org/prop/')), ('PR', Namespace('http://www.wikidata.org/prop/reference/')), ('PRV', Namespace('http://www.wikidata.org/prop/reference/value/')), ('PV', Namespace('http://www.wikidata.org/prop/value/')), ('PS', Namespace('http://www.wikidata.org/prop/statement/')), ('GW', Namespace('http://genewiki.shape/')), ('FOAF', Namespace('http://xmlns.com/foaf/0.1/')), ('OWL', Namespace('http://www.w3.org/2002/07/owl#')), ('RDFS', Namespace('http://www.w3.org/2000/01/rdf-schema#'))], [e for e in pl]) pl = PrefixLibrary(None, ex="http://example.org/") self.assertEqual("http://example.org/", str(pl.EX)) known_prefixes.add_bindings(g) self.assertEqual("""@prefix dc: <http://purl.org/dc/elements/1.1/> . @prefix dcterms: <http://purl.org/dc/terms/> . @prefix doap: <http://usefulinc.com/ns/doap#> . @prefix foaf: <http://xmlns.com/foaf/0.1/> . @prefix owl: <http://www.w3.org/2002/07/owl#> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix skos: <http://www.w3.org/2004/02/skos/core#> . @prefix xml: <http://www.w3.org/XML/1998/namespace> . @prefix xmlns: <http://www.w3.org/XML/1998/namespace> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .""", g.serialize(format="turtle").decode().strip())