def test_andras_loop():
    """Evaluate the items selected by each entry of the WikiPathways manifest.

    The manifest is downloaded, and for every entry whose ``data`` field starts
    with ``Endpoint:`` the entry's ShEx schema is fetched and parsed, its query
    is run against the endpoint, and the Turtle document of every returned item
    is evaluated.  One line is printed per evaluation result.
    """
    manifest_url = \
        "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/wikipathways/manifest_all.json"
    entries = jsonasobj.loads(requests.get(manifest_url).text)

    for entry in entries:
        print(entry._as_json_dumps())
        if not entry.data.startswith("Endpoint:"):
            continue

        endpoint = entry.data.replace("Endpoint: ", "")
        schema_text = requests.get(entry.schemaURL).text
        parsed_schema = ShExC(schema_text).schema
        print("==== Schema =====")
        # print(parsed_schema._as_json_dumps())

        evaluator = ShExEvaluator(schema=parsed_schema, debug=True)
        # Strip the query-map wrapper so that only the SPARQL text remains.
        query = entry.queryMap.replace("SPARQL '''", "").replace("'''@START", "")

        frame = get_sparql_dataframe(endpoint, query)
        for item_uri in frame.item:
            turtle_response = requests.get(item_uri + ".ttl")
            outcomes = evaluator.evaluate(rdf=turtle_response.text, focus=item_uri, debug=False)
            for outcome in outcomes:
                if outcome.result:
                    print(f"{outcome.focus!s}: CONFORMS")
                else:
                    print(f"item with issue: {outcome.focus!s} - shape applied: {outcome.start!s}")
def test_no_start(self):
    """Evaluating without a start shape fails with 'START node is not specified'."""
    graph = Graph()
    graph.add((EX.x, EX.p, EX.x))
    evaluator = ShExEvaluator(rdf=graph, schema=shex, focus=EX.x)
    first = evaluator.evaluate()[0]
    self.assertFalse(first.result)
    self.assertEqual('START node is not specified', first.reason.strip())
def test_bad_start(self):
    """Evaluating with a start shape that the schema lacks fails with a 'not found' reason."""
    graph = Graph()
    graph.add((EX.x, EX.p, EX.x))
    evaluator = ShExEvaluator(rdf=graph, schema=shex, start=EX.c, focus=EX.x)
    first = evaluator.evaluate()[0]
    self.assertFalse(first.result)
    self.assertEqual('Shape: http://a.example/c not found in Schema', first.reason.strip())
def test_lists(self):
    """Evaluate the metamodel RDF against ``shex2``, starting at SchemaDefinition."""
    with open(self.meta_rdf_path) as rdf_file:
        rdf_text = rdf_file.read()
    evaluator = ShExEvaluator(
        rdf_text,
        shex2,
        focus="https://biolink.github.io/metamodel/ontology/meta.ttl",
        start="http://bioentity.io/vocab/SchemaDefinition")
    outcomes = evaluator.evaluate()
    self.assertTrue(self.eval_results(outcomes))
def test_infinite_loop(self):
    """A graph holding one ``status`` triple is expected to conform to ObservationShape.

    NOTE(review): the test name suggests this guards against non-terminating
    evaluation; the schema itself is defined outside this function.
    """
    graph = Graph()
    graph.add((EX.Obs1, FHIR.status, Literal("final")))
    evaluator = ShExEvaluator(rdf=graph, schema=shex, focus=EX.Obs1,
                              start=FHIR.ObservationShape, debug=False)
    first = evaluator.evaluate()[0]
    self.assertTrue(first.result)
def test_inconsistent(self):
    """A shape that negates a reference to itself is reported as inconsistent."""
    schema_text = """<http://a.example/S> {<http://a.example/p> not @<http://a.example/S>}"""
    # NOTE(review): the expected reason is reproduced exactly as it appears in this
    # copy of the file; confirm its line breaks/indentation against evaluator output.
    expected_reason = (
        "Testing <http://a.example/x> against shape http://a.example/S "
        "Testing <http://a.example/x> against shape http://a.example/S "
        "http://a.example/S: Inconsistent recursive shape reference"
    )
    graph = Graph()
    graph.add((EX.x, EX.p, EX.x))
    evaluator = ShExEvaluator(rdf=graph, schema=schema_text, focus=EX.x, start=EX.S, debug=False)
    outcomes = evaluator.evaluate()
    self.assertFalse(outcomes[0].result)
    self.assertEqual(expected_reason, outcomes[0].reason.strip())
def test_probe(self):
    """ Test for determining performance problem """
    focus = "http://identifiers.org/drugbank:DB00005"
    start = BIOLINK_NS.Drug
    schema_path = os.path.join(self.source_path, 'probe.shex')
    rdf_path = os.path.join(self.cwd, 'data', 'probe.ttl')

    # The RDF is supplied at evaluation time rather than to the constructor.
    evaluator = ShExEvaluator(None, schema_path, focus, start)
    outcomes = evaluator.evaluate(rdf_path, debug=False)
    self.assertTrue(self._evaluate_shex_results(outcomes))
def test_closed(self):
    """ Test closed definition """
    focus_node = EXC['42']
    evaluator = ShExEvaluator(rdf=rdf, schema=shex, focus=focus_node, start=EXE.Person)
    pprint(evaluator.evaluate())
    first = evaluator.evaluate()[0]
    self.assertFalse(first.result)

    # Run the same check through the functional API and show its outcome.
    from pyshex.evaluate import evaluate
    graph = Graph()
    graph.parse(data=rdf, format="turtle")
    functional_outcome = evaluate(graph, shex, focus=EXC['42'], start=EXE.Person)
    pprint(functional_outcome)
def test_multiple_evaluate(self):
    """ Test calling evaluate multiple times in a row """
    prefixes = PrefixLibrary(shex)
    evaluator = ShExEvaluator(rdf=rdf, schema=shex, focus=prefixes.EX.s)

    # conformant: the focus given to the constructor passes on every call
    for _ in range(NUM_ITERS):
        outcome = evaluator.evaluate()[0]
        self.assertTrue(outcome.result)

    # non-conformant: a focus supplied per call fails on every call
    for _ in range(NUM_ITERS):
        outcome = evaluator.evaluate(focus=prefixes.EX.a)[0]
        self.assertFalse(outcome.result)
def test_biolink_correct_rdf(self):
    """ Test some conforming RDF """
    # Make sure ShEx is current
    self.single_file_generator('shexj', ShExGenerator, format='json')

    schema_path = env.expected_path('biolink-model.shexj')
    focus = "http://identifiers.org/drugbank:DB00005"
    start = BIOLINK_NS.Drug
    evaluator = ShExEvaluator(None, schema_path, focus, start)

    rdf_path = env.input_path('probe.ttl')
    outcomes = evaluator.evaluate(rdf_path, debug=False)
    self.assertTrue(self._evaluate_shex_results(outcomes))
def test_biolink_correct_rdf(self):
    """ Test some conforming RDF """
    self.single_file_generator('shex', ShExGenerator)

    schema_path = os.path.join(self.source_path, 'biolink-model.shex')
    focus = "http://identifiers.org/drugbank:DB00005"
    start = BIOLINK_NS.Drug
    evaluator = ShExEvaluator(None, schema_path, focus, start)

    rdf_path = os.path.join(self.cwd, 'data', 'correct.ttl')
    outcomes = evaluator.evaluate(rdf_path, debug=False)
    self.assertTrue(self._evaluate_shex_results(outcomes))
def test_full_meta(self):
    """The metamodel RDF is expected NOT to conform to the metamodel ShEx."""
    with open(self.meta_rdf_path) as rdf_file, open(self.meta_shex_path) as shex_file:
        evaluator = ShExEvaluator(
            rdf_file.read(),
            shex_file.read(),
            focus="https://biolink.github.io/metamodel/ontology/meta.ttl",
            start="http://bioentity.io/vocab/SchemaDefinition")
    # Fails because
    # ---> Testing http://bioentity.io/vocab/local_name against (inner shape)
    #   ---> Testing http://www.w3.org/2001/XMLSchema#string against http://bioentity.io/vocab/Element
    #       No matching triples found for predicate http://www.w3.org/1999/02/22-rdf-syntax-ns#label
    first = evaluator.evaluate()[0]
    self.assertFalse(first.result)
def test_biolink_shexeval(self) -> None:
    """Check that the biolink model RDF conforms to the ``meta.shex`` schema.

    Loads ``validation/biolink-model.ttl`` from the sibling ``data`` directory
    into a ``CFGraph``, evaluates it starting at the SchemaDefinition shape,
    prints the reason of every failing result and asserts that all results pass.
    """
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data'))
    g = CFGraph()
    # Graph.load was deprecated and later removed from rdflib; Graph.parse takes
    # the same source/format arguments.
    # NOTE(review): assumes CFGraph inherits parse() from rdflib's Graph.
    g.parse(os.path.join(base_dir, 'validation', 'biolink-model.ttl'), format="turtle")
    evaluator = ShExEvaluator(
        g,
        os.path.join(base_dir, 'schemas', 'meta.shex'),
        "https://biolink.github.io/biolink-model/ontology/biolink.ttl",
        "http://bioentity.io/vocab/SchemaDefinition")
    result = evaluator.evaluate(debug=False)
    for rslt in result:
        if not rslt.result:
            print(f"Error: {rslt.reason}")
    self.assertTrue(all(r.result for r in result))
def run_test(self, manifest_uri: str, num_entries: Optional[int]=None, verbose: bool=True, debug: bool=False,
             stop_on_fail: bool=False, debug_slurps: bool=False, save_graph_dir: Optional[str]=None) \
        -> List[EvaluationResult]:
    """ Run the test identified by manifest_uri

    :param manifest_uri: uri of manifest
    :param num_entries: maximum number of query results to evaluate per manifest case (None means all)
    :param verbose: True means print each manifest case before processing it
    :param debug: debug setting for shex evaluator
    :param stop_on_fail: True means stop the whole run at the first failing evaluation
    :param debug_slurps: True means emit SPARQL_slurper statistics
    :param save_graph_dir: If present, save the final graph in this directory
    :return: the evaluation results accumulated so far, in evaluation order
    """
    manifest = loads(self.fetch_uri(manifest_uri))
    rval: List[EvaluationResult] = []
    for case in manifest:
        if verbose:
            print(case._as_json_dumps())
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        shex = self.fetch_uri(case.schemaURL)
        evaluator = ShExEvaluator(schema=shex, debug=debug)
        prefixes = PrefixLibrary(shex, SKOS=SKOS)
        # Strip the query-map wrapper so that only the SPARQL text remains.
        sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "")
        dfs: List[str] = self.get_sparql_dataframe(sparql_endpoint, sparql_query)
        dfs_slice = dfs[:num_entries] if num_entries is not None else dfs
        for df in dfs_slice:
            # A fresh slurper per focus keeps each saved graph limited to that focus.
            slurper = SlurpyGraphWithAgent(sparql_endpoint)
            # slurper.debug_slurps = debug_slurps
            prefixes.add_bindings(slurper)
            print(f"Evaluating: {df}")
            results = evaluator.evaluate(rdf=slurper, focus=df, debug=debug, debug_slurps=debug_slurps,
                                         over_slurp=False)
            rval += results
            if save_graph_dir:
                # [-1] instead of [1]: a focus without '/' is used whole rather than raising IndexError.
                element_name = df.rsplit('/', 1)[-1]
                file_name = os.path.join(save_graph_dir, element_name + '.ttl')
                print(f"Writing: {file_name}")
                slurper.serialize(file_name, format="turtle")
            if stop_on_fail and not all(r.result for r in results):
                # Return instead of break: break only left this inner loop, so the
                # remaining manifest cases were still evaluated after a failure.
                return rval
    return rval
def validate_items(schema, sparql):
    """Validate every item returned by a SPARQL query against a ShEx schema.

    :param schema: ShEx schema handed to ``ShExEvaluator``
    :param sparql: query passed to ``wikidata2df``; its result must have an ``item`` column
    :return: list of ``[item, conforms, reason]`` rows, one per evaluation result
    """
    results = []
    for item in tqdm(wikidata2df(sparql)["item"].to_list()):
        entity_url = f"http://www.wikidata.org/entity/{item}"
        rdfdata = Graph()
        rdfdata.parse(f"{entity_url}.ttl")
        evaluation = ShExEvaluator(rdf=rdfdata, schema=schema, focus=entity_url).evaluate()
        # Read the fields straight off each result: no throwaway dict is needed, and
        # the reason is available for conforming items as well as failing ones.
        results.extend([item, bool(result.result), result.reason] for result in evaluation)
    return results
def test_te_labels(self):
    """ Test triple expression labels """
    focus = "http://examples.org/ex/t"

    passing_outcomes = ShExEvaluator(rdf=passing, schema=shex, focus=focus).evaluate(debug=False)
    pprint(passing_outcomes)
    self.assertTrue(passing_outcomes[0].result)

    failing_outcomes = ShExEvaluator(rdf=failing_1, schema=shex, focus=focus).evaluate()
    self.assertFalse(failing_outcomes[0].result)

    failing_outcomes = ShExEvaluator(rdf=failing_2, schema=shex, focus=focus).evaluate()
    self.assertFalse(failing_outcomes[0].result)
def test_range_construct(self):
    """ A range can be a builtin type, a TypeDefinition or a ClassDefinition """
    slot_foci = [
        "http://bioentity.io/vocab/abstract",
        "http://bioentity.io/vocab/class_definition_is_a",
        "http://bioentity.io/vocab/defining_slots",
    ]
    with open(self.meta_rdf_path) as rdf_file:
        evaluator = ShExEvaluator(rdf_file.read(), shex3, focus=slot_foci,
                                  start="http://bioentity.io/vocab/SlotDefinition")

    # All three slots in the metamodel RDF should pass ...
    self.assertTrue(self.eval_results(evaluator.evaluate()))

    # ... while none of the results for the failing RDF may pass.
    failing_outcomes = evaluator.evaluate(rdf=fail_rdf_1, focus="http://bioentity.io/vocab/definitional")
    self.assertFalse(any(outcome.result for outcome in failing_outcomes))
def shextest(self, rdf_file: str, shex_file: str, focus: str, cfgraph: bool = False) -> None:
    """Assert that an RDF file conforms to a ShEx schema at the given focus.

    :param rdf_file: Turtle file name, resolved under the sibling ``rdf`` directory
    :param shex_file: schema file name, resolved under the sibling ``shex`` directory
    :param focus: focus node to evaluate
    :param cfgraph: True means load the RDF into a ``CFGraph`` rather than a plain ``Graph``
    """
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    g = CFGraph() if cfgraph else Graph()
    # Graph.load was deprecated and later removed from rdflib; Graph.parse takes
    # the same source/format arguments.
    # NOTE(review): assumes CFGraph inherits parse() from rdflib's Graph.
    g.parse(os.path.join(base_dir, 'rdf', rdf_file), format="turtle")
    evaluator = ShExEvaluator(
        g,
        os.path.join(base_dir, 'shex', shex_file),
        focus,
        "http://w3id.org/biolink/vocab/SchemaDefinition")
    result = evaluator.evaluate(debug=False)
    for rslt in result:
        if not rslt.result:
            print(f"Error: {rslt.reason}")
    self.assertTrue(all(r.result for r in result))
def test_performance_problem(self):
    """ Test a performance problem brought about by two possible type arcs in a definition """
    schema_path = os.path.join(self.test_data, 'shex', 'issue_54.shex')
    evaluator = ShExEvaluator(
        rdf=rdf,
        schema=schema_path,
        focus="http://identifiers.org/drugbank:DB00005",
        start="https://w3id.org/biolink/vocab/Drug")
    outcomes = evaluator.evaluate()
    self.assertTrue(outcomes[0].result)
def test_eric(self):
    """Every result for INST.Eric against SCHOOL.Enrollee is expected to fail."""
    prefixes = PrefixLibrary(rdf)
    evaluator = ShExEvaluator(rdf=rdf, schema=schema,
                              focus=prefixes.INST.Eric, start=prefixes.SCHOOL.Enrollee)
    for outcome in evaluator.evaluate(debug=False):
        verdict = 'Passing' if outcome.result else 'Failing'
        print(f"{outcome.focus}: {verdict}: \n{outcome.reason}")
        self.assertFalse(outcome.result)
def test_fail(self):
    """A focus absent from the graph is reported as a failure with a 'not in graph' reason."""
    prefixes = PrefixLibrary(shex)

    # Present focus: passes.
    present = ShExEvaluator().evaluate(rdf, shex, focus=prefixes.EX.s, debug=False)
    self.assertTrue(present[0].result)

    # Absent focus: fails with an explanatory reason.
    absent = ShExEvaluator().evaluate(rdf, shex, focus=prefixes.EX.t)
    self.assertFalse(absent[0].result)
    self.assertEqual('Focus: http://example.org/ex/t not in graph', absent[0].reason)

    # Mixed foci: one result per focus, in the order given.
    mixed = ShExEvaluator().evaluate(rdf, shex, focus=[prefixes.EX.s, prefixes.EX.t2])
    self.assertTrue(mixed[0].result)
    self.assertFalse(mixed[1].result)
    self.assertEqual('Focus: http://example.org/ex/t2 not in graph', mixed[1].reason)
def _validate(graph: Graph, shex: str, valid_graph: Graph, focus: URIRef, shape: URIRef) -> bool:
    """Evaluate ``focus`` against ``shape`` and collect its triples when it conforms.

    :param graph: graph that holds the data to check
    :param shex: ShEx schema text
    :param valid_graph: graph that receives every triple whose subject is ``focus``
        when the first evaluation result passes; left untouched otherwise
    :param focus: node to evaluate
    :param shape: start shape
    :return: the ``result`` of the first evaluation result
    """
    outcomes = ShExEvaluator().evaluate(rdf=graph, shex=shex, focus=focus, start=shape)
    conforms = outcomes[0].result
    if conforms:
        for triple in graph.triples((focus, None, None)):
            valid_graph.add(triple)
    return conforms
def test_empty_constructor(self):
    """An evaluator built with no arguments exposes empty/default attributes."""
    evaluator = ShExEvaluator()
    # rdflib no longer emits unused prefixes -- an empty evaluator is now empty
    self.assertEqual("", evaluator.rdf.strip())
    self.assertIsNone(evaluator.schema)
    self.assertIsNone(evaluator.focus)
    self.assertEqual([], evaluator.foci)
    self.assertEqual([START], evaluator.start)
    self.assertEqual("turtle", evaluator.rdf_format)
    # assertIsInstance reports the offending object on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(evaluator.g, Graph)
def evaluate(self, rdf, shex, resource, shex_type):
    """Evaluate ``resource`` against ``shex_type`` and return the failure reasons.

    Each passing result is printed as ``PASS: <focus> <start>``; the reason of
    each failing result is collected instead.

    :return: list of reasons, one per failing result (empty when all pass)
    """
    failure_reasons = []
    for outcome in ShExEvaluator().evaluate(rdf, shex, focus=resource, start=shex_type):
        if not outcome.result:
            failure_reasons.append(outcome.reason)
            continue
        print("PASS:", str(outcome.focus), str(outcome.start))
    return failure_reasons
def run_shex_manifest():
    """Validate the items selected by the manifest at ``$SHEX_MANIFEST`` and log each outcome.

    For every manifest entry whose ``data`` field starts with ``Endpoint:`` the
    entry's ShEx schema is fetched and parsed, its query is executed, and each
    returned item is evaluated through a ``SlurpyGraph`` on the entry's endpoint.
    Conforming items are logged at INFO, failing ones at ERROR with the reason.

    Environment:
        SHEX_MANIFEST: URL of the manifest (required).
        debug: the string "True" turns on evaluator debugging; any other value,
            or leaving it unset, turns it off.
    """
    manifest_url = os.environ["SHEX_MANIFEST"]
    print(manifest_url)
    manifest = jsonasobj.loads(requests.get(manifest_url).text)

    # Resolve the flag once, with a default.  Reading it inside the loop left
    # `debug` unbound for any value other than "True"/"False", and an unset
    # variable raised a KeyError that the RuntimeError handler below never caught.
    debug = os.environ.get("debug", "False") == "True"

    for case in manifest:
        if not case.data.startswith("Endpoint:"):
            continue
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema = requests.get(case.schemaURL).text
        shex = ShExC(schema).schema
        evaluator = ShExEvaluator(schema=shex, debug=True)
        # Strip the query-map wrapper so that only the SPARQL text remains.
        sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "")

        df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
        for row in df["results"]["bindings"]:
            wdid = row["item"]["value"]
            slurpeddata = SlurpyGraph(sparql_endpoint)
            try:
                results = evaluator.evaluate(rdf=slurpeddata, focus=wdid, debug=debug)
                for result in results:
                    if result.result:
                        print(str(result.focus) + ": INFO")
                        msg = wdi_helpers.format_msg(wdid, wdid, None, 'CONFORMS', '')
                        wdi_core.WDItemEngine.log("INFO", msg)
                    else:
                        msg = wdi_helpers.format_msg(wdid, wdid, None, '', result.reason)
                        wdi_core.WDItemEngine.log("ERROR", msg)
            except RuntimeError:
                # Best effort: report the item and move on to the next one.
                print("Continue after 1 minute, no validation happened on" + wdid)
                continue
def test_biolink_shex_incorrect_rdf(self):
    """ Test some non-conforming RDF """
    self.single_file_generator('shexj', ShExGenerator, format='json')
    shex_file = env.expected_path('biolink-model.shexj')

    focus = "http://identifiers.org/drugbank:DB00005"
    start = BIOLINK_NS.Drug
    evaluator = ShExEvaluator(None, shex_file, focus, start)

    # incorrect.ttl has 16 error lines (more or less).
    rdf_file = env.temp_file_path('incorrect.ttl')
    errs_file = env.temp_file_path('incorrect.errs')
    results = evaluator.evaluate(rdf_file)
    self.assertFalse(self._evaluate_shex_results(results, printit=False))
    self.assertEqual(1, len(results))
    # assertIn shows both operands on failure, unlike assertTrue(x in y).
    self.assertIn('Unmatched triples in CLOSED shape', results[0].reason)
    # Each tab-indented line of the reason counts as one reported entry.
    ntabs = results[0].reason.count('\n\t')
    self.assertEqual(13, ntabs)
    # Save the rendered errors only if no errors file exists yet.
    if not os.path.exists(errs_file):
        with open(errs_file, 'w') as f:
            f.write(shex_results_as_string(results[0]))
def run_shex_manifest():
    """Validate the items selected by the diseases manifest and print each outcome.

    For every manifest entry whose ``data`` field starts with ``Endpoint:`` the
    entry's ShEx schema is fetched and parsed, its query is run against the
    endpoint, and each returned item is evaluated through a ``SlurpyGraph``.
    Failures for a fixed set of items are not printed.
    """
    # Alternative manifest sources:
    # manifest = \
    #     "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/wikipathways/manifest_all.json"
    # manifest = jsonasobj.loads(requests.get(os.environ['MANIFEST_URL']).text)
    # print(os.environ['MANIFEST_URL'])
    manifest_url = "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/diseases/manifest_all.json"
    entries = jsonasobj.loads(requests.get(manifest_url).text)

    # Items whose failures are deliberately left out of the report.
    muted_items = (
        "http://www.wikidata.org/entity/Q33525",
        "http://www.wikidata.org/entity/Q62736",
        "http://www.wikidata.org/entity/Q112670",
    )

    for entry in entries:
        print(entry._as_json_dumps())
        if not entry.data.startswith("Endpoint:"):
            continue

        endpoint = entry.data.replace("Endpoint: ", "")
        schema_text = requests.get(entry.schemaURL).text
        parsed_schema = ShExC(schema_text).schema
        # print("==== Schema =====")
        # print(parsed_schema._as_json_dumps())

        evaluator = ShExEvaluator(schema=parsed_schema, debug=False)
        # Strip the query-map wrapper so that only the SPARQL text remains.
        query = entry.queryMap.replace("SPARQL '''", "").replace("'''@START", "")

        frame = get_sparql_dataframe(endpoint, query)
        for item_uri in frame.item:
            slurped = SlurpyGraph(endpoint)
            # slurped = requests.get(item_uri + ".ttl")
            outcomes = evaluator.evaluate(rdf=slurped, focus=item_uri, debug=False, debug_slurps=True)
            for outcome in outcomes:
                if outcome.result:
                    print(f"{outcome.focus!s}: CONFORMS")
                elif str(outcome.focus) not in muted_items:
                    print(f"item with issue: {outcome.focus!s} - shape applied: {outcome.start!s}")
# run_shex_manifest()
def test_empty_constructor(self):
    """An evaluator built with no arguments serializes only the default prefixes."""
    evaluator = ShExEvaluator()
    # NOTE(review): the expected text is reproduced exactly as it appears in this
    # copy of the file; confirm its line breaks against the serializer's output.
    expected_rdf = (
        "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . "
        "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . "
        "@prefix xml: <http://www.w3.org/XML/1998/namespace> . "
        "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> ."
    )
    self.assertEqual(expected_rdf, evaluator.rdf.strip())
    self.assertIsNone(evaluator.schema)
    self.assertIsNone(evaluator.focus)
    self.assertEqual([], evaluator.foci)
    self.assertEqual([START], evaluator.start)
    self.assertEqual("turtle", evaluator.rdf_format)
    # assertIsInstance reports the offending object on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(evaluator.g, Graph)
def run_shex_manifest():
    """Validate the items selected by the Reactome manifest and log each outcome.

    For every manifest entry whose ``data`` field starts with ``Endpoint:`` the
    entry's ShEx schema is fetched and parsed, its query is executed, and each
    returned item is evaluated through a ``SlurpyGraph`` on the entry's endpoint.
    Conforming items are logged at INFO, failing ones at ERROR with the reason.
    """
    manifest = jsonasobj.loads(
        requests.get(
            "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/reactome/manifest.json"
        ).text)
    for case in manifest:
        if not case.data.startswith("Endpoint:"):
            continue
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema = requests.get(case.schemaURL).text
        shex = ShExC(schema).schema
        evaluator = ShExEvaluator(schema=shex, debug=True)
        # Strip the query-map wrapper so that only the SPARQL text remains.
        sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "")

        df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
        for row in df["results"]["bindings"]:
            wdid = row["item"]["value"]
            slurpeddata = SlurpyGraph(sparql_endpoint)
            try:
                results = evaluator.evaluate(rdf=slurpeddata, focus=wdid, debug=False)
                for result in results:
                    if result.result:
                        print(str(result.focus) + ": INFO")
                        msg = wdi_helpers.format_msg(wdid, wdid, None, 'CONFORMS', '')
                        wdi_core.WDItemEngine.log("INFO", msg)
                    else:
                        # Log the message that was just built (this branch used to pass
                        # an undefined name and crash with NameError), and carry the
                        # failure reason so that the log entry is actionable.
                        msg = wdi_helpers.format_msg(wdid, wdid, None, '', result.reason)
                        wdi_core.WDItemEngine.log("ERROR", msg)
            except RuntimeError:
                # Best effort: report the item and move on to the next one.
                print("Continue after 1 minute, no validation happened on" + wdid)
                continue
def test_complete_constructor(self):
    """Results carry focus, start and reason when everything is given to the constructor."""
    here = os.path.dirname(os.path.abspath(__file__))
    test_rdf = os.path.join(here, '..', 'test_issues', 'data', 'Q18557122.ttl')
    foci = [loc_prefixes.WIKIDATA, loc_prefixes.WIKIDATA.Q18557112]
    evaluator = ShExEvaluator(test_rdf, shex_schema, foci, loc_prefixes.WIKIDATA.cancer)
    results = evaluator.evaluate()
    expected_start = URIRef('http://www.wikidata.org/entity/cancer')

    # First focus: the bare namespace, which is not a node of the graph.
    first = results[0]
    self.assertFalse(first.result)
    self.assertEqual(URIRef('http://www.wikidata.org/entity/'), first.focus)
    self.assertEqual(expected_start, first.start)
    self.assertEqual('Focus: http://www.wikidata.org/entity/ not in graph', first.reason)

    # Second focus: reported against a start shape the schema does not define.
    second = results[1]
    self.assertEqual(URIRef('http://www.wikidata.org/entity/Q18557112'), second.focus)
    self.assertEqual(expected_start, second.start)
    self.assertEqual(' Shape: http://www.wikidata.org/entity/cancer not found in Schema', second.reason)