def test_andras_loop():
    """Validate the items selected by each WikiPathways manifest entry.

    Downloads the manifest, and for every entry whose ``data`` field starts
    with ``Endpoint:`` fetches the ShEx schema, runs the entry's SPARQL query
    against that endpoint, retrieves each resulting item as Turtle and prints
    whether it conforms to the schema.
    """
    manifest_url = \
        "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/wikipathways/manifest_all.json"
    manifest = jsonasobj.loads(requests.get(manifest_url).text)

    for case in manifest:
        print(case._as_json_dumps())
        if not case.data.startswith("Endpoint:"):
            # Only entries that name a SPARQL endpoint are processed
            continue

        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema_text = requests.get(case.schemaURL).text
        parsed_schema = ShExC(schema_text).schema
        print("==== Schema =====")

        evaluator = ShExEvaluator(schema=parsed_schema, debug=True)
        # Strip the query-map wrapper so only the SPARQL text remains
        query_text = case.queryMap.replace("SPARQL '''", "")
        query_text = query_text.replace("'''@START", "")

        frame = get_sparql_dataframe(sparql_endpoint, query_text)
        for wdid in frame.item:
            turtle_response = requests.get(wdid + ".ttl")
            outcomes = evaluator.evaluate(rdf=turtle_response.text, focus=wdid, debug=False)
            for outcome in outcomes:
                if not outcome.result:
                    print("item with issue: " + str(outcome.focus) + " - " + "shape applied: " + str(outcome.start))
                else:
                    print(str(outcome.focus) + ": CONFORMS")
def main(argv: Optional[List[str]] = None):
    """Convert every ``.shexj`` file in the input directory to ShExC.

    :param argv: command line arguments handed to the parser built by
        ``genargs()``; ``None`` is passed through to ``parse_args`` unchanged

    Exits with status 1 if the input directory does not exist.  Each input
    file is loaded, passed through ``tweak_shexj`` and rendered as ShExC into
    the output directory; when ``opts.save`` is set the tweaked JSON is also
    written with a ``.shexjt`` suffix.  Files whose conversion raises
    ``ValueError`` are reported and skipped.
    """
    opts = genargs().parse_args(argv)
    if not os.path.isdir(opts.indir):
        print("Input directory {} doesn't exist".format(opts.indir), file=sys.stderr)
        sys.exit(1)
    os.makedirs(opts.outdir, exist_ok=True)

    conversion_count = 0
    for fname in os.listdir(opts.indir):
        if not fname.endswith('.shexj'):
            continue
        out_fname = change_suffix(fname, '.shexj', '.shexc')
        try:
            with open(os.path.join(opts.indir, fname)) as source:
                tweaked_shex = dumps(tweak_shexj(load(source)), indent=' ')
            if opts.save:
                # Keep the intermediate (tweaked) ShExJ next to the output
                tweaked_path = os.path.join(opts.outdir, change_suffix(fname, '.shexj', '.shexjt'))
                with open(tweaked_path, 'w') as tweaked_file:
                    tweaked_file.write(tweaked_shex)
            shexc = str(ShExC(tweaked_shex))
            with open(os.path.join(opts.outdir, out_fname), 'w') as target:
                target.write(shexc)
            conversion_count += 1
        except ValueError as e:
            print(f"{os.path.join(opts.indir, fname)} conversion failure")
            print(str(e))
    print(f"{conversion_count} files converted")
def end_schema(self, output: Optional[str]) -> None:
    """Emit the accumulated schema in the format selected by ``self.format``.

    :param output: path of the file to write; when falsy the result is
        printed instead

    ``'rdf'`` renders the schema's JSON form as Turtle, ``'shex'`` renders it
    as ShExC, and any other format emits the JSON produced by ``as_json``.
    """
    shex = as_json(self.shex)
    if self.format == 'rdf':
        g = Graph()
        g.parse(data=shex, format="json-ld")
        g.bind('owl', OWL)
        g.bind('biolink', BIOENTITY)
        g.bind('meta', META)
        turtle = g.serialize(format='turtle')
        # rdflib < 6 returns bytes here, rdflib >= 6 returns str; calling
        # .decode() unconditionally fails on the newer releases.
        shex = turtle.decode() if isinstance(turtle, bytes) else turtle
    elif self.format == 'shex':
        # TODO: wait until the better ShExC emitter is committed
        shex = str(ShExC(self.shex))
    if output:
        with open(output, 'w') as outf:
            outf.write(shex)
    else:
        print(shex)
def end_schema(self, output: Optional[str] = None, **_) -> None:
    """Finalise the schema and emit it in the format selected by ``self.format``.

    :param output: path of the file to write; when falsy the result is
        printed instead
    :param _: additional keyword arguments are accepted and ignored

    ``'rdf'`` renders the schema's JSON form as Turtle, ``'shex'`` renders it
    as ShExC using the generator's namespaces and base, and any other format
    emits the JSON produced by ``as_json``.
    """
    # Fall back to a single empty Shape when no shapes were collected
    self.shex.shapes = self.shapes if self.shapes else [Shape()]
    shex = as_json(self.shex)
    if self.format == 'rdf':
        g = Graph()
        g.parse(data=shex, format="json-ld")
        g.bind('owl', OWL)
        turtle = g.serialize(format='turtle')
        # rdflib < 6 returns bytes here, rdflib >= 6 returns str; calling
        # .decode() unconditionally fails on the newer releases.
        shex = turtle.decode() if isinstance(turtle, bytes) else turtle
    elif self.format == 'shex':
        g = Graph()
        self.namespaces.load_graph(g)
        shex = str(ShExC(self.shex, base=sfx(self.namespaces._base), namespaces=g))
    if output:
        with open(output, 'w') as outf:
            outf.write(shex)
    else:
        print(shex)
def run_shex_manifest():
    """Validate the items selected by the manifest named in ``SHEX_MANIFEST``.

    The manifest URL is read from the ``SHEX_MANIFEST`` environment variable.
    For every entry whose ``data`` field starts with ``Endpoint:`` the ShEx
    schema is fetched, the entry's SPARQL query is executed, and each returned
    item is evaluated against the schema through a ``SlurpyGraph`` on that
    endpoint.  Conforming items are logged at INFO, failures at ERROR with the
    evaluator's reason.

    Evaluator debugging is enabled only when the ``debug`` environment
    variable is exactly ``"True"``; any other value, or no value, disables it.

    :raises KeyError: if ``SHEX_MANIFEST`` is not set
    """
    manifest_url = os.environ["SHEX_MANIFEST"]
    print(manifest_url)
    manifest = jsonasobj.loads(requests.get(manifest_url).text)

    # Resolve the flag once.  Previously it was re-read for every item and was
    # left unbound when the variable held anything but "True" or "False".
    debug = os.environ.get("debug", "False") == "True"

    for case in manifest:
        if not case.data.startswith("Endpoint:"):
            # Only entries that name a SPARQL endpoint are processed
            continue
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema = requests.get(case.schemaURL).text
        shex = ShExC(schema).schema
        evaluator = ShExEvaluator(schema=shex, debug=True)
        # Strip the query-map wrapper so only the SPARQL text remains
        sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "")

        df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
        for row in df["results"]["bindings"]:
            wdid = row["item"]["value"]
            slurpeddata = SlurpyGraph(sparql_endpoint)
            try:
                results = evaluator.evaluate(rdf=slurpeddata, focus=wdid, debug=debug)
                for result in results:
                    if result.result:
                        print(str(result.focus) + ": INFO")
                        msg = wdi_helpers.format_msg(wdid, wdid, None, 'CONFORMS', '')
                        wdi_core.WDItemEngine.log("INFO", msg)
                    else:
                        msg = wdi_helpers.format_msg(wdid, wdid, None, '', result.reason)
                        wdi_core.WDItemEngine.log("ERROR", msg)
            except RuntimeError:
                print("Continue after 1 minute, no validation happened on" + wdid)
                continue
def run_shex_manifest():
    """Validate the items selected by each entry of the diseases manifest.

    Downloads the manifest, and for every entry whose ``data`` field starts
    with ``Endpoint:`` fetches the ShEx schema, runs the entry's SPARQL query
    and evaluates each resulting item through a ``SlurpyGraph`` on that
    endpoint.  Conforming items are printed as such; failures are printed
    unless the item is one of three hard-coded Wikidata entities.
    """
    manifest_loc = "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/diseases/manifest_all.json"
    manifest = jsonasobj.loads(requests.get(manifest_loc).text)

    # Failures for these entities are deliberately not reported
    silenced_failures = (
        "http://www.wikidata.org/entity/Q33525",
        "http://www.wikidata.org/entity/Q62736",
        "http://www.wikidata.org/entity/Q112670",
    )

    for case in manifest:
        print(case._as_json_dumps())
        if not case.data.startswith("Endpoint:"):
            # Only entries that name a SPARQL endpoint are processed
            continue

        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema_text = requests.get(case.schemaURL).text
        parsed_schema = ShExC(schema_text).schema

        evaluator = ShExEvaluator(schema=parsed_schema, debug=False)
        # Strip the query-map wrapper so only the SPARQL text remains
        query_text = case.queryMap.replace("SPARQL '''", "")
        query_text = query_text.replace("'''@START", "")

        frame = get_sparql_dataframe(sparql_endpoint, query_text)
        for wdid in frame.item:
            slurped_graph = SlurpyGraph(sparql_endpoint)
            outcomes = evaluator.evaluate(rdf=slurped_graph, focus=wdid, debug=False, debug_slurps=True)
            for outcome in outcomes:
                if outcome.result:
                    print(str(outcome.focus) + ": CONFORMS")
                elif str(outcome.focus) not in silenced_failures:
                    print("item with issue: " + str(outcome.focus) + " - " + "shape applied: " + str(outcome.start))
# run_shex_manifest()
def run_shex_manifest():
    """Validate the items selected by each entry of the Reactome manifest.

    Downloads the manifest, and for every entry whose ``data`` field starts
    with ``Endpoint:`` fetches the ShEx schema, executes the entry's SPARQL
    query and evaluates each returned item through a ``SlurpyGraph`` on that
    endpoint.  Conforming items are logged at INFO and failures at ERROR.
    A ``RuntimeError`` during evaluation is reported and the item skipped.
    """
    manifest = jsonasobj.loads(
        requests.get(
            "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/reactome/manifest.json"
        ).text)
    for case in manifest:
        if not case.data.startswith("Endpoint:"):
            # Only entries that name a SPARQL endpoint are processed
            continue
        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema = requests.get(case.schemaURL).text
        shex = ShExC(schema).schema
        evaluator = ShExEvaluator(schema=shex, debug=True)
        # Strip the query-map wrapper so only the SPARQL text remains
        sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "")

        df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
        for row in df["results"]["bindings"]:
            wdid = row["item"]["value"]
            slurpeddata = SlurpyGraph(sparql_endpoint)
            try:
                results = evaluator.evaluate(rdf=slurpeddata, focus=wdid, debug=False)
                for result in results:
                    if result.result:
                        print(str(result.focus) + ": INFO")
                        msg = wdi_helpers.format_msg(wdid, wdid, None, 'CONFORMS', '')
                        wdi_core.WDItemEngine.log("INFO", msg)
                    else:
                        msg = wdi_helpers.format_msg(wdid, wdid, None, '', '')
                        # Log the message just built; the original passed an
                        # undefined name here and raised NameError instead.
                        wdi_core.WDItemEngine.log("ERROR", msg)
            except RuntimeError:
                print("Continue after 1 minute, no validation happened on" + wdid)
                continue
def schema(self) -> Optional[str]:
    """Render the stored schema as ShExC.

    :return: the ShExC text for ``self._schema``, or ``None`` when
        ``self._schema`` is falsy
    """
    if not self._schema:
        return None
    return str(ShExC(self._schema))