def test_andras_loop():
    """Evaluate the items of every endpoint-backed WikiPathways manifest case.

    Downloads the Genewiki-ShEx manifest and, for each case whose ``data``
    field starts with ``Endpoint:``, fetches the case schema, runs the case
    query through ``get_sparql_dataframe`` and evaluates the ``.ttl``
    document of every returned item, printing one line per result.
    """
    manifest_url = \
        "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/wikipathways/manifest_all.json"
    manifest = jsonasobj.loads(requests.get(manifest_url).text)

    for case in manifest:
        print(case._as_json_dumps())
        # Cases that do not name an endpoint are only printed, never evaluated.
        if not case.data.startswith("Endpoint:"):
            continue

        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema_text = requests.get(case.schemaURL).text
        parsed_schema = ShExC(schema_text).schema
        print("==== Schema =====")

        evaluator = ShExEvaluator(schema=parsed_schema, debug=True)
        # Drop the "SPARQL '''" and "'''@START" markers from the query map.
        sparql_query = case.queryMap.replace("SPARQL '''", "")
        sparql_query = sparql_query.replace("'''@START", "")

        items = get_sparql_dataframe(sparql_endpoint, sparql_query)
        for wdid in items.item:
            response = requests.get(wdid + ".ttl")
            outcomes = evaluator.evaluate(rdf=response.text,
                                          focus=wdid,
                                          debug=False)
            for outcome in outcomes:
                if outcome.result:
                    print(str(outcome.focus) + ": CONFORMS")
                    continue
                print("item with issue: " + str(outcome.focus) + " - " +
                      "shape applied: " + str(outcome.start))
Пример #2
0
 def test_no_start(self):
     """Evaluating without a ``start`` argument fails with the START-not-specified reason."""
     graph = Graph()
     graph.add((EX.x, EX.p, EX.x))
     evaluator = ShExEvaluator(rdf=graph, schema=shex, focus=EX.x)
     first = evaluator.evaluate()[0]
     self.assertFalse(first.result)
     self.assertEqual('START node is not specified', first.reason.strip())
Пример #3
0
 def test_bad_start(self):
     """Starting from ``EX.c`` fails with a shape-not-found reason."""
     graph = Graph()
     graph.add((EX.x, EX.p, EX.x))
     evaluator = ShExEvaluator(rdf=graph, schema=shex, start=EX.c, focus=EX.x)
     first = evaluator.evaluate()[0]
     self.assertFalse(first.result)
     expected_reason = 'Shape: http://a.example/c not found in Schema'
     self.assertEqual(expected_reason, first.reason.strip())
Пример #4
0
 def test_lists(self):
     """Evaluate the file at ``meta_rdf_path`` against ``shex2`` and expect ``eval_results`` to be truthy."""
     with open(self.meta_rdf_path) as rdf_file:
         rdf_text = rdf_file.read()
         evaluator = ShExEvaluator(
             rdf_text,
             shex2,
             focus="https://biolink.github.io/metamodel/ontology/meta.ttl",
             start="http://bioentity.io/vocab/SchemaDefinition")
     outcomes = evaluator.evaluate()
     self.assertTrue(self.eval_results(outcomes))
Пример #5
0
 def test_infinite_loop(self):
     """Expect ``EX.Obs1`` with a "final" status to conform to ``FHIR.ObservationShape``."""
     graph = Graph()
     graph.add((EX.Obs1, FHIR.status, Literal("final")))
     evaluator = ShExEvaluator(
         rdf=graph,
         schema=shex,
         focus=EX.Obs1,
         start=FHIR.ObservationShape,
         debug=False,
     )
     first = evaluator.evaluate()[0]
     self.assertTrue(first.result)
Пример #6
0
 def test_inconsistent(self):
     """A shape that negates a reference to itself is reported as an inconsistent recursion."""
     schema_text = """<http://a.example/S> {<http://a.example/p> not @<http://a.example/S>}"""
     graph = Graph()
     graph.add((EX.x, EX.p, EX.x))
     evaluator = ShExEvaluator(rdf=graph, schema=schema_text, focus=EX.x, start=EX.S, debug=False)
     outcomes = evaluator.evaluate()
     self.assertFalse(outcomes[0].result)
     # The expected text is whitespace-sensitive; its continuation lines must stay as written.
     self.assertEqual("""Testing <http://a.example/x> against shape http://a.example/S
 Testing <http://a.example/x> against shape http://a.example/S
   http://a.example/S: Inconsistent recursive shape reference""", outcomes[0].reason.strip())
Пример #7
0
    def test_probe(self):
        """ Evaluate probe.ttl against probe.shex (kept to chase a performance problem) """
        schema_path = os.path.join(self.source_path, 'probe.shex')
        rdf_path = os.path.join(os.path.join(self.cwd, 'data'), 'probe.ttl')

        # No RDF is bound at construction time; it is supplied to evaluate() instead
        evaluator = ShExEvaluator(
            None,
            schema_path,
            "http://identifiers.org/drugbank:DB00005",
            BIOLINK_NS.Drug)
        outcomes = evaluator.evaluate(rdf_path, debug=False)
        self.assertTrue(self._evaluate_shex_results(outcomes))
Пример #8
0
    def test_closed(self):
        """ Expect focus EXC['42'] to fail EXE.Person; also print the functional evaluate() output """
        checker = ShExEvaluator(rdf=rdf, schema=shex, focus=EXC['42'], start=EXE.Person)

        # One evaluation is printed for inspection, a second one is asserted on
        pprint(checker.evaluate())
        first = checker.evaluate()[0]
        self.assertFalse(first.result)

        # The function-level API is exercised on a parsed graph; its output is printed only
        from pyshex.evaluate import evaluate
        parsed = Graph()
        parsed.parse(data=rdf, format="turtle")
        functional_outcome = evaluate(parsed, shex, focus=EXC['42'], start=EXE.Person)
        pprint(functional_outcome)
Пример #9
0
    def test_multiple_evaluate(self):
        """ Repeated evaluate() calls on one evaluator must keep returning the same verdict """
        prefixes = PrefixLibrary(shex)
        evaluator = ShExEvaluator(rdf=rdf, schema=shex, focus=prefixes.EX.s)

        # The focus given to the constructor conforms on every call
        for _ in range(NUM_ITERS):
            verdict = evaluator.evaluate()[0]
            self.assertTrue(verdict.result)

        # Overriding the focus per call gives a non-conformant result every time
        for _ in range(NUM_ITERS):
            verdict = evaluator.evaluate(focus=prefixes.EX.a)[0]
            self.assertFalse(verdict.result)
Пример #10
0
    def test_biolink_correct_rdf(self):
        """ Regenerate the ShExJ schema, then expect probe.ttl to evaluate successfully """
        # Make sure ShEx is current
        self.single_file_generator('shexj', ShExGenerator, format='json')

        schema_path = env.expected_path('biolink-model.shexj')
        evaluator = ShExEvaluator(
            None,
            schema_path,
            "http://identifiers.org/drugbank:DB00005",
            BIOLINK_NS.Drug)

        rdf_path = env.input_path('probe.ttl')
        outcomes = evaluator.evaluate(rdf_path, debug=False)
        self.assertTrue(self._evaluate_shex_results(outcomes))
Пример #11
0
    def test_biolink_correct_rdf(self):
        """ Regenerate the ShEx schema, then expect data/correct.ttl to evaluate successfully """
        self.single_file_generator('shex', ShExGenerator)
        schema_path = os.path.join(self.source_path, 'biolink-model.shex')
        rdf_path = os.path.join(os.path.join(self.cwd, 'data'), 'correct.ttl')

        evaluator = ShExEvaluator(
            None,
            schema_path,
            "http://identifiers.org/drugbank:DB00005",
            BIOLINK_NS.Drug)
        outcomes = evaluator.evaluate(rdf_path, debug=False)
        self.assertTrue(self._evaluate_shex_results(outcomes))
Пример #12
0
 def test_full_meta(self):
     """Evaluate the full metamodel RDF against the metamodel ShEx; a failing result is expected."""
     with open(self.meta_rdf_path) as rdf_file, open(self.meta_shex_path) as shex_file:
         evaluator = ShExEvaluator(
             rdf_file.read(),
             shex_file.read(),
             focus="https://biolink.github.io/metamodel/ontology/meta.ttl",
             start="http://bioentity.io/vocab/SchemaDefinition")
     # Fails because
     # ---> Testing http://bioentity.io/vocab/local_name against (inner shape)
     #   ---> Testing http://www.w3.org/2001/XMLSchema#string against http://bioentity.io/vocab/Element
     #       No matching triples found for predicate http://www.w3.org/1999/02/22-rdf-syntax-ns#label
     first = evaluator.evaluate()[0]
     self.assertFalse(first.result)
Пример #13
0
 def test_biolink_shexeval(self) -> None:
     """Evaluate ``validation/biolink-model.ttl`` against ``schemas/meta.shex``.

     Both files are resolved under the ``data`` directory next to this
     file's parent directory.  Every failure reason is printed before the
     final assertion so a failing run shows all problems, not just the
     first one.
     """
     base_dir = os.path.abspath(
         os.path.join(os.path.dirname(__file__), '..', 'data'))
     g = CFGraph()
     # Graph.load() was removed in rdflib 6; parse() is available in every
     # rdflib release and is what load() delegated to.
     g.parse(os.path.join(base_dir, 'validation', 'biolink-model.ttl'),
             format="turtle")
     evaluator = ShExEvaluator(
         g, os.path.join(base_dir, 'schemas', 'meta.shex'),
         "https://biolink.github.io/biolink-model/ontology/biolink.ttl",
         "http://bioentity.io/vocab/SchemaDefinition")
     result = evaluator.evaluate(debug=False)
     for rslt in result:
         if not rslt.result:
             print(f"Error: {rslt.reason}")
     self.assertTrue(all(r.result for r in result))
Пример #14
0
    def run_test(self, manifest_uri: str, num_entries: Optional[int]=None, verbose: bool=True, debug: bool=False,
                 stop_on_fail: bool=False, debug_slurps: bool=False, save_graph_dir: Optional[str]=None) \
            -> List[EvaluationResult]:
        """ Run the test identified by manifest_uri

        :param manifest_uri: uri of manifest
        :param num_entries: number of query results to test per manifest case (None means all)
        :param verbose: True means print each manifest case before it is processed
        :param debug: debug setting for shex evaluator
        :param stop_on_fail: True means stop the whole run at the first focus that fails
        :param debug_slurps: True means emit SPARQL_slurper statistics
        :param save_graph_dir: If present, save the final graph in this directory
        :return: the evaluation results gathered so far, in evaluation order
        """
        manifest = loads(self.fetch_uri(manifest_uri))
        rval: List[EvaluationResult] = []
        for case in manifest:
            if verbose:
                print(case._as_json_dumps())
            sparql_endpoint = case.data.replace("Endpoint: ", "")
            shex = self.fetch_uri(case.schemaURL)
            evaluator = ShExEvaluator(schema=shex, debug=debug)
            prefixes = PrefixLibrary(shex, SKOS=SKOS)
            sparql_query = case.queryMap.replace("SPARQL '''",
                                                 "").replace("'''@START", "")
            foci: List[str] = self.get_sparql_dataframe(sparql_endpoint,
                                                        sparql_query)
            if num_entries is not None:
                foci = foci[:num_entries]
            for focus in foci:
                # A new slurper is created for every focus
                slurper = SlurpyGraphWithAgent(sparql_endpoint)
                prefixes.add_bindings(slurper)
                print(f"Evaluating: {focus}")
                results = evaluator.evaluate(rdf=slurper,
                                             focus=focus,
                                             debug=debug,
                                             debug_slurps=debug_slurps,
                                             over_slurp=False)
                rval += results
                if save_graph_dir:
                    # [-1] keeps the whole focus as the name when it contains no '/'
                    element_name = focus.rsplit('/', 1)[-1]
                    file_name = os.path.join(save_graph_dir,
                                             element_name + '.ttl')
                    print(f"Writing: {file_name}")
                    slurper.serialize(file_name, format="turtle")
                if stop_on_fail and not all(r.result for r in results):
                    # Return rather than break: a break only leaves the inner
                    # loop and the remaining manifest cases would still run.
                    return rval
        return rval
Пример #15
0
def validate_items(schema, sparql):
    """Validate every item returned by a SPARQL query against a ShEx schema.

    :param schema: schema handed to ``ShExEvaluator``
    :param sparql: query handed to ``wikidata2df``; the result must expose an
        ``item`` column
    :return: list of ``[item, conforms, reason]`` rows, one per evaluation
        result; an item whose evaluation yields no result adds no row
    """
    items_to_validate = wikidata2df(sparql)["item"].to_list()
    results = []
    for item in tqdm(items_to_validate):
        entity_url = f"http://www.wikidata.org/entity/{item}"

        rdfdata = Graph()
        rdfdata.parse(f"{entity_url}.ttl")

        evaluation = ShExEvaluator(
            rdf=rdfdata,
            schema=schema,
            focus=entity_url,
        ).evaluate()
        # Record each result inside the loop.  Appending after the loop read a
        # variable that is unset (first item) or left over from the previous
        # item whenever the evaluation produced no result.
        for result in evaluation:
            results.append([item, bool(result.result), result.reason])

    return results
Пример #16
0
    def test_te_labels(self):
        """ Triple expression labels: the passing graph conforms, both failing graphs do not """
        focus = "http://examples.org/ex/t"

        passing_outcomes = ShExEvaluator(rdf=passing, schema=shex, focus=focus).evaluate(debug=False)
        pprint(passing_outcomes)
        self.assertTrue(passing_outcomes[0].result)

        for failing_rdf in (failing_1, failing_2):
            failing_outcomes = ShExEvaluator(rdf=failing_rdf, schema=shex, focus=focus).evaluate()
            self.assertFalse(failing_outcomes[0].result)
Пример #17
0
    def test_range_construct(self):
        """ A range can be a builtin type, a TypeDefinition or a ClassDefinition """
        slot_foci = [
            "http://bioentity.io/vocab/abstract",
            "http://bioentity.io/vocab/class_definition_is_a",
            "http://bioentity.io/vocab/defining_slots",
        ]
        with open(self.meta_rdf_path) as rdf_file:
            evaluator = ShExEvaluator(
                rdf_file.read(),
                shex3,
                focus=slot_foci,
                start="http://bioentity.io/vocab/SlotDefinition")
        passing_outcomes = evaluator.evaluate()
        self.assertTrue(self.eval_results(passing_outcomes))

        # Same evaluator, different RDF and focus: no result may pass
        failing_outcomes = evaluator.evaluate(
            rdf=fail_rdf_1, focus="http://bioentity.io/vocab/definitional")
        self.assertFalse(any(outcome.result for outcome in failing_outcomes))
Пример #18
0
 def shextest(self,
              rdf_file: str,
              shex_file: str,
              focus: str,
              cfgraph: bool = False) -> None:
     """Load a Turtle file and assert that ``focus`` passes the SchemaDefinition shape.

     :param rdf_file: file name inside the ``rdf`` directory one level above this file
     :param shex_file: file name inside the sibling ``shex`` directory
     :param focus: focus node handed to the evaluator
     :param cfgraph: True loads the data into a ``CFGraph`` instead of a ``Graph``
     """
     base_dir = os.path.abspath(
         os.path.join(os.path.dirname(__file__), '..'))
     g = CFGraph() if cfgraph else Graph()
     # Graph.load() was removed in rdflib 6; parse() is available in every
     # rdflib release and is what load() delegated to.
     g.parse(os.path.join(base_dir, 'rdf', rdf_file), format="turtle")
     evaluator = ShExEvaluator(
         g, os.path.join(base_dir, 'shex', shex_file), focus,
         "http://w3id.org/biolink/vocab/SchemaDefinition")
     result = evaluator.evaluate(debug=False)
     # Print every failure reason before asserting so none is hidden
     for rslt in result:
         if not rslt.result:
             print(f"Error: {rslt.reason}")
     self.assertTrue(all(r.result for r in result))
Пример #19
0
    def test_performance_problem(self):
        """ Test a performance problem brought about by two possible type arcs in a definition """
        schema_path = os.path.join(self.test_data, 'shex', 'issue_54.shex')
        evaluator = ShExEvaluator(
            rdf=rdf,
            schema=schema_path,
            focus="http://identifiers.org/drugbank:DB00005",
            start="https://w3id.org/biolink/vocab/Drug")
        outcomes = evaluator.evaluate()
        self.assertTrue(outcomes[0].result)
Пример #20
0
 def test_eric(self):
     """Print each result for ``INST.Eric`` against ``SCHOOL.Enrollee`` and expect it to fail."""
     prefixes = PrefixLibrary(rdf)
     evaluator = ShExEvaluator(
         rdf=rdf,
         schema=schema,
         focus=prefixes.INST.Eric,
         start=prefixes.SCHOOL.Enrollee)
     for outcome in evaluator.evaluate(debug=False):
         verdict = 'Passing' if outcome.result else 'Failing'
         print(f"{outcome.focus}: {verdict}: \n{outcome.reason}")
         self.assertFalse(outcome.result)
Пример #21
0
 def test_fail(self):
     """Foci absent from the graph fail with a "not in graph" reason; present ones pass."""
     prefixes = PrefixLibrary(shex)

     # A focus that exists in the graph conforms
     present = ShExEvaluator().evaluate(rdf, shex, focus=prefixes.EX.s, debug=False)
     self.assertTrue(present[0].result)

     # A focus that is missing from the graph fails with an explicit reason
     missing = ShExEvaluator().evaluate(rdf, shex, focus=prefixes.EX.t)
     self.assertFalse(missing[0].result)
     self.assertEqual('Focus: http://example.org/ex/t not in graph',
                      missing[0].reason)

     # With several foci, the results line up with the foci in order
     mixed = ShExEvaluator().evaluate(rdf, shex, focus=[prefixes.EX.s, prefixes.EX.t2])
     self.assertTrue(mixed[0].result)
     self.assertFalse(mixed[1].result)
     self.assertEqual('Focus: http://example.org/ex/t2 not in graph',
                      mixed[1].reason)
Пример #22
0
 def _validate(graph: Graph, shex: str, valid_graph: Graph, focus: URIRef,
               shape: URIRef) -> bool:
     """Evaluate ``focus`` against ``shape`` and copy its triples on success.

     :param graph: graph holding the data to evaluate
     :param shex: schema passed to the evaluator
     :param valid_graph: graph that receives every triple whose subject is
         ``focus`` when the first evaluation result passes
     :param focus: node to evaluate
     :param shape: start shape for the evaluation
     :return: the ``result`` attribute of the first evaluation result
     """
     outcomes = ShExEvaluator().evaluate(rdf=graph,
                                         shex=shex,
                                         focus=focus,
                                         start=shape)
     first = outcomes[0]
     if not first.result:
         return first.result
     for triple in graph.triples((focus, None, None)):
         valid_graph.add(triple)
     return first.result
Пример #23
0
 def test_empty_constructor(self):
     """An evaluator built with no arguments exposes the expected default attribute values."""
     blank = ShExEvaluator()
     # rdflib no longer emits unused prefixes -- an empty evaluator is now empty
     serialized_rdf = blank.rdf.strip()
     self.assertEqual("", serialized_rdf)
     self.assertIsNone(blank.schema)
     self.assertIsNone(blank.focus)
     self.assertEqual([], blank.foci)
     self.assertEqual([START], blank.start)
     self.assertEqual("turtle", blank.rdf_format)
     self.assertTrue(isinstance(blank.g, Graph))
Пример #24
0
 def evaluate(self, rdf, shex, resource, shex_type):
     """Evaluate ``resource`` against ``shex_type`` and collect the failure reasons.

     A ``PASS:`` line is printed for every result that passes.

     :param rdf: RDF handed to the evaluator
     :param shex: schema handed to the evaluator
     :param resource: focus of the evaluation
     :param shex_type: start shape of the evaluation
     :return: list holding the ``reason`` of every result that did not pass
     """
     outcomes = ShExEvaluator().evaluate(rdf, shex, focus=resource, start=shex_type)
     failures = []
     for outcome in outcomes:
         if not outcome.result:
             failures.append(outcome.reason)
             continue
         print("PASS:", str(outcome.focus), str(outcome.start))
     return failures
Пример #25
0
def run_shex_manifest():
    """Validate the items of the manifest named by ``SHEX_MANIFEST`` and log each outcome.

    Environment variables:
      * ``SHEX_MANIFEST`` -- URL of the manifest to process (required).
      * ``debug`` -- ``"True"`` enables evaluator debugging; any other value,
        or no value at all, disables it.

    Only cases whose ``data`` field starts with ``Endpoint:`` are processed.
    Each evaluation result is logged through ``WDItemEngine.log`` as INFO
    when it passes and as ERROR, with the failure reason, when it does not.
    """
    print(os.environ["SHEX_MANIFEST"])
    manifest = jsonasobj.loads(requests.get(os.environ["SHEX_MANIFEST"]).text)
    # Resolve the flag once, with a default: reading it per item left `debug`
    # unbound for any value other than "True"/"False" and raised KeyError
    # when the variable was not set at all.
    debug = os.environ.get("debug") == "True"
    for case in manifest:
        if case.data.startswith("Endpoint:"):
            sparql_endpoint = case.data.replace("Endpoint: ", "")
            schema = requests.get(case.schemaURL).text
            shex = ShExC(schema).schema
            evaluator = ShExEvaluator(schema=shex, debug=True)
            sparql_query = case.queryMap.replace("SPARQL '''",
                                                 "").replace("'''@START", "")

            df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
            for row in df["results"]["bindings"]:
                wdid = row["item"]["value"]
                slurpeddata = SlurpyGraph(sparql_endpoint)
                try:
                    results = evaluator.evaluate(rdf=slurpeddata,
                                                 focus=wdid,
                                                 debug=debug)
                    for result in results:
                        if result.result:
                            print(str(result.focus) + ": INFO")
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, 'CONFORMS', '')

                            wdi_core.WDItemEngine.log("INFO", msg)
                        else:
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, '', result.reason)
                            wdi_core.WDItemEngine.log("ERROR", msg)

                except RuntimeError:
                    # Skip this item and carry on with the next one
                    print(
                        "Continue after 1 minute, no validation happened on " +
                        wdid)
                    continue
0
    def test_biolink_shex_incorrect_rdf(self):
        """ Non-conforming RDF must yield one failing result listing the unmatched triples """
        self.single_file_generator('shexj', ShExGenerator, format='json')
        schema_path = env.expected_path('biolink-model.shexj')

        evaluator = ShExEvaluator(
            None,
            schema_path,
            "http://identifiers.org/drugbank:DB00005",
            BIOLINK_NS.Drug)

        # incorrect.ttl has 16 error lines (more or less).
        rdf_path = env.temp_file_path('incorrect.ttl')
        errs_path = env.temp_file_path('incorrect.errs')
        outcomes = evaluator.evaluate(rdf_path)
        self.assertFalse(self._evaluate_shex_results(outcomes, printit=False))
        self.assertEqual(1, len(outcomes))
        only = outcomes[0]
        self.assertTrue('Unmatched triples in CLOSED shape' in only.reason)
        tab_line_count = only.reason.count('\n\t')
        self.assertEqual(13, tab_line_count)
        # Write the error report only when it does not exist yet
        if not os.path.exists(errs_path):
            with open(errs_path, 'w') as errs_out:
                errs_out.write(shex_results_as_string(only))
Пример #27
0
def run_shex_manifest():
    """Evaluate every item of the Genewiki-ShEx diseases manifest and print the outcome.

    Only cases whose ``data`` field starts with ``Endpoint:`` are evaluated.
    Passing items print ``CONFORMS``; failing items print the focus and the
    shape applied, except for a short list of foci that are skipped silently.
    """
    # Alternative source: .../master/pathways/wikipathways/manifest_all.json
    manifest_loc = "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/diseases/manifest_all.json"
    manifest = jsonasobj.loads(requests.get(manifest_loc).text)

    # Failing foci that are not reported
    silenced_foci = [
        "http://www.wikidata.org/entity/Q33525",
        "http://www.wikidata.org/entity/Q62736",
        "http://www.wikidata.org/entity/Q112670"
    ]

    for case in manifest:
        print(case._as_json_dumps())
        if not case.data.startswith("Endpoint:"):
            continue

        sparql_endpoint = case.data.replace("Endpoint: ", "")
        schema_text = requests.get(case.schemaURL).text
        parsed_schema = ShExC(schema_text).schema

        evaluator = ShExEvaluator(schema=parsed_schema, debug=False)
        sparql_query = case.queryMap.replace("SPARQL '''", "")
        sparql_query = sparql_query.replace("'''@START", "")

        items = get_sparql_dataframe(sparql_endpoint, sparql_query)
        for wdid in items.item:
            # A new SlurpyGraph is created for every item
            slurped = SlurpyGraph(sparql_endpoint)
            outcomes = evaluator.evaluate(rdf=slurped, focus=wdid, debug=False, debug_slurps=True)
            for outcome in outcomes:
                if outcome.result:
                    print(str(outcome.focus) + ": CONFORMS")
                elif str(outcome.focus) not in silenced_foci:
                    print(
                        "item with issue: " + str(outcome.focus) + " - " + "shape applied: " + str(outcome.start))


# run_shex_manifest()
Пример #28
0
    def test_empty_constructor(self):
        evaluator = ShExEvaluator()
        self.assertEqual(
            """@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .""", evaluator.rdf.strip())
        self.assertIsNone(evaluator.schema)
        self.assertIsNone(evaluator.focus)
        self.assertEqual([], evaluator.foci)
        self.assertEqual([START], evaluator.start)
        self.assertEqual("turtle", evaluator.rdf_format)
        self.assertTrue(isinstance(evaluator.g, Graph))
Пример #29
0
def run_shex_manifest():
    """Validate the items of the Genewiki-ShEx reactome manifest and log each outcome.

    Only cases whose ``data`` field starts with ``Endpoint:`` are processed.
    Each evaluation result is logged through ``WDItemEngine.log`` as INFO
    when it passes and as ERROR, with the failure reason, when it does not.
    """
    manifest = jsonasobj.loads(
        requests.get(
            "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/pathways/reactome/manifest.json"
        ).text)
    for case in manifest:
        if case.data.startswith("Endpoint:"):
            sparql_endpoint = case.data.replace("Endpoint: ", "")
            schema = requests.get(case.schemaURL).text
            shex = ShExC(schema).schema
            evaluator = ShExEvaluator(schema=shex, debug=True)
            sparql_query = case.queryMap.replace("SPARQL '''",
                                                 "").replace("'''@START", "")

            df = wdi_core.WDItemEngine.execute_sparql_query(sparql_query)
            for row in df["results"]["bindings"]:
                wdid = row["item"]["value"]
                slurpeddata = SlurpyGraph(sparql_endpoint)
                try:
                    results = evaluator.evaluate(rdf=slurpeddata,
                                                 focus=wdid,
                                                 debug=False)
                    for result in results:
                        if result.result:
                            print(str(result.focus) + ": INFO")
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, 'CONFORMS', '')

                            wdi_core.WDItemEngine.log("INFO", msg)
                        else:
                            # Log the formatted message.  The previous code
                            # passed an undefined name here, so the first
                            # failing item raised NameError.  The reason is
                            # included so the error entry is actionable.
                            msg = wdi_helpers.format_msg(
                                wdid, wdid, None, '', result.reason)
                            wdi_core.WDItemEngine.log("ERROR", msg)

                except RuntimeError:
                    # Skip this item and carry on with the next one
                    print(
                        "Continue after 1 minute, no validation happened on " +
                        wdid)
                    continue
Пример #30
0
 def test_complete_constructor(self):
     """An evaluator given two foci returns one result per focus, each with its own failure reason."""
     here = os.path.dirname(os.path.abspath(__file__))
     test_rdf = os.path.join(here, '..', 'test_issues', 'data', 'Q18557122.ttl')
     wikidata = loc_prefixes.WIKIDATA
     evaluator = ShExEvaluator(
         test_rdf, shex_schema,
         [wikidata, wikidata.Q18557112],
         wikidata.cancer)
     results = evaluator.evaluate()

     # First focus: the bare namespace, which is not a node of the graph
     first = results[0]
     self.assertFalse(first.result)
     self.assertEqual(URIRef('http://www.wikidata.org/entity/'), first.focus)
     self.assertEqual(URIRef('http://www.wikidata.org/entity/cancer'), first.start)
     self.assertEqual('Focus: http://www.wikidata.org/entity/ not in graph',
                      first.reason)

     # Second focus: reported against a start shape the schema does not have
     second = results[1]
     self.assertEqual(URIRef('http://www.wikidata.org/entity/Q18557112'), second.focus)
     self.assertEqual(URIRef('http://www.wikidata.org/entity/cancer'), second.start)
     self.assertEqual(
         '  Shape: http://www.wikidata.org/entity/cancer not found in Schema',
         second.reason)