def test_shex(self):
    """Parse the '1refbnode_with_spanning_PN_CHARS_BASE1.shex' fixture from the local 'data' directory.

    The test passes as long as parse() does not raise.
    """
    data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
    fixture = os.path.join(data_dir, '1refbnode_with_spanning_PN_CHARS_BASE1.shex')
    # Read raw bytes and decode explicitly so no newline translation is applied
    with open(fixture, 'rb') as shex_file:
        raw_bytes = shex_file.read()
    # NOTE(review): an earlier revision re-encoded code points >= 65536 before
    # parsing; the decoded text is now handed to the parser unchanged.
    shex_text = raw_bytes.decode()
    parse(shex_text)
    self.assertTrue(True, "Parsing was valid")
def test_explicit_base(self):
    """IRIs resolve against the schema's own BASE directive (FOO), not the base passed to parse()."""
    schema_text = f'BASE <{str(FOO)}>\n<S1> {{<p1> [<o1>]}}'
    schema: ShExJ.Schema = parse(schema_text, str(BASE))
    first_shape = schema.shapes[0]
    self.assertEqual(str(FOO.S1), str(first_shape.id))
    constraint = first_shape.expression
    self.assertEqual(str(FOO.p1), str(constraint.predicate))
    self.assertEqual(str(FOO.o1), str(constraint.valueExpr.values[0]))
def validate_shexc(shexc_str: str, input_fname: str) -> bool:
    """
    Parse a ShExC string and check the resulting ShExJ

    The text is rejected up front when has_invalid_chars() flags it.  Otherwise it is
    parsed, reloaded through jsg_loads, validated with _is_valid() and finally handed
    to compare_json() together with input_fname.

    :param shexc_str: ShExC text to parse and validate
    :param input_fname: Name of source file for error reporting (also passed to compare_json)
    :return: True if pass, False if the text is rejected, fails to parse, or fails a check
    """
    if has_invalid_chars(shexc_str):
        print("ANTLR does not support unicode literals > 4 hex digits.")
        return False
    log = MemLogger('\t')
    logger = Logger(log)
    shexj = parse(shexc_str)
    # The None test has to come before the subscript assignment below; in the
    # other order a failed parse raises TypeError and the guard is never reached.
    if shexj is None:
        return False
    shexj['@context'] = "http://www.w3.org/ns/shex.jsonld"
    shex_obj = jsg_loads(shexj._as_json, ShExJ)
    # compare_json is only consulted when the object itself is valid (short-circuit)
    if not shex_obj._is_valid(logger) or not compare_json(input_fname, shex_obj._as_json, log):
        print("File: {} - ".format(input_fname))
        print(log.log)
        return False
    return True
def test_default_base(self):
    """Without a BASE directive, relative IRIs resolve against the base passed to parse()."""
    schema_text = '<S1> {<p1> [<o1>]}'
    schema: ShExJ.Schema = parse(schema_text, str(BASE))
    first_shape = schema.shapes[0]
    self.assertEqual(str(BASE.S1), str(first_shape.id))
    constraint = first_shape.expression
    self.assertEqual(str(BASE.p1), str(constraint.predicate))
    self.assertEqual(str(BASE.o1), str(constraint.valueExpr.values[0]))
def test_explicit_base(self):
    """The BASE directive inside the schema text wins over the base argument given to parse()."""
    source = f'BASE <{str(FOO)}>\n<S1> {{<p1> [<o1>]}}'
    parsed: ShExJ.Schema = parse(source, str(BASE))
    shape = parsed.shapes[0]
    self.assertEqual(str(FOO.S1), str(shape.id))
    triple = shape.expression
    self.assertEqual(str(FOO.p1), str(triple.predicate))
    first_value = triple.valueExpr.values[0]
    self.assertEqual(str(FOO.o1), str(first_value))
def test_default_base(self):
    """Shape id, predicate and value are all expected under BASE when the schema has no BASE directive."""
    source = '<S1> {<p1> [<o1>]}'
    parsed: ShExJ.Schema = parse(source, str(BASE))
    shape = parsed.shapes[0]
    self.assertEqual(str(BASE.S1), str(shape.id))
    triple = shape.expression
    self.assertEqual(str(BASE.p1), str(triple.predicate))
    first_value = triple.valueExpr.values[0]
    self.assertEqual(str(BASE.o1), str(first_value))
def test_explicit_uris(self):
    """Prefixed names expand through their PREFIX declaration (EX), regardless of either base."""
    schema_text = f""" BASE <{str(FOO)}> PREFIX ex: <{EX}> ex:S1 {{ex:p1 [ex:o1]}}"""
    schema: ShExJ.Schema = parse(schema_text, str(BASE))
    first_shape = schema.shapes[0]
    self.assertEqual(str(EX.S1), str(first_shape.id))
    constraint = first_shape.expression
    self.assertEqual(str(EX.p1), str(constraint.predicate))
    self.assertEqual(str(EX.o1), str(constraint.valueExpr.values[0]))
def test_explicit_uris(self):
    """ex:-prefixed names are expected under EX even though both a BASE directive and a base argument are present."""
    source = f""" BASE <{str(FOO)}> PREFIX ex: <{EX}> ex:S1 {{ex:p1 [ex:o1]}}"""
    parsed: ShExJ.Schema = parse(source, str(BASE))
    shape = parsed.shapes[0]
    self.assertEqual(str(EX.S1), str(shape.id))
    triple = shape.expression
    self.assertEqual(str(EX.p1), str(triple.predicate))
    first_value = triple.valueExpr.values[0]
    self.assertEqual(str(EX.o1), str(first_value))
def loads(self, schema_txt: str) -> ShExJ.Schema:
    """ Parse and return schema as a ShExJ Schema

    The raw text is remembered in self.schema_text.  Text whose first
    non-whitespace character is '{' is loaded as ShExJ (JSON); anything else,
    including empty or whitespace-only text, is handed to the ShExC parser
    together with self.base_location.

    :param schema_txt: ShExC or ShExJ representation of a ShEx Schema
    :return: ShEx Schema representation of schema
    """
    self.schema_text = schema_txt
    # startswith() instead of indexing [0]: indexing raises IndexError when the
    # stripped text is empty, before either parser gets a chance to look at it.
    if schema_txt.strip().startswith('{'):
        # TODO: figure out how to propagate self.base_location into this parse
        return cast(ShExJ.Schema, loads(schema_txt, ShExJ))
    return generate_shexj.parse(schema_txt, self.base_location)
def __init__(self, schema: Union[ShExJ.Schema, str], base: Optional[str] = None) -> None:
    """ Construct a converter

    :param schema: schema instance, or schema string to be parsed with generate_shexj.parse
    :param base: stored on the instance as self.base; it is not forwarded to the parser
    """
    self.base = base
    # Only text needs parsing; an existing Schema instance is kept as given
    already_parsed = isinstance(schema, ShExJ.Schema)
    self.schema = schema if already_parsed else generate_shexj.parse(schema)
def __init__(self, schema: Union[ShExJ.Schema, str], base: Optional[str] = None,
             namespaces: Optional[Union[NamespaceManager, Graph]] = None) -> None:
    """ Construct a converter

    :param schema: schema instance, or schema string to be parsed with generate_shexj.parse
    :param base: stored on the instance as self.base; it is not forwarded to the parser
    :param namespaces: Used for namespace maps; a Graph is reduced to its namespace_manager
    """
    self.base = base
    # Only text needs parsing; an existing Schema instance is kept as given
    self.schema = schema if isinstance(schema, ShExJ.Schema) else generate_shexj.parse(schema)
    if isinstance(namespaces, Graph):
        self.namespaces = namespaces.namespace_manager
    else:
        # Either a NamespaceManager or None - stored unchanged
        self.namespaces = namespaces
    self.referenced_prefixes = set()
def load_shapes(self):
    """Download the GO-CAM ShEx schema and populate self.shapes.

    self.shapes maps each selected shape name to a dict whose keys are the
    first element returned by contract_uri_wrapper() for a TripleConstraint
    predicate (presumably a CURIE such as "BFO:0000050" - confirm against that
    helper) and whose values are the basenames of the constraint's value
    shape(s).  The "rdf:type" entry is dropped from every loaded shape.

    Raises requests.HTTPError when the schema download returns an error
    status, and requests.Timeout when the server does not answer in time.
    """
    self.shapes = {}
    shapes_to_load = {
        "ProteinContainingComplex",
        "MolecularFunction",
        "CellularComponent",
        "BiologicalProcess",
        "AnatomicalEntity",
        "InformationBiomacromolecule",
    }
    shex_url = "https://raw.githubusercontent.com/geneontology/go-shapes/master/shapes/go-cam-shapes.shex"
    # Bound the wait and fail on HTTP errors instead of feeding an error page to the parser
    shex_response = requests.get(shex_url, timeout=30)
    shex_response.raise_for_status()
    shex_json_str = generate_shexj.parse(shex_response.text)._as_json_dumps()
    full_shex_ds = json.loads(shex_json_str)
    for shape in full_shex_ds["shapes"]:
        shape_name = path.basename(shape["id"])
        if shape_name not in shapes_to_load:
            continue
        predicates = self.shapes[shape_name] = {}
        # A shape without 'shapeExprs' is treated as its own single expression
        shexps = shape.get('shapeExprs')
        if shexps is None:
            shexps = [shape]
        for shexp in shexps:
            if not (isinstance(shexp, dict) and 'expression' in shexp):
                continue
            for exp in shexp['expression']['expressions']:
                if exp['type'] != 'TripleConstraint':
                    continue
                predicate = contract_uri_wrapper(exp['predicate'])[0]
                values = exp['valueExpr']
                if isinstance(values, dict):
                    values = values['shapeExprs']
                else:
                    values = [values]
                # path.basename(v) - Gets the Shape name minus the URL prefix
                predicates[predicate] = [path.basename(v) for v in values]
        # pop() rather than del: a shape with no rdf:type constraint must not raise KeyError
        predicates.pop("rdf:type", None)
    # TODO: Get this into ShEx spec or delete this when we decide against it
    self.shapes["CellularComponent"]["BFO:0000050"].append(
        "CellularComponent")  # CC-part_of->CC
def validate_shexc(shexc_str: str, input_fname: str) -> bool:
    """
    Parse a ShExC string and check the resulting ShExJ

    Anything the checks write to stdout is captured and only printed, after a
    "File: ..." header, when one of them fails.

    :param shexc_str: ShExC text to parse and validate
    :param input_fname: Name of source file for error reporting (also passed to compare_json)
    :return: True if pass
    """
    schema = parse(shexc_str)
    if schema is None:
        return False
    schema['@context'] = "http://www.w3.org/ns/shex.jsonld"
    shex_obj = jsg_loads(as_json(schema), ShExJ)

    captured = StringIO()
    with redirect_stdout(captured):
        # compare_json is only consulted when the object itself is valid (short-circuit)
        passed = bool(shex_obj._is_valid() and compare_json(input_fname, as_json(shex_obj), captured))
    if passed:
        return True
    print("File: {} - ".format(input_fname))
    print(captured.getvalue())
    return False
def generate_shexj_from_shexstament(shexstatement):
    """
    Parse the given shape expression text and return it as pretty-printed ShExJ.

    Any exception raised while parsing or serialising is reported on stdout
    and an empty string is returned instead.

    Parameters
    ----------
    shexstatement : str
      shex text handed to parse()

    Returns
    -------
    shexj
      shape expression in JSON (ShExJ), indented by 4 spaces, or "" on failure
    """
    try:
        # Round-trip through json so the output is re-indented
        document = json.loads(parse(shexstatement)._as_json)
        return json.dumps(document, indent=4, sort_keys=False)
    except Exception as e:
        print("Unable to parse. Error: " + str(e))
        return ""
E = m.group(0) with open(dir + E, 'r') as f: Edict[E] = f.read() else: print("{} does not match: {}".format(l, m)) # make sure BASE is set to http://www.wikidata.org/wiki/EntitySchema:Exyz in each Exyz file for E, schema in Edict.items(): newschema = '' base_is_set = False for line in schema.split('\n'): if line.startswith('BASE '): line = 'BASE <' + WIKIBASE + E + '>' base_is_set = True newschema = newschema + line + '\n' Edict[ E] = newschema if base_is_set else 'BASE <' + WIKIBASE + E + '>\n' + newschema # convert for E, schema in Edict.items(): print('-----processing {}:{}'.format(E, schema), file=sys.stderr) shexj = parse(InputStream(schema)) shexj['@context'] = "http://www.w3.org/ns/shex.jsonld" shexj['@id'] = WIKIBASE + E g = Graph().parse(data=as_json(shexj, indent=None), format="json-ld") # put original ShExC code as rdfs:comment into schema g.add((URIRef(WIKIBASE + E), RDFS.comment, Literal(schema))) # write to N3 g.serialize(open(dir + E + '.n3', "wb"), format='n3')
def test_no_base(self):
    """With neither a BASE directive nor a base argument, relative IRIs are expected to come back unchanged."""
    schema_text = '<S1> {<p1> [<o1>]}'
    schema: ShExJ.Schema = parse(schema_text)
    first_shape = schema.shapes[0]
    self.assertEqual("S1", str(first_shape.id))
    constraint = first_shape.expression
    self.assertEqual("p1", str(constraint.predicate))
    self.assertEqual("o1", str(constraint.valueExpr.values[0]))
def shexc_to_shexj(self, shexc, base=None) -> ShExJ.Schema:
    """Compile ShExC text and return the schema with the ShEx JSON-LD '@context' set.

    :param shexc: ShExC text to compile
    :param base: passed to parse() as default_base
    :return: the parsed schema; the test fails with "Compile error" if parse() returns None
    """
    compiled: ShExJ.Schema = parse(shexc, default_base=base)
    # Fail the test here rather than on the subscript assignment below
    self.assertIsNotNone(compiled, "Compile error")
    compiled['@context'] = "http://www.w3.org/ns/shex.jsonld"
    return compiled
def test_shex(self):
    """The JSON rendering of the parsed `shex` text must equal the expected `shexj` value."""
    self.assertEqual(shexj, as_json(parse(shex)))
def test_1(self):
    """Smoke test: parsing shex_schema has to complete without raising."""
    _ = parse(shex_schema)
    # Reaching this line is the whole point; the parse result itself is not inspected
    self.assertTrue(True, "Parser didn't die")
def test_no_base(self):
    """Shape id, predicate and value are expected as the bare names 'S1', 'p1' and 'o1' when no base is supplied."""
    source = '<S1> {<p1> [<o1>]}'
    parsed: ShExJ.Schema = parse(source)
    shape = parsed.shapes[0]
    self.assertEqual("S1", str(shape.id))
    triple = shape.expression
    self.assertEqual("p1", str(triple.predicate))
    first_value = triple.valueExpr.values[0]
    self.assertEqual("o1", str(first_value))