def load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password,
                namespace=None, skip_person=False, confirmed_orcid_id=False):
    with Store(data_path) as store:
        # Crosswalk
        (graph, profile, person_uri) = default_execute(orcid_id, namespace=namespace, person_uri=person_uri,
                                                       person_id=person_id, skip_person=skip_person,
                                                       person_class=person_class,
                                                       confirmed_orcid_id=confirmed_orcid_id)

        graph_filepath = os.path.join(data_path, "%s.ttl" % orcid_id.lower())
        previous_graph = Graph(namespace_manager=ns_manager)
        # Load last graph
        if os.path.exists(graph_filepath):
            log.debug("Loading previous graph %s", graph_filepath)
            previous_graph.parse(graph_filepath, format="turtle")

        # Diff against last graph
        (both_graph, delete_graph, add_graph) = graph_diff(previous_graph, graph)

        # SPARQL Update
        log.info("Adding %s, deleting %s triples for %s", len(add_graph), len(delete_graph), orcid_id)
        sparql_delete(delete_graph, endpoint, username, password)
        sparql_insert(add_graph, endpoint, username, password)

        # Save new last graph
        log.debug("Saving new graph %s", graph_filepath)
        with codecs.open(graph_filepath, "w") as out:
            graph.serialize(format="turtle", destination=out)

        # Touch
        store.touch(orcid_id)

        return graph, add_graph, delete_graph
def parse(self, basefile):
    # Find out possible skeleton entries by loading the entire
    # graph of resource references, and find resources that only
    # exist as objects.
    #
    # Note: if we used download_from_triplestore we know that this list
    # is clean -- we could just iterate the graph w/o filtering
    g = Graph()
    self.log.info("Parsing %s" % basefile)
    g.parse(self.store.downloaded_path(basefile), format="nt")
    self.log.info("Compiling object set")
    # create a uri -> True dict mapping
    # (dict(zip(..., True)) raises TypeError because True is not iterable;
    #  dict.fromkeys builds the intended mapping)
    objects = dict.fromkeys((str(o).split("#")[0] for (s, p, o) in g), True)
    self.log.info("Compiling subject set")
    subjects = dict.fromkeys((str(s).split("#")[0] for (s, p, o) in g), True)
    self.log.info("%s objects, %s subjects. Iterating through existing objects"
                  % (len(objects), len(subjects)))

    for o in objects:
        if not o.startswith(self.config.url):
            continue
        if '9999:999' in o:
            continue
        if o in subjects:
            continue
        for repo in otherrepos:
            skelbase = repo.basefile_from_uri(o)  # pass the URI, not the repo object
            if skelbase:
                skel = repo.triples_from_uri(o)  # need to impl
                with self.store.open_distilled(skelbase, "wb") as fp:
                    fp.write(skel.serialize(format="pretty-xml"))
                self.log.info("Created skel for %s" % o)
def generate_artistlist(config, data):
    g = Graph('IOMemory', BNode())
    for artist_data in data['artists']:
        artist = URIRef(link(artist_data['link']) + "#subject")
        add_lang_names(g, artist, artist_data['names'], rel=[FOAF.name])
        g.add((artist, RDF.type, SCHEMA.MusicGroup))
    return g
def test_post_no_type_to_root(self):
    graph = Graph()
    created = BNode()
    graph.add((self.my_ktbs.uri, RDFS.seeAlso, created))
    graph.add((created, RDF.type, KTBS.Base))
    with assert_raises(RdfRestException):
        self.my_ktbs.post_graph(graph)
def generate_productlist(config, data):
    g = Graph('IOMemory', BNode())
    for product_data in data['products']:
        product = URIRef(link(product_data['link']) + "#subject")
        add_lang_names(g, product, product_data['names'], rel=[SCHEMA.name, DCTERMS.title])
        g.add((product, RDF.type, SCHEMA.CreativeWork))
    return g
def find_location(textlocation):
    """
    Returns a 2-tuple containing the RDFLib node for textlocation, as found by
    the GeoNames API search, and the RDF graph with its description in GeoNames.

    Raises NotFoundException if textlocation was not found in GeoNames.
    """
    payload = {'q': textlocation,
               'username': 'edsa_project',
               'featureClass': 'P',
               'isNameRequired': 'true',
               'maxRows': '1'}
    # TODO: For extra precision, countries need to be translated to ISO-3166.
    # The problem is that US locations have the state.
    r = requests.get('http://api.geonames.org/searchRDF', params=payload)
    g = Graph()
    g.parse(data=r.text, format="xml")

    spquery = """
        SELECT DISTINCT ?iri
        WHERE { ?iri gn:name ?y }
    """
    qres = g.query(spquery)
    iri = ''
    for row in qres:
        iri = row.iri
    if iri == '':
        raise NotFoundException("Could not find " + textlocation)
    else:
        return (iri, g)
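A hypothetical call sketch for find_location above; the place name is illustrative only, and running it requires network access to the GeoNames API:

if __name__ == '__main__':
    try:
        # node is the GeoNames resource, geo_graph its RDF description
        node, geo_graph = find_location("Berlin")
        print(node)
        print(geo_graph.serialize(format="turtle"))
    except NotFoundException as err:
        print(err)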
def _read_id_from_install_rdf(self, installrdfpath):
    from rdflib import Graph
    rdf = Graph()
    installrdf = rdf.parse(file=file(installrdfpath))
    for i in installrdf.all_nodes():
        if re.search(".*@.*\..*", i):
            return i.decode()
def testGraphAdd(self):
    g1 = self.graph
    g2 = Graph(store=g1.store)

    tarek = self.tarek
    # michel = self.michel
    bob = self.bob
    likes = self.likes
    # hates = self.hates
    pizza = self.pizza
    cheese = self.cheese

    g1.add((tarek, likes, pizza))
    g2.add((bob, likes, cheese))

    g3 = g1 + g2

    self.assertEquals(len(g3), 2)
    self.assertEquals((tarek, likes, pizza) in g3, True)
    self.assertEquals((tarek, likes, cheese) in g3, False)
    self.assertEquals((bob, likes, cheese) in g3, True)

    g1 += g2

    self.assertEquals(len(g1), 2)
    self.assertEquals((tarek, likes, pizza) in g1, True)
    self.assertEquals((tarek, likes, cheese) in g1, False)
    self.assertEquals((bob, likes, cheese) in g1, True)
def fragment(gp, broker, agora, channel, updating, gen):
    try:
        gp_match = re.search(r'\{(.*)\}', gp).groups(0)
        if len(gp_match) != 1:
            raise click.ClickException('Invalid graph pattern')

        STOA = {
            "broker_host": broker[0],
            "broker_port": broker[1],
            "agora_host": agora[0],
            "agora_port": agora[1],
            "exchange": channel[0],
            "topic_pattern": channel[1],
            "response_prefix": channel[2]
        }

        tps = re.split('\. ', gp_match[0])

        prefixes, fragment_gen = get_fragment_generator(*tps, monitoring=30, STOA=STOA,
                                                        updating=updating, gen=gen)

        graph = Graph()
        for prefix in prefixes:
            graph.bind(prefix, prefixes[prefix])
            click.echo('@prefix {}: <{}> .'.format(prefix, prefixes[prefix]))
        click.echo('')

        for chunk in fragment_gen:
            if chunk is not None:
                headers, (c, s, p, o) = chunk
                triple = u'{} {} {} .'.format(s.n3(graph.namespace_manager),
                                              p.n3(graph.namespace_manager),
                                              o.n3(graph.namespace_manager))
                click.echo(triple)
    except Exception as e:
        raise click.ClickException('There was a problem with the request: {}'.format(e.message))
def parse(self):
    if "workflowBundle.ttl" in self.zip.namelist():
        format = "n3"
        rootfile = "workflowBundle.ttl"
    elif "workflowBundle.rdf" in self.zip.namelist():
        rootfile = "workflowBundle.rdf"
        format = "xml"
    else:
        raise Scufl2Error("Can't find workflowBundle.ttl or "
                          "workflowBundle.rdf")

    self.uri = "file://" + urllib.pathname2url(os.path.abspath(self.filename)) + "/"
    early_graph = Graph()
    rdf_file = self.zip.open(rootfile)
    early_graph.parse(rdf_file, self.uri, format=format)

    sameBaseAs = list(early_graph.objects(subject=URIRef(self.uri),
                                          predicate=Scufl2NS.sameBaseAs))
    if not sameBaseAs:
        # Fall back to the file:/// URIs
        self.graph = early_graph
    else:
        # Use the sameBaseAs as the base
        self.uri = sameBaseAs[0]
        self.graph = Graph()
        # Reparse it
        rdf_file = self.zip.open(rootfile)
        self.graph.parse(rdf_file, self.uri, format=format)

    self.parse_all_graphs(self.uri)
def __init__(self, filepath, uri):
    graph.__init__(self)
    self.parse(filepath, format='turtle')
    self.filename = os.path.basename(filepath)
    self.uri = uri
    print(" - Resource {0} has {1} triples.".format(
        self.filename, len(self)))
def main():
    graph = Graph()
    graph.parse(sys.argv[1], format="n3")

    if len(sys.argv) > 2:
        doc = URIRef(sys.argv[2])
    else:
        docs = []
        for c in (RIF.Document, RIF.BLDDocument,
                  RIF.PRDDocument, RIF.CoreDocument):
            for x in graph.subjects(RDF.type, c):
                docs.append(x)
        if len(docs) == 1:
            doc = docs[0]
        elif len(docs) > 1:
            print >>sys.stderr, "Input contains multiple Document nodes"
            print >>sys.stderr, indent + ",".join([repr(x) for x in docs])
            print >>sys.stderr, "Name one on the command line to select it"
            sys.exit(1)
        elif len(docs) < 1:
            print >>sys.stderr, "Input contains no Document nodes"
            for (s, p, o) in graph:
                print s, p, o
            sys.exit(1)

    out = sys.stdout
    to_rif(out, graph, doc, root=True)
def testConjunction(self):
    self.addStuffInMultipleContexts()
    triple = (self.pizza, self.likes, self.pizza)
    # add to context 1
    graph = Graph(self.graph.store, self.c1)
    graph.add(triple)
    self.assertEquals(len(self.graph), len(graph))
def create_store_with_identifier(identifier):
    ident = URIRef(identifier)
    store = plugin.get("SQLAlchemy", Store)(identifier=ident)
    graph = Graph(store, identifier=ident)
    uri = Literal(os.environ.get("DATABASE_URL"))
    graph.open(uri, create=True)
    graph.parse(join(join(settings.BASE_DIR, 'static'), 'output.xml'))
def annotateConfidence(target, un, con, com):
    # thisAnnotation id is the full string, eg:
    # http://chartex.org/user/jjon/annotation/dc9d7cbdd0ebefb583e46fc2b79bc8cedde34d68
    # the last element being a hash (hashlib.sha1(oa:hastarget).hexdigest()) of this full string:
    # http://chartex.org/graphid/Person_11139might_bePerson_11339 (this triple is actually in there, why?, weird!
    target = re.sub('[<>]', '', target)
    thisAnnotationURI = "http://chartex.org/user/%s/annotation/%s" % (un, sha1(target).hexdigest())
    confidence = Literal(con) if con == 'nochange' else Literal(con, datatype=XSD.decimal)
    # TODO: if no change, create no confidenceMetric triple for the annotation OR insert original decimal value

    if (int(annotationExists('<' + thisAnnotationURI + '>')) > 0):
        return ("You've already annotated this statement: %s \nPresumably you could make a separate annotation with a different username. If you start doing that, you should keep track of all your usernames. When we have authentication and session logic, this won't be necessary.\n\nAnnotation triples:\n" % (target,),
                getSingleConfidenceAnnotation('<' + thisAnnotationURI + '>', 'application/rdf+xml'))
    else:
        thisann = URIRef(thisAnnotationURI)
        g = Graph()
        bodyNode = BNode()
        triples = [
            (thisann, RDF.type, oa.Annotation),
            (thisann, oa.hasTarget, URIRef(target)),
            (thisann, oa.hasBody, bodyNode),
            (bodyNode, chartex.suggestedConfidenceMetric, confidence),
            (bodyNode, chartex.userComment, Literal(com))
        ]
        for t in triples:
            g.add(t)

        r = requests.post(
            AGVM_VC_REPO + "/statements",
            headers={'Content-Type': 'text/turtle'},
            data=g.serialize(format='turtle'),
            auth=AG_AUTH
        )

        return (g.serialize(format='pretty-xml'), r.__dict__)
def test_graph_prefix():
    """
    This is issue https://github.com/RDFLib/rdflib/issues/313
    """
    g1 = Graph()
    g1.parse(data="""
    @prefix : <urn:ns1:> .
    :foo <p> 42.
    """, format="n3")

    g2 = Graph()
    g2.parse(data="""
    @prefix : <urn:somethingelse:> .
    <urn:ns1:foo> <p> 42.
    """, format="n3")

    assert isomorphic(g1, g2)

    q_str = ("""
    PREFIX : <urn:ns1:>
    SELECT ?val
    WHERE { :foo ?p ?val }
    """)
    q_prepared = prepareQuery(q_str)

    expected = [(Literal(42),)]

    eq_(list(g1.query(q_prepared)), expected)
    eq_(list(g2.query(q_prepared)), expected)

    eq_(list(g1.query(q_str)), expected)
    eq_(list(g2.query(q_str)), expected)
def open_store(identifier):
    ident = URIRef(identifier)
    store = plugin.get("SQLAlchemy", Store)(identifier=ident)
    graph = Graph(store, identifier=ident)
    uri = Literal(os.environ.get("DATABASE_URL"))
    graph.open(uri, create=False)
    return graph
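A minimal usage sketch for open_store above, assuming DATABASE_URL points at an existing rdflib-sqlalchemy store; the identifier below is illustrative only:

from rdflib.namespace import RDF

graph = open_store("urn:example:my-graph")
# list the subjects of every rdf:type statement in the store
for subj in graph.subjects(RDF.type, None):
    print(subj)
graph.close()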
class RDFPage(Page):
    format = None

    def __init__(self, response):
        self.data = Graph()
        self.links = FilterableList()
        self.queries = FilterableList(base_url=response.url)
        super(RDFPage, self).__init__(response)

    def extract_data(self):
        self.data = Graph()
        self.data.parse(data=self.response.text, format=self.format, publicID=self.url)

    def extract_links(self):
        for p, o in self.data.predicate_objects(URIRef(self.url)):
            if isinstance(o, URIRef):
                link = Link(p.toPython(), o.toPython())
                self.links.append(link)

    def extract_queries(self):
        rows = self.data.query('''
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX schema: <http://schema.org/>
            PREFIX hydra: <http://www.w3.org/ns/hydra/core#>

            SELECT ?rel ?template WHERE {
                ?url ?rel ?action .
                ?action rdf:type hydra:IriTemplate .
                ?action hydra:template ?template .
            }
        ''')
        for rel, template in rows:
            self.queries.append(Query(str(rel), str(template), base_url=self.response.url))
def rdf_get(self, departments):
    us_dept = URIRef('https://en.wikipedia.org/wiki/List_of_federal_agencies_in_the_United_States')
    g = Graph()

    for dept in departments:
        this_dept = URIRef('http://127.0.0.1:5000/departments/{0}'.format(urllib.quote(dept)))
        g.add((this_dept, RDF.type, us_dept,))

    return g.serialize(format='n3')
def test_history_turtle(self):
    with self.client as client:
        res = client.patch(
            '/d/',
            data=self.patch,
            content_type='application/json',
            headers={'Authorization': 'Bearer '
                     + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
        res = client.post(
            urlparse(res.headers['Location']).path + 'merge',
            buffered=True,
            headers={'Authorization': 'Bearer '
                     + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})

        res1 = self.client.get('/history.ttl')
        self.assertEqual(res1.status_code, http.client.OK)
        self.assertEqual(res1.headers['Content-Type'], 'text/turtle')
        self.assertEqual(
            res1.headers['Cache-Control'],
            'public, max-age={}'.format(cache.SHORT_TIME))
        self.assertEqual(
            res1.headers['Content-Disposition'],
            'attachment; filename="periodo-history.ttl"')

        g = Graph()
        g.parse(data=res1.get_data(as_text=True), format='turtle')
        self.assertIn((HOST['h#patch-1'],
                       FOAF.page,
                       HOST['patches/1/patch.jsonpatch']), g)
        self.assertIn((HOST['d'],
                       DCTERMS.provenance,
                       HOST['h#changes']), g)

        res3 = self.client.get('/history.ttl/')
        self.assertEqual(res3.status_code, http.client.NOT_FOUND)
def get_response(project_uri, query_string, include_n3=True):
    d = {
        'results': list(),
    }

    project_graph = projects.get_project_graph(project_uri)
    graph = Graph()

    query_set = SearchQuerySet().models(Text).filter(
        content=AutoQuery(query_string), project__exact=project_uri
    )

    highlighter = Highlighter(query_string, html_tag='span', css_class=CSS_RESULT_MATCH_CLASS)
    title_highlighter = TitleHighlighter(query_string, html_tag='span', css_class=CSS_RESULT_MATCH_CLASS)

    d['spelling_suggestion'] = query_set.spelling_suggestion()

    for result in query_set:
        text_uri = URIRef(result.get_stored_fields()['identifier'])

        if annotations.has_annotation_link(project_graph, text_uri) or projects.is_top_level_project_resource(project_uri, text_uri):
            d['results'].append(search_result_to_dict(result, project_uri, highlighter, title_highlighter))

            if include_n3:
                graph += utils.metadata_triples(project_graph, text_uri)

    if include_n3:
        d['n3'] = graph.serialize(format='n3')

    return d
def test_period_json(self):
    res1 = self.client.get('/trgkvwbjd.json')
    self.assertEqual(res1.status_code, http.client.OK)
    self.assertEqual(res1.headers['Content-Type'], 'application/json')
    self.assertEqual(
        res1.headers['Content-Disposition'],
        'attachment; filename="periodo-period-trgkvwbjd.json"')
    context = json.loads(res1.get_data(as_text=True))['@context']
    self.assertEqual(context, [
        'http://localhost.localdomain:5000/c',
        {'@base': 'http://n2t.net/ark:/99152/'}])

    res2 = self.client.get('/trgkvwbjd.jsonld')
    self.assertEqual(res2.status_code, http.client.OK)
    self.assertEqual(res2.headers['Content-Type'], 'application/ld+json')
    self.assertEqual(
        res2.headers['Content-Disposition'],
        'attachment; filename="periodo-period-trgkvwbjd.json"')

    jsonld = json.loads(res1.get_data(as_text=True))
    context = json.loads(self.client.get('/c', buffered=True)
                         .get_data(as_text=True))
    g = Graph().parse(
        data=json.dumps({**jsonld, **context}), format='json-ld')
    self.assertIsNone(
        g.value(predicate=RDF.type, object=SKOS.ConceptScheme))
    self.assertIn((PERIODO['p0trgkvwbjd'],
                   FOAF.isPrimaryTopicOf,
                   HOST['trgkvwbjd.json']), g)
    self.assertIn((HOST['trgkvwbjd.json'],
                   VOID.inDataset,
                   HOST['d']), g)
    self.assertIn((PERIODO['p0trgkvwbjd'],
                   SKOS.inScheme,
                   PERIODO['p0trgkv']), g)

    res3 = self.client.get('/trgkvwbjd.json.html')
    self.assertEqual(res3.status_code, http.client.OK)
    self.assertEqual(res3.headers['Content-Type'], 'text/html')
def ext_json():
    rdfUrl = ''
    tok = Tokenizer()
    if request.method == 'POST':
        rdf = request.form['data']
        status_test = "0"  # request.form['status']
        filters = ""  # request.form['exculdeurls']
        # rdf = "http://jpp.no-ip.org/MAD_J.rdf"
        try:
            # r = requests.get(rdf)
            gg = Graph()
            # g.load(rdfUrl)
            rdf_content = StringIO.StringIO(rdf.encode('utf-8'))
            # print rdf_content.readline()
            gg.parse(rdf_content, format="xml")
            ext = Extractor(gg)
            uris = ext.getUris()
            mapping = MapFactory()
            for uri in uris:
                term = tok.tokenized_url(uri)
                uri_status = ""
                if status_test == "1":
                    uri_status = ext.testUri(uri)
                else:
                    uri_status = "N/A"
                uri_lookup = str(uri) + "\""
                lnum = ext.get_lines(rdf_content, uri_lookup)
                ent = MapEntry(uri, term, "", lnum, uri_status)
                mapping.add(ent)
            jsonized_result = json.dumps(mapping.get())
            return Response(jsonized_result, mimetype='application/json')
        except requests.exceptions.ConnectionError:
            X2Rwarning = 'X2R Warning: The requested URL raises ConnectionError~!!!'
            return X2Rwarning
def all_products():
    params = request.args.get('categoria')
    g = Graph()
    try:
        g.parse('prueba.rdf', format='xml')
    except Exception, e:
        print str(e)
def __init__(self, err, data, namespace=None):
    self.err = err
    self.manifest = u"urn:mozilla:install-manifest"
    self.namespace = namespace or "http://www.mozilla.org/2004/em-rdf"

    if isinstance(data, types.StringTypes):
        data = StringIO(data)  # Wrap data in a pseudo-file

    from rdflib.plugins.parsers import rdfxml
    orig_create_parser = rdfxml.create_parser

    try:
        # Patch rdflib to not resolve URL entities.
        def create_parser(*args, **kwargs):
            parser = orig_create_parser(*args, **kwargs)
            parser.setEntityResolver(AddonRDFEntityResolver(err))
            return parser

        rdfxml.create_parser = create_parser

        # Load up and parse the file in XML format.
        graph = Graph()
        graph.parse(data, format="xml")
        self.rdf = graph
    except ParserError as ex:
        # Re-raise the exception in a local exception type.
        raise RDFException(message=ex.message)
    except SAXParseException as ex:
        # Raise the SAX parse exceptions so we get some line info.
        raise RDFException(orig_exception=ex)
    finally:
        # If we fail, we don't want to sully up the creation function.
        rdfxml.create_parser = orig_create_parser
def test_post_no_type_to_base(self):
    graph = Graph()
    created = BNode()
    graph.add((self.my_ktbs.uri, RDFS.seeAlso, created))
    graph.add((created, RDF.type, KTBS.hasModel))  # in correct NS
    with assert_raises(RdfRestException):
        self.my_ktbs.post_graph(graph)
def test_load_from_model(self):
    """Can we round trip through a RDF model"""
    model = Graph()
    path = '/root/42BW9AAXX/C1-38/Project_12345/'
    filename = '12345_AAATTT_L003_R1_001.fastq.gz'
    seq = sequences.parse_fastq(path, filename)
    seq.save_to_model(model)

    seq_id = 'file://' + path + filename
    seqNode = URIRef(seq_id)
    libNode = URIRef('http://localhost/library/12345')
    model.add((seqNode, libraryOntology['library'], libNode))
    seq2 = sequences.SequenceFile.load_from_model(model, seq_id)

    self.assertEqual(seq.flowcell, seq2.flowcell)
    self.assertEqual(seq.flowcell, '42BW9AAXX')
    self.assertEqual(seq.filetype, seq2.filetype)
    self.assertEqual(seq2.filetype, 'split_fastq')
    self.assertEqual(seq.lane, seq2.lane)
    self.assertEqual(seq2.lane, '3')
    self.assertEqual(seq.read, seq2.read)
    self.assertEqual(seq2.read, 1)
    self.assertEqual(seq.project, seq2.project)
    self.assertEqual(seq2.project, '12345')
    self.assertEqual(seq.index, seq2.index)
    self.assertEqual(seq2.index, 'AAATTT')
    self.assertEqual(seq.split, seq2.split)
    self.assertEqual(seq2.split, '001')
    self.assertEqual(seq.cycle, seq2.cycle)
    self.assertEqual(seq.pf, seq2.pf)
    self.assertEqual(seq2.libraryNode, libNode)
    self.assertEqual(seq.path, seq2.path)
def test_dataset_description_linksets(self):
    res = self.client.get('/.well-known/void')
    self.assertEqual(res.status_code, http.client.OK)
    self.assertEqual(res.headers['Content-Type'], 'text/turtle')
    g = Graph()
    g.parse(format='turtle', data=res.get_data(as_text=True))

    # http://dbpedia.org/void/Dataset
    q = sparql.prepareQuery('''
        SELECT ?triples
        WHERE {
            ?linkset a void:Linkset .
            ?linkset void:subset <http://n2t.net/ark:/99152/p0d> .
            ?linkset void:subjectsTarget <http://n2t.net/ark:/99152/p0d> .
            ?linkset void:linkPredicate ?predicate .
            ?linkset void:objectsTarget ?dataset .
            ?linkset void:triples ?triples .
        }
    ''', initNs={'void': VOID})

    dbpedia = URIRef('http://dbpedia.org/void/Dataset')
    triples = next(iter(g.query(
        q, initBindings={'dataset': dbpedia,
                         'predicate': DCTERMS.spatial})))['triples'].value
    self.assertEqual(triples, 3)

    worldcat = URIRef('http://purl.oclc.org/dataset/WorldCat')
    triples = next(iter(g.query(
        q, initBindings={'dataset': worldcat,
                         'predicate': DCTERMS.isPartOf})))['triples'].value
    self.assertEqual(triples, 1)
def test_post_bad_type_to_base(self):
    graph = Graph()
    created = BNode()
    graph.add((self.my_ktbs.uri, KTBS.contains, created))
    graph.add((created, RDF.type, RDFS.Resource))
    with assert_raises(RdfRestException):
        self.my_ktbs.post_graph(graph)
def test_period_turtle(self):
    res1 = self.client.get('/trgkvwbjd.ttl')
    self.assertEqual(res1.status_code, http.client.OK)
    self.assertEqual(res1.headers['Content-Type'], 'text/turtle')
    self.assertEqual(
        res1.headers['Cache-Control'],
        'public, max-age={}'.format(cache.SHORT_TIME))
    self.assertEqual(
        res1.headers['Content-Disposition'],
        'attachment; filename="periodo-period-trgkvwbjd.ttl"')
    g = Graph().parse(data=res1.get_data(as_text=True), format='turtle')
    self.assertIsNone(
        g.value(predicate=RDF.type, object=SKOS.ConceptScheme))
    self.assertIn((PERIODO['p0trgkvwbjd'],
                   FOAF.isPrimaryTopicOf,
                   HOST['trgkvwbjd.ttl']), g)
    self.assertIn((HOST['trgkvwbjd.ttl'],
                   VOID.inDataset,
                   HOST['d']), g)
    self.assertIn((PERIODO['p0trgkvwbjd'],
                   SKOS.inScheme,
                   PERIODO['p0trgkv']), g)

    res2 = self.client.get('/trgkvwbjd.ttl.html')
    self.assertEqual(res2.status_code, http.client.OK)
    self.assertEqual(res2.headers['Content-Type'], 'text/html')
    self.assertEqual(
        res2.headers['Cache-Control'],
        'public, max-age={}'.format(cache.SHORT_TIME))
def setUp(self):
    self.manifest = manifest = Graph(store=self.store)
    manifest.open(self.path)
    manifest.load(
        cached_file(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/Manifest.rdf"))
class SDOGraphSetupTestCase(unittest.TestCase): @classmethod def loadGraphs(self): from rdflib import Graph import rdflib self.rdflib_data = Graph() store = getMasterStore() graphs = list(store.graphs()) log.info("Loading test graph from MasterStore") for g in graphs: id = str(g.identifier) if not id.startswith("http://"): #skip some internal graphs continue self.rdflib_data += g @classmethod def setUpClass(self): log.info("Graph tests require rdflib.") try: log.info("Trying to import rdflib...") import rdflib from rdflib import Graph except Exception as e: raise unittest.SkipTest( "Need rdflib installed to do graph tests: %s" % e) SDOGraphSetupTestCase.loadGraphs() def test_graphsLoaded(self): self.assertTrue( len(self.rdflib_data) > 0, "Graph rdflib_data should have some triples in it.") # SPARQLResult http://rdflib.readthedocs.org/en/latest/apidocs/rdflib.plugins.sparql.html # "A list of dicts (solution mappings) is returned" def test_found_sixplus_inverseOf(self): inverseOf_results = self.rdflib_data.query( "select ?x ?y where { ?x <http://schema.org/inverseOf> ?y }") log.info("inverseOf result count: %s" % len(inverseOf_results)) self.assertTrue( len(inverseOf_results) >= 6, "Six or more inverseOf expected. Found: %s " % len(inverseOf_results)) def test_even_number_inverseOf(self): inverseOf_results = self.rdflib_data.query( "select ?x ?y where { ?x <http://schema.org/inverseOf> ?y }") self.assertTrue( len(inverseOf_results) % 2 == 0, "Even number of inverseOf triples expected. Found: %s " % len(inverseOf_results)) def test_non_equal_inverseOf(self): results = self.rdflib_data.query( "select ?x ?y where { ?x <http://schema.org/inverseOf> ?y }") for result in results: self.assertTrue( result[0] != result[1], "%s should not be equal to %s" % (result[0], result[1])) def test_non_equal_supercededBy(self): results = self.rdflib_data.query( "select ?x ?y where { ?x <http://schema.org/supercededBy> ?y }") for result in results: self.assertTrue( result[0] != result[1], "%s should not be equal to %s" % (result[0], result[1])) @unittest.expectedFailure # autos def test_needlessDomainIncludes(self): global warnings # check immediate subtypes don't declare same domainIncludes # TODO: could we use property paths here to be more thorough? # rdfs:subClassOf+ should work but seems not to. ndi1 = ('''SELECT ?prop ?c1 ?c2 WHERE { ?prop <http://schema.org/domainIncludes> ?c1 . ?prop <http://schema.org/domainIncludes> ?c2 . ?c1 rdfs:subClassOf ?c2 . FILTER (?c1 != ?c2) . FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?prop ''') ndi1_results = self.rdflib_data.query(ndi1) if (len(ndi1_results) > 0): for row in ndi1_results: warn = "WARNING property %s defining domain, %s, [which is subclassOf] %s unnecessarily" % ( row["prop"], row["c1"], row["c2"]) warnings.append(warn) log.info(warn + "\n") self.assertEqual( len(ndi1_results), 0, "No subtype need redeclare a domainIncludes of its parents. Found: %s " % len(ndi1_results)) @unittest.expectedFailure def test_needlessRangeIncludes(self): global warnings # as above, but for range. We excuse URL as it is special, not best seen as a Text subtype. # check immediate subtypes don't declare same domainIncludes # TODO: could we use property paths here to be more thorough? nri1 = ('''SELECT ?prop ?c1 ?c2 WHERE { ?prop <http://schema.org/rangeIncludes> ?c1 . ?prop <http://schema.org/rangeIncludes> ?c2 . ?c1 rdfs:subClassOf ?c2 . FILTER (?c1 != ?c2) . FILTER (?c1 != <http://schema.org/URL>) . 
FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?prop ''') nri1_results = self.rdflib_data.query(nri1) if (len(nri1_results) > 0): for row in nri1_results: warn = "WARNING property %s defining range, %s, [which is subclassOf] %s unnecessarily" % ( row["prop"], row["c1"], row["c2"]) warnings.append(warn) log.info(warn + "\n") self.assertEqual( len(nri1_results), 0, "No subtype need redeclare a rangeIncludes of its parents. Found: %s" % len(nri1_results)) # def test_supersededByAreLabelled(self): # supersededByAreLabelled_results = self.rdflib_data.query("select ?x ?y ?z where { ?x <http://schema.org/supersededBy> ?y . ?y <http://schema.org/name> ?z }") # self.assertEqual(len(inverseOf_results ) % 2 == 0, True, "Even number of inverseOf triples expected. Found: %s " % len(inverseOf_results ) ) def test_validRangeIncludes(self): nri1 = ('''SELECT ?prop ?c1 WHERE { ?prop <http://schema.org/rangeIncludes> ?c1 . OPTIONAL{ ?c1 rdf:type ?c2 . ?c1 rdf:type rdfs:Class . }. FILTER (!BOUND(?c2)) FILTER NOT EXISTS { ?prop <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?prop ''') nri1_results = self.rdflib_data.query(nri1) for row in nri1_results: log.info("Property %s invalid rangeIncludes value: %s\n" % (row["prop"], row["c1"])) self.assertEqual( len(nri1_results), 0, "RangeIncludes should define valid type. Found: %s" % len(nri1_results)) def test_validDomainIncludes(self): nri1 = ('''SELECT ?prop ?c1 WHERE { ?prop <http://schema.org/domainIncludes> ?c1 . OPTIONAL{ ?c1 rdf:type ?c2 . ?c1 rdf:type rdfs:Class . }. FILTER (!BOUND(?c2)) FILTER NOT EXISTS { ?prop <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?prop ''') nri1_results = self.rdflib_data.query(nri1) for row in nri1_results: log.info("Property %s invalid domainIncludes value: %s\n" % (row["prop"], row["c1"])) self.assertEqual( len(nri1_results), 0, "DomainIncludes should define valid type. Found: %s" % len(nri1_results)) # These are place-holders for more sophisticated SPARQL-expressed checks. @unittest.expectedFailure def test_readSchemaFromRDFa(self): self.assertTrue( True, False, "We should know how to locally get /docs/schema_org_rdfa.html but this requires fixes to api.py." ) #@unittest.expectedFailure def test_simpleLabels(self): s = "" complexLabels = self.rdflib_data.query( "select distinct ?term ?label where { ?term rdfs:label ?label FILTER regex(?label,'[^a-zA-Z0-9_ ]','i'). } " ) for row in complexLabels: s += (" term %s has complex label: %s\n" % (row["term"], row["label"])) self.assertTrue( len(complexLabels) == 0, "No complex term labels expected; alphanumeric only please. Found: %s Details: %s\n" % (len(complexLabels), s)) # Whitespace is tolerated, for now. # we don't deal well with non definitional uses of rdfs:label yet - non terms are flagged up. # https://github.com/schemaorg/schemaorg/issues/1136 # # TODO: https://github.com/schemaorg/schemaorg/issues/662 # # self.assertEqual(len(ndi1_results), 0, "No domainIncludes or rangeIncludes value should lack a type. Found: %s " % len(ndi1_results ) ) def test_labelMatchesTermId(self): nri1 = ('''select ?term ?label where { ?term rdfs:label ?label. 
BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") FILTER(SUBSTR(?strVal, 19) != STR(?label)) } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Label matching errors:") for row in nri1_results: log.info("Term '%s' has none-matching label: '%s'" % (row["term"], row["label"])) self.assertEqual( len(nri1_results), 0, "Term should have matching rdfs:label. Found: %s" % len(nri1_results)) def test_superTypesExist(self): nri1 = ('''select ?term ?super where { ?term rdfs:subClassOf ?super. ?term rdf:type rdfs:Class. FILTER NOT EXISTS { ?super rdf:type rdfs:Class } BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") BIND(STR(?super) AS ?superStrVal) FILTER(STRLEN(?superStrVal) >= 18 && SUBSTR(?superStrVal, 1, 18) = "http://schema.org/") FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Invalid SuperType errors!!!\n") for row in nri1_results: log.info("Term '%s' has nonexistent supertype: '%s'" % (row["term"], row["super"])) self.assertEqual( len(nri1_results), 0, "Types with nonexistent SuperTypes. Found: %s" % len(nri1_results)) def test_superPropertiesExist(self): nri1 = ('''select ?term ?super where { ?term rdf:type rdf:Property. ?term rdfs:subPropertyOf ?super. FILTER NOT EXISTS { ?super rdf:type rdf:Property } BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") BIND(STR(?super) AS ?superStrVal) FILTER(STRLEN(?superStrVal) >= 18 && SUBSTR(?superStrVal, 1, 18) = "http://schema.org/") FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Invalid Super-Property errors!!!\n") for row in nri1_results: log.info("Term '%s' has nonexistent super-property: '%s'" % (row["term"], row["super"])) self.assertEqual( len(nri1_results), 0, "Properties with nonexistent SuperProperties. Found: %s" % len(nri1_results)) def test_selfReferencingInverse(self): nri1 = ('''select ?term ?inverse where { ?term rdf:type rdf:Property. ?term <http://schema.org/inverseOf> ?inverse. BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") FILTER(str(?term) = str(?inverse)) FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Self referencing inverseOf errors!!!\n") for row in nri1_results: log.info("Term '%s' is defined as inverseOf self" % (row["term"])) self.assertEqual( len(nri1_results), 0, "Types with self referencing inverseOf Found: %s" % len(nri1_results)) def test_sameInverseAndSupercededByTarget(self): nri1 = ('''select ?term ?inverse ?super where { ?term rdf:type rdf:Property. ?term <http://schema.org/inverseOf> ?inverse. ?term <http://schema.org/supercededBy> ?super. 
BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") FILTER(str(?inverse) = str(?super)) FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("InverseOf supercededBy shared target errors!!!\n") for row in nri1_results: log.info( "Term '%s' defined ase inverseOf AND supercededBy %s" % (row["term"], row["inverse"])) self.assertEqual( len(nri1_results), 0, "Types with inverseOf supercededBy shared target Found: %s" % len(nri1_results)) @unittest.expectedFailure def test_commentEndWithPeriod(self): nri1 = ('''select ?term ?com where { ?term rdfs:comment ?com. BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") FILTER regex(str(?com), '[^.]$') } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Coment without ending '.' errors!!!\n") for row in nri1_results: log.info("Term '%s' has a comment without an ending '.'" % (row["term"])) self.assertEqual( len(nri1_results), 0, "Coment without ending '.' Found: %s" % len(nri1_results)) def test_typeLabelCase(self): nri1 = ('''select ?term ?label where { ?term rdf:type rdfs:Class. ?term rdfs:label ?label. BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") FILTER (!regex(str(?label), '^[0-9]*[A-Z].*')) } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Type label [A-Z] errors!!!\n") for row in nri1_results: log.info( "Type '%s' has a label without upper case 1st character" % (row["term"])) self.assertEqual( len(nri1_results), 0, "Type label not [A-Z] 1st non-numeric char Found: %s" % len(nri1_results)) def test_propertyLabelCase(self): nri1 = ('''select ?term ?label where { ?term rdf:type rdf:Property. ?term rdfs:label ?label. BIND(STR(?term) AS ?strVal) FILTER(STRLEN(?strVal) >= 18 && SUBSTR(?strVal, 1, 18) = "http://schema.org/") FILTER (!regex(str(?label), '^[0-9]*[a-z].*')) } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Property label [a-z] errors!!!\n") for row in nri1_results: log.info( "Property '%s' has a label without lower case 1st non-numeric character" % (row["term"])) self.assertEqual( len(nri1_results), 0, "Property label not [a-z] 1st char Found: %s" % len(nri1_results)) def test_superTypeInAttic(self): nri1 = ('''select ?term ?super where { { ?term rdfs:subClassOf ?super. } UNION { ?term rdfs:subPropertyOf ?super. } ?super <http://schema.org/isPartOf> <http://attic.schema.org> . FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Super-term in attic errors!!!\n") for row in nri1_results: log.info("Term '%s' is sub-term of %s a term in attic" % (row["term"], row["super"])) self.assertEqual(len(nri1_results), 0, "Super-term in attic Found: %s" % len(nri1_results)) def test_referenceTermInAttic(self): nri1 = ('''select ?term ?rel ?ref where { { ?term <http://schema.org/domainIncludes> ?ref. ?term ?rel ?ref. } UNION { ?term <http://schema.org/rangeIncludes> ?ref. ?term ?rel ?ref. } UNION { ?term <http://schema.org/inverseOf> ?ref. ?term ?rel ?ref. } UNION { ?term <http://schema.org/supercededBy> ?ref. ?term ?rel ?ref. } ?ref <http://schema.org/isPartOf> <http://attic.schema.org> . 
FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Reference to attic term errors!!!\n") for row in nri1_results: log.info( "Term '%s' makes a %s reference to %s a term in attic" % (row["term"], row["rel"], row["ref"])) self.assertEqual( len(nri1_results), 0, "Reference to attic term Found: %s" % len(nri1_results)) def test_termIn2PlusExtensions(self): nri1 = ('''select ?term (count(?part) as ?count) where { ?term <http://schema.org/isPartOf> ?part. } GROUP BY ?term HAVING (count(?part) > 1) ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Term in +1 extensions errors!!!\n") for row in nri1_results: log.info("Term '%s' isPartOf %s extensions" % (row["term"], row["count"])) self.assertEqual( len(nri1_results), 0, "Term in +1 extensions Found: %s" % len(nri1_results)) def test_termNothttps(self): nri1 = ('''select distinct ?term where { ?term ?p ?o. FILTER strstarts(str(?term),"https://schema.org") } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Term defined as https errors!!!\n") for row in nri1_results: log.info("Term '%s' is defined as https " % (row["term"])) self.assertEqual( len(nri1_results), 0, "Term defined as https Found: %s" % len(nri1_results)) def test_targetNothttps(self): nri1 = ('''prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix schema: <http://schema.org/> select ?term ?target where { ?term schema:domainIncludes | schema:rangeIncludes | rdfs:subClassOf | rdfs:subPropertyOf | schema:supercededBy | schema:inverseOf ?target. filter strstarts(str(?target),"https://schema.org") } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info("Target defined as https errors!!!\n") for row in nri1_results: log.info("Term '%s' references term %s as https " % (row["term"], row["target"])) self.assertEqual( len(nri1_results), 0, "Term defined as https Found: %s" % len(nri1_results)) @unittest.expectedFailure def test_EnumerationWithoutEnums(self): nri1 = ('''select ?term where { ?term rdfs:subClassOf/rdfs:subClassOf* <http://schema.org/Enumeration> . FILTER NOT EXISTS { ?enum a ?term. } FILTER NOT EXISTS { ?term <http://schema.org/isPartOf> <http://attic.schema.org> .} } ORDER BY ?term ''') nri1_results = self.rdflib_data.query(nri1) if len(nri1_results): log.info( "Enumeration Type without Enumeration value(s) errors!!!\n") for row in nri1_results: log.info("Enumeration Type '%s' has no matching enum values" % (row["term"])) self.assertEqual( len(nri1_results), 0, "Enumeration Type without Enumeration value(s) Found: %s" % len(nri1_results))
#! /usr/bin/env python
from rdflib import Graph, URIRef
from rdflib.plugins.stores import sparqlstore

endpoint = 'http://<IP>:7200/repositories/SciGraph'
store = sparqlstore.SPARQLStore()
store.open(endpoint)

graph_name_ref = URIRef(u'http://www.springernature.com/scigraph/graphs/articles.dds')
ng = Graph(store, identifier=graph_name_ref)
store.bind('sg', 'http://www.springernature.com/scigraph/ontologies/core/')

q = 'select ?s ?t where { ?s a sg:Article . ?s sg:title ?t } limit 10 '
print(q)
for s, o in ng.query(q):
    print 'article Id:' + s + '\t article Title:' + o
from rdflib.namespace import RDF
from rdflib import Graph, URIRef, BNode
import sys
import re

g = Graph()
g.parse(sys.argv[1], format="turtle")

roots = set([s for s, p, o in g if len(list(g.subjects(None, s))) == 0])


def make_name(n):
    s = str(n)
    i = 0
    if '#' in s:
        i = s.rindex('#')
    if '/' in s:
        i = max(i, s.rindex('/'))
    if i + 1 < len(s) and s[i + 1].isdigit():
        return "Synset " + s[i + 1:]
    t = s[i + 1:]
    if t == "":
        return "_node"
    else:
        return s[i + 1:]


def write_obj(o):
    if isinstance(o, URIRef):
        return "<%s>" % str(o)
    elif isinstance(o, BNode):
import bs4
import requests
import time
import os
import urllib.parse
import csv
import glob
from rdflib import URIRef, BNode, Literal, Graph
from rdflib.namespace import RDF, RDFS, FOAF, XSD
from rdflib import Namespace

files = glob.glob("data/kotobank_kani/*.json")

arr = []

all = Graph()

t = "https://nakamura196.github.io/hi_person/term/type/Kani.json"
subject = URIRef(t)

stmt = (subject, URIRef("http://www.w3.org/2000/01/rdf-schema#label"), Literal("官位"))
all.add(stmt)

stmt = (subject, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
        URIRef("http://www.w3.org/2000/01/rdf-schema#Class"))
all.add(stmt)

path = t.replace("https://nakamura196.github.io/hi_person", "../docs")
def fix_reference(timestamp, accept, citing, cited, reference):
    rf, of, cp, cdh = create_resources()
    storer = Storer(cp.graph_set(),
                    context_map={context_path: context_file_path},
                    dir_split=dir_split_number,
                    n_file_item=items_per_file,
                    default_dir=default_dir)

    r_text = unquote(reference)

    g_add_be = Graph(identifier=base_iri + "be/")
    g_remove_be = Graph(identifier=base_iri + "be/")
    g_add_br = Graph(identifier=base_iri + "br/")
    g_remove_br = Graph(identifier=base_iri + "br/")

    ref_res = rf.retrieve_reference(base_iri + citing, base_iri + cited)
    g_add_be.add((ref_res, GraphEntity.has_content, Literal(r_text)))
    ref_res_text = rf.retrieve_reference_text(ref_res)
    g_remove_be.add((ref_res, GraphEntity.has_content, ref_res_text))

    if accept == "false":
        citing_res = URIRef(base_iri + citing)
        cited_res = URIRef(base_iri + cited)

        cur_time = datetime.fromtimestamp(
            int(timestamp)).strftime('%Y-%m-%dT%H:%M:%S')
        mod_date = str(rf.retrieve_modification_date(ref_res))

        if cur_time == mod_date:  # It didn't exist before
            cur_dir_path, cur_file_path = storer.dir_and_file_paths(
                g_remove_br, base_dir, base_iri)
            cur_g = storer.load(cur_file_path)
            # Graph.add() takes the triple as a single tuple; the loop variables
            # are named so they don't shadow the Storer instance above.
            for sub, pred, obj in cur_g.triples((cited_res, None, None)):
                if pred != RDF.type or obj != GraphEntity.expression:
                    g_remove_br.add((sub, pred, obj))
        else:  # It exists already
            new_cited = URIRef(
                str(cp.graph_set().add_br(cp.name, doi_curator, bcite_base_iri)))
            gen_prov_and_store_data(cp, rf, timestamp)

            g_remove_br.add((citing_res, GraphEntity.cites, cited_res))
            g_remove_be.add((ref_res, GraphEntity.references, cited_res))
            g_add_br.add((citing_res, GraphEntity.cites, new_cited))
            g_add_be.add((ref_res, GraphEntity.references, new_cited))

    storer.update(g_add_be, g_remove_be, base_dir, base_iri,
                  context_path, temp_dir_for_rdf_loading)
    storer.update(g_add_br, g_remove_br, base_dir, base_iri,
                  context_path, temp_dir_for_rdf_loading)
    storer.update_all([g_add_br, g_add_be], [g_remove_br, g_remove_be],
                      triplestore_url, base_dir)

    return timestamp, accept, citing, cited, quote(ref_res_text)
class GraphEntity(AbstractEntity): BIRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/biro/") C4O: ClassVar[Namespace] = Namespace("http://purl.org/spar/c4o/") CO: ClassVar[Namespace] = Namespace("http://purl.org/co/") CITO: ClassVar[Namespace] = Namespace("http://purl.org/spar/cito/") DATACITE: ClassVar[Namespace] = Namespace("http://purl.org/spar/datacite/") DCTERMS: ClassVar[Namespace] = Namespace("http://purl.org/dc/terms/") DEO: ClassVar[Namespace] = Namespace("http://purl.org/spar/deo/") DOCO: ClassVar[Namespace] = Namespace("http://purl.org/spar/doco/") FABIO: ClassVar[Namespace] = Namespace("http://purl.org/spar/fabio/") FOAF: ClassVar[Namespace] = Namespace("http://xmlns.com/foaf/0.1/") FRBR: ClassVar[Namespace] = Namespace("http://purl.org/vocab/frbr/core#") LITERAL: ClassVar[Namespace] = Namespace( "http://www.essepuntato.it/2010/06/literalreification/") OA: ClassVar[Namespace] = Namespace("http://www.w3.org/ns/oa#") OCO: ClassVar[Namespace] = Namespace("https://w3id.org/oc/ontology/") PRISM: ClassVar[Namespace] = Namespace( "http://prismstandard.org/namespaces/basic/2.0/") PRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/pro/") iri_has_subtitle: ClassVar[URIRef] = FABIO.hasSubtitle iri_has_publication_date: ClassVar[URIRef] = PRISM.publicationDate iri_bibliographic_reference: ClassVar[URIRef] = BIRO.BibliographicReference iri_references: ClassVar[URIRef] = BIRO.references iri_denotes: ClassVar[URIRef] = C4O.denotes iri_has_content: ClassVar[URIRef] = C4O.hasContent iri_intextref_pointer: ClassVar[URIRef] = C4O.InTextReferencePointer iri_is_context_of: ClassVar[URIRef] = C4O.isContextOf iri_singleloc_pointer_list: ClassVar[ URIRef] = C4O.SingleLocationPointerList iri_has_element: ClassVar[URIRef] = CO.element iri_citation: ClassVar[URIRef] = CITO.Citation iri_cites: ClassVar[URIRef] = CITO.cites iri_citation_characterisation: ClassVar[ URIRef] = CITO.hasCitationCharacterisation iri_has_citing_entity: ClassVar[URIRef] = CITO.hasCitingEntity iri_has_cited_entity: ClassVar[URIRef] = CITO.hasCitedEntity iri_oci: ClassVar[URIRef] = DATACITE.oci iri_doi: ClassVar[URIRef] = DATACITE.doi iri_pmid: ClassVar[URIRef] = DATACITE.pmid iri_pmcid: ClassVar[URIRef] = DATACITE.pmcid iri_orcid: ClassVar[URIRef] = DATACITE.orcid iri_xpath: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"] iri_intrepid: ClassVar[URIRef] = DATACITE["intrepid"] iri_xmlid: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"] iri_has_identifier: ClassVar[URIRef] = DATACITE.hasIdentifier iri_identifier: ClassVar[URIRef] = DATACITE.Identifier iri_isbn: ClassVar[URIRef] = DATACITE.isbn iri_issn: ClassVar[URIRef] = DATACITE.issn iri_url: ClassVar[URIRef] = DATACITE.url iri_uses_identifier_scheme: ClassVar[ URIRef] = DATACITE.usesIdentifierScheme iri_title: ClassVar[URIRef] = DCTERMS["title"] iri_caption: ClassVar[URIRef] = DEO.Caption iri_discourse_element: ClassVar[URIRef] = DEO.DiscourseElement iri_footnote: ClassVar[URIRef] = DOCO.Footnote iri_paragraph: ClassVar[URIRef] = DOCO.Paragraph iri_part: ClassVar[URIRef] = DOCO.Part iri_section: ClassVar[URIRef] = DOCO.Section iri_section_title: ClassVar[URIRef] = DOCO.SectionTitle iri_sentence: ClassVar[URIRef] = DOCO.Sentence iri_table: ClassVar[URIRef] = DOCO.Table iri_text_chunk: ClassVar[URIRef] = DOCO.TextChunk iri_academic_proceedings: ClassVar[URIRef] = FABIO.AcademicProceedings iri_book: ClassVar[URIRef] = FABIO.Book iri_book_chapter: ClassVar[URIRef] = FABIO.BookChapter iri_book_series: ClassVar[URIRef] = FABIO.BookSeries 
iri_book_set: ClassVar[URIRef] = FABIO.BookSet iri_data_file: ClassVar[URIRef] = FABIO.DataFile iri_expression: ClassVar[URIRef] = FABIO.Expression iri_expression_collection: ClassVar[URIRef] = FABIO.ExpressionCollection iri_has_sequence_identifier: ClassVar[URIRef] = FABIO.hasSequenceIdentifier iri_journal: ClassVar[URIRef] = FABIO.Journal iri_journal_article: ClassVar[URIRef] = FABIO.JournalArticle iri_journal_issue: ClassVar[URIRef] = FABIO.JournalIssue iri_journal_volume: ClassVar[URIRef] = FABIO.JournalVolume iri_manifestation: ClassVar[URIRef] = FABIO.Manifestation iri_proceedings_paper: ClassVar[URIRef] = FABIO.ProceedingsPaper iri_reference_book: ClassVar[URIRef] = FABIO.ReferenceBook iri_reference_entry: ClassVar[URIRef] = FABIO.ReferenceEntry iri_report_document: ClassVar[URIRef] = FABIO.ReportDocument iri_series: ClassVar[URIRef] = FABIO.Series iri_specification_document: ClassVar[URIRef] = FABIO.SpecificationDocument iri_thesis: ClassVar[URIRef] = FABIO.Thesis iri_agent: ClassVar[URIRef] = FOAF.Agent iri_family_name: ClassVar[URIRef] = FOAF.familyName iri_given_name: ClassVar[URIRef] = FOAF.givenName iri_name: ClassVar[URIRef] = FOAF.name iri_embodiment: ClassVar[URIRef] = FRBR.embodiment iri_part_of: ClassVar[URIRef] = FRBR.partOf iri_contains_reference: ClassVar[URIRef] = FRBR.part iri_contains_de: ClassVar[URIRef] = FRBR.part iri_has_literal_value: ClassVar[URIRef] = LITERAL.hasLiteralValue iri_ending_page: ClassVar[URIRef] = PRISM.endingPage iri_starting_page: ClassVar[URIRef] = PRISM.startingPage iri_author: ClassVar[URIRef] = PRO.author iri_editor: ClassVar[URIRef] = PRO.editor iri_is_held_by: ClassVar[URIRef] = PRO.isHeldBy iri_publisher: ClassVar[URIRef] = PRO.publisher iri_is_document_context_for: ClassVar[URIRef] = PRO.isDocumentContextFor iri_role_in_time: ClassVar[URIRef] = PRO.RoleInTime iri_with_role: ClassVar[URIRef] = PRO.withRole iri_note: ClassVar[URIRef] = OA.Annotation iri_has_body: ClassVar[URIRef] = OA.hasBody iri_has_annotation: ClassVar[ URIRef] = OCO.hasAnnotation # inverse of OA.hasTarget iri_has_next: ClassVar[URIRef] = OCO.hasNext iri_archival_document: ClassVar[URIRef] = FABIO.ArchivalDocument iri_viaf: ClassVar[URIRef] = DATACITE.viaf iri_crossref: ClassVar[ URIRef] = DATACITE.crossref # TODO: add to datacite! iri_wikidata: ClassVar[ URIRef] = DATACITE.wikidata # TODO: add to datacite! iri_wikipedia: ClassVar[ URIRef] = DATACITE.wikipedia # TODO: add to datacite! 
iri_has_edition: ClassVar[URIRef] = PRISM.edition iri_relation: ClassVar[URIRef] = DCTERMS.relation iri_has_citation_creation_date: ClassVar[ URIRef] = CITO.hasCitationCreationDate iri_has_citation_time_span: ClassVar[URIRef] = CITO.hasCitationTimeSpan iri_digital_manifestation: ClassVar[URIRef] = FABIO.DigitalManifestation iri_print_object: ClassVar[URIRef] = FABIO.PrintObject iri_has_url: ClassVar[URIRef] = FRBR.exemplar iri_self_citation: ClassVar[URIRef] = CITO.SelfCitation iri_affiliation_self_citation: ClassVar[ URIRef] = CITO.AffiliationSelfCitation iri_author_network_self_citation: ClassVar[ URIRef] = CITO.AuthorNetworkSelfCitation iri_author_self_citation: ClassVar[URIRef] = CITO.AuthorSelfCitation iri_funder_self_citation: ClassVar[URIRef] = CITO.FunderSelfCitation iri_journal_self_citation: ClassVar[URIRef] = CITO.JournalSelfCitation iri_journal_cartel_citation: ClassVar[URIRef] = CITO.JournalCartelCitation iri_distant_citation: ClassVar[URIRef] = CITO.DistantCitation iri_has_format: ClassVar[URIRef] = DCTERMS["format"] short_name_to_type_iri: ClassVar[Dict[str, URIRef]] = { 'an': iri_note, 'ar': iri_role_in_time, 'be': iri_bibliographic_reference, 'br': iri_expression, 'ci': iri_citation, 'de': iri_discourse_element, 'id': iri_identifier, 'pl': iri_singleloc_pointer_list, 'ra': iri_agent, 're': iri_manifestation, 'rp': iri_intextref_pointer } def __init__(self, g: Graph, g_set: GraphSet, res: URIRef = None, res_type: URIRef = None, resp_agent: str = None, source: str = None, count: str = None, label: str = None, short_name: str = "", preexisting_graph: Graph = None) -> None: super(GraphEntity, self).__init__() self.g: Graph = g self.resp_agent: str = resp_agent self.source: str = source self.short_name: str = short_name self.g_set: GraphSet = g_set self.preexisting_graph: Graph = Graph(identifier=g.identifier) self._merge_list: Tuple[GraphEntity] = tuple() # FLAGS self._to_be_deleted: bool = False self._was_merged: bool = False # If res was not specified, create from scratch the URI reference for this entity, # otherwise use the provided one if res is None: self.res = self._generate_new_res(g, count) else: self.res = res if g_set is not None: # If not already done, register this GraphEntity instance inside the GraphSet if self.res not in g_set.res_to_entity: g_set.res_to_entity[self.res] = self if preexisting_graph is not None: # Triples inside self.g are entirely replaced by triples from preexisting_graph. # This has maximum priority with respect to every other self.g initializations. # It's fundamental that the preexisting graph gets passed as an argument of the constructor: # allowing the user to set this value later through a method would mean that the user could # set the preexisting graph AFTER having modified self.g (which would not make sense). 
self.remove_every_triple() for p, o in preexisting_graph.predicate_objects(self.res): self.g.add((self.res, p, o)) self.preexisting_graph.add((self.res, p, o)) else: # Add mandatory information to the entity graph self._create_type(res_type) if label is not None: self.create_label(label) @staticmethod def _generate_new_res(g: Graph, count: str) -> URIRef: return URIRef(str(g.identifier) + count) @property def to_be_deleted(self) -> bool: return self._to_be_deleted @property def was_merged(self) -> bool: return self._was_merged @property def merge_list(self) -> Tuple[GraphEntity]: return self._merge_list def mark_as_to_be_deleted(self) -> None: # Here we must REMOVE triples pointing # to 'self' [THIS CANNOT BE UNDONE]: for res, entity in self.g_set.res_to_entity.items(): triples_list: List[Tuple] = list( entity.g.triples((res, None, self.res))) for triple in triples_list: entity.g.remove(triple) self._to_be_deleted = True def merge(self, other: GraphEntity) -> None: # Here we must REDIRECT triples pointing # to 'other' to make them point to 'self': for res, entity in self.g_set.res_to_entity.items(): triples_list: List[Tuple] = list( entity.g.triples((res, None, other.res))) for triple in triples_list: entity.g.remove(triple) new_triple = (triple[0], triple[1], self.res) entity.g.add(new_triple) types: List[URIRef] = other.get_types() for cur_type in types: self._create_type(cur_type) label: Optional[str] = other.get_label() if label is not None: self.create_label(label) self._was_merged = True self._merge_list = (*self._merge_list, other) # 'other' must be deleted AFTER the redirection of # triples pointing to it, since mark_as_to_be_deleted # also removes every triple pointing to 'other' other.mark_as_to_be_deleted() def commit_changes(self): self.preexisting_graph = Graph(identifier=self.g.identifier) if self._to_be_deleted: self.remove_every_triple() else: for triple in self.g.triples((self.res, None, None)): self.preexisting_graph.add(triple) self._to_be_deleted = False self._was_merged = False self._merge_list = tuple()
def wrapper(self, doc):
    # call the actual function that creates the doc data
    oldbasefile = doc.basefile
    ret = f(self, doc)
    if doc.basefile != oldbasefile:
        # means that basefile was adjusted. Touch the old parsed
        # path first so we don't regenerate.
        with self.store.open_parsed(oldbasefile, "w"):
            pass
        # move any intermediate files (in particular extracted
        # image backgrounds from PDF files) that might be
        # needed later.
        old_intermediate = self.store.intermediate_path(oldbasefile)
        new_intermediate = self.store.intermediate_path(doc.basefile)
        if self.store.storage_policy == "dir":
            old_intermediate = os.path.dirname(old_intermediate)
            new_intermediate = os.path.dirname(new_intermediate)
        if os.path.exists(old_intermediate) and not os.path.exists(new_intermediate):
            util.ensure_dir(new_intermediate)
            os.rename(old_intermediate, new_intermediate)

    # now render that doc data as files (JSON, XHTML, RDF/XML)
    if self.config.serializejson == True:
        with self.store.open_serialized(doc.basefile, "wb") as fp:
            r = serialize(doc, format="json")  # should be a (unicode) str
            fp.write(r.encode('utf-8'))
        self.log.debug(
            "%s: Created %s" %
            (doc.basefile, self.store.serialized_path(doc.basefile)))

    # css file + background images + png renderings of text
    resources = self.create_external_resources(doc)
    if resources:
        cssuris = [cssuri(doc.uri, x) for x in resources if x.endswith(".css")]
    else:
        cssuris = []
    if cssuris:
        doc.cssuris = cssuris

    updated = self.render_xhtml(doc, self.store.parsed_path(doc.basefile))
    if updated:
        self.log.debug(
            "%s: Created %s" %
            (doc.basefile, self.store.parsed_path(doc.basefile)))

    # Extract all triples on the XHTML/RDFa data to a separate
    # RDF/XML file
    distilled_graph = Graph()
    with codecs.open(self.store.parsed_path(doc.basefile),
                     encoding="utf-8") as fp:  # unicode
        distilled_graph.parse(data=fp.read(), format="rdfa",
                              publicID=doc.uri)
    # The act of parsing from RDFa binds a lot of namespaces
    # in the graph in an unneccesary manner. Particularly it
    # binds both 'dc' and 'dcterms' to
    # 'http://purl.org/dc/terms/', which makes serialization
    # less than predictable. Blow these prefixes away.
    distilled_graph.bind("dc", URIRef("http://purl.org/dc/elements/1.1/"))
    distilled_graph.bind(
        "dcterms",
        URIRef("http://example.org/this-prefix-should-not-be-used"))

    util.ensure_dir(self.store.distilled_path(doc.basefile))
    with open(self.store.distilled_path(doc.basefile),
              "wb") as distilled_file:
        # print("============distilled===============")
        # print(distilled_graph.serialize(format="turtle").decode('utf-8'))
        distilled_graph.serialize(distilled_file, format="pretty-xml")
    self.log.debug('%s: %s triples extracted to %s',
                   doc.basefile, len(distilled_graph),
                   self.store.distilled_path(doc.basefile))

    # Validate that all required triples are present (we check
    # distilled_graph, but we could just as well check doc.meta)
    required = sorted(set(self.get_required_predicates(doc)))
    for p in required:
        x = distilled_graph.value(URIRef(doc.uri), p)
        if not x:
            self.log.warning("%s: Metadata is missing a %s triple" %
                             (doc.basefile, distilled_graph.qname(p)))

    if 'validaterdfa' in self.config and self.config.validaterdfa:
        # Validate that all triples specified in doc.meta and any
        # .meta property on any body object is present in the
        # XHTML+RDFa file. NOTE: graph_diff has suddenly become
        # glacial on medium-large graphs (> ~1000 triples). Maybe we
        # don't have to validate them?
        huge_graph = False
        for g in iterate_graphs(doc.body):
            doc.meta += g
            if len(doc.meta) > 1000:
                huge_graph = True
                break
        if huge_graph:
            self.log.warning("%s: Graph seems huge, skipping validation" %
                             doc.basefile)
        else:
            # self.log.debug("%s: diffing graphs" % doc.basefile)
            (in_both, in_first, in_second) = graph_diff(doc.meta, distilled_graph)
            self.log.debug("%s: graphs diffed (-%s, +%s)" %
                           (doc.basefile, len(in_first), len(in_second)))

            if in_first:  # original metadata not present in the XHTML file
                self.log.warning(
                    "%s: %d triple(s) from the original metadata was "
                    "not found in the serialized XHTML file:\n%s",
                    doc.basefile, len(in_first),
                    in_first.serialize(format="n3").decode("utf-8"))

    # Validate that entry.title and entry.id has been filled
    # (might be from doc.meta and doc.uri, might be other things)
    entry = DocumentEntry(self.store.documententry_path(doc.basefile))
    if not entry.id:
        self.log.warning("%s: entry.id missing" % doc.basefile)
    if not entry.title:
        self.log.warning("%s: entry.title missing" % doc.basefile)
    return ret
def export_html(self, model_view='pdm'): """ Exports this instance in HTML, according to a given model from the list of supported models. :param model_view: string of one of the model view names available for Sample objects ['igsn', 'dc', '', 'default'] :return: HTML string """ if model_view == 'pdm': view_title = 'PDM Ontology view' sample_table_html = render_template( 'class_site_pdm.html', site_no=self.site_no, description=self.description, wkt=self._generate_wkt(), state=None, # TODO: calculate site_type_alink=self._make_vocab_alink(self.site_type), entry_date=self.entry_date ) elif model_view == 'prov': view_title = 'PROV Ontology view' prov_turtle = self.export_rdf('prov', 'text/turtle') g = Graph().parse(data=prov_turtle, format='turtle') sample_table_html = render_template( 'class_site_prov.html', visjs=self._make_vsjs(g), prov_turtle=prov_turtle, ) else: # elif model_view == 'dc': view_title = 'Dublin Core view' sample_table_html = render_template( 'class_site_dc.html', identifier=self.site_no, description=self.description, date=self.entry_date, type=self.site_type, wkt=self._generate_wkt(), creator='<a href="{}">Geoscience Australia</a>'.format(Site.URI_GA), publisher='<a href="{}">Geoscience Australia</a>'.format(Site.URI_GA), ) # add in the Pingback header links as they are valid for all HTML views pingback_uri = conf.URI_SITE_INSTANCE_BASE + self.site_no + "/pingback" headers = { 'Link': '<{}>;rel = "http://www.w3.org/ns/prov#pingback"'.format(pingback_uri) } return Response( render_template( 'page_site.html', view=model_view, site_no=self.site_no, entry_date=self.entry_date, view_title=view_title, sample_table_html=sample_table_html, date_now=datetime.now().strftime('%d %B %Y'), gm_key=conf.GOOGLE_MAPS_API_KEY, google_maps_js=self._generate_google_map_js(), lat=self.centroid_y, lon=self.centroid_x, geometry_type=self.geometry_type, coords=self.coords ), headers=headers )
results.add((test, RESULT["test"], uri)) results.add((test, RESULT["system"], system)) if not result: results.add((test, RDF.type, RESULT["PassingRun"])) else: results.add((test, RDF.type, RESULT["FailingRun"])) total += 1 num_failed += result self.assertEqual(num_failed, 0, "Failed: %s of %s." % (num_failed, total)) RESULT = Namespace("http://www.w3.org/2002/03owlt/resultsOntology#") FOAF = Namespace("http://xmlns.com/foaf/0.1/") results = Graph() system = BNode("system") results.add((system, FOAF["homepage"], URIRef("http://rdflib.net/"))) results.add((system, RDFS.label, Literal("RDFLib"))) results.add((system, RDFS.comment, Literal(""))) if __name__ == "__main__": manifest = Graph() manifest.load( cached_file( "http://www.w3.org/2000/10/rdf-tests/rdfcore/Manifest.rdf")) import sys import getopt try: optlist, args = getopt.getopt(sys.argv[1:], 'h:', ["help"])
EnumeratedClass, OWL_NS, Property, Restriction, ) from FuXi.Syntax.InfixOWL import some from FuXi.Syntax.InfixOWL import max from rdflib.namespace import ( Namespace, NamespaceManager, ) from pprint import pformat exNs = Namespace('http://example.com/') namespace_manager = NamespaceManager(Graph()) namespace_manager.bind('ex', exNs, override=False) namespace_manager.bind('owl', OWL_NS, override=False) g = Graph() g.namespace_manager = namespace_manager # Now we have an empty Graph, we can construct OWL classes in it using the # Python classes defined in this module a = Class(exNs.Opera, graph=g) # Now we can assert rdfs:subClassOf and owl:equivalentClass relationships # (in the underlying graph) with other classes using the subClassOf and # equivalentClass descriptors which can be set to a list of objects for # the corresponding predicates.
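# A minimal hedged continuation of the InfixOWL example above: the comment
# mentions the subClassOf and equivalentClass descriptors but the snippet stops
# before using them. exNs.MusicalWork and exNs.CreativeWork are illustrative
# URIs, not part of the original code, and the descriptor behaviour assumed
# here follows the InfixOWL API as documented in rdflib.extras.infixowl.
musical_work = Class(exNs.MusicalWork, graph=g)
creative_work = Class(exNs.CreativeWork, graph=g)

# rdfs:subClassOf is asserted by assigning a list to the descriptor.
a.subClassOf = [musical_work]

# owl:equivalentClass works the same way.
musical_work.equivalentClass = [creative_work]

# The corresponding triples now live in the underlying graph g.
print(list(a.subClassOf))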
def forwards_func(apps, schema_editor): # We get the model from the versioned app registry; # if we directly import it, it'll be the wrong version extensions = [ os.path.join(settings.ONTOLOGY_PATH, x) for x in settings.ONTOLOGY_EXT ] management.call_command('load_ontology', source=os.path.join(settings.ONTOLOGY_PATH, settings.ONTOLOGY_BASE), version=settings.ONTOLOGY_BASE_VERSION, ontology_name=settings.ONTOLOGY_BASE_NAME, id=settings.ONTOLOGY_BASE_ID, extensions=','.join(extensions), verbosity=0) Ontology = apps.get_model("models", "Ontology") Node = apps.get_model("models", "Node") Edge = apps.get_model("models", "Edge") for ontology in Ontology.objects.filter(parentontology=None): g = Graph() g.parse(ontology.path.path) for extension in Ontology.objects.filter(parentontology=ontology): g.parse(extension.path.path) ontology_classes = set() ontology_properties = set() for ontology_property, p, o in g.triples((None, None, RDF.Property)): ontology_properties.add(ontology_property) for s, p, domain_class in g.triples( (ontology_property, RDFS.domain, None)): ontology_classes.add(domain_class) for s, p, range_class in g.triples( (ontology_property, RDFS.range, None)): ontology_classes.add(range_class) for ontology_class, p, o in g.triples((None, None, RDFS.Class)): ontology_classes.add(ontology_class) for ontology_class in ontology_classes: for node in Node.objects.filter( ontologyclass=str(ontology_class).split('/')[-1], graph__in=ontology.graphs.all()): node.ontologyclass = ontology_class node.save() for ontology_property in ontology_properties: for edge in Edge.objects.filter( ontologyproperty=str(ontology_property).split('/')[-1], graph__in=ontology.graphs.all()): edge.ontologyproperty = ontology_property edge.save() # index base Arches concept arches_concept = Concept().get(id='00000000-0000-0000-0000-000000000001', include=['label']) arches_concept.index() DValueType = apps.get_model("models", "DValueType") DValueType.objects.create(valuetype='identifier', category='identifiers', namespace='dcterms', datatype='text')
from rdflib import Graph, Literal, RDF, URIRef import requests # create a Graph g = Graph() g.parse("GuitarShop.owl", format='turtle') # loop through each triple in the graph (subj, pred, obj) for subj, pred, obj in g: # check if there is at least one triple in the Graph if (subj, pred, obj) not in g: raise Exception("It better be!") if g.label(subj): print(g.label(subj)) # print the number of "triples" in the Graph print("graph has {} statements.".format(len(g))) # prints graph has 86 statements. # print out the entire Graph in the RDF Turtle format # print(g.serialize(format="turtle").decode("utf-8")) res = g.query( """PREFIX : <http://webprotege.stanford.edu/GuitarShop#> SELECT ?guitar WHERE { ?guitar :hasManufacturer :Fender; :hasPrice ?price . FILTER(?price > 500) }""")
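# The query above is defined but its results are never consumed; a short,
# hedged continuation showing how the rows could be iterated. Only the
# ?guitar variable is selected, so each result row exposes a .guitar attribute.
for row in res:
    print(row.guitar)
    # reuse the label lookup from above, if a label exists
    if g.label(row.guitar):
        print(g.label(row.guitar))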
def export_rdf(self, model_view='pdm', rdf_mime='text/turtle'): """ Exports this instance in RDF, according to a given model from the list of supported models, in a given rdflib RDF format :param model_view: string of one of the model view names available for Sample objects ['igsn', 'dc', '', 'default'] :param rdf_mime: string of one of the rdflib serlialization format ['n3', 'nquads', 'nt', 'pretty-xml', 'trig', 'trix', 'turtle', 'xml'], from http://rdflib3.readthedocs.io/en/latest/plugin_serializers.html :return: RDF string """ ''' <http://pid.geoscience.gov.au/site/9810> a <http://vocabulary.odm2.org/samplingfeaturetype/borehole>, <http://www.w3.org/2002/07/owl#NamedIndividual> ; samfl:samplingElevation [ a samfl:Elevation ; samfl:elevation "231.69716"^^xsd:float ; samfl:verticalDatum "http://spatialreference.org/ref/epsg/4283/"^^xsd:anyUri ] ; geosp:hasGeometry [ a geosp:Geometry ; geosp:asWKT "SRID=GDA94;POINT(143.36786389 -25.94903611)"^^geosp:wktLiteral ] . <http://registry.it.csiro.au/sandbox/csiro/oznome/feature/earth-realm/lithosphere> a sosa:FeatureOfInterest ; skos:exactMatch <http://sweetontology.net/realmGeol/Lithosphere> . <http://vocabulary.odm2.org/samplingfeaturetype/borehole> rdfs:subClassOf sosa:Sample . ''' # things that are applicable to all model views; the graph and some namespaces g = Graph() GEO = Namespace('http://www.opengis.net/ont/geosparql#') g.bind('geo', GEO) # URI for this site this_site = URIRef(conf.URI_SITE_INSTANCE_BASE + self.site_no) g.add((this_site, RDF.type, URIRef(self.site_type))) g.add((this_site, RDF.type, URIRef('http://www.w3.org/2002/07/owl#NamedIndividual'))) g.add((this_site, RDFS.label, Literal('Site ' + self.site_no, datatype=XSD.string))) g.add((this_site, RDFS.comment, Literal(self.description, datatype=XSD.string))) site_geometry = BNode() g.add((this_site, GEO.hasGeometry, site_geometry)) g.add((site_geometry, RDF.type, GEO.Geometry)) g.add((site_geometry, GEO.asWKT, Literal(self._generate_wkt(), datatype=GEO.wktLiteral))) return g.serialize(format=LDAPI.get_rdf_parser_for_mimetype(rdf_mime))
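# Hedged usage sketch for export_rdf(): `site` is assumed to be an
# already-populated Site instance (not constructed here). The round-trip
# parse only demonstrates that the returned serialization is valid Turtle.
turtle_data = site.export_rdf(model_view='pdm', rdf_mime='text/turtle')

check = Graph()
check.parse(data=turtle_data, format='turtle')
print('round-tripped {} triples'.format(len(check)))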
def _test_load_graph_size(self, file: str, graph_serialisation: str, queries: list): g = Graph() g.parse(file, format=graph_serialisation) self._test_graph_size(g, queries, file)
def openfile(self, append=False): """Make sure self.outfile is a valid and open FileType""" self.graph = Graph() self.outfile = self.graph # we use the graph as the output object.
def test_meta_output(self): """ Generate a context AND a jsonld for the metamodel and make sure it parses as RDF """ cwd = os.path.abspath(os.path.join(os.path.dirname(__file__))) jsonld_path = os.path.join(self.testdir_path, 'metajson.jsonld') rdf_path = os.path.join(self.testdir_path, 'metardf.ttl') yaml_path = os.path.abspath(os.path.join(cwd, '..', '..', 'meta.yaml')) meta_context_path = os.path.join(self.testdir_path, 'metacontext.jsonld') # Generate an image of the metamodel with open(meta_context_path, 'w') as tfile: tfile.write(ContextGenerator(yaml_path).serialize()) with open(jsonld_path, 'w') as tfile: tfile.write( JSONLDGenerator(yaml_path).serialize( context=meta_context_path)) g = Graph() g.load(jsonld_path, format="json-ld") g.serialize(rdf_path, format="ttl") g.bind('bioentity', BIOENTITY) new_ttl = g.serialize(format="turtle").decode() new_g = Graph() new_g.parse(data=new_ttl, format="turtle") self.check_size( g, new_g, URIRef("https://biolink.github.io/metamodel/ontology/meta.ttl"), 8, 71, 0, "meta")
#!/usr/bin/env python # -*- coding: iso-8859-1 -*- from rdflib import Graph g = Graph() #g.parse("http://bigasterisk.com/foaf.rdf") #g.parse("demo.nt", format="nt") #g.parse("http://bigasterisk.com/foaf.rdf") my_data = ''' <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:rdfs='http://www.w3.org/2000/01/rdf-schema#'> <rdf:Description> <rdfs:label>Example</rdfs:label> <rdfs:comment>This is really just an example.</rdfs:comment> </rdf:Description> </rdf:RDF> ''' import tempfile file_name = tempfile.mktemp() f = file(file_name, "w") f.write(my_data) f.close() g = Graph() result = g.parse(data=my_data, format="application/rdf+xml") print len(g) g1 = Graph() result = g1.parse(location='demo.xml', format="application/rdf+xml")
def main(): parser = ArgumentParser() parser.add_argument('graph_path', help='path to the graph json file') parser.add_argument('hypotheses_path', help='path to the hypotheses json directory') parser.add_argument('kb_path', help='path to the TA2 KB file (in AIF)') parser.add_argument('output_dir', help='path to output directory') parser.add_argument('run_id', help='TA3 run ID') parser.add_argument('sin_id_prefix', help='prefix of SIN IDs to name the final hypotheses') parser.add_argument('--top', default=50, type=int, help='number of top hypothesis to output') parser.add_argument( '-f', '--force', action='store_true', default=False, help='If specified, overwrite existing output files without warning') args = parser.parse_args() json_graph = JsonGraph.from_dict( util.read_json_file(args.graph_path, 'JSON graph')) graph_mappings = json_graph.build_cluster_member_mappings() hypotheses_file_paths = util.get_file_list(args.hypotheses_path, suffix='.json', sort=True) # TODO: there is a known bug in rdflib that # rdflib.Literal("2008", datatype=rdflib.XSD.gYear) would be parsed into # rdflib.term.Literal(u'2008-01-01', datatype=rdflib.XSD.gYear) automatically, # because a `parse_date` function is invoked for all rdflib.XSD.gYear literals. # This is a temporary workaround to patch the _toPythonMapping locally. # c.f.: https://github.com/RDFLib/rdflib/issues/806 # noinspection PyProtectedMember rdflib.term._toPythonMapping.pop(rdflib.XSD['gYear']) print('Reading kb from {}'.format(args.kb_path)) kb_graph = Graph() kb_graph.parse(args.kb_path, format='ttl') kb_nodes_by_category = catalogue_kb_nodes(kb_graph) kb_stmt_key_mapping = index_statement_nodes( kb_graph, kb_nodes_by_category['Statement']) kb_cm_key_mapping = index_cluster_membership_nodes( kb_graph, kb_nodes_by_category['ClusterMembership']) kb_type_stmt_key_mapping = index_type_statement_nodes( kb_graph, kb_nodes_by_category['TypeStatement']) output_dir = util.get_output_dir(args.output_dir, overwrite_warning=not args.force) run_id = args.run_id sin_id_prefix = args.sin_id_prefix for hypotheses_file_path in hypotheses_file_paths: hypotheses_json = util.read_json_file(hypotheses_file_path, 'hypotheses') print('Found {} hypotheses with probability {}'.format( len(hypotheses_json['probs']), hypotheses_json['probs'])) soin_id = sin_id_prefix + '_' + hypotheses_file_path.stem.split('_')[0] frame_id = soin_id + '_F1' top_count = 0 for hypothesis_idx, prob in sorted(enumerate(hypotheses_json['probs']), key=itemgetter(1), reverse=True): if prob <= 0.0: hypothesis_weight = math.exp(prob / 2.0) else: hypothesis_weight = 0.0001 hypothesis = hypotheses_json['support'][hypothesis_idx] top_count += 1 hypothesis_id = '{}_hypothesis_{:0>3d}'.format(frame_id, top_count) subgraph = build_subgraph_for_hypothesis( kb_graph=kb_graph, kb_nodes_by_category=kb_nodes_by_category, kb_stmt_key_mapping=kb_stmt_key_mapping, kb_cm_key_mapping=kb_cm_key_mapping, kb_type_stmt_key_mapping=kb_type_stmt_key_mapping, json_graph=json_graph, graph_mappings=graph_mappings, hypothesis=hypothesis, hypothesis_id=hypothesis_id, hypothesis_weight=hypothesis_weight) output_path = output_dir / '{}.{}.{}.H{:0>3d}.ttl'.format( run_id, soin_id, frame_id, top_count) print('Writing hypothesis #{:>2d} with prob {:>6.2f} to {}'.format( top_count, prob, output_path)) with open(output_path, 'w') as fout: fout.write(print_graph(subgraph)) if top_count >= args.top: break
def main(): '''Converts Organisation XMLs to Turtle files and stores these to local folder.''' # Settings xml_folder = "/home/iati/xml/organisations/" turtle_folder = "/home/iati/organisation/" Iati = Namespace("http://purl.org/collections/iati/") if not os.path.isdir(turtle_folder): os.makedirs(turtle_folder) document_count = 1 organisation_count = 1 # Retrieve XML files from the XML folder for document in glob.glob(xml_folder + '*.xml'): organisation_ids = [] doc_fail = False doc_id = str(document.rsplit('/', 1)[1])[:-4] doc_folder = turtle_folder + doc_id + '/' if not os.path.isdir(doc_folder): os.makedirs(doc_folder) provenance = Graph() provenance.bind('iati', Iati) # Parse the XML file try: xml = ET.parse(document) except ET.ParseError: print "Could not parse file " + document doc_fail = True if not doc_fail == True: root = xml.getroot() version = AttributeHelper.attribute_key(root, 'version') if (root.tag == 'iati-organisations') or (root.tag == 'organisations'): # Convert each organisation in XML file to RDFLib Graph for organisation in xml.findall('iati-organisation'): try: converter = IatiConverter.ConvertOrganisation( organisation) graph, id, last_updated = converter.convert(Iati) except TypeError as e: print "Error in " + document + ":" + str(e) print "Progress: Organisation #" + str( organisation_count) + " in document #" + str( document_count) if (not graph == None) and (not id == None): # Write organisation to Turtle and store in local folder graph_turtle = graph.serialize(format='turtle') with open( doc_folder + str(id.replace('/', '%2F')) + '.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle) organisation_count += 1 organisation_ids.append(id) for organisation in xml.findall('organisation'): try: converter = IatiConverter.ConvertOrganisation( organisation) graph, id, last_updated = converter.convert(Iati) except TypeError as e: print "Error in " + document + ":" + str(e) print "Progress: Organisation #" + str( organisation_count) + " in document #" + str( document_count) if (not graph == None) and (not id == None): # Write organisation to Turtle and store in local folder graph_turtle = graph.serialize(format='turtle') with open( doc_folder + str(id.replace('/', '%2F')) + '.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle) organisation_count += 1 organisation_ids.append(id) elif (root.tag == 'iati-organisation') or (root.tag == 'organisation'): try: converter = IatiConverter.ConvertOrganisation( xml.getroot()) graph, id, last_updated = converter.convert(Iati) except TypeError as e: print "Error in " + document + ":" + str(e) print "Progress: Organisation #" + str( organisation_count) + " in document #" + str( document_count) if (not graph == None) and (not id == None): # Write organisation to Turtle and store in local folder graph_turtle = graph.serialize(format='turtle') with open( doc_folder + str(id.replace('/', '%2F')) + '.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle) # The following outputs enable the Virutuoso Bulk loader process to put files into the right graphs. 
with open( doc_folder + str(id.replace('/', '%2F')) + '.ttl.graph', 'w') as graph_file: graph_file.write( str(Iati) + 'graph/organisation/' + str(id)) organisation_count += 1 organisation_ids.append(id) document_count += 1 # Add provenance from corresponding JSON file json_document = document[:-4] + '.json' try: with open(json_document, 'r') as open_json_doc: json_parsed = json.load(open_json_doc) except: print "Could not parse file " + json_document json_parsed = None provenance_converter = IatiConverter.ConvertProvenance( 'organisation', json_parsed, provenance, doc_id, last_updated, version, organisation_ids) provenance = provenance_converter.convert(Iati) # Write provenance graph to Turtle and store in local folder provenance_turtle = provenance.serialize(format='turtle') with open(doc_folder + 'provenance-' + doc_id + '.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) # The following outputs enable the Virtuoso Bulk loader process to put files into the right graphs. with open(doc_folder + 'provenance-' + doc_id + '.ttl.graph', 'w') as graph_file: graph_file.write(str(Iati) + 'graph/provenance/') print "Done!"
def convert_graph(options, closureClass=None) : """ Entry point for external scripts (CGI or command line) to parse an RDF file(s), possibly execute OWL and/or RDFS closures, and serialize back the result in some format. Note that this entry point can be used requiring no entailment at all; because both the input and the output format for the package can be RDF/XML or Turtle, such usage would simply mean a format conversion. If OWL 2 RL processing is required, that also means that the owl:imports statements are interpreted. Ie, ontologies can be spread over several files. Note, however, that the output of the process would then include all imported ontologies, too. @param options: object with specific attributes, namely: - options.sources: list of uris or file names for the source data; for each one if the name ends with 'ttl', it is considered to be turtle, RDF/XML otherwise (this can be overwritten by the options.iformat, though) - options.text: direct Turtle encoding of a graph as a text string (useful, eg, for a CGI call using a text field) - options.owlClosure: can be yes or no - options.rdfsClosure: can be yes or no - options.owlExtras: can be yes or no; whether the extra rules beyond OWL 2 RL are used or not. - options.axioms: whether relevant axiomatic triples are added before chaining (can be a boolean, or the strings "yes" or "no") - options.daxioms: further datatype axiomatic triples are added to the output (can be a boolean, or the strings "yes" or "no") - options.format: output format, can be "turtle" or "rdfxml" - options.iformat: input format, can be "turtle", "rdfa", "json", "rdfxml", or "auto". "auto" means that the suffix of the file is considered: '.ttl'. '.html', 'json' or '.jsonld' respectively with 'xml' as a fallback - options.trimming: whether the extension to OWLRL should also include trimming @param closureClass: explicit class reference. If set, this overrides the various different other options to be used as an extension. """ def __check_yes_or_true(opt) : return opt is True or opt == "yes" or opt == "Yes" or opt == "True" or opt == "true" import warnings warnings.filterwarnings("ignore") if len(options.sources) == 0 and (options.text is None or len(options.text.strip()) == 0) : raise Exception("No graph specified either via a URI or text") graph = Graph() # Just to be sure that this attribute does not create issues with older versions of the service... # the try statement should be removed, eventually... iformat = AUTO try : iformat = options.iformat except : # exception can be raised if that attribute is not used at all, true for older versions pass # similar measure with the possible usage of the 'source' options try : if options.source is not None: options.sources.append(options.source) except: # exception can be raised if that attribute is not used at all, true for newer versions pass # Get the sources first. Note that a possible error is filtered out, namely to process the same file twice. This is done # by turning the input arguments into a set... 
for inp in set(options.sources): __parse_input(iformat, inp, graph) # add the possible extra text (ie, the text input on the HTML page) if options.text is not None: graph.parse(StringIO.StringIO(options.text), format="n3") # Get all the options right # noinspection PyPep8Naming owlClosure = __check_yes_or_true(options.owlClosure) # noinspection PyPep8Naming rdfsClosure = __check_yes_or_true(options.rdfsClosure) # noinspection PyPep8Naming owlExtras = __check_yes_or_true(options.owlExtras) try: trimming = __check_yes_or_true(options.trimming) except : trimming = False axioms = __check_yes_or_true(options.axioms) daxioms = __check_yes_or_true(options.daxioms) if owlClosure: interpret_owl_imports(iformat, graph) # adds to the 'beauty' of the output graph.bind("owl", "http://www.w3.org/2002/07/owl#") graph.bind("xsd", "http://www.w3.org/2001/XMLSchema#") #@@@@ some smarter choice should be used later to decide what the closure class is!!! That should # also control the import management. Eg, if the superclass includes OWL... if closureClass is not None : closure_class = closureClass else : closure_class = return_closure_class(owlClosure, rdfsClosure, owlExtras, trimming) DeductiveClosure(closure_class, improved_datatypes=True, rdfs_closure=rdfsClosure, axiomatic_triples=axioms, datatype_axioms=daxioms).expand(graph) if options.format == TURTLE: return graph.serialize(format="turtle") elif options.format == JSON: if json_ld_available : return graph.serialize(format="json-ld") else: raise Exception("JSON-LD serializer is not available") else: return graph.serialize(format="pretty-xml")
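# Hedged usage sketch for convert_graph(): the options object below simply
# mirrors the attributes listed in the docstring. The plain Options class,
# the file name, and the literal "turtle"/"auto" strings (standing in for the
# module-level TURTLE/AUTO constants) are assumptions for illustration only.
class Options(object):
    pass

opts = Options()
opts.sources = ["ontology.ttl"]   # hypothetical input; '.ttl' suffix implies Turtle
opts.source = None
opts.text = None                  # no inline Turtle text
opts.owlClosure = "yes"
opts.rdfsClosure = "no"
opts.owlExtras = "no"
opts.axioms = "no"
opts.daxioms = "no"
opts.iformat = "auto"             # let the file suffix decide the parser
opts.format = "turtle"
opts.trimming = "no"

print(convert_graph(opts))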
def build_subgraph_for_hypothesis(kb_graph, kb_nodes_by_category, kb_stmt_key_mapping, kb_cm_key_mapping, kb_type_stmt_key_mapping, json_graph, graph_mappings, hypothesis, hypothesis_id, hypothesis_weight): member_to_clusters = graph_mappings['member_to_clusters'] cluster_to_prototype = graph_mappings['cluster_to_prototype'] # Set of all KB edge statement nodes kb_edge_stmt_set = set() # Mapping from ERE to all its KB type statement nodes kb_type_stmt_set = set() # Mapping from KB edge statement nodes to importance values kb_stmt_importance = {} # Set of all ERE node labels ere_set = set() # Mapping from ERE node labels to importance values ere_importance = {} # logging.info('Processing all statements') for stmt_label, stmt_weight in zip(hypothesis['statements'], hypothesis['statementWeights']): # Rescale the stmt_weight to get the importance value if stmt_weight <= 0.0: stmt_weight = math.exp(stmt_weight / 100.0) else: stmt_weight = 0.0001 assert json_graph.is_statement(stmt_label) stmt_entry = json_graph.node_dict[stmt_label] stmt_subj = stmt_entry.subject stmt_pred = stmt_entry.predicate stmt_obj = stmt_entry.object assert stmt_subj is not None and stmt_pred is not None and stmt_obj is not None # Find the statement node in the KB kb_stmt_id = URIRef(stmt_label) if kb_stmt_id not in kb_nodes_by_category['Statement']: kb_stmt_pred = RDF.type if stmt_pred == 'type' else LDC_ONT.term( stmt_pred) kb_stmt_id = next( iter(kb_stmt_key_mapping[(URIRef(stmt_subj), kb_stmt_pred, URIRef(stmt_obj))])) # Add the subject of any statement to ere_set ere_set.add(stmt_subj) # Update the importance value of the subject of any statement based on stmt_weight if stmt_subj not in ere_importance or ere_importance[ stmt_subj] < stmt_weight: ere_importance[stmt_subj] = stmt_weight if stmt_pred == 'type': if kb_stmt_id is not None: # Add kb_stmt_id to the set of KB type statement nodes kb_type_stmt_set.add(kb_stmt_id) # kb_type_stmt_dict[stmt_subj].add(kb_stmt_id) else: if kb_stmt_id is not None: # Add kb_stmt_id to the set of KB edge statement nodes kb_edge_stmt_set.add(kb_stmt_id) # Update the importance value of the edge statement kb_stmt_importance[kb_stmt_id] = stmt_weight # Add the object of edge statements to ere_set ere_set.add(stmt_obj) # Update the importance value of the object of edge statements based on stmt_weight if stmt_obj not in ere_importance or ere_importance[ stmt_obj] < stmt_weight: ere_importance[stmt_obj] = stmt_weight # Set of all SameAsCluster node labels same_as_cluster_set = set() # Set of all KB ClusterMembership nodes kb_cluster_membership_set = set() # Set of all ERE node labels that are prototypes proto_ere_set = set() # Mapping from ERE prototype node labels to importance values proto_importance = {} # logging.info('Processing all EREs and clusters') cluster_memberships = hypothesis.get('clusterMemberships', None) if cluster_memberships is None: for ere in ere_set: ere_weight = ere_importance.get(ere, 0.0) for cluster in member_to_clusters[ere]: # Add all corresponding cluster label of each ERE node to same_as_cluster_set same_as_cluster_set.add(cluster) # Find the ClusterMembership node in the KB kb_cluster_membership_set.update( kb_cm_key_mapping[URIRef(cluster), URIRef(ere)]) proto_ere = cluster_to_prototype[cluster] if proto_ere not in proto_importance or proto_importance[ proto_ere] < ere_weight: proto_importance[proto_ere] = ere_weight else: for member, cluster in cluster_memberships: same_as_cluster_set.add(cluster) 
kb_cluster_membership_set.update(kb_cm_key_mapping[URIRef(cluster), URIRef(member)]) # Add the prototype of each SameAsCluster node to ere_set proto_ere = cluster_to_prototype[cluster] proto_ere_set.add(proto_ere) # Find the type statement node for the prototype proto_type_stmt_id_list = kb_type_stmt_key_mapping[URIRef( proto_ere)] highest_granularity_level = max([ len(type_ont.split('.')) for _, type_ont in proto_type_stmt_id_list ]) for type_stmt_id, type_ont in proto_type_stmt_id_list: if len(type_ont.split('.')) == highest_granularity_level: kb_type_stmt_set.add(type_stmt_id) # Find the ClusterMembership node for the prototype in the KB kb_cluster_membership_set.update( kb_cm_key_mapping[URIRef(cluster), URIRef(proto_ere)]) member_weight = ere_importance.get(member, 0.0) if proto_ere not in proto_importance or proto_importance[ proto_ere] < member_weight: proto_importance[proto_ere] = member_weight # Add all prototype ERE labels to ere_set ere_set |= proto_ere_set # All triples to be added to the subgraph # logging.info('Extracting all content triples') all_triples = set() for kb_stmt_id in kb_edge_stmt_set: all_triples.update(triples_for_edge_stmt(kb_graph, kb_stmt_id)) for kb_stmt_id in kb_type_stmt_set: all_triples.update(triples_for_type_stmt(kb_graph, kb_stmt_id)) # logging.info('Extracting triples for all EREs') # Add triples for all EREs for ere in ere_set: kb_ere_id = URIRef(ere) all_triples.update(triples_for_ere(kb_graph, kb_ere_id)) # logging.info('Extracting triples for all SameAsClusters') # Add triples for all SameAsClusters for cluster in same_as_cluster_set: kb_cluster_id = URIRef(cluster) all_triples.update(triples_for_cluster(kb_graph, kb_cluster_id)) # logging.info('Extracting triples for all ClusterMemberships') # Add triples for all ClusterMemberships for kb_cm_id in kb_cluster_membership_set: all_triples.update(triples_for_cluster_membership(kb_graph, kb_cm_id)) # logging.info('Constructing a subgraph') # Start building the subgraph subgraph = Graph() # Bind all prefixes of kb_graph to the subgraph for prefix, namespace in kb_graph.namespaces(): if str(namespace) not in [AIDA, LDC, LDC_ONT]: subgraph.bind(prefix, namespace) # Bind the AIDA, LDC, LDC_ONT, and UTEXAS namespaces to the subgraph subgraph.bind('aida', AIDA, override=True) subgraph.bind('ldc', LDC, override=True) subgraph.bind('ldcOnt', LDC_ONT, override=True) subgraph.bind('utexas', UTEXAS) # logging.info('Adding hypothesis related triples to the subgraph') # Add triple for the aida:Hypothesis node and its type kb_hypothesis_id = UTEXAS.term(hypothesis_id) subgraph.add((kb_hypothesis_id, RDF.type, AIDA.Hypothesis)) # Add triple for the hypothesis importance value subgraph.add((kb_hypothesis_id, AIDA.importance, Literal(hypothesis_weight, datatype=XSD.double))) # Add triple for the aida:Subgraph node and its type kb_subgraph_id = UTEXAS.term(hypothesis_id + '_subgraph') subgraph.add((kb_hypothesis_id, AIDA.hypothesisContent, kb_subgraph_id)) subgraph.add((kb_subgraph_id, RDF.type, AIDA.Subgraph)) # Add all EREs as contents of the aida:Subgraph node for ere in ere_set: kb_ere_id = URIRef(ere) subgraph.add((kb_subgraph_id, AIDA.subgraphContains, kb_ere_id)) # logging.info('Adding all content triples to the subgraph') # Add all triples for triple in all_triples: subgraph.add(triple) # Add importance values for all edge statements for kb_stmt_id, importance in kb_stmt_importance.items(): subgraph.add((kb_stmt_id, AIDA.importance, Literal(importance, datatype=XSD.double))) # Add importance values for all 
# prototype EREs for proto_ere, proto_weight in proto_importance.items(): kb_proto_id = URIRef(proto_ere) subgraph.add((kb_proto_id, AIDA.importance, Literal(proto_weight, datatype=XSD.double))) # Compute handles for Entity clusters proto_handles = compute_handle_mapping(ere_set, json_graph, member_to_clusters, cluster_to_prototype) for proto_ere, handle in proto_handles.items(): kb_proto_id = URIRef(proto_ere) if len( list( subgraph.objects(subject=kb_proto_id, predicate=AIDA.handle))) == 0: subgraph.add( (kb_proto_id, AIDA.handle, Literal(handle, datatype=XSD.string))) return subgraph
class Model(object): def __init__(self): self.graph = Graph() self.top_nodes = [] self.serializedResources = [] def createResource(self, id=None): return Resource(id=id, model=self) def createProperty(self, id=None): return Property(id) def createLiteral(self, element): return Literal(element) def createTypedLiteral(self, element, type): return self._convert_element(element, type) def createOrderedList(self): return OrderedList() def createItemizedList(self): return ItemizedList() def createLinkedList(self): return LinkedList() def createNudeList(self): return NudeList() def _append_to_graph(self, subject, predicate, object): if isinstance(predicate, Property): self.graph.add((subject, URIRef(predicate.id), object)) else: self.graph.add((subject, predicate, object)) def _add_statement(self, statement): if statement is not None: #print "Adding statement", statement self._append_to_graph(statement.getSubject(), statement.getPredicate(), statement.getObject()) # Append various types of elements to the graph. This is the main method for determining # the type of a serializable object and creating the appropriate triple for it. All methods # that need to make additions to the graph should use it, unless handling raw triples # the the form of s-p-o of a statement that embeds them def _add_element(self, object, predicate, subject=None): from SmartAPI.rdf.Variant import Variant from SmartAPI.common.Tools import Tools if isinstance(object, Resource) and (object not in self.serializedResources): self.serializedResources.append(object) n = object.getNode() for p in object.listProperties(): if p is not None: if isinstance(p, list): for pe in p: self._add_statement(pe) if pe.getResource() is not None: self._add_element(pe.getResource(), pe.getPredicate(), n) else: self._add_statement(p) if p.getResource() is not None: self._add_element(p.getResource(), p.getPredicate(), n) for l in object.listLiterals(): self._add_statement(l) elif isinstance(object, Resource) and (object in self.serializedResources): pass elif isinstance(object, Property): if subject is not None and predicate is not None and object.id is not None: self._append_to_graph(subject, predicate, URIRef(object.id)) elif isinstance(object, Literal): if subject is not None and predicate is not None: self._append_to_graph(subject, predicate, object) elif isinstance(object, RdfLiteral): if subject is not None and predicate is not None: self._append_to_graph(subject, predicate, object.getValue()) elif isinstance(object, Variant): if subject is not None and predicate is not None: self._append_to_graph(subject, predicate, object.asTerm()) elif isinstance(object, URIRef): if not subject is None: self._append_to_graph(subject, predicate, object) else: if not subject is None: self._append_to_graph(subject, predicate, Literal(object)) """ elif isinstance(object, list): # ordered list is the default for raw lists self._add_ordered_list(object, predicate, subject) elif isinstance(object, OrderedList): self._add_ordered_list(object, predicate, subject) elif isinstance(object, LinkedList): self._add_linked_list(object, predicate, subject) elif isinstance(object, ItemizedList): self._add_itemized_list(object, predicate, subject) """ def _convert_element(self, element, type): return Literal(element, datatype=URIRef(type)) def is_list(self, node): item = self.graph.value(subject=node, predicate=RDF.first) return item is not None def parse_list(self, container, parent_node=None, klass=None, first=None): if first is None and parent_node is not None: first = 
self.graph.value(subject=parent_node, predicate=RDF.first) if first is not None: arr = self.graph.value(subject=parent_node, predicate=RDF.rest) if arr: return self._parse_linked_list(container, first, arr, klass) arr = self.graph.value(subject=first, predicate=URIRef(NS.SMARTAPI + "rawArray")) if arr: return self._parse_nude_list(container, first, klass) arr = self.graph.value(subject=first, predicate=URIRef(NS.SMARTAPI + "array")) if arr: return self._parse_itemized_list(container, first, klass) arr = self.graph.value(subject=first, predicate=URIRef(NS.SMARTAPI + "indexedArray")) if arr: return self._parse_ordered_list(container, first, klass) return None def _parse_list_entry(self, entry, klass=None, from_nude=False): from SmartAPI.model.ValueObject import ValueObject from SmartAPI.rdf.Variant import Variant from SmartAPI.common.Tools import Tools if from_nude and klass is not None: item = klass() item.fromNude(entry) return item if isinstance(entry, Literal): return Variant(entry.toPython()) elif isinstance(entry, URIRef): if entry == RDF.nil: return None return Variant(entry) else: if klass is None: types = [] sl = self.listStatements(subject=entry, predicate=URIRef(PROPERTY.RDF_TYPE), object=None) for s in sl: types.append(s.getResource().toString()) klass = Tools().mapper.getClass(types, default=Variant) item = klass() for s in self.find_statements_for_node(entry): if s.predicate == NS.SMARTAPI + "valueObject": itemv = ValueObject() for sv in self.find_statements_for_node(s.object): itemv.parse(sv) item.addValueObject(itemv) else: item.parseStatement(s) return item def _parse_linked_list(self, container, value, next, klass): if value is not None: item = self._parse_list_entry(value, klass) if item: container.append(item) while next is not None: value = self.graph.value(subject=next, predicate=RDF.first) next = self.graph.value(subject=next, predicate=RDF.rest) if value is not None: item = self._parse_list_entry(value, klass) if item: container.append(item) return 'LinkedList' def _parse_nude_list(self, container, current, klass): arr = self.graph.value(subject=current, predicate=URIRef(NS.SMARTAPI + "rawArray")) if arr: value = self.graph.value(subject=arr, predicate=RDF.value) v = simplejson.loads(value.toPython()) for o in v: container.append( self._parse_list_entry(o, klass, from_nude=True)) return 'NudeList' def _parse_itemized_list(self, container, current, klass): arr = self.graph.value(subject=current, predicate=URIRef(NS.SMARTAPI + "array")) size = self.graph.value(subject=current, predicate=URIRef(NS.SMARTAPI + "size")) if arr: for s, p, o in self.graph.triples((arr, RDF.value, None)): container.append(self._parse_list_entry(o, klass)) return 'ItemizedList' def _parse_ordered_list(self, container, current, klass): arr = self.graph.value(subject=current, predicate=URIRef(NS.SMARTAPI + "indexedArray")) size = self.graph.value(subject=current, predicate=URIRef(NS.SMARTAPI + "size")) if arr and size: # prefill for i in range(size.toPython()): container.append(None) #container = [None] * size.toPython() for s, p, o in self.graph.triples( (arr, URIRef(NS.SMARTAPI + "entry"), None)): index = self.graph.value(subject=o, predicate=URIRef(NS.SMARTAPI + "index")) value = self.graph.value(subject=o, predicate=RDF.value) container[index.toPython()] = self._parse_list_entry( value, klass) return 'OrderedList' # obsolete? 
""" def _add_linked_list(self, rdflist, predicate, subject): from SmartAPI.model.Obj import Obj from SmartAPI.rdf.Variant import Variant elements = rdflist.get_items() current = lst = BNode() self.graph.add((subject, URIRef(predicate.id), lst)) l = len(elements) for index, var in enumerate(elements): if isinstance(var, Variant): # support lists with raw values (not just wrapped inside Evaluation self.graph.add((current, RDF.first, var.asTerm())) elif isinstance(var, Obj): self._add_element(var.serialize(self), RDF.first, subject = current) elif isinstance(var, Resource): var_node = BNode() for p in var.properties: self._add_element(p[1], URIRef(p[0]), subject = var_node) self.graph.add((current, RDF.first, var_node)) else: self.graph.add((current, RDF.first, Literal(var))) next = RDF.nil if index == l-1 else BNode() # last item self.graph.add((current, RDF.rest, next)) current = next """ def add(self, statement): if isinstance(statement, list): for l in statement: self._add_element(l, None) else: self._add_element(statement, None) def findSubject(self, predicate, object): return Resource(model=self, node=self.graph.value(predicate=predicate, object=object)) def findObject(self, subject, predicate): return Statement(node=self.graph.value(subject=subject, predicate=predicate), subject=subject, predicate=predicate) def find_statements_for_node(self, node, predicate=None): r = [] for s, p, o in self.graph.triples((node, predicate, None)): r.append( Statement(model=self, subject=s, predicate=p, object=o, resource=Resource(model=self, node=o))) return r def listStatements(self, subject=None, predicate=None, object=None): r = [] for s, p, o in self.graph.triples((subject, predicate, object)): r.append( Statement(model=self, subject=s, predicate=p, object=o, resource=Resource(model=self, node=o))) return r def serialize(self, format=SERIALIZATION.JSON_LD): return self.graph.serialize(format=format) def parse(self, data=None, file=None, format=SERIALIZATION.JSON_LD): if data is not None: try: if format == SERIALIZATION.JSON_LD: json = simplejson.loads(data) if isinstance(json, dict) and json.has_key( '@graph') and json.has_key('@context'): self.graph.parse(data=simplejson.dumps(json['@graph']), format='json-ld', context=json['@context']) else: self.graph.parse(data=data, format='json-ld') # other formats else: self.graph.parse(data=data, format=format) except: print "Could not read the input data into a graph" traceback.print_exc() #traceback.print_stack() return elif file is not None: try: f = open(file) self.graph.parse(f, format=format) f.close() except: print "Could not read the file into a model" traceback.print_exc() return print "No input to parse into a graph"
from datetime import datetime from rdflib import Graph, URIRef, Literal, BNode, RDF, Namespace from rdflib.namespace import FOAF, DOAP, DC from nose.tools import nottest EARL = Namespace("http://www.w3.org/ns/earl#") report = Graph() report.bind("foaf", FOAF) report.bind("earl", EARL) report.bind("doap", DOAP) report.bind("dc", DC) me = URIRef("http://gromgull.net/me") report.add((me, RDF.type, FOAF.Person)) report.add((me, FOAF.homepage, URIRef("http://gromgull.net"))) report.add((me, FOAF.name, Literal("Gunnar Aastrand Grimnes"))) rdflib = URIRef("https://github.com/RDFLib/rdflib") report.add((rdflib, DOAP.homepage, rdflib)) report.add((rdflib, DOAP.name, Literal("rdflib"))) report.add((rdflib, DOAP.developer, me)) report.add((rdflib, RDF.type, DOAP.Project)) now = Literal(datetime.now())
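# The report graph above only describes the assertor (me) and the project
# (rdflib); a hedged sketch of how a single EARL assertion could be appended.
# The test URI is a placeholder, and the vocabulary terms used are the
# standard EARL ones (earl:Assertion, earl:assertedBy, earl:subject,
# earl:test, earl:result, earl:TestResult, earl:outcome, earl:passed).
test = URIRef("http://www.w3.org/2000/10/rdf-tests/rdfcore/some-test")  # placeholder

assertion = BNode()
report.add((assertion, RDF.type, EARL.Assertion))
report.add((assertion, EARL.assertedBy, me))
report.add((assertion, EARL.subject, rdflib))
report.add((assertion, EARL.test, test))

result = BNode()
report.add((result, RDF.type, EARL.TestResult))
report.add((result, EARL.outcome, EARL.passed))
report.add((result, DC.date, now))
report.add((assertion, EARL.result, result))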
def main(): with open(JSON) as infile: data = json.load(infile) gr = Graph() gr.namespace_manager.bind('skos', SKOS) gr.namespace_manager.bind('qml', QML) gr.add ( (SCHEME , RDF.type , SKOS.ConceptScheme)) gr.add ( (SCHEME , RDFS.label , Literal('Quality Indicators Dictionary and Markup Language - QualityML'))) for c in data['class']: name = c['id'] id = URIRef(QML[name]) gr.add( (id, RDF.type, SKOS.Concept )) gr.add( (id, RDF.type, QML.Class )) gr.add( (id, SKOS.topConceptOf, SCHEME )) gr.add( (id, SKOS.inScheme, SCHEME )) gr.add ( (SCHEME , SKOS.hasTopConcept , id )) label(gr, id, c ) for i in data['indicator'] : name = i['id'] qc = URIRef(QML[i['class']]) id = URIRef(QML[name]) gr.add( (id, RDF.type, SKOS.Concept )) gr.add( (id, RDF.type, QML.Indicator )) gr.add( (id, SKOS.broader, qc )) gr.add( (id, SKOS.inScheme, SCHEME )) label(gr, id, i ) for m in data['measure'] : name = m['id'] qc = URIRef(QML[m['class']]) id = URIRef(QML["/".join( ('measure',name))]) gr.add( (id, RDF.type, SKOS.Concept )) gr.add( (id, RDF.type, QML.Measure )) gr.add( (id, SKOS.broader, qc )) gr.add( (id, SKOS.inScheme, SCHEME )) label(gr, id, m ) with open ('qml.ttl' , "w") as outfile: outfile.write(gr.serialize(format='turtle'))
assert cr["warnings"] == ["warn", "warn", "warn"] cr.add_info(None) cr.add_warning(None) assert cr["info"] == ["info", "info", "info"] assert cr["warnings"] == ["warn", "warn", "warn"] cr.add_result(cr) # results all the way down assert cr["results"][0]["name"] == "test" assert cr["results"][0]["description"] == "test results of a test" assert not cr["results"][0]["passes"] assert cr["results"][0]["info"] == ["info", "info", "info"] assert cr["results"][0]["warnings"] == ["warn", "warn", "warn"] from rdflib import Graph, URIRef, Literal, BNode sg = Graph() # use OCX RDFS as schema graph b/c it is small sg.parse(location="tests/input/schema.ttl", format="turtle") dg = Graph() # a data graph for tests dg.parse(location="tests/input/data.ttl", format="turtle") dc = DataChecks(dg, sg) def test_init(): assert type(dc) == DataChecks assert len(dc.graph) == len(dg) assert dc.schema_graph.isomorphic(sg) # will fail if BNodes in sg def test_find_primary_entities(): result = dc.find_primary_entities([]) assert result["name"] == "primary entities present"
from rdflib import Graph, Literal, Namespace, RDF, URIRef, BNode import ast #.literal_eval import csv import pprint graph = Graph() skos = Namespace('http://www.w3.org/2004/02/skos/core#') rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#') rdf = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') schema = Namespace('https://schema.org/') eaccpf = Namespace('http://culturalis.org/eac-cpf#') dbo = Namespace('http://dbpedia.org/ontology/') rdaad = Namespace('http://rdaregistry.info/Elements/a/datatype/') djo = Namespace('http://dijest.ac.il/ontology/') djr = Namespace('http://dijest.ac.il/resource/') owl = Namespace('http://www.w3.org/2002/07/owl#') graph.bind('skos', skos) graph.bind('rdfs', rdfs) graph.bind('rdf', rdf) graph.bind('schema', schema) graph.bind('eac-cpf', eaccpf) graph.bind('dbo', dbo) graph.bind('rdaad', rdaad) graph.bind('djo', djo) graph.bind('djr', djr) graph.bind('owl', owl) #basis_uri = 'http://dijest.ac.il/person/'
classType = sys.argv[2] endpoint_uri = config['Mandatory']['endpointURI'] graph_uri = config['Mandatory']['graphURI'] # Set up endpoint and access to triple store sparql = SPARQLWrapper(endpoint_uri) sparql.setReturnFormat(JSON) sparql.setMethod(POST) store = SPARQLUpdateStore(endpoint_uri, endpoint_uri) # Specify the (named) graph we're working with sparql.addDefaultGraph(graph_uri) # Create an in memory graph g = Graph(store, identifier=graph_uri) query = "select ?p ?o where {<" + URI + "> ?p ?o}" properties = g.query(query) # Configurations mappings mapping = ConfigParser() mapping.read('mapping_fields.ini') propURI = "" props = "" for row in properties: propURI = str(row[0]) if propURI != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": for key in mapping[classType]: if mapping[classType][key] == propURI:
tei = {'tei': 'http://www.tei-c.org/ns/1.0'} from rdflib import Graph, Literal, BNode, Namespace, URIRef from rdflib.namespace import RDF, RDFS, XSD, DCTERMS, OWL agrelon = Namespace("https://d-nb.info/standards/elementset/agrelon#") crm = Namespace("http://www.cidoc-crm.org/cidoc-crm/") frbroo = Namespace("http://iflastandards.info/ns/fr/frbr/frbroo/") pro = Namespace("http://purl.org/spar/pro/") proles = Namespace("http://www.essepuntato.it/2013/10/politicalroles/") prov = Namespace("http://www.w3.org/ns/prov#") schema = Namespace("https://schema.org/") tvc = Namespace("http://www.essepuntato.it/2012/04/tvc/") g = Graph() g.bind("agrelon", agrelon) g.bind("crm", crm) g.bind("frbroo", frbroo) g.bind("dcterms", DCTERMS) g.bind("schema", schema) g.bind("owl", OWL) g.bind("pro", pro) g.bind("proles", proles) g.bind("prov", prov) g.bind("tvc", tvc) ############################# # # # Persons #