def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """
    Dump the contents of Graph g in RDF turtle

    When both COMPUTE_MEMBERS and EXPLICIT_MEMBERS are set, every
    skos:exactMatch target is first recorded as a member of the matching
    concept and of all of its skos:broader ancestors, and the members of each
    top concept are copied onto its scheme. The result is written to
    ``<DATA_DIR>/<table_name>.ttl``.

    :param g: Dataset to dump
    :param table_name: name of the base table
    :return: success indicator
    """
    has_member = ISO['enumeratedConceptualDomain.hasMember']

    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef) -> None:
        # Iterative walk with a visited set: a cyclic skos:broader chain no
        # longer recurses forever, and an ancestor shared by several
        # branches is handled only once.
        pending = [s]
        visited = set()
        while pending:
            concept = pending.pop()
            if concept in visited:
                continue
            visited.add(concept)
            g.add((concept, has_member, vm))
            pending.extend(g.objects(concept, SKOS.broader))

    # NOTE(review): the scheme roll-up below is kept under the same flag
    # guard as the propagation it depends on -- confirm against the
    # original indentation.
    if COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
        # Snapshot the iterators: the graph is modified inside the loops.
        for subj, obj in list(g.subject_objects(SKOS.exactMatch)):
            add_to_ancestors(subj, obj)
        # TODO: this gives us a list of all concepts in the scheme... useful?
        for scheme, tc in list(g.subject_objects(SKOS.hasTopConcept)):
            for member in list(g.objects(tc, has_member)):
                g.add((scheme, has_member, member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)

    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
class Fragment(object):
    """Accumulates the triples and quads of one fragment in an rdflib Dataset.

    The class also exposes the namespaces used when describing a fragment
    (HYDRA, VOID, FOAF, DCTERMS) as class attributes.
    """

    HYDRA = Namespace("http://www.w3.org/ns/hydra/core#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    DCTERMS = Namespace("http://purl.org/dc/terms/")

    def __init__(self):
        """Start with an empty Dataset."""
        self.rdf_graph = Dataset()

    def add_data_triple(self, subject, predicate, obj):
        """Add the triple (subject, predicate, obj) to the dataset."""
        triple = (subject, predicate, obj)
        self.rdf_graph.add(triple)

    def add_graph(self, identifier):
        """Make sure a graph with the given identifier exists in the dataset."""
        self.rdf_graph.graph(identifier)

    def add_meta_quad(self, graph, subject, predicate, obj):
        """Add the 4-tuple (graph, subject, predicate, obj) to the dataset.

        The arguments are forwarded in exactly this order.
        """
        # NOTE(review): rdflib reads a 4-tuple as (subject, predicate,
        # object, context) -- confirm that callers pass their arguments in
        # the positions rdflib expects.
        quad = (graph, subject, predicate, obj)
        self.rdf_graph.add(quad)

    def add_prefix(self, prefix, uri):
        """Bind a namespace prefix used when serializing."""
        self.rdf_graph.bind(prefix, uri)

    def serialize(self):
        """Return the dataset serialized as TriG, encoded as UTF-8."""
        serialized = self.rdf_graph.serialize(format="trig", encoding="utf-8")
        return serialized
csvfile, skipinitialspace=True, quotechar='"', delimiter=',')] return csv_contents #//*************** csv parser ****************//# graph_uri_base = resource path = 'source_datasets/' filename_population = 'all_population_by_type.csv' filename_unemployment = 'unemployment_eu.csv' filename_inflow = 'inflow_dataset.csv' filename_asylum = 'asylum_seekers.csv' dataset = Dataset() dataset.bind('mpr', RESOURCE) dataset.bind('mpo', VOCAB) dataset.bind('geo', GEO) dataset.bind('geo_country_code', GCC) dataset.bind('dbo', DBO) dataset.bind('dbr', DBR) dataset.bind('sdmx', SDMX) dataset.default_context.parse(VOCAB_FILE, format='turtle') dataset, unemployment_eu_graph = convert_unemployment_csv( filename_unemployment, dataset, URIRef(graph_uri_base + 'unemployment_eu_graph')) dataset, population_eu_graph = convert_population_csv( filename_population, dataset,
add quads directly to a specific Graph within the Dataset. This example file shows how to declare a Dataset, add content to it, serialise it, query it and remove things from it. """ from rdflib import Dataset, URIRef, Literal, Namespace # # Create & Add # # Create an empty Dataset d = Dataset() # Add a namespace prefix to it, just like for Graph d.bind("ex", Namespace("http://example.com/")) # Declare a Graph URI to be used to identify a Graph graph_1 = URIRef("http://example.com/graph-1") # Add an empty Graph, identified by graph_1, to the Dataset d.graph(identifier=graph_1) # Add two quads to Graph graph_1 in the Dataset d.add(( URIRef("http://example.com/subject-x"), URIRef("http://example.com/predicate-x"), Literal("Triple X"), graph_1 )) d.add((
k: v for k, v in row.items() } for row in csv.DictReader( csvfile, skipinitialspace=True, quotechar='"', delimiter=',')] return csv_contents #//*************** csv parser ****************//# graph_uri_base = resource + 'movement_of_people/' path = 'source_datasets/' filename = 'Movement_of_people_across_borders_dataset.csv' dataset = Dataset() dataset.bind('trumpres', RESOURCE) dataset.bind('trumpvoc', VOCAB) dataset.bind('geo', GEO) dataset.bind('dbo', DBO) dataset.bind('dbr', DBR) dataset.default_context.parse(VOCAB_FILE, format='turtle') dataset, movement_graph = convert_csv( path + filename, dataset, URIRef(graph_uri_base + 'movement_graph')) serialize_upload(OUTPUT_DIR + 'movement_of_people.trig', dataset) ### Generate VoID metadata from rdflib.void import generateVoID from rdflib.namespace import VOID dcterms_uri = 'http://purl.org/dc/terms/'
source=None, date=Literal(datetime.datetime.now().isoformat(), datatype=XSD.datetime), created=None, issued=None, modified=None, exampleResource=exampleResource, vocabulary=[URIRef("https://schema.org/")], triples=sum(1 for i in ds.graph( identifier="https://data.create.humanities.uva.nl/id/kohier1674/"). subjects()), temporalCoverage=Literal("1674", datatype=XSD.gYear, normalize=False), licenseprop=URIRef( "https://creativecommons.org/licenses/by-nc-sa/4.0/"), distribution=download) ds.bind('owl', OWL) ds.bind('create', create) ds.bind('schema', schema) ds.bind('void', void) ds.bind('foaf', foaf) ds.bind('edm', edm) ds.bind('pnv', pnv) ds.bind('roar', roar) ds.bind('dc', dc) ds.bind('dcterms', dcterms) ds.bind('oa', oa) ds.bind('prov', prov) print("Serializing!") ds.serialize('data/kohier1674.trig', format='trig')
# Read the whole CSV file (decoded as ISO-8859-1) into a list of dicts,
# one dict per row, keyed by column name.
with open(filename, 'r', encoding="ISO-8859-1") as csvfile:
    # Set the right quote character and delimiter
    csv_contents = [{
        k: v
        for k, v in row.items()
    } for row in csv.DictReader(
        csvfile, skipinitialspace=True, quotechar='"', delimiter=';')]

# The URI for our dataset; `short[i]` supplies the per-dataset path segment
# (both names are defined outside this fragment).
url = 'http://few.vu.nl/~mvr320/KRweb/resource/' + short[i] + '/'
# The same URL serves as a namespace for terms and as the graph identifier.
SETNAME = Namespace(url)
graph_uri = URIRef(url)

# We initialize a dataset, and bind our namespaces
dataset = Dataset()
dataset.bind('g13data', DATA)
dataset.bind('g13vocab', VOCAB)
dataset.bind('g13set', SETNAME)
dataset.bind('geo', GEO)
dataset.bind('geof', GEOF)
dataset.bind('dbo', DBO)
dataset.bind('dbp', DBP)
dataset.bind('schema', SCHEMA)
dataset.bind('vcard', VCARD)
dataset.bind('wgs', WGS)
dataset.bind('void', VOID)

# We then get a graph object, identified by our URI, from the dataset.
graph = dataset.graph(graph_uri)

# Load the externally defined schema into the default dataset (context) of the dataset
def main(search=None, cache=None, identifiers=None):
    """Convert RKD image records and the RKD thesaurus to RDF and write TriG.

    One input mode is used, checked in this order: ``search``, ``cache``,
    ``identifiers``. Images that are referred to but carry no name after
    that step are fetched as well. The thesaurus and image caches are read
    from and written back to ``rkdthesaurus.json`` and ``imagecache.json``
    in the working directory.

    :param search: URL handed to ``parseURL`` to fetch the records of a search
    :param cache: mapping whose values are already-fetched records; each one
        is handed to ``parseData``
    :param identifiers: iterable of record identifiers; each is appended to
        ``APIURL`` and fetched with ``parseURL``. Defaults to ``None``
        (nothing to fetch) rather than a shared mutable ``[]``.
    """
    ns = Namespace("https://data.create.humanities.uva.nl/id/rkd/")

    ds = Dataset()
    ds.bind('rdfs', RDFS)
    ds.bind('schema', schema)
    ds.bind('sem', sem)
    ds.bind('bio', bio)
    ds.bind('foaf', foaf)
    ds.bind('void', void)
    ds.bind('skos', SKOS)
    ds.bind('owl', OWL)
    ds.bind('dc', dc)

    ds.bind('rkdArtist', URIRef("https://data.rkd.nl/artists/"))
    ds.bind('rkdThes', nsThesaurus)
    ds.bind('rkdPerson', nsPerson)
    ds.bind('rkdImage', URIRef("https://rkd.nl/explore/images/"))
    ds.bind('rkdThumb', URIRef("https://images.rkd.nl/rkd/thumb/650x650/"))

    ds.bind('aat', URIRef("http://vocab.getty.edu/aat/"))

    ## First the images

    # rdfSubject.db is pointed at the image graph so that it is the target
    # while the image records are parsed.
    g = rdfSubject.db = ds.graph(identifier=ns)

    # Load cache thesaurus
    thesaurusDict = _load_json_cache('rkdthesaurus.json')

    # Load cache images
    imageCache = _load_json_cache('imagecache.json')

    # to fetch all identifiers from the search
    if search:
        thesaurusDict, imageCache = parseURL(search,
                                             thesaurusDict=thesaurusDict,
                                             imageCache=imageCache)
    elif cache:
        # assume that everything in the thesaurus is also cached
        for doc in cache.values():
            parseData(doc, thesaurusDict=thesaurusDict)
    elif identifiers:
        for i in identifiers:
            thesaurusDict, imageCache = parseURL(APIURL + str(i),
                                                 thesaurusDict=thesaurusDict,
                                                 imageCache=imageCache)

    # Any images without labels?
    # These were not included in the search, but fetch them anyway.
    print("Finding referred images that were not included")
    q = """
    PREFIX schema: <http://schema.org/>
    SELECT ?uri WHERE {
        ?role a schema:Role ;
              schema:isRelatedTo ?uri .

        FILTER NOT EXISTS { ?uri schema:name ?name }
    }
    """
    images = g.query(q)
    print(f"Found {len(images)}!")
    for i in images:
        # Strip the URI prefix to get the bare record identifier.
        identifier = str(i['uri']).replace('https://rkd.nl/explore/images/',
                                           '')
        thesaurusDict, imageCache = parseURL(
            "https://api.rkd.nl/api/record/images/" + str(identifier),
            thesaurusDict=thesaurusDict,
            imageCache=imageCache)

    ## Then the thesaurus
    print("Converting the thesaurus")
    rdfSubject.db = ds.graph(identifier=ns.term('thesaurus/'))

    # Iterate over a snapshot of the keys: getThesaurus returns a (possibly
    # extended) dict that replaces thesaurusDict on every pass.
    ids = list(thesaurusDict.keys())
    for i in ids:
        _, thesaurusDict = getThesaurus(i, thesaurusDict, 'concept')

    # Save updated cache
    with open('rkdthesaurus.json', 'w') as outfile:
        json.dump(thesaurusDict, outfile)

    with open('imagecache.json', 'w') as outfile:
        json.dump(imageCache, outfile)

    ## Serialize
    print("Serializing!")
    ds.serialize('rkdportraits14751825.trig', format='trig')


def _load_json_cache(path):
    """Return the JSON content of *path*, or an empty dict if it is not a file."""
    if not os.path.isfile(path):
        return dict()
    with open(path) as infile:
        return json.load(infile)
g7 += g g7.bind("dct", DCTERMS) g7.bind("skos", SKOS) assert "@xml:base" not in g7.serialize(format="xml").decode("utf-8") assert "@base <http://one.org/> ." in g7.serialize( format="n3", base=base_one).decode("utf-8") g7.base = base_two assert "@base <http://two.org/> ." in g7.serialize(format="n3").decode("utf-8") assert "@base <http://one.org/> ." in g7.serialize( format="n3", base=base_one).decode("utf-8") # 8. checking results for TriX & TriG # TriX can specify a base per graph but setting a base for the whole base_three = Namespace("http://three.org/") ds1 = Dataset() ds1.bind("dct", DCTERMS) ds1.bind("skos", SKOS) g8 = ds1.graph(URIRef("http://g8.com/"), base=base_one) g9 = ds1.graph(URIRef("http://g9.com/")) g8 += g g9 += g g9.base = base_two ds1.base = base_three trix = ds1.serialize(format="trix", base=Namespace("http://two.org/")).decode("utf-8") assert '<graph xml:base="http://one.org/">' in trix assert '<graph xml:base="http://two.org/">' in trix assert '<TriX xml:base="http://two.org/"' in trix trig = ds1.serialize(format="trig",
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a
    nanopublication with provenance annotations) that contains the data
    structure definition (from the DataCube vocabulary) and the mappings to
    external datasets.

    Arguments:
    profile          -- the Google signin profile; the keys 'email', 'name'
                        and 'id' are read, 'image' is optional
    dataset_name     -- the name of the dataset (becomes its rdfs:label)
    dataset_base_uri -- the URI of the dataset; all URIs minted here start
                        with this URI followed by '/'
    variables        -- a dictionary of dictionaries with the variables and
                        their mappings to URIs (only the values are used)
    source_path      -- the path to the dataset file that was annotated
    source_hash      -- the Git hash of the dataset file version of the
                        dataset (only the first 8 characters are used)

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace('{}/'.format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a dataset (default graph + named graphs) for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind('qbrv', QBRV)
    rdf_dataset.bind('qbr', QBR)
    rdf_dataset.bind('qb', QB)
    rdf_dataset.bind('skos', SKOS)
    rdf_dataset.bind('prov', PROV)
    rdf_dataset.bind('np', NP)
    rdf_dataset.bind('foaf', FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")
    # The XML Schema datatype is spelled 'dateTime' and its lexical form
    # requires seconds. The minute-precision timestamp is kept for the URIs;
    # the literal gets ':00' and, since utcnow() is UTC, a 'Z' designator.
    generated_at = Literal(timestamp + ':00Z', datatype=XSD.dateTime)

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + '/' + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE['assertion/' + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE['provenance/' + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE['pubinfo/' + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR['person/' + profile['email']]

    rdf_dataset.add((author_uri, RDF.type, FOAF['Person']))
    rdf_dataset.add((author_uri, FOAF['name'], Literal(profile['name'])))
    rdf_dataset.add((author_uri, FOAF['email'], Literal(profile['email'])))
    rdf_dataset.add((author_uri, QBRV['googleId'], Literal(profile['id'])))
    # The profile picture is optional: skip it when the key is missing.
    try:
        rdf_dataset.add(
            (author_uri, FOAF['depiction'], URIRef(profile['image'])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV['path'],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV['sha1_hash'],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE['nanopublication/' + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP['Nanopublication']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasAssertion'], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP['Assertion']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasProvenance'], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP['Provenance']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasPublicationInfo'], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP['PublicationInfo']))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure
    # definition itself)
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add(
        (dataset_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add(
        (assertion_graph_uri, PROV['generatedAtTime'], generated_at))
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather
    # than an activity itself.
    qber_uri = URIRef('https://github.com/CLARIAH/qber.git')

    pubinfo_graph.add((nanopublication_uri, PROV['wasGeneratedBy'], qber_uri))
    pubinfo_graph.add(
        (nanopublication_uri, PROV['generatedAtTime'], generated_at))
    pubinfo_graph.add(
        (nanopublication_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE['structure']

    assertion_graph.add((dataset_uri, RDF.type, QB['DataSet']))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add(
        (structure_uri, RDF.type, QB['DataStructureDefinition']))

    assertion_graph.add((dataset_uri, QB['structure'], structure_uri))

    # Only the variable descriptions are needed; their keys are not used.
    for variable in variables.values():
        variable_uri = URIRef(variable['original']['uri'])
        variable_label = Literal(variable['original']['label'])
        variable_type = URIRef(variable['type'])

        # The variable as component of the definition
        component_uri = safe_url(BASE,
                                 'component/' + variable['original']['label'])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB['component'], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the
        # variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if 'description' in variable and variable['description'] != "":
            assertion_graph.add(
                (variable_uri, RDFS.comment, Literal(variable['description'])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable['uri'] != str(variable_uri):
            assertion_graph.add(
                (variable_uri, RDFS['subPropertyOf'], URIRef(variable['uri'])))

        category = variable['category']

        if variable_type == QB['DimensionProperty']:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['dimension'], variable_uri))

            # Coded variables are also of type coded property (a subproperty
            # of dimension property)
            if category == 'coded':
                assertion_graph.add(
                    (variable_uri, RDF.type, QB['CodedProperty']))

        elif variable_type == QB['MeasureProperty']:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['measure'], variable_uri))

        elif variable_type == QB['AttributeProperty']:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['attribute'], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs
        # for each variable (including mappings between value uris and
        # etc....)
        if category == 'coded':
            # The codelist is only read here, where it is actually used, so
            # variables of other categories do not need to carry one.
            codelist = variable['codelist']
            codelist_uri = URIRef(codelist['original']['uri'])
            codelist_label = Literal(codelist['original']['label'])

            assertion_graph.add((codelist_uri, RDF.type, SKOS['Collection']))
            assertion_graph.add((codelist_uri, RDFS.label, codelist_label))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB['codeList'], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if codelist['uri'] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV['wasDerivedFrom'],
                                     URIRef(codelist['uri'])))

            # Generate a SKOS concept for each of the values and map it to
            # the assigned codelist
            _add_value_concepts(assertion_graph, variable['values'],
                                codelist_uri)

        elif category == 'identifier':
            # Generate a SKOS concept for each of the values
            _add_value_concepts(assertion_graph, variable['values'])

        elif category == 'other':
            # Generate a literal for each of the values when converting the
            # dataset (but not here)
            pass

    return rdf_dataset


def _add_value_concepts(graph, values, codelist_uri=None):
    """Add a skos:Concept for every value; link it to codelist_uri if given."""
    for value in values:
        value_uri = URIRef(value['original']['uri'])
        value_label = Literal(value['original']['label'])

        graph.add((value_uri, RDF.type, SKOS['Concept']))
        graph.add((value_uri, SKOS['prefLabel'], value_label))
        if codelist_uri is not None:
            graph.add((codelist_uri, SKOS['member'], value_uri))

        # The value has been changed, and therefore there is a mapping
        if value['original']['uri'] != value['uri']:
            graph.add((value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
            graph.add((value_uri, RDFS.label, Literal(value['label'])))
# Identifier and Docker namespaces
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")

# W3C namespace:
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
SC = Namespace("https://w3id.org/daspos/smartcontainers#")
CA = Namespace("https://w3id.org/daspos/computationalactivity#")
CE = Namespace("https://w3id.org/daspos/computationalenvironment#")

# Need to handle DOI
# http://bitwacker.com/2010/02/04/dois-uris-and-cool-resolution/

# Register every prefix on the dataset, in the same order as before.
for _prefix, _namespace in (
        ("prov", PROV),
        ("ore", ORE),
        ("owl", OWL),
        ("dc", DC),
        ("uuidns", UUIDNS),
        ("docker", DOCKER),
        ("posix", POSIX),
        ("acl", ACL),
        ("sc", SC),
        ("ca", CA),
        ("ce", CE),
):
    ds.bind(_prefix, _namespace)

# The dataset itself is used as the default graph.
default_graph = ds

# image_name = cmd_string.rsplit(' ', 1) [1]
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a
    nanopublication with provenance annotations) that contains the data
    structure definition (from the DataCube vocabulary) and the mappings to
    external datasets.

    Arguments:
    profile          -- the Google signin profile; the keys "email", "name"
                        and "id" are read, "image" is optional
    dataset_name     -- the name of the dataset (becomes its rdfs:label)
    dataset_base_uri -- the URI of the dataset; all URIs minted here start
                        with this URI followed by "/"
    variables        -- a dictionary of dictionaries with the variables and
                        their mappings to URIs (only the values are used)
    source_path      -- the path to the dataset file that was annotated
    source_hash      -- the Git hash of the dataset file version of the
                        dataset (only the first 8 characters are used)

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace("{}/".format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a dataset (default graph + named graphs) for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind("qbrv", QBRV)
    rdf_dataset.bind("qbr", QBR)
    rdf_dataset.bind("qb", QB)
    rdf_dataset.bind("skos", SKOS)
    rdf_dataset.bind("prov", PROV)
    rdf_dataset.bind("np", NP)
    rdf_dataset.bind("foaf", FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")
    # The XML Schema datatype is spelled "dateTime" and its lexical form
    # requires seconds. The minute-precision timestamp is kept for the URIs;
    # the literal gets ":00" and, since utcnow() is UTC, a "Z" designator.
    generated_at = Literal(timestamp + ":00Z", datatype=XSD.dateTime)

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + "/" + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE["assertion/" + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE["provenance/" + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE["pubinfo/" + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR["person/" + profile["email"]]

    rdf_dataset.add((author_uri, RDF.type, FOAF["Person"]))
    rdf_dataset.add((author_uri, FOAF["name"], Literal(profile["name"])))
    rdf_dataset.add((author_uri, FOAF["email"], Literal(profile["email"])))
    rdf_dataset.add((author_uri, QBRV["googleId"], Literal(profile["id"])))
    # The profile picture is optional: skip it when the key is missing.
    try:
        rdf_dataset.add(
            (author_uri, FOAF["depiction"], URIRef(profile["image"])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV["path"],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV["sha1_hash"],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE["nanopublication/" + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP["Nanopublication"]))
    rdf_dataset.add(
        (nanopublication_uri, NP["hasAssertion"], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP["Assertion"]))
    rdf_dataset.add(
        (nanopublication_uri, NP["hasProvenance"], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP["Provenance"]))
    rdf_dataset.add(
        (nanopublication_uri, NP["hasPublicationInfo"], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP["PublicationInfo"]))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure
    # definition itself)
    provenance_graph.add(
        (assertion_graph_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add(
        (dataset_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add(
        (assertion_graph_uri, PROV["generatedAtTime"], generated_at))
    provenance_graph.add(
        (assertion_graph_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather
    # than an activity itself.
    qber_uri = URIRef("https://github.com/CLARIAH/qber.git")

    pubinfo_graph.add((nanopublication_uri, PROV["wasGeneratedBy"], qber_uri))
    pubinfo_graph.add(
        (nanopublication_uri, PROV["generatedAtTime"], generated_at))
    pubinfo_graph.add(
        (nanopublication_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE["structure"]

    assertion_graph.add((dataset_uri, RDF.type, QB["DataSet"]))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add(
        (structure_uri, RDF.type, QB["DataStructureDefinition"]))

    assertion_graph.add((dataset_uri, QB["structure"], structure_uri))

    # Only the variable descriptions are needed; their keys are not used.
    for variable in variables.values():
        variable_uri = URIRef(variable["original"]["uri"])
        variable_label = Literal(variable["original"]["label"])
        variable_type = URIRef(variable["type"])

        # The variable as component of the definition
        component_uri = safe_url(BASE,
                                 "component/" + variable["original"]["label"])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB["component"], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the
        # variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if "description" in variable and variable["description"] != "":
            assertion_graph.add(
                (variable_uri, RDFS.comment, Literal(variable["description"])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable["uri"] != str(variable_uri):
            assertion_graph.add(
                (variable_uri, RDFS["subPropertyOf"], URIRef(variable["uri"])))

        category = variable["category"]

        if variable_type == QB["DimensionProperty"]:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["dimension"], variable_uri))

            # Coded variables are also of type coded property (a subproperty
            # of dimension property)
            if category == "coded":
                assertion_graph.add(
                    (variable_uri, RDF.type, QB["CodedProperty"]))

        elif variable_type == QB["MeasureProperty"]:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["measure"], variable_uri))

        elif variable_type == QB["AttributeProperty"]:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["attribute"], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs
        # for each variable (including mappings between value uris and
        # etc....)
        if category == "coded":
            # The codelist is only read here, where it is actually used, so
            # variables of other categories do not need to carry one.
            codelist = variable["codelist"]
            codelist_uri = URIRef(codelist["original"]["uri"])
            codelist_label = Literal(codelist["original"]["label"])

            assertion_graph.add((codelist_uri, RDF.type, SKOS["Collection"]))
            assertion_graph.add((codelist_uri, RDFS.label, codelist_label))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB["codeList"], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if codelist["uri"] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV["wasDerivedFrom"],
                                     URIRef(codelist["uri"])))

            # Generate a SKOS concept for each of the values and map it to
            # the assigned codelist
            _add_value_concepts(assertion_graph, variable["values"],
                                codelist_uri)

        elif category == "identifier":
            # Generate a SKOS concept for each of the values
            _add_value_concepts(assertion_graph, variable["values"])

        elif category == "other":
            # Generate a literal for each of the values when converting the
            # dataset (but not here)
            pass

    return rdf_dataset


def _add_value_concepts(graph, values, codelist_uri=None):
    """Add a skos:Concept for every value; link it to codelist_uri if given."""
    for value in values:
        value_uri = URIRef(value["original"]["uri"])
        value_label = Literal(value["original"]["label"])

        graph.add((value_uri, RDF.type, SKOS["Concept"]))
        graph.add((value_uri, SKOS["prefLabel"], value_label))
        if codelist_uri is not None:
            graph.add((codelist_uri, SKOS["member"], value_uri))

        # The value has been changed, and therefore there is a mapping
        if value["original"]["uri"] != value["uri"]:
            graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
            graph.add((value_uri, RDFS.label, Literal(value["label"])))
# line[str] = unicode(line[str], errors='replace') # #print line with open(filename,'r') as csvfile: csv_contents = [{k: v for k, v in row.items()} for row in csv.DictReader(csvfile, skipinitialspace=True, quotechar='"', delimiter=',')] return csv_contents #//*************** csv parser ****************//# graph_uri_base = resource + 'movement_of_people/' path = 'source_datasets/' filename = 'Movement_of_people_across_borders_dataset.csv' dataset = Dataset() dataset.bind('trumpres', RESOURCE) dataset.bind('trumpvoc', VOCAB) dataset.bind('geo', GEO) dataset.bind('dbo', DBO) dataset.bind('dbr', DBR) dataset.default_context.parse(VOCAB_FILE, format='turtle') dataset, movement_graph = convert_csv(path + filename,dataset,URIRef(graph_uri_base + 'movement_graph')) serialize_upload(OUTPUT_DIR + 'movement_of_people.trig',dataset) ### Generate VoID metadata from rdflib.void import generateVoID from rdflib.namespace import VOID
class LongTermMemory(object):
    """
    Client-side model of the robot's long-term memory ("brain").

    Capsules (parsed statements or sensor experiences) are turned into named graphs inside an rdflib Dataset,
    serialized as TriG and posted to a remote triple store; questions are turned into SPARQL queries that are
    submitted to the same store.
    """

    # Predicates for which more than one distinct object per subject is reported as a conflict
    ONE_TO_ONE_PREDICATES = [
        'age', 'born_in', 'faceID', 'favorite', 'favorite_of', 'id', 'is_from',
        'manufactured_in', 'mother_is', 'name'
    ]

    def __init__(self, address=config.BRAIN_URL_LOCAL):
        """
        Interact with Triple store

        Parameters
        ----------
        address: str
            IP address and port of the Triple store
        """
        self.address = address
        self.namespaces = {}
        self.ontology_paths = {}
        self.format = 'trig'
        self.dataset = Dataset()

        # Prefix header prepended to every query built by _create_query
        self.query_prefixes = """
                    prefix gaf: <http://groundedannotationframework.org/gaf#>
                    prefix grasp: <http://groundedannotationframework.org/grasp#>
                    prefix leolaniInputs: <http://cltl.nl/leolani/inputs/>
                    prefix leolaniFriends: <http://cltl.nl/leolani/friends/>
                    prefix leolaniTalk: <http://cltl.nl/leolani/talk/>
                    prefix leolaniTime: <http://cltl.nl/leolani/time/>
                    prefix leolaniWorld: <http://cltl.nl/leolani/world/>
                    prefix n2mu: <http://cltl.nl/leolani/n2mu/>
                    prefix ns1: <urn:x-rdflib:>
                    prefix owl: <http://www.w3.org/2002/07/owl#>
                    prefix prov: <http://www.w3.org/ns/prov#>
                    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                    prefix sem: <http://semanticweb.cs.vu.nl/2009/11/sem/>
                    prefix skos: <http://www.w3.org/2004/02/skos/core#>
                    prefix time: <http://www.w3.org/TR/owl-time/#>
                    prefix xml: <http://www.w3.org/XML/1998/namespace>
                    prefix xml1: <https://www.w3.org/TR/xmlschema-2/#>
                    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
                    """

        self._define_namespaces()
        self._get_ontology_path()
        self._bind_namespaces()

        # URI of the robot itself; created lazily by _generate_leolani
        self.my_uri = None

        self._log = logger.getChild(self.__class__.__name__)
        self._log.debug("Booted")

    #################################### Main functions to interact with the brain ####################################

    def update(self, capsule):
        """
        Main function to interact with if a statement is coming into the brain. Takes in a structured parsed
        statement, transforms it to triples, and posts them to the triple store
        :param capsule: Structured data of a parsed statement
        :return: json response containing the status for posting the triples, and the original statement
        """
        return self._process_capsule(capsule, 'Statement')

    def experience(self, capsule):
        """
        Main function to interact with if a sensor experience is coming into the brain. Takes in a structured
        capsule, transforms it to triples, and posts them to the triple store
        :param capsule: Structured data of a parsed statement
        :return: json response containing the status for posting the triples, and the original statement
        """
        return self._process_capsule(capsule, 'Experience')

    def _process_capsule(self, capsule, capsule_type):
        """
        Shared implementation of update() and experience(): model the capsule as graphs, save and upload them
        :param capsule: Structured data of a parsed statement
        :param capsule_type: 'Statement' or 'Experience'
        :return: dict with the upload status code under 'response' and the case-folded capsule under 'statement'
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Create graphs and triples
        self._model_graphs_(capsule, type=capsule_type)
        data = self._serialize(config.BRAIN_LOG)
        code = self._upload_to_brain(data)

        # Create JSON output (the date is stringified so the capsule can be returned as plain data)
        capsule["date"] = str(capsule["date"])
        return {'response': code, 'statement': capsule}

    def query_brain(self, capsule):
        """
        Main function to interact with if a question is coming into the brain. Takes in a structured parsed
        question, transforms it into a query, and queries the triple store for a response
        :param capsule: Structured data of a parsed question
        :return: json response containing the results of the query, and the original question
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Generate query
        query = self._create_query(capsule)

        # Perform query
        response = self._submit_query(query)

        # Create JSON output
        if 'date' in capsule:
            capsule["date"] = str(capsule["date"])
        return {'response': response, 'question': capsule}

    def process_visual(self, item, exact_only=True):
        """
        Main function to determine if this item can be recognized by the brain, learned, or none
        :param item: label of the observed object
        :param exact_only: when False, fall back to a fuzzy DBpedia keyword search if the exact match fails
        :return: tuple (class or item name, or None when nothing was found; message describing the outcome)
        """
        folded = casefold(item)

        if folded in self.get_classes():
            # If this is in the ontology already, create sensor triples directly
            text = 'I know about %s. I will remember this object' % item
            return item, text

        temp = self.get_labels_and_classes()
        if folded in temp:
            # If this is in the ontology already, create sensor triples directly.
            # The lookup uses the same case-folded key as the membership test above.
            text = 'I know about %s. It is of type %s. I will remember this object' % (item, temp[folded])
            return item, text

        # Query the web for information
        class_type, description = self.exact_match_dbpedia(item)
        if class_type is not None:
            # Had to learn it, but I can create triples now
            text = 'I did not know what %s is, but I searched on the web and I found that it is a %s. ' \
                   'I will remember this object' % (item, class_type)
            return casefold(class_type), text

        if not exact_only:
            # Second go at dbpedia, relaxed approach
            class_type, description = self.keyword_match_dbpedia(item)
            if class_type is not None:
                # Had to really search for it to learn it, but I can create triples now
                text = 'I did not know what %s is, but I searched for fuzzy matches on the web and I found that it ' \
                       'is a %s. I will remember this object' % (item, class_type)
                return casefold(class_type), text

        # Failure, nothing found
        text = 'I am sorry, I could not learn anything on %s so I will not remember it' % item
        return None, text

    ########## management system for keeping track of chats and turns ##########

    def get_last_chat_id(self):
        """
        Get the id for the last interaction recorded
        :return: id, or 0 when the query returns nothing
        """
        query = read_query('last_chat_id')
        response = self._submit_query(query)

        return int(response[0]['chatid']['value']) if response else 0

    def get_last_turn_id(self, chat_id):
        """
        Get the id for the last turn in the given chat
        :param chat_id: id for chat of interest
        :return: highest turn id found, or 0 when there are no turns
        """
        query = read_query('last_turn_id') % (chat_id)
        response = self._submit_query(query)

        last_turn = 0
        for turn in response:
            # The numeric id is whatever follows the first 10 characters of the last URI segment
            turn_uri = turn['s']['value']
            turn_id = int(turn_uri.split('/')[-1][10:])
            if turn_id > last_turn:
                last_turn = turn_id

        return last_turn

    ########## brain structure exploration ##########

    def get_predicates(self):
        """
        Get predicates in social ontology
        :return: list with the last path segment of every predicate URI
        """
        query = read_query('predicates')
        response = self._submit_query(query)

        return [elem['p']['value'].split('/')[-1] for elem in response]

    def get_classes(self):
        """
        Get classes in social ontology
        :return: list with the last path segment of every class URI
        """
        query = read_query('classes')
        response = self._submit_query(query)

        return [elem['o']['value'].split('/')[-1] for elem in response]

    def get_labels_and_classes(self):
        """
        Get labels known to the brain together with their classes
        :return: dict mapping label to the last path segment of its class URI
        """
        query = read_query('labels_and_classes')
        response = self._submit_query(query)

        temp = dict()
        for r in response:
            temp[r['l']['value']] = r['o']['value'].split('/')[-1]

        return temp

    ########## learned facts exploration ##########

    def count_statements(self):
        """
        Count statements or 'facts' in the brain
        :return: count value as returned by the triple store
        """
        query = read_query('count_statements')
        response = self._submit_query(query)
        return response[0]['count']['value']

    def count_friends(self):
        """
        Count number of people I have talked to
        :return: count value as returned by the triple store
        """
        query = read_query('count_friends')
        response = self._submit_query(query)
        return response[0]['count']['value']

    def get_my_friends(self):
        """
        Get names of people I have talked to
        :return: list of names (last path segment of each result value)
        """
        query = read_query('my_friends')
        response = self._submit_query(query)
        return [elem['name']['value'].split('/')[-1] for elem in response]

    def get_best_friends(self):
        """
        Get names of the 5 people I have talked to the most
        :return: list of names
        """
        query = read_query('best_friends')
        response = self._submit_query(query)
        return [elem['name']['value'] for elem in response]

    def get_instance_of_type(self, instance_type):
        """
        Get instances of a certain class type
        :param instance_type: name of class in ontology
        :return: list of instance names
        """
        query = read_query('instance_of_type') % (instance_type)
        response = self._submit_query(query)
        return [elem['name']['value'] for elem in response]

    def when_last_chat_with(self, actor_label):
        """
        Get time value for the last time I chatted with this person
        :param actor_label: name of person
        :return: last path segment of the time URI of the first result
        """
        query = read_query('when_last_chat_with') % (actor_label)
        response = self._submit_query(query)
        return response[0]['time']['value'].split('/')[-1]

    def get_triples_with_predicate(self, predicate):
        """
        Get triples that contain this predicate
        :param predicate: predicate name to substitute into the stored query
        :return: list of (subject name, object name) tuples
        """
        query = read_query('triples_with_predicate') % predicate
        response = self._submit_query(query)
        return [(elem['sname']['value'], elem['oname']['value']) for elem in response]

    ########## conflicts ##########

    def get_all_conflicts(self):
        """
        Aggregate all conflicts in brain
        :return: list of conflict dicts, one batch per predicate in ONE_TO_ONE_PREDICATES
        """
        conflicts = []
        for predicate in self.ONE_TO_ONE_PREDICATES:
            conflicts.extend(self._get_conflicts_with_predicate(predicate))

        return conflicts

    ########## semantic web ##########

    def exact_match_dbpedia(self, item):
        """
        Query dbpedia for information on this item to get its semantic type and description.
        :param item: label to look up
        :return: tuple (class type, first sentence of the description); both None when nothing matched
        """
        # Gather combinations
        combinations = [item, item.lower(), item.capitalize(), item.title()]

        for comb in combinations:
            # Try exact matching query
            query = read_query('dbpedia_type_and_description') % (comb)
            response = self._submit_query(query)

            # break if we have a hit
            if response:
                break

        class_type = response[0]['label_type']['value'] if response else None
        description = response[0]['description']['value'].split('.')[0] if response else None

        return class_type, description

    def keyword_match_dbpedia(self, item):
        """
        Look the item up through the DBpedia keyword-search API and keep the candidate whose label is the
        closest fuzzy match to the item.
        :param item: label to look up
        :return: tuple (first dbpedia class label, first sentence of the description); either can be None
        """
        # Query API
        r = requests.get(
            'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch',
            params={'QueryString': item, 'MaxHits': '10'},
            headers={'Accept': 'application/json'}).json()['results']

        # Fuzzy match against the requested item (not a fixed string)
        choices = [e['label'] for e in r]
        best_match = process.extractOne(item, choices)
        if best_match is None:
            # No candidates to match against
            return None, None

        # Get best match object
        r = [{'label': e['label'], 'classes': e['classes'], 'description': e['description']}
             for e in r if e['label'] == best_match[0]]

        if r:
            r = r[0]

            if r['classes']:
                # process dbpedia classes only
                r['classes'] = [c['label'] for c in r['classes'] if 'dbpedia' in c['uri']]
        else:
            r = {'label': None, 'classes': None, 'description': None}

        class_type = r['classes'][0] if r['classes'] else None
        description = r['description'].split('.')[0] if r['description'] else None
        return class_type, description

    ######################################## Helpers for setting up connection ########################################

    def _define_namespaces(self):
        """
        Define namespaces for different layers (ontology/vocab and resource). Assign them to self
        :return:
        """
        layers = (
            # Namespaces for the instance layer
            ('N2MU', 'http://cltl.nl/leolani/n2mu/'),
            ('LW', 'http://cltl.nl/leolani/world/'),
            # Namespaces for the mention layer
            ('GAF', 'http://groundedannotationframework.org/gaf#'),
            ('LTa', 'http://cltl.nl/leolani/talk/'),
            # Namespaces for the attribution layer
            ('GRASP', 'http://groundedannotationframework.org/grasp#'),
            ('LF', 'http://cltl.nl/leolani/friends/'),
            ('LI', 'http://cltl.nl/leolani/inputs/'),
            # Namespaces for the temporal layer-ish
            ('TIME', 'http://www.w3.org/TR/owl-time/#'),
            ('LTi', 'http://cltl.nl/leolani/time/'),
            # The namespaces of external ontologies
            ('SKOS', 'http://www.w3.org/2004/02/skos/core#'),
            ('PROV', 'http://www.w3.org/ns/prov#'),
            ('SEM', 'http://semanticweb.cs.vu.nl/2009/11/sem/'),
            ('XML', 'https://www.w3.org/TR/xmlschema-2/#'),
        )
        for key, uri in layers:
            self.namespaces[key] = Namespace(uri)

    def _get_ontology_path(self):
        """
        Define ontology paths to key vocabularies
        :return:
        """
        self.ontology_paths['n2mu'] = './../../knowledge_representation/ontologies/leolani.ttl'
        self.ontology_paths['gaf'] = './../../knowledge_representation/ontologies/gaf.rdf'
        self.ontology_paths['grasp'] = './../../knowledge_representation/ontologies/grasp.rdf'
        self.ontology_paths['sem'] = './../../knowledge_representation/ontologies/sem.rdf'

    def _bind_namespaces(self):
        """
        Bind namespaces
        :return:
        """
        bindings = (
            ('n2mu', 'N2MU'),
            ('leolaniWorld', 'LW'),
            ('gaf', 'GAF'),
            ('leolaniTalk', 'LTa'),
            ('grasp', 'GRASP'),
            ('leolaniFriends', 'LF'),
            ('leolaniInputs', 'LI'),
            ('time', 'TIME'),
            ('leolaniTime', 'LTi'),
            ('skos', 'SKOS'),
            ('prov', 'PROV'),
            ('sem', 'SEM'),
            ('xml', 'XML'),
        )
        for prefix, key in bindings:
            self.dataset.bind(prefix, self.namespaces[key])
        self.dataset.bind('owl', OWL)

    def _uri(self, namespace_key, local_name):
        """
        Build the URIRef for a local name inside one of the namespaces registered in self.namespaces
        :param namespace_key: key into self.namespaces (e.g. 'LW', 'GRASP')
        :param local_name: string appended to the namespace
        :return: URIRef
        """
        return URIRef(to_iri(self.namespaces[namespace_key] + local_name))

    ######################################## Helpers for statement processing ########################################

    def create_chat_id(self, actor, date):
        """
        Determine chat id depending on my last conversation with this person
        :param actor: label of the person
        :param date: date of the current interaction (its day, month and year are compared)
        :return: chat id
        """
        self._log.debug('Chat with {} on {}'.format(actor, date))

        query = read_query('last_chat_with') % (actor)
        response = self._submit_query(query)

        if response and int(response[0]['day']['value']) == int(date.day) \
                and int(response[0]['month']['value']) == int(date.month) \
                and int(response[0]['year']['value']) == int(date.year):
            # Chatted with this person today so same chat id
            chat_id = int(response[0]['chatid']['value'])
        else:
            # Either have never chatted with this person, or I have but not today. Add one to latest chat
            chat_id = self.get_last_chat_id() + 1

        return chat_id

    def create_turn_id(self, chat_id):
        """
        Determine the id for the next turn in the given chat
        :param chat_id: id for chat of interest
        :return: last recorded turn id plus one, or 1 when the chat has no turns yet
        """
        self._log.debug('Turn in chat {}'.format(chat_id))

        query = read_query('last_turn_in_chat') % (chat_id)
        response = self._submit_query(query)

        # _submit_query returns a list of bindings, so the turn id lives in the first row
        return int(response[0]['turnid']['value']) + 1 if response else 1

    def _generate_leolani(self, instance_graph):
        """
        Add the robot itself to the instance graph and remember its URI in self.my_uri
        :param instance_graph: graph receiving the label and type triples
        :return: URIRef of the robot
        """
        # Create Leolani
        leolani_id = 'leolani'

        leolani = self._uri('LW', leolani_id)
        leolani_label = Literal(leolani_id)
        leolani_type1 = self._uri('N2MU', 'robot')
        leolani_type2 = self._uri('GRASP', 'Instance')

        instance_graph.add((leolani, RDFS.label, leolani_label))
        instance_graph.add((leolani, RDF.type, leolani_type1))
        instance_graph.add((leolani, RDF.type, leolani_type2))

        self.my_uri = leolani

        return leolani

    def _generate_subject(self, capsule, instance_graph):
        """
        Add the capsule's subject to the instance graph
        :param capsule: capsule providing capsule['subject']['label'] and capsule['subject']['type']
        :param instance_graph: graph receiving the label and type triples
        :return: tuple (subject URIRef, subject label Literal)
        """
        if capsule['subject']['type'] == '':  # We only get the label
            subject_vocab = OWL
            subject_type = 'Thing'
        else:
            subject_vocab = self.namespaces['N2MU']
            subject_type = capsule['subject']['type']

        subject_id = capsule['subject']['label']

        subject = self._uri('LW', subject_id)
        subject_label = Literal(subject_id)
        subject_type1 = URIRef(to_iri(subject_vocab + subject_type))
        subject_type2 = self._uri('GRASP', 'Instance')

        instance_graph.add((subject, RDFS.label, subject_label))
        instance_graph.add((subject, RDF.type, subject_type1))
        instance_graph.add((subject, RDF.type, subject_type2))

        return subject, subject_label

    def _create_leolani_world(self, capsule, type='Statement'):
        """
        Create the instance layer: subject and object instances plus the claim relating them
        :param capsule: capsule providing 'subject', 'object' and 'predicate' entries
        :param type: 'Statement' (subject taken from the capsule) or 'Experience' (the robot is the subject)
        :return: tuple (instance graph, claim graph, subject URIRef, object URIRef, statement URIRef)
        """
        # Instance graph
        instance_graph_uri = self._uri('LW', 'Instances')
        instance_graph = self.dataset.graph(instance_graph_uri)

        # Subject
        if type == 'Statement':
            subject, subject_label = self._generate_subject(capsule, instance_graph)
        elif type == 'Experience':
            subject = self._generate_leolani(instance_graph) if self.my_uri is None else self.my_uri
            subject_label = 'leolani'

        # Object
        if capsule['object']['type'] == '':  # We only get the label
            object_vocab = OWL
            object_type = 'Thing'
        else:
            object_vocab = self.namespaces['N2MU']
            object_type = capsule['object']['type']

        object_id = capsule['object']['label']

        object = self._uri('LW', object_id)
        object_label = Literal(object_id)
        object_type1 = URIRef(to_iri(object_vocab + object_type))
        object_type2 = self._uri('GRASP', 'Instance')

        instance_graph.add((object, RDFS.label, object_label))
        instance_graph.add((object, RDF.type, object_type1))
        instance_graph.add((object, RDF.type, object_type2))

        if type == 'Statement':
            claim_graph, statement = self._create_claim_graph(subject, subject_label, object, object_label,
                                                              capsule['predicate']['type'], type='Statement')
        elif type == 'Experience':
            # Experiences are always modelled with the fixed predicate 'sees'
            claim_graph, statement = self._create_claim_graph(subject, subject_label, object, object_label,
                                                              'sees', type='Experience')

        return instance_graph, claim_graph, subject, object, statement

    def _create_claim_graph(self, subject, subject_label, object, object_label, predicate, type='Statement'):
        """
        Register a claim: a named graph holding the single triple, typed in the shared Claims graph
        :param subject: subject URIRef
        :param subject_label: subject label, used for the statement id
        :param object: object URIRef
        :param object_label: object label, used for the statement id
        :param predicate: predicate name inside the N2MU namespace
        :param type: GRASP class name for the statement ('Statement' or 'Experience')
        :return: tuple (claim graph, statement URIRef)
        """
        # Claim graph
        claim_graph_uri = self._uri('LW', 'Claims')
        claim_graph = self.dataset.graph(claim_graph_uri)

        # Statement
        statement_id = hash_statement_id([subject_label, predicate, object_label])

        statement = self._uri('LW', statement_id)
        statement_type1 = self._uri('GRASP', type)
        statement_type2 = self._uri('GRASP', 'Instance')
        statement_type3 = self._uri('SEM', 'Event')

        # Create graph and add triple (the statement URI doubles as the name of the graph holding it)
        graph = self.dataset.graph(statement)
        graph.add((subject, self.namespaces['N2MU'][predicate], object))

        claim_graph.add((statement, RDF.type, statement_type1))
        claim_graph.add((statement, RDF.type, statement_type2))
        claim_graph.add((statement, RDF.type, statement_type3))

        return claim_graph, statement

    def _create_leolani_talk(self, capsule, leolani, type='Statement'):
        """
        Create the mention layer: time, actor, chat/turn events and the perspective linked to them
        :param capsule: capsule providing 'date', 'author' and 'position' entries
        :param leolani: URIRef of the robot
        :param type: 'Statement' or 'Experience'; selects graph, class and label names
        :return: tuple (interaction graph, perspective graph, actor, time, mention, attribution)
        """
        # Interaction graph
        if type == 'Statement':
            graph_to_write = 'Interactions'
        elif type == 'Experience':
            graph_to_write = 'Sensors'

        interaction_graph_uri = self._uri('LTa', graph_to_write)
        interaction_graph = self.dataset.graph(interaction_graph_uri)

        # Time
        date = capsule["date"]
        time = self._uri('LTi', str(date.isoformat()))
        time_type = self._uri('TIME', 'DateTimeDescription')
        day = Literal(date.day, datatype=self.namespaces['XML']['gDay'])
        # NOTE(review): the month is typed gMonthDay, not gMonth; left unchanged because stored data and
        # existing queries may depend on it -- confirm before changing
        month = Literal(date.month, datatype=self.namespaces['XML']['gMonthDay'])
        year = Literal(date.year, datatype=self.namespaces['XML']['gYear'])
        time_unitType = self._uri('TIME', 'unitDay')

        interaction_graph.add((time, RDF.type, time_type))
        interaction_graph.add((time, self.namespaces['TIME']['day'], day))
        interaction_graph.add((time, self.namespaces['TIME']['month'], month))
        interaction_graph.add((time, self.namespaces['TIME']['year'], year))
        interaction_graph.add((time, self.namespaces['TIME']['unitType'], time_unitType))

        # Actor
        actor_id = capsule['author']
        actor_label = capsule['author']

        # NOTE(review): to_iri is applied twice here; presumably redundant -- confirm it is idempotent
        actor = URIRef(to_iri(to_iri(self.namespaces['LF'] + actor_id)))
        actor_label = Literal(actor_label)
        actor_type1 = self._uri('SEM', 'Actor')
        actor_type2 = self._uri('GRASP', 'Instance')

        if type == 'Statement':
            actor_type3 = self._uri('N2MU', 'person')
        elif type == 'Experience':
            actor_type3 = self._uri('N2MU', 'sensor')

        interaction_graph.add((actor, RDFS.label, actor_label))
        interaction_graph.add((actor, RDF.type, actor_type1))
        interaction_graph.add((actor, RDF.type, actor_type2))
        interaction_graph.add((actor, RDF.type, actor_type3))

        # Add leolani knows/senses actor
        if type == 'Statement':
            predicate = 'knows'
        elif type == 'Experience':
            predicate = 'senses'

        interaction_graph.add((leolani, self.namespaces['N2MU'][predicate], actor))
        _, _ = self._create_claim_graph(leolani, 'leolani', actor, actor_label, predicate, type)

        # Event and subevent
        event_id = self.create_chat_id(actor_label, date)
        if type == 'Statement':
            event_label = 'chat%s' % event_id
        elif type == 'Experience':
            event_label = 'visual%s' % event_id

        subevent_id = self.create_turn_id(event_id)
        if type == 'Statement':
            subevent_label = event_label + '_turn%s' % subevent_id
        elif type == 'Experience':
            subevent_label = event_label + '_object%s' % subevent_id

        turn = self._uri('LTa', subevent_label)
        turn_type1 = self._uri('SEM', 'Event')
        if type == 'Statement':
            turn_type2 = self._uri('GRASP', 'Turn')
        elif type == 'Experience':
            turn_type2 = self._uri('GRASP', 'Object')

        interaction_graph.add((turn, RDF.type, turn_type1))
        interaction_graph.add((turn, RDF.type, turn_type2))
        interaction_graph.add((turn, self.namespaces['N2MU']['id'], Literal(subevent_id)))
        interaction_graph.add((turn, self.namespaces['SEM']['hasActor'], actor))
        interaction_graph.add((turn, self.namespaces['SEM']['hasTime'], time))

        chat = self._uri('LTa', event_label)
        chat_type1 = self._uri('SEM', 'Event')
        if type == 'Statement':
            chat_type2 = self._uri('GRASP', 'Chat')
        elif type == 'Experience':
            chat_type2 = self._uri('GRASP', 'Visual')

        interaction_graph.add((chat, RDF.type, chat_type1))
        interaction_graph.add((chat, RDF.type, chat_type2))
        interaction_graph.add((chat, self.namespaces['N2MU']['id'], Literal(event_id)))
        interaction_graph.add((chat, self.namespaces['SEM']['hasActor'], actor))
        interaction_graph.add((chat, self.namespaces['SEM']['hasTime'], time))
        interaction_graph.add((chat, self.namespaces['SEM']['hasSubevent'], turn))

        # Forward the capsule type so experiences get their '_pixel' mention ids
        perspective_graph, mention, attribution = self._create_perspective_graph(capsule, subevent_label, type)

        # Link interactions and perspectives
        perspective_graph.add((mention, self.namespaces['GRASP']['wasAttributedTo'], actor))
        perspective_graph.add((mention, self.namespaces['GRASP']['hasAttribution'], attribution))
        perspective_graph.add((mention, self.namespaces['PROV']['wasDerivedFrom'], chat))
        perspective_graph.add((mention, self.namespaces['PROV']['wasDerivedFrom'], turn))

        return interaction_graph, perspective_graph, actor, time, mention, attribution

    def _create_perspective_graph(self, capsule, turn_label, type='Statement'):
        """
        Create the mention and its attribution in the shared Perspectives graph
        :param capsule: capsule providing the 'position' entry
        :param turn_label: label of the turn/object event the mention belongs to
        :param type: 'Statement' (mention id suffix '_char') or 'Experience' (suffix '_pixel')
        :return: tuple (perspective graph, mention URIRef, attribution URIRef)
        """
        # Perspective graph
        perspective_graph_uri = self._uri('LTa', 'Perspectives')
        perspective_graph = self.dataset.graph(perspective_graph_uri)

        # Mention
        if type == 'Statement':
            mention_id = turn_label + '_char%s' % capsule['position']
        elif type == 'Experience':
            mention_id = turn_label + '_pixel%s' % capsule['position']

        mention = self._uri('LTa', mention_id)
        mention_type = self._uri('GRASP', 'Mention')

        perspective_graph.add((mention, RDF.type, mention_type))

        # Attribution (every mention is currently recorded with the fixed value CERTAIN)
        attribution_id = mention_id + '_CERTAIN'
        attribution = self._uri('LTa', attribution_id)
        attribution_type = self._uri('GRASP', 'Attribution')
        attribution_value = self._uri('GRASP', 'CERTAIN')

        perspective_graph.add((attribution, RDF.type, attribution_type))
        perspective_graph.add((attribution, RDF.value, attribution_value))

        return perspective_graph, mention, attribution

    def _serialize(self, file_path):
        """
        Save graph to local file and return the serialized string
        :param file_path: path to where data will be saved (the format name is appended as extension)
        :return: serialized data as string
        """
        # Save to file but return the python representation
        with open(file_path + '.' + self.format, 'w') as f:
            self.dataset.serialize(f, format=self.format)
        return self.dataset.serialize(format=self.format)

    def _upload_to_brain(self, data):
        """
        Post data to the brain
        :param data: serialized data as string
        :return: response status
        """
        self._log.debug("Posting triples")

        # From serialized string
        post_url = self.address + "/statements"
        response = requests.post(post_url,
                                 data=data,
                                 headers={'Content-Type': 'application/x-' + self.format})

        return str(response.status_code)

    def _model_graphs_(self, capsule, type='Statement'):
        """
        Build all graphs for a capsule and interconnect the instance and perspective layers
        :param capsule: case-folded capsule
        :param type: 'Statement' or 'Experience'
        :return:
        """
        # Leolani world (includes instance and claim graphs)
        instance_graph, claim_graph, subject, object, instance = self._create_leolani_world(capsule, type)

        # Identity
        leolani = self._generate_leolani(instance_graph) if self.my_uri is None else self.my_uri

        # Leolani talk (includes interaction and perspective graphs)
        interaction_graph, perspective_graph, actor, time, mention, attribution = \
            self._create_leolani_talk(capsule, leolani, type)

        # Interconnections
        instance_graph.add((subject, self.namespaces['GRASP']['denotedIn'], mention))
        instance_graph.add((object, self.namespaces['GRASP']['denotedIn'], mention))

        instance_graph.add((instance, self.namespaces['GRASP']['denotedBy'], mention))
        instance_graph.add((instance, self.namespaces['SEM']['hasActor'], actor))
        instance_graph.add((instance, self.namespaces['SEM']['hasTime'], time))

        perspective_graph.add((mention, self.namespaces['GRASP']['containsDenotation'], subject))
        perspective_graph.add((mention, self.namespaces['GRASP']['containsDenotation'], object))
        perspective_graph.add((mention, self.namespaces['GRASP']['denotes'], instance))

        perspective_graph.add((attribution, self.namespaces['GRASP']['isAttributionFor'], mention))

    ######################################### Helpers for question processing #########################################

    @staticmethod
    def _escape_literal(value):
        """
        Escape a label so it can be placed between single quotes in a SPARQL query
        :param value: label to embed; values without a replace() method are returned untouched
        :return: label with backslashes, single quotes and line breaks escaped
        """
        if not hasattr(value, 'replace'):
            return value
        # Backslashes first, so the escapes added afterwards are not escaped again
        return value.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n').replace('\r', '\\r')

    def _create_query(self, parsed_question):
        """
        Build the SPARQL query for a parsed question. An empty subject label asks for subjects, an empty
        object label asks for objects, and otherwise the existence of the triple is checked.
        :param parsed_question: question providing 'subject', 'predicate' and 'object' entries
        :return: query string, prefixed with self.query_prefixes
        """
        _ = hash_statement_id([parsed_question['subject']['label'], parsed_question['predicate']['type'],
                               parsed_question['object']['label']])

        predicate = parsed_question['predicate']['type']
        # Labels end up inside quoted literals, so quotes in them must not terminate the literal
        subject_label = self._escape_literal(parsed_question['subject']['label'])
        object_label = self._escape_literal(parsed_question['object']['label'])

        # Query subject
        if parsed_question['subject']['label'] == "":
            query = """
                SELECT ?slabel ?authorlabel
                        WHERE {
                            ?s n2mu:%s ?o .
                            ?s rdfs:label ?slabel .
                            ?o rdfs:label '%s' .
                            GRAPH ?g {
                                ?s n2mu:%s ?o .
                            } .
                            ?g grasp:denotedBy ?m .
                            ?m grasp:wasAttributedTo ?author .
                            ?author rdfs:label ?authorlabel .
                        }
                """ % (predicate, object_label, predicate)

        # Query object
        elif parsed_question['object']['label'] == "":
            query = """
                SELECT ?olabel ?authorlabel
                        WHERE {
                            ?s n2mu:%s ?o .
                            ?s rdfs:label '%s' .
                            ?o rdfs:label ?olabel .
                            GRAPH ?g {
                                ?s n2mu:%s ?o .
                            } .
                            ?g grasp:denotedBy ?m .
                            ?m grasp:wasAttributedTo ?author .
                            ?author rdfs:label ?authorlabel .
                        }
                """ % (predicate, subject_label, predicate)

        # Query existence
        else:
            query = """
                SELECT ?authorlabel ?v
                        WHERE {
                            ?s n2mu:%s ?o .
                            ?s rdfs:label '%s' .
                            ?o rdfs:label '%s' .
                            GRAPH ?g {
                                ?s n2mu:%s ?o .
                            } .
                            ?g grasp:denotedBy ?m .
                            ?m grasp:wasAttributedTo ?author .
                            ?author rdfs:label ?authorlabel .
                            ?m grasp:hasAttribution ?att .
                            ?att rdf:value ?v .
                        }
                """ % (predicate, subject_label, object_label, predicate)

        query = self.query_prefixes + query

        return query

    def _submit_query(self, query):
        """
        Send a SPARQL query to the triple store
        :param query: query string
        :return: list of result bindings taken from the JSON response
        """
        # Set up connection
        sparql = SPARQLWrapper(self.address)

        # Response parameters
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        sparql.addParameter('Accept', 'application/sparql-results+json')
        response = sparql.query().convert()

        return response["results"]["bindings"]

    ######################################### Helpers for conflict processing #########################################

    def _get_conflicts_with_predicate(self, one_to_one_predicate):
        """
        Find subjects that have more than one distinct object label for the given predicate
        :param one_to_one_predicate: predicate name inside the n2mu namespace
        :return: list of dicts with 'subject', 'predicate' and 'objects' (each a dict with 'value' and 'author')
        """
        query = """
            PREFIX n2mu: <http://cltl.nl/leolani/n2mu/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX grasp: <http://groundedannotationframework.org/grasp#>

            select ?sname
                    (group_concat(?oname ; separator=";") as ?onames)
                    (group_concat(?authorlabel ; separator=";") as ?authorlabels)
            where {
                GRAPH ?g {
                    ?s n2mu:%s ?o .
                    } .
                ?s rdfs:label ?sname .
                ?o rdfs:label ?oname .

                ?g grasp:denotedBy ?m .
                ?m grasp:wasAttributedTo ?author .
                ?author rdfs:label ?authorlabel .
            }
            group by ?sname having (count(distinct ?oname) > 1)
        """ % one_to_one_predicate

        response = self._submit_query(query)
        conflicts = []
        for item in response:
            conflict = {'subject': item['sname']['value'], 'predicate': one_to_one_predicate, 'objects': []}

            # NOTE(review): values and authors are paired by position; this assumes both group_concat
            # results list their members in the same order -- confirm against the triple store
            values = item['onames']['value'].split(';')
            authors = item['authorlabels']['value'].split(';')

            for val, auth in zip(values, authors):
                option = {'value': val, 'author': auth}
                conflict['objects'].append(option)

            conflicts.append(conflict)

        return conflicts
with open(filename,'r') as csvfile: csv_contents = [{k: v for k, v in row.items()} for row in csv.DictReader(csvfile, skipinitialspace=True, quotechar='"', delimiter=',')] return csv_contents #//*************** csv parser ****************//# graph_uri_base = resource path = 'source_datasets/' filename_population = 'all_population_by_type.csv' filename_unemployment = 'unemployment_eu.csv' filename_inflow = 'inflow_dataset.csv' filename_asylum = 'asylum_seekers.csv' dataset = Dataset() dataset.bind('mpr', RESOURCE) dataset.bind('mpo', VOCAB) dataset.bind('geo', GEO) dataset.bind('geo_country_code', GCC) dataset.bind('dbo', DBO) dataset.bind('dbr', DBR) dataset.bind('sdmx', SDMX) dataset.default_context.parse(VOCAB_FILE, format='turtle') dataset, unemployment_eu_graph = convert_unemployment_csv(filename_unemployment,dataset,URIRef(graph_uri_base + 'unemployment_eu_graph')) dataset, population_eu_graph = convert_population_csv(filename_population,dataset,URIRef(graph_uri_base + 'population_eu_graph')) dataset, inflow_graph = convert_inflow_csv(filename_inflow,dataset,URIRef(graph_uri_base + 'inflow_graph'))
#!/usr/bin/python from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD import rdflib.resource from provmodified import Entity import provmodified as prov import json import subprocess, shlex import collections DOCKER = Namespace("http://www.example.org/ns/docker#") PROV = Namespace("http://www.w3.org/ns/prov#") ds = Dataset(default_union=True) ds.bind("docker", DOCKER) ds.bind("prov", PROV) default_graph = ds def bind_ns(prefix, namespace): ds.namespace_manager.bind(prefix, Namespace(namespace)) def parse_json_byfile(filename): with open(filename) as data_file: data = json.load(data_file) return data[0] def inspect_json(cmd): # print cmd p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD
import rdflib.resource

"""
@newfield iri: IRI
"""

PROV = Namespace("http://www.w3.org/ns/prov#")

# One module-wide dataset, reachable under both names
default_graph = ds = Dataset(default_union=True)
ds.bind("prov", PROV)
#print type(default_graph)

# Module-level settings, read and written through the two accessors below
config = dict(useInverseProperties=False)


def set_use_inverse_properties(flag=False):
    """Store `flag` as the module's "useInverseProperties" setting."""
    config.update(useInverseProperties=flag)


def using_inverse_properties():
    """Return the current "useInverseProperties" setting."""
    return config["useInverseProperties"]


def clear_graph(bundle=default_graph):
    """Call `bundle.remove` with an all-None triple pattern (the module dataset by default)."""
    match_everything = (None, None, None)
    bundle.remove(match_everything)
DC = Namespace("http://purl.org/dc/terms/")
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")

# W3C namespace:
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
SC = Namespace("https://w3id.org/daspos/smartcontainers#")
CA = Namespace("https://w3id.org/daspos/computationalactivity#")
CE = Namespace("https://w3id.org/daspos/computationalenvironment#")

# Need to handle DOI
# http://bitwacker.com/2010/02/04/dois-uris-and-cool-resolution/

# Register every prefix on the dataset, in the same order as before
for _prefix, _namespace in (
    ("prov", PROV),
    ("ore", ORE),
    ("owl", OWL),
    ("dc", DC),
    ("uuidns", UUIDNS),
    ("docker", DOCKER),
    ("posix", POSIX),
    ("acl", ACL),
    ("sc", SC),
    ("ca", CA),
    ("ce", CE),
):
    ds.bind(_prefix, _namespace)

default_graph = ds

#image_name = cmd_string.rsplit(' ', 1) [1]
#image_id = self.get_imageID(image_name)
from rdflib import Dataset, URIRef, Literal, Namespace, RDF, RDFS, OWL, XSD

# Base address used to build every namespace and the graph URI below
host = "http://localhost:5820/MATH"

# A namespace for our resources
data = host + '/'  # + '/resource/'
DATA = Namespace(data)

# A namespace for our vocabulary items (schema information, RDFS, OWL classes and properties etc.)
# Note that `vocab` is the bare host, while VOCAB points at host + '/vocab/'.
vocab = host  # + '/vocab/'
VOCAB = Namespace(host + '/vocab/')

# The URI for our graph (currently the host itself)
graph_uri = URIRef(host)  # + '/graph')

# We initialize a dataset, and bind our namespaces
dataset = Dataset()
dataset.bind('data', DATA)
dataset.bind('vocab', VOCAB)

# We then get a new graph object with our URI from the dataset.
graph = dataset.graph(graph_uri)

# The vocabulary file is parsed into the dataset's default context, not into `graph`
dataset.default_context.parse("../vocab.ttl", format="turtle")

# IRI baker is a library that reliably creates valid (parts of) IRIs from strings (spaces are turned into underscores, etc.).

# Disabled code: link formula pairs and write the graph to a TriG file
# for row in same_set:
#     # graph.add((row[0], RDF.type, VOCAB['Formula']))
#     # graph.add((row[1], RDF.type, VOCAB['Formula']))
#     graph.add((URIRef(row[0]), OWL.sameas, URIRef(row[1])))
#
# with open('same_formula_db.trig', 'w') as f:
#     graph.serialize(f, format='trig')
def test_scenarios() -> None:
    """
    Check how a base IRI set on a graph and/or passed to ``serialize`` shows
    up in the serialized output.

    Testing scenarios:
        1. no base set
        2. base set at graph creation
        3. base set at serialization
        4. base set at both graph creation & serialization, serialization overrides
        5. multiple serialization side effect checking
        6. checking results for RDF/XML
        7. checking results for N3
        8. checking results for TriX & TriG
    """

    # variables
    base_one = Namespace("http://one.org/")
    base_two = Namespace("http://two.org/")
    title = Literal("Title", lang="en")
    description = Literal("Test Description", lang="en")
    creator = URIRef("https://creator.com")
    cs = URIRef("")

    # starting graph
    g = Graph()
    g.add((cs, RDF.type, SKOS.ConceptScheme))
    g.add((cs, DCTERMS.creator, creator))
    g.add((cs, DCTERMS.source, URIRef("nick")))
    g.bind("dct", DCTERMS)
    g.bind("skos", SKOS)

    # 1. no base set for graph, no base set for serialization
    g1 = Graph()
    g1 += g
    # @base should not be in output (checked on g1, the graph built for this
    # scenario, rather than on the shared starting graph)
    assert "@base" not in g1.serialize(format="turtle")

    # 2. base one set for graph, no base set for serialization
    g2 = Graph(base=base_one)
    g2 += g
    # @base should be in output, from Graph (one)
    assert "@base <http://one.org/> ." in g2.serialize(format="turtle")

    # 3. no base set for graph, base two set for serialization
    g3 = Graph()
    g3 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g3.serialize(format="turtle", base=base_two)

    # 4. base one set for graph, base two set for serialization,
    #    serialization (two) overrides
    g4 = Graph(base=base_one)
    g4 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g4.serialize(format="turtle", base=base_two)
    # just checking that the graph setting (one) hasn't snuck through
    assert "@base <http://one.org/> ." not in g4.serialize(format="turtle", base=base_two)

    # 5. multiple serialization side effect checking
    g5 = Graph()
    g5 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g5.serialize(format="turtle", base=base_two)

    # checking for side effects - no base now set for this serialization
    # @base should not be in output
    assert "@base" not in g5.serialize(format="turtle")

    # 6. checking results for RDF/XML
    g6 = Graph()
    g6 += g
    g6.bind("dct", DCTERMS)
    g6.bind("skos", SKOS)
    # no base anywhere: the xml:base attribute must be absent
    assert "xml:base" not in g6.serialize(format="xml")
    assert 'xml:base="http://one.org/"' in g6.serialize(format="xml", base=base_one)
    g6.base = base_two
    assert 'xml:base="http://two.org/"' in g6.serialize(format="xml")
    assert 'xml:base="http://one.org/"' in g6.serialize(format="xml", base=base_one)

    # 7. checking results for N3
    g7 = Graph()
    g7 += g
    g7.bind("dct", DCTERMS)
    g7.bind("skos", SKOS)
    # no base anywhere: the N3 output must not declare one
    assert "@base" not in g7.serialize(format="n3")
    assert "@base <http://one.org/> ." in g7.serialize(format="n3", base=base_one)
    g7.base = base_two
    assert "@base <http://two.org/> ." in g7.serialize(format="n3")
    assert "@base <http://one.org/> ." in g7.serialize(format="n3", base=base_one)

    # 8. checking results for TriX & TriG
    # TriX can carry a base per graph as well as one on the document element;
    # the asserts below check both levels.
    base_three = Namespace("http://three.org/")
    ds1 = Dataset()
    ds1.bind("dct", DCTERMS)
    ds1.bind("skos", SKOS)
    g8 = ds1.graph(URIRef("http://g8.com/"), base=base_one)
    g9 = ds1.graph(URIRef("http://g9.com/"))
    g8 += g
    g9 += g
    g9.base = base_two
    ds1.base = base_three

    trix = ds1.serialize(format="trix", base=Namespace("http://two.org/"))
    assert '<graph xml:base="http://one.org/">' in trix
    assert '<graph xml:base="http://two.org/">' in trix
    assert '<TriX xml:base="http://two.org/"' in trix

    # TriG output carries only the base given at serialization time (two)
    trig = ds1.serialize(format="trig", base=Namespace("http://two.org/"))
    assert "@base <http://one.org/> ." not in trig
    assert "@base <http://three.org/> ." not in trig
    assert "@base <http://two.org/> ." in trig
def make_RDF(self, contents):
    """Turn parsed rows into RDF triples and write them to ``db/math_db.trig``.

    Each row is a mapping. ``row['id']`` is always read and becomes the
    subject IRI. Which other keys are read depends on the marker keys
    present in the row:

    * ``'Formula'``: typed as ``vocab:Formula`` with ``xml``, ``Description``
      and ``label`` literals, plus ``subFormulaOf`` when ``row['id_']`` is
      not ``0``.
    * ``'Symbol'`` / ``'Operator'``: typed accordingly, labelled with the
      marker's value and linked to ``row['parent_id']`` through ``partOf``.
    * ``'Function_add'`` / ``'Function_subtract'``: the marker's value names
      an operator node with ``left`` and ``right`` operand links.

    :param contents: iterable of row mappings as described above
    :raises KeyError: if a row lacks a key required by one of its markers
    """
    host = "http://localhost:5820/MATH"
    # Resource IRIs hang directly off the host.
    data = host + '/'
    # Vocabulary items (schema information, RDFS, OWL classes and properties etc.).
    VOCAB = Namespace(host + '/vocab/')
    # The named graph that receives the triples.
    graph_uri = URIRef(host)

    dataset = Dataset()
    dataset.bind('data', Namespace(data))
    dataset.bind('vocab', VOCAB)
    graph = dataset.graph(graph_uri)
    dataset.default_context.parse("db/vocab.ttl", format="turtle")

    def resource(key):
        # IRI of the resource identified by *key* in the data namespace.
        return URIRef(data + str(key))

    def text(value):
        # xsd:string literal for *value*.
        return Literal(value, datatype=XSD['string'])

    for row in contents:
        node = resource(row['id'])  # primary key for the object

        if 'Formula' in row:
            graph.add((node, RDF.type, VOCAB['Formula']))
            graph.add((node, VOCAB['xml'], text(row['Formula'])))
            graph.add((node, VOCAB['Description'], text(row['description'])))
            graph.add((node, VOCAB['label'], text(row['label'])))
            # An ``id_`` of 0 is treated as "no parent formula".
            if row['id_'] != 0:
                graph.add((node, VOCAB['subFormulaOf'], resource(row['id_'])))

        # Symbols and operators share the same shape: type, label, parent.
        for kind in ('Symbol', 'Operator'):
            if kind in row:
                graph.add((node, RDF.type, VOCAB[kind]))
                graph.add((node, VOCAB['label'], text(row[kind])))
                graph.add((node, VOCAB['partOf'], resource(row['parent_id'])))

        # Binary functions: the marker's value (not row['id']) names the node.
        for function_key in ('Function_add', 'Function_subtract'):
            if function_key in row:
                function_node = resource(row[function_key])
                graph.add((function_node, RDF.type, VOCAB['Operator']))
                graph.add((function_node, VOCAB['left'], resource(row['left'])))
                graph.add((function_node, VOCAB['right'], resource(row['right'])))

    # Hand rdflib the path rather than a text-mode handle, so rdflib opens
    # the destination itself and no str/bytes mismatch can occur on write.
    graph.serialize(destination='db/math_db.trig', format='trig')
# Info on the item g.add((item, RDF.type, saa.Item)) g.add((item, saa.term('index'), Literal(record['assigned_item_no']))) if record['persistent_uid'] != "": g.add((item, saa.identifier, Literal(record['persistent_uid']))) g.add((item, RDFS.label, Literal(record['title'], lang='nl'))) g.add((item, saa.artist, Literal(record['artist_name_1']))) g.add((item, saa.transcription, Literal(record['entry'], lang='nl'))) g.add((item, saa.workType, Literal(record['object_type_1'], lang='nl'))) if record['room'] != "": g.add((item, saa.room, Literal(record['room'], lang='nl'))) if record['valuation_amount'] != "": g.add((item, saa.valuation, Literal(record['valuation_amount']))) return g if __name__ == "__main__": ds = Dataset() ds.bind('ga', ga) ds.bind('saa', saa) ds = main(dataset=ds) ds.serialize('Dutch_Archival_Descriptions_Getty.trig', format='trig')
class RdfBuilder(object):
    """Build RDF terms (entities, predicates, triples) against a shared Dataset.

    On construction the builder registers its namespaces, binds their
    prefixes on the dataset, creates the named graphs and loads the
    integration ontology from ``ONTOLOGY_ROOT``.
    """

    ONTOLOGY_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../ontologies'))

    def __init__(self):
        # type: () -> None
        self.ontology_paths = {}
        self.namespaces = {}
        self.dataset = Dataset()

        self._log = logger.getChild(self.__class__.__name__)
        self._log.debug("Booted")

        self._define_namespaces()
        self._bind_namespaces()
        self.define_named_graphs()
        self.load_ontology_integration()

    ########## setting up connection ##########
    def _define_namespaces(self):
        """
        Define namespaces for different layers (ontology/vocab and resource). Assign them to self
        :return:
        """
        # Namespaces for the instance layer
        instance_vocab = 'http://cltl.nl/leolani/n2mu/'
        self.namespaces['N2MU'] = Namespace(instance_vocab)
        instance_resource = 'http://cltl.nl/leolani/world/'
        self.namespaces['LW'] = Namespace(instance_resource)

        # Namespaces for the mention layer
        mention_vocab = 'http://groundedannotationframework.org/gaf#'
        self.namespaces['GAF'] = Namespace(mention_vocab)
        mention_resource = 'http://cltl.nl/leolani/talk/'
        self.namespaces['LTa'] = Namespace(mention_resource)

        # Namespaces for the attribution layer
        attribution_vocab = 'http://groundedannotationframework.org/grasp#'
        self.namespaces['GRASP'] = Namespace(attribution_vocab)
        attribution_resource_friends = 'http://cltl.nl/leolani/friends/'
        self.namespaces['LF'] = Namespace(attribution_resource_friends)
        attribution_resource_inputs = 'http://cltl.nl/leolani/inputs/'
        self.namespaces['LI'] = Namespace(attribution_resource_inputs)

        # Namespaces for the temporal layer-ish
        context_vocab = 'http://cltl.nl/episodicawareness/'
        self.namespaces['EPS'] = Namespace(context_vocab)
        self.namespaces['LC'] = Namespace('http://cltl.nl/leolani/context/')

        # The namespaces of external ontologies
        skos = 'http://www.w3.org/2004/02/skos/core#'
        self.namespaces['SKOS'] = Namespace(skos)

        prov = 'http://www.w3.org/ns/prov#'
        self.namespaces['PROV'] = Namespace(prov)

        sem = 'http://semanticweb.cs.vu.nl/2009/11/sem/'
        self.namespaces['SEM'] = Namespace(sem)

        time = 'http://www.w3.org/TR/owl-time/#'
        self.namespaces['TIME'] = Namespace(time)

        xml = 'https://www.w3.org/TR/xmlschema-2/#'
        self.namespaces['XML'] = Namespace(xml)

        wd = 'http://www.wikidata.org/entity/'
        self.namespaces['WD'] = Namespace(wd)

        wdt = 'http://www.wikidata.org/prop/direct/'
        self.namespaces['WDT'] = Namespace(wdt)

        wikibase = 'http://wikiba.se/ontology#'
        self.namespaces['wikibase'] = Namespace(wikibase)

    def define_named_graphs(self):
        """Create the named graphs of the dataset and keep a handle on each."""
        # Instance graph
        self.ontology_graph = self.dataset.graph(self.create_resource_uri('LW', 'Ontology'))
        self.instance_graph = self.dataset.graph(self.create_resource_uri('LW', 'Instances'))
        self.claim_graph = self.dataset.graph(self.create_resource_uri('LW', 'Claims'))
        self.perspective_graph = self.dataset.graph(self.create_resource_uri('LTa', 'Perspectives'))
        self.interaction_graph = self.dataset.graph(self.create_resource_uri('LTa', 'Interactions'))

    def _get_ontology_path(self):
        """
        Define ontology paths to key vocabularies
        :return:
        """
        self.ontology_paths['n2mu'] = os.path.join(self.ONTOLOGY_ROOT, 'leolani.ttl')
        self.ontology_paths['gaf'] = os.path.join(self.ONTOLOGY_ROOT, 'gaf.rdf')
        self.ontology_paths['grasp'] = os.path.join(self.ONTOLOGY_ROOT, 'grasp.rdf')
        self.ontology_paths['sem'] = os.path.join(self.ONTOLOGY_ROOT, 'sem.rdf')

    def load_ontology_integration(self):
        """Parse ``integration.ttl`` from ``ONTOLOGY_ROOT`` into the ontology graph."""
        self.ontology_graph.parse(location=os.path.join(self.ONTOLOGY_ROOT, 'integration.ttl'), format="turtle")

    def _bind_namespaces(self):
        """
        Bind namespaces
        :return:
        """
        # (prefix, namespace) pairs, bound in this exact order.
        bindings = (
            ('n2mu', self.namespaces['N2MU']),
            ('leolaniWorld', self.namespaces['LW']),
            ('gaf', self.namespaces['GAF']),
            ('leolaniTalk', self.namespaces['LTa']),
            ('grasp', self.namespaces['GRASP']),
            ('leolaniFriends', self.namespaces['LF']),
            ('leolaniInputs', self.namespaces['LI']),
            ('time', self.namespaces['TIME']),
            ('eps', self.namespaces['EPS']),
            ('leolaniContext', self.namespaces['LC']),
            ('skos', self.namespaces['SKOS']),
            ('prov', self.namespaces['PROV']),
            ('sem', self.namespaces['SEM']),
            ('xml', self.namespaces['XML']),
            ('owl', OWL),
            ('wd', self.namespaces['WD']),
            ('wdt', self.namespaces['WDT']),
            ('wikibase', self.namespaces['wikibase']),
        )
        for prefix, namespace in bindings:
            self.dataset.bind(prefix, namespace)

    ########## basic constructors ##########
    def _fix_nlp_types(self, types):
        """
        Strip dotted category prefixes from NLP type labels

        Parameters
        ----------
        types: List[str] or str
            Type labels such as 'noun.person'. A bare string is treated as a single label;
            None or an empty string yields no types.

        Returns
        -------
        fixed_types: List[str]
            Labels with everything up to the last '.' removed. 'object' is appended when 'artifact' is present
        """
        # TODO here we know if two types are different category (aka noun and verb) we might need to split the triple
        # type(u'') is unicode on Python 2 and str on Python 3.
        string_types = (str, type(u''))
        if types is None:
            types = []
        elif isinstance(types, string_types):
            # A bare string is one label, not a sequence of characters.
            types = [types] if types else []

        fixed_types = []
        for el in types:
            if '.' in el:
                fixed_types.append(el.split('.')[-1])
            else:
                fixed_types.append(el)

        # Hand fixed mappings
        if 'artifact' in fixed_types:
            fixed_types.append('object')

        return fixed_types

    def create_resource_uri(self, namespace, resource_name):
        """
        Create an URI for the given resource (entity, predicate, named graph, etc) in the given namespace
        Parameters
        ----------
        namespace: str
            Key of a registered namespace; any other value is used literally as a '<namespace>:' prefix
        resource_name: str
            Label of resource

        Returns
        -------
        uri: URIRef
            Representing the URI of the resource

        """
        if namespace in self.namespaces:
            uri = URIRef(to_iri(self.namespaces[namespace] + resource_name))
        else:
            uri = URIRef(to_iri('{}:{}'.format(namespace, resource_name)))

        return uri

    def fill_literal(self, value, datatype=None):
        """
        Create an RDF literal given its value and datatype
        Parameters
        ----------
        value: str
            Value of the literal resource
        datatype: str
            Datatype of the literal

        Returns
        -------
            Literal with value and datatype given
        """

        return Literal(value, datatype=datatype) if datatype is not None else Literal(value)

    def fill_entity(self, label, types, namespace='LW', uri=None):
        """
        Create an RDF entity given its label, types and its namespace
        Parameters
        ----------
        label: str
            Label of entity
        types: List[str]
            List of types for this entity
        uri: str
            URI of the entity, if available (i.e. when extracting concepts from wikidata)
        namespace: str
            Namespace where entity belongs to

        Returns
        -------
            Entity object with given label
        """
        if types in [None, ''] and label != '':
            self._log.warning('Unknown type: {}'.format(label))
            # Forward the caller's URI so an untyped entity keeps its identifier.
            return self.fill_entity_from_label(label, namespace, uri=uri)
        else:
            entity_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))
            fixed_types = self._fix_nlp_types(types)
            return Entity(entity_id, Literal(label), fixed_types)

    def fill_predicate(self, label, namespace='N2MU', uri=None):
        """
        Create an RDF predicate given its label and its namespace
        Parameters
        ----------
        label: str
            Label of predicate
        uri: str
            URI of the predicate, if available (i.e. when extracting concepts from wikidata)
        namespace:
            Namespace where predicate belongs to

        Returns
        -------
            Predicate object with given label
        """
        predicate_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))

        return Predicate(predicate_id, Literal(label))

    def fill_entity_from_label(self, label, namespace='LW', uri=None):
        """
        Create an RDF entity given its label and its namespace
        Parameters
        ----------
        label: str
            Label of entity
        uri: str
            URI of the entity, if available (i.e. when extracting concepts from wikidata)
        namespace: str
            Namespace where entity belongs to

        Returns
        -------
            Entity object with given label and no type information
        """
        entity_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))

        return Entity(entity_id, Literal(label), [''])

    def empty_entity(self):
        """
        Create an empty RDF entity
        Parameters
        ----------

        Returns
        -------
            Entity object with no label and no type information
        """
        return Entity('', Literal(''), [''])

    def fill_provenance(self, author, date):
        """
        Structure provenance to pair authors and dates when mentions are created
        Parameters
        ----------
        author: str
            Actor that generated the knowledge
        date: date
            Date when knowledge was generated

        Returns
        -------
            Provenance object containing author and date
        """

        return Provenance(author, date)

    def fill_triple(self, subject_dict, predicate_dict, object_dict, namespace='LW'):
        """
        Create a triple from dictionaries describing its subject, predicate and object
        Parameters
        ----------
        subject_dict: dict
            Information about label and type of subject (keys 'label' and 'type')
        predicate_dict: dict
            Information about type of predicate (key 'type')
        object_dict: dict
            Information about label and type of object (keys 'label' and 'type')
        namespace: str
            Information about which namespace the entities belongs to

        Returns
        -------
            Triple object built from the three parts
        """
        subject = self.fill_entity(subject_dict['label'], [subject_dict['type']], namespace=namespace)
        predicate = self.fill_predicate(predicate_dict['type'])
        complement = self.fill_entity(object_dict['label'], [object_dict['type']], namespace=namespace)

        return Triple(subject, predicate, complement)

    def fill_triple_from_label(self, subject_label, predicate, object_label, namespace='LW'):
        """
        Create a triple from plain labels, without type information
        Parameters
        ----------
        subject_label: str
            Information about label of subject
        predicate: str
            Information about predicate
        object_label: str
            Information about label of object
        namespace: str
            Information about which namespace the entities belongs to

        Returns
        -------
            Triple object built from the three labels
        """
        subject = self.fill_entity_from_label(subject_label, namespace=namespace)
        predicate = self.fill_predicate(predicate)
        complement = self.fill_entity_from_label(object_label, namespace=namespace)

        return Triple(subject, predicate, complement)

    ########## basic reverse engineer ##########
    def label_from_uri(self, uri, namespace='LTi'):
        """
        Recover a resource label by removing its namespace prefix from the URI
        Parameters
        ----------
        uri: str
            Full URI of the resource
        namespace: str
            Key of the registered namespace to remove. NOTE(review): the default 'LTi' is not
            registered by _define_namespaces in this class - confirm it is added elsewhere

        Returns
        -------
            The URI without the namespace prefix, or the URI unchanged when it does not start with it

        Raises
        ------
        KeyError
            If namespace is not a registered key
        """
        prefix = self.namespaces[namespace]
        # str.strip would remove a *set of characters* from both ends and eat
        # into the label, so cut the exact prefix instead.
        if uri.startswith(prefix):
            return uri[len(prefix):]
        return uri

    def clean_aggregated_types(self, aggregated_types):
        """
        Split a '|'-joined string of type URIs into casefolded bare type names
        Parameters
        ----------
        aggregated_types: str
            Type URIs separated by '|'

        Returns
        -------
            List with, per URI, the part after the first '#' (or else after the last '/'), casefolded
        """
        split_types = aggregated_types.split('|')

        clean_types = []
        for type_uri in split_types:
            if '#' in type_uri:
                bare_type = type_uri.split('#', 1)[1]
            elif '/' in type_uri:
                bare_type = type_uri.rsplit('/', 1)[1]
            else:
                bare_type = type_uri

            clean_types.append(casefold_text(bare_type, format='triple'))

        return clean_types

    def clean_aggregated_detections(self, aggregared_detections):
        """
        Split a '|'-joined string of detections into labels without their trailing ids
        Parameters
        ----------
        aggregared_detections: str
            Detections separated by '|', each optionally ending in '-<id>'

        Returns
        -------
            List of detection labels with everything after the last '-' removed
        """
        split_detections = aggregared_detections.split('|')

        clean_detections = []
        for detection_label in split_detections:
            if '-' in detection_label:
                detection_label = detection_label.rsplit('-', 1)[0]
            clean_detections.append(detection_label)

        return clean_detections
def main(source, target, geometryfile='data/point2wkt.json'):
    """Load the address JSON, build the dataset and serialize it as TriG.

    :param source: path of the JSON file with the address data
    :param target: destination passed to ``Dataset.serialize``
    :param geometryfile: path of the JSON file loaded into ``point2wkt`` and
        handed to ``getAdres``
    """
    with open(source) as source_file:
        data = json.load(source_file)

    with open(geometryfile) as geometry_file:
        point2wkt = json.load(geometry_file)

    ds = Dataset()
    dataset = lp.term('')

    # The named graph also serves as the store behind rdfSubject.
    g = ds.graph(identifier=lp)
    rdfSubject.db = g

    ### Custom triples / Ontology
    ontology_triples = (
        (lpOnt.Adres, OWL.equivalentClass, schema.PostalAddress),
        (lpOnt.Straat, OWL.equivalentClass, hg.Street),
        (lpOnt.Buurt, OWL.equivalentClass, hg.Neighbourhood),
        (lpOnt.adres, OWL.equivalentProperty, schema.address),
    )
    for triple in ontology_triples:
        g.add(triple)

    ########
    # Data #
    ########

    adres2locatie = defaultdict(lambda: defaultdict(list))

    n = 0
    for adresLabel in data:
        n += 1

        # Progress indicator, rewritten in place every 5000 records.
        if not n % 5000:
            print(f"{n}/{len(data)}", end='\r')
            # break

        # # geometry
        # wkt = point2wkt.get(locatiepunt)
        # wktLiteral = Literal(wkt, datatype=geo.wktLiteral)
        # geometry = Geometry(lpGeo.term(str(locatiepunt)),
        #                     asWKT=wktLiteral,
        #                     label=[str(locatiepunt)])

        addresses = getAdres(data[adresLabel], adresLabel, point2wkt)

        # adres2locatie[adres][year].append(geometry)

        # observations.append(locpdetail)
        # locp.observation = observations

        # addresses.append(
        #     Role(
        #         None,
        #         label=address.label,
        #         address=address,
        #         hasLatestBeginTimeStamp=locpdetail.hasLatestBeginTimeStamp,
        #         hasEarliestEndTimeStamp=locpdetail.hasEarliestEndTimeStamp,
        #         startDate=Literal(year, datatype=XSD.gYear)))

    # Prefixes for the serialized output, bound in a fixed order.
    prefix_bindings = (
        ('create', create),
        ('schema', schema),
        ('sem', sem),
        ('geo', geo),
        ('juso', juso),
        ('qb', qb),
        ('void', void),
    )
    for prefix, namespace in prefix_bindings:
        ds.bind(prefix, namespace)

    print("Serializing!")
    ds.serialize(target, format='trig')
# Nanopublication and FOAF vocabularies.
NP = Namespace('http://www.nanopub.org/nschema#')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')

# Name of the dataset being converted and the location of its source CSV.
dataset = "gdppc"
pathtofile = '../../sdh-public-datasets/allcliodata_raw.csv'

# Resource namespace derived from the dataset name.
BASE = Namespace('http://data.socialhistory.org/resource/%s/' % dataset)

# Initialize one Dataset for the whole lot and bind every prefix on it,
# in a fixed order.
rdf_dataset = Dataset()
for _prefix, _namespace in (
    ('qbrv', QBRV),
    ('qbr', QBR),
    ('qb', QB),
    ('skos', SKOS),
    ('prov', PROV),
    ('np', NP),
    ('foaf', FOAF),
    ('clio-property', CLIOPROP),
    ('clio-indicator', CLIOIND),
    ('clio-country', CLIOCTR),
    ('clio', CLIO),
    ('sdmx', SDMX),
    ('sdmx-dimension', SDMXDIM),
    ('sdmx-measure', SDMXMSR),
):
    rdf_dataset.bind(_prefix, _namespace)
return str(response.status_code) def serialize_upload(filename, dataset, upload=True): with open(filename, 'w') as f: dataset.serialize(f, format='trig') upload_to_stardog(dataset.serialize(format='trig')) graph_uri_base = resource + 'findaslot/' drop_stardog() dataset = Dataset() dataset.bind('fasdat', RESOURCE) dataset.bind('fasont', VOCAB) dataset.bind('geo', GEO) dataset.bind('dbo', DBO) dataset.bind('dbr', DBR) dataset.default_context.parse(VOCAB_FILE, format='turtle') # Upload vocabulary with open(VOCAB_FILE, 'r') as f: upload_to_stardog(f.read()) dataset, t_graph = convert_dataset( SOURCE_DATA_DIR + 'Theater.json', dataset, URIRef(graph_uri_base + 'theaters'), museums=False) serialize_upload(OUTPUT_DIR + 'theaters.trig', t_graph) # dataset.remove_graph(t_graph)