def test_single_column_sheet_generates_string_value_triple(self, test_data_dir):
    kgiri_base = "https://kg.your-company.kom"  # Don't use the fixture for this since the .xlsx file uses this one
    args = argparse.Namespace(
        input=f'{test_data_dir}/single_sheet_single_column.xlsx',
        verbose=True,
        ignored_values=list(),
        ignored_prefixes=list(),
        skip_sheets=list(),
        kgiri_base=kgiri_base,
        kgiri_prefix="abc",
        data_source_code="def",
        key_column_number=1,
        strip_any_prefix=False,
        output=None)
    parser = ekglib.XlsxParser(args)
    rdf_string_value = ekglib.RAW.StringValue
    actual = set(parser.g.triples((None, RDF.type, term.URIRef(rdf_string_value))))
    assert actual == {
        (term.URIRef("%s/id/data-name-popular-value-1" % kgiri_base), RDF.type, rdf_string_value),
        (term.URIRef("%s/id/data-name-boring-value-2" % kgiri_base), RDF.type, rdf_string_value),
        (term.URIRef("%s/id/data-name-random-value-3" % kgiri_base), RDF.type, rdf_string_value),
    }
def _uri(self, uri, rand=True):
    """Get the URI for the item we are inserting."""
    try:
        uri = str(uri)
    except Exception:
        pass
    try:
        return self._uri_map[uri]
    except KeyError:
        pass
    if uri == self._added_uri:
        self._uri_map[uri] = term.URIRef(self._update_namespace(
            self._namespace + self._uri_prefix + self._row[self._uidi]))
    else:
        if rand and uri.startswith(self._namespace + "n"):
            if self._next_id:
                new_id = self._next_id
                self._next_id += 1
            else:
                new_id = self.sparql.get_available_id()
            self._uri_map[uri] = term.URIRef(self._update_namespace(
                "%s%s%s" % (self._namespace, "n", new_id)))
        else:
            return term.URIRef(self._update_namespace(uri))
    return self._uri_map[uri]
def makegraph(codebook, variable, vocab_name):
    base = 'http://data.socialhistory.org/resource/' + vocab_name + '/'
    vrb_iri = to_iri(base + variable + '/')
    VCB_NAMESPACE = Namespace(vrb_iri)
    SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
    g = Graph()
    g.bind(vocab_name, VCB_NAMESPACE)
    g.bind('skos', SKOS)
    g.add((VCB_NAMESPACE[variable], RDF.type, SKOS['Scheme']))
    g.add((VCB_NAMESPACE[variable], SKOS['definition'], Literal(codebook['def'][0])))
    if len(codebook) == 1:
        return g
    for i in range(len(codebook['code'])):
        iri = to_iri(VCB_NAMESPACE[str(codebook['code'][i])])
        g.add((term.URIRef(iri), RDF.type, SKOS['Concept']))
        g.add((term.URIRef(iri), SKOS['inScheme'], VCB_NAMESPACE[variable]))
        g.add((term.URIRef(iri), SKOS['prefLabel'], Literal(codebook['label'][i])))
        if RepresentsInt(codebook['code'][i]):
            g.add((term.URIRef(iri), RDF.value, Literal(codebook['code'][i], datatype=XSD.int)))
    return g
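# A minimal, self-contained usage sketch for makegraph (hypothetical codebook
# data). to_iri and RepresentsInt come from the surrounding module; they are
# stubbed here only so the sketch runs on its own.
from rdflib import Graph, Namespace, Literal, term
from rdflib.namespace import RDF, XSD

def to_iri(s):
    # stand-in for the module's IRI sanitiser
    return s

def RepresentsInt(v):
    # stand-in for the module's "is this an integer?" check
    try:
        int(v)
        return True
    except (TypeError, ValueError):
        return False

codebook = {'def': ['Marital status'], 'code': [1, 2], 'label': ['single', 'married']}
g = makegraph(codebook, 'marital', 'sample_vocab')
print(g.serialize(format='turtle'))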
def test_has_only_mlr_values(self):
    triples = list(self.graph.triples((term.URIRef(TEST_ID), None, None)))
    predicates = set(p for (s, p, o) in triples)
    extra_predicates = []
    for p in predicates:
        if p == term.URIRef('http://www.inria.fr/acacia/corese#graph'):
            continue
        if p not in Element_names:
            extra_predicates.append(p)
    assert not extra_predicates, extra_predicates
def test_substitutions(self):
    g = TemplateGraph('test.cfg', 'people')
    g._read_config()
    g._row = ['one', 'two', 'three']
    trm = term.URIRef('http://somewhere.edu/$VAR001x')
    s = g._fill_in(trm)
    assert s == term.URIRef('http://somewhere.edu/twox')
    s = g._replace(term.URIRef('http://somewhere.edu/per987-x'), '987', 3)
    assert s == term.URIRef('http://somewhere.edu/per$VAR003-x')
def get_definition(self, owl_term, add_links=True):
    definition = list(self.graph.objects(owl_term, OBO_DEFINITION))
    definition = definition + list(self.graph.objects(owl_term, SKOS_DEFINITION))
    if definition:
        if len(definition) > 1:
            warnings.warn('Multiple definitions for ' + self.get_label(owl_term) +
                          ': ' + ",".join(definition))
        definition = unicode(definition[0])
    else:
        definition = ""
    # Add a link to the term in the document if the definition refers to a term
    if add_links:
        # definition = re.sub(
        #     "(" + "|".join(self.labels.keys()) + ")", "[\\1]", definition)
        terms = re.findall(r'\'.*?\'', definition)
        for mterm in sorted(set(terms), key=len, reverse=True):
            literal = Literal(mterm.replace("'", ""))
            if str(literal) in self.labels:
                purl = self.labels[str(literal)]
                if "#" in purl and not self.is_deprecated(term.URIRef(mterm)):
                    definition = definition.replace(
                        mterm,
                        "<a title=" + purl.split("#")[1] + ">" +
                        mterm.replace("[", "").replace("]", "") + "</a>")
    # Remove the final dot if present
    if definition:
        if definition[-1] == ".":
            definition = definition[:-1]
    return definition
def test_multiple_matches(self):
    '''http://www.w3.org/TR/rdf-sparql-query/#MultipleMatches'''
    g = create_graph("""
        @prefix foaf: <http://xmlns.com/foaf/0.1/> .
        _:a foaf:name "Johnny Lee Outlaw" .
        _:a foaf:mbox <mailto:[email protected]> .
        _:b foaf:name "Peter Goodguy" .
        _:b foaf:mbox <mailto:[email protected]> .
        _:c foaf:mbox <mailto:[email protected]> .
    """)
    results = list(g.query("""
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        SELECT ?name ?mbox
        WHERE {
            ?x foaf:name ?name .
            ?x foaf:mbox ?mbox
        }
    """))
    expected_results = [
        (term.Literal(name), term.URIRef(mbox))
        for name, mbox in [
            ("Johnny Lee Outlaw", "mailto:[email protected]"),
            ("Peter Goodguy", "mailto:[email protected]"),
        ]
    ]
    results.sort()
    expected_results.sort()
    self.assertEqual(results, expected_results)
def _uriref(self, list_type, identifier):
    '''Create a URIRef of the given list type and identifier.

    @param list_type Singular of the list name (e.g. movie for the movies list)
    @param identifier Identifier of the node
    '''
    partial_uri = urllib.parse.quote(identifier)
    return term.URIRef(f"http://imdb.org/{list_type}/{partial_uri}")
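# A quick sketch of what _uriref produces (hypothetical inputs): the identifier
# is percent-encoded so the resulting URIRef is a valid IRI even when the raw
# string contains spaces or parentheses.
import urllib.parse
from rdflib import term

partial_uri = urllib.parse.quote("The Matrix (1999)")
print(term.URIRef(f"http://imdb.org/movie/{partial_uri}"))
# -> http://imdb.org/movie/The%20Matrix%20%281999%29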
def test_match_literal_arbitrary_type(self):
    g = create_graph(self.data)
    results = list(g.query("""
        SELECT ?v WHERE { ?v ?p "abc"^^<http://example.org/datatype#specialDatatype> }
    """))
    expected_results = [(term.URIRef('http://example.org/ns#z'),)]
    self.assertEqual(results, expected_results)
def test_match_literal_numeric_type(self):
    g = create_graph(self.data)
    results = list(g.query("""
        SELECT ?v WHERE { ?v ?p 42 }
    """))
    expected_results = [(term.URIRef('http://example.org/ns#y'),)]
    self.assertEqual(results, expected_results)
def check_rule_with_data(rule_path, data_path):
    r = load_yamldown(rule_path)
    schema = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          "../../../metadata/rules.schema.yml")
    validate(r, schema)
    if not rule.sparql_from(r):
        raise Exception("No SPARQL impl for rule {}".format(rule_path))
    g = rdflib.graph.ConjunctiveGraph()
    test_data_graph = g.parse(data_path, format="ttl",
                              publicID="http://geneontology.org/rules/test")
    test_data_graph.add((term.URIRef("http://geneontology.org/rules/test"),
                         term.URIRef("http://geneontology.org/graphType"),
                         term.URIRef("http://geneontology.org/gafCam")))
    results = g.query(rule.sparql_from(r))
    return results
def test_codomain(self):
    wrong_codomain_type = []
    for predicate in MLR_codomain.iterkeys():
        for s, p, o in self.graph.triples((None, term.URIRef(predicate), None)):
            for s2, p2, o2 in self.graph.triples((o, RDF.type, None)):
                if o2 not in MLR_codomain[predicate]:
                    wrong_codomain_type.append((predicate, o2))
    assert not wrong_codomain_type, wrong_codomain_type
def _expand_qname(self, qname):
    """Expand a qualified name's namespace prefix to include the resolved namespace root URL."""
    if type(qname) is not rt.URIRef:
        raise TypeError("Cannot expand qname of type %s, must be URIRef" % type(qname))
    for ns in self.graph.namespaces():
        if ns[0] == qname.split(':')[0]:
            return rt.URIRef("%s%s" % (ns[1], qname.split(':')[-1]))
    return qname
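# A standalone sketch of the same expansion idea (hypothetical prefix data):
# graph.namespaces() yields (prefix, namespace) pairs, and a qname such as
# "foaf:name" is expanded by matching its prefix against that list.
from rdflib import Graph
from rdflib import term as rt

g = Graph()
g.bind("foaf", "http://xmlns.com/foaf/0.1/")
qname = rt.URIRef("foaf:name")
for prefix, ns_uri in g.namespaces():
    if prefix == qname.split(':')[0]:
        print(rt.URIRef("%s%s" % (ns_uri, qname.split(':')[-1])))
        # -> http://xmlns.com/foaf/0.1/name
        break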
def test_has_all_mlr_values(self):
    triples = list(self.graph.triples((term.URIRef(TEST_ID), None, None)))
    # sys.stderr.write(repr(triples))
    predicates = set(p for (s, p, o) in triples)
    missing_predicates = []
    for (p, n) in Element_names.iteritems():
        if p in Known_Missing:
            continue
        if p not in predicates:
            missing_predicates.append((p, n))
    assert not missing_predicates, missing_predicates
def populateValue(g, datasetId, ds, data, p, o, iriCache):
    # Skip the following IRIs, as they are handled separately
    # (getResearcher, getProtocols, etc.)
    skipIri = [
        term.URIRef('http://uri.interlex.org/temp/uris/contributorTo'),
        term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasUriApi'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasUriHuman'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasProtocol'),
        term.URIRef('http://uri.interlex.org/temp/uris/wasUpdatedAtTime')]
    key = strip_iri(p.strip())
    if p in skipIri:
        return
    if isinstance(o, term.URIRef):
        value = iri_lookup(g, o.strip(), iriCache)
        if value:
            if isinstance(value, dict) and 'curie' in value:
                ds['term'][value['curie']] = value
                value = value['curie']
            # if isinstance(value, dict) and 'iri' in value:
            #     key = strip_iri(value['iri'])
            #     ds['term'][key] = value
            #     value = key
            if key in arrayProps:
                array = data.setdefault(key, [])
                array.append(value)
            else:
                if key in data:
                    log.warning('Unexpected creation of array for: %s - %s - %s', datasetId, key, value)
                    log.warning('Existing value for this key : %s - %s - %s', datasetId, key, data[key])
                    log.warning('----- Will use the shortest value -----')
                    if len(value) < len(data[key]):
                        data[key] = value
                else:
                    data[key] = value
    elif isinstance(o, term.Literal):
        value = strip_iri(o.strip())
        if key in arrayProps:
            array = data.setdefault(key, [])
            array.append(value)
        else:
            if key in data:
                log.warning('Unexpected creation of array for: %s - %s - %s', datasetId, key, value)
                log.warning('Existing value for this key : %s - %s - %s', datasetId, key, data[key])
                log.warning('----- Will use the shortest value -----')
                if len(value) < len(data[key]):
                    data[key] = value
            else:
                data[key] = value
    elif isinstance(o, term.BNode):
        data[key] = parseMeasure(datasetId, g, o, {'value': '', 'unit': ''})
    else:
        raise Exception('Unknown RDF term: %s' % type(o))
def _replace(self, t, text, n):
    """Replace the substring given in text with $VAR###.

    :param t: Subject or object of the triple to have the replacement done.
    :param text: The text to replace.
    :param n: Integer value to put in the ### part of $VAR###.
    :returns: The given subject or object with the replacement done.
    """
    if isinstance(t, term.URIRef):
        return term.URIRef(t.replace(text, "$VAR%03d" % n))
    elif isinstance(t, term.Literal):
        return term.Literal(t.replace(text, "$VAR%03d" % n), datatype=t.datatype)
    return t
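# A standalone sketch of the $VAR substitution (hypothetical term). URIRef and
# Literal are str subclasses, so str.replace works on them directly, which is
# exactly what _replace relies on; this mirrors the expectation in
# test_substitutions above.
from rdflib import term

t = term.URIRef('http://somewhere.edu/per987-x')
print(term.URIRef(t.replace('987', "$VAR%03d" % 3)))
# -> http://somewhere.edu/per$VAR003-x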
def print_1968_launches():
    # These lines print the links to actual pages about each spacecraft
    # launched in 1968, for example.
    # Load the term for the predicate we are going to filter on (Dublin Core standard)
    DUBLIN = term.URIRef(u'http://purl.org/dc/terms/subject')
    # Same procedure as above
    g2 = Graph()
    g2.parse(u'http://dbpedia.org/resource/Category:Spacecraft_launched_in_1968')
    print("graph has %s statements." % len(g2))
    for s in g2.subjects(predicate=DUBLIN, object=None):
        print(s)
def parse_ontology(self):
    """Place the ontology graph into a set of custom data structures for use by the validator."""
    start = time.clock()
    log.info("Parsing ontology file for {}".format(self.__class__.__name__))
    for subj, pred, obj in self._schema_nodes():
        if subj not in self.attributes_by_class.keys():
            if obj == rt.URIRef(self.lexicon['class']) and pred == rt.URIRef(self.lexicon['type']):
                self.attributes_by_class[subj] = []
        leaves = [(subj, pred, obj)]
        if type(obj) == rt.BNode:
            leaves = deepest_node((subj, pred, obj), self.graph)
        for s, p, o in leaves:
            if o not in self.attributes_by_class.keys():
                self.attributes_by_class[o] = []
            if pred == rt.URIRef(self.lexicon['domain']):
                self.attributes_by_class[o].append(subj)
    log.info("Ontology parsing complete in {}".format((time.clock() - start) * 1000))
def test_export():
    out = "TEST"  # None
    try:
        # staticpath = global_settings.STATIC_PATH
        # return staticpath[:len(staticpath)-staticpath[::-1].find('/')]
        # return staticpath[:len(staticpath)-staticpath[::-1].find('/')-1] + '/b2note_api/test_rdf.rdf'
        # nf = open(os.path.join(staticpath[:len(staticpath)-staticpath[::-1].find('/')-1], '/b2note_api/test_rdf.rdf'), 'w')
        apipath = "/bsc/public/b2note_project/b2note_devel/"
        nf = open(apipath + "b2note_api/test_rdf.rdf", "w")
        nf.write('''<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
   xmlns:oa="http://www.w3.org/ns/oa#"
   xmlns:as="http://www.w3.org/ns/activitystreams#"
   xmlns:foaf="http://xmlns.com/foaf/0.1/"
   xmlns:dcterms="http://purl.org/dc/terms/"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
>
</rdf:RDF>
''')
        nf.close()

        t_start = datetime.datetime.now()

        annL = None
        annL = retrieve_annotation_jsonld_from_api()

        t_api = datetime.datetime.now()

        if annL:
            # Replace field name "type" by "@type" for rdflib-jsonld correct processing
            annL = addarobase_totypefieldname(annL)

            # B2SHARE sends file urls containing whitespace characters,
            # which rdflib refuses to serialize; replace them with %20
            if annL:
                for ann in annL:
                    if isinstance(ann, dict):
                        if "target" in ann.keys():
                            if isinstance(ann["target"], dict):
                                if "source" in ann["target"].keys():
                                    if isinstance(ann["target"]["source"], (str, unicode)):
                                        if ann["target"]["source"].find(" ") > 0:
                                            ann["target"]["source"] = ann["target"]["source"].replace(" ", "%20")
                                if "id" in ann["target"].keys():
                                    if isinstance(ann["target"]["id"], (str, unicode)):
                                        if ann["target"]["id"].find(" ") > 0:
                                            ann["target"]["id"] = ann["target"]["id"].replace(" ", "%20")
            else:
                print("export_to_triplestore function, no annotation list from addarobase function.")
                stdlogger.error("export_to_triplestore function, no annotation list from addarobase function.")
                return None
        else:
            print("export_to_triplestore function, no annotation list retrieved.")
            stdlogger.error("export_to_triplestore function, no annotation list retrieved.")
            return None

        t_nospace = datetime.datetime.now()

        # Re-set blank node ids in the existing graph
        prog = 0
        bnc = 0
        nf = open(apipath + "b2note_api/test_rdf.rdf", "r")
        nRDF = nf.read()
        nf.close()
        while '''rdf:nodeID="''' in nRDF[prog:]:
            b = prog + nRDF[prog:].find('''rdf:nodeID="''') + len('''rdf:nodeID="''')
            f = b + nRDF[b:].find('''"''')
            prog = b
            old_node_id = nRDF[b:f]
            if old_node_id[:len("B2NOTEBLANKNODE")] != "B2NOTEBLANKNODE":
                new_node_id = "B2NOTEBLANKNODE" + str(bnc)
                nRDF = nRDF.replace(old_node_id, new_node_id)
                bnc += 1
        nf = open(apipath + "b2note_api/test_rdf.rdf", "w")
        nf.write(nRDF)
        nf.close()

        g = None
        nRDF = None
        if annL:
            for ann in annL:
                # Build up the graph from the jsonld list of annotations
                g = Graph().parse(data=json.dumps(ann), format='json-ld')
                if g:
                    # The library adds a trailing slash character to the Software homepage url
                    for s, p, o in g.triples((None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                        g.add((s, p, term.URIRef(u"https://b2note.bsc.es")))
                    for s, p, o in g.triples((None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                        g.remove((s, p, term.URIRef(u"https://b2note.bsc.es/")))
                else:
                    print("export_to_triplestore function, no graph parsed from json-ld.")
                    stdlogger.error("export_to_triplestore function, no graph parsed from json-ld.")
                    return None

                files = None
                if g:
                    files = g.serialize(format='xml')
                descr = None
                if files:
                    b = files.find('''<rdf:Description''')
                    b = b - files[:b][::-1].find('''>''') + 1
                    f = files.find('''</rdf:RDF>''')
                    descr = files[b:f]
                if descr:
                    prog = 0
                    while '''rdf:nodeID="''' in descr[prog:]:
                        b = prog + descr[prog:].find('''rdf:nodeID="''') + len('''rdf:nodeID="''')
                        f = b + descr[b:].find('''"''')
                        prog = b
                        old_node_id = descr[b:f]
                        print prog, bnc, b, f, old_node_id
                        if old_node_id[:len("B2NOTEBLANKNODE")] != "B2NOTEBLANKNODE":
                            new_node_id = "B2NOTEBLANKNODE" + str(bnc)
                            descr = descr.replace(old_node_id, new_node_id)
                            bnc += 1
                else:
                    print("export_to_triplestore function, no graph from removing trailing slash from software homepage url.")
                    stdlogger.error("export_to_triplestore function, no graph from removing trailing slash from software homepage url.")
                    return None
                if descr:
                    nf = open(apipath + "b2note_api/test_rdf.rdf", "r")
                    nRDF = nf.read()
                    nf.close()
                    nf = open(apipath + "b2note_api/test_rdf.rdf", "w")
                    nf.write(nRDF[:nRDF.find('''</rdf:RDF>''')] + files[b:f] + '\n' + '''</rdf:RDF>''')
                    nf.close()
                else:
                    print("export_to_triplestore function, no annotation description extracted from serialized RDF.")
                    stdlogger.error("export_to_triplestore function, no annotation description extracted from serialized RDF.")
                    return None

        t_makegraph = datetime.datetime.now()

        R = None
        if nRDF:
            R = httpPutRdfXmlFileContentToOpenVirtuoso(
                'http://opseudat03.bsc.es:8890/DAV/home/b2note/rdf_sink/annotations.rdf',
                virtuoso_settings['VIRTUOSO_B2NOTE_USR'],
                virtuoso_settings['VIRTUOSO_B2NOTE_PWD'],
                nRDF)
        else:
            print("export_to_triplestore function, replacement RDF was not constructed.")
            stdlogger.error("export_to_triplestore function, replacement RDF was not constructed.")
            return None

        t_sending = datetime.datetime.now()

        if R is not None:
            return '''
<h1>DONE</h1>
<br>
<p>Nb annotations: ''' + str(len(annL)) + '''</p>
<p>t_start: 0, 0, ''' + str(t_start) + '''</p>
<p>t_api: ''' + str(t_api - t_start) + ''', ''' + str(t_api - t_start) + ''', ''' + str(t_api) + '''</p>
<p>t_nospace: ''' + str(t_nospace - t_api) + ''', ''' + str(t_nospace - t_start) + ''', ''' + str(t_nospace) + '''</p>
<p>t_makegraph: ''' + str(t_makegraph - t_api) + ''', ''' + str(t_makegraph - t_start) + ''', ''' + str(t_makegraph) + '''</p>
<p>t_sending: ''' + str(t_sending - t_api) + ''', ''' + str(t_sending - t_start) + ''', ''' + str(t_sending) + '''</p>
<br>
<pre>''' + R.text + '''</pre>
<br>
<p>Example query:<p>
<pre>SELECT DISTINCT ?file ?free_text ?semantic_label
FROM <urn:dav:home:b2note:rdf_sink>
WHERE {
  ?s ?p <http://www.w3.org/ns/oa#Annotation>.
  ?s <http://www.w3.org/ns/oa#hasTarget> ?file.
  ?s <http://www.w3.org/ns/oa#hasBody> ?b.
  OPTIONAL{ ?b <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?free_text. }
  OPTIONAL{
    ?b <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/oa#Composite>.
    ?b <http://www.w3.org/ns/activitystreams#items> ?d.
    ?d <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> ?e.
    ?e <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> ?f.
    ?f <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?semantic_label.
  }
}
LIMIT 50
</pre>
'''
    except:
        print("export_to_triplestore function, did not complete.")
        stdlogger.error("export_to_triplestore function, did not complete.")
        return False
    return out
def test_get_resmap(self):
    response = self.client.get("/hsapi/resource/{pid}/map/".format(pid=self.pid), format='json')
    # Note: this presumes that there is always a single redirection.
    # This might not be true if we utilize systems other than iRODS.
    self.assertEqual(response.status_code, status.HTTP_302_FOUND)
    response2 = self.client.get(response.url)
    self.assertEqual(response2.status_code, status.HTTP_200_OK)

    # collect response from stream
    output = ""
    while True:
        try:
            output += response2.streaming_content.next()
        except StopIteration:
            break

    # parse as simple RDF graph
    g = Graph()
    g.parse(data=output)
    documents = g.triples(
        (None, term.URIRef(u'http://purl.org/spar/cito/documents'), None))

    # check for "documents" node
    doclen = 0
    for s, p, o in documents:
        doclen += 1
        self.assertTrue(isinstance(s, term.URIRef))
        subject = s.split('/')
        subject = subject[len(subject) - 1]
        self.assertEqual(subject, "resourcemetadata.xml")
        self.assertTrue(isinstance(o, term.Literal))
        object = o.split('/')
        object = object[len(object) - 1]
        self.assertEqual(object, "resourcemap.xml#aggregation")
    self.assertEqual(doclen, 1)

    # now create a file in the resource map
    txt_file_name = 'test.txt'
    txt_file_path = os.path.join(self.tmp_dir, txt_file_name)
    txt = open(txt_file_path, 'w')
    txt.write("Hello World.\n")
    txt.close()

    # upload the new resource file
    params = {'file': (txt_file_name, open(txt_file_path), 'text/plain')}
    url = "/hsapi/resource/{pid}/files/".format(pid=self.pid)
    response = self.client.post(url, params)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)
    content = json.loads(response.content)
    self.assertEqual(content['resource_id'], self.pid)

    # download the resource map and
    # make sure the new file appears in the resource map
    response = self.client.get("/hsapi/resource/{pid}/map/".format(pid=self.pid))
    self.assertEqual(response.status_code, status.HTTP_302_FOUND)
    response2 = self.client.get(response.url)
    self.assertEqual(response2.status_code, status.HTTP_200_OK)

    # collect the map from the stream
    output = ""
    while True:
        try:
            output += response2.streaming_content.next()
        except StopIteration:
            break

    # parse as a simple RDF file of triples
    g = Graph()
    g.parse(data=output)

    # check that the graph contains an appropriate "documents" node
    documents = g.triples(
        (None, term.URIRef(u'http://purl.org/spar/cito/documents'), None))
    doclen = 0
    for s, p, o in documents:
        doclen += 1
        self.assertTrue(isinstance(s, term.URIRef))
        subject = s.split('/')
        subject = subject[len(subject) - 1]
        self.assertEqual(subject, "resourcemetadata.xml")
        self.assertTrue(isinstance(o, term.Literal))
        object = o.split('/')
        object = object[len(object) - 1]
        self.assertEqual(object, "resourcemap.xml#aggregation")
    self.assertEqual(doclen, 1)

    # check that MIME types are correctly defined
    formats = g.triples(
        (None, term.URIRef(u'http://purl.org/dc/elements/1.1/format'), None))
    fmtlen = 0
    for s, p, o in formats:
        fmtlen += 1
        subject = s.split('/')
        subject = subject[len(subject) - 1]
        self.assertTrue(isinstance(o, term.Literal))
        if subject == 'test.txt':
            self.assertEqual(str(o), u'text/plain')
        else:
            self.assertEqual(str(o), u'application/rdf+xml')
    # pigeonhole principle: if there are three, then one is the file in question
    self.assertEqual(fmtlen, 3)
def export_to_triplestore():
    out = None
    try:
        annL = None
        annL = retrieve_annotation_jsonld_from_api()
        if annL:
            # Replace field name "type" by "@type" for rdflib-jsonld correct processing
            annL = addarobase_totypefieldname(annL)

            # B2SHARE sends file urls containing whitespace characters,
            # which rdflib refuses to serialize; replace them with %20
            for ann in annL:
                if isinstance(ann, dict):
                    if "target" in ann.keys():
                        if isinstance(ann["target"], dict):
                            if "source" in ann["target"].keys():
                                if isinstance(ann["target"]["source"], (str, unicode)):
                                    if ann["target"]["source"].find(" ") > 0:
                                        ann["target"]["source"] = ann["target"]["source"].replace(" ", "%20")
                            if "id" in ann["target"].keys():
                                if isinstance(ann["target"]["id"], (str, unicode)):
                                    if ann["target"]["id"].find(" ") > 0:
                                        ann["target"]["id"] = ann["target"]["id"].replace(" ", "%20")
        else:
            print("export_to_triplestore function, no annotation list retrieved.")
            stdlogger.error("export_to_triplestore function, no annotation list retrieved.")
            return None

        g = None
        if annL:
            # Build up the graph from the jsonld list of annotations
            g = Graph().parse(data=json.dumps(annL), format='json-ld')
        else:
            print("export_to_triplestore function, no annotation list from addarobase function.")
            stdlogger.error("export_to_triplestore function, no annotation list from addarobase function.")
            return None

        if g:
            # The library adds a trailing slash character to the Software homepage url
            for s, p, o in g.triples((None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                g.add((s, p, term.URIRef(u"https://b2note.bsc.es")))
            for s, p, o in g.triples((None, None, term.URIRef(u"https://b2note.bsc.es/"))):
                g.remove((s, p, term.URIRef(u"https://b2note.bsc.es/")))
        else:
            print("export_to_triplestore function, no graph parsed from json-ld.")
            stdlogger.error("export_to_triplestore function, no graph parsed from json-ld.")
            return None

        files = None
        if g:
            files = g.serialize(format='xml')
        else:
            print("export_to_triplestore function, no graph from removing trailing slash from software homepage url.")
            stdlogger.error("export_to_triplestore function, no graph from removing trailing slash from software homepage url.")
            return None

        # CLEAR the previous graph
        graph_urn = "urn:dav:home:b2note:rdf_sink"
        q = urllib.quote_plus('CLEAR GRAPH <' + graph_urn + '>')
        url = 'http://opseudat03.bsc.es:8890/sparql?query=' + q
        rc = None
        rc = requests.get(url, auth=HTTPBasicAuth(
            virtuoso_settings['VIRTUOSO_B2NOTE_USR'],
            virtuoso_settings['VIRTUOSO_B2NOTE_PWD']))

        R = None
        if rc and rc.text and isinstance(rc.text, (str, unicode)) and \
                rc.text.find("Clear graph <" + graph_urn + "> -- done") > 0:
            R = httpPutRdfXmlFileContentToOpenVirtuoso(
                'http://opseudat03.bsc.es:8890/DAV/home/b2note/rdf_sink/annotations.rdf',
                virtuoso_settings['VIRTUOSO_B2NOTE_USR'],
                virtuoso_settings['VIRTUOSO_B2NOTE_PWD'],
                files)
        else:
            print("export_to_triplestore function, call to CLEAR previous GRAPH on triplestore failed.")
            stdlogger.error("export_to_triplestore function, call to CLEAR previous GRAPH on triplestore failed.")
            return None

        if R is not None:
            print "export_to_triplestore function, completed publishing of B2Note annotations to Open Virtuoso triplestore."
            return '''
<h1>B2NOTE triplestore data update</h1>
<p>Completed publishing annotations to B2NOTE Open Virtuoso triplestore.</p>
<p>SPARQL endpoint: <a href="http://opseudat03.bsc.es:8890/sparql" target="_blank">http://opseudat03.bsc.es:8890/sparql</a></p>
<p>Example query:<p>
<pre>SELECT DISTINCT ?file ?free_text ?semantic_label
FROM <urn:dav:home:b2note:rdf_sink>
WHERE {
  ?s ?p <http://www.w3.org/ns/oa#Annotation>.
  ?s <http://www.w3.org/ns/oa#hasTarget> ?file.
  ?s <http://www.w3.org/ns/oa#hasBody> ?b.
  OPTIONAL{ ?b <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?free_text. }
  OPTIONAL{
    ?b <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/oa#Composite>.
    ?b <http://www.w3.org/ns/activitystreams#items> ?d.
    ?d <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> ?e.
    ?e <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> ?f.
    ?f <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> ?semantic_label.
  }
}
LIMIT 50
</pre>'''
        else:
            print "export_to_triplestore function, could not send rdf/xml file content to Open Virtuoso rdf-sink."
            stdlogger.error("export_to_triplestore function, could not send rdf/xml file content to Open Virtuoso rdf-sink.")
            return None
    except:
        print("export_to_triplestore function, did not complete.")
        stdlogger.error("export_to_triplestore function, did not complete.")
        return False
    return out
        while True:
            found = False
            for p, o in self.schema_def.ontology[superclass]:
                if self.schema_def.lexicon['subclass'] == str(p):
                    found = True
                    classes.append(o)
                    superclass = o
            if not found:
                break
        return classes

    def _is_instance(self, (subj, pred, obj)):
        """helper, returns the class type of subj"""
        input_pred_ns = self._namespace_from_uri(self._expand_qname(pred))
        triples = self.graph.triples(
            (subj, rt.URIRef(self.schema_def.lexicon['type']), None))
        if triples:
            for tr in triples:
                triple_obj_ns = self._namespace_from_uri(self._expand_qname(tr[2]))
                if input_pred_ns == triple_obj_ns:  # match namespaces
                    return tr[2]  # return the object

    def _field_name_from_uri(self, uri):
        """helper, returns the name of an attribute (without namespace prefix)"""
        # TODO - should use graph API
        uri = str(uri)
        parts = uri.split('#')
        if len(parts) == 1:
            return uri.split('/')[-1] or uri
        return parts[-1]
'''
Created on 14 Jun 2012

@author: AYODELE-M.AKINGBULU
'''
from rdflib import Graph, term, namespace

graph = Graph(store='Sleepycat', identifier='test')
graph.open("somefolder", create=True)

graph.add((term.URIRef('http://www.google.com/'),
           namespace.RDFS.label,
           term.Literal('Google home page')))
graph.add((term.URIRef('http://wikipedia.org/'),
           namespace.RDFS.label,
           term.Literal('Wikipedia home page')))

graph.close()

graph = Graph(store='Sleepycat', identifier='test')
graph.open("somefolder")
print len(graph)

print "things in a_graph"
for s, p, o in graph:
    print s, p, o
import sys

import pandas as pd
from rdflib import Graph, term

geography_codes_register = "https://statistics.gov.scot/downloads/graph?uri=http://statistics.gov.scot/graph/standard-geography-code-register"
geography_codes = Graph()
official_name_predicate = term.URIRef(
    "http://statistics.data.gov.uk/def/statistical-geography#officialname")


def init_graph():
    """Initialise the graph with the latest geography codes from scot gov."""
    print("Initialising Geography Register, this can take circa 20 seconds")
    # the second positional argument of Graph.parse is publicID, not format,
    # so the serialisation must be passed as a keyword
    geography_codes.parse(geography_codes_register, format="nt")


def get_official_name(feature_code: str):
    """Extract a feature code's official name from the Scot Gov geography register."""
    subject = term.URIRef(
        f"http://statistics.gov.scot/id/statistical-geography/{feature_code}")
    return geography_codes.value(subject, official_name_predicate)


def write_geo_names_csv(filename: str):
    """Parse the csv file and look up the official name for each feature code in the file."""
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 400)
    stats_df = pd.read_csv(filename)
    print(stats_df.head())
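# A minimal usage sketch (the feature code below is just an example):
# init_graph downloads and parses the register, after which lookups are local.
if __name__ == "__main__":
    init_graph()
    print(get_official_name("S12000033"))  # any valid Scottish geography code works here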
def test_example(g):
    test('Some triples have been loaded', len(g))
    # materialise the generator so the check reflects actual matches
    test('A person has been defined',
         list(g.subjects(RDF.type, term.URIRef('http://xmlns.com/foaf/0.1/Person'))))
    print('All tests passed. Well done!')
def main(data, lang):
    track = 0
    nif = rdflib.Namespace("http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")
    if lang != "en":
        nlp = spacy.load(lang + '_core_news_sm')
    else:
        nlp = spacy.load('en_core_web_sm')
    for filename in os.listdir('Files/Input' + lang + '/'):
        if track < int(data):
            graph2 = rdflib.Graph()
            graph2.parse('Files/Input' + lang + '/' + filename, format='nt')
            g = Graph()
            name = filename.split(".")[0]
            for s, p, o in graph2:
                if type(o) == rdflib.term.Literal and nif.isString in p:
                    sentences = nlp(o.encode().decode('utf-8'))
                    for i in sentences.sents:
                        try:
                            BII = o.encode(sys.stdout.encoding, errors='replace').index(
                                i.text.encode(sys.stdout.encoding, errors='replace'))
                            EII = BII + len(i.text.encode(sys.stdout.encoding, errors='replace'))
                            inner = nlp(i.text.encode().decode('utf-8'))
                            offset = 0
                            for ing in inner:
                                offset = i.text.encode().decode('utf-8').index(
                                    ing.text.encode().decode('utf-8'), offset)
                                BI = offset + BII
                                EI = BI + len(ing.text.encode().decode('utf-8'))
                                offset = offset + len(ing.text.encode().decode('utf-8'))
                                hello = "http://purl.org/olia/olia.owl#" + ing.pos_
                                if ing.text.encode().decode('utf-8') not in string.punctuation:
                                    word_uri = rdflib.term.URIRef(
                                        "http://dbpedia.org/resource/" + name +
                                        "?dbpv=2016-10&nif=word_" + str(BI) + "_" + str(EI))
                                    g.add([word_uri, RDF.type, nif.Word])
                                    g.add([word_uri, nif.beginIndex, rdflib.term.Literal(str(BI))])
                                    g.add([word_uri, nif.endIndex, rdflib.term.Literal(str(EI))])
                                    g.add([word_uri, nif.anchorOf,
                                           rdflib.term.Literal(ing.text.encode().decode('utf-8'))])
                                    g.add([word_uri, nif.referenceContext,
                                           rdflib.term.URIRef("http://dbpedia.org/resource/" + name +
                                                              "?dbpv=2016-10&nif=context")])
                                    g.add([word_uri, nif.oliaCategory, term.URIRef(hello)])
                        except:
                            pass
            g.bind("nif", nif)
            g.serialize(destination='Files/POS/' + filename, format="turtle")
            track = track + 1
    print("Your Output is stored in POS Folder via spacyio")
def URIRef(value: Any) -> Identifier:
    return term.URIRef(value)  # type: ignore
data_path = 'annotated_images'
images = []
try:
    images = load_images(data_path)
    dicom_file_name = glob(data_path + '/*')
    print('Total of %d DICOM images.' % len(dicom_file_name))
except:
    print('Check value "data_path". If "data_path" is correct, then check the constraint. '
          'Most likely the images are not added. Add images.')

# *************************************************** Terms Begin ***************************************************

# Classes
dicom_image = term.URIRef(BASE + 'DicomImage')

# Study Types
localizer = term.URIRef(BASE + 'Localizer')
general = term.URIRef(BASE + 'General')

# Annotation Status
annotated = term.URIRef(BASE + 'Annotated')
not_annotated = term.URIRef(BASE + 'NotAnnotated')

# Part of Body
head = term.URIRef(BASE + 'Head')
neck = term.URIRef(BASE + 'Neck')
chest = term.URIRef(BASE + 'Chest')
abdomen = term.URIRef(BASE + 'Abdomen')
pelvis = term.URIRef(BASE + 'Pelvis')
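# A minimal sketch of how these terms might be used to type an image node.
# BASE, the image id, and the partOfBody predicate are hypothetical here; the
# real predicate names live in the surrounding ontology module.
from rdflib import Graph, term
from rdflib.namespace import RDF

BASE = 'http://example.org/dicom#'  # assumption: BASE is the ontology prefix
g = Graph()
img = term.URIRef(BASE + 'image_0001')
g.add((img, RDF.type, term.URIRef(BASE + 'DicomImage')))
g.add((img, term.URIRef(BASE + 'partOfBody'), term.URIRef(BASE + 'Chest')))
print(g.serialize(format='turtle'))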
                    ])
                    g.add([
                        rdflib.term.URIRef("http://dbpedia.org/resource/" + name +
                                           "?dbpv=2016-10&nif=word_" + str(BI) + "_" + str(EI)),
                        nif.oliaLink,
                        rdflib.term.URIRef("http://purl.org/olia/penn.owl#" + tagged[i][count][1])
                    ])
                    g.add([
                        rdflib.term.URIRef("http://dbpedia.org/resource/" + name +
                                           "?dbpv=2016-10&nif=word_" + str(BI) + "_" + str(EI)),
                        nif.oliaCategory,
                        term.URIRef(hello)
                    ])
                    g.add([
                        rdflib.term.URIRef("http://dbpedia.org/resource/" + name +
                                           "?dbpv=2016-10&nif=word_" + str(BI) + "_" + str(EI)),
                        nif.oliaCategory,
                        term.URIRef(hell)
                    ])
                    count = count + 1
                except:
                    pass
        g.bind("nif", nif)
        # print(g.serialize(format="turtle"))
        g.serialize(destination='Files/Search/' + name + "-pos.ttl", format="turtle")