def get_vocabulary(self):
    """Build a Vocabulary from the ConceptScheme found in ``self.g``.

    The graph is queried for a skos:ConceptScheme that has both a dct:title
    and a dct:description; creator, created, modified and versionInfo are
    optional. If the query yields several rows, the last one wins. The
    top concepts found via skos:hasTopConcept are attached as
    ``(concept, prefLabel)`` tuples sorted by prefLabel.

    Returns:
        The populated Vocabulary instance.

    Raises:
        ValueError: if the query returns no rows, i.e. the graph has no
            ConceptScheme carrying both a title and a description.
    """
    from model.vocabulary import Vocabulary

    q = '''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX dct: <http://purl.org/dc/terms/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        SELECT *
        WHERE {
            ?s a skos:ConceptScheme ;
               dct:title ?t ;
               dct:description ?d .
            OPTIONAL {?s dct:creator ?c }
            OPTIONAL {?s dct:created ?cr }
            OPTIONAL {?s dct:modified ?m }
            OPTIONAL {?s owl:versionInfo ?v }
        }'''
    v = None
    for r in self.g.query(q):
        v = Vocabulary(
            self.vocab_id,
            r['s'],
            r['t'],
            r['d'],
            r['c'],
            r['cr'],
            r['m'],
            r['v'],
            [],
            None,
            None,
        )

    # Without this guard an empty result left `v` unbound and the code below
    # failed with an UnboundLocalError that said nothing about the cause.
    if v is None:
        raise ValueError(
            'No skos:ConceptScheme with dct:title and dct:description found '
            'for vocabulary {}'.format(self.vocab_id)
        )

    q2 = '''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        SELECT *
        WHERE {
            ?s skos:hasTopConcept ?tc .
            ?tc skos:prefLabel ?pl .
        }'''
    # add the top concepts to the Vocabulary class instance, sorted by prefLabel
    v.hasTopConcepts = sorted(
        ((x['tc'], x['pl']) for x in self.g.query(q2)),
        key=lambda tup: tup[1],
    )
    return v
def index(self):
    """Record a search for the requested word and redirect to its lookup URL.

    Reads ``word`` from ``self.params``, stores a SearchLog and a Vocabulary
    entry (keyed by ``<user email>_<word>``) for the user returned by
    ``get_user_or_letlogin``, then redirects to the URL produced by
    ``YahooDictionary.get_search_url``.

    Raises:
        ValueError: if no ``word`` parameter was supplied.
    """
    word = self.params.get('word')
    if not word:
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise ValueError("no word!")

    user = get_user_or_letlogin(self, '/search/?word=%s' % word)

    log = SearchLog(word=word, user=user)
    log.put()

    voca = Vocabulary(key_name=user.email + '_' + word, word=word, user=user)
    voca.put()

    query_url = YahooDictionary.get_search_url(word)
    self.redirect(query_url)
def get_vocabulary(self):
    """Fetch the ConceptScheme metadata of this project from VocBench.

    Posts a SPARQL query to ``config.VB_ENDPOINT + "/SPARQL/evaluateQuery"``
    with ``self.vocab_uri`` as the ``ctx_project`` and builds a Vocabulary
    from the first result binding. Top concepts are collected from every
    binding row that carries both ``tc`` and ``tcpl``.

    Returns:
        A Vocabulary populated with the scheme metadata, its top concepts as
        ``(uri, prefLabel)`` tuples and the concept hierarchy (``None`` when
        the hierarchy string is blank).

    Raises:
        VbException: if the response status is not 200, the response body
            cannot be parsed into result bindings, or no binding is returned.
    """
    s = VocBench("x", self.request)._authed_request_object()
    r = s.post(
        config.VB_ENDPOINT + "/SPARQL/evaluateQuery",
        data={
            "query": """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
                PREFIX dct: <http://purl.org/dc/terms/>
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                SELECT *
                WHERE {
                    ?s a skos:ConceptScheme ;
                       skos:prefLabel ?t .
                    OPTIONAL {?s dct:description ?d }
                    OPTIONAL {?s dct:creator ?c }
                    OPTIONAL {?s dct:created ?cr }
                    OPTIONAL {?s dct:modified ?m }
                    OPTIONAL {?s owl:versionInfo ?v }
                    OPTIONAL {?tc skos:topConceptOf ?s }
                    OPTIONAL {?tc skos:prefLabel ?tcpl }
                }""",
            "ctx_project": self.vocab_uri,
        },
    )
    if r.status_code != 200:
        raise VbException("There was an error: " + r.content.decode("utf-8"))

    try:
        metadata = json.loads(r.content.decode("utf-8"))[
            "result"]["sparql"]["results"]["bindings"]
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass through.
        raise VbException(r.content.decode("utf-8"))

    if not metadata:
        # Previously surfaced as an IndexError on metadata[0].
        raise VbException(
            "No skos:ConceptScheme found for " + str(self.vocab_uri))

    first = metadata[0]

    def optional_value(name):
        """Return the value of an OPTIONAL binding of the first row, or None."""
        binding = first.get(name)
        return binding.get("value") if binding is not None else None

    concept_hierarchy = self.get_concept_hierarchy(str(first["s"]["value"]))
    if not concept_hierarchy.strip():
        concept_hierarchy = None

    # tc and tcpl are OPTIONAL in the query, so a row may lack either one;
    # skip such rows instead of failing on None.get("value").
    top_concepts = [
        (x["tc"].get("value"), x["tcpl"].get("value"))
        for x in metadata
        if x.get("tc") is not None and x.get("tcpl") is not None
    ]

    from model.vocabulary import Vocabulary

    return Vocabulary(
        self.vocab_uri,
        first["s"]["value"],
        first["t"]["value"],
        optional_value("d"),
        optional_value("c"),
        optional_value("cr"),
        optional_value("m"),
        optional_value("v"),
        top_concepts,
        concept_hierarchy=concept_hierarchy,
    )
def get_vocabulary(self):
    """Fetch the ConceptScheme metadata of this project from VocBench.

    Posts a SPARQL query to ``config.VB_ENDPOINT + '/SPARQL/evaluateQuery'``
    with ``self.vocab_id`` as the ``ctx_project`` and builds a Vocabulary
    from the first result binding. Top concepts are collected from every
    binding row that carries both ``tc`` and ``tcpl``.

    Returns:
        A Vocabulary populated with the scheme metadata, its top concepts as
        ``(uri, prefLabel)`` tuples and the concept hierarchy (``None`` when
        the hierarchy string is blank).

    Raises:
        VbException: if the response status is not 200, the response body
            cannot be parsed into result bindings, or no binding is returned.
    """
    s = VOCBENCH('x', self.request)._authed_request_object()
    r = s.post(
        config.VB_ENDPOINT + '/SPARQL/evaluateQuery',
        data={
            'query': '''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
                PREFIX dct: <http://purl.org/dc/terms/>
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                SELECT *
                WHERE {
                    ?s a skos:ConceptScheme ;
                       skos:prefLabel ?t .
                    OPTIONAL {?s dct:description ?d }
                    OPTIONAL {?s dct:creator ?c }
                    OPTIONAL {?s dct:created ?cr }
                    OPTIONAL {?s dct:modified ?m }
                    OPTIONAL {?s owl:versionInfo ?v }
                    OPTIONAL {?tc skos:topConceptOf ?s }
                    OPTIONAL {?tc skos:prefLabel ?tcpl }
                }''',
            'ctx_project': self.vocab_id,
        },
    )
    if r.status_code != 200:
        raise VbException('There was an error: ' + r.content.decode('utf-8'))

    try:
        metadata = json.loads(r.content.decode('utf-8'))[
            'result']['sparql']['results']['bindings']
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass through.
        raise VbException(r.content.decode('utf-8'))

    if not metadata:
        # Previously surfaced as an IndexError on metadata[0].
        raise VbException(
            'No skos:ConceptScheme found for ' + str(self.vocab_id))

    first = metadata[0]

    def optional_value(name):
        """Return the value of an OPTIONAL binding of the first row, or None."""
        binding = first.get(name)
        return binding.get('value') if binding is not None else None

    concept_hierarchy = self.get_concept_hierarchy(str(first['s']['value']))
    if not concept_hierarchy.strip():
        concept_hierarchy = None

    # tc and tcpl are OPTIONAL in the query, so a row may lack either one;
    # skip such rows instead of failing on None.get('value').
    top_concepts = [
        (x['tc'].get('value'), x['tcpl'].get('value'))
        for x in metadata
        if x.get('tc') is not None and x.get('tcpl') is not None
    ]

    from model.vocabulary import Vocabulary

    return Vocabulary(
        self.vocab_id,
        first['s']['value'],
        first['t']['value'],
        optional_value('d'),
        optional_value('c'),
        optional_value('cr'),
        optional_value('m'),
        optional_value('v'),
        top_concepts,
        concept_hierarchy=concept_hierarchy,
    )
def collect(details):
    """
    For this source, vocabs must be nominated via their ARDC ID (a number)
    in the entries of details['vocabs']; each entry also carries the vocab URI.

    'rva': {
        'source': VocabSource.RVA,
        'api_endpoint': 'https://vocabs.ands.org.au/registry/api/resource/vocabularies/{}?includeAccessPoints=true',
        'vocabs': [
            {
                'ardc_id': 50,
                'uri': 'http://resource.geosciml.org/classifierscheme/cgi/2016.01/geologicunittype',
            },
            {
                'ardc_id': 52,
                'uri': 'http://resource.geosciml.org/classifierscheme/cgi/2016.01/contacttype',
            },
            {
                'ardc_id': 57,
                'uri': 'http://resource.geosciml.org/classifierscheme/cgi/2016.01/stratigraphicrank',
            }
        ]
    }

    Each vocab is requested from details['api_endpoint'] (formatted with its
    ARDC ID). Vocabs whose request does not return HTTP 200 are logged and
    skipped. The collected vocabs are merged into g.VOCABS under the key
    'rva-<ardc_id>'.
    """
    # Get the details for each vocab from the RVA catalogue API
    logging.debug("RVA collect()...")
    rva_vocabs = {}
    for vocab in details["vocabs"]:
        r = requests.get(
            details["api_endpoint"].format(vocab["ardc_id"]),
            headers={"Accept": "application/json"},
        )
        if r.status_code != 200:
            logging.error("Could not get vocab {} from RVA".format(
                vocab["ardc_id"]))
            continue

        j = json.loads(r.text)
        vocab_id = "rva-" + str(vocab["ardc_id"])
        # creation-date is read with .get(), i.e. treated as optional, so
        # only parse it when present; parse(None) raises a TypeError.
        creation_date = j.get("creation-date")
        rva_vocabs[vocab_id] = Vocabulary(
            vocab_id,
            vocab["uri"],
            j["title"],
            j.get("description"),
            j.get("creator"),
            dateutil.parser.parse(creation_date)
            if creation_date is not None else None,
            None,
            j["version"][0]["title"],
            config.VocabSource.RVA,
            vocab["uri"],
            sparql_endpoint=j["version"][0]["access-point"][0]
            ["ap-api-sparql"]["url"],
        )
    g.VOCABS = {**g.VOCABS, **rva_vocabs}
    logging.debug("RVA collect() complete")
def home(self):
    """Load the current user and that user's stored vocabularies.

    Sets ``self.user`` to the result of ``get_user_or_letlogin`` (called
    with the '/user/home' path) and ``self.vocabularies`` to at most 200
    Vocabulary entities whose ``user`` property equals that user.
    """
    self.user = get_user_or_letlogin(self, '/user/home')
    owned_by_user = Vocabulary.gql("WHERE user=:1", self.user)
    self.vocabularies = owned_by_user.fetch(200)
def get_vocabulary(self):
    """Build a Vocabulary from the ConceptScheme rows found in ``self.g``.

    The scheme title is taken from skos:prefLabel, dct:title or rdfs:label
    (the three UNION branches of the query). For each metadata field the
    first non-None value across the result rows is kept; ``self.uri`` is
    set from every row, so the last row wins. Top concepts that have both a
    URI and a prefLabel are gathered as tuples and sorted by label, and the
    concept hierarchy is attached before returning.
    """
    from model.vocabulary import Vocabulary

    rows = self.g.query('''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX dct: <http://purl.org/dc/terms/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?s ?title ?description ?creator ?created ?modified ?version ?hasTopConcept ?topConceptLabel
        WHERE {{
            {{
                ?s a skos:ConceptScheme .
                ?s skos:prefLabel ?title .
            }}
            UNION
            {{
                ?s a skos:ConceptScheme .
                ?s dct:title ?title .
                MINUS {{ ?s skos:prefLabel ?prefLabel }}
            }}
            UNION
            {{
                ?s a skos:ConceptScheme .
                ?s rdfs:label ?title .
                MINUS {{ ?s skos:prefLabel ?prefLabel }}
                MINUS {{ ?s dct:title ?prefLabel }}
            }}
            OPTIONAL {{ ?s dct:description ?description }}
            OPTIONAL {{ ?s dct:creator ?creator }}
            OPTIONAL {{ ?s dct:created ?created }}
            OPTIONAL {{ ?s dct:modified ?modified }}
            OPTIONAL {{ ?s owl:versionInfo ?version }}
            OPTIONAL {{
                ?s skos:hasTopConcept ?hasTopConcept .
                ?hasTopConcept skos:prefLabel ?topConceptLabel .
            }}
        }}''')

    # Metadata fields, in the order they are checked for every row.
    field_names = (
        'title', 'description', 'creator', 'created', 'modified', 'version',
    )
    first_seen = dict.fromkeys(field_names)
    top_concepts = []

    for row in rows:
        self.uri = str(row['s'])
        for field in field_names:
            # keep the first non-None value encountered for each field
            if first_seen[field] is None:
                first_seen[field] = row[field]
        if row['hasTopConcept'] and row['topConceptLabel'] is not None:
            top_concepts.append(
                (row['hasTopConcept'], row['topConceptLabel']))

    vocabulary = Vocabulary(
        self.vocab_id,
        self.uri,
        first_seen['title'],
        first_seen['description'],
        first_seen['creator'],
        first_seen['created'],
        first_seen['modified'],
        first_seen['version'],
        top_concepts,
    )

    # sort the top concepts by prefLabel
    vocabulary.hasTopConcepts = top_concepts
    if vocabulary.hasTopConcepts:
        vocabulary.hasTopConcepts.sort(key=lambda pair: pair[1])
    vocabulary.conceptHierarchy = self.get_concept_hierarchy()
    return vocabulary
def collect(details):
    """Load every supported RDF file under data/vocab_files into g.VOCABS.

    Walks ``<config.APP_DIR>/data/vocab_files``. Each file whose extension is
    a key of ``FILE.MAPPER`` is parsed into a graph, the graph is pickled
    next to it as ``<name>.p``, and each skos:ConceptScheme in the graph is
    registered as a Vocabulary under the file's base name (a later scheme in
    the same file replaces an earlier one).

    ``details`` is accepted for interface parity with the other sources and
    is not used.
    """
    # The query does not depend on the file being read, so build it once.
    # Get the ConceptSchemes from the graph of the file
    # Interpret the CS as a Vocab
    q = """
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        SELECT
            ?cs             # 0
            ?title          # 1
            ?created        # 2
            ?issued         # 3
            ?modified       # 4
            ?version        # 5
            ?description    # 6
        WHERE {{
            ?cs a skos:ConceptScheme .
            OPTIONAL {{
                ?cs skos:prefLabel ?title .
                FILTER(lang(?title) = "{language}" || lang(?title) = "")
            }}
            OPTIONAL {{ ?cs dcterms:created ?created }}
            OPTIONAL {{ ?cs dcterms:issued ?issued }}
            OPTIONAL {{ ?cs dcterms:modified ?modified }}
            OPTIONAL {{ ?cs owl:versionInfo ?version }}
            OPTIONAL {{
                ?cs skos:definition ?description .
                FILTER(lang(?description) = "{language}" || lang(?description) = "")
            }}
        }}""".format(language=DEFAULT_LANGUAGE)

    file_vocabs = {}
    # find all files in project_directory/data/vocab_files
    for path, _subdirs, files in os.walk(
            join(config.APP_DIR, "data", "vocab_files")):
        for name in files:
            extension = name.split(".")[-1]
            if extension not in FILE.MAPPER:
                continue

            # load graph
            file_path = os.path.join(path, name)
            gr = Graph().parse(file_path, format=FILE.MAPPER[extension])

            vocab_id = str(name).split(".")[0]
            # pickle to directory/vocab_files/ (the with-block closes the file)
            with open(join(path, vocab_id + ".p"), "wb") as f:
                pickle.dump(gr, f)

            # extract vocab metadata
            for cs in gr.query(q):
                # str(None) is the truthy string "None", so test the binding
                # itself before converting; fall back to the scheme URI when
                # the title is unbound or empty (a string is needed, not None).
                title = str(cs[1]) if cs[1] is not None else ""
                file_vocabs[vocab_id] = Vocabulary(
                    vocab_id,
                    str(cs[0]),
                    title or str(cs[0]),
                    str(cs[6]) if cs[6] is not None else None,
                    None,
                    dateutil.parser.parse(str(cs[2]))
                    if cs[2] is not None else None,
                    dateutil.parser.parse(str(cs[4]))
                    if cs[4] is not None else None,
                    str(cs[5]) if cs[5] is not None else None,  # versionInfo
                    config.VocabSource.FILE,
                    cs[0],
                )
    g.VOCABS = {**g.VOCABS, **file_vocabs}
    logging.debug("FILE collect() complete.")
def collect(details):
    """
    For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

    'ga-jena-fuseki': {
        'source': VocabSource.SPARQL,
        'sparql_endpoint': 'http://dev2.nextgen.vocabs.ga.gov.au/fuseki/vocabs',
        'sparql_username': '******', # Optional username for SPARQL endpoint
        'sparql_password': '******', # Optional password for SPARQL endpoint
        #'uri_filter_regex': '.*', # Regular expression to filter vocabulary URIs - Everything
        #'uri_filter_regex': '^http(s?)://pid.geoscience.gov.au/def/voc/ga/', # Regular expression to filter vocabulary URIs - GA
        #'uri_filter_regex': '^https://gcmdservices.gsfc.nasa.gov', # Regular expression to filter vocabulary URIs - GCMD
        'uri_filter_regex': '^http(s?)://resource.geosciml.org/', # Regular expression to filter vocabulary URIs - CGI
    },

    Every ConceptScheme returned by the endpoint (in a named graph or in the
    default graph) whose URI passes the optional 'uri_filter_regex' becomes a
    Vocabulary and is merged into g.VOCABS.
    """
    logging.debug("SPARQL collect()...")

    # Get all the ConceptSchemes from the SPARQL endpoint
    # Interpret each CS as a Vocab
    q = """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        SELECT * WHERE {{
            {{
                GRAPH ?g {{
                    ?cs a skos:ConceptScheme .
                    OPTIONAL {{
                        ?cs skos:prefLabel ?title .
                        FILTER(lang(?title) = "{language}" || lang(?title) = "")
                    }}
                    OPTIONAL {{ ?cs dcterms:created ?created }}
                    OPTIONAL {{ ?cs dcterms:issued ?issued }}
                    OPTIONAL {{ ?cs dcterms:modified ?modified }}
                    OPTIONAL {{ ?cs owl:versionInfo ?version }}
                    OPTIONAL {{
                        ?cs skos:definition ?description .
                        FILTER(lang(?description) = "{language}" || lang(?description) = "")
                    }}
                }}
            }}
            UNION
            {{
                ?cs a skos:ConceptScheme .
                OPTIONAL {{
                    ?cs skos:prefLabel ?title .
                    FILTER(lang(?title) = "{language}" || lang(?title) = "")
                }}
                OPTIONAL {{ ?cs dcterms:created ?created }}
                OPTIONAL {{ ?cs dcterms:issued ?issued }}
                OPTIONAL {{ ?cs dcterms:modified ?modified }}
                OPTIONAL {{ ?cs owl:versionInfo ?version }}
                OPTIONAL {{
                    ?cs skos:definition ?description .
                    FILTER(lang(?description) = "{language}" || lang(?description) = "")
                }}
            }}
        }}
        ORDER BY ?title""".format(language=DEFAULT_LANGUAGE)

    # The credentials are optional; read them once with .get() so a config
    # without them no longer raises KeyError further down.
    sparql_username = details.get("sparql_username")
    sparql_password = details.get("sparql_password")

    # record just the IDs & title for the VocPrez in-memory vocabs list
    concept_schemes = Source.sparql_query(
        details["sparql_endpoint"],
        q,
        sparql_username=sparql_username,
        sparql_password=sparql_password,
    )
    assert concept_schemes is not None, "Unable to query conceptSchemes"

    sparql_vocabs = {}
    for cs in concept_schemes:
        cs_uri = cs["cs"]["value"]
        # handling CS URIs that end with '/'
        vocab_id = cs_uri.replace("/conceptScheme", "").split("/")[-1]

        # TODO: Investigate putting regex into SPARQL query
        if details.get("uri_filter_regex") and not re.search(
                details["uri_filter_regex"], cs_uri):
            logging.debug("Skipping vocabulary {}".format(vocab_id))
            continue

        if len(vocab_id) < 2:
            vocab_id = cs_uri.split("/")[-2]

        sparql_vocabs[vocab_id] = Vocabulary(
            vocab_id,
            cs_uri,
            # Need string value for sorting, not None
            (cs.get("title") or {}).get("value") or vocab_id,
            cs["description"].get("value")
            if cs.get("description") is not None else None,
            None,  # none of these SPARQL vocabs have creator info yet
            # TODO: add creator info to GSQ vocabs
            dateutil.parser.parse(cs.get("created").get("value"))
            if cs.get("created") is not None else None,
            # dct:issued not in Vocabulary
            dateutil.parser.parse(cs.get("modified").get("value"))
            if cs.get("modified") is not None else None,
            cs["version"].get("value")
            if cs.get("version") is not None else None,  # versionInfo
            config.VocabSource.SPARQL,
            cs_uri,
            sparql_endpoint=details["sparql_endpoint"],
            sparql_username=sparql_username,
            sparql_password=sparql_password,
        )
    g.VOCABS = {**g.VOCABS, **sparql_vocabs}
    logging.debug("SPARQL collect() complete.")
def collect(details):
    """
    For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

    'gsq-graphdb': {
        'source': VocabSource.SPARQL,
        'sparql_endpoint': 'http://graphdb.gsq.digital:7200/repositories/GSQ_Vocabularies_core'
    },

    'sparql_username', 'sparql_password' and 'uri_filter_regex' are optional
    keys of details. Every ConceptScheme found in a named graph whose URI
    passes the optional filter becomes a Vocabulary and is merged into
    g.VOCABS. A falsy query result is treated as "no vocabularies".
    """
    logging.debug('SPARQL collect()...')

    # Get all the ConceptSchemes from the SPARQL endpoint
    # Interpret each CS as a Vocab
    # ORDER BY previously named ?l, a variable this query never binds;
    # the title is the variable that is actually selected.
    q = '''
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        SELECT * WHERE {{
            GRAPH ?g {{
                ?cs a skos:ConceptScheme .
                OPTIONAL {{
                    ?cs skos:prefLabel ?title .
                    FILTER(lang(?title) = "{language}" || lang(?title) = "")
                }}
                OPTIONAL {{ ?cs dcterms:created ?created }}
                OPTIONAL {{ ?cs dcterms:issued ?issued }}
                OPTIONAL {{ ?cs dcterms:modified ?modified }}
                OPTIONAL {{
                    ?cs skos:definition ?description .
                    FILTER(lang(?description) = "{language}" || lang(?description) = "")
                }}
            }}
        }}
        ORDER BY ?title
        '''.format(language=DEFAULT_LANGUAGE)

    # The credentials are optional (the example config above has none); read
    # them once with .get() so their absence no longer raises KeyError below.
    sparql_username = details.get('sparql_username')
    sparql_password = details.get('sparql_password')

    # record just the IDs & title for the VocPrez in-memory vocabs list
    concept_schemes = Source.sparql_query(
        details['sparql_endpoint'],
        q,
        sparql_username=sparql_username,
        sparql_password=sparql_password) or {}

    sparql_vocabs = {}
    for cs in concept_schemes:
        cs_uri = cs['cs']['value']
        # handling CS URIs that end with '/'
        vocab_id = cs_uri.replace('/conceptScheme', '').split('/')[-1]

        if details.get('uri_filter_regex') and not re.search(
                details['uri_filter_regex'], cs_uri):
            logging.debug('Skipping vocabulary {}'.format(vocab_id))
            continue

        if len(vocab_id) < 2:
            vocab_id = cs_uri.split('/')[-2]

        sparql_vocabs[vocab_id] = Vocabulary(
            vocab_id,
            cs_uri.replace('/conceptScheme', ''),
            # Need string value for sorting, not None
            (cs.get('title') or {}).get('value') or vocab_id,
            cs['description'].get('value')
            if cs.get('description') is not None else None,
            None,  # none of these SPARQL vocabs have creator info yet
            # TODO: add creator info to GSQ vocabs
            dateutil.parser.parse(cs.get('created').get('value'))
            if cs.get('created') is not None else None,
            # dct:issued not in Vocabulary
            dateutil.parser.parse(cs.get('modified').get('value'))
            if cs.get('modified') is not None else None,
            None,  # versionInfo
            config.VocabSource.SPARQL,
            cs_uri,
            sparql_endpoint=details['sparql_endpoint'],
            sparql_username=sparql_username,
            sparql_password=sparql_password)
    g.VOCABS = {**g.VOCABS, **sparql_vocabs}
    logging.debug('SPARQL collect() complete.')