def query_complete_classes_of_entity(ent):
    """Return the set of DBpedia ontology class names an entity belongs to.

    Combines classes reached directly via ``rdf:type`` (with its
    ``rdfs:subClassOf*`` closure) and classes reached through a
    ``dbo:wikiPageRedirects`` hop, keeping only classes inside the dbo
    namespace. Names are returned with the dbo prefix stripped.

    :param ent: entity name or full dbp-prefixed URI
    :return: set of class names (possibly partial on UnicodeDecodeError)
    """
    if ent.startswith(dbp_prefix):
        ent = ent.split(dbp_prefix)[1]
    classes = set()

    def _collect(result):
        # Each row carries one class URI; strip the dbo prefix before storing.
        for row in result.fetchone():
            classes.add(str(row[0]).split(dbo_prefix)[1])

    try:
        s = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
        statement = 'select distinct ?superclass where { <%s%s> rdf:type ?e. ' \
                    '?e rdfs:subClassOf* ?superclass. FILTER(strstarts(str(?superclass), "%s"))}' \
                    % (dbp_prefix, ent, dbo_prefix)
        _collect(s.query(statement))
        # Also follow redirect pages: the redirect target's types count too.
        statement = 'select distinct ?ss where {<%s%s> dbo:wikiPageRedirects ?e. ?e rdf:type ?s. ' \
                    '?s rdfs:subClassOf* ?ss. FILTER(strstarts(str(?ss), "%s"))}' \
                    % (dbp_prefix, ent, dbo_prefix)
        _collect(s.query(statement))
    except UnicodeDecodeError:
        # Best effort: some entity names trip the client's decoder; report
        # and return whatever was collected so far. (Removed redundant
        # `pass` that followed the print.)
        print(' %s: UnicodeDecodeError' % ent)
    return classes
def get_text_info_for_entity(entity_uri):
    """Fetch abstract/comment text for a DBpedia entity.

    :param entity_uri: entity URI fed to ``_gen_sparql_for_abstract_and_comment``
    :return: dict mapping each result variable to its value; 'Nothing' is
        used for unbound values or an empty result set.
    :raises Exception: re-raises query errors after printing them.
    """
    handle = sparql.Service('http://dbpedia.org/sparql', "utf-8")
    statement = _gen_sparql_for_abstract_and_comment(entity_uri)
    try:
        result = handle.query(statement)
    except Exception as e:
        # The original dropped into ipdb and then referenced `e`, which was
        # never bound (bare `except:`) -- a guaranteed NameError. Report and
        # propagate instead of hanging in a debugger.
        print(e.args[0] if e.args else e)
        raise
    res_all = result.fetchall()
    keys = result.variables
    values = ['Nothing'] * len(keys)
    if len(res_all) > 0:
        _v = res_all[0]
        for idx in range(len(values)):
            # A binding may be None (e.g. OPTIONAL patterns); keep placeholder.
            values[idx] = _v[idx].value if _v[idx] is not None else 'Nothing'
    r = {k: values[idx] for idx, k in enumerate(keys)}
    return r
def query_ent_num(cls):
    """Count entities typed as the given dbo class or any of its subclasses."""
    service = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    query = 'select count(?ent) where {?ent rdf:type ?t. ?t rdfs:subClassOf* <%s%s>}' % (
        dbo_prefix, cls)
    response = service.query(query)
    # The aggregate query yields a single row; return its count as an int.
    for record in response.fetchone():
        return int(str(record[0]))
def test_single_query():
    """Time 100 identical abstract/comment queries against DBpedia and print a sample."""
    svc = sparql.Service('http://dbpedia.org/sparql', "utf-8")
    q = (
        'PREFIX foaf: <http://xmlns.com/foaf/0.1/> '
        'PREFIX dbo: <http://dbpedia.org/ontology/> '
        'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> '
        'PREFIX dbr: <http://dbpedia.org/resource/> '
        'SELECT ?abs, ?comment '
        'WHERE {'
        'dbr:Apple_II dbo:abstract ?abs . '
        'dbr:Apple_II rdfs:comment ?comment . '
        'FILTER (langMatches(lang(?abs),"en")) '
        'FILTER (langMatches(lang(?comment),"en")) '
        '} '
        'limit 10'
    )
    started = time.time()
    for _ in range(100):
        result = svc.query(q)
    print(time.time() - started)
    started = time.time()
    res_all = result.fetchall()
    keys = result.variables
    if len(res_all) > 0:
        values = res_all[0]
    else:
        values = ['Nothing'] * len(keys)
    # Show only the first 10 chars of each value to keep output short.
    sample = {k: values[idx].value[:10] for idx, k in enumerate(keys)}
    print(sample)
    return
def update_country_names():
    """Refresh refdata/countries.json from the EEA semantic endpoint.

    Writes a JSON object mapping each lower-cased country code to its
    country name. Side effect: overwrites refdata/countries.json.
    """
    SPARQL_QUERY = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX eea: <http://rdfdata.eionet.europa.eu/eea/ontology/>
SELECT DISTINCT ?code ?countryname ?publishingCode
 IF(bound(?eumember),'Yes','') AS ?eu
 IF(bound(?eeamember),'Yes','') AS ?eea
 IF(bound(?eionetmember),'Yes','') AS ?eionet
 IF(bound(?eun22member),'Yes','') AS ?eun22
WHERE {
 ?ucountry a eea:Country ;
  eea:code ?code;
  eea:publishingCode ?publishingCode;
  rdfs:label ?countryname
 OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EU> skos:member ?ucountry, ?eumember }
 OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EUN22> skos:member ?ucountry, ?eun22member }
 OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EEA> skos:member ?ucountry, ?eeamember }
 OPTIONAL { <http://rdfdata.eionet.europa.eu/eea/countries/EIONET> skos:member ?ucountry, ?eionetmember }
}"""
    SPARQL_ENDPOINT = 'http://semantic.eea.europa.eu/sparql'
    s = sparql.Service(SPARQL_ENDPOINT)
    results = [i for i in s.query(SPARQL_QUERY).fetchone()]
    countries = {}
    for item in results:
        # item[0] = ?code, item[1] = ?countryname
        countries[item[0].value.lower()] = item[1].value
    # `with` guarantees the file is closed even if json.dump raises
    # (the original open/close pair leaked the handle on error).
    with open("refdata/countries.json", "w") as f:
        json.dump(countries, f)
def get_envelope_release_date(file_url):
    """Return the release datetime of the CDR envelope containing *file_url*.

    Queries the CR SPARQL endpoint for the newest matching XML file and
    converts its ``schema:released`` value via ``_to_datetime``.

    :raises Exception: re-raised (after logging) on any query/parse failure,
        including IndexError when no row matches.
    """
    q = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>
SELECT ?released
WHERE {
 ?file terms:date ?date .
 ?file cr:mediaType 'text/xml'.
 ?file terms:isPartOf ?part .
 ?part schema:released ?released .
 FILTER (str(?file) = '%s')
}
ORDER BY DESC(?date)
LIMIT 1
""" % file_url
    service = sparql.Service('https://cr.eionet.europa.eu/sparql')
    try:
        req = service.query(q)
        rows = req.fetchall()
        released = rows[0][0].value
    except Exception:
        # Was a bare `except:`, which also traps KeyboardInterrupt/SystemExit.
        logger.exception(
            'Got an error in querying SPARQL endpoint for '
            'file_url: %s', file_url)
        raise
    release_date = _to_datetime(released)
    return release_date
def wikidata_nuts_to_geonames(client, args):
    """Link NUTS codes to GeoNames/Wikidata IDs in the geostore database.

    Queries Wikidata for entities carrying both a NUTS code (P605) and a
    GeoNames ID (P1566), then updates or inserts the matching ``nuts``
    documents in Mongo.
    """
    db = client.geostore
    nuts = db.nuts
    service = sparql.Service(WIKIDATA_ENDPOINT, "utf-8", "GET")
    query = '''
    SELECT ?s ?o ?g WHERE {
       ?s wdt:P605 ?o.
       ?s wdt:P1566 ?g
    }
    '''
    for record in service.query(query).fetchone():
        wikidata_id = record[0].value.strip()
        nuts_code = record[1].value.strip()
        geon = 'http://sws.geonames.org/' + record[2].value + '/'
        linked = {'geonames': geon, 'wikidata': wikidata_id}
        if nuts.find_one({'_id': nuts_code}):
            nuts.update_one({'_id': nuts_code}, {'$set': linked})
            continue
        logging.debug('NUTS entry not in DB: ' + str(nuts_code))
        geon_entry = db.geonames.find_one({'_id': geon})
        if not geon_entry:
            continue
        # Seed a new NUTS entry from the geonames record; carry the
        # country over only when the geonames entry has one.
        doc = {'_id': nuts_code, 'name': geon_entry['name']}
        doc.update(linked)
        if 'country' in geon_entry:
            doc['country'] = geon_entry['country']
        nuts.insert_one(doc)
def queryLabelByClass(class_text):
    """Return the first English rdfs:label of an ontology class, or None."""
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?l) where {<%s%s> rdfs:label ?l. FILTER(langMatches(lang(?l), ' \
            '"en"))}' % (ONTOLOGY_NS, class_text)
    for record in service.query(query).fetchone():
        # Only the first label is needed.
        return record[0]
    return None
def queryEngLabelByEntity(e):
    """Collect every English rdfs:label of the given entity URI as a set."""
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?l) where {<%s> rdfs:label ?l. FILTER( langMatches(lang(?l), "en"))}' % e
    return {record[0] for record in service.query(query).fetchone()}
def _get_report_fileurl_art11_2014(country, region, article, descriptor):
    """Return CDR file URLs for Article 11 (2014) monitoring reports.

    Filters obligation '611' XML files by the country's locality notation.
    ``region`` is only substituted into a commented-out FILTER inside the
    query text; ``article`` and ``descriptor`` are currently unused but kept
    for interface compatibility with sibling lookups.

    :raises Exception: re-raised (after logging) on query failure.
    """
    q = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>
PREFIX core: <http://www.w3.org/2004/02/skos/core#>
SELECT distinct ?file
WHERE {
 ?file terms:date ?date .
 ?file cr:mediaType 'text/xml' .
 ?file terms:isPartOf ?isPartOf .
 ?file cr:xmlSchema ?schema .
 ?file schema:restricted ?restricted.
 ?isPartOf schema:locality ?locality .
 ?isPartOf schema:obligation ?obligation .
 ?obligation core:notation ?obligationNr .
 ?locality core:notation ?notation .
 FILTER (?notation = '%s')
 FILTER (?obligationNr = '611')
 FILTER (str(?schema) = 'http://dd.eionet.europa.eu/schemas/MSFD11Mon/MSFD11MonSub_1p0.xsd' || str(?schema) = 'http://dd.eionet.europa.eu/schemas/MSFD11Mon/MSFD11Mon_1p1.xsd')
 #FILTER regex(str(?file), '/%s')
 #FILTER (?restricted = 0)
}
ORDER BY DESC(?date)
""" % (country.upper(), region.lower())
    service = sparql.Service('https://cr.eionet.europa.eu/sparql')
    logger.info("Getting fileurl Art11 with SPARQL: %s - %s", country, region)
    try:
        rows = service.query(q).fetchall()
        # Dropped the unused `splitted = url.split('/')` dead local.
        urls = [row[0].value for row in rows]
    except Exception:
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt pass.
        logger.exception(
            'Got an error in querying SPARQL endpoint for '
            'Art11: %s - %s', country, region)
        raise
    return urls
def queryDescendantByClass(c):
    """Return all descendants of class c (via rdfs:subClassOf*) in the ontology namespace."""
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?d) where {?d rdfs:subClassOf* <%s>. ' \
            'FILTER(strstarts(str(?d), "%s"))}' % (c, ONTOLOGY_NS)
    return {record[0] for record in service.query(query).fetchone()}
def queryAncestorByClass(c):
    """Return all ancestors of class c (via rdfs:subClassOf*) in the ontology namespace."""
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?a) where {<%s> rdfs:subClassOf* ?a. ' \
            'FILTER(strstarts(str(?a), "%s"))}' % (c, ONTOLOGY_NS)
    return {record[0] for record in service.query(query).fetchone()}
def querySiblingByClass(c):
    """Return classes that share a direct parent with c (excluding c itself)."""
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?s) where {<%s> rdfs:subClassOf ?p. ?s rdfs:subClassOf ?p. ' \
            'FILTER(?s != <%s> && strstarts(str(?s), "%s"))}' % (c, c, ONTOLOGY_NS)
    return {record[0] for record in service.query(query).fetchone()}
def queryTripleByClass(top_k, c):
    """Sample up to top_k (s, p, o, label) quadruples whose object is typed c.

    Only objects with an English rdfs:label are considered; sampling order
    is randomized by the endpoint.
    """
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?s), str(?p), str(?o), str(?l) where {?s ?p ?o. ?o rdf:type <%s>. ' \
            '?o rdfs:label ?l. FILTER( langMatches(lang(?l), "en"))} ORDER BY RAND() limit %d' % (c, top_k)
    return [[record[0], record[1], record[2], record[3]]
            for record in service.query(query).fetchone()]
def queryObjEntitiesByProperty(top_k, p):
    """Sample up to top_k resource-namespace objects appearing with property p."""
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?o) where {?s <%s> ?o. FILTER (strstarts(str(?o), "%s"))} ' \
            'ORDER BY RAND() limit %d' % (p, RESOURCE_NS, top_k)
    return {record[0] for record in service.query(query).fetchone()}
def querySubByPropertyAndObject(p, o, top_k):
    """Return up to top_k subjects s such that the triple (s, p, o) holds."""
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?s) where {?s <%s> <%s> } limit %d' % (
        p, o, top_k)
    return {record[0] for record in service.query(query).fetchone()}
def __init__(self):
    # Build a SPARQL service from the endpoint URL in db.conf.
    # NOTE(review): ConfigParser.SafeConfigParser is the Python 2 spelling
    # (removed in Python 3.12; renamed configparser.ConfigParser in py3) --
    # confirm which interpreter this module targets before modernizing.
    config = ConfigParser.SafeConfigParser()
    config.read('db.conf')
    # Endpoint URL read from the [endpoint] section, key "url".
    endpoint = config.get('endpoint', 'url')
    # DBUSER = config.get('database', 'user')
    # DBPASS = config.get('database', 'password')
    self.server = sparql.Service(endpoint)
    # subjects: accumulator dict, empty at construction (filled elsewhere).
    self.subjects = {}
    # invertedalso: flag toggled by other methods; defaults to off.
    self.invertedalso = False
def querySubObjLabByProperty(top_k, p):
    """Sample up to top_k (subject, object, english-label) triples for property p.

    Objects are restricted to the resource namespace and must carry an
    English rdfs:label.
    """
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select distinct str(?s),str(?o),str(?l) where {?s <%s> ?o. ?o rdfs:label ?l. ' \
            'FILTER (strstarts(str(?o), "%s") ' \
            'and langMatches(lang(?l), "en"))} ORDER BY RAND() LIMIT %d' % (p, RESOURCE_NS, top_k)
    return [[record[0], record[1], record[2]]
            for record in service.query(query).fetchone()]
def equivalent_classes(cls):
    """Return dbo-namespace class names that are owl:equivalentClass to cls."""
    eq_names = set()
    service = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    query = 'SELECT distinct ?eqclass WHERE { <%s%s> owl:equivalentClass ' \
            '?eqclass. FILTER ( strstarts(str(?eqclass), "%s"))}' % (dbo_prefix, cls, dbo_prefix)
    for record in service.query(query).fetchone():
        # Strip the dbo prefix from the equivalent-class URI.
        eq_names.add(str(record[0]).split(dbo_prefix)[1])
    return eq_names
def querySubLiteralsByProperty(top_k, p, max_str_len):
    """Sample up to top_k (subject, literal) pairs for property p.

    Keeps only string/langString literals shorter than max_str_len.
    """
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    query = 'select str(?sub), str(?obj) where {?sub <%s> ?obj. ' \
            'FILTER ( (regex(datatype(?obj), "langString") || regex(datatype(?obj), "string")) ' \
            '&& strlen(str(?obj))<%d )} ' \
            'ORDER BY RAND() LIMIT %d' % (p, max_str_len, top_k)
    return [[record[0], record[1]]
            for record in service.query(query).fetchone()]
def query_property_ent_num(cls):
    """Map each dbo property to the number of distinct entities of class cls that use it."""
    service = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    query = 'select ?pro (count(distinct ?ent) as ?ent_num) where {?ent rdf:type ?t. ?t rdfs:subClassOf* <%s%s>. ' \
            '?ent ?pro ?obj. FILTER(strstarts(str(?pro), "%s"))} group by ?pro' \
            % (dbo_prefix, cls, dbo_prefix)
    # record[0] = property URI, record[1] = entity count.
    return {str(record[0]): int(str(record[1]))
            for record in service.query(query).fetchone()}
def query_super_classes(cls):
    """Return the dbo-namespace superclass names of cls (rdfs:subClassOf* closure)."""
    service = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    query = 'SELECT distinct ?superclass WHERE { <%s%s> rdfs:subClassOf* ?superclass. ' \
            'FILTER ( strstarts(str(?superclass), "%s"))}' % (dbo_prefix, cls, dbo_prefix)
    # Strip the dbo prefix from every superclass URI.
    return {str(record[0]).split(dbo_prefix)[1]
            for record in service.query(query).fetchone()}
def get_text_reports_2018(country_code):
    """Return (file_url, release_datetime) pairs for 2018 text reports.

    Normalizes the Eurostat-style codes EL->GR and UK->GB before querying
    obligation '761' entries on the CR SPARQL endpoint.

    :raises Exception: re-raised (after logging) on query failure.
    """
    # CDR localities use ISO codes, not the EL/UK variants.
    if country_code == 'EL':
        country_code = 'GR'
    if country_code == 'UK':
        country_code = 'GB'
    q = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>
PREFIX core: <http://www.w3.org/2004/02/skos/core#>
SELECT distinct ?file, ?released
WHERE {
 ?file terms:date ?date .
 #?file cr:mediaType 'text/xml' .
 ?file terms:isPartOf ?isPartOf .
 ?isPartOf schema:released ?released .
 ?isPartOf schema:locality ?locality .
 ?isPartOf schema:obligation ?obligation .
 ?obligation core:notation ?obligationNr .
 ?locality core:notation ?notation .
 FILTER (?notation = '%s')
 FILTER (?obligationNr = '761')
}
ORDER BY DESC(?date)
""" % country_code
    service = sparql.Service('https://cr.eionet.europa.eu/sparql')
    res = []
    try:
        req = service.query(q, timeout=30)
        rows = req.fetchall()
        for row in rows:
            file_url = row[0].value
            release_date = _to_datetime(row[1].value)
            res.append((file_url, release_date))
    except Exception:
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt pass.
        logger.exception(
            'Got an error in querying SPARQL endpoint when '
            'getting text reports for: %s', country_code)
        raise
    return res
def querySubLiteralsByPropertyStr(top_k, p, l_str, max_str_len):
    """Sample (subject, literal) pairs for property p whose literal contains l_str.

    Only string/langString literals shorter than max_str_len qualify.
    Returns an empty/partial list if the client hits a UnicodeDecodeError.
    """
    pairs = list()
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    # Escape single quotes so l_str can be embedded in the SPARQL literal.
    l_str = l_str.replace("'", "\\'")
    query = '''select str(?s),str(?o) where {?s <%s> ?o. FILTER ( contains(str(?o), '%s') && (regex(datatype(?o), "langString") || regex(datatype(?o), "string")) && strlen(str(?o))<%d )} ORDER BY RAND() LIMIT %d''' % (p, l_str, max_str_len, top_k)
    try:
        rows = service.query(query)
        for record in rows.fetchone():
            pairs.append([record[0], record[1]])
    except UnicodeDecodeError:
        # Best effort: keep whatever decoded cleanly.
        pass
    return pairs
def __get_report_filename_art3_4(country, region, schema, obligation):
    """Retrieve from CDR the latest filename for Article 3/4.

    :param country: country notation (upper-cased into the query)
    :param region: substring required in the file URL (upper-cased)
    :param schema: exact XML schema URL to match
    :param obligation: obligation notation number
    :return: bare filename, or '' when no row matches
    :raises Exception: re-raised (after logging) on query failure.
    """
    q = """
PREFIX cr: <http://cr.eionet.europa.eu/ontologies/contreg.rdf#>
PREFIX terms: <http://purl.org/dc/terms/>
PREFIX schema: <http://rod.eionet.europa.eu/schema.rdf#>
PREFIX core: <http://www.w3.org/2004/02/skos/core#>
SELECT ?file
WHERE {
 ?file terms:date ?date .
 ?file cr:mediaType 'text/xml' .
 ?file terms:isPartOf ?isPartOf .
 ?file cr:xmlSchema ?schema .
 ?isPartOf schema:locality ?locality .
 ?isPartOf schema:obligation ?obligation .
 ?obligation core:notation ?obligationNr .
 ?locality core:notation ?notation .
 FILTER (?notation = '%s')
 FILTER (?obligationNr = '%s')
 FILTER (str(?schema) = '%s')
 FILTER regex(str(?file), '%s')
}
ORDER BY DESC(?date)
LIMIT 1
""" % (country.upper(), obligation, schema, region.upper())
    service = sparql.Service('https://cr.eionet.europa.eu/sparql')
    filename = ''
    try:
        req = service.query(q)
        rows = req.fetchall()
        if not rows:
            logger.warning("Filename not found for query: %s", q)
            return filename
        url = rows[0][0].value
        # The filename is the last URL path segment.
        filename = url.split('/')[-1]
    except Exception:
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt pass.
        logger.exception(
            'Got an error in querying SPARQL endpoint for '
            'Article 3/4 country: %s', country)
        raise
    return filename
def super_classes(col_classes):
    """Extend each column's class list with all dbo superclasses of its first class.

    Mutates col_classes in place (appending missing superclass names) and
    also returns it. Prints progress every 10 columns.
    """
    dbo_prefix = 'http://dbpedia.org/ontology/'
    service = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    for i, col in enumerate(col_classes.keys()):
        base_cls = col_classes[col][0]
        query = 'SELECT distinct ?superclass WHERE { dbo:%s rdfs:subClassOf* ?superclass. ' \
                'FILTER ( strstarts(str(?superclass), "%s"))}' % (base_cls, dbo_prefix)
        for record in service.query(query).fetchone():
            name = str(record[0]).split(dbo_prefix)[1]
            if name not in col_classes[col]:
                col_classes[col].append(name)
        if i % 10 == 0:
            print('%d columns done' % (i + 1))
    return col_classes
def queryClassByEntity(e):
    """Return all ontology-namespace classes of entity e.

    Combines types reached directly via rdf:type (with rdfs:subClassOf*
    closure) and types reached through a dbo:wikiPageRedirects hop.
    """
    types = set()
    service = sparql.Service(SPARQL_END_POINT, "utf-8", "GET")
    direct_q = 'select distinct str(?s) where { <%s> rdf:type ?e. ?e rdfs:subClassOf* ?s. ' \
               'FILTER(strstarts(str(?s), "%s"))}' % (e, ONTOLOGY_NS)
    for record in service.query(direct_q).fetchone():
        types.add(record[0])
    # Redirect pages: the target entity's types also count.
    redirect_q = 'select distinct str(?ss) where { <%s> dbo:wikiPageRedirects ?e. ?e rdf:type ?s. ' \
                 '?s rdfs:subClassOf* ?ss. FILTER(strstarts(str(?ss), "%s"))}' % (e, ONTOLOGY_NS)
    for record in service.query(redirect_q).fetchone():
        types.add(record[0])
    return types
def query_general_entities(cls_entities):
    """For each class, sample up to 1000 entities not already listed for it.

    :param cls_entities: dict class name -> iterable of already-known entities
    :return: dict class name -> list of newly sampled entity names
    """
    dbp_prefix = 'http://dbpedia.org/resource/'
    cls_gen_entities = dict()
    service = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    for cls in cls_entities.keys():
        known = cls_entities[cls]
        sampled = list()
        query = 'select distinct ?e where {?e a dbo:%s} ORDER BY RAND() limit 1000' % cls
        for record in service.query(query).fetchone():
            name = str(record[0]).split(dbp_prefix)[1]
            # Skip entities we already have for this class.
            if name not in known:
                sampled.append(name)
        cls_gen_entities[cls] = sampled
        print('%s done, %d entities' % (cls, len(sampled)))
    return cls_gen_entities
def super_classes_of_classes(classes):
    """Map each dbo class name to the set of its superclass names.

    The rdfs:subClassOf* closure includes the class itself; only classes in
    the dbo namespace are kept, with the prefix stripped.
    """
    super_clses = dict()
    dbo_prefix = 'http://dbpedia.org/ontology/'
    s = sparql.Service('http://dbpedia.org/sparql', "utf-8", "GET")
    for cls in classes:
        statement = 'SELECT distinct ?superclass WHERE { dbo:%s rdfs:subClassOf* ?superclass. ' \
                    'FILTER ( strstarts(str(?superclass), "%s"))}' % (cls, dbo_prefix)
        result = s.query(statement)
        # set.add is idempotent, so the original `if name not in supers`
        # guard was redundant; a comprehension expresses the collection.
        super_clses[cls] = {str(row[0]).split(dbo_prefix)[1]
                            for row in result.fetchone()}
    return super_clses
def wikidata_osmrelations_to_geonames(client, args):
    """Attach OSM relation and Wikidata IDs to existing geonames documents.

    Queries Wikidata for entities carrying both an OSM relation ID (P402)
    and a GeoNames ID (P1566); existing geonames entries are updated,
    unknown ones are skipped.
    """
    db = client.geostore
    service = sparql.Service(WIKIDATA_ENDPOINT, "utf-8", "GET")
    query = '''
    SELECT ?s ?o ?g WHERE {
       ?s wdt:P402 ?o.
       ?s wdt:P1566 ?g.
    }
    '''
    for record in service.query(query).fetchone():
        wikidata_id = record[0].value.strip()
        osm_relation = record[1].value.strip()
        geon = 'http://sws.geonames.org/' + record[2].value + '/'
        # Only update documents that already exist; never insert here.
        if db.geonames.find_one({'_id': geon}):
            db.geonames.update_one(
                {'_id': geon},
                {'$set': {'osm_relation': osm_relation,
                          'wikidata': wikidata_id}})