示例#1
0
def main():
    """Look up DBpedia labels for a name given on the command line.

    Parses one positional argument, turns it into a SPARQL query with
    ``sparql_resource_query_str``, runs that query against the public DBpedia
    endpoint and prints the sorted, de-duplicated ``(language, label)`` pairs
    found in the result bindings.
    """
    cli = argparse.ArgumentParser(
        description=
        "Translate an input string by looking up DBpedia entries with that name."
    )
    cli.add_argument("input", type=str, help="The temponym to translate")
    options = cli.parse_args()

    lookup = sparql_resource_query_str(options.input)

    endpoint = SPARQLWrapper.SPARQLWrapper("http://dbpedia.org/sparql")
    endpoint.setQuery(lookup)
    endpoint.setReturnFormat(SPARQLWrapper.JSON)
    response = endpoint.query().convert()

    # A response without the expected nesting simply yields no pairs.
    if "results" in response and "bindings" in response["results"]:
        rows = response["results"]["bindings"]
    else:
        rows = []

    pairs = set()
    for row in rows:
        entry = row["label"]
        text = entry["value"]
        pairs.add((entry["xml:lang"], text))

    print(sorted(pairs))
示例#2
0
def years():
    """Guess release years for track titles via DBpedia full-text search.

    Reads titles line by line from ``processed_24_dec_2015_backup.txt``
    (stopping once 5000 lines have been read), strips mix/version decorations,
    searches DBpedia abstracts for the remaining words and tallies the year of
    the best-ranked hit that has a ``dbo:releaseDate``. Prints per-title
    progress, then the year histogram, then every title that had no hit.

    NOTE(review): Python 2 only as written (print statements and the
    two-argument ``str.translate(None, ...)`` form).
    """
    sparql = SPARQLWrapper.SPARQLWrapper("http://dbpedia.org/sparql")

    # NOTE(review): `counter` is a second name for the same Counter and is
    # not used anywhere else in this function.
    year_counter = counter = collections.Counter()
    unfound = []
    total = 0
    with open("processed_24_dec_2015_backup.txt") as o:
        for line in o:
            total += 1
            words = []
            line = line.strip()
            # Remove known mix/version/edition markers and any bracketed text.
            fixed_line = re.sub(
                'Original Mix|Explicit|Radio Version|Dirty Radio Edit|Radio Edit|Original Radio Edit|\(Download\)|, Richard Bedford|Remastered Version|Remastered Version|Remastered Album Version|Original Version| - Original Mix|Single Version| - Live|Single Edit|LP Version|BBC Radio 1 Live Lounge|Original London Cast|Michael Reed|Theme from|The| - Edit|\(feat .*\)|\(Feat .*\)|\(featuring .*\)|Theme From| - Original$|/ Mono|Digital Remaster|Original mix|/Soundtrack Version|Extended Version|New Sound Remastered|Explicit Version|\(Mono\)|Album Version|45 Version|Radio Mix|\(.*\)|New Stereo Mix|Stereo Remastered Version|Original Album Version|Original Mono Version|Remixed Version|Soundtrack Version|Radio edit|\[.*\]|/ Stereo$|Club Mix|Album Verision|Alternate Version|Dance Mix|Revised Album Version',
                '', line)
            blacklist = ["REMASTER", "REMASTERED", "Y"]
            # Drop commas/parentheses, split on the en-dash separator, then
            # keep alphanumeric, non-numeric words longer than two characters
            # that are not blacklisted; each is upper-cased and double-quoted.
            for p in fixed_line.translate(None, ',()').split(" – "):
                for w in p.split(" "):
                    if w.isalnum() and not w.isdigit() and not w.upper(
                    ) in blacklist and len(w) > 2:
                        words.append("\"" + w.upper() + "\"")
            # NOTE(review): when no word survives the filter this is an empty
            # string and the query below searches for ' () ' -- confirm the
            # endpoint tolerates that.
            search_string = " AND ".join(words)
            # Full-text match on the abstract, ordered by a score/rank
            # expression, keeping only the top row.
            query = """
                PREFIX dbo: <http://dbpedia.org/ontology/>

                select ?s1, ?date where
                {
                    ?s1 dbo:abstract ?o1 .
                    ?o1 bif:contains ' (%s) ' option ( score ?sc ) .
                    ?s1 dbo:releaseDate ?date .

                }
                order by desc ( ?sc * 3e-1 + sql:rnk_scale ( <LONG::IRI_RANK> ( ?s1 ) ) ) limit 1 offset 0
            """ % (search_string)
            sparql.setQuery(query)
            sparql.setReturnFormat(SPARQLWrapper.JSON)
            results = sparql.query().convert()

            # print query
            b = results["results"]["bindings"]
            if len(b) > 0:
                print "FOUND__"
                print line
                print b[0]["date"]["value"]
                # The year is whatever precedes the first "-" in the date value.
                year = b[0]["date"]["value"].split("-")[0]
                year_counter[year] += 1
                print b[0]["s1"]["value"]
            else:
                print "\tUNFOUND__"
                print "\t", line
                print "\t", search_string
                unfound.append(line)
            # print
            # Hard cap on the number of input lines processed.
            if total >= 5000:
                break

    # Histogram: count first, then year, most common year first.
    for x in year_counter.most_common():
        print x[1], "\t", x[0]

    print "\n".join(unfound)
示例#3
0
def runQuery(query):
    """Run *query* on the Wikidata SPARQL endpoint and return its result bindings."""
    client = SPARQLWrapper.SPARQLWrapper('https://query.wikidata.org/sparql')
    client.setQuery(query)
    client.setReturnFormat(SPARQLWrapper.JSON)
    response = client.query()
    return response.convert()['results']['bindings']
def runQuery(query):
    """Run *query* on the Wikidata SPARQL endpoint and return its result bindings.

    The request is sent with an explicit browser-style user agent string.
    """
    browser_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    client = SPARQLWrapper.SPARQLWrapper(
        'https://query.wikidata.org/sparql', agent=browser_agent)
    client.setQuery(query)
    client.setReturnFormat(SPARQLWrapper.JSON)
    response = client.query()
    return response.convert()['results']['bindings']
示例#5
0
def prop_has_range_or_comment(prop_value):
    '''
    Report whether a DBpedia property has 'range' and/or 'comment' predicates.

    Fetches every predicate attached to the URI stored under
    ``prop_value['prop']`` and returns a ``(has_range, has_comment)`` tuple of
    0/1 flags; a flag is 1 when at least one predicate URI contains that word.
    '''
    subject_uri = prop_value['prop']

    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
    query_text = """
    select distinct ?prop ?value where {
      <""" + subject_uri + """> ?prop ?value }
      """

    endpoint.setQuery(query_text)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    predicates = [row["prop"]["value"]
                  for row in response["results"]["bindings"]]
    has_range = int(any("range" in uri for uri in predicates))
    has_comment = int(any("comment" in uri for uri in predicates))
    return (has_range, has_comment)
def get_distractors(resource, resource_type):
    """Refill the module-level ``similar_resources`` list for *resource*.

    Asks DBpedia for up to 30 resources of ``resource_type`` that share more
    than one property/value pair with ``resource``, most shared first. Each
    hit is stored as ``[uri, shared_count, 0, 0]``. Returns nothing.
    """
    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")

    similarity_query = """

    select ?similar (count(?p) as ?similarity)  where {
      values ?res {<http://dbpedia.org/resource/""" + resource + """>}
      ?similar ?p ?o ; a <""" + resource_type + """> .
      ?res   ?p ?o .

    }
    group by ?similar ?res
    having (count(?p) > 1)
    order by desc(?similarity)
    LIMIT 30
    """

    endpoint.setQuery(similarity_query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    # The list is emptied in place (not rebound) before being refilled.
    del similar_resources[:]
    for row in response["results"]["bindings"]:
        similar_resources.append(
            [row["similar"]["value"], int(row["similarity"]["value"]), 0, 0])
def total_one_degree_paths(res1, res2):
    """Count the distinct nodes linking ``res1`` and ``res2`` in DBpedia.

    ``res1`` is a resource name appended to the DBpedia resource prefix and
    ``res2`` is a full URI. A node counts when ``res1`` points to it and
    either ``res2`` also points to it or it points to ``res2``. Returns the
    count as the string found in the first result row, or ``None`` when the
    endpoint returns no rows.
    """
    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")

    path_query = """

   select count(distinct ?var3) as ?cnt where
{
{
SELECT distinct ?var3
WHERE {
    <http://dbpedia.org/resource/""" + res1 + """> ?prop1 ?var3 .
<""" + res2 + """> ?pr ?var3.
}
}
UNION
{
SELECT distinct ?var3
WHERE {
    <http://dbpedia.org/resource/""" + res1 + """> ?prop1 ?var3 .

?var3 ?prop <""" + res2 + """> .
}
}
}
    """

    endpoint.setQuery(path_query)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    if not rows:
        return None
    return rows[0]["cnt"]["value"]
示例#8
0
def run_query(query, endpoint):
    """
    Execute a SPARQL query on the given endpoint and hand back the response
    after conversion from the requested JSON return format.
    """
    client = SPARQLWrapper.SPARQLWrapper(endpoint)
    client.setQuery(query)
    client.setReturnFormat(SPARQLWrapper.JSON)
    response = client.query()
    return response.convert()
示例#9
0
 def __init__(self, base_url):
     """Store the slash-terminated base URL and build one wrapper per operation.

     ``self.wrappers`` maps ``'query'`` and ``'update'`` to a SPARQL wrapper
     pointing at ``base_url`` followed by that operation name.
     """
     self.base_url = base_url if base_url.endswith('/') else base_url + '/'
     # Build the mapping locally so the attribute is only set once complete.
     wrappers = {}
     for op in ('query', 'update'):
         wrappers[op] = sprqlw.SPARQLWrapper(self.base_url + op)
     self.wrappers = wrappers
示例#10
0
def get_results(endpoint_url, query):
    """Run *query* against *endpoint_url* and return the converted JSON response.

    The request identifies itself with a user agent that embeds the running
    Python major and minor version.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    # TODO adjust user agent; see https://w.wiki/CX6
    agent_string = "WDQS-example Python/{}.{}".format(major, minor)
    client = SPARQLWrapper(endpoint_url, agent=agent_string)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()
def get_sparql_results(sparql_query_string):
    """Run a query against the Wikidata SPARQL endpoint, best-effort.

    Returns the response converted from the JSON return format, or ``None``
    when sending the request or converting the response raises.
    """
    sparql = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery(sparql_query_string)
    try:
        # Results are requested as JSON and converted by the wrapper.
        sparql.setReturnFormat(SPARQLWrapper.JSON)
        result = sparql.query().convert()
    except Exception:
        # Still best-effort for callers, but catching Exception (not a bare
        # except) lets KeyboardInterrupt and SystemExit propagate.
        result = None
    return result
示例#12
0
 def _do_query(self, statement, access_point):
     """Run *statement* on the given access point and return the parsed response.

     The endpoint URL comes from ``self._query_url`` filled with the base URL
     and access point; the access point's prefix string is prepended to the
     statement. The raw XML response is handed to ``self._parse_response``.
     """
     endpoint_url = self._query_url.format(url=self._base_url, ap=access_point)
     client = SPARQLWrapper.SPARQLWrapper(endpoint_url)
     client.setReturnFormat(SPARQLWrapper.XML)
     client.setQuery(self._get_prefix_str(access_point) + statement)
     raw_response = client.query().response
     return self._parse_response(raw_response)
示例#13
0
def update_profile(profile):
    """Send the profile's pending additions/removals to the update endpoint.

    Returns ``'200 No update'`` without contacting the server when the union
    of ``profile.add`` and ``profile.remove`` is empty; otherwise returns the
    server response decoded as UTF-8.
    """
    pending = profile.add | profile.remove
    if len(pending) == 0:
        return '200 No update'

    remote = SPARQLWrapper.SPARQLWrapper(updateUrl)
    for name, value in (('email', user), ('password', passw)):
        remote.addParameter(name, value)
    remote.setMethod(SPARQLWrapper.POST)
    remote.setQuery(write_update_query(profile))
    return remote.queryAndConvert().decode('utf-8')
示例#14
0
def query_trainings(uris=None, faculty=None, link_text=None,
    link_address=None, rank=None):
    """Query the store with optional filters and return ``models.WebLink`` objects.

    Each truthy argument adds one filter built by ``make_filter``; ``uris``
    restricts ``?link`` to the given URIs. The described triples are grouped
    by subject and predicate and each subject is loaded into a
    ``models.WebLink``.

    NOTE(review): the query DESCRIBEs ``?train`` although the WHERE clause
    only binds ``?link`` -- confirm which variable is intended.
    """
    # NOTE(review): four dict entries mapping 'trainings', 'specialty',
    # 'hospital' and 'organization' to vivo.brown.edu profile property URIs
    # were pasted here as bare statements, which made this function a syntax
    # error. They look like part of a property map defined elsewhere and are
    # not used by this function, so they were removed from the body.
    filters = []
    if faculty:
        filters.append(
            make_filter('?link',
                '<http://vivo.brown.edu/ontology/vivo-brown/drrbWebPageOf>',
                '?fac',
                '<{}>'.format(faculty) ) )
    if link_text:
        filters.append(
            make_filter('?link',
                '<http://vivoweb.org/ontology/core#linkAnchorText>',
                '?link_text',
                '{}'.format(json.dumps(link_text)) ) )
    if link_address:
        filters.append(
            make_filter('?link',
                '<http://vivoweb.org/ontology/core#linkURI>',
                '?link_address',
                '{}'.format(json.dumps(link_address)) ) )
    if rank:
        filters.append(
            make_filter('?link',
                '<http://vivoweb.org/ontology/core#rank>',
                '?rank',
                '{}'.format(json.dumps(rank)) ) )
    if uris:
        filters.append(
            make_filter(None,None,'?link',''.join( ['<{}>'.format(u) for u in uris ]) ) )
    # {0} receives the concatenated 'filter' parts, {1} the 'values' parts.
    query = """
        PREFIX core: <http://vivoweb.org/ontology/core#>
        DESCRIBE ?train
        WHERE {{ ?link a core:URLLink . {0} {1} }}
        """.format(''.join([ f['filter'] for f in filters if f.get('filter') ]),
         ''.join([ f['values'] for f in filters if f.get('values') ]) )
    remote = SPARQLWrapper.SPARQLWrapper(queryUrl, updateUrl)
    remote.addParameter('email', user)
    remote.addParameter('password', passw)
    remote.setMethod(SPARQLWrapper.POST)
    remote.setQuery( query )
    results = remote.queryAndConvert()
    # subject -> predicate -> list of objects, all converted to Python values.
    resources = defaultdict(lambda: defaultdict(list))
    for r in results.triples((None,None,None)):
        resources[r[0].toPython()][r[1].toPython()].append(r[2].toPython())
    out = []
    for r in resources:
        res = models.WebLink(uri=r)
        res.load(resources[r])
        out.append(res)
    return out
示例#15
0
def clear_graph(sparql_endpoint=SPARQL_ENDPOINT, graph=EVAL_DATA_GRAPH):
    """Issue ``CLEAR GRAPH`` for *graph* on *sparql_endpoint* via POST.

    An ``urllib2.HTTPError`` raised by the request is ignored.
    """
    client = SPARQLWrapper.SPARQLWrapper(sparql_endpoint)
    client.setMethod(SPARQLWrapper.POST)
    graph_term = URIRef(graph).n3()
    statement = 'CLEAR GRAPH %s' % (graph_term, )
    logger.info('Clearing graph %s on endpoint %s', graph, sparql_endpoint)
    client.setQuery(statement)
    try:
        client.query()
    except urllib2.HTTPError:
        # argh, don't ask me why, but it seems we get a 406 on success
        # TODO: report to SPARQLWrapper?
        return
def get_distractors(resource, resource_type):
    """Refill the module-level ``similar_resources`` list for *resource*.

    Asks DBpedia for up to 30 resources of ``resource_type`` that share more
    than one property/value pair with ``resource``, most shared first. Each
    hit is stored as ``[uri, shared_count, 0, 0]``. Returns nothing.
    """
    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")

    similarity_query = """

    select ?similar (count(?p) as ?similarity)  where {
      values ?res {<http://dbpedia.org/resource/""" + resource + """>}
      ?similar ?p ?o ; a <""" + resource_type + """> .
      ?res   ?p ?o .

    }
    group by ?similar ?res
    having (count(?p) > 1)
    order by desc(?similarity)
    LIMIT 30
    """

    endpoint.setQuery(similarity_query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    # The list is emptied in place (not rebound) before being refilled.
    del similar_resources[:]
    for row in response["results"]["bindings"]:
        similar_resources.append(
            [row["similar"]["value"], int(row["similarity"]["value"]), 0, 0])
def total_one_degree_paths(res1, res2):
    """Count the distinct nodes linking ``res1`` and ``res2`` in DBpedia.

    ``res1`` is a resource name appended to the DBpedia resource prefix and
    ``res2`` is a full URI. A node counts when ``res1`` points to it and
    either ``res2`` also points to it or it points to ``res2``. Returns the
    count as the string found in the first result row, or ``None`` when the
    endpoint returns no rows.
    """
    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")

    path_query = """

   select count(distinct ?var3) as ?cnt where
{
{
SELECT distinct ?var3
WHERE {
    <http://dbpedia.org/resource/""" + res1 + """> ?prop1 ?var3 .
<""" + res2 + """> ?pr ?var3.
}
}
UNION
{
SELECT distinct ?var3
WHERE {
    <http://dbpedia.org/resource/""" + res1 + """> ?prop1 ?var3 .

?var3 ?prop <""" + res2 + """> .
}
}
}
    """

    endpoint.setQuery(path_query)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    if not rows:
        return None
    return rows[0]["cnt"]["value"]
def prop_has_range_or_comment(prop_value):
    '''
    Report whether a DBpedia property has 'range' and/or 'comment' predicates.

    Fetches every predicate attached to the URI stored under
    ``prop_value['prop']`` and returns a ``(has_range, has_comment)`` tuple of
    0/1 flags; a flag is 1 when at least one predicate URI contains that word.
    '''
    subject_uri = prop_value['prop']

    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
    query_text = """
    select distinct ?prop ?value where {
      <""" + subject_uri + """> ?prop ?value }
      """

    endpoint.setQuery(query_text)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    predicates = [row["prop"]["value"]
                  for row in response["results"]["bindings"]]
    has_range = int(any("range" in uri for uri in predicates))
    has_comment = int(any("comment" in uri for uri in predicates))
    return (has_range, has_comment)
示例#19
0
def query_collaborators(uris=None, faculty=None, label=None,
        fullName=None, alphaName=None):
    """Query faculty members with optional filters; return ``models.Collaborator`` objects.

    ``faculty`` restricts results to collaborators of that faculty URI;
    ``label``, ``fullName`` and ``alphaName`` add literal filters built by
    ``make_filter``; ``uris`` restricts ``?uri`` to the given URIs. The
    described triples are grouped by subject and predicate and each subject
    is loaded into a ``models.Collaborator``.
    """
    filters = []
    if faculty:
        statement = write_statement(
            ("<{}>".format(faculty),
             '<http://vivoweb.org/ontology/core#hasCollaborator>',
             '?uri'))
        filters.append({'filter': statement})

    # (argument value, predicate, SPARQL variable), in the order applied.
    literal_filters = (
        (label, '<http://www.w3.org/2000/01/rdf-schema#label>', '?label'),
        (fullName, '<http://vivo.brown.edu/ontology/vivo-brown/fullName>',
         '?fullName'),
        (alphaName, '<http://vivo.brown.edu/ontology/vivo-brown/alphaName>',
         '?alphaName'),
    )
    for value, predicate, variable in literal_filters:
        if value:
            filters.append(
                make_filter('?uri', predicate, variable, json.dumps(value)))

    if uris:
        uri_terms = ''.join('<{}>'.format(u) for u in uris)
        filters.append(make_filter(None, None, '?uri', uri_terms))

    filter_part = ''.join(f['filter'] for f in filters if f.get('filter'))
    values_part = ''.join(f['values'] for f in filters if f.get('values'))
    query = """
        PREFIX vivo: <http://vivoweb.org/ontology/core#>
        DESCRIBE ?uri
        WHERE {{ ?uri a vivo:FacultyMember . {0} {1} }}
        """.format(filter_part, values_part)

    remote = SPARQLWrapper.SPARQLWrapper(queryUrl, updateUrl)
    remote.addParameter('email', user)
    remote.addParameter('password', passw)
    remote.setMethod(SPARQLWrapper.POST)
    remote.setQuery(query)
    graph = remote.queryAndConvert()

    # subject -> predicate -> list of objects, all converted to Python values.
    grouped = defaultdict(lambda: defaultdict(list))
    for subj, pred, obj in graph.triples((None, None, None)):
        grouped[subj.toPython()][pred.toPython()].append(obj.toPython())

    collaborators = []
    for uri, attributes in grouped.items():
        record = models.Collaborator(uri=uri)
        record.load(attributes)
        collaborators.append(record)
    return collaborators
def query_wikidata_service(searchterm, language_code):
    """Search Wikidata for *searchterm* and return the converted JSON response.

    Uses the ``wikibase:mwapi`` EntitySearch service (limit 1) in the given
    ``language_code`` and asks for the item with its optional subclass-of
    (P279), category (P910) and instance-of (P31) values and English labels.

    Both arguments are escaped before being placed inside the single-quoted
    SPARQL literals, so a term such as ``O'Brien`` no longer ends the literal
    early.
    """
    def _escape_literal(text):
        # Backslash first so the escapes added afterwards are not doubled.
        return (text.replace('\\', '\\\\')
                    .replace("'", "\\'")
                    .replace('\n', '\\n')
                    .replace('\r', '\\r'))

    query = """SELECT ?item ?itemLabel ?subclass_of ?subclass_ofLabel ?category_of ?category_ofLabel 
        ?instance_of ?instance_ofLabel WHERE { SERVICE wikibase:mwapi { bd:serviceParam wikibase:api "EntitySearch" . 
        bd:serviceParam wikibase:endpoint "www.wikidata.org" . bd:serviceParam mwapi:search '""" + _escape_literal(searchterm) + """' . 
        bd:serviceParam mwapi:language '""" + _escape_literal(language_code) + """' . bd:serviceParam wikibase:limit 1 . 
        ?item wikibase:apiOutputItem mwapi:item .} SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
        OPTIONAL { ?item (wdt:P279) ?subclass_of.} OPTIONAL { ?item (wdt:P910) ?category_of.} 
        OPTIONAL { ?item (wdt:P31) ?instance_of.}}"""
    url = 'https://query.wikidata.org/sparql'
    sparql = SPARQLWrapper(url)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()
def get_similar_resources(resource):
    """Rank resources similar to *resource* and print the ranking as JSON.

    Steps: fetch all property/value pairs of the DBpedia resource, derive its
    type and abstract from them, refill the module-level ``similar_resources``
    list via ``get_distractors``, attach the matching concept score from
    ``alchemy_concepts`` and the one-degree path count to each entry, sort,
    and print a JSON document with ``total``, ``error`` and ``resources``.

    The JSON is serialized with ``json.dumps`` from a dictionary instead of
    being concatenated by hand, so an empty result list yields
    ``"resources": []`` rather than a parse error and special characters in a
    URI are escaped correctly.
    """
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    query1 = """
    select distinct ?prop ?value
    where {
    <http://dbpedia.org/resource/""" + resource + """> ?prop ?value }
      """

    sparql.setQuery(query1)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    resource_type = get_resource_type(results)
    get_distractors(resource, resource_type)

    # Alchemy API part: copy the concept score onto the matching entry.
    abstract = get_abstract(results)
    concepts = alchemy_concepts(abstract)

    for concept in concepts:
        for res in similar_resources:
            if concept[0] == res[0]:
                res[2] = concept[1]

    for res in similar_resources:
        res[3] = int(total_one_degree_paths(resource, res[0]))

    # Highest concept score first, then shared properties, then path count.
    similar_resources.sort(key=lambda x: (-x[2], -x[1], -x[3]))

    # Every value is emitted as a string, as in the original output format.
    payload = {
        "total": str(len(similar_resources)),
        "error": "0",
        "resources": [
            {
                "rank": str(rank),
                "dbpedia": entry[0],
                "similarity": str(entry[1]),
                "alchemy": str(entry[2]),
                "paths": str(entry[3]),
            }
            for rank, entry in enumerate(similar_resources, 1)
        ],
    }

    print(json.dumps(payload, indent=4))
示例#22
0
def query_faculty(shortId):
    """Describe the faculty member for *shortId* and return a loaded profile.

    All described triples are grouped by predicate (objects converted to
    Python values) and loaded into a ``models.FacultyProfile`` for the URI.
    """
    uri = shortIdToUri(shortId)
    remote = SPARQLWrapper.SPARQLWrapper(queryUrl, updateUrl)
    remote.addParameter('email', user)
    remote.addParameter('password', passw)
    remote.setMethod(SPARQLWrapper.POST)
    describe = "DESCRIBE ?uri WHERE {{ VALUES ?uri {{ <{0}> }} }}".format(uri)
    remote.setQuery(describe)
    graph = remote.queryAndConvert()

    attributes = defaultdict(list)
    for _subj, pred, obj in graph.triples((None, None, None)):
        attributes[pred.toPython()].append(obj.toPython())

    profile = models.FacultyProfile(uri)
    profile.load(attributes)
    return profile
示例#23
0
def chech_dbpedia(item):
    """Return True when DBpedia has an ``rdfs:label`` for the looked-up resource.

    The resource name placed in the query is *item* reversed (``item[::-1]``).
    Returns False when the request fails or when the response contains no
    result bindings.

    The previous check ``results == []`` compared the converted JSON response
    with a list and so never matched; the bindings list is inspected instead.
    """
    sparql = SPARQLWrapper.SPARQLWrapper("http://dbpedia.org/sparql")
    try:
        sparql.setQuery("""
		    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
		    SELECT ?label
		    WHERE {{ <http://dbpedia.org/resource/{}> rdfs:label ?label }}
		""".format(item[::-1]))
        sparql.setReturnFormat(SPARQLWrapper.JSON)
        results = sparql.query().convert()
    except Exception:
        # Best-effort lookup: any request/conversion failure means "not found",
        # while KeyboardInterrupt and SystemExit still propagate.
        return False

    try:
        return bool(results["results"]["bindings"])
    except (KeyError, TypeError):
        # Response did not have the expected SELECT result shape.
        return False
示例#24
0
def querySPARQLtoRDF(query):
    """Run *query* against each endpoint in ``SPARQL_ENDPOINTS`` until one succeeds.

    Endpoints are tried in list order. Returns the result bindings from the
    first endpoint whose request and conversion do not raise, or an empty
    list once every endpoint has failed.

    NOTE(review): the return format is set to RDF but the result is indexed
    like a SELECT/JSON dictionary -- confirm ``convert()`` yields that shape.
    NOTE(review): Python 2 only as written (``except Exception, e`` and print
    statements).
    """
    # Index of the endpoint currently being tried; also the failure count.
    repeats = 0
    while True:
        try:
            sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINTS[repeats])
            sparql.setQuery(query)
            sparql.setReturnFormat(SPARQLWrapper.RDF)
            results = sparql.query().convert()
            return results['results']['bindings']
        except Exception, e:
            print 'Repeating query. Error', e
            repeats += 1

        # Give up once the index has moved past the last endpoint.
        if repeats > len(SPARQL_ENDPOINTS) - 1:
            print 'Following query could not finish normally:', query
            return []
示例#25
0
def query_faculty_association(shortId, assocProp):
    """Describe everything linked to a faculty member through *assocProp*.

    Returns a nested mapping ``subject -> predicate -> [objects]``. Subjects
    are kept as returned by the graph; predicates and objects are converted
    to Python values.
    """
    remote = SPARQLWrapper.SPARQLWrapper(queryUrl, updateUrl)
    remote.addParameter('email', user)
    remote.addParameter('password', passw)
    remote.setMethod(SPARQLWrapper.POST)
    describe_template = """
        DESCRIBE ?assoc
        WHERE {{ ?uri <{1}> ?assoc.
        VALUES ?uri {{ <{0}> }}
        }}"""
    remote.setQuery(describe_template.format(shortIdToUri(shortId), assocProp))
    graph = remote.queryAndConvert()

    grouped = defaultdict(lambda: defaultdict(list))
    for subj, pred, obj in graph.triples((None, None, None)):
        grouped[subj][pred.toPython()].append(obj.toPython())
    return grouped
示例#26
0
def SparqlRequest(rq_code, rq_uri, defgraph=None, fmt=None):
  """Send the SPARQL text ``rq_code`` to ``rq_uri`` and return the raw query result.

  ``fmt``, when truthy, is set as the return format. ``defgraph`` is accepted
  but not used. Any exception is logged and ``None`` is returned instead of
  raising.
  """
  logging.debug('URI: %s'%(rq_uri))
  logging.debug('%s'%(rq_code))
  response = None
  try:
    client = SPARQLWrapper.SPARQLWrapper(rq_uri)
    client.setQuery(rq_code)
    if fmt:
      client.setReturnFormat(fmt)
    response = client.query()
  except Exception as err:
    logging.error('Error: %s'%err)
    exc_type, exc_value, exc_tb = sys.exc_info()
    logging.error('sys.exc_info:\n(%s)\n%s'%(exc_type,exc_value))
    if exc_tb:
      logging.info('traceback:\n%s>'%(exc_tb))
    # Log the offending query text last, as the original did.
    logging.error('%s'%str(rq_code))
  return response
示例#27
0
def fetch_mathematicians(num):
    """Fetch the *num* mathematicians with the most sitelinks from Wikidata.

    Only items with a Mathematics Genealogy Project ID (P549) are considered.
    Returns a list of dicts with integer ``wiki_id`` (the number after the
    leading letter of the entity id), ``id`` (genealogy ID) and ``score``
    (distinct sitelink count), in the order returned by the endpoint.
    """
    query_template = """SELECT ?mathematician ?mathematicianLabel (COUNT(DISTINCT ?sitelink) AS ?sites) ?Mathematics_Genealogy_Project_ID WHERE {{
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
?mathematician wdt:P549 ?Mathematics_Genealogy_Project_ID.
?sitelink schema:about ?mathematician.
}}
GROUP BY ?mathematician ?mathematicianLabel ?Mathematics_Genealogy_Project_ID
ORDER BY DESC(?sites)
LIMIT {:d}"""
    client = SPARQLWrapper.SPARQLWrapper('https://query.wikidata.org/sparql')
    client.setQuery(query_template.format(num))
    client.setReturnFormat(SPARQLWrapper.JSON)
    response = client.query().convert()

    mathematicians = []
    for row in response['results']['bindings']:
        # Last path segment of the entity URI, minus its leading letter.
        entity_id = row['mathematician']['value'].split('/')[-1]
        mathematicians.append({
            'wiki_id': int(entity_id[1:]),
            'id': int(row['Mathematics_Genealogy_Project_ID']['value']),
            'score': int(row['sites']['value'])
        })
    return mathematicians
示例#28
0
    def get(namehint: str) -> List[PlaceSuggestion]:
        """Suggest DBpedia places whose label starts with *namehint*.

        The match is case-insensitive (both sides are lower-cased). Returns up
        to 15 ``PlaceSuggestion`` objects built from the language, latitude,
        longitude, label, abstract and resource URI of each hit; an empty
        hint returns an empty list without querying.

        The hint is escaped for use inside a double-quoted SPARQL literal.
        Backslashes are escaped first so that a hint containing ``\\`` cannot
        swallow the closing quote or an escape added afterwards.
        """
        if namehint == '':
            return list()
        escapednamehint = (namehint.lower()
                           .replace('\\', '\\\\')
                           .replace('\n', '\\n')
                           .replace('\r', '\\r')
                           .replace('\"', '\\\"')
                           .replace('\t', '\\t'))
        sw = SPARQLWrapper.SPARQLWrapper("http://dbpedia.org/sparql",
                                         returnFormat=SPARQLWrapper.JSON)
        q = f"""
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX dbp: <http://dbpedia.org/property/>
            PREFIX dbr: <http://dbpedia.org/resource/>
            PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>

            SELECT DISTINCT
                ?plc (LANG(?nm) as ?lang) ?lat ?long ?nm ?abs
            WHERE {{
                ?plc rdfs:label ?nm ;
                dbo:abstract ?abs ;
                (dbp:latitude|geo:lat) ?lat ;
                (dbp:longitude|geo:long) ?long .
                FILTER(
                    LANG(?nm)=LANG(?abs)
                ) .
                FILTER(
                    STRSTARTS(
                        LCASE(STR(
                            ?nm
                        )),
                        "{escapednamehint}"
                    )
                ) .
            }}
            LIMIT 15
        """
        # sw.setTimeout(30)
        sw.setQuery(q)
        results = sw.query().convert()
        return [
            PlaceSuggestion(result["lang"]["value"], result["lat"]["value"],
                            result["long"]["value"], result["nm"]["value"],
                            result["abs"]["value"], result["plc"]["value"])
            for result in results["results"]["bindings"]
        ]
示例#29
0
def query_training(shortId):
    """Return the training records of the faculty member identified by *shortId*.

    Describes the faculty URI, its trainings and each training's optional
    specialty, hospital and organization. Each returned item is the
    training's predicate mapping with an added ``'rabid'`` key; where present,
    the specialty/hospital/organization values are replaced by dicts holding
    the linked resource's ``rabid`` and ``label``.
    """
    uri = shortIdToUri(shortId)
    train_prop = property_map['trainings']
    spec_prop = property_map['specialty']
    hosp_prop = property_map['hospital']
    org_prop = property_map['organization']
    label_prop = property_map['label']

    remote = SPARQLWrapper.SPARQLWrapper(queryUrl, updateUrl)
    remote.addParameter('email', user)
    remote.addParameter('password', passw)
    remote.setMethod(SPARQLWrapper.POST)
    describe_template = """
        DESCRIBE ?uri ?x1 ?x2 ?x3 ?x4
        WHERE {{
          ?uri <{1}> ?x1 .
          OPTIONAL {{?x1 <{2}> ?x2 .}}
          OPTIONAL {{?x1 <{3}> ?x3 .}}
          OPTIONAL {{?x1 <{4}> ?x4 .}}
          values ?uri {{ <{0}> }}
        }}"""
    remote.setQuery(describe_template.format(
        uri, train_prop, spec_prop, hosp_prop, org_prop))
    graph = remote.queryAndConvert()

    # subject -> predicate -> list of objects, all converted to Python values.
    resources = defaultdict(lambda: defaultdict(list))
    for subj, pred, obj in graph.triples((None, None, None)):
        resources[subj.toPython()][pred.toPython()].append(obj.toPython())

    faculty_data = resources[uri]
    trainings = []
    for train in faculty_data.get(train_prop, []):
        trainings.append((train, resources[train]))

    out = []
    for rabid, data in trainings:
        data['rabid'] = rabid
        # Expand each linked resource into its id plus label list.
        for linked_prop in (spec_prop, hosp_prop, org_prop):
            if data.get(linked_prop):
                data[linked_prop] = [
                    {'rabid': ref, 'label': resources[ref][label_prop]}
                    for ref in data[linked_prop]]
        out.append(data)
    return out
def get_similar_resources(resource):
    """Rank resources similar to *resource* and print the ranking as JSON.

    Steps: fetch all property/value pairs of the DBpedia resource, derive its
    type and abstract from them, refill the module-level ``similar_resources``
    list via ``get_distractors``, attach the matching concept score from
    ``alchemy_concepts`` and the one-degree path count to each entry, sort,
    and print a JSON document with ``total``, ``error`` and ``resources``.

    The JSON is serialized with ``json.dumps`` from a dictionary instead of
    being concatenated by hand, so an empty result list yields
    ``"resources": []`` rather than a parse error and special characters in a
    URI are escaped correctly.
    """
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    query1 = """
    select distinct ?prop ?value
    where {
    <http://dbpedia.org/resource/""" + resource + """> ?prop ?value }
      """

    sparql.setQuery(query1)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    resource_type = get_resource_type(results)
    get_distractors(resource, resource_type)

    # Alchemy API part: copy the concept score onto the matching entry.
    abstract = get_abstract(results)
    concepts = alchemy_concepts(abstract)

    for concept in concepts:
        for res in similar_resources:
            if concept[0] == res[0]:
                res[2] = concept[1]

    for res in similar_resources:
        res[3] = int(total_one_degree_paths(resource, res[0]))

    # Highest concept score first, then shared properties, then path count.
    similar_resources.sort(key=lambda x: (-x[2], -x[1], -x[3]))

    # Every value is emitted as a string, as in the original output format.
    payload = {
        "total": str(len(similar_resources)),
        "error": "0",
        "resources": [
            {
                "rank": str(rank),
                "dbpedia": entry[0],
                "similarity": str(entry[1]),
                "alchemy": str(entry[2]),
                "paths": str(entry[3]),
            }
            for rank, entry in enumerate(similar_resources, 1)
        ],
    }

    print(json.dumps(payload, indent=4))
示例#31
0
    def get_items(self, query, item_name="item"):
        """Yield ``wbinteract.Item`` objects for every item returned by *query*.

        The SPARQL result column ``item_name`` holds entity URLs; the
        ``self.entity_url`` prefix is removed to get ids, which are fetched
        from ``self.site`` in batches of 50 via ``wbgetentities``. Entities
        for which ``Item.from_json`` raises ``ValueError`` are skipped.
        """
        client = SPARQLWrapper.SPARQLWrapper(self.endpoint)
        client.setQuery(query)
        client.setReturnFormat(SPARQLWrapper.JSON)
        rows = client.query().convert()["results"]["bindings"]

        # Lazily reduce each entity URL to its bare id.
        pending_ids = (
            row[item_name]["value"].replace(self.entity_url, "")
            for row in rows
        )
        # The sentinel form of iter() stops at the first empty batch.
        for batch in iter(lambda: list(itertools.islice(pending_ids, 50)), []):
            reply = self.site.api(action="wbgetentities", ids=batch)
            for raw_entity in reply["entities"].values():
                try:
                    yield wbinteract.Item.from_json(self.site, raw_entity)
                except ValueError:
                    continue
示例#32
0
def mint_uri():
    """Generate an individual URI that does not yet occur in the store.

    Draws random UUID-based URIs and ASKs the store whether each appears as
    a subject or object. Returns the first unused URI, or ``False`` after 50
    candidates were all found to be in use.
    """
    ask_template = "ASK WHERE {{ {{ <{0}> ?p ?o. }} UNION {{ ?s ?p2 <{0}> }} }}"
    remote = SPARQLWrapper.SPARQLWrapper(queryUrl, updateUrl)
    remote.setReturnFormat('json')
    remote.addParameter('email', user)
    remote.addParameter('password', passw)
    remote.setMethod(SPARQLWrapper.POST)

    for _attempt in range(50):
        candidate = 'http://vivo.brown.edu/individual/n{}'.format(
            uuid.uuid4().hex)
        remote.setQuery(ask_template.format(candidate))
        answer = remote.queryAndConvert()
        if not answer['boolean']:
            return candidate
    return False
示例#33
0
def get_graph_count(config):
    """Return the number of ``skos:Concept`` subjects in the configured graph.

    Queries ``config['fuseki'] + '/sparql'`` for the concept count inside the
    named graph ``config['graph']`` and returns it as an int.
    """
    # logger.info('Querying {}/sparql'.format(config['fuseki']))
    endpoint = SPARQLWrapper.SPARQLWrapper('{}/sparql'.format(config['fuseki']))
    endpoint.setMethod(SPARQLWrapper.POST)  # to avoid caching
    endpoint.setReturnFormat(SPARQLWrapper.JSON)
    count_query = """
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

        SELECT (COUNT(?s) as ?conceptCount)
        WHERE {
           GRAPH <%s> {
             ?s a skos:Concept .
           }
        }
    """ % (config['graph'])
    endpoint.setQuery(textwrap.dedent(count_query))
    response = endpoint.query().convert()
    first_row = response['results']['bindings'][0]
    return int(first_row['conceptCount']['value'])
def create(wiki_directory, target_directory, subdir_start, subdir_end,
           process_no):
    """Process a slice of the sub-directories of *wiki_directory*.

    Sub-directories are counted in ``os.listdir`` order; those with index in
    ``[subdir_start, subdir_end)`` are processed, and every file in them is
    passed to ``process_file``. Progress is written to
    ``../logs/log<process_no>.txt`` (kept in the global ``logfile``) and the
    final sentence count is stored in the global ``no_sents``.

    The directory counter now advances after each processed directory, so
    processing stops at ``subdir_end``; previously it stopped advancing at
    ``subdir_start`` and every remaining directory was processed. The log
    file is also closed when processing raises.
    """
    global no_sents
    global logfile
    subdir_count = 0
    sent_count = 0

    logfile = open("../logs/log" + str(process_no) + ".txt", "w")
    try:
        if not os.path.exists(target_directory):
            os.makedirs(target_directory)

        tagger = PerceptronTagger()
        sparql = SPARQLWrapper.SPARQLWrapper("http://localhost:8890/sparql/")

        print("processing " + wiki_directory)
        logfile.write("processing " + wiki_directory + "\n")

        for subdir in os.listdir(wiki_directory):
            wiki_sub_directory = wiki_directory + "/" + subdir
            # Plain files do not count towards the directory index.
            if not os.path.isdir(wiki_sub_directory):
                continue
            # process only subdirs from subdir_start to subdir_end
            if subdir_count < subdir_start:
                subdir_count += 1
                logfile.write("skipping " + wiki_sub_directory + "\n")
                continue
            if subdir_count >= subdir_end:
                break

            print("processing " + wiki_sub_directory)
            logfile.write("processing " + wiki_sub_directory + "\n")
            target_subdir = target_directory + "/" + subdir
            for file in os.listdir(wiki_sub_directory):
                wiki_file_path = wiki_sub_directory + "/" + file
                sent_count = process_file(wiki_file_path, sparql, tagger,
                                          sent_count, target_subdir, file)
            # Advance the index so the subdir_end bound is actually reached.
            subdir_count += 1
        no_sents = sent_count
    finally:
        logfile.close()
def count_freq(resource_type, prop):
    '''
    Count the distinct DBpedia entities of ``resource_type`` that have ``prop``.

    Returns the count as an int, or 0 when the endpoint returns no rows.
    '''
    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")

    count_query = """
    SELECT COUNT(DISTINCT ?entity)
    WHERE { ?entity <""" + prop + """> ?value.
    ?entity <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <""" + resource_type + """> }
    """

    endpoint.setQuery(count_query)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    # The unnamed aggregate comes back under the "callret-0" key.
    frequency = 0
    for row in rows:
        frequency = int(row["callret-0"]["value"])
    return frequency
def total_pages_for_type(resource_type):
    '''
    Count the distinct DBpedia entities of ``resource_type`` that have a wikiPageID.

    Returns the count as an int, or 0 when the endpoint returns no rows.
    '''
    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")

    count_query = """
    SELECT COUNT(DISTINCT ?entity)
    WHERE { ?entity <http://dbpedia.org/ontology/wikiPageID> ?value.
    ?entity <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <""" + resource_type + """> }
    """

    endpoint.setQuery(count_query)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    # The unnamed aggregate comes back under the "callret-0" key.
    total = 0
    for row in rows:
        total = int(row["callret-0"]["value"])
    return total
#!/usr/bin/env python3

from SPARQLWrapper import *
import os
import os.path as path
import json
import glob
import time

# Base URL of the Fuseki dataset; "query" and "update" are appended below to
# form the two endpoint URLs.
# Alternative (remote) base URL, currently disabled:
#BaseUrl="http://buda1.bdrc.io:13180/fuseki/bdrcrw/"
BaseUrl="http://localhost:13180/fuseki/bdrcrw/"

# Shared module-level endpoint for SPARQL queries: requests are sent with
# POSTDIRECTLY/POST and results are requested as JSON.
QueryEndpoint = SPARQLWrapper(BaseUrl+"query")
QueryEndpoint.setRequestMethod(POSTDIRECTLY)
QueryEndpoint.setMethod(POST)
QueryEndpoint.setReturnFormat(JSON)
# Shared module-level endpoint for SPARQL updates; same request settings, but
# no return format is configured here.
UpdateEndpoint = SPARQLWrapper(BaseUrl+"update")
UpdateEndpoint.setRequestMethod(POSTDIRECTLY)
UpdateEndpoint.setMethod(POST)
# Absolute path of the directory containing this file; test groups are
# resolved relative to it.
ThisPath = os.path.dirname(os.path.abspath(__file__))

def get_all_tests(testgroupname, specifictest=None):
    """Return the test directories that belong to a test group.

    The group directory is ``ThisPath/testgroupname``. When ``specifictest``
    is given, a one-element list with that test's path is returned without
    checking that it exists. Otherwise the result is the sorted list of
    entries directly under the group directory that are directories.
    """
    group_dir = path.join(ThisPath, testgroupname)
    if specifictest is not None:
        return [path.join(group_dir, specifictest)]
    candidates = sorted(glob.glob(group_dir + "/*"))
    return [entry for entry in candidates if path.isdir(entry)]
示例#38
0
    fh = open(CRAWLER_DIR+localName, "w")
    fh.write(response.read())
    fh.close()
    return CRAWLER_DIR+localName

def loadMetricConfiguration():
    """Load ``config.ttl`` (Turtle) and return it serialized as JSON-LD.

    The file is read from the current working directory into a fresh rdflib
    graph, which is then serialized with ``indent=0``.
    """
    graph = rdflib.Graph()
    graph.parse("config.ttl", format="turtle")
    return graph.serialize(format="json-ld", indent=0)

def formatMetricConfiguration(configStr):
    """Flatten a configuration string onto a single line.

    Newlines become single spaces and carriage returns are removed; the
    flattened string is returned.
    """
    # NOTE(review): the last pair is a no-op -- '\"' is the same one-character
    # string as '"', so double quotes pass through unescaped. If escaping was
    # intended the replacement would need to be '\\"'; confirm with the
    # consumers of this string before changing it.
    substitutions = (('\n', ' '), ('\r', ''), ('"', '\"'))
    flattened = configStr
    for old, new in substitutions:
        flattened = flattened.replace(old, new)
    return flattened

# MAIN
# Ask the LOD Laundromat SPARQL endpoint for the md5 of every document whose
# triple count (llo:triples) is greater than zero.
sparql = SPARQLWrapper(LOD_LAUNDROMAT_SPARQL)
sparql.setQuery('PREFIX llo: <http://lodlaundromat.org/ontology/> SELECT ?md5 WHERE { ?d llo:triples ?n . ?d llo:md5 ?md5 . FILTER (?n > 0) }')
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Make sure the local download directory exists before anything is fetched.
if not os.path.exists(CRAWLER_DIR):
    os.makedirs(CRAWLER_DIR)

# The metrics configuration is loaded and flattened once, then reused for
# every document in the loop below.
metricsConf = formatMetricConfiguration(loadMetricConfiguration())

for result in results["results"]["bindings"]:
    # The download URL is the download base followed by the document's md5.
    document = LOD_LAUNDROMAT_DOWNLOAD + result['md5']['value']
    print 'Downloading : '+document
    # download() returns the local path the document was written to.
    filename = download(document)

    logger_crawl.info("Metrics config: {0}".format(metricsConf))
示例#39
0
def fes_date_filter(start_date='1900-01-01', stop_date='2100-01-01', constraint='overlaps'):
    """Build the pair of FES filters that restrict records by temporal extent.

    Parameters:
        start_date: start of the date range, passed as the filter literal.
        stop_date: end of the date range, passed as the filter literal.
        constraint: 'overlaps' selects records whose extent begins on or
            before ``stop_date`` and ends on or after ``start_date``;
            'within' selects records whose extent begins on or after
            ``start_date`` and ends on or before ``stop_date``.

    Returns:
        A ``(start, stop)`` tuple of filters on ``apiso:TempExtent_begin``
        and ``apiso:TempExtent_end`` respectively.

    Raises:
        ValueError: if ``constraint`` is neither 'overlaps' nor 'within'.
    """
    # Validate first: otherwise an unknown constraint would fall through both
    # branches and fail at the return with an opaque UnboundLocalError.
    if constraint not in ('overlaps', 'within'):
        raise ValueError(
            "constraint must be 'overlaps' or 'within', got {0!r}".format(constraint))

    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
    else:
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
    return start, stop

# Establish the bounding-box filter for the geographic range of IBAs.
# NOTE(review): the list is presumably [min lon, min lat, max lon, max lat] --
# confirm against the fes.BBox signature.
bbox = fes.BBox([-130.5, 47.9, 167.6, 74.7])


# In[50]:

# Prepare a query against the MMI SPARQL endpoint. It selects every
# ioos:Parameter that has a Term, a Definition and Units, keeping only rows
# where the property URI contains "exactMatch" or "closeMatch" and the value
# contains "temperature" (both matched case-insensitively), ordered by
# parameter.
sparql = SPARQLWrapper("http://mmisw.org/sparql")
queryString = """
PREFIX ioos: <http://mmisw.org/ont/ioos/parameter/>
SELECT DISTINCT ?parameter ?definition ?unit ?property ?value 
WHERE {?parameter a ioos:Parameter .
       ?parameter ?property ?value .
       ?parameter ioos:Term ?term . 
       ?parameter ioos:Definition ?definition . 
       ?parameter ioos:Units ?unit .
       FILTER (regex(str(?property), "(exactMatch|closeMatch)", "i") && regex(str(?value), "temperature", "i") )
      } 
ORDER BY ?parameter
"""

# The query is only configured here (text and JSON return format); these
# lines do not execute it.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
def handle_is_of_relations(resource, resource_type, total_pages):
    '''
    Function which records the incoming ("is ... of") relations of a resource.

    Queries DBpedia for every triple that has the resource as its object and,
    for each returned property/value pair, updates the module-level ans_dict
    and prop_val_count in place. Nothing is returned.

    resource      -- DBpedia resource name, appended to
                     http://dbpedia.org/resource/ inside the query.
    resource_type -- type URI passed on to count_freq for the frequency feature.
    total_pages   -- denominator of the frequency feature; must be non-zero.
    '''
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")

    query1 = """
    select  ?prop ?value where {
    ?value ?prop <http://dbpedia.org/resource/""" + resource + """>  }
    """

    sparql.setQuery(query1)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    for result in results["results"]["bindings"]:
        prop = result["prop"]["value"]
        value = result["value"]["value"]
        cleaned_property_label = get_label(prop)

        if blacklisted(cleaned_property_label):
            continue

        # Only properties whose URI mentions one of these three words are kept.
        if "ontology" not in prop and "property" not in prop and "subject" not in prop:
            continue

        if prop in ans_dict:
            # Property already recorded: just add the value and mark it as an
            # "is of" relation.
            ans_dict[prop].setdefault('value', []).append(value)
            # An entry can reach ans_dict without a counter (rows rejected by
            # the language check below are still stored), so do not assume
            # the key exists in prop_val_count.
            prop_val_count[prop] = prop_val_count.get(prop, 0) + 1
            ans_dict[prop]['is_of_relation'] = 1
            continue

        # Every known parameter starts at 0; the features are filled in only
        # when the value passes the language check.
        prop_info = dict.fromkeys(parameter_list, 0)
        prop_info['score'] = 0
        prop_info['value'] = []

        # Accept values without a language tag, or whose tag contains 'en'.
        if "xml:lang" not in result["value"] or 'en' in result["value"]["xml:lang"]:
            prop_value = {}
            prop_value['prop'] = prop
            prop_value['value'] = value

            prop_val_count[prop] = prop_val_count.get(prop, 0) + 1

            prop_info['label'] = cleaned_property_label

            prop_info['value'].append(value)
            prop_info['blacklisted'] = 0

            google_autosuggest = google_autocomplete_ranker(
                resource, cleaned_property_label)
            prop_info['is_onto'] = is_onto(prop)
            prop_info['special_char'] = doesnt_contain_special_chars(
                cleaned_property_label)
            prop_info['no_of_words'] = no_of_words(cleaned_property_label)
            range_comment = prop_has_range_or_comment(prop_value)
            prop_info['has_range'] = range_comment[0]
            prop_info['has_comment'] = range_comment[1]
            prop_info['value_relevant'] = value_relevant(prop_value)
            prop_info['special_datatype'] = is_special_datatype(result)
            prop_info['google_keypress'] = google_autosuggest[0]
            prop_info['google_location'] = google_autosuggest[1]
            prop_info['is_of_relation'] = 1
            prop_info['frequency'] = count_freq(
                resource_type, prop) / float(total_pages)
        # Stored even when the language check failed, in which case the entry
        # keeps its all-zero defaults.
        ans_dict[prop] = prop_info
def start(resource):

    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    query1 = """
    select distinct ?prop ?value
    where {
    <http://dbpedia.org/resource/""" + resource + """> ?prop ?value }
      """

    sparql.setQuery(query1)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    resource_type = get_resource_type(results)
    total_pages = total_pages_for_type(resource_type)

    for result in results["results"]["bindings"]:

        prop = result["prop"]["value"]
        value = result["value"]["value"]
        cleaned_property_label = get_label(prop)

        if "ontology" not in prop and "property" not in prop and "subject" not in prop:
            continue

        prop_info = dict.fromkeys(parameter_list, 0)
        prop_info['score'] = 0
        prop_info['value'] = []

        if "xml:lang" not in result["value"] or 'en' in result["value"]["xml:lang"]:

            prop_value = {}
            prop_value['prop'] = prop
            prop_value['value'] = value

            if prop in prop_val_count:
                prop_val_count[prop] += 1
            else:

                normalized_label = cleaned_property_label.lower().replace(
                    ' ', '')

                if normalized_label in normalized_labels:
                    continue

                normalized_labels.append(normalized_label)
                prop_val_count[prop] = 1

            if prop in ans_dict:
                ans_dict[prop].setdefault('value', []).append(value)
                continue

            prop_info['label'] = cleaned_property_label

            prop_info.setdefault('value', []).append(value)
            prop_info['blacklisted'] = 0

            '''
            if prop_info['blacklisted']:
                ans_dict[prop] = prop_info
                continue
            '''

            google_autosuggest = google_autocomplete_ranker(
                resource, cleaned_property_label)
            prop_info['is_onto'] = is_onto(prop)
            prop_info['special_char'] = doesnt_contain_special_chars(
                cleaned_property_label)
            prop_info['no_of_words'] = no_of_words(cleaned_property_label)
            range_comment = prop_has_range_or_comment(prop_value)
            prop_info['has_range'] = range_comment[0]
            prop_info['has_comment'] = range_comment[1]
            prop_info['value_relevant'] = value_relevant(prop_value)
            prop_info['special_datatype'] = is_special_datatype(result)
            prop_info['google_keypress'] = google_autosuggest[0]
            prop_info['google_location'] = google_autosuggest[1]
            prop_info['is_of_relation'] = 0
            prop_info['frequency'] = count_freq(
                resource_type, prop) / float(total_pages)
        ans_dict[prop] = prop_info

    #handle_is_of_relations(resource, resource_type,  total_pages)

    for prop, count in prop_val_count.iteritems():
        ans_dict[prop]['total_values'] = (1.0 - 1.0 / count)
        #score = raw_input("Enter score for: " + prop + " (from 1-5) \n")
        ans_dict[prop]['score'] = 0