def get_edge_prov_counts(qid, pid, o_qid, use_subclass_sub=False,
                         extend_subclass_sub=False, use_subclass_obj=False,
                         extend_subclass_obj=False, biological_edge=False):
    """Count referenced `pid` statements per "stated in" (P248) source.

    Builds a SPARQL query that collects the distinct (item, object,
    reference) triples for statements of property `pid` carrying a P248
    reference, groups them by reference and orders the groups by
    descending count.  The query text is appended to the module-level
    ``LOGSTR`` whether or not the query succeeds.

    Args:
        qid: Entity id the subject items are related to (used as
            ``wd:{qid}``).
        pid: Property id of the statements being counted.
        o_qid: Entity id the object items are related to (used as
            ``wd:{o_qid}``).
        use_subclass_sub, extend_subclass_sub: Forwarded to
            ``gc.determine_p`` to obtain the predicate text linking
            ``?item`` to ``wd:{qid}``.
        use_subclass_obj, extend_subclass_obj: Forwarded to
            ``gc.determine_p`` to obtain the predicate text linking
            ``?obj`` to ``wd:{o_qid}``.
        biological_edge: When truthy, the subject/object restriction
            clauses are left out of the query entirely, so `qid` and
            `o_qid` do not constrain the result.

    Returns:
        A list of ``(ref_id, ref_label, count)`` tuples, where ``ref_id``
        is the reference URI with the Wikidata entity prefix removed and
        ``count`` is an ``int``.  An empty list is returned when the
        query fails.
    """
    global LOGSTR

    p_subj = gc.determine_p(use_subclass_sub, extend_subclass_sub)
    p_obj = gc.determine_p(use_subclass_obj, extend_subclass_obj)

    if biological_edge:
        bio_edge = ""
    else:
        bio_edge = (
            ' ?item {p_subj} wd:{qid}. ?obj {p_obj} wd:{o_qid}.'
            .replace('{p_subj}', p_subj)
            .replace('{qid}', qid)
            .replace('{p_obj}', p_obj)
            .replace('{o_qid}', o_qid)
        )

    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    edge_query = (
        ' SELECT ?ref ?refLabel ?count WHERE {'
        ' {SELECT ?ref (COUNT(*) AS ?count) WHERE {'
        ' SELECT DISTINCT ?item ?obj ?ref WHERE {'
        ' {bio_edge}'
        ' ?item p:{pid} [ps:{pid} ?obj;'
        ' prov:wasDerivedFrom [pr:P248 ?ref;] ]'
        ' }} GROUP BY ?ref }'
        ' SERVICE wikibase:label {'
        ' bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }'
        ' }ORDER BY DESC (?count)'
    ).replace('{pid}', pid).replace('{bio_edge}', bio_edge)

    try:
        d = gc.execute_sparql_query(edge_query)['results']['bindings']
    except Exception:
        # Best effort: report the failure and carry on with no rows.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate.
        print("***** FAILED SPARQL *****")
        print("Item QID: {}\tProp PID: {} Obj QID: {}\n".format(
            qid, pid, o_qid))
        d = []

    LOGSTR += 'Edge Reference Query:'
    LOGSTR += edge_query + '\n\n'

    entity_prefix = "http://www.wikidata.org/entity/"
    return [(x['ref']['value'].replace(entity_prefix, ""),
             x['refLabel']['value'],
             int(x['count']['value'])) for x in d]
def count_edges(s, p, o, s_subclass, s_expand, o_subclass, o_expand):
    """Return the number of distinct `p` edges between two entity groups.

    The query matches ``?subject`` values linked to ``wd:{s}`` and
    ``?object`` values linked to ``wd:{o}`` (each through the predicate
    text returned by ``determine_p``) that are joined by ``wdt:{p}``, and
    counts the distinct solutions.

    Args:
        s: Entity id constraining the subject side.
        p: Property id of the edge.
        o: Entity id constraining the object side.
        s_subclass, s_expand: Forwarded to ``determine_p`` for the
            subject side.
        o_subclass, o_expand: Forwarded to ``determine_p`` for the
            object side.

    Returns:
        The count from the first result binding, as an ``int``.  Errors
        from the query call or from an empty result are not caught here.
    """
    subject_path = determine_p(s_subclass, s_expand)
    object_path = determine_p(o_subclass, o_expand)

    query = (
        ' SELECT (count(distinct *) as ?count) WHERE {'
        ' ?subject {p_sub} wd:{s} .'
        ' ?subject wdt:{p} ?object .'
        ' ?object {p_obj} wd:{o}'
        ' } '
    )
    # Applied strictly in this order, one placeholder at a time.
    substitutions = (
        ('{p_sub}', subject_path),
        ('{s}', s),
        ('{p}', p),
        ('{p_obj}', object_path),
        ('{o}', o),
    )
    for placeholder, value in substitutions:
        query = query.replace(placeholder, value)

    response = execute_sparql_query(query)
    bindings = response['results']['bindings']
    counts = [int(row['count']['value']) for row in bindings]
    return counts[0]
def count_edges(s, p, o, s_subclass, s_expand, o_subclass, o_expand):
    """Return the number of distinct `p` edges between two entity groups.

    The query matches ``?subject`` values linked to ``wd:{s}`` and
    ``?object`` values linked to ``wd:{o}`` (each through the predicate
    text returned by ``gc.determine_p``) that are joined by ``wdt:{p}``.
    When `p` has a listed reciprocal property, edges stated in the
    reverse direction with that reciprocal are UNIONed in, so both
    spellings of the relationship are counted together.

    The query, the raw bindings and the final count are printed with
    ``B1:``/``B2:``/``B3:`` prefixes.

    Args:
        s: Entity id constraining the subject side.
        p: Property id of the edge.
        o: Entity id constraining the object side.
        s_subclass, s_expand: Forwarded to ``gc.determine_p`` for the
            subject side.
        o_subclass, o_expand: Forwarded to ``gc.determine_p`` for the
            object side.

    Returns:
        The count as an ``int``, or ``-1`` if running the query or
        reading its result fails.
    """
    p_sub = gc.determine_p(s_subclass, s_expand)
    p_obj = gc.determine_p(o_subclass, o_expand)

    # Reciprocal relationships that need to be collapsed into one count.
    recip_rels = {
        'P527': 'P361',
        'P361': 'P527',
        'P2176': 'P2175',
        'P2175': 'P2176',
        'P702': 'P688',
        'P688': 'P702',
        'P1343': 'P4510',
        'P4510': 'P1343',
        'P828': 'P1542',
        'P1542': 'P828',
        'P3781': 'P3780',
        'P3780': 'P3781',
    }
    reciprocal = recip_rels.get(p)
    if reciprocal is not None:
        u = "UNION {?object wdt:" + reciprocal + " ?subject .}"
    else:
        u = ""

    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    q_string = (
        ' SELECT (count(distinct *) as ?count) WHERE {'
        ' ?subject {p_sub} wd:{s} .'
        ' { {?subject wdt:{p} ?object .} {u} }'
        ' ?object {p_obj} wd:{o}'
        ' } '
    ).replace('{p_sub}', p_sub).replace('{s}', s).replace('{p}', p).replace(
        '{p_obj}', p_obj).replace('{o}', o).replace('{u}', u)
    print("B1: " + q_string)

    try:
        d = gc.execute_sparql_query(q_string)['results']['bindings']
        print("B2: " + str(d))
        edge_count = [int(x['count']['value']) for x in d][0]
    except Exception:
        # Best effort: signal failure with -1.  `Exception` (not a bare
        # except) so Ctrl-C / SystemExit still propagate.
        edge_count = -1
    print("B3: " + str(edge_count))
    return edge_count
def count_prop(qid, prop, is_subclass, expand):
    """Return how many `prop` statements exist on items tied to `qid`.

    The query matches ``?item`` values linked to ``wd:{qid}`` through the
    predicate text returned by ``determine_p`` that also have some
    ``wdt:{prop}`` value, and counts the matching solutions (no DISTINCT
    is applied).

    Args:
        qid: Entity id the items are related to.
        prop: Property id that must be present on each item.
        is_subclass, expand: Forwarded to ``determine_p``.

    Returns:
        The count from the first result binding, as an ``int``.  Errors
        from the query call or from an empty result are not caught here.
    """
    item_path = determine_p(is_subclass, expand)

    query = (
        ' SELECT (count(*) as ?count) WHERE {'
        ' ?item {p} wd:{qid} .'
        ' ?item wdt:{prop} [] .'
        ' } '
    )
    # Applied strictly in this order, one placeholder at a time.
    substitutions = (
        ('{p}', item_path),
        ('{qid}', qid),
        ('{prop}', prop),
    )
    for placeholder, value in substitutions:
        query = query.replace(placeholder, value)

    response = execute_sparql_query(query)
    bindings = response['results']['bindings']
    counts = [int(row['count']['value']) for row in bindings]
    return counts[0]
def count_prop(qid, prop, is_subclass, expand):
    """Return how many distinct items tied to `qid` carry property `prop`.

    The query selects the distinct ``?item`` values linked to
    ``wd:{qid}`` through the predicate text returned by
    ``gc.determine_p`` that also have some ``wdt:{prop}`` value, and
    counts them.

    The query, the raw bindings and the final count are printed with
    ``A1:``/``A2:``/``A3:`` prefixes.

    Args:
        qid: Entity id the items are related to.
        prop: Property id that must be present on each item.
        is_subclass, expand: Forwarded to ``gc.determine_p``.

    Returns:
        The count as an ``int``, or ``-1`` if running the query or
        reading its result fails.
    """
    p = gc.determine_p(is_subclass, expand)

    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    q_string = (
        ' SELECT (count(?item) as ?count) where {'
        ' SELECT DISTINCT ?item WHERE {'
        ' ?item {p} wd:{qid} .'
        ' ?item wdt:{prop} [] .'
        ' }} '
    ).replace('{p}', p).replace('{qid}', qid).replace('{prop}', prop)
    print("A1: " + q_string)

    try:
        d = gc.execute_sparql_query(q_string)['results']['bindings']
        print("A2: " + str(d))
        prop_count = [int(x['count']['value']) for x in d][0]
    except Exception:
        # Best effort: signal failure with -1.  `Exception` (not a bare
        # except) so Ctrl-C / SystemExit still propagate.
        prop_count = -1
    print("A3: " + str(prop_count))
    return prop_count
def get_property_prov_counts(qid, pid, use_subclass=False,
                             extend_subclass=False):
    """Count referenced `pid` statements per "stated in" (P248) source.

    Builds a SPARQL query that collects the distinct (item, value,
    reference) triples for statements of property `pid` carrying a P248
    reference, restricted to items linked to ``wd:{qid}``, then groups
    them by reference and orders the groups by descending count.  The
    query text is appended to the module-level ``LOGSTR`` whether or not
    the query succeeds.

    Args:
        qid: Entity id the items are related to.
        pid: Property id of the statements being counted.
        use_subclass, extend_subclass: Forwarded to ``gc.determine_p`` to
            obtain the predicate text linking ``?item`` to ``wd:{qid}``.

    Returns:
        A list of ``(ref_id, ref_label, count)`` tuples, where ``ref_id``
        is the reference URI with the Wikidata entity prefix removed and
        ``count`` is an ``int``.  An empty list is returned when the
        query fails.
    """
    global LOGSTR

    p_subj = gc.determine_p(use_subclass, extend_subclass)

    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    prop_query = (
        ' SELECT ?ref ?refLabel ?count WHERE {'
        ' {SELECT ?ref (COUNT(*) AS ?count) WHERE {'
        ' SELECT DISTINCT ?item ?xref ?ref WHERE {'
        ' ?item {p_subj} wd:{qid}.'
        ' ?item p:{pid} [ps:{pid} ?xref;'
        ' prov:wasDerivedFrom [pr:P248 ?ref;] ]'
        ' }} GROUP BY ?ref }'
        ' SERVICE wikibase:label {'
        ' bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }'
        ' }ORDER BY DESC (?count)'
    ).replace('{p_subj}', p_subj).replace('{qid}', qid).replace('{pid}', pid)

    try:
        d = gc.execute_sparql_query(prop_query)['results']['bindings']
    except Exception:
        # Best effort: report the failure and carry on with no rows.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate.
        print("***** FAILED SPARQL *****")
        print("Item QID: {}\tProp PID: {}\n".format(qid, pid))
        d = []

    LOGSTR += 'Node Property Reference Query:'
    LOGSTR += prop_query + '\n\n'

    entity_prefix = "http://www.wikidata.org/entity/"
    return [(x['ref']['value'].replace(entity_prefix, ""),
             x['refLabel']['value'],
             int(x['count']['value'])) for x in d]