Пример #1
0
def get_edge_prov_counts(qid,
                         pid,
                         o_qid,
                         use_subclass_sub=False,
                         extend_subclass_sub=False,
                         use_subclass_obj=False,
                         extend_subclass_obj=False,
                         biological_edge=False):
    """Count distinct (item, object) edges per ``pr:P248`` reference.

    Runs a SPARQL query that matches ``pid`` statements carrying a
    ``prov:wasDerivedFrom`` node with a ``pr:P248`` value and counts the
    distinct ``?item``/``?obj``/``?ref`` rows for each reference.

    Args:
        qid: Entity id placed after ``wd:`` to constrain ``?item``.
        pid: Property id used in the ``p:``/``ps:`` statement pattern.
        o_qid: Entity id placed after ``wd:`` to constrain ``?obj``.
        use_subclass_sub: Forwarded to ``gc.determine_p`` for the subject.
        extend_subclass_sub: Forwarded to ``gc.determine_p`` for the subject.
        use_subclass_obj: Forwarded to ``gc.determine_p`` for the object.
        extend_subclass_obj: Forwarded to ``gc.determine_p`` for the object.
        biological_edge: When true, the subject and object constraints are
            left out of the query entirely.

    Returns:
        A list of ``(ref_id, ref_label, count)`` tuples in the order the
        endpoint returned them (the query asks for descending count).
        ``ref_id`` has ``http://www.wikidata.org/entity/`` stripped out.
        An empty list is returned when executing the query fails.

    Side effects:
        On failure, prints a diagnostic and appends the query text to the
        module-level ``LOGSTR``.
    """
    global LOGSTR

    # gc.determine_p is defined outside this block; presumably it returns
    # the predicate/path text that links an item to its class -- verify.
    p_subj = gc.determine_p(use_subclass_sub, extend_subclass_sub)
    p_obj = gc.determine_p(use_subclass_obj, extend_subclass_obj)

    if biological_edge:
        # NOTE(review): with no constraints, qid and o_qid never reach the
        # query, so the counts cover every ``pid`` statement -- confirm
        # that this is the intended meaning of the flag.
        bio_edge = ""
    else:
        bio_edge = ("""          ?item {p_subj} wd:{qid}.
          ?obj {p_obj} wd:{o_qid}.""".replace('{p_subj}', p_subj).replace(
            '{qid}', qid).replace('{p_obj}', p_obj).replace('{o_qid}', o_qid))

    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    edge_query = """
    SELECT ?ref ?refLabel ?count WHERE {
      {SELECT ?ref  (COUNT(*) AS ?count) WHERE {
        SELECT DISTINCT ?item ?obj ?ref WHERE  {
{bio_edge}
          ?item p:{pid} [ps:{pid} ?obj;
                        prov:wasDerivedFrom
                        [pr:P248 ?ref;]
                       ]
          }} GROUP BY ?ref
       }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }

        }ORDER BY DESC (?count)""".replace('{pid}', pid).replace(
        '{bio_edge}', bio_edge)

    try:
        d = gc.execute_sparql_query(edge_query)['results']['bindings']
    except Exception:
        # Best-effort: a failed query yields no rows instead of aborting
        # the caller. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        print("***** FAILED SPARQL *****")
        print("Item QID: {}\tProp PID: {} Obj QID: {}\n".format(
            qid, pid, o_qid))
        d = []
        LOGSTR += 'Edge Reference Query:'
        LOGSTR += edge_query + '\n\n'

    return [(x['ref']['value'].replace("http://www.wikidata.org/entity/",
                                       ""), x['refLabel']['value'],
             int(x['count']['value'])) for x in d]
Пример #2
0
def count_edges(s, p, o, s_subclass, s_expand, o_subclass, o_expand):
    """Return the number of ``p`` edges between members of ``s`` and ``o``.

    A SPARQL ``count(distinct *)`` query is assembled from a template and
    executed through ``execute_sparql_query``. The predicates tying
    ``?subject`` to ``s`` and ``?object`` to ``o`` come from
    ``determine_p``.

    Args:
        s: Entity id placed after ``wd:`` for the subject constraint.
        p: Property id placed after ``wdt:`` for the edge itself.
        o: Entity id placed after ``wd:`` for the object constraint.
        s_subclass: Forwarded to ``determine_p`` for the subject side.
        s_expand: Forwarded to ``determine_p`` for the subject side.
        o_subclass: Forwarded to ``determine_p`` for the object side.
        o_expand: Forwarded to ``determine_p`` for the object side.

    Returns:
        The integer ``count`` value of the first result row.

    Raises:
        IndexError: If the query produced no result rows.
    """
    subject_predicate = determine_p(s_subclass, s_expand)
    object_predicate = determine_p(o_subclass, o_expand)

    template = """
    SELECT (count(distinct *) as ?count) WHERE {
        ?subject {p_sub} wd:{s} .
        ?subject wdt:{p} ?object .
        ?object {p_obj} wd:{o} }
    """

    # Substitutions are applied one after another, in this exact order.
    substitutions = (
        ('{p_sub}', subject_predicate),
        ('{s}', s),
        ('{p}', p),
        ('{p_obj}', object_predicate),
        ('{o}', o),
    )
    query = template
    for placeholder, value in substitutions:
        query = query.replace(placeholder, value)

    response = execute_sparql_query(query)
    rows = response['results']['bindings']
    counts = [int(row['count']['value']) for row in rows]
    return counts[0]
Пример #3
0
def count_edges(s, p, o, s_subclass, s_expand, o_subclass, o_expand):
    """Count ``p`` edges between members of ``s`` and ``o``.

    Builds a SPARQL ``count(distinct *)`` query and runs it through
    ``gc.execute_sparql_query``. When ``p`` has an entry in the reciprocal
    table below, edges stated in the opposite direction with the paired
    property are matched as well (via ``UNION``), so both directions are
    collapsed into a single count.

    Args:
        s: Entity id placed after ``wd:`` for the subject constraint.
        p: Property id placed after ``wdt:`` for the edge itself.
        o: Entity id placed after ``wd:`` for the object constraint.
        s_subclass: Forwarded to ``gc.determine_p`` for the subject side.
        s_expand: Forwarded to ``gc.determine_p`` for the subject side.
        o_subclass: Forwarded to ``gc.determine_p`` for the object side.
        o_expand: Forwarded to ``gc.determine_p`` for the object side.

    Returns:
        The integer count from the first result row, or ``-1`` if the
        query or the parsing of its result fails.

    Side effects:
        Prints the query, the raw bindings and the final count, prefixed
        with ``B1:``, ``B2:`` and ``B3:``.
    """
    p_sub = gc.determine_p(s_subclass, s_expand)
    p_obj = gc.determine_p(o_subclass, o_expand)

    # test for reciprocal relationships that need to be collapsed
    recip_rels = {
        'P527': 'P361',
        'P361': 'P527',
        'P2176': 'P2175',
        'P2175': 'P2176',
        'P702': 'P688',
        'P688': 'P702',
        'P1343': 'P4510',
        'P4510': 'P1343',
        'P828': 'P1542',
        'P1542': 'P828',
        'P3781': 'P3780',
        'P3780': 'P3781',
    }

    if p in recip_rels:
        u = """UNION
               {?object wdt:""" + recip_rels[p] + """ ?subject .}"""
    else:
        u = ""

    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    q_string = """
    SELECT (count(distinct *) as ?count) WHERE {
        ?subject {p_sub} wd:{s} .
        {
            {?subject wdt:{p} ?object .}
            {u}
        }
        ?object {p_obj} wd:{o} }
    """.replace('{p_sub}', p_sub).replace('{s}', s).replace('{p}', p).replace(
        '{p_obj}', p_obj).replace('{o}', o).replace('{u}', u)
    print("B1: " + q_string)
    try:
        d = gc.execute_sparql_query(q_string)['results']['bindings']
        print("B2: " + str(d))
        edge_count = int(d[0]['count']['value'])
    except Exception:
        # Best-effort: any query or parsing failure is reported as -1.
        # Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate instead of being turned into -1.
        edge_count = -1
    print("B3: " + str(edge_count))
    return edge_count
Пример #4
0
def count_prop(qid, prop, is_subclass, expand):
    """Return how many matches a SPARQL query finds for ``prop`` on ``qid``.

    The query counts rows where ``?item`` is tied to ``wd:qid`` through
    the predicate returned by ``determine_p`` and also has some value for
    ``wdt:prop``.

    Args:
        qid: Entity id placed after ``wd:``.
        prop: Property id placed after ``wdt:``.
        is_subclass: Forwarded to ``determine_p``.
        expand: Forwarded to ``determine_p``.

    Returns:
        The integer ``count`` value of the first result row.

    Raises:
        IndexError: If the query produced no result rows.
    """
    type_predicate = determine_p(is_subclass, expand)

    template = """
    SELECT (count(*) as ?count) WHERE {
        ?item {p} wd:{qid} .
        ?item wdt:{prop} [] . }
    """

    # Substitutions are applied one after another, in this exact order.
    query = template
    for placeholder, value in (('{p}', type_predicate),
                               ('{qid}', qid),
                               ('{prop}', prop)):
        query = query.replace(placeholder, value)

    response = execute_sparql_query(query)
    rows = response['results']['bindings']
    counts = [int(row['count']['value']) for row in rows]
    return counts[0]
Пример #5
0
def count_prop(qid, prop, is_subclass, expand):
    """Count distinct items tied to ``qid`` that have a ``prop`` value.

    Builds a SPARQL query that selects the distinct ``?item`` values
    linked to ``wd:qid`` through the predicate returned by
    ``gc.determine_p`` and having any ``wdt:prop`` value, then counts them.

    Args:
        qid: Entity id placed after ``wd:``.
        prop: Property id placed after ``wdt:``.
        is_subclass: Forwarded to ``gc.determine_p``.
        expand: Forwarded to ``gc.determine_p``.

    Returns:
        The integer count from the first result row, or ``-1`` if the
        query or the parsing of its result fails.

    Side effects:
        Prints the query, the raw bindings and the final count, prefixed
        with ``A1:``, ``A2:`` and ``A3:``.
    """
    p = gc.determine_p(is_subclass, expand)
    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    q_string = """
    SELECT (count(?item) as ?count) where {
        SELECT DISTINCT ?item WHERE {
            ?item {p} wd:{qid} .
            ?item wdt:{prop} [] . }}
    """.replace('{p}', p).replace('{qid}', qid).replace('{prop}', prop)
    print("A1: " + q_string)
    try:
        d = gc.execute_sparql_query(q_string)['results']['bindings']
        print("A2: " + str(d))
        prop_count = int(d[0]['count']['value'])
    except Exception:
        # Best-effort: any query or parsing failure is reported as -1.
        # Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate instead of being turned into -1.
        prop_count = -1
    print("A3: " + str(prop_count))
    return prop_count
Пример #6
0
def get_property_prov_counts(qid,
                             pid,
                             use_subclass=False,
                             extend_subclass=False):
    """Count distinct (item, value) statements per ``pr:P248`` reference.

    Runs a SPARQL query over items tied to ``wd:qid`` that have a ``pid``
    statement carrying a ``prov:wasDerivedFrom`` node with a ``pr:P248``
    value, and counts the distinct ``?item``/``?xref``/``?ref`` rows for
    each reference.

    Args:
        qid: Entity id placed after ``wd:`` to constrain ``?item``.
        pid: Property id used in the ``p:``/``ps:`` statement pattern.
        use_subclass: Forwarded to ``gc.determine_p``.
        extend_subclass: Forwarded to ``gc.determine_p``.

    Returns:
        A list of ``(ref_id, ref_label, count)`` tuples in the order the
        endpoint returned them (the query asks for descending count).
        ``ref_id`` has ``http://www.wikidata.org/entity/`` stripped out.
        An empty list is returned when executing the query fails.

    Side effects:
        On failure, prints a diagnostic and appends the query text to the
        module-level ``LOGSTR``.
    """
    global LOGSTR

    # gc.determine_p is defined outside this block; presumably it returns
    # the predicate/path text that links an item to its class -- verify.
    p_subj = gc.determine_p(use_subclass, extend_subclass)

    # Placeholders are filled with str.replace rather than str.format
    # because the SPARQL text itself is full of literal braces.
    prop_query = """
    SELECT ?ref ?refLabel ?count WHERE {
      {SELECT ?ref  (COUNT(*) AS ?count) WHERE {
        SELECT DISTINCT ?item ?xref ?ref WHERE  {
          ?item {p_subj} wd:{qid}.
          ?item p:{pid} [ps:{pid} ?xref;
                        prov:wasDerivedFrom
                        [pr:P248 ?ref;]
                       ]
          }} GROUP BY ?ref
       }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }

    }ORDER BY DESC (?count)""".replace('{p_subj}', p_subj).replace(
        '{qid}', qid).replace('{pid}', pid)

    try:
        d = gc.execute_sparql_query(prop_query)['results']['bindings']
    except Exception:
        # Best-effort: a failed query yields no rows instead of aborting
        # the caller. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        print("***** FAILED SPARQL *****")
        print("Item QID: {}\tProp PID: {}\n".format(qid, pid))
        d = []
        LOGSTR += 'Node Property Reference Query:'
        LOGSTR += prop_query + '\n\n'

    return [(x['ref']['value'].replace("http://www.wikidata.org/entity/",
                                       ""), x['refLabel']['value'],
             int(x['count']['value'])) for x in d]