Пример #1
0
def get_predicates():
    """
    Get a list of predicates used in statements issued by the knowledge source.

    Runs one SPARQL query selecting every entity that has a
    ``wikibase:propertyType``, together with its label (from the label
    service), its English description and its English aliases.

    :return: one dict per property with the keys ``id`` (``wd:``-prefixed
        property id), ``name``, ``definition`` (empty string when the
        property has no English description), ``aliases`` (list, possibly
        empty) and ``ptype`` (property type with the ontology prefix removed).
        An empty list is returned when the query yields no rows.
    :rtype: List[Predicate]
    """
    # Possible types: {'CommonsMedia', 'Time', 'Quantity', 'WikibaseProperty', 'WikibaseItem', 'GlobeCoordinate',
    # 'String', 'ExternalId', 'Math', 'Monolingualtext', 'TabularData', 'Url', 'GeoShape'}
    query = """SELECT ?p ?pt ?pLabel ?d ?aliases WHERE {
      {
        SELECT ?p ?pt ?d (GROUP_CONCAT(DISTINCT ?alias; separator="|") as ?aliases) WHERE {
          ?p wikibase:propertyType ?pt .
          OPTIONAL {?p skos:altLabel ?alias FILTER (LANG (?alias) = "en")}
          OPTIONAL {?p schema:description ?d FILTER (LANG (?d) = "en") .}
        } GROUP BY ?p ?pt ?d
      }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }"""
    bindings = execute_sparql_query(query)['results']['bindings']
    # Flatten each binding from {var: {"value": ...}} to {var: value}.
    rows = [{k: v['value'] for k, v in item.items()} for item in bindings]

    # note: 'aliases' and 'ptype' are not in the official spec!
    return [{
        'id': "wd:" + row['p'].split("/")[-1],
        'name': row['pLabel'],
        # ?d comes from an OPTIONAL clause, so it may be unbound.
        'definition': row.get("d", ""),
        # The aliases arrive as a single "|"-joined string; an empty or
        # missing value means the property has no English aliases.
        'aliases': row['aliases'].split("|") if row.get('aliases') else [],
        'ptype': row['pt'].replace("http://wikiba.se/ontology#", ""),
    } for row in rows]
Пример #2
0
def get_equiv_item(curie):
    """
    From a curie, get the wikidata item(s) carrying that external identifier.

    Example: get_equiv_item("PMID:10028264")

    :param curie: external identifier in CURIE form; it is split by
        ``cu.parse_curie`` into a property reference and an identifier value
    :return: list of distinct ``wd:``-prefixed item ids, in the order the
        endpoint returned them; an empty list when the curie cannot be
        parsed (the parse error is printed) or nothing matches
    """
    try:
        pid, value = cu.parse_curie(curie)
    except ValueError as e:
        print(e)
        return []
    prop_direct = "<http://www.wikidata.org/prop/direct/{}>".format(
        pid.split("/")[-1])

    # The value originates from the caller-supplied curie and is placed
    # inside a single-quoted SPARQL literal, so escape the characters that
    # would terminate or corrupt that literal. The backslash must be handled
    # first so the escapes added afterwards are not escaped again.
    literal = str(value)
    for raw, escaped in (("\\", "\\\\"), ("'", "\\'"), ("\n", "\\n"),
                         ("\r", "\\r")):
        literal = literal.replace(raw, escaped)

    query_str = "SELECT ?item WHERE {{ ?item {} '{}' }}".format(
        prop_direct, literal)
    bindings = execute_sparql_query(query_str)['results']['bindings']

    # De-duplicate while keeping the endpoint's ordering.
    entity_uris = dict.fromkeys(
        cell['value'] for row in bindings for cell in row.values())
    return [
        "wd:" + uri.replace("http://www.wikidata.org/entity/", "")
        for uri in entity_uris
    ]
Пример #3
0
def get_all_types():
    """
    Get all semantic group types, and their counts.

    For every entity type listed in ``qid_semgroup`` (except ``Q5``) one
    SPARQL query counts the distinct items that are an instance
    (``wdt:P31``) of that type.

    :return: list of ``{"id": "wd:<qid>", "frequency": <int>}`` dicts, one
        per counted entity type, in ``qid_semgroup`` iteration order
    """
    query_template = """SELECT (COUNT (DISTINCT ?type) AS ?count) WHERE {{?type wdt:P31 wd:{0}}}"""

    counts = {}
    for entity_id, group_name in qid_semgroup.items():
        groups = [group_name] if isinstance(group_name, str) else group_name
        # Q5 = human, can't do a count. Types without any semantic group
        # are left out as well.
        if entity_id == 'Q5' or not groups:
            continue
        # The count depends only on the entity id, so query once per entity
        # rather than once per semantic group it belongs to.
        bindings = execute_sparql_query(
            query_template.format(entity_id))['results']['bindings']
        counts[entity_id] = int(bindings[0]['count']['value'])

    return [{
        'id': 'wd:{}'.format(qid),
        'frequency': frequency
    } for qid, frequency in counts.items()]
Пример #4
0
def get_concept_details(qid):
    """
    Build the "details" for the GET /translator/concepts/{conceptId} endpoint.

    Queries every direct claim of the given item together with the English
    labels of the property and of the value.

    :param qid: item identifier; normalised with ``always_curie`` before
        being placed in the query
    :return: list of dicts, one per result row, holding the bound query
        variables as plain strings. ``prop`` and ``value`` have the Wikidata
        entity URI prefix shortened to ``wd:``, and ``tag`` duplicates the
        shortened ``prop``.
    """
    entity_prefix = "http://www.wikidata.org/entity/"
    template = """SELECT distinct ?prop ?propLabel ?value ?valueLabel
    WHERE
    {{
        {} ?p ?value .
        ?prop wikibase:directClaim ?p .
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" . }}
    }}"""
    subject = always_curie(qid)
    response = execute_sparql_query(template.format(subject))

    details = []
    for binding in response['results']['bindings']:
        # Flatten {var: {"value": ...}} to {var: value}.
        row = {name: cell['value'] for name, cell in binding.items()}
        short_prop = row['prop'].replace(entity_prefix, "wd:")
        row['prop'] = short_prop
        row['tag'] = short_prop
        row['value'] = row['value'].replace(entity_prefix, "wd:")
        details.append(row)
    return details
Пример #5
0
def _query_statements(s, t=None, relations=None, direction="f"):
    """
    Query item-to-item statements and return them as subject/predicate/object
    records.

    if direction = f (forward), s is source, t is target
    if direction = r (reverse), s and t are reversed
    if t is not given, target is unconstrained
    if relations is not given, relations are unconstrained

    :param s: iterable of identifiers, each normalised with ``always_curie``
    :param t: optional iterable of identifiers, normalised the same way
    :param relations: optional iterable of property identifiers
    :param direction: ``"f"`` or ``"r"``
    :return: list of dicts with the keys ``id``, ``subject``, ``predicate``
        and ``object``. ``subject`` always reflects the query's ``?s`` and
        ``object`` its ``?t``; the two are not swapped back for reverse
        queries.
    """
    assert direction in {"f", "r"}, "direction must be 'f' or 'r'"

    entity_ns = "http://www.wikidata.org/entity/"

    source_curies = {always_curie(x) for x in s}
    target_curies = {always_curie(x) for x in t} if t else set()
    relation_curies = ({always_curie(x) for x in relations}
                       if relations else set())

    source_values = " ".join(source_curies)
    target_values = " ".join(target_curies)
    relation_values = " ".join(relation_curies)

    # A reverse query constrains ?t with the given sources and ?s with the
    # given targets.
    if direction == "r":
        source_values, target_values = target_values, source_values

    # An empty value list means "unconstrained": no VALUES clause at all.
    source_clause = ""
    if source_values:
        source_clause = "values ?s {" + source_values + "}"
    target_clause = ""
    if target_values:
        target_clause = "values ?t {" + target_values + "}"
    relation_clause = ""
    if relation_values:
        relation_clause = "values ?r {" + relation_values + "}"

    template = """
    SELECT ?s ?sLabel ?r ?rLabel ?t ?tLabel ?id (GROUP_CONCAT(?stype) as ?stypes) (GROUP_CONCAT(?ttype) as ?ttypes) WHERE {{
      {source_filter}
      {target_filter}
      {relation_filter}
      ?s ?propertyclaim ?id .
      ?r wikibase:claim ?propertyclaim .
      ?id ?b ?t .
      OPTIONAL {{?s wdt:P31 ?stype}}
      OPTIONAL {{?t wdt:P31 ?ttype}}
      FILTER(regex(str(?b), "http://www.wikidata.org/prop/statement" ))
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" }}
    }} GROUP BY ?s ?sLabel ?r ?rLabel ?t ?tLabel ?id"""
    query_str = template.format(source_filter=source_clause,
                                target_filter=target_clause,
                                relation_filter=relation_clause)

    bindings = execute_sparql_query(query_str)['results']['bindings']

    # Flatten each binding, then keep only statements whose both ends are
    # items (i.e. drop non item statements).
    flattened = [{var: cell['value'] for var, cell in binding.items()}
                 for binding in bindings]
    rows = []
    for row in flattened:
        if entity_ns in row['s'] and entity_ns in row['t']:
            rows.append(row)

    for row in rows:
        # Shorten full entity URIs to wd: curies.
        for key in ('s', 'r', 't'):
            row[key] = row[key].replace(entity_ns, "wd:")

        # Statement URIs become wds: ids; the first "-" is turned into "$".
        statement_id = row['id'].replace(
            "http://www.wikidata.org/entity/statement/", "wds:")
        row['id'] = statement_id.replace("-", "$", 1)

        # The concatenated instance-of types arrive as one space separated
        # string.
        source_types = []
        if row['stypes']:
            source_types = [uri.replace(entity_ns, "wd:")
                            for uri in row['stypes'].split(" ")]
        target_types = []
        if row['ttypes']:
            target_types = [uri.replace(entity_ns, "wd:")
                            for uri in row['ttypes'].split(" ")]

        if source_types:
            row['sSemanticGroup'] = " ".join(
                get_semgroups_from_qids(source_types))
        else:
            row['sSemanticGroup'] = ""
        if target_types:
            row['tSemanticGroup'] = " ".join(
                get_semgroups_from_qids(target_types))
        else:
            row['tSemanticGroup'] = ""

    statements = []
    for row in rows:
        # Only statements whose id starts with "wds:Q" are reported.
        if not row['id'].startswith("wds:Q"):
            continue
        statements.append({
            'id': row['id'],
            'subject': {
                'id': row['s'],
                'name': row['sLabel'],
                'semanticGroup': row['sSemanticGroup']
            },
            'predicate': {
                'id': row['r'],
                'name': row['rLabel']
            },
            'object': {
                'id': row['t'],
                'name': row['tLabel'],
                'semanticGroup': row['tSemanticGroup']
            },
        })

    return statements