Пример #1
0
def get_internal_person_from_external_iri(g: Graph, persons: set):
    """Collect the subjects linked to the given external IRIs via ``sameAs``.

    For every IRI in ``persons`` the graph is searched for triples of the
    form ``(?s, owl.sameAs, <IRI>)``, where ``owl`` is the 'owl' entry
    returned by ``get_schemas()``. Each match is echoed to stdout.

    Args:
        g: Graph to search.
        persons: External IRIs; each one is wrapped in ``URIRef`` before
            being used as the object of the lookup pattern.

    Returns:
        A set with the subject of every matching triple.
    """
    owl_ns = get_schemas()['owl']
    matches = set()
    for external_iri in persons:
        # Subject is left open: we want whoever points at this external IRI.
        pattern = (None, owl_ns.sameAs, URIRef(external_iri))
        for internal, _pred, _obj in g.triples(pattern):
            print(f'{internal} <--- {external_iri}')
            matches.add(internal)
    return matches
Пример #2
0
def get_directors(g: Graph):
    """Return the object of every ``director`` triple found in ``g``.

    The predicate is the ``director`` term of the 'schema' entry returned by
    ``get_schemas()``. Subjects are ignored.

    Args:
        g: Graph to search.

    Returns:
        A list with one entry per matching triple (no de-duplication is
        performed here).
    """
    schema_ns = get_schemas()['schema']
    directors = []
    for _subj, _pred, director in g.triples((None, schema_ns.director, None)):
        directors.append(director)
    return directors
Пример #3
0
def get_external_references(g: Graph, ds: Iterable):
    """Build a lookup from external resource IRI to the entity that cites it.

    For each entity in ``ds`` the graph is searched for triples
    ``(entity, owl.sameAs, ?o)``; every object found becomes a key (converted
    with ``str``) whose value is that entity.

    Args:
        g: Graph to search.
        ds: Entities used as the subject of the ``sameAs`` lookup.

    Returns:
        A dict mapping ``str(object)`` to the entity. When several entities
        reference the same object, the one processed last wins.
    """
    owl_ns = get_schemas()['owl']
    return {
        str(external): internal
        for internal in ds
        for _subj, _pred, external in g.triples((internal, owl_ns.sameAs, None))
    }
Пример #4
0
from common import get_persons_names, get_schemas, bind_schemas


def get_dbpedia_endpoint():
    """Create a SPARQLWrapper client for DBpedia that returns JSON results.

    Returns:
        A ``SPARQLWrapper`` instance pointed at the DBpedia SPARQL endpoint
        with its return format set to ``JSON``.
    """
    endpoint_url = "http://dbpedia.org/sparql"
    endpoint = SPARQLWrapper(endpoint_url)
    endpoint.setReturnFormat(JSON)
    return endpoint


if __name__ == '__main__':
    # Script entry point: loads the original dataset, gathers person names,
    # reads a JSON list of occupations and sets up a DBpedia SPARQL client
    # before looping over the names.

    g = Graph()
    # The source dataset is stored as Turtle.
    with open('data/dataset-original.ttl', 'r') as f:
        g.parse(f, format='turtle')

    sch = get_schemas()
    schema = sch['schema']
    # get_persons_names is imported from the project's `common` module; the
    # loop below unpacks each returned item as a (subject, name) pair.
    names = get_persons_names(g, schema)
    # The bare string below is a no-op note; in English:
    # "load the hand-processed occupations".
    ''' cargo las ocupaciones procesadas a mano '''
    with open('data/db_pedia_occupations.json', 'r') as f:
        occupations = json.load(f)['occupations']

    subjects = {}
    # Despite its name, `sql` holds the SPARQL client returned by
    # get_dbpedia_endpoint().
    sql = get_dbpedia_endpoint()

    # Progress counters used in the status message
    # (cantidad = total, procesado = processed so far).
    cantidad = len(names)
    procesado = 0
    for my_subject, name in names:
        # NOTE(review): `procesado` is not incremented in the visible part of
        # the loop; the snippet appears to be cut off here - confirm it is
        # updated further down.
        print(f'procesando {procesado}/{cantidad}')
        local_subjects = set()
Пример #5
0

if __name__ == '__main__':
    # Script entry point: loads the final dataset and starts "point 1" of the
    # analysis, which cross-references directors with award data fetched
    # through the Wikidata endpoint helper.

    g = Graph()
    # Second, initially empty graph with the project's schemas bound to it.
    # NOTE(review): presumably collects director-related output - confirm
    # against the rest of the script.
    gdir = Graph()
    bind_schemas(gdir)

    with open('data/dataset-final.ttl','r') as f:
        g.parse(f, format='turtle')

    # Disabled: additionally loading the Wikidata subjects file into `g`.
    #with open('data/wikidata_subjects.ttl','r') as f:
    #    g.parse(f, format='turtle')


    ss = get_schemas()
    schema = ss['schema']
    twss = ss['twss']

    # Despite its name, `sql` holds whatever get_wikidata_endpoint() returns
    # (defined outside this snippet; presumably a SPARQL client - confirm).
    sql = get_wikidata_endpoint()

    # The bare string below is a no-op note; in English:
    # "point 1 - see notes_random.txt".
    """
        punto 1 - ver notes_random.txt 
    """
    # Banner marking the start of point 1 in the console output.
    print("/////////////////////////////////////////\nPunto1\n//////////////////////////////////////////")

    # Message in English: "looking up Wikidata-specific data about awards".
    print(f'buscando datos específicos de wikidata sobre premios')
    # get_wikidat_persons_with_award is defined outside this snippet; its
    # return shape is not visible here.
    persons_with_award = get_wikidat_persons_with_award(sql)
    directors_with_awards = set()

    # Objects of every `director` triple in the loaded dataset.
    directors = get_directors(g)
Пример #6
0

if __name__ == '__main__':
    # Script entry point: merges the DBpedia and Wikidata subject files into
    # one graph, then walks every sameAs link, copying it into a new graph
    # and picking an endpoint for the external resource.

    # Message in English: "reading subjects file".
    print('leyendo archivo de subjects')
    gsubjects = Graph()
    # Both Turtle files are parsed into the same graph.
    with open('data/dbpedia_subjects.ttl','r') as f:
        gsubjects.parse(f, format='turtle')

    with open('data/wikidata_subjects.ttl','r') as f:
        gsubjects.parse(f, format='turtle')    

    # NOTE(review): `delay` is not used in the visible part of the script;
    # presumably a pause between remote requests - confirm unit and usage.
    delay = 2
    # Output graph that receives the sameAs triples below.
    gdata = Graph()
    bind_schemas(gdata)
    owl = get_schemas()['owl']

    # get_endpoints / select_endpoint are defined outside this snippet.
    endpoints = get_endpoints()
    procesado = 0
    # NOTE(review): this counts every triple in `gsubjects`, while the loop
    # below only visits sameAs triples, so the reported total may be larger
    # than the number of iterations.
    cantidad = len(gsubjects)

    # NOTE(review): the lookup uses `OWL.sameAs` but the insert uses
    # `owl.sameAs` from get_schemas(); confirm both refer to the same term.
    for s,p,o in gsubjects.triples((None,OWL.sameAs,None)):

        # The bare string below is a no-op note; in English:
        # "add the sameAs triple so it is kept inside the dataset".
        ''' agrego la tripleta del sameAs así me queda interna la dataset '''
        gdata.add((s, owl.sameAs, o))

        # s is the internal subject, o the external resource it points to.
        my_subject = str(s)
        subject = str(o)

        # NOTE(review): `procesado` is not incremented in the visible lines;
        # the snippet appears to continue past this point.
        print(f'procesando {procesado}/{cantidad}')
        # Choose the endpoint to query based on the external resource IRI.
        sql = select_endpoint(subject, endpoints)