def get_internal_person_from_external_iri(g:Graph, persons:set):
    """
    Resolve external person IRIs to the internal subjects linked to them.

    For each IRI in ``persons`` the graph is searched for triples matching
    ``(?s, owl.sameAs, URIRef(IRI))``, where ``owl`` is the 'owl' entry of
    ``get_schemas()``. Every match is printed and its subject collected.

    Args:
        g: graph that is queried through ``g.triples``.
        persons: external IRIs; each one is wrapped in ``URIRef``.

    Returns:
        set: the subjects of all matching triples.
    """
    owl_ns = get_schemas()['owl']
    found = set()
    for person in persons:
        # Only the object is fixed; any subject linked through sameAs matches.
        pattern = (None, owl_ns.sameAs, URIRef(person))
        for s, _, _ in g.triples(pattern):
            print(f'{s} <--- {person}')
            found.add(s)
    return found
def get_directors(g:Graph):
    """
    Collect every director referenced in the graph.

    Args:
        g: graph that is queried through ``g.triples``.

    Returns:
        list: the object of each triple whose predicate is the ``director``
        term of the 'schema' entry of ``get_schemas()``. One entry is added
        per matching triple, so duplicates are not removed.
    """
    schema_ns = get_schemas()['schema']
    directors = []
    for _subject, _predicate, director in g.triples((None, schema_ns.director, None)):
        directors.append(director)
    return directors
def get_external_references(g:Graph, ds:Iterable):
    """
    Build a lookup from external resources to the internal entities in ``ds``.

    For every entity ``d`` in ``ds``, each object ``o`` of a triple
    ``(d, owl.sameAs, o)`` is stored under the key ``str(o)``.

    Args:
        g: graph that is queried through ``g.triples``.
        ds: entities used as the subject of each lookup.

    Returns:
        dict: ``str(o) -> d``. When several entities point to the same
        object, the one iterated last wins.
    """
    owl_ns = get_schemas()['owl']
    return {
        str(external): d
        for d in ds
        for _, _, external in g.triples((d, owl_ns.sameAs, None))
    }
from common import get_persons_names, get_schemas, bind_schemas


def get_dbpedia_endpoint():
    """Return a SPARQLWrapper for http://dbpedia.org/sparql configured to return JSON."""
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setReturnFormat(JSON)
    return sparql


if __name__ == '__main__':
    # Parse the original dataset (Turtle) into a fresh graph.
    g = Graph()
    with open('data/dataset-original.ttl', 'r') as f:
        g.parse(f, format='turtle')

    sch = get_schemas()
    schema = sch['schema']
    # The result is sized with len() and iterated as (subject, name) pairs below.
    names = get_persons_names(g, schema)

    ''' cargo las ocupaciones procesadas a mano '''
    # English: load the occupations that were processed by hand.
    with open('data/db_pedia_occupations.json', 'r') as f:
        occupations = json.load(f)['occupations']

    subjects = {}
    sql = get_dbpedia_endpoint()

    # Counters used only for the progress message printed on each iteration.
    cantidad = len(names)
    procesado = 0
    for my_subject, name in names:
        print(f'procesando {procesado}/{cantidad}')
        local_subjects = set()
        # NOTE(review): the loop body appears to continue beyond the visible
        # source (procesado, subjects, occupations and sql are not used here).
if __name__ == '__main__':
    # g holds the parsed dataset; gdir starts empty and only gets the schemas bound.
    g = Graph()
    gdir = Graph()
    bind_schemas(gdir)
    with open('data/dataset-final.ttl','r') as f:
        g.parse(f, format='turtle')

    # Disabled: additionally loading the wikidata subjects into g.
    #with open('data/wikidata_subjects.ttl','r') as f:
    #    g.parse(f, format='turtle')

    ss = get_schemas()
    schema = ss['schema']
    twss = ss['twss']
    sql = get_wikidata_endpoint()

    """ punto 1 - ver notes_random.txt """
    # English: item 1 - see notes_random.txt
    print("/////////////////////////////////////////\nPunto1\n//////////////////////////////////////////")
    print(f'buscando datos específicos de wikidata sobre premios')
    # Presumably queries Wikidata for award data through the endpoint in sql;
    # the helper is defined outside the visible source - TODO confirm.
    persons_with_award = get_wikidat_persons_with_award(sql)
    directors_with_awards = set()
    # Objects of every schema.director triple found in g.
    directors = get_directors(g)
    # NOTE(review): the script appears to continue beyond the visible source
    # (directors_with_awards is created but not filled here).
if __name__ == '__main__':
    print('leyendo archivo de subjects')
    # Both subject files (Turtle) are parsed into the same graph.
    gsubjects = Graph()
    with open('data/dbpedia_subjects.ttl','r') as f:
        gsubjects.parse(f, format='turtle')
    with open('data/wikidata_subjects.ttl','r') as f:
        gsubjects.parse(f, format='turtle')

    delay = 2  # not used in the visible part; units unknown - TODO confirm

    # gdata is the output graph; it starts empty with the schemas bound.
    gdata = Graph()
    bind_schemas(gdata)
    owl = get_schemas()['owl']
    endpoints = get_endpoints()

    procesado = 0
    # NOTE(review): this is the size of the whole graph, while the loop below
    # visits only sameAs triples, so the printed total may not match the
    # number of iterations - confirm.
    cantidad = len(gsubjects)
    # The query uses OWL.sameAs while the copied triple uses owl.sameAs from
    # get_schemas(); presumably both name the same predicate - TODO confirm.
    for s,p,o in gsubjects.triples((None,OWL.sameAs,None)):
        ''' agrego la tripleta del sameAs así me queda interna la dataset '''
        # English: add the sameAs triple so it is kept inside the dataset.
        gdata.add((s, owl.sameAs, o))
        my_subject = str(s)
        subject = str(o)
        print(f'procesando {procesado}/{cantidad}')
        # The endpoint is chosen from the external subject IRI; the selection
        # rule lives in select_endpoint, which is not visible here.
        sql = select_endpoint(subject, endpoints)
        # NOTE(review): the loop body appears to continue beyond the visible
        # source (procesado is never incremented and sql is not used here).