def main(*, local=False):
    """Extend the TWSS graph with person data from DBpedia and Wikidata.

    Reads the TWSS resources graph from ``TWSS_RESOURCES_URI``, obtains the
    DBpedia and Wikidata person graphs, merges them with the TWSS graph and
    writes the merged graph to ``EXTENDED_PERSONS_FILE``.

    Args:
        local: When true, the person graphs are read from
            ``DBPEDIA_PERSONS_FILE`` and ``WIKIDATA_PERSONS_FILE`` instead of
            being fetched from the remote endpoints. Defaults to ``False``
            (query remotely), which matches the previously hard-coded flag,
            so existing ``main()`` callers behave exactly as before.
    """
    logging.info(f"Request to {TWSS_RESOURCES_URI}")
    twss_graph = OwlMovieRepository.read(TWSS_RESOURCES_URI)

    if local:
        dbpedia_graph = OwlMovieRepository.read(DBPEDIA_PERSONS_FILE)
        wiki_graph = OwlMovieRepository.read(WIKIDATA_PERSONS_FILE)
    else:
        names = get_persons_names(twss_graph)
        logging.info("Querying remote persons")
        # Warning: making too many requests to the DBpedia server may get
        # you blocked.
        dbpedia_graph = query_dbpedia_persons(names)
        wiki_graph = query_wikidata_persons(names)

    remote_persons_graph = dbpedia_graph + wiki_graph
    merged_graph = merge_graphs(twss_graph, remote_persons_graph)

    logging.info(f"Done! Writing graph in {EXTENDED_PERSONS_FILE}")
    OwlMovieRepository.write(
        path_file=EXTENDED_PERSONS_FILE,
        graph=merged_graph,
        namespaces=NAMESPACES,
    )
def main():
    """Command-line entry point that builds and saves the enriched graph.

    Flags:
        -v / --verbose: log at DEBUG level instead of INFO.
        -o / --offline: skip the Oscar and person-enricher scripts.

    The enrichment graph is always built and written to
    ``ENRICHED_GRAPH_FILE``.
    """
    # Command-line options
    cli = argparse.ArgumentParser()
    cli.add_argument("-v", "--verbose", action="store_true")
    cli.add_argument("-o", "--offline", action="store_true")
    options = cli.parse_args()

    # Logger setup
    log_level = logging.INFO
    if options.verbose:
        log_level = logging.DEBUG
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )

    # NOTE(review): both sub-scripts are assumed to be skipped in offline
    # mode -- confirm against the intended layout.
    run_remote_scripts = not options.offline
    if run_remote_scripts:
        logging.info("Running Oscar script")
        oscars.main()
        logging.info("Running person enricher script")
        person_enricher.main()

    # Combine the enrichments and persist the result
    logging.info("Enriching the graph")
    OwlMovieRepository.write(
        path_file=ENRICHED_GRAPH_FILE,
        graph=build_enrichment_graph(),
        namespaces=NAMESPACES,
    )
    logging.info(f"Done! Enriched graph saved in {ENRICHED_GRAPH_FILE}")
def query_dbpedia_persons(names):
    """Query the DBpedia SPARQL endpoint for the given person names.

    The names are joined into a single alternation pattern ``(a|b|...)`` and
    substituted into the ``DBPEDIA_PERSONS`` query template. The converted
    result is written to ``DBPEDIA_PERSONS_FILE`` and also returned.

    Args:
        names: Iterable of name strings; they are inserted into the pattern
            as-is (no escaping is applied).

    Returns:
        Whatever ``SPARQLWrapper.queryAndConvert`` yields for an RDF/XML
        response.
    """
    DBPEDIA_URL = "http://dbpedia.org/sparql"
    logging.info(f"Request to {DBPEDIA_URL}")

    alternation = "|".join(names)
    query_text = DBPEDIA_PERSONS.format(persons_regex=f"({alternation})")

    endpoint = SPARQLWrapper(DBPEDIA_URL, returnFormat=RDFXML)
    endpoint.setQuery(query_text)
    persons_graph = endpoint.queryAndConvert()

    # Persist the response so it can be reused without another remote call.
    OwlMovieRepository.write(
        path_file=DBPEDIA_PERSONS_FILE,
        graph=persons_graph,
        namespaces=NAMESPACES,
    )
    return persons_graph
def query_wikidata_persons(names):
    """Query the Wikidata SPARQL endpoint for the given person names.

    The names are joined into a single alternation pattern ``(a|b|...)`` and
    substituted into the ``WIKIDATA_PERSONS`` query template. The converted
    result is written to ``WIKIDATA_PERSONS_FILE`` and also returned.

    Args:
        names: Iterable of name strings; they are inserted into the pattern
            as-is (no escaping is applied).

    Returns:
        Whatever ``SPARQLWrapper.queryAndConvert`` yields for an RDF/XML
        response.
    """
    WIKIDATA_URL = "https://query.wikidata.org/sparql"
    logging.info(f"Request to {WIKIDATA_URL}")

    alternation = "|".join(names)
    query_text = WIKIDATA_PERSONS.format(persons_regex=f"({alternation})")

    endpoint = SPARQLWrapper(WIKIDATA_URL, returnFormat=RDFXML)
    endpoint.setQuery(query_text)
    persons_graph = endpoint.queryAndConvert()

    # Persist the response so it can be reused without another remote call.
    OwlMovieRepository.write(
        path_file=WIKIDATA_PERSONS_FILE,
        graph=persons_graph,
        namespaces=NAMESPACES,
    )
    return persons_graph
def main():
    """Combine the local TWSS graph with Oscar-winner data and save it.

    Loads the TWSS resources graph and the Oscar winners graph, runs the
    ``COMBINE_REMOTE_AND_LOCAL_ACTORS`` query over their union, and writes
    the graph carried by the query result to ``OSCAR_WINNERS_FILE``.
    """
    logging.info("Reading graphs")
    local_graph = get_twss_resources_graph()
    winners_graph = get_oscar_winners_graph(local_graph)

    logging.info("Matching remote actors with locals")
    union_graph = local_graph + winners_graph
    matched = union_graph.query(COMBINE_REMOTE_AND_LOCAL_ACTORS)

    logging.info(f"Done! Writing graph in {OSCAR_WINNERS_FILE}")
    OwlMovieRepository.write(
        path_file=OSCAR_WINNERS_FILE,
        graph=matched.graph,
        namespaces=NAMESPACES,
    )
def get_oscar_winners_graph(twss_resources):
    """Return the Oscar winners graph, using the on-disk cache when present.

    If ``OSCAR_WINNERS_CACHE_FILE`` exists it is read and returned. Otherwise
    the graph is built with ``build_oscar_winners_graph`` and written to the
    cache file before being returned.

    Args:
        twss_resources: Graph handed to ``build_oscar_winners_graph`` when
            the cache file is missing.

    Returns:
        The Oscar winners graph.
    """
    logging.info("Looking for actors who were directed by Oscar winners.")

    # Cache hit: nothing to build or write.
    if OSCAR_WINNERS_CACHE_FILE.exists():
        logging.info("Reading Oscar winners from cache")
        return OwlMovieRepository.read(source=OSCAR_WINNERS_CACHE_FILE)

    # Cache miss: build the graph, then store it for the next run.
    logging.info("Reading Oscar winners from web")
    winners = build_oscar_winners_graph(twss_resources)

    cache_notice = (
        f"Writing the cache file of Oscar winners "
        f"in {OSCAR_WINNERS_CACHE_FILE}."
    )
    logging.info(cache_notice)
    OwlMovieRepository.write(
        path_file=OSCAR_WINNERS_CACHE_FILE,
        graph=winners,
        namespaces=NAMESPACES,
    )
    return winners
def write_links():
    """Write ``owl:sameAs`` links between TWSS actors and DBpedia actors.

    Reads the original dataset, looks up a DBpedia entry for every actor URI
    and, for each non-empty lookup result, adds a
    ``(twss_actor_uri, OWL.sameAs, dbpedia_uri)`` triple to a fresh graph.
    Actors with an empty lookup result are logged as errors and skipped.
    The resulting graph is written to ``LINKS_FILE``.
    """
    dataset = OwlMovieRepository.read(ORIGINAL_DATASET_FILE)
    same_as_links = Graph()

    actor_uris = get_actors_uris(dataset)
    lookups = get_dbpedia_actors(actor_uris)

    # The lookup results are paired positionally with the actor URIs.
    for dbpedia_actor, actor_uri in zip(lookups, actor_uris):
        dbpedia_actor_name = to_dbpedia_actor_name(actor_uri)

        if len(dbpedia_actor) == 0:
            logging.error(f"Not found owl:sameAs for {dbpedia_actor_name}")
            continue

        logging.debug(f"Found owl:sameAs for dbpedia_{dbpedia_actor_name}")
        remote_uri = get_dbpedia_actor_uri(dbpedia_actor, dbpedia_actor_name)
        same_as_links.add((actor_uri, OWL.sameAs, remote_uri))

    OwlMovieRepository.write(LINKS_FILE, same_as_links, namespaces=NAMESPACES)