Пример #1
0
def main(*, local: bool = False) -> None:
    """Extend the TWSS graph with person data from DBpedia and Wikidata.

    Reads the base graph from ``TWSS_RESOURCES_URI``, obtains the DBpedia and
    Wikidata person graphs, merges them into the base graph with
    ``merge_graphs`` and writes the result to ``EXTENDED_PERSONS_FILE``.

    Args:
        local: When true, the person graphs are read from
            ``DBPEDIA_PERSONS_FILE`` and ``WIKIDATA_PERSONS_FILE`` instead of
            being queried remotely. Defaults to ``False``, which is what the
            previously hard-coded flag selected.
    """
    logging.info(f"Request to {TWSS_RESOURCES_URI}")
    twss_graph = OwlMovieRepository.read(TWSS_RESOURCES_URI)

    if local:
        dbpedia_graph = OwlMovieRepository.read(DBPEDIA_PERSONS_FILE)
        wiki_graph = OwlMovieRepository.read(WIKIDATA_PERSONS_FILE)
    else:
        names = get_persons_names(twss_graph)
        logging.info("Querying remote persons")
        # Warning: making too many requests to the DBpedia server may get you
        # blocked.
        dbpedia_graph = query_dbpedia_persons(names)
        wiki_graph = query_wikidata_persons(names)

    remote_persons_graph = dbpedia_graph + wiki_graph
    merged_graph = merge_graphs(twss_graph, remote_persons_graph)

    logging.info(f"Done! Writing graph in {EXTENDED_PERSONS_FILE}")

    OwlMovieRepository.write(
        path_file=EXTENDED_PERSONS_FILE,
        graph=merged_graph,
        namespaces=NAMESPACES,
    )
Пример #2
0
def main():
    """Command-line entry point that builds and stores the enriched graph.

    Flags:
        -v / --verbose: log at DEBUG level instead of INFO.
        -o / --offline: skip running the Oscar and person-enricher scripts
            before the enrichment graph is built.

    The enrichment graph is written to ``ENRICHED_GRAPH_FILE``.
    """
    # Command-line options: both are simple on/off switches.
    arg_parser = argparse.ArgumentParser()
    for short_flag, long_flag in (("-v", "--verbose"), ("-o", "--offline")):
        arg_parser.add_argument(short_flag, long_flag, action="store_true")
    options = arg_parser.parse_args()

    # Logging verbosity follows the --verbose switch.
    log_level = logging.INFO
    if options.verbose:
        log_level = logging.DEBUG
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=log_level,
    )

    # The two sub-scripts are only run when not in offline mode.
    online = not options.offline
    if online:
        logging.info("Running Oscar script")
        oscars.main()
        logging.info("Running person enricher script")
        person_enricher.main()

    # Combine the enrichments and persist the result.
    logging.info("Enriching the graph")
    OwlMovieRepository.write(
        path_file=ENRICHED_GRAPH_FILE,
        graph=build_enrichment_graph(),
        namespaces=NAMESPACES,
    )

    logging.info(f"Done! Enriched graph saved in {ENRICHED_GRAPH_FILE}")
Пример #3
0
def query_dbpedia_persons(names):
    """Query DBpedia for the given person names and store the result.

    The names are joined into a single regex alternation ``(a|b|...)`` that
    is substituted into the ``DBPEDIA_PERSONS`` query template. The converted
    query result is written to ``DBPEDIA_PERSONS_FILE`` and returned.
    """
    endpoint = "http://dbpedia.org/sparql"
    logging.info(f"Request to {endpoint}")

    # NOTE(review): names are inserted verbatim; regex metacharacters or
    # quotes inside a name are not escaped here -- confirm inputs are safe.
    alternation = f"({'|'.join(names)})"

    client = SPARQLWrapper(endpoint, returnFormat=RDFXML)
    client.setQuery(DBPEDIA_PERSONS.format(persons_regex=alternation))
    persons_graph = client.queryAndConvert()

    # Keep a copy on disk before handing the result back to the caller.
    OwlMovieRepository.write(
        path_file=DBPEDIA_PERSONS_FILE,
        graph=persons_graph,
        namespaces=NAMESPACES,
    )
    return persons_graph
Пример #4
0
def query_wikidata_persons(names):
    """Query Wikidata for the given person names and store the result.

    The names are joined into a single regex alternation ``(a|b|...)`` that
    is substituted into the ``WIKIDATA_PERSONS`` query template. The
    converted query result is written to ``WIKIDATA_PERSONS_FILE`` and
    returned.
    """
    endpoint = "https://query.wikidata.org/sparql"
    logging.info(f"Request to {endpoint}")

    # NOTE(review): names are inserted verbatim; regex metacharacters or
    # quotes inside a name are not escaped here -- confirm inputs are safe.
    alternation = f"({'|'.join(names)})"

    client = SPARQLWrapper(endpoint, returnFormat=RDFXML)
    client.setQuery(WIKIDATA_PERSONS.format(persons_regex=alternation))
    persons_graph = client.queryAndConvert()

    # Keep a copy on disk before handing the result back to the caller.
    OwlMovieRepository.write(
        graph=persons_graph,
        namespaces=NAMESPACES,
        path_file=WIKIDATA_PERSONS_FILE,
    )
    return persons_graph
Пример #5
0
def main():
    """Combine the TWSS resources with the Oscar winners graph and save it.

    Both graphs are added together, the ``COMBINE_REMOTE_AND_LOCAL_ACTORS``
    query is run against the sum, and the graph carried by the query result
    is written to ``OSCAR_WINNERS_FILE``.
    """
    logging.info("Reading graphs")

    local_graph = get_twss_resources_graph()
    winners_graph = get_oscar_winners_graph(local_graph)

    logging.info("Matching remote actors with locals")

    # Query the union of both graphs so local and remote actors can be matched.
    matched = (local_graph + winners_graph).query(COMBINE_REMOTE_AND_LOCAL_ACTORS)

    logging.info(f"Done! Writing graph in {OSCAR_WINNERS_FILE}")

    OwlMovieRepository.write(
        path_file=OSCAR_WINNERS_FILE,
        graph=matched.graph,
        namespaces=NAMESPACES,
    )
Пример #6
0
def get_oscar_winners_graph(twss_resources):
    """Return the Oscar winners graph, using the cache file when it exists.

    If ``OSCAR_WINNERS_CACHE_FILE`` exists, the graph is read from it.
    Otherwise it is built with ``build_oscar_winners_graph(twss_resources)``
    and written to the cache file before being returned.
    """
    logging.info("Looking for actors who were directed by Oscar winners.")

    # Cache hit: nothing to build, just load and return.
    if OSCAR_WINNERS_CACHE_FILE.exists():
        logging.info("Reading Oscar winners from cache")
        return OwlMovieRepository.read(source=OSCAR_WINNERS_CACHE_FILE)

    # Cache miss: build the graph, then persist it for later runs.
    logging.info("Reading Oscar winners from web")
    winners = build_oscar_winners_graph(twss_resources)

    cache_message = (
        f"Writing the cache file of Oscar winners "
        f"in {OSCAR_WINNERS_CACHE_FILE}."
    )
    logging.info(cache_message)

    OwlMovieRepository.write(
        path_file=OSCAR_WINNERS_CACHE_FILE,
        graph=winners,
        namespaces=NAMESPACES,
    )
    return winners
def write_links():
    """Build ``owl:sameAs`` links from TWSS actors to DBpedia and save them.

    Actor URIs are taken from the graph read from ``ORIGINAL_DATASET_FILE``
    and paired positionally with the results of ``get_dbpedia_actors``. For
    every non-empty result one ``(twss_uri, OWL.sameAs, dbpedia_uri)`` triple
    is added; empty results are logged as errors. The collected triples are
    written to ``LINKS_FILE``.
    """
    source_graph = OwlMovieRepository.read(ORIGINAL_DATASET_FILE)
    same_as_links = Graph()

    actor_uris = get_actors_uris(source_graph)
    dbpedia_results = get_dbpedia_actors(actor_uris)

    for found, actor_uri in zip(dbpedia_results, actor_uris):
        actor_name = to_dbpedia_actor_name(actor_uri)

        # Nothing came back for this actor: report it and move on.
        if len(found) == 0:
            logging.error(f"Not found owl:sameAs for {actor_name}")
            continue

        logging.debug(f"Found owl:sameAs for dbpedia_{actor_name}")

        dbpedia_uri = get_dbpedia_actor_uri(found, actor_name)
        same_as_links.add((actor_uri, OWL.sameAs, dbpedia_uri))

    OwlMovieRepository.write(LINKS_FILE, same_as_links, namespaces=NAMESPACES)