Пример #1
0
def _get_training_data(graph) -> tuple:
    """Return the subject-entity training data, loading it on first use.

    The value is memoised in the module global
    ``__SUBJECT_ENTITY_TRAINING_DATA__``. While that global is unset, the data
    is requested from ``utils.load_or_create_cache`` under the name
    'subject_entity_training_data', with ``_retrieve_training_data(graph)`` as
    the initializer. Once the global is set, `graph` is no longer consulted.
    """
    module_state = globals()
    if '__SUBJECT_ENTITY_TRAINING_DATA__' in module_state:
        return module_state['__SUBJECT_ENTITY_TRAINING_DATA__']

    def _initialize():
        return _retrieve_training_data(graph)

    training_data = utils.load_or_create_cache(
        'subject_entity_training_data', _initialize)
    module_state['__SUBJECT_ENTITY_TRAINING_DATA__'] = training_data
    return training_data
Пример #2
0
def _get_subject_entity_predictions(graph) -> dict:
    """Return the subject-entity predictions, computing or loading them once.

    The result lives in the module global ``__SUBJECT_ENTITY_PREDICTIONS__``.
    If it is not set yet, it is obtained from ``utils.load_or_create_cache``
    (cache name 'subject_entity_predictions') using
    ``_make_subject_entity_predictions(graph)`` as the initializer. `graph` is
    only used on that first call.
    """
    global __SUBJECT_ENTITY_PREDICTIONS__
    if '__SUBJECT_ENTITY_PREDICTIONS__' in globals():
        return __SUBJECT_ENTITY_PREDICTIONS__

    def _predict():
        return _make_subject_entity_predictions(graph)

    __SUBJECT_ENTITY_PREDICTIONS__ = utils.load_or_create_cache(
        'subject_entity_predictions', _predict)
    return __SUBJECT_ENTITY_PREDICTIONS__
Пример #3
0
def get_disjoint_types(dbp_type) -> set:
    """Return the entry stored for `dbp_type` in the heuristic disjoint-types mapping.

    The mapping is memoised in the module global ``__DISJOINT_TYPES__``. On
    first use it is obtained from ``utils.load_or_create_cache`` under the name
    'dbpedia_heuristic_disjoint_types', with ``_compute_disjoint_types`` as the
    initializer. The lookup is a plain subscript on that mapping.
    """
    module_state = globals()
    if '__DISJOINT_TYPES__' not in module_state:
        module_state['__DISJOINT_TYPES__'] = utils.load_or_create_cache(
            'dbpedia_heuristic_disjoint_types', _compute_disjoint_types)

    disjoint_types_by_type = module_state['__DISJOINT_TYPES__']
    return disjoint_types_by_type[dbp_type]
Пример #4
0
def get_inverse_resource_property_mapping() -> dict:
    """Return the mapping from DBpedia resources to their inverted property-value assignments.

    The mapping is memoised in ``__INVERSE_RESOURCE_PROPERTY_MAPPING__``. On
    first use it is obtained from ``utils.load_or_create_cache`` (cache name
    'dbpedia_inverse_resource_properties'); the initializer builds it with
    ``rdf_util.create_dict_from_rdf(..., reverse_key=True)`` from the
    'files.dbpedia.mappingbased_objects' data file.
    """
    global __INVERSE_RESOURCE_PROPERTY_MAPPING__
    if '__INVERSE_RESOURCE_PROPERTY_MAPPING__' in globals():
        return __INVERSE_RESOURCE_PROPERTY_MAPPING__

    def _build_inverse_mapping():
        # The data file path is resolved only if the initializer is invoked.
        object_files = [utils.get_data_file('files.dbpedia.mappingbased_objects')]
        return rdf_util.create_dict_from_rdf(object_files, reverse_key=True)

    __INVERSE_RESOURCE_PROPERTY_MAPPING__ = utils.load_or_create_cache(
        'dbpedia_inverse_resource_properties', _build_inverse_mapping)
    return __INVERSE_RESOURCE_PROPERTY_MAPPING__
Пример #5
0
def get_page_entities(graph) -> dict:
    """Return the listing-page entities, extracting or loading them once.

    The result is memoised in the module global ``__PAGE_ENTITES__``. While it
    is unset, the value is obtained from ``utils.load_or_create_cache`` under
    the name 'listing_page_entities', with
    ``extract.extract_page_entities(graph)`` as the initializer. `graph` is
    only used on that first call.
    """
    module_state = globals()
    if '__PAGE_ENTITES__' not in module_state:
        def _extract_entities():
            return extract.extract_page_entities(graph)

        module_state['__PAGE_ENTITES__'] = utils.load_or_create_cache(
            'listing_page_entities', _extract_entities)
    return module_state['__PAGE_ENTITES__']
Пример #6
0
def get_resources(category: str) -> set:
    """Return the resources recorded for the given category.

    The category-to-resources mapping is memoised in
    ``__CATEGORY_RESOURCES__``. On first use it is obtained from
    ``utils.load_or_create_cache`` (cache name 'dbpedia_category_resources');
    the initializer calls ``rdf_util.create_multi_val_dict_from_rdf`` on the
    'files.dbpedia.category_articles' data file with
    ``rdf_util.PREDICATE_SUBJECT`` and ``reverse_key=True``.
    """
    global __CATEGORY_RESOURCES__
    if '__CATEGORY_RESOURCES__' not in globals():
        def _load_category_resources():
            article_files = [utils.get_data_file('files.dbpedia.category_articles')]
            return rdf_util.create_multi_val_dict_from_rdf(
                article_files, rdf_util.PREDICATE_SUBJECT, reverse_key=True)

        __CATEGORY_RESOURCES__ = utils.load_or_create_cache(
            'dbpedia_category_resources', _load_category_resources)

    resources_by_category = __CATEGORY_RESOURCES__
    return resources_by_category[category]
Пример #7
0
def get_resource_categories(dbp_resource: str) -> set:
    """Return the categories recorded for the given resource.

    The resource-to-categories mapping is memoised in
    ``__RESOURCE_CATEGORIES__``. On first use it is obtained from
    ``utils.load_or_create_cache`` (cache name 'dbpedia_resource_categories');
    the initializer calls ``rdf_util.create_multi_val_dict_from_rdf`` on the
    'files.dbpedia.category_articles' data file with
    ``rdf_util.PREDICATE_SUBJECT``.
    """
    module_state = globals()
    if '__RESOURCE_CATEGORIES__' not in module_state:
        def _load_resource_categories():
            article_files = [utils.get_data_file('files.dbpedia.category_articles')]
            return rdf_util.create_multi_val_dict_from_rdf(
                article_files, rdf_util.PREDICATE_SUBJECT)

        module_state['__RESOURCE_CATEGORIES__'] = utils.load_or_create_cache(
            'dbpedia_resource_categories', _load_resource_categories)

    categories_by_resource = module_state['__RESOURCE_CATEGORIES__']
    return categories_by_resource[dbp_resource]
Пример #8
0
def get_cyclefree_wikitaxonomy_listgraph() -> ListGraph:
    """Return the list graph with filtered edges and resolved cycles.

    The graph is memoised in ``__CYCLEFREE_WIKITAXONOMY_LISTGRAPH__``. On
    first use it is obtained from ``utils.load_or_create_cache`` (cache name
    'listgraph_cyclefree'); the initializer calls ``append_unconnected()`` on
    the result of ``get_wikitaxonomy_listgraph()``.
    """
    global __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__
    if '__CYCLEFREE_WIKITAXONOMY_LISTGRAPH__' in globals():
        return __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__

    def _build_graph():
        wikitaxonomy_listgraph = get_wikitaxonomy_listgraph()
        return wikitaxonomy_listgraph.append_unconnected()

    __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__ = utils.load_or_create_cache(
        'listgraph_cyclefree', _build_graph)
    return __CYCLEFREE_WIKITAXONOMY_LISTGRAPH__
Пример #9
0
def get_wikitaxonomy_listgraph() -> ListGraph:
    """Return the list graph with filtered edges.

    The graph is memoised in ``__WIKITAXONOMY_LISTGRAPH__``. On first use it
    is obtained from ``utils.load_or_create_cache`` (cache name
    'listgraph_wikitaxonomy'); the initializer calls
    ``remove_unrelated_edges()`` on the result of ``get_base_listgraph()``.
    """
    module_state = globals()
    if '__WIKITAXONOMY_LISTGRAPH__' not in module_state:
        def _filter_edges():
            return get_base_listgraph().remove_unrelated_edges()

        module_state['__WIKITAXONOMY_LISTGRAPH__'] = utils.load_or_create_cache(
            'listgraph_wikitaxonomy', _filter_edges)
    return module_state['__WIKITAXONOMY_LISTGRAPH__']
Пример #10
0
def get_resource_property_mapping() -> dict:
    """Return the mapping from DBpedia resources to their property-value assignments.

    The mapping is memoised in ``__RESOURCE_PROPERTY_MAPPING__``. On first use
    it is obtained from ``utils.load_or_create_cache`` (cache name
    'dbpedia_resource_properties'); the initializer calls
    ``rdf_util.create_dict_from_rdf`` on the 'files.dbpedia.mappingbased_literals'
    and 'files.dbpedia.mappingbased_objects' data files.
    """
    global __RESOURCE_PROPERTY_MAPPING__
    if '__RESOURCE_PROPERTY_MAPPING__' in globals():
        return __RESOURCE_PROPERTY_MAPPING__

    # Both file paths are resolved eagerly, before the cache helper is invoked.
    literals_file = utils.get_data_file('files.dbpedia.mappingbased_literals')
    objects_file = utils.get_data_file('files.dbpedia.mappingbased_objects')
    source_files = [literals_file, objects_file]

    def _build_mapping():
        return rdf_util.create_dict_from_rdf(source_files)

    __RESOURCE_PROPERTY_MAPPING__ = utils.load_or_create_cache(
        'dbpedia_resource_properties', _build_mapping)
    return __RESOURCE_PROPERTY_MAPPING__
Пример #11
0
def get_range(dbp_predicate: str) -> Optional[str]:
    """Return the range of `dbp_predicate`, falling back to heuristic ranges.

    A truthy result of ``dbp_store.get_range`` wins; otherwise the entry from
    the heuristic mapping is returned, which is ``None`` for unknown
    predicates. The heuristic mapping is memoised in ``__RANGES__`` as a
    ``defaultdict`` and, on first use, is filled from
    ``utils.load_or_create_cache`` (cache name 'dbpedia_heuristic_ranges',
    initializer ``_compute_ranges``).
    """
    global __RANGES__
    if '__RANGES__' not in globals():
        heuristic_ranges = utils.load_or_create_cache(
            'dbpedia_heuristic_ranges', _compute_ranges)
        __RANGES__ = defaultdict(lambda: None, heuristic_ranges)

    explicit_range = dbp_store.get_range(dbp_predicate)
    if explicit_range:
        return explicit_range
    return __RANGES__[dbp_predicate]
Пример #12
0
def get_cyclefree_wikitaxonomy_graph() -> CategoryGraph:
    """Return the cycle-free category graph with filtered categories and edges.

    The graph is memoised in ``__CYCLEFREE_WIKITAXONOMY_GRAPH__``. On first
    use it is obtained from ``utils.load_or_create_cache`` (cache name
    'catgraph_cyclefree'); the initializer calls ``append_unconnected()`` on
    the result of ``get_wikitaxonomy_graph()``.
    """
    module_state = globals()
    if '__CYCLEFREE_WIKITAXONOMY_GRAPH__' in module_state:
        return module_state['__CYCLEFREE_WIKITAXONOMY_GRAPH__']

    def _build_graph():
        wikitaxonomy_graph = get_wikitaxonomy_graph()
        return wikitaxonomy_graph.append_unconnected()

    cyclefree_graph = utils.load_or_create_cache(
        'catgraph_cyclefree', _build_graph)
    module_state['__CYCLEFREE_WIKITAXONOMY_GRAPH__'] = cyclefree_graph
    return cyclefree_graph
Пример #13
0
def get_base_graph() -> CaLiGraph:
    """Return the graph created from categories and lists.

    The graph is memoised in ``__BASE_GRAPH__``. On first use it is obtained
    from ``utils.load_or_create_cache`` (cache name 'caligraph_base'); the
    initializer calls ``CaLiGraph.build_graph()`` followed by
    ``append_unconnected()``.
    """
    global __BASE_GRAPH__
    if '__BASE_GRAPH__' in globals():
        return __BASE_GRAPH__

    def _build_base_graph():
        built_graph = CaLiGraph.build_graph()
        return built_graph.append_unconnected()

    __BASE_GRAPH__ = utils.load_or_create_cache(
        'caligraph_base', _build_base_graph)
    return __BASE_GRAPH__
Пример #14
0
def get_axiom_graph() -> CaLiGraph:
    """Return the CaLiGraph enriched with axioms from the Cat2Ax approach.

    The graph is memoised in ``__AXIOM_GRAPH__``. On first use it is obtained
    from ``utils.load_or_create_cache`` (cache name 'caligraph_axiomatized');
    the initializer takes ``get_merged_ontology_graph()`` and chains
    ``remove_transitive_edges()`` and ``compute_axioms()`` on it.
    """
    module_state = globals()
    if '__AXIOM_GRAPH__' not in module_state:
        def _axiomatize():
            merged_graph = get_merged_ontology_graph()
            reduced_graph = merged_graph.remove_transitive_edges()
            return reduced_graph.compute_axioms()

        module_state['__AXIOM_GRAPH__'] = utils.load_or_create_cache(
            'caligraph_axiomatized', _axiomatize)
    return module_state['__AXIOM_GRAPH__']
Пример #15
0
def get_merged_ontology_graph() -> CaLiGraph:
    """Return the base graph joined with the DBpedia ontology.

    The graph is memoised in ``__MERGED_ONTOLOGY_GRAPH__``. On first use it is
    obtained from ``utils.load_or_create_cache`` (cache name
    'caligraph_merged_ontology'); the initializer works on
    ``get_base_graph().copy()`` and chains ``merge_ontology()`` and
    ``append_unconnected()``.
    """
    global __MERGED_ONTOLOGY_GRAPH__
    if '__MERGED_ONTOLOGY_GRAPH__' in globals():
        return __MERGED_ONTOLOGY_GRAPH__

    def _merge_with_ontology():
        # Operates on a copy obtained via ``copy()`` of the base graph.
        graph_copy = get_base_graph().copy()
        merged_graph = graph_copy.merge_ontology()
        return merged_graph.append_unconnected()

    __MERGED_ONTOLOGY_GRAPH__ = utils.load_or_create_cache(
        'caligraph_merged_ontology', _merge_with_ontology)
    return __MERGED_ONTOLOGY_GRAPH__
Пример #16
0
def get_merged_listgraph() -> ListGraph:
    """Return the list graph with filtered edges, resolved cycles, and merged lists.

    The graph is memoised in ``__MERGED_LISTGRAPH__``. On first use it is
    obtained from ``utils.load_or_create_cache`` (cache name
    'listgraph_merged'); the initializer takes
    ``get_cyclefree_wikitaxonomy_listgraph()`` and chains ``merge_nodes()``,
    ``remove_leaf_listcategories()`` and ``remove_transitive_edges()``.
    """
    module_state = globals()
    if '__MERGED_LISTGRAPH__' in module_state:
        return module_state['__MERGED_LISTGRAPH__']

    def _merge_lists():
        cyclefree_graph = get_cyclefree_wikitaxonomy_listgraph()
        merged_graph = cyclefree_graph.merge_nodes()
        pruned_graph = merged_graph.remove_leaf_listcategories()
        return pruned_graph.remove_transitive_edges()

    merged_listgraph = utils.load_or_create_cache(
        'listgraph_merged', _merge_lists)
    module_state['__MERGED_LISTGRAPH__'] = merged_listgraph
    return merged_listgraph
Пример #17
0
def get_base_listgraph() -> ListGraph:
    """Return the basic list graph without any modifications.

    The graph is memoised in ``__BASE_LISTGRAPH__``. On first use it is
    obtained from ``utils.load_or_create_cache`` (cache name 'listgraph_base');
    the initializer calls ``ListGraph.create_from_dbpedia()`` followed by
    ``append_unconnected()``.
    """
    global __BASE_LISTGRAPH__
    if '__BASE_LISTGRAPH__' in globals():
        return __BASE_LISTGRAPH__

    def _create_listgraph():
        dbpedia_listgraph = ListGraph.create_from_dbpedia()
        return dbpedia_listgraph.append_unconnected()

    __BASE_LISTGRAPH__ = utils.load_or_create_cache(
        'listgraph_base', _create_listgraph)
    return __BASE_LISTGRAPH__
Пример #18
0
def get_conceptual_category_graph() -> CategoryGraph:
    """Return the category graph with filtered categories.

    The graph is memoised in ``__CONCEPTUAL_CATEGORY_GRAPH__``. On first use
    it is obtained from ``utils.load_or_create_cache`` (cache name
    'catgraph_conceptual'); the initializer calls
    ``CategoryGraph.create_from_dbpedia()`` and chains ``make_conceptual()``
    and ``append_unconnected()``.
    """
    module_state = globals()
    if '__CONCEPTUAL_CATEGORY_GRAPH__' not in module_state:
        def _create_conceptual_graph():
            dbpedia_graph = CategoryGraph.create_from_dbpedia()
            conceptual_graph = dbpedia_graph.make_conceptual()
            return conceptual_graph.append_unconnected()

        module_state['__CONCEPTUAL_CATEGORY_GRAPH__'] = utils.load_or_create_cache(
            'catgraph_conceptual', _create_conceptual_graph)
    return module_state['__CONCEPTUAL_CATEGORY_GRAPH__']
Пример #19
0
def get_merged_graph() -> CategoryGraph:
    """Return the cycle-free category graph with filtered and merged categories and filtered edges.

    The graph is memoised in ``__MERGED_GRAPH__``. On first use it is obtained
    from ``utils.load_or_create_cache`` (cache name 'catgraph_merged'); the
    initializer takes ``get_cyclefree_wikitaxonomy_graph()`` and chains
    ``merge_nodes()`` and ``remove_transitive_edges()``.
    """
    global __MERGED_GRAPH__
    if '__MERGED_GRAPH__' in globals():
        return __MERGED_GRAPH__

    def _merge_categories():
        cyclefree_graph = get_cyclefree_wikitaxonomy_graph()
        merged_graph = cyclefree_graph.merge_nodes()
        return merged_graph.remove_transitive_edges()

    __MERGED_GRAPH__ = utils.load_or_create_cache(
        'catgraph_merged', _merge_categories)
    return __MERGED_GRAPH__
Пример #20
0
def get_wikitaxonomy_graph() -> CategoryGraph:
    """Return the category graph with filtered categories and edges.

    The graph is memoised in ``__WIKITAXONOMY_CATEGORY_GRAPH__``. On first use
    it is obtained from ``utils.load_or_create_cache`` (cache name
    'catgraph_wikitaxonomy'); the initializer calls
    ``remove_unrelated_edges()`` on the result of
    ``get_conceptual_category_graph()``.
    """
    module_state = globals()
    if '__WIKITAXONOMY_CATEGORY_GRAPH__' in module_state:
        return module_state['__WIKITAXONOMY_CATEGORY_GRAPH__']

    def _filter_edges():
        conceptual_graph = get_conceptual_category_graph()
        return conceptual_graph.remove_unrelated_edges()

    wikitaxonomy_graph = utils.load_or_create_cache(
        'catgraph_wikitaxonomy', _filter_edges)
    module_state['__WIKITAXONOMY_CATEGORY_GRAPH__'] = wikitaxonomy_graph
    return wikitaxonomy_graph
Пример #21
0
def resolve_redirect(dbp_resource: str, visited=None) -> str:
    """Return the resource to which `dbp_resource` redirects (if any) or `dbp_resource` itself.

    Redirects are followed transitively. The walk stops at the first resource
    that has no redirect entry or that has already been seen, so redirect
    cycles terminate and the resource at which the cycle closes is returned.

    The chain is followed iteratively rather than recursively, so long
    redirect chains cannot exhaust the interpreter's recursion limit.

    Args:
        dbp_resource: The resource to resolve.
        visited: Optional collection of resources to treat as already seen.
            It is copied, never modified.

    Returns:
        The final resource of the redirect chain, or `dbp_resource` when it
        has no redirect entry.
    """
    global __REDIRECTS__
    if '__REDIRECTS__' not in globals():
        initializer = lambda: rdf_util.create_single_val_dict_from_rdf([utils.get_data_file('files.dbpedia.redirects')], rdf_util.PREDICATE_REDIRECTS)
        __REDIRECTS__ = utils.load_or_create_cache('dbpedia_resource_redirects', initializer)

    redirects = __REDIRECTS__
    # Work on a private copy so a caller-supplied collection is left untouched.
    seen = set(visited) if visited else set()
    current = dbp_resource
    while current in redirects and current not in seen:
        seen.add(current)
        current = redirects[current]
    return current
Пример #22
0
def _get_page_data() -> dict:
    """Return the subject-entity page data, loading it on first use.

    The value is memoised in the module global
    ``__SUBJECT_ENTITY_PAGE_DATA__``. While it is unset, the data is obtained
    from ``utils.load_or_create_cache`` under the name
    'subject_entity_page_data', with ``_retrieve_page_data`` as the
    initializer.
    """
    module_state = globals()
    if '__SUBJECT_ENTITY_PAGE_DATA__' in module_state:
        return module_state['__SUBJECT_ENTITY_PAGE_DATA__']

    page_data = utils.load_or_create_cache(
        'subject_entity_page_data', _retrieve_page_data)
    module_state['__SUBJECT_ENTITY_PAGE_DATA__'] = page_data
    return page_data
Пример #23
0
def _get_raw_markup_from_xml() -> dict:
    """Return the raw markup mapping wrapped in a ``defaultdict(str)``.

    The underlying mapping is obtained from ``utils.load_or_create_cache``
    under the name 'wikipedia_raw_markup', with ``_parse_raw_markup_from_xml``
    as the initializer. Missing keys yield an empty string. Nothing is
    memoised in this function itself: every call goes through the cache helper
    and builds a new ``defaultdict``.
    """
    raw_markup = utils.load_or_create_cache(
        'wikipedia_raw_markup', _parse_raw_markup_from_xml)
    return defaultdict(str, raw_markup)
Пример #24
0
def get_type_frequency(dbp_type: str) -> float:
    """Return the number of resources having `dbp_type` as type.

    The frequency table is memoised in ``__TYPE_FREQUENCY__`` as a
    ``defaultdict(int)``, so unknown types yield 0. On first use it is filled
    from ``utils.load_or_create_cache`` (cache name
    'dbpedia_resource_type_frequency', initializer ``_compute_type_frequency``).
    """
    global __TYPE_FREQUENCY__
    if '__TYPE_FREQUENCY__' not in globals():
        stored_frequencies = utils.load_or_create_cache(
            'dbpedia_resource_type_frequency', _compute_type_frequency)
        __TYPE_FREQUENCY__ = defaultdict(int, stored_frequencies)

    frequency_by_type = __TYPE_FREQUENCY__
    return frequency_by_type[dbp_type]
Пример #25
0
def is_functional(dbp_predicate: str) -> bool:
    """Return True if the predicate is functional (a resource has at most one value for it).

    The lookup table is memoised in ``__PREDICATE_FUNCTIONAL__`` as a
    ``defaultdict(bool)``, so unknown predicates yield False. On first use it
    is filled from ``utils.load_or_create_cache`` (cache name
    'dbpedia_functional_predicates', initializer
    ``_create_functional_predicate_dict``).
    """
    module_state = globals()
    if '__PREDICATE_FUNCTIONAL__' not in module_state:
        stored_flags = utils.load_or_create_cache(
            'dbpedia_functional_predicates', _create_functional_predicate_dict)
        module_state['__PREDICATE_FUNCTIONAL__'] = defaultdict(bool, stored_flags)

    functional_by_predicate = module_state['__PREDICATE_FUNCTIONAL__']
    return functional_by_predicate[dbp_predicate]
Пример #26
0
def get_disambiguation_mapping() -> dict:
    """Return the disambiguation mapping as a ``defaultdict(set)``.

    The mapping is memoised in ``__DISAMBIGUATIONS__``. On first use it is
    filled from ``utils.load_or_create_cache`` (cache name
    'dbpedia_resource_disambiguations'); the initializer calls
    ``rdf_util.create_multi_val_dict_from_rdf`` on the
    'files.dbpedia.disambiguations' data file with
    ``rdf_util.PREDICATE_DISAMBIGUATES``.
    """
    global __DISAMBIGUATIONS__
    if '__DISAMBIGUATIONS__' in globals():
        return __DISAMBIGUATIONS__

    def _load_disambiguations():
        disambiguation_files = [utils.get_data_file('files.dbpedia.disambiguations')]
        return rdf_util.create_multi_val_dict_from_rdf(
            disambiguation_files, rdf_util.PREDICATE_DISAMBIGUATES)

    stored_mapping = utils.load_or_create_cache(
        'dbpedia_resource_disambiguations', _load_disambiguations)
    __DISAMBIGUATIONS__ = defaultdict(set, stored_mapping)
    return __DISAMBIGUATIONS__
Пример #27
0
def extract_parent_categories() -> dict:
    """Return the parent-category mapping from the 'wikipedia_parent_categories' cache.

    The value comes from ``utils.load_or_create_cache``; its initializer feeds
    the result of ``_get_raw_categories_and_templates_from_xml()`` into
    ``_extract_parent_categories_from_markup``. Nothing is memoised in this
    function itself, so every call goes through the cache helper.
    """
    def _extract_from_markup():
        raw_markup = _get_raw_categories_and_templates_from_xml()
        return _extract_parent_categories_from_markup(raw_markup)

    return utils.load_or_create_cache(
        'wikipedia_parent_categories', _extract_from_markup)
Пример #28
0
def _get_label_mapping() -> dict:
    """Return the resource label mapping, loading it on first use.

    The mapping is memoised in ``__RESOURCE_LABEL_MAPPING__``. On first use it
    is obtained from ``utils.load_or_create_cache`` (cache name
    'dbpedia_resource_labels'); the initializer calls
    ``rdf_util.create_single_val_dict_from_rdf`` on the 'files.dbpedia.labels'
    data file with ``rdf_util.PREDICATE_LABEL``.
    """
    module_state = globals()
    if '__RESOURCE_LABEL_MAPPING__' not in module_state:
        def _load_labels():
            label_files = [utils.get_data_file('files.dbpedia.labels')]
            return rdf_util.create_single_val_dict_from_rdf(
                label_files, rdf_util.PREDICATE_LABEL)

        module_state['__RESOURCE_LABEL_MAPPING__'] = utils.load_or_create_cache(
            'dbpedia_resource_labels', _load_labels)
    return module_state['__RESOURCE_LABEL_MAPPING__']
Пример #29
0
def get_parsed_articles() -> dict:
    """Return the parsed articles wrapped in a ``defaultdict`` that yields ``None`` for missing keys.

    The underlying mapping comes from ``utils.load_or_create_cache`` (cache
    name 'wikipedia_parsed_articles'); its initializer passes the result of
    ``_get_raw_articles_from_xml()`` to ``_parse_articles``. Nothing is
    memoised in this function itself: every call goes through the cache helper
    and builds a new ``defaultdict``.
    """
    def _parse_raw_articles():
        return _parse_articles(_get_raw_articles_from_xml())

    parsed_articles = utils.load_or_create_cache(
        'wikipedia_parsed_articles', _parse_raw_articles)
    return defaultdict(lambda: None, parsed_articles)
Пример #30
0
def get_inverse_lexicalisations(text: str) -> dict:
    """Return the resources that fit the given lexicalisation.

    `text` is lower-cased before the lookup. The table is memoised in
    ``__RESOURCE_INVERSE_LEXICALISATIONS__`` as a ``defaultdict(dict)``, so an
    unknown text yields an empty dict. On first use it is filled from
    ``utils.load_or_create_cache`` (cache name
    'dbpedia_resource_inverse_lexicalisations', initializer
    ``_compute_inverse_lexicalisations``).
    """
    global __RESOURCE_INVERSE_LEXICALISATIONS__
    if '__RESOURCE_INVERSE_LEXICALISATIONS__' not in globals():
        stored_lexicalisations = utils.load_or_create_cache(
            'dbpedia_resource_inverse_lexicalisations',
            _compute_inverse_lexicalisations)
        __RESOURCE_INVERSE_LEXICALISATIONS__ = defaultdict(
            dict, stored_lexicalisations)

    lookup_key = text.lower()
    return __RESOURCE_INVERSE_LEXICALISATIONS__[lookup_key]