Пример #1
0
def _get_lines_instances_types(graph) -> list:
    """Serialize the individual declaration and the direct types of resources.

    Covers every resource of the graph plus every CaLiGraph resource that is
    used as the value of a (non-transitive) node axiom. For each of them one
    triple typing it as ``rdf_util.CLASS_OWL_NAMED_INDIVIDUAL`` is produced,
    followed by one type triple per direct type, i.e. per node of the resource
    that is not an ancestor of another node of the same resource.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    # Collect, per node, the set of all of its ancestors.
    # NOTE(review): this relies on the top-down traversal visiting the parents
    # of a node before the node itself - confirm against the graph class.
    ancestors_by_node = defaultdict(set)
    for node in graph.traverse_nodes_topdown():
        node_parents = graph.parents(node)
        inherited = {
            ancestor
            for parent in node_parents
            for ancestor in ancestors_by_node[parent]
        }
        ancestors_by_node[node] = node_parents | inherited

    # Resources that only show up as axiom values still need to be declared.
    resources_in_axioms = set()
    for node in graph.nodes:
        for axiom in graph.get_axioms(node, transitive=False):
            axiom_value = axiom[1]
            if clg_util.is_clg_resource(axiom_value):
                resources_in_axioms.add(axiom_value)

    triples = []
    for resource in graph.get_all_resources() | resources_in_axioms:
        triples.append(
            serialize_util.as_object_triple(
                resource, rdf_util.PREDICATE_TYPE,
                rdf_util.CLASS_OWL_NAMED_INDIVIDUAL))
        assigned_nodes = graph.get_nodes_for_resource(resource)
        # Every node that is an ancestor of another assigned node is implied
        # by that node and hence not a direct type.
        implied_nodes = {
            ancestor
            for assigned in assigned_nodes
            for ancestor in ancestors_by_node[assigned]
        }
        for direct_type in assigned_nodes.difference(implied_nodes):
            triples.append(
                serialize_util.as_object_triple(
                    resource, rdf_util.PREDICATE_TYPE, direct_type))
    return triples
Пример #2
0
def _get_lines_ontology(graph) -> list:
    """Serialize the ontology part of the graph.

    The result contains, in this order: the ontology header (type, creation
    date, label, version), the class definitions with label and superclasses,
    the property declarations, the class disjointnesses and the restrictions
    derived from the (non-transitive) axioms of the nodes.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    triples = []

    # --- ontology header ---
    ontology_iri = 'http://caligraph.org/ontology'
    triples.append(
        serialize_util.as_object_triple(
            ontology_iri, rdf_util.PREDICATE_TYPE,
            'http://www.w3.org/2002/07/owl#Ontology'))
    triples.append(
        serialize_util.as_literal_triple(
            ontology_iri, 'http://purl.org/dc/terms/created',
            _get_creation_date()))
    triples.append(
        serialize_util.as_literal_triple(
            ontology_iri, rdf_util.PREDICATE_LABEL, 'CaLiGraph Ontology'))
    triples.append(
        serialize_util.as_literal_triple(
            ontology_iri, 'http://www.w3.org/2002/07/owl#versionInfo',
            utils.get_config('caligraph.version')))

    # --- classes (the root class rdf_util.CLASS_OWL_THING is not declared) ---
    for cls in graph.traverse_nodes_topdown():
        if cls == rdf_util.CLASS_OWL_THING:
            continue
        triples.append(
            serialize_util.as_object_triple(
                cls, rdf_util.PREDICATE_TYPE, rdf_util.CLASS_OWL_CLASS))
        cls_label = graph.get_label(cls)
        if cls_label:
            triples.append(
                serialize_util.as_literal_triple(
                    cls, rdf_util.PREDICATE_LABEL, cls_label))
        # A class without parents becomes a direct subclass of the root class.
        superclasses = graph.parents(cls) or {rdf_util.CLASS_OWL_THING}
        for superclass in superclasses:
            triples.append(
                serialize_util.as_object_triple(
                    cls, rdf_util.PREDICATE_SUBCLASS_OF, superclass))

    # --- properties ---
    triples.extend(
        serialize_util.as_object_triple(
            prop, rdf_util.PREDICATE_TYPE, rdf_util.CLASS_PROPERTY)
        for prop in graph.get_all_predicates())

    # --- disjointnesses ---
    for cls in graph.nodes:
        # The ordering check makes sure that each disjoint pair is serialized
        # only once.
        triples.extend(
            serialize_util.as_object_triple(
                cls, rdf_util.PREDICATE_DISJOINT_WITH, other_cls)
            for other_cls in graph.get_disjoint_nodes(cls)
            if cls < other_cls)

    # --- restrictions ---
    # A restriction is defined on its first occurrence only; every class that
    # has the axiom is linked to it afterwards.
    seen_restrictions = set()
    for cls in graph.nodes:
        for prop, value in graph.get_axioms(cls, transitive=False):
            restriction_key = (prop, value)
            triples.extend(
                _serialize_restriction(
                    cls, prop, value, restriction_key in seen_restrictions))
            seen_restrictions.add(restriction_key)
    return triples
Пример #3
0
def _get_lines_dbpedia_instance_transitive_caligraph_types(graph) -> list:
    """Serialize transitive CaLiGraph types for DBpedia resources.

    Only resources whose DBpedia counterpart (as returned by
    ``clg_util.clg_resource2dbp_resource``) is contained in
    ``dbp_store.get_resources()`` are considered. For each of them, one type
    triple is produced per transitive type, i.e. per ancestor of a direct type
    that is neither a direct type itself nor ``rdf_util.CLASS_OWL_THING``.
    The subject of the triples is the DBpedia resource.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    instance_transitive_clg_types = []

    # Collect, per node, the set of all of its ancestors.
    # NOTE(review): this relies on the top-down traversal visiting the parents
    # of a node before the node itself - confirm against the graph class.
    caligraph_ancestors = defaultdict(set)
    for n in graph.traverse_nodes_topdown():
        parents = graph.parents(n)
        caligraph_ancestors[n] = parents | {
            a
            for p in parents for a in caligraph_ancestors[p]
        }

    # The collection of DBpedia resources does not depend on the loop variable,
    # so it is fetched once instead of once per resource.
    dbp_resources = dbp_store.get_resources()

    for res in graph.get_all_resources():
        dbp_res = clg_util.clg_resource2dbp_resource(res)
        if dbp_res not in dbp_resources:
            continue

        types = graph.get_nodes_for_resource(res)
        # Direct types are the nodes of the resource that are not an ancestor
        # of another node of the same resource.
        direct_types = types.difference(
            {a
             for t in types for a in caligraph_ancestors[t]})
        transitive_types = {
            tt
            for t in direct_types for tt in graph.ancestors(t)
        }.difference(direct_types | {rdf_util.CLASS_OWL_THING})
        instance_transitive_clg_types.extend([
            serialize_util.as_object_triple(dbp_res, rdf_util.PREDICATE_TYPE,
                                            tt) for tt in transitive_types
        ])
    return instance_transitive_clg_types
Пример #4
0
def _get_lines_instances_transitive_types(graph) -> list:
    """Serialize the transitive types of all resources of the graph.

    For each resource, one type triple is produced per transitive type, i.e.
    per ancestor of a direct type that is neither a direct type itself nor
    ``rdf_util.CLASS_OWL_THING``.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    # Collect, per node, the set of all of its ancestors.
    # NOTE(review): this relies on the top-down traversal visiting the parents
    # of a node before the node itself - confirm against the graph class.
    ancestors_by_node = defaultdict(set)
    for node in graph.traverse_nodes_topdown():
        node_parents = graph.parents(node)
        inherited = {
            ancestor
            for parent in node_parents
            for ancestor in ancestors_by_node[parent]
        }
        ancestors_by_node[node] = node_parents | inherited

    triples = []
    for resource in graph.get_all_resources():
        assigned_nodes = graph.get_nodes_for_resource(resource)
        # Nodes that are an ancestor of another assigned node are implied by
        # that node and hence not direct types.
        implied_nodes = {
            ancestor
            for assigned in assigned_nodes
            for ancestor in ancestors_by_node[assigned]
        }
        direct_types = assigned_nodes.difference(implied_nodes)
        reachable_types = {
            ancestor
            for direct_type in direct_types
            for ancestor in graph.ancestors(direct_type)
        }
        excluded_types = direct_types | {rdf_util.CLASS_OWL_THING}
        for transitive_type in reachable_types.difference(excluded_types):
            triples.append(
                serialize_util.as_object_triple(
                    resource, rdf_util.PREDICATE_TYPE, transitive_type))
    return triples
Пример #5
0
def _get_lines_instances_relations(graph) -> list:
    """Serialize the facts (relations) of the resources.

    A relation whose object is a CaLiGraph resource is serialized as an object
    triple, every other relation as a literal triple.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    return [
        serialize_util.as_object_triple(subj, pred, obj)
        if clg_util.is_clg_resource(obj)
        else serialize_util.as_literal_triple(subj, pred, obj)
        for subj, pred, obj in graph.get_all_relations()
    ]
Пример #6
0
def _get_lines_ontology_dbpedia_mapping(graph) -> list:
    """Serialize the mapping of classes and properties to DBpedia.

    Every node except ``rdf_util.CLASS_OWL_THING`` is linked via
    ``rdf_util.PREDICATE_SUBCLASS_OF`` to each of its parts that is a DBpedia
    type. Every predicate is linked via
    ``rdf_util.PREDICATE_EQUIVALENT_PROPERTY`` to the result of
    ``clg_util.clg_type2dbp_type`` for that predicate.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    mapping_triples = []

    # classes
    for cls in graph.traverse_nodes_topdown():
        if cls == rdf_util.CLASS_OWL_THING:
            continue
        dbp_types = {
            part
            for part in graph.get_parts(cls) if dbp_util.is_dbp_type(part)
        }
        for dbp_type in dbp_types:
            mapping_triples.append(
                serialize_util.as_object_triple(
                    cls, rdf_util.PREDICATE_SUBCLASS_OF, dbp_type))

    # properties
    mapping_triples.extend(
        serialize_util.as_object_triple(
            prop, rdf_util.PREDICATE_EQUIVALENT_PROPERTY,
            clg_util.clg_type2dbp_type(prop))
        for prop in graph.get_all_predicates())
    return mapping_triples
Пример #7
0
def _serialize_restriction(class_iri: str, prop_iri: str, val: str,
                           restriction_is_defined: bool) -> list:
    """Serialize a has-value restriction (i.e. a relation axiom) of a class.

    The IRI of the restriction is built from the ontology namespace, the
    property identifier and the value identifier. Unless the restriction is
    already defined, its definition (type, label, property and value) is
    serialized first. The triple declaring the class a subclass of the
    restriction is always serialized last.

    :param class_iri: IRI of the class the restriction applies to
    :param prop_iri: IRI of the restricted property
    :param val: value of the restriction (a CaLiGraph resource or a literal)
    :param restriction_is_defined: if True, the definition of the restriction
        is omitted and only the subclass triple is returned
    :return: list of triples as produced by ``serialize_util``
    """
    # Strip the namespaces to get short identifiers for IRI and label.
    prop_id = prop_iri[len(clg_util.NAMESPACE_CLG_ONTOLOGY):]
    value_is_resource = clg_util.is_clg_resource(val)
    if value_is_resource:
        val_id = val[len(clg_util.NAMESPACE_CLG_RESOURCE):]
    else:
        val_id = val
    restriction_iri = f'{clg_util.NAMESPACE_CLG_ONTOLOGY}RestrictionHasValue_{prop_id}_{val_id}'

    result = []
    if not restriction_is_defined:
        # Resources are referenced as objects, everything else as literal.
        if value_is_resource:
            as_value_triple = serialize_util.as_object_triple
        else:
            as_value_triple = serialize_util.as_literal_triple
        result.append(
            serialize_util.as_object_triple(
                restriction_iri, rdf_util.PREDICATE_TYPE,
                'http://www.w3.org/2002/07/owl#Restriction'))
        result.append(
            serialize_util.as_literal_triple(
                restriction_iri, rdf_util.PREDICATE_LABEL,
                f'Restriction onProperty={prop_id} hasValue={val_id}'))
        result.append(
            serialize_util.as_object_triple(
                restriction_iri, 'http://www.w3.org/2002/07/owl#onProperty',
                prop_iri))
        result.append(
            as_value_triple(
                restriction_iri, 'http://www.w3.org/2002/07/owl#hasValue',
                val))

    result.append(
        serialize_util.as_object_triple(
            class_iri, rdf_util.PREDICATE_SUBCLASS_OF, restriction_iri))
    return result
Пример #8
0
def _get_lines_instances_provenance(graph) -> list:
    """Serialize the provenance information of the resources.

    Each provenance entry of a resource is converted with
    ``dbp_util.dbp_resource2wikipedia_resource`` and linked to the resource
    via ``rdf_util.PREDICATE_WAS_DERIVED_FROM``. Entries that convert to the
    same value are serialized only once per resource.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    provenance_triples = []
    for resource in graph.get_all_resources():
        origins = {
            dbp_util.dbp_resource2wikipedia_resource(origin)
            for origin in graph.get_resource_provenance(resource)
        }
        for origin in origins:
            provenance_triples.append(
                serialize_util.as_object_triple(
                    resource, rdf_util.PREDICATE_WAS_DERIVED_FROM, origin))
    return provenance_triples
Пример #9
0
def _get_lines_ontology_provenance(graph) -> list:
    """Serialize the provenance information of the classes of the ontology.

    For every node except ``rdf_util.CLASS_OWL_THING``, each part of the node
    is converted with ``dbp_util.dbp_resource2wikipedia_resource`` and linked
    to the node via ``rdf_util.PREDICATE_WAS_DERIVED_FROM``. Parts that
    convert to the same value are serialized only once per node.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    provenance_triples = []
    for cls in graph.traverse_nodes_topdown():
        if cls == rdf_util.CLASS_OWL_THING:
            continue
        origins = {
            dbp_util.dbp_resource2wikipedia_resource(part)
            for part in graph.get_parts(cls)
        }
        for origin in origins:
            provenance_triples.append(
                serialize_util.as_object_triple(
                    cls, rdf_util.PREDICATE_WAS_DERIVED_FROM, origin))
    return provenance_triples
Пример #10
0
def _get_lines_instances_dbpedia_mapping(graph) -> list:
    """Serialize DBpedia mapping for resources.

    Covers every resource of the graph plus every CaLiGraph resource that is
    used as the value of a (non-transitive) node axiom. A resource is linked
    to its DBpedia counterpart via ``rdf_util.PREDICATE_SAME_AS`` if that
    counterpart is contained in ``dbp_store.get_resources()``.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    lines_instances_dbpedia_mapping = []
    # Resources that only show up as axiom values are mapped as well.
    axiom_resources = {
        ax[1]
        for n in graph.nodes for ax in graph.get_axioms(n, transitive=False)
        if clg_util.is_clg_resource(ax[1])
    }
    # The collection of DBpedia resources does not depend on the loop variable,
    # so it is fetched once instead of once per resource.
    dbp_resources = dbp_store.get_resources()
    for res in graph.get_all_resources() | axiom_resources:
        equivalent_res = clg_util.clg_resource2dbp_resource(res)
        if equivalent_res in dbp_resources:
            lines_instances_dbpedia_mapping.append(
                serialize_util.as_object_triple(res,
                                                rdf_util.PREDICATE_SAME_AS,
                                                equivalent_res))
    return lines_instances_dbpedia_mapping
Пример #11
0
def _get_lines_dbpedia_instances(graph) -> list:
    """Serialize the resources that are new to DBpedia in DBpedia namespace.

    A resource is new if its DBpedia counterpart is not contained in
    ``dbp_store.get_resources()``. Each new resource is typed as
    ``rdf_util.CLASS_OWL_NAMED_INDIVIDUAL`` and, if the graph has a label for
    the corresponding CaLiGraph resource, labelled with it.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    dbp_candidates = {
        clg_util.clg_resource2dbp_resource(resource)
        for resource in graph.get_all_resources()
    }
    unknown_to_dbpedia = dbp_candidates.difference(dbp_store.get_resources())

    triples = []
    for dbp_iri in unknown_to_dbpedia:
        triples.append(
            serialize_util.as_object_triple(
                dbp_iri, rdf_util.PREDICATE_TYPE,
                rdf_util.CLASS_OWL_NAMED_INDIVIDUAL))
        # Labels are stored for the CaLiGraph resource, so convert back.
        resource_label = graph.get_label(
            clg_util.dbp_resource2clg_resource(dbp_iri))
        if resource_label:
            triples.append(
                serialize_util.as_literal_triple(
                    dbp_iri, rdf_util.PREDICATE_LABEL, resource_label))
    return triples
Пример #12
0
def _get_lines_dbpedia_instance_types(graph) -> list:
    """Serialize new types for DBpedia resources in DBpedia namespace.

    For every node, the transitive DBpedia types of the node are extended by
    their transitive supertype closure (without ``rdf_util.CLASS_OWL_THING``)
    and assigned to the DBpedia counterparts of the resources of the node.
    For a resource contained in ``dbp_store.get_resources()`` only the types
    that are not already among ``dbp_store.get_transitive_types`` of the
    resource are kept; for any other resource all types are kept.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    # The collection of DBpedia resources does not depend on the loop
    # variables, so it is fetched once instead of once per node and resource.
    dbp_resources = dbp_store.get_resources()

    new_dbpedia_types = defaultdict(set)
    for node in graph.nodes:
        node_types = graph.get_transitive_dbpedia_types(node,
                                                        force_recompute=True)
        transitive_node_types = {
            tt
            for t in node_types
            for tt in dbp_store.get_transitive_supertype_closure(t)
        }.difference({rdf_util.CLASS_OWL_THING})
        for res in graph.get_resources(node):
            dbp_res = clg_util.clg_resource2dbp_resource(res)
            if dbp_res in dbp_resources:
                # Known resource: serialize only types DBpedia does not have.
                new_dbpedia_types[dbp_res].update(
                    transitive_node_types.difference(
                        dbp_store.get_transitive_types(dbp_res)))
            else:
                # Unknown resource: every type is new.
                new_dbpedia_types[dbp_res].update(transitive_node_types)
    return [
        serialize_util.as_object_triple(res, rdf_util.PREDICATE_TYPE, t)
        for res, types in new_dbpedia_types.items() for t in types
    ]
Пример #13
0
def _get_lines_dbpedia_instance_relations(graph) -> list:
    """Serialize new facts for DBpedia resources in DBpedia namespace.

    Every axiom of a node is applied to all resources of the node. The
    resulting fact is translated to the DBpedia namespace and kept if the
    resource is not contained in ``dbp_store.get_resources()`` or if
    ``dbp_store.get_properties`` of the resource does not contain the value
    for the property. Facts are collected in a set, so each distinct fact is
    serialized once. A fact whose object is a DBpedia resource is serialized
    as an object triple, every other fact as a literal triple.

    :param graph: the CaLiGraph graph to serialize
    :return: list of triples as produced by ``serialize_util``
    """
    # The collection of DBpedia resources does not depend on the loop
    # variables, so it is fetched once instead of once per check.
    dbp_resources = dbp_store.get_resources()

    new_instance_relations = set()
    for node in graph.nodes:
        for prop, val in graph.get_axioms(node):
            dbp_prop = clg_util.clg_type2dbp_type(prop)
            dbp_val = clg_util.clg_resource2dbp_resource(
                val) if clg_util.is_clg_resource(val) else val
            for res in graph.get_resources(node):
                dbp_res = clg_util.clg_resource2dbp_resource(res)
                if dbp_res in dbp_resources:
                    # Properties are looked up for known resources only and
                    # only once per check.
                    known_props = dbp_store.get_properties(dbp_res)
                    if dbp_prop in known_props and dbp_val in known_props[
                            dbp_prop]:
                        continue  # DBpedia already contains this fact
                new_instance_relations.add((dbp_res, dbp_prop, dbp_val))

    lines_dbpedia_instance_relations = []
    for s, p, o in new_instance_relations:
        if dbp_util.is_dbp_resource(o):
            lines_dbpedia_instance_relations.append(
                serialize_util.as_object_triple(s, p, o))
        else:
            lines_dbpedia_instance_relations.append(
                serialize_util.as_literal_triple(s, p, o))
    return lines_dbpedia_instance_relations
Пример #14
0
def _get_lines_metadata(graph) -> list:
    """Serialize the metadata (VoID description) of the dataset.

    Besides static information like title, description, licenses, creators,
    publishers and sources, the description contains the creation date and
    the number of resources, nodes and predicates of the graph.

    :param graph: the CaLiGraph graph to take the statistics from
    :return: list of triples as produced by ``serialize_util``
    """
    dataset_iri = 'http://caligraph.org/.well-known/void'
    summary = (
        'CaLiGraph is a large-scale general-purpose knowledge graph that '
        'extends DBpedia with a more fine-grained and restrictive ontology '
        'as well as additional resources extracted from Wikipedia list pages.')
    num_entities = len(graph.get_all_resources())
    num_classes = len(graph.nodes)
    num_predicates = len(graph.get_all_predicates())

    as_object = serialize_util.as_object_triple
    as_literal = serialize_util.as_literal_triple
    # One row per triple: (serializer, predicate, value); the subject is
    # always the dataset itself.
    statements = [
        (as_object, rdf_util.PREDICATE_TYPE,
         'http://rdfs.org/ns/void#Dataset'),
        (as_literal, 'http://purl.org/dc/elements/1.1/title', 'CaLiGraph'),
        (as_literal, rdf_util.PREDICATE_LABEL, 'CaLiGraph'),
        (as_literal, 'http://purl.org/dc/elements/1.1/description', summary),
        (as_object, 'http://purl.org/dc/terms/license',
         'http://www.gnu.org/copyleft/fdl.html'),
        (as_object, 'http://purl.org/dc/terms/license',
         'http://creativecommons.org/licenses/by-sa/4.0/'),
        (as_literal, 'http://purl.org/dc/terms/creator', 'Nicolas Heist'),
        (as_literal, 'http://purl.org/dc/terms/creator', 'Heiko Paulheim'),
        (as_literal, 'http://purl.org/dc/terms/created',
         _get_creation_date()),
        (as_literal, 'http://purl.org/dc/terms/publisher', 'Nicolas Heist'),
        (as_literal, 'http://purl.org/dc/terms/publisher', 'Heiko Paulheim'),
        (as_literal, 'http://rdfs.org/ns/void#uriSpace',
         clg_util.NAMESPACE_CLG_RESOURCE),
        (as_literal, 'http://rdfs.org/ns/void#entities', num_entities),
        (as_literal, 'http://rdfs.org/ns/void#classes', num_classes),
        (as_literal, 'http://rdfs.org/ns/void#properties', num_predicates),
        (as_object, 'http://purl.org/dc/terms/source',
         'http://dbpedia.org/resource/DBpedia'),
        (as_object, 'http://purl.org/dc/terms/source',
         'http://dbpedia.org/resource/Wikipedia'),
        (as_object, 'http://xmlns.com/foaf/0.1/homepage',
         'http://caligraph.org'),
        (as_object, 'http://rdfs.org/ns/void#sparqlEndpoint',
         'http://caligraph.org/sparql'),
    ]
    return [
        serialize(dataset_iri, predicate, value)
        for serialize, predicate, value in statements
    ]