示例#1
0
    def load_edge(self, edge_record: List) -> Tuple:
        """
        Load an edge into an instance of BaseGraph

        Parameters
        ----------
        edge_record: List
            A 4-tuple edge record

        Returns
        -------
        Tuple
            A tuple with subject ID, object ID, edge key, and edge data

        """

        subject_node = edge_record[0]
        edge = edge_record[1]
        object_node = edge_record[2]

        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge.keys():
            edge['provided_by'] = self.graph_metadata['provided_by']
        if 'id' not in edge.keys():
            edge['id'] = generate_uuid()
        key = generate_edge_key(subject_node['id'], edge['predicate'], object_node['id'])
        edge = validate_edge(edge)
        edge = sanitize_import(edge.copy())
        self.edge_properties.update(edge.keys())
        return subject_node['id'], object_node['id'], key, edge
示例#2
0
    def read_edge(self, edge: Dict) -> Optional[Tuple]:
        """
        Load an edge into an instance of BaseGraph.

        Parameters
        ----------
        edge: Dict
            An edge

        Returns
        -------
        Optional[Tuple]
            A tuple that contains subject id, object id, edge key, and edge data

        """
        edge = self.validate_edge(edge)
        if not edge:
            return None

        edge_data = sanitize_import(edge.copy(), self.list_delimiter)

        if "id" not in edge_data:
            edge_data["id"] = generate_uuid()
        s = edge_data["subject"]
        o = edge_data["object"]

        self.set_edge_provenance(edge_data)

        key = generate_edge_key(s, edge_data["predicate"], o)
        self.edge_properties.update(list(edge_data.keys()))
        if self.check_edge_filter(edge_data):
            self.node_properties.update(edge_data.keys())
            return s, o, key, edge_data
示例#3
0
    def load_edge(self, edge_record: List) -> Tuple:
        """
        Load an edge into an instance of BaseGraph

        Parameters
        ----------
        edge_record: List
            A 4-tuple edge record

        Returns
        -------
        Tuple
            A tuple with subject ID, object ID, edge key, and edge data

        """

        subject_node = edge_record[0]
        edge_data = edge_record[1]
        object_node = edge_record[2]

        self.set_edge_provenance(edge_data)

        if "id" not in edge_data.keys():
            edge_data["id"] = generate_uuid()
        key = generate_edge_key(subject_node["id"], edge_data["predicate"],
                                object_node["id"])

        edge_data = self.validate_edge(edge_data)
        if not edge_data:
            return ()

        edge_data = sanitize_import(edge_data.copy())
        self.edge_properties.update(edge_data.keys())
        return subject_node["id"], object_node["id"], key, edge_data
示例#4
0
    def read_edge(self, edge: Dict) -> Optional[Tuple]:
        """
        Load an edge into an instance of BaseGraph.

        Parameters
        ----------
        edge: Dict
            An edge

        Returns
        -------
        Optional[Tuple]
            A tuple that contains subject id, object id, edge key, and edge data

        """
        edge = validate_edge(edge)
        edge_data = sanitize_import(edge.copy())
        if 'id' not in edge_data:
            edge_data['id'] = generate_uuid()
        s = edge_data['subject']
        o = edge_data['object']
        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data.keys():
            edge_data['provided_by'] = self.graph_metadata['provided_by']
        key = generate_edge_key(s, edge_data['predicate'], o)
        self.edge_properties.update(list(edge_data.keys()))
        if self.check_edge_filter(edge_data):
            self.node_properties.update(edge_data.keys())
            return s, o, key, edge_data
示例#5
0
    def reify(self, u: str, v: str, data: Dict) -> Dict:
        """
        Create a node representation of an edge.

        Parameters
        ----------
        u: str
            Subject
        v: str
            Object
        k: str
            Edge key
        data: Dict
            Edge data

        Returns
        -------
        Dict
            The reified node

        """
        s = self.uriref(u)
        p = self.uriref(data["predicate"])
        o = self.uriref(v)

        if "id" in data:
            node_id = self.uriref(data["id"])
        else:
            # generate a UUID for the reified node
            node_id = self.uriref(generate_uuid())
        reified_node = data.copy()
        if "category" in reified_node:
            del reified_node["category"]
        reified_node["id"] = node_id
        reified_node["type"] = "biolink:Association"
        reified_node["subject"] = s
        reified_node["predicate"] = p
        reified_node["object"] = o
        return reified_node
示例#6
0
    def reify(self, u: str, v: str, data: Dict) -> Dict:
        """
        Create a node representation of an edge.

        Parameters
        ----------
        u: str
            Subject
        v: str
            Object
        k: str
            Edge key
        data: Dict
            Edge data

        Returns
        -------
        Dict
            The reified node

        """
        s = self.uriref(u)
        p = self.uriref(data['predicate'])
        o = self.uriref(v)

        if 'id' in data:
            node_id = self.uriref(data['id'])
        else:
            # generate a UUID for the reified node
            node_id = self.uriref(generate_uuid())
        reified_node = data.copy()
        if 'category' in reified_node:
            del reified_node['category']
        reified_node['id'] = node_id
        reified_node['type'] = 'biolink:Association'
        reified_node['subject'] = s
        reified_node['predicate'] = p
        reified_node['object'] = o
        return reified_node
示例#7
0
    def load_graph(self, rdfgraph: rdflib.Graph, **kwargs: Any) -> None:
        """
        Walk through the rdflib.Graph and load all triples into kgx.graph.base_graph.BaseGraph

        Parameters
        ----------
        rdfgraph: rdflib.Graph
            Graph containing nodes and edges
        kwargs: Any
            Any additional arguments

        """
        seen = set()
        seen.add(RDFS.subClassOf)
        for s, p, o in rdfgraph.triples((None, RDFS.subClassOf, None)):
            # ignoring blank nodes
            if isinstance(s, rdflib.term.BNode):
                continue
            pred = None
            parent = None
            os_interpretation = None
            if isinstance(o, rdflib.term.BNode):
                # C SubClassOf R some D
                for x in rdfgraph.objects(o, OWL.onProperty):
                    pred = x
                # owl:someValuesFrom
                for x in rdfgraph.objects(o, OWL.someValuesFrom):
                    os_interpretation = self.OWLSTAR.term(
                        "AllSomeInterpretation")
                    parent = x
                # owl:allValuesFrom
                for x in rdfgraph.objects(o, OWL.allValuesFrom):
                    os_interpretation = self.OWLSTAR.term(
                        "AllOnlyInterpretation")
                    parent = x
                if pred is None or parent is None:
                    log.warning(
                        f"{s} {p} {o} has OWL.onProperty {pred} and OWL.someValuesFrom {parent}"
                    )
                    log.warning(
                        "Do not know how to handle BNode: {}".format(o))
                    continue
            else:
                # C rdfs:subClassOf D (where C and D are named classes)
                pred = p
                parent = o
            if os_interpretation:
                # reify edges that have logical interpretation
                eid = generate_uuid()
                self.reified_nodes.add(eid)
                yield from self.triple(URIRef(eid),
                                       self.BIOLINK.term("category"),
                                       self.BIOLINK.Association)
                yield from self.triple(URIRef(eid),
                                       self.BIOLINK.term("subject"), s)
                yield from self.triple(URIRef(eid),
                                       self.BIOLINK.term("predicate"), pred)
                yield from self.triple(URIRef(eid),
                                       self.BIOLINK.term("object"), parent)
                yield from self.triple(
                    URIRef(eid),
                    self.BIOLINK.term("logical_interpretation"),
                    os_interpretation,
                )
            else:
                yield from self.triple(s, pred, parent)

        seen.add(OWL.equivalentClass)
        for s, p, o in rdfgraph.triples((None, OWL.equivalentClass, None)):
            # A owl:equivalentClass B (where A and B are named classes)
            if not isinstance(o, rdflib.term.BNode):
                yield from self.triple(s, p, o)

        for relation in rdfgraph.subjects(RDF.type, OWL.ObjectProperty):
            seen.add(relation)
            for s, p, o in rdfgraph.triples((relation, None, None)):
                if not isinstance(o, rdflib.term.BNode):
                    if p not in self.excluded_predicates:
                        yield from self.triple(s, p, o)

        for s, p, o in rdfgraph.triples((None, None, None)):
            if isinstance(s, rdflib.term.BNode) or isinstance(
                    o, rdflib.term.BNode):
                continue
            if p in seen:
                continue
            if p in self.excluded_predicates:
                continue
            yield from self.triple(s, p, o)

        for n in self.reified_nodes:
            data = self.node_cache.pop(n)
            self.dereify(n, data)

        for k, data in self.node_cache.items():

            node_data = self.validate_node(data)
            if not node_data:
                continue

            node_data = sanitize_import(node_data)
            self.set_node_provenance(node_data)
            if self.check_node_filter(node_data):
                self.node_properties.update(node_data.keys())
                yield k, node_data
        self.node_cache.clear()

        for k, data in self.edge_cache.items():

            edge_data = self.validate_edge(data)
            if not edge_data:
                continue

            edge_data = sanitize_import(edge_data)
            self.set_edge_provenance(edge_data)
            if self.check_edge_filter(edge_data):
                self.edge_properties.update(edge_data.keys())
                yield k[0], k[1], k[2], edge_data
        self.edge_cache.clear()
示例#8
0
    def load_edge(self, edge: Dict) -> Generator:
        """
        Load an edge into an instance of BaseGraph

        Parameters
        ----------
        edge : Dict
            An edge

        Returns
        -------
        Generator
            A generator for node and edge records

        """
        (element_uri, canonical_uri, predicate, property_name) = process_predicate(
            self.prefix_manager, edge["predicate_id"], self.predicate_mapping
        )
        if element_uri:
            edge_predicate = element_uri
        elif predicate:
            edge_predicate = predicate
        else:
            edge_predicate = property_name
        if canonical_uri:
            edge_predicate = element_uri

        data = {
            "subject": edge["subject_id"],
            "predicate": edge_predicate,
            "object": edge["object_id"],
        }
        del edge["predicate_id"]

        data = self.validate_edge(data)
        if not data:
            return  # ?

        subject_node = {}
        object_node = {}
        for k, v in edge.items():
            if k in SSSOM_NODE_PROPERTY_MAPPING:
                if k.startswith("subject"):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == "category" and not PrefixManager.is_curie(v):
                        v = f"biolink:OntologyClass"
                    subject_node[mapped_k] = v
                elif k.startswith("object"):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == "category" and not PrefixManager.is_curie(v):
                        v = f"biolink:OntologyClass"
                    object_node[mapped_k] = v
                else:
                    log.info(f"Ignoring {k} {v}")
            else:
                data[k] = v

        subject_node = self.load_node(subject_node)
        object_node = self.load_node(object_node)
        if not (subject_node and object_node):
            return  # ?
        objs = [subject_node, object_node]

        for k, v in self.graph_metadata.items():
            if k not in {"curie_map"}:
                data[k] = v

        edge_data = sanitize_import(data.copy())
        if "subject" in edge_data and "object" in edge_data:
            if "id" not in edge_data:
                edge_data["id"] = generate_uuid()
            s = edge_data["subject"]
            o = edge_data["object"]

            self.set_edge_provenance(edge_data)

            key = generate_edge_key(s, edge_data["predicate"], o)
            self.edge_properties.update(list(edge_data.keys()))
            objs.append((s, o, key, edge_data))
        else:
            self.owner.log_error(
                entity=str(edge_data),
                error_type=ErrorType.MISSING_NODE,
                message="Ignoring edge with either a missing 'subject' or 'object'",
                message_level=MessageLevel.WARNING
            )

        for o in objs:
            yield o
示例#9
0
    def load_edge(self, edge: Dict) -> Generator:
        """
        Load an edge into an instance of BaseGraph

        Parameters
        ----------
        edge : Dict
            An edge

        Returns
        -------
        Generator
            A generator for node and edge records

        """
        (element_uri, canonical_uri, predicate,
         property_name) = process_predicate(self.prefix_manager,
                                            edge['predicate_id'],
                                            self.predicate_mapping)
        if element_uri:
            edge_predicate = element_uri
        elif predicate:
            edge_predicate = predicate
        else:
            edge_predicate = property_name
        if canonical_uri:
            edge_predicate = element_uri
        data = {
            'subject': edge['subject_id'],
            'predicate': edge_predicate,
            'object': edge['object_id'],
        }
        del edge['predicate_id']
        data = validate_edge(data)
        subject_node = {}
        object_node = {}
        for k, v in edge.items():
            if k in SSSOM_NODE_PROPERTY_MAPPING:
                if k.startswith('subject'):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == 'category' and not PrefixManager.is_curie(
                            v):
                        v = f"biolink:OntologyClass"
                    subject_node[mapped_k] = v
                elif k.startswith('object'):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == 'category' and not PrefixManager.is_curie(
                            v):
                        v = f"biolink:OntologyClass"
                    object_node[mapped_k] = v
                else:
                    log.info(f"Ignoring {k} {v}")
            else:
                data[k] = v

        objs = [self.load_node(subject_node), self.load_node(object_node)]

        for k, v in self.graph_metadata.items():
            if k not in {'curie_map'}:
                data[k] = v

        edge_data = sanitize_import(data.copy())
        if 'subject' in edge_data and 'object' in edge_data:
            if 'id' not in edge_data:
                edge_data['id'] = generate_uuid()
            s = edge_data['subject']
            o = edge_data['object']
            if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data.keys(
            ):
                edge_data['provided_by'] = self.graph_metadata['provided_by']
            key = generate_edge_key(s, edge_data['predicate'], o)
            self.edge_properties.update(list(edge_data.keys()))
            objs.append((s, o, key, edge_data))
        else:
            log.info(
                "Ignoring edge with either a missing 'subject' or 'object': {}"
                .format(edge_data))

        for o in objs:
            yield o
示例#10
0
def test_generate_uuid():
    """
    Test generation of UUID by generate_uuid method.
    """
    s = generate_uuid()
    assert s.startswith('urn:uuid:')