示例#1
0
    def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
        """
        Add an edge for a triple to ``self.graph``.

        This method should be used by all derived classes when adding an edge to the networkx.MultiDiGraph.
        The subject and object are registered through ``self.add_node`` and the values it
        returns are used as the edge endpoints. The ``edge_label`` is derived from the
        predicate IRI and normalised in the following order:

        1. spaces are replaced with underscores (snake_case form),
        2. a leading ``self.BIOLINK`` prefix is removed,
        3. if the result is still a CURIE, it is replaced by its ``curie_lookup`` mapping,
           or by ``self.DEFAULT_EDGE_LABEL`` when no mapping is found.

        The edge is only added when no edge with the same generated key already exists
        between the two nodes; an existing edge is left untouched.

        Parameters
        ----------
        subject_iri: rdflib.URIRef
            Subject IRI for the subject in a triple
        object_iri: rdflib.URIRef
            Object IRI for the object in a triple
        predicate_iri: rdflib.URIRef
            Predicate IRI for the predicate in a triple

        Returns
        -------
        Tuple[str, str, str]
            A 3-nary tuple (of the form subject, object, predicate) that represents the edge.
            The third element is the processed ``edge_label`` without the ``biolink:`` prefix
            that is stored on the edge itself.

        """
        s = self.add_node(subject_iri)
        o = self.add_node(object_iri)
        relation = self.prefix_manager.contract(predicate_iri)
        edge_label = process_iri(predicate_iri)
        if ' ' in edge_label:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))
            # Actually perform the replacement announced by the log message above.
            edge_label = edge_label.replace(' ', '_')
        if edge_label.startswith(self.BIOLINK):
            logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
            # Strip only the leading prefix; later occurrences of the same text are kept.
            edge_label = edge_label[len(self.BIOLINK):]

        if PrefixManager.is_curie(edge_label):
            name = curie_lookup(edge_label)
            if name:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
                edge_label = name
            else:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
                edge_label = self.DEFAULT_EDGE_LABEL

        kwargs = {
            'subject': s,
            'predicate': str(predicate_iri),
            'object': o,
            'relation': relation,
            'edge_label': f"biolink:{edge_label}"
        }
        if 'provided_by' in self.graph_metadata:
            kwargs['provided_by'] = self.graph_metadata['provided_by']

        # The key is built from the un-prefixed edge_label, so it also serves
        # as the duplicate check for (subject, label, object).
        key = generate_edge_key(s, edge_label, o)
        if not self.graph.has_edge(s, o, key=key):
            self.graph.add_edge(s, o, key=key, **kwargs)
        # TODO: support append
        return s, o, edge_label
示例#2
0
    def add_edge_attribute(self, subject_iri: Union[URIRef, str],
                           object_iri: URIRef, predicate_iri: URIRef, key: str,
                           value: str) -> None:
        """
        Attach an attribute to the edge identified by a (subject, predicate, object) triple.

        The attribute name is resolved first: if the lower-cased ``key`` is a known
        property in ``is_property_multivalued`` it is used as is (lower-cased);
        otherwise ``key`` is treated as a URI and translated through
        ``property_mapping``. When no property name can be resolved the call is a no-op.

        The edge is located in ``self.graph`` by the subject and object CURIEs and an
        edge key generated from them and the edge label; the attribute is then handed
        to ``self._add_attribute`` together with the edge's data.

        NOTE(review): earlier documentation stated that missing nodes and a missing
        edge are created on demand. This method only looks the edge data up via
        ``get_edge_data``; confirm whether ``_add_attribute`` or the caller is
        expected to cover the missing-edge case.

        Parameters
        ----------
        subject_iri: [rdflib.URIRef, str]
            The IRI of the subject node of an edge in rdflib.Graph
        object_iri: rdflib.URIRef
            The IRI of the object node of an edge in rdflib.Graph
        predicate_iri: rdflib.URIRef
            The IRI of the predicate representing an edge in rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: str
            The value of the attribute

        """
        lowered_key = key.lower()
        if lowered_key in is_property_multivalued:
            key = lowered_key
        else:
            key_ref = key if isinstance(key, URIRef) else URIRef(key)
            key = property_mapping.get(key_ref)

        # Nothing to record when the key maps onto no known property.
        if key is None:
            return

        subject_curie = make_curie(subject_iri)
        object_curie = make_curie(object_iri)

        edge_label = process_iri(predicate_iri)
        if is_curie(edge_label):
            edge_label = curie_lookup(edge_label)

        edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
        edge_data = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)
        self._add_attribute(edge_data, key, value)
示例#3
0
    def add_node_attribute(
        self, iri: Union[URIRef, str], key: str, value: Union[str, List]
    ) -> None:
        """
        Record an attribute for a node in the node cache, wrapping the value in a
        list when the property is flagged as multi-valued.

        The ``key`` may be a rdflib.URIRef or an URI string that maps onto a
        property name as defined in ``rdf_utils.property_mapping``. IRIs are
        contracted to CURIEs, passed through ``curie_lookup``, and finally reduced
        to the CURIE reference so that property names never carry a prefix.

        Besides ``self.node_cache``, every value is also appended to
        ``self.node_record`` under the resolved property name.

        Parameters
        ----------
        iri: Union[rdflib.URIRef, str]
            The IRI of a node in the rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: Union[str, List]
            The value of the attribute

        Returns
        -------
        None
            The method works through side effects on ``self.node_record`` and
            ``self.node_cache`` only.

        """
        pm = self.prefix_manager

        # Resolve the attribute name: IRI -> CURIE -> looked-up name -> reference.
        key_curie = pm.contract(key) if pm.is_iri(key) else key
        looked_up = curie_lookup(key_curie)
        if looked_up:
            key_curie = looked_up
        # property names will always be just the reference
        mapped_key = pm.get_reference(key_curie) if pm.is_curie(key_curie) else key_curie

        # Normalise rdflib terms: URIRefs become CURIEs, other identifiers
        # are converted to their Python representation.
        if isinstance(value, rdflib.term.URIRef):
            value = pm.contract(value)
        elif isinstance(value, rdflib.term.Identifier):
            value = value.toPython()

        if mapped_key in is_property_multivalued and is_property_multivalued[mapped_key]:
            value = [value]

        # Keep a running record of every value seen for this property.
        if mapped_key not in self.node_record:
            self.node_record[mapped_key] = [value]
        else:
            if isinstance(self.node_record[mapped_key], str):
                # Promote a lone string to a list before appending.
                self.node_record[mapped_key] = [self.node_record[mapped_key]]
            self.node_record[mapped_key].append(value)

        # Update the cached node, merging with an existing value when present.
        curie = pm.contract(iri)
        if curie not in self.node_cache:
            self.node_cache[curie] = {'id': curie, mapped_key: value}
        elif mapped_key in self.node_cache[curie]:
            self.node_cache[curie] = prepare_data_dict(
                self.node_cache[curie], {mapped_key: value}
            )
        else:
            self.node_cache[curie][mapped_key] = value
示例#4
0
def test_curie_lookup(query):
    """
    Check that looking up the CURIE in ``query[0]`` yields ``query[1]``.
    """
    assert curie_lookup(query[0]) == query[1]
示例#5
0
def test_curie_lookup(query):
    """
    Check that ``curie_lookup`` maps ``query[0]`` onto the expected ``query[1]``.
    """
    assert curie_lookup(query[0]) == query[1]