def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
    """
    Adds an attribute to the attribute dictionary, respecting whether or not
    that attribute should be multi-valued.
    Multi-valued attributes will not contain duplicates.

    Some attributes are singular form of others. In such cases overflowing
    values will be placed into the correlating multi-valued attribute.
    For example, `name` attribute will hold only one value while any
    additional value will be stored as `synonym` attribute.

    Any other single-valued attribute (including keys that are not listed in
    `is_property_multivalued`) is simply overwritten with the latest value.

    Parameters
    ----------
    attr_dict: dict
        Dictionary representing the attribute set of a node or an edge in a networkx graph
    key: str
        The name of the attribute
    value: str
        The value of the attribute

    """
    # IRIs are reduced via process_iri before being stored.
    if PrefixManager.is_iri(value):
        value = process_iri(value)

    if key in is_property_multivalued and is_property_multivalued[key]:
        # Multi-valued: keep a list without duplicates.
        if key not in attr_dict:
            attr_dict[key] = [value]
        elif value not in attr_dict[key]:
            attr_dict[key].append(value)
    elif key == 'name' and 'name' in attr_dict:
        # A name is already recorded; keep it and store the overflow as a
        # synonym instead of losing either value.
        self._add_attribute(attr_dict, 'synonym', value)
    else:
        # First `name`, or any other single-valued attribute.
        attr_dict[key] = value
def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
    """
    This method should be used by all derived classes when adding an edge to
    the networkx.MultiDiGraph. This ensures that the `subject` and `object`
    identifiers are CURIEs, and that `edge_label` is in the correct form.

    Returns the CURIE identifiers used for the `subject` and `object` in the
    networkx.MultiDiGraph, and the processed `edge_label`.

    The edge is only added when no edge with the same generated key exists
    between the two nodes; an existing edge is left untouched.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple

    Returns
    -------
    Tuple[str, str, str]
        A 3-nary tuple (of the form subject, object, predicate) that represents the edge.
        The returned label is the bare label; the `edge_label` attribute stored on
        the edge carries an additional `biolink:` prefix.

    """
    s = self.add_node(subject_iri)
    o = self.add_node(object_iri)
    relation = self.prefix_manager.contract(predicate_iri)
    edge_label = process_iri(predicate_iri)

    if ' ' in edge_label:
        # NOTE(review): the message announces a replacement, but none is
        # performed in this method -- confirm whether process_iri (or a
        # caller) is expected to do it before changing the edge keys.
        logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))

    if edge_label.startswith(self.BIOLINK):
        logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
        # Strip only the leading prefix; str.replace would also remove any
        # later occurrence of the same text inside the label.
        edge_label = edge_label[len(self.BIOLINK):]

    if PrefixManager.is_curie(edge_label):
        name = curie_lookup(edge_label)
        if name:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
            edge_label = name
        else:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
            edge_label = self.DEFAULT_EDGE_LABEL

    kwargs = {
        'subject': s,
        'predicate': str(predicate_iri),
        'object': o,
        'relation': relation,
        'edge_label': f"biolink:{edge_label}"
    }
    if 'provided_by' in self.graph_metadata:
        kwargs['provided_by'] = self.graph_metadata['provided_by']

    key = generate_edge_key(s, edge_label, o)
    if not self.graph.has_edge(s, o, key=key):
        self.graph.add_edge(s, o, key=key, **kwargs)
    # TODO: support append
    return s, o, edge_label
def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
    """
    Adds an attribute to an edge, while taking into account whether the
    attribute should be multi-valued.
    Multi-valued properties will not contain duplicates.

    The key may be a rdflib.URIRef or a URI string that maps onto a property
    name as defined in `rdf_utils.property_mapping`. Keys that map onto no
    known property are ignored.

    If the edge does not exist then it will be created (together with its
    nodes) through `add_edge`, using subject_iri, object_iri and
    predicate_iri.

    Parameters
    ----------
    subject_iri: [rdflib.URIRef, str]
        The IRI of the subject node of an edge in rdflib.Graph
    object_iri: rdflib.URIRef
        The IRI of the object node of an edge in rdflib.Graph
    predicate_iri: rdflib.URIRef
        The IRI of the predicate representing an edge in rdflib.Graph
    key: str
        The name of the attribute. Can be a rdflib.URIRef or URI string
    value: str
        The value of the attribute

    """
    if key.lower() in is_property_multivalued:
        key = key.lower()
    else:
        if not isinstance(key, URIRef):
            key = URIRef(key)
        key = property_mapping.get(key)

    if key is None:
        # The key does not correspond to any known property.
        return

    subject_curie = make_curie(subject_iri)
    object_curie = make_curie(object_iri)
    edge_label = process_iri(predicate_iri)
    if is_curie(edge_label):
        edge_label = curie_lookup(edge_label)
    edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
    attr_dict = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)

    if attr_dict is None:
        # get_edge_data yields None for a missing edge; create the edge as
        # documented and look it up under the identifiers add_edge reports.
        subject_curie, object_curie, edge_label = self.add_edge(subject_iri, object_iri, predicate_iri)
        edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
        attr_dict = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)

    if attr_dict is None:
        # Still not locatable (e.g. the edge is stored under a different
        # key); report it instead of failing on a None attribute dictionary.
        logging.warning(
            "Unable to locate edge {} -- {} --> {}; discarding attribute {}".format(
                subject_curie, edge_label, object_curie, key))
        return

    self._add_attribute(attr_dict, key, value)
def _add_attribute(self, attr_dict: Dict, key: str, value: str) -> None:
    """
    Record `value` under `key` in an attribute dictionary, honouring the
    single- or multi-valued nature of the property.

    Keys that are `None` or not listed in `is_property_multivalued` are
    discarded with a warning. Multi-valued properties are kept as lists
    without duplicates. Single-valued properties keep the first value they
    receive; for 'name' any further value is stored as a 'synonym' instead,
    while further values of other single-valued properties are dropped.

    Parameters
    ----------
    attr_dict: dict
        Dictionary representing the attribute set of a node or an edge in a networkx graph
    key: str
        The name of the attribute
    value: str
        The value of the attribute

    """
    is_known = key is not None and key in is_property_multivalued
    if not is_known:
        logging.warning(
            "Discarding key {} as it is not a valid property.".format(key))
        return

    # Every stored value goes through process_iri and then make_curie.
    normalized = make_curie(process_iri(value))
    multivalued = is_property_multivalued[key]

    if multivalued:
        if key not in attr_dict:
            attr_dict[key] = [normalized]
            return
        if normalized not in attr_dict[key]:
            attr_dict[key].append(normalized)
        return

    if key not in attr_dict:
        attr_dict[key] = normalized
        return

    if key == 'name':
        # The name slot is taken; the extra value is handed on as a synonym
        # (and is normalized once more by the recursive call).
        self._add_attribute(attr_dict, 'synonym', normalized)
def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
    """
    This method should be used by all derived classes when adding an edge to
    the networkx.MultiDiGraph. This ensures that the subject and object
    identifiers are CURIEs, and that edge_label is in the correct form.

    Returns the CURIE identifiers used for the subject and object in the
    networkx.MultiDiGraph, and the processed edge_label.

    The processed edge_label doubles as the edge key; when an edge with that
    key already exists between the two nodes it is left untouched.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple

    Returns
    -------
    Tuple[str, str, str]
        A 3-nary tuple (of the form subject, object, predicate) that represents the edge

    """
    s = self.add_node(subject_iri)
    o = self.add_node(object_iri)

    relation = make_curie(predicate_iri)
    edge_label = process_iri(predicate_iri)

    if ' ' in edge_label:
        # NOTE(review): the message announces a replacement, but none is
        # performed in this method -- confirm where it is meant to happen.
        logging.debug(
            "predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'"
            .format(predicate_iri, edge_label))

    # TODO: shouldn't this move to the utilities function process_uri()
    if edge_label.startswith(self.BIOLINK):
        logging.debug(
            "predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix"
            .format(predicate_iri, edge_label, self.BIOLINK))
        # Strip only the leading prefix; str.replace would also remove any
        # later occurrence of the same text inside the label.
        edge_label = edge_label[len(self.BIOLINK):]

    # TODO: is there no way to get label of a CURIE?
    # TODO: this should also move to the utilities function
    # Any service? or preload required ontologies by prefix?
    if ':' in edge_label:
        # One message that names the fallback actually applied, rather than
        # an additional one with a hard-coded label.
        logging.debug(
            "predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}"
            .format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
        edge_label = self.DEFAULT_EDGE_LABEL

    kwargs = {'relation': relation, 'edge_label': edge_label}
    if 'provided_by' in self.graph_metadata:
        kwargs['provided_by'] = self.graph_metadata['provided_by']

    if self.graph.has_edge(s, o, key=edge_label):
        logging.debug("{} -- {} --> {} edge already exists".format(
            s, edge_label, o))
    else:
        self.graph.add_edge(s, o, key=edge_label, **kwargs)

    return s, o, edge_label
def test_process_iri(query):
    """
    Check that `process_iri` turns the input of a query into the expected value.

    Parameters
    ----------
    query:
        Indexable pair: item 0 is passed to `process_iri`, item 1 is the expected result

    """
    actual = process_iri(query[0])
    expected = query[1]
    assert actual == expected