def validate_edge_property_values(self, subject: str, object: str, data: dict) -> list:
    """
    Check the CURIE-valued properties of an edge.

    The ``subject`` and ``object`` identifiers, and ``data['relation']``
    when that key is present, are each expected to be a CURIE whose
    prefix is known. Every violation is collected as a
    ``ValidationError`` keyed on ``"<subject>-<object>"``.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties

    Returns
    -------
    list
        A list of errors for a given edge

    """
    edge_label = f"{subject}-{object}"
    kind = ErrorType.INVALID_EDGE_PROPERTY_VALUE
    found = []

    def report(text):
        # All errors raised here share the same entity, type and level.
        found.append(ValidationError(edge_label, kind, text, MessageLevel.ERROR))

    # --- subject ---
    if not PrefixManager.is_curie(subject):
        report(f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE")
    else:
        subject_prefix = PrefixManager.get_prefix(subject)
        # A falsy prefix is not reported for the subject.
        if subject_prefix and subject_prefix not in self.get_all_prefixes():
            report(
                f"Edge property 'subject' has a value '{subject}' with a CURIE prefix '{subject_prefix}' that is not represented in Biolink Model JSON-LD context"
            )

    # --- object ---
    # NOTE(review): object and relation are checked against ``self.prefixes``
    # (and without a truthiness guard) whereas subject uses
    # ``self.get_all_prefixes()`` -- confirm whether this asymmetry is intended.
    if not PrefixManager.is_curie(object):
        report(f"Edge property 'object' has a value '{object}' which is not a proper CURIE")
    else:
        object_prefix = PrefixManager.get_prefix(object)
        if object_prefix not in self.prefixes:
            report(
                f"Edge property 'object' has a value '{object}' with a CURIE prefix '{object_prefix}' that is not represented in Biolink Model JSON-LD context"
            )

    # --- relation (optional) ---
    if 'relation' not in data:
        return found

    relation = data['relation']
    if not PrefixManager.is_curie(relation):
        report(f"Edge property 'relation' has a value '{relation}' which is not a proper CURIE")
    else:
        relation_prefix = PrefixManager.get_prefix(relation)
        if relation_prefix not in self.prefixes:
            report(
                f"Edge property 'relation' has a value '{relation}' with a CURIE prefix '{relation_prefix}' that is not represented in Biolink Model JSON-LD context"
            )
    return found
def curie_lookup(curie: str) -> Optional[str]:
    """
    Resolve a CURIE to a label.

    Lookup order:

    1. If the CURIE prefix is one of ``OIO``, ``OWL``, ``owl``, ``OBO`` or
       ``rdfs``, the label is the part after the first ``:`` converted to
       snake case.
    2. Otherwise the ``curie_map`` of the CURIE lookup service is consulted.
    3. Otherwise the ``ontology_graph`` of the CURIE lookup service is
       consulted and the node's ``name`` attribute is used.

    Parameters
    ----------
    curie: str
        A CURIE

    Returns
    -------
    Optional[str]
        The label corresponding to the given CURIE, or ``None`` when no
        lookup succeeds

    """
    service = get_curie_lookup_service()
    prefix = PrefixManager.get_prefix(curie)

    if prefix in ('OIO', 'OWL', 'owl', 'OBO', 'rdfs'):
        local_part = curie.split(':', 1)[1]
        return stringcase.snakecase(local_part)

    if curie in service.curie_map:
        return service.curie_map[curie]

    if curie in service.ontology_graph:
        return service.ontology_graph.nodes()[curie]['name']

    return None
def validate_node_property_values(self, node: str, data: dict):
    """
    Check that a node identifier is a CURIE with a known prefix.

    Problems are reported through ``self.log_error``; nothing is returned.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties (not inspected by this method)

    """
    kind = ErrorType.INVALID_NODE_PROPERTY_VALUE

    if PrefixManager.is_curie(node):
        node_prefix = PrefixManager.get_prefix(node)
        # A falsy prefix, or one that is already known, is acceptable.
        if not node_prefix or node_prefix in self.get_all_prefixes():
            return
        text = (
            f"Node property 'id' has a value '{node}' with a CURIE prefix '{node_prefix}'"
            " is not represented in Biolink Model JSON-LD context"
        )
        self.log_error(node, kind, text, MessageLevel.ERROR)
        return

    self.log_error(
        node,
        kind,
        "Node property 'id' is expected to be of type 'CURIE'",
        MessageLevel.ERROR,
    )
def validate_node_property_values(node: str, data: dict) -> list:
    """
    Check that a node identifier is a CURIE with a known prefix.

    Parameters
    ----------
    node: str
        Node identifier
    data: dict
        Node properties (not inspected by this function)

    Returns
    -------
    list
        A list of errors for a given node; empty when the identifier is
        acceptable

    """
    kind = ErrorType.INVALID_NODE_PROPERTY_VALUE

    if not PrefixManager.is_curie(node):
        text = "Node property 'id' expected to be of type 'CURIE'"
        return [ValidationError(node, kind, text, MessageLevel.ERROR)]

    node_prefix = PrefixManager.get_prefix(node)
    # A falsy prefix, or one that is already known, is acceptable.
    if not node_prefix or node_prefix in Validator.get_all_prefixes():
        return []

    text = f"Node property 'id' has a value '{node}' with a CURIE prefix '{node_prefix}' is not represented in Biolink Model JSON-LD context"
    return [ValidationError(node, kind, text, MessageLevel.ERROR)]
def get_category(self, curie: str, node: dict) -> Optional[str]:
    """
    Work out the category of a node.

    The ``meta.basicPropertyValues`` entries of ``node`` whose ``pred``
    equals ``self.HAS_OBO_NAMESPACE`` are resolved through the toolkit
    (first by element name, then by mapping). When that yields nothing,
    or only ``biolink:OntologyClass``, a fixed table keyed on the CURIE
    prefix is tried. If the prefix is not in the table a warning is logged
    and the category is returned as it stands (``None`` or
    ``biolink:OntologyClass``).

    Parameters
    ----------
    curie: str
        Curie for node
    node: dict
        Node data

    Returns
    -------
    Optional[str]
        Category for the given node CURIE.

    """
    category = None

    # use meta.basicPropertyValues
    has_property_values = "meta" in node and "basicPropertyValues" in node["meta"]
    for entry in node["meta"]["basicPropertyValues"] if has_property_values else ():
        if entry["pred"] != self.HAS_OBO_NAMESPACE:
            continue
        category = entry["val"]
        element = self.toolkit.get_element(category)
        if element:
            category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element.name))}"
            continue
        mapped = self.toolkit.get_element_by_mapping(category)
        if mapped:
            category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(mapped))}"
        else:
            category = "biolink:OntologyClass"

    if category and category != "biolink:OntologyClass":
        return category

    # TODO: the mapping should be via biolink-model lookups
    category_by_prefix = {
        "HP": "biolink:PhenotypicFeature",
        "CHEBI": "biolink:ChemicalSubstance",
        "MONDO": "biolink:Disease",
        "UBERON": "biolink:AnatomicalEntity",
        "SO": "biolink:SequenceFeature",
        "CL": "biolink:Cell",
        "PR": "biolink:Protein",
        "NCBITaxon": "biolink:OrganismalEntity",
    }
    prefix = PrefixManager.get_prefix(curie)
    if prefix in category_by_prefix:
        category = category_by_prefix[prefix]
    else:
        self.owner.log_error(
            entity=f"{str(category)} for node {curie}",
            error_type=ErrorType.MISSING_CATEGORY,
            message="Missing category; Defaulting to 'biolink:OntologyClass'",
            message_level=MessageLevel.WARNING,
        )
    return category
def get_category(self, curie: str, node: dict) -> Optional[str]:
    """
    Get category for a given CURIE.

    The ``meta.basicPropertyValues`` entries of ``node`` whose ``pred``
    equals ``self.HAS_OBO_NAMESPACE`` are resolved through the toolkit,
    first by element name and then by mapping. When that yields nothing,
    or only ``biolink:OntologyClass``, a fixed table keyed on the CURIE
    prefix is tried; if the prefix is not listed, a debug message is
    logged and the category is returned as it stands (``None`` or
    ``biolink:OntologyClass``).

    Parameters
    ----------
    curie: str
        Curie for node
    node: dict
        Node data

    Returns
    -------
    Optional[str]
        Category for the given node CURIE.

    """
    category = None
    # use meta.basicPropertyValues
    if 'meta' in node and 'basicPropertyValues' in node['meta']:
        for p in node['meta']['basicPropertyValues']:
            if p['pred'] != self.HAS_OBO_NAMESPACE:
                continue
            category = p['val']
            element = self.toolkit.get_element(category)
            if element:
                category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element.name))}"
                continue
            element = self.toolkit.get_element_by_mapping(category)
            if element:
                # The mapping lookup may hand back the element *name* as a
                # plain string rather than an element object; reading
                # ``.name`` unconditionally would then raise AttributeError.
                # Accept either form.
                element_name = getattr(element, 'name', element)
                category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element_name))}"
            else:
                category = 'biolink:OntologyClass'

    if not category or category == 'biolink:OntologyClass':
        prefix = PrefixManager.get_prefix(curie)
        # TODO: the mapping should be via biolink-model lookups
        if prefix == 'HP':
            category = "biolink:PhenotypicFeature"
        elif prefix == 'CHEBI':
            category = "biolink:ChemicalSubstance"
        elif prefix == 'MONDO':
            category = "biolink:Disease"
        elif prefix == 'UBERON':
            category = "biolink:AnatomicalEntity"
        elif prefix == 'SO':
            category = "biolink:SequenceFeature"
        elif prefix == 'CL':
            category = "biolink:Cell"
        elif prefix == 'PR':
            category = "biolink:Protein"
        elif prefix == 'NCBITaxon':
            category = "biolink:OrganismalEntity"
        else:
            log.debug(
                f"{curie} Could not find a category mapping for '{category}'; Defaulting to 'biolink:OntologyClass'"
            )
    return category
def _compile_prefix_stats(self, n: str):
    """
    Record the CURIE prefix of node identifier ``n``.

    A non-empty prefix is added to ``self.category_stats["id_prefixes"]``
    if not already there; otherwise a ``MISSING_NODE_CURIE_PREFIX``
    warning is logged through ``self.mkg.log_error``.

    Parameters
    ----------
    n: str
        Node identifier

    """
    prefix = PrefixManager.get_prefix(n)
    if prefix:
        known_prefixes = self.category_stats["id_prefixes"]
        if prefix not in known_prefixes:
            known_prefixes.add(prefix)
        return

    self.mkg.log_error(
        entity=n,
        error_type=ErrorType.MISSING_NODE_CURIE_PREFIX,
        message="Node 'id' has no CURIE prefix",
        message_level=MessageLevel.WARNING,
    )
def validate_edge_property_values(self, subject: str, object: str, data: dict):
    """
    Check that the endpoints of an edge are CURIEs with known prefixes.

    Problems are reported through ``self.log_error`` with the entity
    ``"<subject>-><object>"``; nothing is returned.

    Parameters
    ----------
    subject: str
        Subject identifier
    object: str
        Object identifier
    data: dict
        Edge properties (not inspected by this method)

    """
    kind = ErrorType.INVALID_EDGE_PROPERTY_VALUE
    known_prefixes = self.get_all_prefixes()
    edge_label = f"{subject}->{object}"

    # --- subject ---
    if not PrefixManager.is_curie(subject):
        self.log_error(
            edge_label,
            kind,
            f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE",
            MessageLevel.ERROR,
        )
    else:
        subject_prefix = PrefixManager.get_prefix(subject)
        # A falsy prefix is not reported for the subject.
        if subject_prefix and subject_prefix not in known_prefixes:
            self.log_error(
                edge_label,
                kind,
                f"Edge property 'subject' has a value '{subject}' with a CURIE prefix "
                f"'{subject_prefix}' that is not represented in Biolink Model JSON-LD context",
                MessageLevel.ERROR,
            )

    # --- object ---
    # Unlike the subject, the object prefix is checked even when it is falsy.
    if not PrefixManager.is_curie(object):
        self.log_error(
            edge_label,
            kind,
            f"Edge property 'object' has a value '{object}' which is not a proper CURIE",
            MessageLevel.ERROR,
        )
    else:
        object_prefix = PrefixManager.get_prefix(object)
        if object_prefix not in known_prefixes:
            self.log_error(
                edge_label,
                kind,
                f"Edge property 'object' has a value '{object}' with a CURIE "
                f"prefix '{object_prefix}' that is not represented in Biolink Model JSON-LD context",
                MessageLevel.ERROR,
            )
def analyse_node_category(self, n, data):
    """
    Update ``self.category_stats`` for one node.

    Increments the overall ``count``, records the node's CURIE prefix in
    ``id_prefixes`` and bumps ``count_by_source`` for every entry of
    ``data['provided_by']`` (or for ``'unknown'`` when that key is absent).

    Parameters
    ----------
    n
        Node identifier
    data
        Node properties

    """
    stats = self.category_stats
    prefix = PrefixManager.get_prefix(n)
    stats['count'] += 1

    known_prefixes = stats['id_prefixes']
    if prefix not in known_prefixes:
        known_prefixes.add(prefix)

    if 'provided_by' not in data:
        # presumably the 'unknown' bucket is pre-initialised elsewhere -- verify
        stats['count_by_source']['unknown'] += 1
        return

    for source in data['provided_by']:
        per_source = stats['count_by_source']
        per_source[source] = per_source.get(source, 0) + 1
def _capture_prefix(self, n: str):
    """
    Count the CURIE prefix of node identifier ``n``.

    A non-empty prefix increments its entry in
    ``self.category_stats["count_by_id_prefix"]`` (starting at 1);
    otherwise a ``MISSING_NODE_CURIE_PREFIX`` warning is logged through
    ``self.summary.log_error``.

    Parameters
    ----------
    n: str
        Node identifier

    """
    prefix = PrefixManager.get_prefix(n)
    if prefix:
        tally = self.category_stats["count_by_id_prefix"]
        tally[prefix] = tally.get(prefix, 0) + 1
        return

    self.summary.log_error(
        entity=n,
        error_type=ErrorType.MISSING_NODE_CURIE_PREFIX,
        message="Node 'id' has no CURIE prefix",
        message_level=MessageLevel.WARNING,
    )
def test_get_prefix(query):
    """
    Check the behavior of the ``get_prefix`` method of PrefixManager.

    ``query[0]`` is the input passed to ``get_prefix`` and ``query[1]`` is
    the prefix it is expected to return.
    """
    given = query[0]
    expected = query[1]
    assert PrefixManager.get_prefix(given) == expected