示例#1
0
    def validate_edge_property_values(self, subject: str, object: str, data: dict) -> list:
        """
        Check the CURIE-valued properties of a single edge.

        ``subject`` and ``object`` are always checked; ``data['relation']`` is
        checked only when the ``relation`` key is present. Each value must be
        recognised as a CURIE by ``PrefixManager.is_curie`` and carry a known
        prefix. Every violation becomes a ``ValidationError`` keyed by
        ``"<subject>-<object>"`` at ``MessageLevel.ERROR``.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties (only the ``relation`` key is read here)

        Returns
        -------
        list
            The ``ValidationError`` objects collected for this edge; empty
            when no problem was found

        """
        edge_key = f"{subject}-{object}"
        kind = ErrorType.INVALID_EDGE_PROPERTY_VALUE
        found = []

        def report(text: str) -> None:
            # All findings for this edge share the same key, type and level.
            found.append(ValidationError(edge_key, kind, text, MessageLevel.ERROR))

        # --- subject -------------------------------------------------------
        if not PrefixManager.is_curie(subject):
            report(f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE")
        else:
            subject_prefix = PrefixManager.get_prefix(subject)
            # A falsy prefix is silently accepted for the subject.
            if subject_prefix and subject_prefix not in self.get_all_prefixes():
                report(
                    f"Edge property 'subject' has a value '{subject}' with a CURIE prefix "
                    f"'{subject_prefix}' that is not represented in Biolink Model JSON-LD context"
                )

        # NOTE(review): the object and relation checks below consult
        # ``self.prefixes`` (not ``self.get_all_prefixes()``) and do not skip
        # falsy prefixes -- presumably equivalent in practice; confirm.

        # --- object --------------------------------------------------------
        if not PrefixManager.is_curie(object):
            report(f"Edge property 'object' has a value '{object}' which is not a proper CURIE")
        else:
            object_prefix = PrefixManager.get_prefix(object)
            if object_prefix not in self.prefixes:
                report(
                    f"Edge property 'object' has a value '{object}' with a CURIE prefix "
                    f"'{object_prefix}' that is not represented in Biolink Model JSON-LD context"
                )

        # --- relation (optional) -------------------------------------------
        if 'relation' in data:
            relation = data['relation']
            if not PrefixManager.is_curie(relation):
                report(f"Edge property 'relation' has a value '{relation}' which is not a proper CURIE")
            else:
                relation_prefix = PrefixManager.get_prefix(relation)
                if relation_prefix not in self.prefixes:
                    report(
                        f"Edge property 'relation' has a value '{relation}' with a CURIE prefix "
                        f"'{relation_prefix}' that is not represented in Biolink Model JSON-LD context"
                    )

        return found
示例#2
0
def curie_lookup(curie: str) -> Optional[str]:
    """
    Resolve a CURIE to a label.

    The lookup proceeds in this order and stops at the first hit:

    1. If the CURIE's prefix is one of ``OIO``, ``OWL``, ``owl``, ``OBO`` or
       ``rdfs``, the part after the first ``:`` is converted to snake case.
    2. The ``curie_map`` of the object returned by
       ``get_curie_lookup_service()``.
    3. The ``name`` attribute of the matching node in that object's
       ``ontology_graph``.

    Parameters
    ----------
    curie: str
        A CURIE

    Returns
    -------
    Optional[str]
        The label found for the CURIE, or ``None`` when no step matched

    """
    service = get_curie_lookup_service()
    prefix = PrefixManager.get_prefix(curie)

    if prefix in ('OIO', 'OWL', 'owl', 'OBO', 'rdfs'):
        # The label is derived from the local part of the CURIE itself.
        local_part = curie.split(':', 1)[1]
        return stringcase.snakecase(local_part)

    if curie in service.curie_map:
        return service.curie_map[curie]

    if curie in service.ontology_graph:
        return service.ontology_graph.nodes()[curie]['name']

    return None
示例#3
0
文件: validator.py 项目: biolink/kgx
    def validate_node_property_values(self, node: str, data: dict):
        """
        Check that a node identifier is a CURIE with a known prefix.

        Problems are reported through ``self.log_error`` at
        ``MessageLevel.ERROR``; nothing is returned.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties (not inspected by this check)

        """
        kind = ErrorType.INVALID_NODE_PROPERTY_VALUE

        if not PrefixManager.is_curie(node):
            self.log_error(
                node,
                kind,
                "Node property 'id' is expected to be of type 'CURIE'",
                MessageLevel.ERROR,
            )
            return

        node_prefix = PrefixManager.get_prefix(node)
        # A falsy prefix is accepted; only a non-empty, unknown one is reported.
        if not node_prefix or node_prefix in self.get_all_prefixes():
            return

        text = (
            f"Node property 'id' has a value '{node}' with a CURIE prefix '{node_prefix}'"
            " is not represented in Biolink Model JSON-LD context"
        )
        self.log_error(node, kind, text, MessageLevel.ERROR)
示例#4
0
    def validate_node_property_values(node: str, data: dict) -> list:
        """
        Check that a node identifier is a CURIE with a known prefix.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties (not inspected by this check)

        Returns
        -------
        list
            At most one ``ValidationError`` describing the problem with the
            node identifier; an empty list when the identifier is accepted

        """
        kind = ErrorType.INVALID_NODE_PROPERTY_VALUE

        if not PrefixManager.is_curie(node):
            text = "Node property 'id' expected to be of type 'CURIE'"
            return [ValidationError(node, kind, text, MessageLevel.ERROR)]

        node_prefix = PrefixManager.get_prefix(node)
        # A falsy prefix is accepted; only a non-empty, unknown one is reported.
        if not node_prefix or node_prefix in Validator.get_all_prefixes():
            return []

        text = (
            f"Node property 'id' has a value '{node}' with a CURIE prefix '{node_prefix}'"
            " is not represented in Biolink Model JSON-LD context"
        )
        return [ValidationError(node, kind, text, MessageLevel.ERROR)]
示例#5
0
    def get_category(self, curie: str, node: dict) -> Optional[str]:
        """
        Work out the category of a node.

        The category is first derived from the node's
        ``meta.basicPropertyValues`` entries whose ``pred`` equals
        ``self.HAS_OBO_NAMESPACE`` (the last such entry wins). If that yields
        nothing, or only ``biolink:OntologyClass``, a fixed prefix-to-category
        table keyed on the CURIE prefix is consulted instead.

        Parameters
        ----------
        curie: str
            Curie for node
        node: dict
            Node data

        Returns
        -------
        Optional[str]
            The resolved category. When the prefix table has no entry, the
            value obtained from the first step is returned unchanged (this
            may be ``None``) after a warning is logged on ``self.owner``.

        """
        category = None

        # Step 1: use meta.basicPropertyValues
        if "meta" in node and "basicPropertyValues" in node["meta"]:
            for entry in node["meta"]["basicPropertyValues"]:
                is_namespace = entry["pred"] == self.HAS_OBO_NAMESPACE
                if not is_namespace:
                    continue

                namespace = entry["val"]
                element = self.toolkit.get_element(namespace)
                if element:
                    category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element.name))}"
                    continue

                # No direct element: try resolving the namespace via mappings.
                mapped = self.toolkit.get_element_by_mapping(namespace)
                if mapped:
                    category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(mapped))}"
                else:
                    category = "biolink:OntologyClass"

        if category and category != "biolink:OntologyClass":
            return category

        # Step 2: fall back to the CURIE prefix.
        # TODO: the mapping should be via biolink-model lookups
        category_by_prefix = {
            "HP": "biolink:PhenotypicFeature",
            "CHEBI": "biolink:ChemicalSubstance",
            "MONDO": "biolink:Disease",
            "UBERON": "biolink:AnatomicalEntity",
            "SO": "biolink:SequenceFeature",
            "CL": "biolink:Cell",
            "PR": "biolink:Protein",
            "NCBITaxon": "biolink:OrganismalEntity",
        }
        prefix = PrefixManager.get_prefix(curie)
        if prefix in category_by_prefix:
            return category_by_prefix[prefix]

        # NOTE(review): the message announces a default, but ``category`` is
        # returned as-is here -- presumably the caller applies it; confirm.
        self.owner.log_error(
            entity=f"{str(category)} for node {curie}",
            error_type=ErrorType.MISSING_CATEGORY,
            message="Missing category; Defaulting to 'biolink:OntologyClass'",
            message_level=MessageLevel.WARNING,
        )
        return category
示例#6
0
    def get_category(self, curie: str, node: dict) -> Optional[str]:
        """
        Work out the category of a node.

        The category is first derived from the node's
        ``meta.basicPropertyValues`` entries whose ``pred`` equals
        ``self.HAS_OBO_NAMESPACE`` (the last such entry wins). If that yields
        nothing, or only ``biolink:OntologyClass``, a fixed prefix-to-category
        table keyed on the CURIE prefix is consulted instead.

        Parameters
        ----------
        curie: str
            Curie for node
        node: dict
            Node data

        Returns
        -------
        Optional[str]
            The resolved category. When the prefix table has no entry, the
            value obtained from the first step is returned unchanged (this
            may be ``None``) after a debug message is logged.

        """
        category = None

        # Step 1: use meta.basicPropertyValues
        if 'meta' in node and 'basicPropertyValues' in node['meta']:
            for entry in node['meta']['basicPropertyValues']:
                is_namespace = entry['pred'] == self.HAS_OBO_NAMESPACE
                if not is_namespace:
                    continue

                namespace = entry['val']
                element = self.toolkit.get_element(namespace)
                if not element:
                    # No direct element: try resolving the namespace via mappings.
                    element = self.toolkit.get_element_by_mapping(namespace)

                if element:
                    category = f"biolink:{stringcase.pascalcase(stringcase.snakecase(element.name))}"
                else:
                    category = 'biolink:OntologyClass'

        if category and category != 'biolink:OntologyClass':
            return category

        # Step 2: fall back to the CURIE prefix.
        # TODO: the mapping should be via biolink-model lookups
        category_by_prefix = {
            'HP': "biolink:PhenotypicFeature",
            'CHEBI': "biolink:ChemicalSubstance",
            'MONDO': "biolink:Disease",
            'UBERON': "biolink:AnatomicalEntity",
            'SO': "biolink:SequenceFeature",
            'CL': "biolink:Cell",
            'PR': "biolink:Protein",
            'NCBITaxon': "biolink:OrganismalEntity",
        }
        prefix = PrefixManager.get_prefix(curie)
        if prefix in category_by_prefix:
            return category_by_prefix[prefix]

        # NOTE(review): the message announces a default, but ``category`` is
        # returned as-is here -- presumably the caller applies it; confirm.
        log.debug(
            f"{curie} Could not find a category mapping for '{category}'; Defaulting to 'biolink:OntologyClass'"
        )
        return category
示例#7
0
 def _compile_prefix_stats(self, n: str):
     """
     Record the CURIE prefix of node identifier ``n``.

     A non-empty prefix is added to ``self.category_stats["id_prefixes"]``
     if it is not already there; otherwise a warning is logged on
     ``self.mkg``.
     """
     prefix = PrefixManager.get_prefix(n)
     if prefix:
         seen_prefixes = self.category_stats["id_prefixes"]
         if prefix not in seen_prefixes:
             seen_prefixes.add(prefix)
         return

     # No usable prefix: report it instead of recording anything.
     self.mkg.log_error(
         entity=n,
         error_type=ErrorType.MISSING_NODE_CURIE_PREFIX,
         message="Node 'id' has no CURIE prefix",
         message_level=MessageLevel.WARNING,
     )
示例#8
0
文件: validator.py 项目: biolink/kgx
    def validate_edge_property_values(self, subject: str, object: str, data: dict):
        """
        Check that the subject and object of an edge are CURIEs with known
        prefixes.

        Problems are reported through ``self.log_error`` under the key
        ``"<subject>-><object>"`` at ``MessageLevel.ERROR``; nothing is
        returned.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties (not inspected by this check)

        """
        kind = ErrorType.INVALID_EDGE_PROPERTY_VALUE
        known_prefixes = self.get_all_prefixes()
        edge_key = f"{subject}->{object}"

        def report(text: str) -> None:
            # All findings for this edge share the same key, type and level.
            self.log_error(edge_key, kind, text, MessageLevel.ERROR)

        # --- subject -------------------------------------------------------
        if not PrefixManager.is_curie(subject):
            report(f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE")
        else:
            subject_prefix = PrefixManager.get_prefix(subject)
            # A falsy prefix is silently accepted for the subject.
            if subject_prefix and subject_prefix not in known_prefixes:
                report(
                    f"Edge property 'subject' has a value '{subject}' with a CURIE prefix "
                    f"'{subject_prefix}' that is not represented in Biolink Model JSON-LD context"
                )

        # --- object --------------------------------------------------------
        if not PrefixManager.is_curie(object):
            report(f"Edge property 'object' has a value '{object}' which is not a proper CURIE")
        else:
            object_prefix = PrefixManager.get_prefix(object)
            # Unlike the subject check, a falsy prefix is not skipped here.
            if object_prefix not in known_prefixes:
                report(
                    f"Edge property 'object' has a value '{object}' with a CURIE prefix "
                    f"'{object_prefix}' that is not represented in Biolink Model JSON-LD context"
                )
示例#9
0
 def analyse_node_category(self, n, data):
     """
     Update ``self.category_stats`` for one node.

     Increments the overall ``count``, records the identifier's prefix in
     ``id_prefixes``, and increments ``count_by_source`` once per entry of
     ``data['provided_by']`` -- or the ``'unknown'`` bucket when the node
     has no ``provided_by`` key.
     """
     prefix = PrefixManager.get_prefix(n)
     stats = self.category_stats
     stats['count'] += 1

     known_prefixes = stats['id_prefixes']
     if prefix not in known_prefixes:
         known_prefixes.add(prefix)

     if 'provided_by' not in data:
         stats['count_by_source']['unknown'] += 1
         return

     for source in data['provided_by']:
         per_source = stats['count_by_source']
         per_source[source] = per_source[source] + 1 if source in per_source else 1
示例#10
0
 def _capture_prefix(self, n: str):
     """
     Count the CURIE prefix of node identifier ``n``.

     A non-empty prefix increments its counter in
     ``self.category_stats["count_by_id_prefix"]`` (starting at 1);
     otherwise a warning is logged on ``self.summary``.
     """
     prefix = PrefixManager.get_prefix(n)
     if prefix:
         counts = self.category_stats["count_by_id_prefix"]
         counts[prefix] = counts[prefix] + 1 if prefix in counts else 1
         return

     # No usable prefix: report it instead of counting anything.
     self.summary.log_error(
         entity=n,
         error_type=ErrorType.MISSING_NODE_CURIE_PREFIX,
         message="Node 'id' has no CURIE prefix",
         message_level=MessageLevel.WARNING,
     )
示例#11
0
def test_get_prefix(query):
    """
    Check the behavior of the ``get_prefix`` method in PrefixManager.

    ``query[0]`` is the input passed to ``get_prefix`` and ``query[1]`` is
    the prefix it is expected to return.
    """
    actual = PrefixManager.get_prefix(query[0])
    expected = query[1]
    assert actual == expected