示例#1
0
文件: validator.py 项目: biolink/kgx
    def validate_edge_predicate(
            self,
            subject: str,
            object: str,
            data: dict,
            toolkit: Optional[Toolkit] = None
    ):
        """
        Validate the ``predicate`` field of a given edge.

        Every problem found is reported through ``self.log_error`` under the
        edge key ``"{subject}->{object}"``; nothing is returned.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties
        toolkit: Optional[Toolkit]
            Optional externally provided toolkit (default: use Validator class defined toolkit)

        """
        if not toolkit:
            toolkit = Validator.get_toolkit()

        error_type = ErrorType.INVALID_EDGE_PREDICATE
        edge_id = f"{subject}->{object}"
        edge_predicate = data.get("predicate")
        if edge_predicate is None:
            message = "Edge does not have an 'predicate' property"
            self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
        elif not isinstance(edge_predicate, str):
            # The property read above is 'predicate'; name it correctly in the report.
            message = "Edge property 'predicate' is expected to be of type 'string'"
            self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
        else:
            if PrefixManager.is_curie(edge_predicate):
                # Validate only the local part of a CURIE.
                edge_predicate = PrefixManager.get_reference(edge_predicate)
            # snake_case check: no upper-case letters or whitespace, at least
            # four characters, starting with [a-z_] and containing another
            # [a-z_] somewhere after the second character and before the last.
            # Written as a flat pattern on purpose: wrapping these parts in a
            # repeated group accepts the same strings but backtracks
            # exponentially on long non-matching input.
            m = re.match(r"^[a-z_][^A-Z\s]+[a-z_][^A-Z\s]+$", edge_predicate)
            if m:
                p = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
                if p is None:
                    message = f"Edge predicate '{edge_predicate}' is not in Biolink Model"
                    self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
                elif edge_predicate != p.name and edge_predicate in p.aliases:
                    message = f"Edge predicate '{edge_predicate}' is actually an alias for {p.name}; " + \
                              f"Should replace {edge_predicate} with {p.name}"
                    self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
            else:
                message = f"Edge predicate '{edge_predicate}' is not in snake_case form"
                self.log_error(edge_id, error_type, message, MessageLevel.ERROR)
示例#2
0
    def validate_categories(self,
                            node: str,
                            data: dict,
                            toolkit: Optional[Toolkit] = None):
        """
        Validate ``category`` field of a given node.

        Every problem found is reported through ``self.log_error`` under the
        node identifier; nothing is returned.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties
        toolkit: Optional[Toolkit]
            Optional externally provided toolkit (default: use Validator class defined toolkit)

        """
        if not toolkit:
            toolkit = Validator.get_toolkit()

        error_type = ErrorType.INVALID_CATEGORY
        categories = data.get("category")
        if categories is None:
            message = "Node does not have a 'category' property"
            self.log_error(node, error_type, message, MessageLevel.ERROR)
        elif not isinstance(categories, list):
            message = f"Node property 'category' is expected to be of type {list}"
            self.log_error(node, error_type, message, MessageLevel.ERROR)
        else:
            for category in categories:
                if not isinstance(category, str):
                    # A non-string entry would make re.match raise TypeError;
                    # report it as a validation error and move on instead.
                    message = f"Category '{category}' is expected to be of type 'string'"
                    self.log_error(node, error_type, message,
                                   MessageLevel.ERROR)
                    continue
                if PrefixManager.is_curie(category):
                    # Validate only the local part of a CURIE.
                    category = PrefixManager.get_reference(category)
                m = re.match(r"^([A-Z][a-z\d]+)+$", category)
                if not m:
                    # category is not CamelCase; the model checks below
                    # still run for it.
                    message = f"Category '{category}' is not in CamelCase form"
                    self.log_error(node, error_type, message,
                                   MessageLevel.ERROR)
                formatted_category = camelcase_to_sentencecase(category)
                if toolkit.is_mixin(formatted_category):
                    message = f"Category '{category}' is a mixin in the Biolink Model"
                    self.log_error(node, error_type, message,
                                   MessageLevel.ERROR)
                elif not toolkit.is_category(formatted_category):
                    message = (
                        f"Category '{category}' is unknown in the current Biolink Model"
                    )
                    self.log_error(node, error_type, message,
                                   MessageLevel.ERROR)
                else:
                    c = toolkit.get_element(formatted_category.lower())
                    # The lookup may return nothing; only then skip the alias check.
                    if c and category != c.name and category in c.aliases:
                        message = f"Category {category} is actually an alias for {c.name}; " + \
                                  f"Should replace '{category}' with '{c.name}'"
                        self.log_error(node, error_type, message,
                                       MessageLevel.ERROR)
示例#3
0
    def validate_edge_predicate(subject: str, object: str, data: dict) -> list:
        """
        Validate the ``predicate`` field of a given edge.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties

        Returns
        -------
        list
            A list of errors for a given edge (empty when no problem is found)

        """
        toolkit = get_toolkit()
        error_type = ErrorType.INVALID_EDGE_PREDICATE
        edge_id = f"{subject}-{object}"
        errors = []
        edge_predicate = data.get('predicate')
        if edge_predicate is None:
            message = "Edge does not have an 'predicate' property"
            errors.append(
                ValidationError(edge_id, error_type, message,
                                MessageLevel.ERROR))
        elif not isinstance(edge_predicate, str):
            # The property read above is 'predicate'; name it correctly in the report.
            message = "Edge property 'predicate' expected to be of type 'string'"
            errors.append(
                ValidationError(edge_id, error_type, message,
                                MessageLevel.ERROR))
        else:
            if PrefixManager.is_curie(edge_predicate):
                # Validate only the local part of a CURIE.
                edge_predicate = PrefixManager.get_reference(edge_predicate)
            # snake_case check: no upper-case letters or whitespace, at least
            # four characters, starting with [a-z_] and containing another
            # [a-z_] somewhere after the second character and before the last.
            # Written as a flat pattern on purpose: wrapping these parts in a
            # repeated group accepts the same strings but backtracks
            # exponentially on long non-matching input.
            m = re.match(r"^[a-z_][^A-Z\s]+[a-z_][^A-Z\s]+$",
                         edge_predicate)
            if m:
                p = toolkit.get_element(
                    snakecase_to_sentencecase(edge_predicate))
                if p is None:
                    message = f"Edge label '{edge_predicate}' not in Biolink Model"
                    errors.append(
                        ValidationError(edge_id, error_type,
                                        message, MessageLevel.ERROR))
                elif edge_predicate != p.name and edge_predicate in p.aliases:
                    message = f"Edge label '{edge_predicate}' is actually an alias for {p.name}; Should replace {edge_predicate} with {p.name}"
                    errors.append(
                        ValidationError(edge_id, error_type,
                                        message, MessageLevel.ERROR))
            else:
                message = f"Edge label '{edge_predicate}' is not in snake_case form"
                errors.append(
                    ValidationError(edge_id, error_type, message,
                                    MessageLevel.ERROR))
        return errors
示例#4
0
    def validate_categories(node: str, data: dict) -> list:
        """
        Validate ``category`` field of a given node.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties

        Returns
        -------
        list
            A list of errors for a given node (empty when no problem is found)

        """
        toolkit = get_toolkit()
        error_type = ErrorType.INVALID_CATEGORY
        errors = []
        categories = data.get('category')
        if categories is None:
            message = "Node does not have a 'category' property"
            errors.append(
                ValidationError(node, error_type, message, MessageLevel.ERROR))
        elif not isinstance(categories, list):
            message = f"Node property 'category' expected to be of type {list}"
            errors.append(
                ValidationError(node, error_type, message, MessageLevel.ERROR))
        else:
            for category in categories:
                if not isinstance(category, str):
                    # A non-string entry would make re.match raise TypeError;
                    # report it as a validation error and move on instead.
                    message = f"Category '{category}' expected to be of type 'string'"
                    errors.append(
                        ValidationError(node, error_type, message,
                                        MessageLevel.ERROR))
                    continue
                if PrefixManager.is_curie(category):
                    # Validate only the local part of a CURIE.
                    category = PrefixManager.get_reference(category)
                m = re.match(r"^([A-Z][a-z\d]+)+$", category)
                if not m:
                    # category is not CamelCase; the model check below
                    # still runs for it.
                    message = f"Category '{category}' is not in CamelCase form"
                    errors.append(
                        ValidationError(node, error_type, message,
                                        MessageLevel.ERROR))
                formatted_category = camelcase_to_sentencecase(category)
                if not toolkit.is_category(formatted_category):
                    message = f"Category '{category}' not in Biolink Model"
                    errors.append(
                        ValidationError(node, error_type, message,
                                        MessageLevel.ERROR))
                else:
                    c = toolkit.get_element(formatted_category.lower())
                    # The lookup may return nothing; guard before reading
                    # its attributes.
                    if c and category != c.name and category in c.aliases:
                        message = f"Category {category} is actually an alias for {c.name}; Should replace '{category}' with '{c.name}'"
                        errors.append(
                            ValidationError(node, error_type, message,
                                            MessageLevel.ERROR))
        return errors
示例#5
0
def get_biolink_element(prefix_manager: PrefixManager,
                        predicate: Any) -> Optional[Element]:
    """
    Look up the Biolink Model element that corresponds to a predicate.

    The predicate is contracted when it is an IRI and reduced to its
    reference when the result is a CURIE; that value is looked up by name.
    If nothing is found, a lookup by mapping on the original ``predicate``
    is attempted.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The CURIE of a predicate

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element

    """
    toolkit = get_toolkit()

    curie = (
        prefix_manager.contract(predicate)
        if prefix_manager.is_iri(predicate)
        else predicate
    )
    name = (
        prefix_manager.get_reference(curie)
        if prefix_manager.is_curie(curie)
        else curie
    )

    found = toolkit.get_element(name)
    if found:
        return found

    # Direct lookup failed: try resolving the original predicate via mappings.
    try:
        mapped_name = toolkit.get_element_by_mapping(predicate)
        if mapped_name:
            found = toolkit.get_element(mapped_name)
    except ValueError as err:
        # A failed mapping lookup is logged, not propagated.
        log.error(err)
    return found
示例#6
0
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Resolve a predicate against the Biolink Model, falling back to
    user-supplied predicate mappings when no Biolink element is found.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate
    predicate_mapping: Optional[Dict]
        Predicate mappings

    Returns
    -------
    Tuple[str, str, str, str]
        A tuple that contains the Biolink CURIE (if available), the Biolink slot_uri CURIE (if available),
        the CURIE form of p, the reference of p

    """
    # Work out the CURIE form of ``p`` and its reference.
    contracted = prefix_manager.contract(str(p)) if prefix_manager.is_iri(p) else None
    if prefix_manager.is_curie(p):
        property_name = prefix_manager.get_reference(p)
        predicate = p
    elif contracted and prefix_manager.is_curie(contracted):
        property_name = prefix_manager.get_reference(contracted)
        predicate = contracted
    else:
        # Neither a CURIE nor an IRI that contracts to one: use ``p`` as-is
        # with an empty prefix.
        property_name = p
        predicate = f":{p}"

    element = get_biolink_element(prefix_manager, p)
    if not element:
        # no mapping to biolink model;
        # look at predicate mappings
        if predicate_mapping and p in predicate_mapping:
            property_name = predicate_mapping[p]
            predicate = f":{property_name}"
        return None, None, predicate, property_name

    canonical_uri = None
    if isinstance(element, SlotDefinition):
        # predicate corresponds to a biolink slot
        if element.definition_uri:
            element_uri = prefix_manager.contract(element.definition_uri)
        else:
            element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
        if element.slot_uri:
            canonical_uri = element.slot_uri
    elif isinstance(element, ClassDefinition):
        # this will happen only when the IRI is actually
        # a reference to a class
        element_uri = prefix_manager.contract(element.class_uri)
    else:
        element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"

    # Elements descending from biolink:Attribute always get a snake_case CURIE,
    # overriding whatever was chosen above.
    if "biolink:Attribute" in get_biolink_ancestors(element.name):
        element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"

    if not predicate:
        predicate = element_uri
    return element_uri, canonical_uri, predicate, property_name
示例#7
0
def test_get_reference(query):
    """
    Check that PrefixManager.get_reference returns ``query[1]`` when
    given ``query[0]``.
    """
    actual = PrefixManager.get_reference(query[0])
    assert actual == query[1]