示例#1
0
    def validate_edge_property_values(self, subject: str, object: str, data: dict) -> list:
        """
        Validate an edge property's value.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties

        Returns
        -------
        list
            A list of errors for a given edge

        """
        errors = []
        error_type = ErrorType.INVALID_EDGE_PROPERTY_VALUE

        if PrefixManager.is_curie(subject):
            prefix = PrefixManager.get_prefix(subject)
            if prefix and prefix not in self.get_all_prefixes():
                message = f"Edge property 'subject' has a value '{subject}' with a CURIE prefix '{prefix}' that is not represented in Biolink Model JSON-LD context"
                errors.append(ValidationError(f"{subject}-{object}", error_type, message, MessageLevel.ERROR))
        else:
            message = f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE"
            errors.append(ValidationError(f"{subject}-{object}", error_type, message, MessageLevel.ERROR))

        if PrefixManager.is_curie(object):
            prefix = PrefixManager.get_prefix(object)
            if prefix not in self.prefixes:
                message = f"Edge property 'object' has a value '{object}' with a CURIE prefix '{prefix}' that is not represented in Biolink Model JSON-LD context"
                errors.append(ValidationError(f"{subject}-{object}", error_type, message, MessageLevel.ERROR))
        else:
            message = f"Edge property 'object' has a value '{object}' which is not a proper CURIE"
            errors.append(ValidationError(f"{subject}-{object}", error_type, message, MessageLevel.ERROR))
        if 'relation' in data:
            if PrefixManager.is_curie(data['relation']):
                prefix = PrefixManager.get_prefix(data['relation'])
                if prefix not in self.prefixes:
                    message = f"Edge property 'relation' has a value '{data['relation']}' with a CURIE prefix '{prefix}' that is not represented in Biolink Model JSON-LD context"
                    errors.append(ValidationError(f"{subject}-{object}", error_type, message, MessageLevel.ERROR))
            else:
                message = f"Edge property 'relation' has a value '{data['relation']}' which is not a proper CURIE"
                errors.append(ValidationError(f"{subject}-{object}", error_type, message, MessageLevel.ERROR))
        return errors
示例#2
0
    def uriref(self, identifier: str) -> URIRef:
        """
        Generate a rdflib.URIRef for a given string.

        Parameters
        ----------
        identifier: str
            Identifier as string.

        Returns
        -------
        rdflib.URIRef
            URIRef form of the input ``identifier``

        """
        if identifier.startswith('urn:uuid:'):
            uri = identifier
        elif identifier in reverse_property_mapping:
            # identifier is a property
            uri = reverse_property_mapping[identifier]
        else:
            # identifier is an entity
            if identifier.startswith(':'):
                # TODO: this should be handled upstream by prefixcommons-py
                uri = self.DEFAULT.term(identifier.replace(':', '', 1))
            else:
                uri = self.prefix_manager.expand(identifier)
            if identifier == uri:
                if PrefixManager.is_curie(identifier):
                    identifier = identifier.replace(':', '_')
                if ' ' in identifier:
                    identifier = identifier.replace(' ', '_')
                uri = self.DEFAULT.term(identifier)

        return URIRef(uri)
示例#3
0
    def _prepare_object(self, prop: str, prop_type: str, value: Any) -> rdflib.term.Identifier:
        """
        Prepare the object of a triple.

        Parameters
        ----------
        prop: str
            property name
        prop_type: str
            property type
        value: Any
            property value

        Returns
        -------
        rdflib.term.Identifier
            An instance of rdflib.term.Identifier

        """
        if prop_type == 'uriorcurie' or prop_type == 'xsd:anyURI':
            if isinstance(value, str) and PrefixManager.is_curie(value):
                o = self.uriref(value)
            elif isinstance(value, str) and PrefixManager.is_iri(value):
                if _is_valid_uri(value):
                    o = URIRef(value)
                else:
                    o = Literal(value)
            else:
                o = Literal(value)
        elif prop_type.startswith('xsd'):
            o = Literal(value, datatype=self.prefix_manager.expand(prop_type))
        else:
            o = Literal(value, datatype=self.prefix_manager.expand("xsd:string"))
        return o
示例#4
0
    def validate_node_property_values(node: str, data: dict) -> list:
        """
        Validate a node property's value.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties

        Returns
        -------
        list
            A list of errors for a given node

        """
        errors = []
        error_type = ErrorType.INVALID_NODE_PROPERTY_VALUE
        if not PrefixManager.is_curie(node):
            message = f"Node property 'id' expected to be of type 'CURIE'"
            errors.append(
                ValidationError(node, error_type, message, MessageLevel.ERROR))
        else:
            prefix = PrefixManager.get_prefix(node)
            if prefix and prefix not in Validator.get_all_prefixes():
                message = f"Node property 'id' has a value '{node}' with a CURIE prefix '{prefix}' is not represented in Biolink Model JSON-LD context"
                errors.append(
                    ValidationError(node, error_type, message,
                                    MessageLevel.ERROR))
        return errors
示例#5
0
文件: validator.py 项目: biolink/kgx
    def validate_node_property_values(
            self,
            node: str,
            data: dict
    ):
        """
        Validate a node property's value.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties

        """
        error_type = ErrorType.INVALID_NODE_PROPERTY_VALUE
        if not PrefixManager.is_curie(node):
            message = f"Node property 'id' is expected to be of type 'CURIE'"
            self.log_error(node, error_type, message, MessageLevel.ERROR)
        else:
            prefix = PrefixManager.get_prefix(node)
            if prefix and prefix not in self.get_all_prefixes():
                message = f"Node property 'id' has a value '{node}' with a CURIE prefix '{prefix}'" + \
                          f" is not represented in Biolink Model JSON-LD context"
                self.log_error(node, error_type, message, MessageLevel.ERROR)
示例#6
0
文件: validator.py 项目: biolink/kgx
    def validate_edge_predicate(
            self,
            subject: str,
            object: str,
            data: dict,
            toolkit: Optional[Toolkit] = None
    ):
        """
        Validate ``edge_predicate`` field of a given edge.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties
        toolkit: Optional[Toolkit]
            Optional externally provided toolkit (default: use Validator class defined toolkit)

        """
        if not toolkit:
            toolkit = Validator.get_toolkit()

        error_type = ErrorType.INVALID_EDGE_PREDICATE
        edge_predicate = data.get("predicate")
        if edge_predicate is None:
            message = "Edge does not have an 'predicate' property"
            self.log_error(f"{subject}->{object}", error_type, message, MessageLevel.ERROR)
        elif not isinstance(edge_predicate, str):
            message = f"Edge property 'edge_predicate' is expected to be of type 'string'"
            self.log_error(f"{subject}->{object}", error_type, message, MessageLevel.ERROR)
        else:
            if PrefixManager.is_curie(edge_predicate):
                edge_predicate = PrefixManager.get_reference(edge_predicate)
            m = re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$", edge_predicate)
            if m:
                p = toolkit.get_element(snakecase_to_sentencecase(edge_predicate))
                if p is None:
                    message = f"Edge predicate '{edge_predicate}' is not in Biolink Model"
                    self.log_error(
                        f"{subject}->{object}",
                        error_type,
                        message,
                        MessageLevel.ERROR,
                    )
                elif edge_predicate != p.name and edge_predicate in p.aliases:
                    message = f"Edge predicate '{edge_predicate}' is actually an alias for {p.name}; " + \
                              f"Should replace {edge_predicate} with {p.name}"
                    self.log_error(
                        f"{subject}->{object}",
                        error_type,
                        message,
                        MessageLevel.ERROR,
                    )
            else:
                message = f"Edge predicate '{edge_predicate}' is not in snake_case form"
                self.log_error(f"{subject}->{object}", error_type, message, MessageLevel.ERROR)
示例#7
0
    def validate_categories(self,
                            node: str,
                            data: dict,
                            toolkit: Optional[Toolkit] = None):
        """
        Validate ``category`` field of a given node.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties
        toolkit: Optional[Toolkit]
            Optional externally provided toolkit (default: use Validator class defined toolkit)

        """
        if not toolkit:
            toolkit = Validator.get_toolkit()

        error_type = ErrorType.INVALID_CATEGORY
        categories = data.get("category")
        if categories is None:
            message = "Node does not have a 'category' property"
            self.log_error(node, error_type, message, MessageLevel.ERROR)
        elif not isinstance(categories, list):
            message = f"Node property 'category' is expected to be of type {list}"
            self.log_error(node, error_type, message, MessageLevel.ERROR)
        else:
            for category in categories:
                if PrefixManager.is_curie(category):
                    category = PrefixManager.get_reference(category)
                m = re.match(r"^([A-Z][a-z\d]+)+$", category)
                if not m:
                    # category is not CamelCase
                    error_type = ErrorType.INVALID_CATEGORY
                    message = f"Category '{category}' is not in CamelCase form"
                    self.log_error(node, error_type, message,
                                   MessageLevel.ERROR)
                formatted_category = camelcase_to_sentencecase(category)
                if toolkit.is_mixin(formatted_category):
                    message = f"Category '{category}' is a mixin in the Biolink Model"
                    self.log_error(node, error_type, message,
                                   MessageLevel.ERROR)
                elif not toolkit.is_category(formatted_category):
                    message = (
                        f"Category '{category}' is unknown in the current Biolink Model"
                    )
                    self.log_error(node, error_type, message,
                                   MessageLevel.ERROR)
                else:
                    c = toolkit.get_element(formatted_category.lower())
                    if c:
                        if category != c.name and category in c.aliases:
                            message = f"Category {category} is actually an alias for {c.name}; " + \
                                      f"Should replace '{category}' with '{c.name}'"
                            self.log_error(node, error_type, message,
                                           MessageLevel.ERROR)
示例#8
0
    def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
        """
        This method should be used by all derived classes when adding an edge to the networkx.MultiDiGraph.
        This ensures that the `subject` and `object` identifiers are CURIEs, and that `edge_label` is in the correct form.

        Returns the CURIE identifiers used for the `subject` and `object` in the
        networkx.MultiDiGraph, and the processed `edge_label`.

        Parameters
        ----------
        subject_iri: rdflib.URIRef
            Subject IRI for the subject in a triple
        object_iri: rdflib.URIRef
            Object IRI for the object in a triple
        predicate_iri: rdflib.URIRef
            Predicate IRI for the predicate in a triple

        Returns
        -------
        Tuple[str, str, str]
            A 3-nary tuple (of the form subject, object, predicate) that represents the edge

        """
        s = self.add_node(subject_iri)
        o = self.add_node(object_iri)
        relation = self.prefix_manager.contract(predicate_iri)
        edge_label = process_iri(predicate_iri)
        if ' ' in edge_label:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))
        if edge_label.startswith(self.BIOLINK):
            logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
            edge_label = edge_label.replace(self.BIOLINK, '')

        if PrefixManager.is_curie(edge_label):
            name = curie_lookup(edge_label)
            if name:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, name))
                edge_label = name
            else:
                logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
                edge_label = self.DEFAULT_EDGE_LABEL

        kwargs = {
            'subject': s,
            'predicate': str(predicate_iri),
            'object': o,
            'relation': relation,
            'edge_label': f"biolink:{edge_label}"
        }
        if 'provided_by' in self.graph_metadata:
            kwargs['provided_by'] = self.graph_metadata['provided_by']

        key = generate_edge_key(s, edge_label, o)
        if not self.graph.has_edge(s, o, key=key):
            self.graph.add_edge(s, o, key=key, **kwargs)
        # TODO: support append
        return s, o, edge_label
示例#9
0
    def validate_edge_predicate(subject: str, object: str, data: dict) -> list:
        """
        Validate ``edge_predicate`` field of a given edge.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties

        Returns
        -------
        list
            A list of errors for a given edge

        """
        toolkit = get_toolkit()
        error_type = ErrorType.INVALID_EDGE_PREDICATE
        errors = []
        edge_predicate = data.get('predicate')
        if edge_predicate is None:
            message = "Edge does not have an 'predicate' property"
            errors.append(
                ValidationError(f"{subject}-{object}", error_type, message,
                                MessageLevel.ERROR))
        elif not isinstance(edge_predicate, str):
            message = f"Edge property 'edge_predicate' expected to be of type 'string'"
            errors.append(
                ValidationError(f"{subject}-{object}", error_type, message,
                                MessageLevel.ERROR))
        else:
            if PrefixManager.is_curie(edge_predicate):
                edge_predicate = PrefixManager.get_reference(edge_predicate)
            m = re.match(r"^([a-z_][^A-Z\s]+_?[a-z_][^A-Z\s]+)+$",
                         edge_predicate)
            if m:
                p = toolkit.get_element(
                    snakecase_to_sentencecase(edge_predicate))
                if p is None:
                    message = f"Edge label '{edge_predicate}' not in Biolink Model"
                    errors.append(
                        ValidationError(f"{subject}-{object}", error_type,
                                        message, MessageLevel.ERROR))
                elif edge_predicate != p.name and edge_predicate in p.aliases:
                    message = f"Edge label '{edge_predicate}' is actually an alias for {p.name}; Should replace {edge_predicate} with {p.name}"
                    errors.append(
                        ValidationError(f"{subject}-{object}", error_type,
                                        message, MessageLevel.ERROR))
            else:
                message = f"Edge label '{edge_predicate}' is not in snake_case form"
                errors.append(
                    ValidationError(f"{subject}-{object}", error_type, message,
                                    MessageLevel.ERROR))
        return errors
示例#10
0
    def validate_categories(node: str, data: dict) -> list:
        """
        Validate ``category`` field of a given node.

        Parameters
        ----------
        node: str
            Node identifier
        data: dict
            Node properties

        Returns
        -------
        list
            A list of errors for a given node

        """
        toolkit = get_toolkit()
        error_type = ErrorType.INVALID_CATEGORY
        errors = []
        categories = data.get('category')
        if categories is None:
            message = "Node does not have a 'category' property"
            errors.append(
                ValidationError(node, error_type, message, MessageLevel.ERROR))
        elif not isinstance(categories, list):
            message = f"Node property 'category' expected to be of type {list}"
            errors.append(
                ValidationError(node, error_type, message, MessageLevel.ERROR))
        else:
            for category in categories:
                if PrefixManager.is_curie(category):
                    category = PrefixManager.get_reference(category)
                m = re.match(r"^([A-Z][a-z\d]+)+$", category)
                if not m:
                    # category is not CamelCase
                    error_type = ErrorType.INVALID_CATEGORY
                    message = f"Category '{category}' is not in CamelCase form"
                    errors.append(
                        ValidationError(node, error_type, message,
                                        MessageLevel.ERROR))
                formatted_category = camelcase_to_sentencecase(category)
                if not toolkit.is_category(formatted_category):
                    message = f"Category '{category}' not in Biolink Model"
                    errors.append(
                        ValidationError(node, error_type, message,
                                        MessageLevel.ERROR))
                else:
                    c = toolkit.get_element(formatted_category.lower())
                    if category != c.name and category in c.aliases:
                        message = f"Category {category} is actually an alias for {c.name}; Should replace '{category}' with '{c.name}'"
                        errors.append(
                            ValidationError(node, error_type, message,
                                            MessageLevel.ERROR))
        return errors
示例#11
0
文件: validator.py 项目: biolink/kgx
    def validate_edge_property_values(
            self,
            subject: str,
            object: str,
            data: dict
    ):
        """
        Validate an edge property's value.

        Parameters
        ----------
        subject: str
            Subject identifier
        object: str
            Object identifier
        data: dict
            Edge properties

        """
        error_type = ErrorType.INVALID_EDGE_PROPERTY_VALUE
        prefixes = self.get_all_prefixes()

        if PrefixManager.is_curie(subject):
            prefix = PrefixManager.get_prefix(subject)
            if prefix and prefix not in prefixes:
                message = f"Edge property 'subject' has a value '{subject}' with a CURIE prefix " + \
                          f"'{prefix}' that is not represented in Biolink Model JSON-LD context"
                self.log_error(f"{subject}->{object}", error_type, message, MessageLevel.ERROR)
        else:
            message = f"Edge property 'subject' has a value '{subject}' which is not a proper CURIE"
            self.log_error(f"{subject}->{object}", error_type, message, MessageLevel.ERROR)

        if PrefixManager.is_curie(object):
            prefix = PrefixManager.get_prefix(object)
            if prefix not in prefixes:
                message = f"Edge property 'object' has a value '{object}' with a CURIE " + \
                          f"prefix '{prefix}' that is not represented in Biolink Model JSON-LD context"
                self.log_error(f"{subject}->{object}", error_type, message, MessageLevel.ERROR)
        else:
            message = f"Edge property 'object' has a value '{object}' which is not a proper CURIE"
            self.log_error(f"{subject}->{object}", error_type, message, MessageLevel.ERROR)
示例#12
0
def get_category_via_superclass(graph: BaseGraph,
                                curie: str,
                                load_ontology: bool = True) -> Set[str]:
    """
    Get category for a given CURIE by tracing its superclass, via ``subclass_of`` hierarchy,
    and getting the most appropriate category based on the superclass.

    Parameters
    ----------
    graph: kgx.graph.base_graph.BaseGraph
        Graph to traverse
    curie: str
        Input CURIE
    load_ontology: bool
        Determines whether to load ontology, based on CURIE prefix, or to simply
        rely on ``subclass_of`` hierarchy from graph

    Returns
    -------
    Set[str]
        A set containing one (or more) category for the given CURIE

    """
    log.debug("curie: {}".format(curie))
    new_categories = []
    toolkit = get_toolkit()
    if PrefixManager.is_curie(curie):
        ancestors = get_ancestors(graph, curie, relations=['subclass_of'])
        if len(ancestors) == 0 and load_ontology:
            cls = get_curie_lookup_service()
            ontology_graph = cls.ontology_graph
            new_categories += [
                x for x in get_category_via_superclass(ontology_graph, curie,
                                                       False)
            ]
        log.debug("Ancestors for CURIE {} via subClassOf: {}".format(
            curie, ancestors))
        seen = []
        for anc in ancestors:
            mapping = toolkit.get_by_mapping(anc)
            seen.append(anc)
            if mapping:
                # there is direct mapping to BioLink Model
                log.debug("Ancestor {} mapped to {}".format(anc, mapping))
                seen_labels = [
                    graph.nodes()[x]['name'] for x in seen
                    if 'name' in graph.nodes()[x]
                ]
                new_categories += [x for x in seen_labels]
                new_categories += [x for x in toolkit.ancestors(mapping)]
                break
    return set(new_categories)
示例#13
0
    def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
        """
        Adds an attribute to an edge, while taking into account whether the attribute
        should be multi-valued.
        Multi-valued properties will not contain duplicates.

        The ``key`` may be a rdflib.URIRef or a URI string that maps onto a property name
        as defined in ``rdf_utils.property_mapping``.

        If the nodes in the edge does not exist then they will be created
        using ``subject_iri`` and ``object_iri``.

        If the edge itself does not exist then it will be created using
        ``subject_iri``, ``object_iri`` and ``predicate_iri``.

        Parameters
        ----------
        subject_iri: [rdflib.URIRef, str]
            The IRI of the subject node of an edge in rdflib.Graph
        object_iri: rdflib.URIRef
            The IRI of the object node of an edge in rdflib.Graph
        predicate_iri: rdflib.URIRef
            The IRI of the predicate representing an edge in rdflib.Graph
        key: str
            The name of the attribute. Can be a rdflib.URIRef or URI string
        value: str
            The value of the attribute

        """
        if key.lower() in is_property_multivalued:
            key = key.lower()
        else:
            if not isinstance(key, URIRef):
                key = URIRef(key)
            key = property_mapping.get(key)

        if key is not None and value is not None:
            subject_curie = self.prefix_manager.contract(subject_iri)
            object_curie = self.prefix_manager.contract(object_iri)
            edge_label = process_iri(predicate_iri)
            if PrefixManager.is_curie(edge_label):
                edge_label = curie_lookup(edge_label)
            edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
            attr_dict = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)
            self._add_attribute(attr_dict, key, value)
示例#14
0
def get_biolink_element(prefix_manager: PrefixManager,
                        predicate: Any) -> Optional[Element]:
    """
    Returns a Biolink Model element for a given predicate.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    predicate: Any
        The CURIE of a predicate

    Returns
    -------
    Optional[Element]
        The corresponding Biolink Model element

    """
    toolkit = get_toolkit()
    if prefix_manager.is_iri(predicate):
        predicate_curie = prefix_manager.contract(predicate)
    else:
        predicate_curie = predicate
    if prefix_manager.is_curie(predicate_curie):
        reference = prefix_manager.get_reference(predicate_curie)
    else:
        reference = predicate_curie
    element = toolkit.get_element(reference)
    if not element:
        try:
            mapping = toolkit.get_element_by_mapping(predicate)
            if mapping:
                element = toolkit.get_element(mapping)
        except ValueError as e:
            log.error(e)
    return element
示例#15
0
def process_predicate(
    prefix_manager: PrefixManager,
    p: Union[URIRef, str],
    predicate_mapping: Optional[Dict] = None,
) -> Tuple:
    """
    Process a predicate where the method checks if there is a mapping in Biolink Model.

    Parameters
    ----------
    prefix_manager: PrefixManager
        An instance of prefix manager
    p: Union[URIRef, str]
        The predicate
    predicate_mapping: Optional[Dict]
        Predicate mappings

    Returns
    -------
    Tuple[str, str, str, str]
        A tuple that contains the Biolink CURIE (if available), the Biolink slot_uri CURIE (if available),
        the CURIE form of p, the reference of p

    """
    if prefix_manager.is_iri(p):
        predicate = prefix_manager.contract(str(p))
    else:
        predicate = None
    if prefix_manager.is_curie(p):
        property_name = prefix_manager.get_reference(p)
        predicate = p
    else:
        if predicate and prefix_manager.is_curie(predicate):
            property_name = prefix_manager.get_reference(predicate)
        else:
            property_name = p
            predicate = f":{p}"
    element = get_biolink_element(prefix_manager, p)
    canonical_uri = None
    if element:
        if isinstance(element, SlotDefinition):
            # predicate corresponds to a biolink slot
            if element.definition_uri:
                element_uri = prefix_manager.contract(element.definition_uri)
            else:
                element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
            if element.slot_uri:
                canonical_uri = element.slot_uri
        elif isinstance(element, ClassDefinition):
            # this will happen only when the IRI is actually
            # a reference to a class
            element_uri = prefix_manager.contract(element.class_uri)
        else:
            element_uri = f"biolink:{sentencecase_to_camelcase(element.name)}"
        if "biolink:Attribute" in get_biolink_ancestors(element.name):
            element_uri = f"biolink:{sentencecase_to_snakecase(element.name)}"
        if not predicate:
            predicate = element_uri
    else:
        # no mapping to biolink model;
        # look at predicate mappings
        element_uri = None
        if predicate_mapping:
            if p in predicate_mapping:
                property_name = predicate_mapping[p]
                predicate = f":{property_name}"
        # cache[p] = {'element_uri': element_uri, 'canonical_uri': canonical_uri,
        # 'predicate': predicate, 'property_name': property_name}
    return element_uri, canonical_uri, predicate, property_name
示例#16
0
    def load_edge(self, edge: Dict) -> Generator:
        """
        Load an edge into an instance of BaseGraph

        Parameters
        ----------
        edge : Dict
            An edge

        Returns
        -------
        Generator
            A generator for node and edge records

        """
        (element_uri, canonical_uri, predicate, property_name) = process_predicate(
            self.prefix_manager, edge["predicate_id"], self.predicate_mapping
        )
        if element_uri:
            edge_predicate = element_uri
        elif predicate:
            edge_predicate = predicate
        else:
            edge_predicate = property_name
        if canonical_uri:
            edge_predicate = element_uri

        data = {
            "subject": edge["subject_id"],
            "predicate": edge_predicate,
            "object": edge["object_id"],
        }
        del edge["predicate_id"]

        data = self.validate_edge(data)
        if not data:
            return  # ?

        subject_node = {}
        object_node = {}
        for k, v in edge.items():
            if k in SSSOM_NODE_PROPERTY_MAPPING:
                if k.startswith("subject"):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == "category" and not PrefixManager.is_curie(v):
                        v = f"biolink:OntologyClass"
                    subject_node[mapped_k] = v
                elif k.startswith("object"):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == "category" and not PrefixManager.is_curie(v):
                        v = f"biolink:OntologyClass"
                    object_node[mapped_k] = v
                else:
                    log.info(f"Ignoring {k} {v}")
            else:
                data[k] = v

        subject_node = self.load_node(subject_node)
        object_node = self.load_node(object_node)
        if not (subject_node and object_node):
            return  # ?
        objs = [subject_node, object_node]

        for k, v in self.graph_metadata.items():
            if k not in {"curie_map"}:
                data[k] = v

        edge_data = sanitize_import(data.copy())
        if "subject" in edge_data and "object" in edge_data:
            if "id" not in edge_data:
                edge_data["id"] = generate_uuid()
            s = edge_data["subject"]
            o = edge_data["object"]

            self.set_edge_provenance(edge_data)

            key = generate_edge_key(s, edge_data["predicate"], o)
            self.edge_properties.update(list(edge_data.keys()))
            objs.append((s, o, key, edge_data))
        else:
            self.owner.log_error(
                entity=str(edge_data),
                error_type=ErrorType.MISSING_NODE,
                message="Ignoring edge with either a missing 'subject' or 'object'",
                message_level=MessageLevel.WARNING
            )

        for o in objs:
            yield o
示例#17
0
    def export_edges(self) -> Set[URIRef]:
        """
        Export all edges from networkx.MultiDiGraph.

        This method yields one (or more) triple that corresponds to an edge.

        Returns
        -------
        Set[rdflib.term.URIRef]
            A triple

        """
        cache = []
        for u, v, k, data in self.graph.edges(data=True, keys=True):
            if data['edge_label'] in self.edge_properties:
                # treat as a direct edge
                s = self.uriref(u)
                p = self.uriref(data['edge_label'])
                o = self.uriref(v)
                yield (s, p, o)
            else:
                # reify
                s = self.uriref(u)
                p = self.uriref(data['edge_label'])
                o = self.uriref(v)
                cache.append((s, p, o))
                if 'id' in data:
                    s = self.uriref(data['id'])
                else:
                    # generate a UUID for the reified node
                    s = self.uriref(generate_uuid())
                all_data = data.copy()
                all_data['type'] = 'biolink:Association'
                for prop, value in all_data.items():
                    if prop in {'id', 'association_id', 'edge_key'}:
                        continue
                    p = self.uriref(prop)
                    if isinstance(value, list):
                        for x in value:
                            if isinstance(x,
                                          str) and PrefixManager.is_curie(x):
                                o = self.uriref(x)
                            elif isinstance(x,
                                            str) and PrefixManager.is_iri(x):
                                o = URIRef(x)
                            else:
                                o = Literal(x)
                            yield (s, p, o)
                    else:
                        if isinstance(value,
                                      str) and PrefixManager.is_curie(value):
                            o = self.uriref(value)
                        elif isinstance(value,
                                        str) and PrefixManager.is_iri(value):
                            o = URIRef(value)
                        else:
                            # literal
                            o = Literal(value)
                        yield (s, p, o)

        for t in cache:
            yield (t[0], t[1], t[2])
示例#18
0
    def load_edge(self, edge: Dict) -> Generator:
        """
        Load an edge into an instance of BaseGraph

        Parameters
        ----------
        edge : Dict
            An edge

        Returns
        -------
        Generator
            A generator for node and edge records

        """
        (element_uri, canonical_uri, predicate,
         property_name) = process_predicate(self.prefix_manager,
                                            edge['predicate_id'],
                                            self.predicate_mapping)
        if element_uri:
            edge_predicate = element_uri
        elif predicate:
            edge_predicate = predicate
        else:
            edge_predicate = property_name
        if canonical_uri:
            edge_predicate = element_uri
        data = {
            'subject': edge['subject_id'],
            'predicate': edge_predicate,
            'object': edge['object_id'],
        }
        del edge['predicate_id']
        data = validate_edge(data)
        subject_node = {}
        object_node = {}
        for k, v in edge.items():
            if k in SSSOM_NODE_PROPERTY_MAPPING:
                if k.startswith('subject'):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == 'category' and not PrefixManager.is_curie(
                            v):
                        v = f"biolink:OntologyClass"
                    subject_node[mapped_k] = v
                elif k.startswith('object'):
                    mapped_k = SSSOM_NODE_PROPERTY_MAPPING[k]
                    if mapped_k == 'category' and not PrefixManager.is_curie(
                            v):
                        v = f"biolink:OntologyClass"
                    object_node[mapped_k] = v
                else:
                    log.info(f"Ignoring {k} {v}")
            else:
                data[k] = v

        objs = [self.load_node(subject_node), self.load_node(object_node)]

        for k, v in self.graph_metadata.items():
            if k not in {'curie_map'}:
                data[k] = v

        edge_data = sanitize_import(data.copy())
        if 'subject' in edge_data and 'object' in edge_data:
            if 'id' not in edge_data:
                edge_data['id'] = generate_uuid()
            s = edge_data['subject']
            o = edge_data['object']
            if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data.keys(
            ):
                edge_data['provided_by'] = self.graph_metadata['provided_by']
            key = generate_edge_key(s, edge_data['predicate'], o)
            self.edge_properties.update(list(edge_data.keys()))
            objs.append((s, o, key, edge_data))
        else:
            log.info(
                "Ignoring edge with either a missing 'subject' or 'object': {}"
                .format(edge_data))

        for o in objs:
            yield o
示例#19
0
    def add_edge(
        self,
        subject_iri: URIRef,
        object_iri: URIRef,
        predicate_iri: URIRef,
        data: Optional[Dict[Any, Any]] = None,
    ) -> Dict:
        """
        Add an edge to cache.

        Parameters
        ----------
        subject_iri: rdflib.URIRef
            Subject IRI for the subject in a triple
        object_iri: rdflib.URIRef
            Object IRI for the object in a triple
        predicate_iri: rdflib.URIRef
            Predicate IRI for the predicate in a triple
        data: Optional[Dict[Any, Any]]
            Additional edge properties

        Returns
        -------
        Dict
            The edge data

        """
        (element_uri, canonical_uri, predicate, property_name) = self.process_predicate(
            predicate_iri
        )
        subject_curie = self.prefix_manager.contract(subject_iri)
        object_curie = self.prefix_manager.contract(object_iri)
        if subject_curie in self.node_cache:
            subject_node = self.node_cache[subject_curie]
        else:
            subject_node = self.add_node(subject_iri)

        if object_curie in self.node_cache:
            object_node = self.node_cache[object_curie]
        else:
            object_node = self.add_node(object_iri)
        edge_predicate = element_uri if element_uri else predicate
        if not edge_predicate:
            edge_predicate = property_name

        if ' ' in edge_predicate:
            log.debug(
                f"predicate IRI '{predicate_iri}' yields edge_predicate '{edge_predicate}' that not in snake_case form; replacing ' ' with '_'"
            )
        edge_predicate_prefix = self.prefix_manager.get_prefix(edge_predicate)
        if edge_predicate_prefix not in {'biolink', 'rdf', 'rdfs', 'skos', 'owl'}:
            if PrefixManager.is_curie(edge_predicate):
                # name = curie_lookup(edge_predicate)
                # if name:
                #     log.debug(f"predicate IRI '{predicate_iri}' yields edge_predicate '{edge_predicate}' that is actually a CURIE; Using its mapping instead: {name}")
                #     edge_predicate = f"{edge_predicate_prefix}:{name}"
                # else:
                #     log.debug(f"predicate IRI '{predicate_iri}' yields edge_predicate '{edge_predicate}' that is actually a CURIE; defaulting back to {self.DEFAULT_EDGE_PREDICATE}")
                edge_predicate = DEFAULT_EDGE_PREDICATE

        edge_key = generate_edge_key(subject_node['id'], edge_predicate, object_node['id'])
        if (subject_node['id'], object_node['id'], edge_key) in self.edge_cache:
            # edge already exists; process kwargs and update the edge
            edge_data = self.update_edge(subject_node['id'], object_node['id'], edge_key, data)
        else:
            # add a new edge
            edge_data = data if data else {}
            edge_data.update(
                {
                    'subject': subject_node['id'],
                    'predicate': f"{edge_predicate}",
                    'object': object_node['id'],
                }
            )
            if 'relation' not in edge_data:
                edge_data['relation'] = predicate

            if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data:
                edge_data['provided_by'] = self.graph_metadata['provided_by']
        self.edge_cache[(subject_node['id'], object_node['id'], edge_key)] = edge_data
        return edge_data
示例#20
0
def test_is_curie(query):
    """
    Test to check behavior of is_curie method in PrefixManager.
    """
    assert PrefixManager.is_curie(query[0]) == query[1]