示例#1
0
    def values(self, property, namespace=None, language=None, localName=False):
        """Return the objects of every stored triple whose predicate matches.

        Args:
            property: Predicate to look up. A ``URIRef`` is used as is; any
                other value is expanded through ``namespace`` when one is
                given, otherwise wrapped in a ``URIRef`` directly.
            namespace: Optional namespace used to expand a non-``URIRef``
                ``property`` via ``Namespace(namespace).term(property)``.
            language: When truthy, keep only objects whose ``language``
                attribute equals this value.
            localName: When true, convert each result: literals become their
                ``value``, everything else becomes the second element of
                ``split_uri(obj)``.

        Returns:
            A list with the matching objects (or their converted forms when
            ``localName`` is set).
        """
        from rdflib import Literal, URIRef

        if isinstance(property, URIRef):
            predicate = property
        elif namespace:
            predicate = Namespace(namespace).term(property)
        else:
            predicate = URIRef(property)

        matches = [o for _, p, o in self.__triples if p == predicate]

        if language:
            # Only literals carry a language tag. Objects such as IRIs have no
            # ``language`` attribute, so look it up defensively instead of
            # raising AttributeError on the first non-literal match.
            matches = [o for o in matches
                       if getattr(o, "language", None) == language]

        if localName:
            matches = [
                o.value if isinstance(o, Literal) else split_uri(o)[1]
                for o in matches
            ]

        return matches
示例#2
0
    def setProperty(self, namespace, property, value):
        """Store the triple ``(self.uri, predicate, value)`` on this object.

        Args:
            namespace: A ``Namespace`` instance, or a value accepted by the
                ``Namespace`` constructor.
            property: Term name resolved against ``namespace`` to build the
                predicate.
            value: Object of the new triple.
        """
        if isinstance(namespace, Namespace):
            predicate = namespace.term(property)
        else:
            predicate = Namespace(namespace).term(property)

        from itertools import chain
        # chain() only builds a new lazy iterator; it does not modify its
        # arguments. The result used to be thrown away, so the triple was
        # never recorded. Rebind the attribute, materialised as a list so the
        # collection can still be iterated more than once afterwards.
        # NOTE(review): this turns ``__triples`` into a list whatever its
        # previous container type was - confirm no caller depends on that type.
        self.__triples = list(chain(self.__triples, [(self.uri, predicate, value)]))
示例#3
0
import rdflib

from rdflib.namespace import Namespace
from rdflib import RDFS, RDF, OWL

# Namespace object for the vocabulary rooted at http://www.w3.org/ns/shacl#.
SH = Namespace('http://www.w3.org/ns/shacl#')

# Classes
# Each constant below is the term with the quoted local name, resolved in the
# RDF, RDFS, OWL or SH namespace respectively.
RDF_Property = RDF.term('Property')
RDF_List = RDF.term('List')
RDFS_Resource = RDFS.term('Resource')
RDFS_Class = RDFS.term('Class')
OWL_Ontology = OWL.term("Ontology")
OWL_Class = OWL.term("Class")
OWL_DatatypeProperty = OWL.term("DatatypeProperty")
SH_NodeShape = SH.term('NodeShape')
SH_PropertyShape = SH.term('PropertyShape')
SH_ValidationResult = SH.term('ValidationResult')
SH_ValidationReport = SH.term('ValidationReport')
SH_Violation = SH.term('Violation')
SH_Info = SH.term('Info')
SH_Warning = SH.term('Warning')
SH_IRI = SH.term('IRI')
SH_BlankNode = SH.term('BlankNode')
SH_Literal = SH.term('Literal')
SH_BlankNodeOrIRI = SH.term('BlankNodeOrIRI')
# NOTE(review): this constant is spelled with an upper-case "OR" while the
# term itself and the sibling constants use "Or"; renaming it would break
# existing importers, so it is left as is.
SH_BlankNodeORLiteral = SH.term('BlankNodeOrLiteral')
SH_IRIOrLiteral = SH.term('IRIOrLiteral')
SH_SPARQLFunction = SH.term('SPARQLFunction')
SH_SPARQLRule = SH.term('SPARQLRule')
SH_TripleRule = SH.term('TripleRule')
示例#4
0
文件: consts.py 项目: MFSY/pySHACL
# -*- coding: utf-8 -*-
import rdflib

from rdflib.namespace import Namespace
from rdflib import RDFS, RDF, OWL

# Namespace object for the vocabulary rooted at http://www.w3.org/ns/shacl#.
SH = Namespace('http://www.w3.org/ns/shacl#')

# Classes
# Each constant is the term with the quoted local name in the RDFS or SH
# namespace.
RDFS_Class = RDFS.term('Class')
SH_NodeShape = SH.term('NodeShape')
SH_PropertyShape = SH.term('PropertyShape')
SH_ValidationResult = SH.term('ValidationResult')
SH_ValidationReport = SH.term('ValidationReport')
SH_Violation = SH.term('Violation')
SH_Info = SH.term('Info')
SH_Warning = SH.term('Warning')
SH_IRI = SH.term('IRI')
SH_BlankNode = SH.term('BlankNode')
SH_Literal = SH.term('Literal')
SH_BlankNodeOrIRI = SH.term('BlankNodeOrIRI')
# NOTE(review): this constant is spelled with an upper-case "OR" while the
# term itself and the sibling constants use "Or"; renaming it would break
# existing importers, so it is left as is.
SH_BlankNodeORLiteral = SH.term('BlankNodeOrLiteral')
SH_IRIOrLiteral = SH.term('IRIOrLiteral')

# predicates
# Same construction as above, for terms that are used as predicates.
RDF_type = RDF.term('type')
RDFS_subClassOf = RDFS.term('subClassOf')
SH_path = SH.term('path')
SH_deactivated = SH.term('deactivated')
SH_message = SH.term('message')
SH_name = SH.term('name')
        text = p.xpath(".//br")[-1].tail.replace("&lt;", "<").replace(
            "\n", "").replace("&gt;", ">")
        epi_converter.reset()

        text_converted = epi_converter.convert(text)
        text_xml = template.render(title=text_id, xml=text_converted, urn=urn)

        work = XmlCtsWorkMetadata(urn=(URN(urn)).upTo(URN.WORK))
        work.set_cts_property("title", text_id, lang="eng")

        for ident in additional_ids:
            work.metadata.add(DC.term("identifier"), ident)
        if text_image is not None:
            work.metadata.add(DCTERMS.term("isFormatOf"), text_image)
        if trismegistos is not None:
            work.metadata.add(SAWS.term("identifier"),
                              "www.trismegistos.org/text/" + trismegistos[1:])
        if trismegistos_place is not None:
            work.metadata.add(
                SAWS.term("isLocatedAt"),
                "http://www.trismegistos.org/place/" + trismegistos_place)
        if placename is not None:
            work.metadata.add(SAWS.term("isLocatedAt"), placename)
        if longitude is not None and latitude is not None:
            work.metadata.add(SAWS.term("isLocatedAt"),
                              "long:{};lat:{}".format(longitude, latitude))
        if regio is not None:
            work.metadata.add(SAWS.term("isLocatedAt"), regio)
            work.metadata.add(DCTERMS.term("Location"), regio)

        edition = XmlCtsEditionMetadata(urn=urn, parent=work, lang="lat")
示例#6
0
def main():
    """Build a SKOS concept scheme from the online Swift book and save it.

    Starting at the book's landing page, every page is fetched and the
    "next" link is followed until there is none. Each heading found inside
    ``article.page`` is turned into one or more concepts (its noun phrases,
    or the verbatim heading text when it has none), and the heading's
    permalink is recorded for each of them. The concepts are then written
    into an RDF graph together with ontology metadata, and the graph is
    serialized once per entry of ``SERIALIZATION_FORMATS``.
    """
    # Parse Swift book to retrieve concepts and related resources
    nextURL = "https://docs.swift.org/swift-book/"

    # concept text -> list of permalinks of the headings it was derived from
    concepts = {}

    while nextURL:
        url = nextURL
        page = urlopen(url)
        soup = BeautifulSoup(page, 'html.parser')

        article = soup.select_one('article.page')
        headings = article.find_all(re.compile('^h[1-6]$'))

        for heading in headings:
            heading_text = str(heading.contents[0]).lower()
            # presumably the second child of every heading is its anchor
            # element carrying the fragment link - verify against the markup
            permalink = url + heading.contents[1].get('href')

            doc = nlp(heading_text)
            noun_phrases = list(doc.noun_chunks)

            if noun_phrases:
                new_concepts = [lemmatize(lstrip_stopwords(chunk)).strip() for chunk in noun_phrases]
            else:
                # if no noun-phrases, take as verbatim (e.g. break, continue)
                new_concepts = [heading_text]

            for c in new_concepts:
                known_links = concepts.setdefault(c, [])
                # optionally: don't add if permalink (apart from fragment) is
                # already contained (to avoid reindexing the same page multiple
                # times, as a concept like "Function" might appear many times
                # on its dedicated page in different headers)
                if permalink not in known_links and not page_included(permalink, known_links):
                    known_links.append(permalink)

        # continue to next page (if any)
        nextLink = soup.select_one("p.next a")

        if nextLink:
            # Resolve the (relative) href against the directory of the
            # current page rather than against the page itself.
            parts = urlsplit(url)
            base_path, _ = split(parts.path)
            base_url = urlunsplit((parts.scheme, parts.netloc, join(base_path, ""), parts.query, parts.fragment))
            nextURL = urljoin(base_url, nextLink.get('href'))
        else:
            nextURL = None

    # RDF Graph creation
    g = Graph()

    # Namespace bindings
    NS = Namespace(ALMA_NS + SCHEME_NAME + "#")
    DBPEDIA = Namespace('http://dbpedia.org/page/')
    g.namespace_manager.bind('owl', OWL)
    g.namespace_manager.bind('skos', SKOS)
    g.namespace_manager.bind('dct', DCTERMS)
    g.namespace_manager.bind('foaf', FOAF)
    g.namespace_manager.bind('dbr', DBPEDIA)
    g.namespace_manager.bind(SCHEME_NAME, NS)

    # Ontology Metadata
    ontology = URIRef(ALMA_NS + SCHEME_NAME)
    g.add((ontology, RDF.type, OWL.term("Ontology")))
    g.add((ontology, DCTERMS.term("title"), Literal("{} Ontology".format(SCHEME_NAME.title()))))
    g.add((ontology, DCTERMS.term("description"), Literal("This is an SKOS-based lightweight ontology about the Swift programming language.")))
    # Percent-encode only the local name: quoting the complete URL would also
    # encode the ':' after the scheme ("http%3A//...") and so produce an
    # identifier that is no longer an absolute IRI.
    g.add((ontology, DCTERMS.term("subject"), DBPEDIA.term(quote("Swift_(programming_language)"))))
    g.add((ontology, DCTERMS.term("license"), URIRef("https://creativecommons.org/licenses/by-sa/4.0/")))
    g.add((ontology, DCTERMS.term("created"), Literal(DATE_CREATED)))
    g.add((ontology, DCTERMS.term("modified"), Literal(DATE_MODIFIED)))
    g.add((ontology, RDFS.term("seeAlso"), URIRef("https://coast.uni.lu/alma/")))
    g.add((ontology, OWL.term("versionIRI"), URIRef("http://purl.org/lu/uni/alma/{}/{}".format(SCHEME_NAME, LANGUAGE_VERSION))))
    g.add((ontology, OWL.term("versionInfo"), Literal("{}/{}".format(LANGUAGE_VERSION, ONTOLOGY_VERSION))))
    g.add((ontology, OWL.term("imports"), URIRef("http://www.w3.org/2004/02/skos/core")))
    creator = BNode()
    g.add((ontology, DCTERMS.term("creator"), creator))
    g.add((creator, RDF.type, FOAF.term("Person")))
    g.add((creator, FOAF.term("name"), Literal(AUTHOR_NAME)))
    g.add((creator, FOAF.term("mbox"), URIRef(AUTHOR_EMAIL)))

    # Concept Scheme
    schemeURI = NS.term("Scheme")
    g.add((schemeURI, RDF.type, SKOS.term("ConceptScheme")))
    g.add((schemeURI, DCTERMS.term("title"), Literal(SCHEME_NAME.title())))

    # Concepts
    for concept, permalinks in concepts.items():
        conceptURI = NS.term(cleanse(concept))
        prefLabel = concept.title()
        g.add((conceptURI, RDF.type, SKOS.term("Concept")))
        g.add((conceptURI, RDF.type, OWL.term("NamedIndividual")))
        g.add((conceptURI, SKOS.term("inScheme"), schemeURI))
        g.add((conceptURI, SKOS.term("prefLabel"), Literal(prefLabel, lang='en')))

        # Resources from Swift book
        for permalink in permalinks:
            g.add((conceptURI, SKOS.term("definition"), URIRef(permalink)))

    # Serialization
    for rdf_format, file_extension in SERIALIZATION_FORMATS.items():
        file_name = "{}_{}_{}.{}".format(SCHEME_NAME, LANGUAGE_VERSION, ONTOLOGY_VERSION, file_extension)
        g.serialize(format=rdf_format, destination=file_name)
        print("Saved under {}".format(file_name))

    print("# triples:", len(g))
"""
https://w3c.github.io/data-shapes/data-shapes-test-suite/#submitting-implementation-reports
"""
from collections import defaultdict, OrderedDict
from os import path
from datetime import datetime
import pyshacl
from pyshacl.errors import ReportableRuntimeError
import rdflib
from rdflib.namespace import Namespace, RDF, XSD

from test.helpers import load_manifest, flatten_manifests

# Namespace objects for the two vocabularies referenced in this module.
EARL = Namespace("http://www.w3.org/ns/earl#")
DOAP = Namespace("http://usefulinc.com/ns/doap#")
# Terms in the EARL namespace; presumably used as per-test outcome values in
# the generated report - confirm against the code that consumes them.
PASSED = EARL.term('passed')
FAILED = EARL.term('failed')
PARTIAL = EARL.term('partial')

# Prefix string and IRIs used as identifiers.
# NOTE(review): how they are attached to the report is not visible here.
TEST_PREFIX = "urn:x-shacl-test:"
PYSHACL_URI = rdflib.URIRef("https://github.com/RDFLib/pySHACL")
DEVELOPER_URI = rdflib.URIRef("https://github.com/ashleysommer")

# Locate the main manifest relative to this file:
# <this dir>/resources/sht_tests/manifest.ttl
here_dir = path.abspath(path.dirname(__file__))
sht_files_dir = path.join(here_dir, 'resources', 'sht_tests')
sht_main_manifest = path.join(sht_files_dir, 'manifest.ttl')

# Loaded at import time, so importing this module reads the manifest.
main_manifest = load_manifest(sht_main_manifest)
manifests_with_entries = flatten_manifests(main_manifest, True)

# Mapping whose missing keys default to a fresh empty list.
tests_found_in_manifests = defaultdict(lambda: [])
示例#8
0
class OdgiStore(Store):
    """\
    An in memory implementation of an ODGI read only store.

    It uses the disk based odgi/handlegraph as backing store.

    Triples are not stored; they are generated on demand from the loaded
    graph whenever a pattern is queried.  The triple generators written in
    this class yield two-element lists ``[(s, p, o), None]``.

    Authors: Jerven Bolleman
    """

    def __init__(self, configuration=None, identifier=None, base=None):
        """Set up namespace bindings; no graph is loaded until ``open``.

        Args:
            configuration: Passed to the ``Store`` base class and kept on
                the instance.
            identifier: Kept on the instance as ``self.identifier``.
            base: Base IRI from which the path and step namespaces (and the
                node IRIs) are built.  Defaults to
                ``'http://example.org/vg/'``.
        """
        super(OdgiStore, self).__init__(configuration)
        self.namespace_manager = NamespaceManager(Graph())
        self.bind('vg', VG)
        self.bind('faldo', FALDO)
        self.identifier = identifier
        self.configuration = configuration
        self.base = 'http://example.org/vg/' if base is None else base
        self.pathNS = Namespace(f'{self.base}path/')
        self.stepNS = Namespace(f'{self.base}step/')
        self.bind('path', self.pathNS)
        self.bind('step', self.stepNS)
        self.odgi = None

    def open(self, odgifile, create=False):
        """Load ``odgifile`` into a new odgi graph and use it as backing store.

        ``create`` is accepted for interface compatibility and ignored.
        """
        og = odgi.graph()
        og.load(odgifile)
        self.odgi = og

    def triples(self, triple_pattern, context=None):
        """Return an iterator over the triples matching ``triple_pattern``.

        Args:
            triple_pattern: ``(subject, predicate, object)`` tuple in which
                ``None`` acts as a wildcard.
            context: Ignored.

        The pattern is dispatched to a specialised generator, first by
        predicate and then, when the predicate gives no hint, by the shape
        of the subject IRI.  Patterns that match nothing known produce an
        empty generator.
        """
        subject, predicate, obj = triple_pattern
        if RDF.type == predicate and obj is not None:
            return self.typeTriples(subject, predicate, obj)
        elif predicate in nodeRelatedPredicates:
            return self.nodes(subject, predicate, obj)
        elif predicate in stepAssociatedPredicates:
            return self.steps(subject, predicate, obj)
        elif RDFS.label == predicate:
            return self.paths(subject, predicate, obj)
        elif subject is None and predicate is None and obj is None:
            return chain(self.__allPredicates(), self.__allTypes())
        elif subject is not None:
            # NOTE(review): the negative indexes below assume the IRI has at
            # least four '/'-separated parts; shorter subjects raise
            # IndexError.
            subjectIriParts = subject.toPython().split('/')
            if 'node' == subjectIriParts[-2] and self.odgi.has_node(int(subjectIriParts[-1])):
                handle = self.odgi.get_handle(int(subjectIriParts[-1]))
                return chain(self.handleToTriples(predicate, obj, handle),
                             self.handleToEdgeTriples(subject, predicate, obj, handle))
            elif 'path' == subjectIriParts[-4] and 'step' == subjectIriParts[-2]:
                return self.steps(subject, predicate, obj)
            elif 'path' == subjectIriParts[-2]:
                return self.paths(subject, predicate, obj)
            elif type(subject) == StepBeginIriRef or type(subject) == StepEndIriRef:
                return self.steps(subject, predicate, obj)
            else:
                return self.__emptygen()
        else:
            return self.__emptygen()

    # For the known types we can shortcut evaluation in many cases
    def typeTriples(self, subject, predicate, obj):
        """Dispatch an ``rdf:type`` pattern according to the requested class."""
        if VG.Node == obj:
            return self.nodes(subject, predicate, obj)
        elif VG.Path == obj:
            return self.paths(subject, predicate, obj)
        elif obj in stepAssociatedTypes:
            return self.steps(subject, predicate, obj)
        else:
            return self.__emptygen()

    def __allTypes(self):
        """Yield the ``rdf:type`` triples of every type in ``knownTypes``."""
        for typ in knownTypes:
            yield from self.triples((None, RDF.type, typ))

    def __allPredicates(self):
        """Yield the triples of every predicate in ``knownPredicates``."""
        for pred in knownPredicates:
            yield from self.triples((None, pred, None))

    @staticmethod
    def __emptygen():
        """return an empty generator"""
        # The unreachable ``yield`` is what makes this function a generator.
        if False:
            yield

    def nodes(self, subject, predicate, obj):
        """Yield node triples (value, type and edges) matching the pattern.

        With a subject, only that node is considered (and nothing is yielded
        when the subject is not a node of the graph); without one, every
        node of the graph is visited.
        """
        if subject is not None:
            isNodeIri = self.isNodeIriInGraph(subject)

            if predicate == RDF.type and obj == VG.Node and isNodeIri:
                yield [(subject, RDF.type, VG.Node), None]
            elif predicate is None and obj == VG.Node and isNodeIri:
                yield [(subject, RDF.type, VG.Node), None]
            elif type(subject) == NodeIriRef:
                yield from self.handleToTriples(predicate, obj, subject._nodeHandle)
                yield from self.handleToEdgeTriples(subject, predicate, obj, subject._nodeHandle)
            elif isNodeIri:
                subjectIriParts = subject.toPython().split('/')
                nh = self.odgi.get_handle(int(subjectIriParts[-1]))
                yield from self.handleToTriples(predicate, obj, nh)
                yield from self.handleToEdgeTriples(subject, predicate, obj, nh)
            # any other subject is not a node: yield nothing
        else:
            for handle in self.handles():
                yield from self.handleToEdgeTriples(subject, predicate, obj, handle)
                yield from self.handleToTriples(predicate, obj, handle)

    def isNodeIriInGraph(self, iri):
        """Return whether ``iri`` denotes a node of the loaded graph.

        A ``NodeIriRef`` is accepted as is; any other IRI must end in
        ``.../node/<id>`` with ``<id>`` being a node id known to the graph.
        """
        if type(iri) == NodeIriRef:
            return True
        iri_parts = iri.toPython().split('/')
        return 'node' == iri_parts[-2] and self.odgi.has_node(int(iri_parts[-1]))

    def paths(self, subject, predicate, obj):
        """Yield what ``PathToTriples`` collects for every path of the graph."""
        li = []
        # The callback appends its results to ``li`` while odgi iterates
        # over the path handles.
        tt = PathToTriples(self.odgi, self.pathNS, subject, predicate, obj, li)
        self.odgi.for_each_path_handle(tt)
        for p in li:
            yield p

    def steps(self, subject, predicate, obj):
        """Yield step triples matching the pattern.

        Without a subject every step of every non-empty path is walked,
        tracking ``rank`` (1-based index of the step in its path) and
        ``position`` (1-based offset: 1 plus the summed lengths of the
        preceding nodes).  With a step-like IRI object as subject, rank and
        position are taken from it.  With a plain IRI of the form
        ``.../path/<name>/step/<rank>`` the named path is walked until the
        requested rank is reached.
        """
        if subject is None:
            for pathHandle in self.pathHandles():
                if not self.odgi.is_empty(pathHandle):
                    rank = 1
                    position = 1
                    step_handle = self.odgi.path_begin(pathHandle)
                    node_handle = self.odgi.get_handle_of_step(step_handle)
                    yield from self.stepHandleToTriples(step_handle, subject, predicate, obj, node_handle=node_handle,
                                                        rank=rank, position=position)

                    while self.odgi.has_next_step(step_handle):
                        step_handle = self.odgi.get_next_step(step_handle)
                        # advance by the length of the node just left
                        position = position + self.odgi.get_length(node_handle)
                        node_handle = self.odgi.get_handle_of_step(step_handle)
                        rank = rank + 1
                        yield from self.stepHandleToTriples(step_handle, subject, predicate, obj,
                                                            node_handle=node_handle,
                                                            rank=rank, position=position)
        elif type(subject) in (StepIriRef, StepBeginIriRef, StepEndIriRef):
            yield from self.stepHandleToTriples(subject.step_handle, subject, predicate, obj, rank=subject.rank(),
                                                position=subject.position())
        else:
            subject_iri_parts = subject.toPython().split('/')
            if 'path' == subject_iri_parts[-4] and 'step' == subject_iri_parts[-2]:
                path_name = subject_iri_parts[-3]
                pathHandle = self.odgi.get_path_handle(path_name)
                stepRank = int(subject_iri_parts[-1])

                if not self.odgi.is_empty(pathHandle):
                    rank = 1
                    position = 1
                    step_handle = self.odgi.path_begin(pathHandle)
                    node_handle = self.odgi.get_handle_of_step(step_handle)
                    while rank != stepRank and self.odgi.has_next_step(step_handle):
                        rank = rank + 1
                        position = position + self.odgi.get_length(node_handle)
                        step_handle = self.odgi.get_next_step(step_handle)
                        node_handle = self.odgi.get_handle_of_step(step_handle)
                    yield from self.stepHandleToTriples(step_handle, subject, predicate, obj, node_handle=node_handle,
                                                        rank=rank, position=position)

    def stepHandleToTriples(self, stepHandle, subject, predicate, obj, node_handle=None, rank=None, position=None):
        """Yield the triples describing one step that match the pattern.

        Args:
            stepHandle: odgi step handle being described.
            subject: Pattern subject (``None`` for wildcard).  Begin/end
                position IRIs are mapped back to the step they belong to.
            predicate: Pattern predicate (``None`` for wildcard).
            obj: Pattern object (``None`` for wildcard).
            node_handle: Handle of the node the step is on; looked up from
                ``stepHandle`` when omitted.
            rank: Rank of the step; the rank triple is skipped when ``None``.
            position: Position of the step; the position triple is skipped
                when ``None``.
        """
        if type(subject) == StepIriRef:
            step_iri = subject
        elif type(subject) == StepBeginIriRef or type(subject) == StepEndIriRef:
            step_iri = subject._stepIri
        else:
            step_iri = StepIriRef(stepHandle, self.base, self.odgi, position, rank)

        if subject is None or step_iri == subject:
            if predicate == RDF.type or predicate is None:
                if obj is None or obj == VG.Step:
                    yield ([(step_iri, RDF.type, VG.Step), None])
                if obj is None or obj == FALDO.Region:
                    yield ([(step_iri, RDF.type, FALDO.Region), None])
            if node_handle is None:
                node_handle = self.odgi.get_handle_of_step(stepHandle)
            node_iri = NodeIriRef(node_handle, odgi=self.odgi, base=self.base)

            if predicate is None or predicate == VG.node or predicate == VG.reverseOfNode:
                # A step carries exactly one of vg:node / vg:reverseOfNode,
                # chosen by its orientation.  The orientation test has to
                # apply to explicit predicates as well as to the wildcard;
                # previously ``and`` bound tighter than ``or``, so an
                # explicit vg:node query also matched reversed steps (and
                # vice versa).
                is_reverse = self.odgi.get_is_reverse(node_handle)
                node_matches = obj is None or node_iri == obj
                if (predicate == VG.node or predicate is None) and not is_reverse and node_matches:
                    yield ([(step_iri, VG.node, node_iri), None])
                if (predicate == VG.reverseOfNode or predicate is None) and is_reverse and node_matches:
                    yield ([(step_iri, VG.reverseOfNode, node_iri), None])

            if (predicate == VG.rank or predicate is None) and rank is not None:
                rank = Literal(rank)
                if obj is None or obj == rank:
                    yield ([(step_iri, VG.rank, rank), None])

            if (predicate == VG.position or predicate is None) and position is not None:
                position = Literal(position)
                if obj is None or position == obj:
                    yield ([(step_iri, VG.position, position), None])

            if predicate == VG.path or predicate is None:
                path = self.odgi.get_path_handle_of_step(stepHandle)
                path_name = self.odgi.get_path_name(path)

                path_iri = self.pathNS.term(f'{path_name}')
                if obj is None or path_iri == obj:
                    yield ([(step_iri, VG.path, path_iri), None])

            if predicate is None or predicate == FALDO.begin:
                yield ([(step_iri, FALDO.begin, StepBeginIriRef(step_iri)), None])

            if predicate is None or predicate == FALDO.end:
                yield ([(step_iri, FALDO.end, StepEndIriRef(step_iri)), None])

            if subject is None:
                # Wildcard subject: also describe the begin/end positions.
                begin = StepBeginIriRef(step_iri)
                yield from self.faldoForStep(step_iri, begin, predicate, obj)
                end = StepEndIriRef(step_iri)
                yield from self.faldoForStep(step_iri, end, predicate, obj)

        if type(subject) in (StepBeginIriRef, StepEndIriRef) and step_iri == subject._stepIri:
            yield from self.faldoForStep(subject._stepIri, subject, predicate, obj)

    def faldoForStep(self, step_iri, subject, predicate, obj):
        """Yield the FALDO triples of a step's begin or end position.

        ``subject`` is the begin/end position IRI, ``step_iri`` the step it
        belongs to.
        """
        ep = Literal(subject.position())
        if (predicate is None or predicate == FALDO.position) and (obj is None or obj == ep):
            yield ([(subject, FALDO.position, ep), None])
        if (predicate is None or predicate == RDF.type) and (obj is None or obj == FALDO.ExactPosition):
            yield ([(subject, RDF.type, FALDO.ExactPosition), None])
        if (predicate is None or predicate == RDF.type) and (obj is None or obj == FALDO.Position):
            yield ([(subject, RDF.type, FALDO.Position), None])
        if predicate is None or predicate == FALDO.reference:
            path = step_iri.path()
            pathName = self.odgi.get_path_name(path)
            pathIri = self.pathNS.term(f'{pathName}')
            if obj is None or obj == pathIri:
                yield ([(subject, FALDO.reference, pathIri), None])

    def handleToTriples(self, predicate, obj, node_handle):
        """Yield the ``rdf:value`` and ``rdf:type`` triples of one node."""
        node_iri = NodeIriRef(node_handle, odgi=self.odgi, base=self.base)

        if predicate == RDF.value or predicate is None:
            seq_value = rdflib.term.Literal(self.odgi.get_sequence(node_handle))
            if obj is None or obj == seq_value:
                yield [(node_iri, RDF.value, seq_value), None]
        # A separate ``if`` (not ``elif``): with a wildcard predicate both the
        # sequence value and the type have to be reported.  As ``elif`` the
        # ``predicate is None`` alternative could never be reached.
        if (predicate == RDF.type or predicate is None) and (obj is None or obj == VG.Node):
            yield [(node_iri, RDF.type, VG.Node), None]

    def handleToEdgeTriples(self, subject, predicate, obj, nodeHandle):
        """Yield the link triples for the edges followed from ``nodeHandle``.

        For every edge the orientation-specific ``vg:links...To...``
        predicate is reported, plus the generic ``vg:links``.  ``subject``
        is not used; the subject IRI is derived from ``nodeHandle``.
        """
        if predicate is None or (predicate in nodeRelatedPredicates):
            to_node_handles = []
            self.odgi.follow_edges(nodeHandle, False, CollectEdges(to_node_handles))
            node_iri = NodeIriRef(nodeHandle, odgi=self.odgi, base=self.base)
            for edge in to_node_handles:

                otherIri = NodeIriRef(edge, odgi=self.odgi, base=self.base)

                if obj is None or otherIri == obj:
                    node_is_reverse = self.odgi.get_is_reverse(nodeHandle)
                    other_is_reverse = self.odgi.get_is_reverse(edge)
                    # TODO: check the logic here
                    if (predicate is None or VG.linksForwardToForward == predicate) \
                            and not node_is_reverse and not other_is_reverse:
                        yield ([(node_iri, VG.linksForwardToForward, otherIri), None])
                    if (predicate is None or VG.linksReverseToForward == predicate) \
                            and node_is_reverse and not other_is_reverse:
                        yield ([(node_iri, VG.linksReverseToForward, otherIri), None])
                    if (predicate is None or VG.linksReverseToReverse == predicate) \
                            and node_is_reverse and other_is_reverse:
                        yield ([(node_iri, VG.linksReverseToReverse, otherIri), None])
                    # Compare against the predicate that is actually yielded;
                    # this branch used to test linksReverseToReverse.
                    if (predicate is None or VG.linksForwardToReverse == predicate) \
                            and not node_is_reverse and other_is_reverse:
                        yield ([(node_iri, VG.linksForwardToReverse, otherIri), None])
                    if predicate is None or VG.links == predicate:
                        yield ([(node_iri, VG.links, otherIri), None])

    def bind(self, prefix, namespace):
        """Bind ``prefix`` to ``namespace`` in the store's namespace manager."""
        self.namespace_manager.bind(prefix, namespace)

    def namespace(self, search_prefix):
        """Return the namespace bound to ``search_prefix``, or ``None``."""
        for prefix, namespace in self.namespace_manager.namespaces():
            if search_prefix == prefix:
                return namespace

    def prefix(self, searchNamespace):
        """Return the prefix bound to ``searchNamespace``, or ``None``."""
        for prefix, namespace in self.namespace_manager.namespaces():
            if searchNamespace == namespace:
                return prefix

    def namespaces(self):
        """Return the ``(prefix, namespace)`` pairs of the namespace manager."""
        return self.namespace_manager.namespaces()

    def handles(self):
        """Yield the handle of every node id between the graph's min and max.

        Ids in that range that the graph does not contain are skipped.  (The
        counter used to advance only for existing ids, so a single gap in
        the id range made this loop forever.)
        """
        max_node_id = self.odgi.max_node_id()
        for node_id in range(self.odgi.min_node_id(), max_node_id + 1):
            if self.odgi.has_node(node_id):
                yield self.odgi.get_handle(node_id)

    def pathHandles(self):
        """Yield every path handle of the graph."""
        paths = []
        self.odgi.for_each_path_handle(CollectPaths(paths))
        yield from paths
示例#9
0
文件: SIGA.py 项目: NLeSC/candYgene
def triplify(db, rdf_format, config):
    """Generate RDF triples from RDB using Direct Mapping approach.

    Every feature of ``db`` whose (amended) type has an entry in the
    feature-to-class mapping is described with its type, label, identifier,
    optional comment, FALDO location and its direct children.  The graph is
    written next to the database file, using the file extension that
    belongs to ``rdf_format``.

    Args:
        db: Feature database; must provide ``all_features()``,
            ``children(feature, level=1)`` and the attribute ``dbfn``.
        rdf_format: One of ``'xml'``, ``'nt'``, ``'turtle'`` or ``'n3'``.
        config: Nested mapping with the keys ``'URIs'`` (``rdf_base``,
            ``rdf_creator``, ``gff_source``) and ``'Dataset'``
            (``species_name``, ``ncbi_taxon_id``).

    Raises:
        IOError: If ``rdf_format`` is not supported.
        KeyError: If a feature's strand is not one of ``+``, ``-``, ``?``
            or ``.``.
    """
    # URIs always use '/', so build them with posixpath instead of os.path,
    # which would insert backslashes on Windows.
    import posixpath

    fmt2fext = dict(xml = '.rdf',
                    nt = '.nt',
                    turtle = '.ttl',
                    n3 = '.n3')

    if rdf_format not in fmt2fext:
        raise IOError("Unsupported RDF serialization '{0}'.".format(rdf_format))

    base_uri = config['URIs']['rdf_base']
    creator_uri = config['URIs']['rdf_creator']
    download_url = config['URIs']['gff_source']
    species_name = config['Dataset']['species_name']
    taxon_id = config['Dataset']['ncbi_taxon_id']

    # define additional namespace prefixes
    # TODO: add namespaces to a config file
    OBO = Namespace('http://purl.obolibrary.org/obo/')
    FALDO = Namespace('http://biohackathon.org/resource/faldo#')
    DCMITYPE = Namespace('http://purl.org/dc/dcmitype/')
    SO = Namespace('http://purl.obolibrary.org/obo/so#')

    g = Graph()
    g.bind('obo', OBO)
    g.bind('faldo', FALDO)
    g.bind('dcterms', DCTERMS)
    g.bind('dcmitype', DCMITYPE)
    g.bind('so', SO)

    # map GFF feature types and DNA strandedness to ontology classes
    # Note: The 'mRNA' feature key is often used (incorrectly) in place of 'prim_transcript'
    # in genome annotations. The former feature MUST NOT contain introns while the latter
    # MAY contain introns [2].
    # Feature type to SO mappings:
    #   prim_transcript -> SO_0000120 refers to a protein-coding primary (unprocessed) transcript
    #   mRNA            -> SO_0000234 refers to a mature transcript
    #

    feature_onto_class = file_to_dict('../config/feature2class.ini')

    strand_onto_class = {
        '+' : FALDO.ForwardStrandPosition,
        '-' : FALDO.ReverseStrandPosition,
        '?' : FALDO.StrandedPosition,
        '.' : FALDO.Position
    }

    # add genome info to graph
    genome_uri = URIRef(posixpath.join(base_uri, 'genome', species_name.replace(' ', '_')))
    # NOTE(review): '%d' requires taxon_id to be a number - confirm the
    # config loader does not hand it over as a string.
    taxon_uri = OBO.term('NCBITaxon_%d' % taxon_id)
    genome_label = 'genome of {0}'.format(species_name)

    g.add( (genome_uri, RDF.type, OBO.term(feature_onto_class['genome'])) )
    g.add( (genome_uri, RDF.type, DCMITYPE.Dataset) )
    g.add( (genome_uri, RDFS.label, Literal(genome_label, datatype=XSD.string)) )
    g.add( (genome_uri, DCTERMS.created, Literal(datetime.now().strftime("%Y-%m-%d"), datatype=XSD.date )) )
    g.add( (genome_uri, DCTERMS.creator, URIRef(creator_uri)) )
    g.add( (genome_uri, DCTERMS.title, Literal(genome_label, datatype=XSD.string)) )
    g.add( (genome_uri, DCTERMS.source, URIRef(download_url)) )
    g.add( (genome_uri, SO.genome_of, taxon_uri) ) # N.B.: predicate has no domain/range defined
    g.add( (genome_uri, OBO.RO_0002162, taxon_uri) )   # use 'in taxon' alternatively
    g.add( (taxon_uri, RDFS.label, Literal('NCBI Taxonomy ID: {0}'.format(taxon_id), datatype=XSD.string)) )
    g.add( (taxon_uri, DCTERMS.identifier, Literal(taxon_id, datatype=XSD.positiveInteger)) )

    for feature in db.all_features():
        if feature.strand not in strand_onto_class:
            raise KeyError("Incorrect strand information for feature ID '{0}'.".format(feature.id))
        # NOTE(review): the try block is deliberately left this wide to keep
        # the existing best-effort behaviour, but it also hides any other
        # KeyError raised while a feature is processed (e.g. a missing
        # 'chromosome' entry in the mapping).
        try: # skip GFF feature types not in feature_onto_class dict
            chrom = str(feature.seqid)
            strand_uri = strand_onto_class[feature.strand]
            feature_id = normalize_feature_id(feature.id)
            feature_type = amend_feature_type(feature.featuretype)
            feature_type_uri = OBO.term(feature_onto_class[feature_type])
            feature_uri = URIRef(posixpath.join(genome_uri, feature_type, feature_id))
            seqid_uri = URIRef(posixpath.join(genome_uri, 'chromosome', chrom))
            region_uri = URIRef('{0}#{1}-{2}'.format(seqid_uri, feature.start, feature.end))
            start_uri = URIRef('{0}#{1}'.format(seqid_uri, feature.start))
            end_uri = URIRef('{0}#{1}'.format(seqid_uri, feature.end))

            # add genome and chromosome info to graph
            # Note: the assumption is that the seqid field refers to chromosome
            g.add( (seqid_uri, RDF.type, OBO.term(feature_onto_class['chromosome'])) )
            g.add( (seqid_uri, RDFS.label, Literal('chromosome {0}'.format(chrom), datatype=XSD.string)) )
            g.add( (seqid_uri, SO.part_of, genome_uri) )

            # add feature types and IDs to graph
            g.add( (feature_uri, RDF.type, feature_type_uri) )
            g.add( (feature_uri, RDFS.label, Literal('{0} {1}'.format(feature_type, feature_id), datatype=XSD.string)) )
            g.add( (feature_uri, DCTERMS.identifier, Literal(feature_id, datatype=XSD.string)) )

            # add feature descriptions (from the attributes field) to graph
            des = get_feature_attrs(feature)
            if des is not None:
                g.add( (feature_uri, RDFS.comment, Literal(des, datatype=XSD.string)) )

            # add feature start/end coordinates and strand info to graph
            g.add( (feature_uri, FALDO.location, region_uri) )
            g.add( (region_uri, RDF.type, FALDO.Region) )
            g.add( (region_uri, RDFS.label, Literal('chromosome {0}:{1}-{2}'.format(chrom, feature.start, feature.end))) )
            g.add( (region_uri, FALDO.begin, start_uri) )
            g.add( (start_uri, RDF.type, FALDO.ExactPosition) )
            g.add( (start_uri, RDF.type, strand_uri) )
            g.add( (start_uri, RDFS.label, Literal('chromosome {0}:{1}-*'.format(chrom, feature.start))) )
            g.add( (start_uri, FALDO.position, Literal(feature.start, datatype=XSD.positiveInteger)) )
            g.add( (start_uri, FALDO.reference, seqid_uri) )
            g.add( (region_uri, FALDO.end, end_uri) )
            g.add( (end_uri, RDF.type, FALDO.ExactPosition) )
            g.add( (end_uri, RDF.type, strand_uri) )
            g.add( (end_uri, RDFS.label, Literal('chromosome {0}:*-{1}'.format(chrom, feature.end))) )
            g.add( (end_uri, FALDO.position, Literal(feature.end, datatype=XSD.positiveInteger)) )
            g.add( (end_uri, FALDO.reference, seqid_uri) )
            # Note: phase info is mandatory for CDS feature types but can't find a corresponding ontological term

            # add parent-child relationships between features to graph
            for child in db.children(feature, level=1):
                child_feature_id = normalize_feature_id(child.id)
                child_feature_type = amend_feature_type(child.featuretype)
                child_feature_uri = URIRef(posixpath.join(genome_uri, child_feature_type, child_feature_id))
                g.add( (feature_uri, SO.has_part, child_feature_uri) ) # use the inverse of part_of

                if feature_type == 'gene' and child_feature_type == 'prim_transcript':
                    g.add( (feature_uri, SO.transcribed_to, child_feature_uri) )

        except KeyError:
            continue

    outfile = os.path.splitext(db.dbfn)[0] + fmt2fext[rdf_format]
    # Let rdflib write the file itself: depending on the rdflib version,
    # serialize() without a destination returns bytes or str, and bytes
    # cannot be written to a file opened in text mode.
    g.serialize(destination=outfile, format=rdf_format)