Пример #1
0
    def getOr(subject, predicate, *args, **kwargs):
        """ Fetch the metadata node linked to a subject, or build a new Metadata

        :param subject: Subject to which the metadata node should be connected
        :param predicate: Predicate by which the metadata node should be connected
        :param args: Positional arguments passed to Metadata when no node is found
        :param kwargs: Keyword arguments passed to Metadata when no node is found
        :return: Metadata wrapping the first matching object, or a new Metadata
        :rtype: Metadata

        """
        graph = get_graph()
        # Nothing recorded for this subject/predicate pair: build from arguments
        if (subject, predicate, None) not in graph:
            return Metadata(*args, **kwargs)
        first_object = next(graph.objects(subject, predicate))
        return Metadata(node=first_object)
Пример #2
0
    def __export__(self,
                   output=None,
                   context=False,
                   namespace_manager=None,
                   **kwargs):
        """ Export this object (its name and its children) as a DTS JSON dictionary

        :param output: Requested mimetype; only Mimetypes.JSON.DTS.Std is handled
        :param context: When truthy, an "@context" entry is added to the result
        :param namespace_manager: Manager used to shorten the DTS terms; the one
            of get_graph() is used when this is falsy
        :return: Dictionary export, or None for any other mimetype
        """
        if output == Mimetypes.JSON.DTS.Std:
            nsm = namespace_manager or get_graph().namespace_manager

            type_key = str(nsm.qname(RDF_NAMESPACES.DTS.term("citeType")))
            structure_key = str(
                nsm.qname(RDF_NAMESPACES.DTS.term("citeStructure")))

            exported = {type_key: self.name}

            if not self.is_empty():
                # Children never carry their own @context
                nested = []
                for cite in self.children:
                    nested.append(
                        cite.export(output,
                                    context=False,
                                    namespace_manager=nsm))
                exported[structure_key] = nested

            if context:
                # The prefix is whatever precedes ":" in the shortened term
                prefix = type_key.split(":")[0]
                exported["@context"] = {prefix: str(RDF_NAMESPACES.DTS)}

            return exported
Пример #3
0
 def __init__(self, node=None, *args, **kwargs):
     """ Set up the object around a graph node

     :param node: Node to use; a new BNode is created when None
     :param args: Positional arguments forwarded to the parent initializer
     :param kwargs: Keyword arguments forwarded to the parent initializer
     """
     super(Metadata, self).__init__(*args, **kwargs)
     self.__graph__ = get_graph()
     self.__node__ = BNode() if node is None else node
Пример #4
0
    def test_post_work_dispatching_active(self):
        """ Exports made after dispatching on the citation scheme can be parsed back """
        tic = CtsTextInventoryCollection()
        poetry = CtsTextInventoryMetadata("urn:perseus:poetry", parent=tic)
        prose = CtsTextInventoryMetadata("urn:perseus:prose", parent=tic)

        dispatcher = CollectionDispatcher(tic, default_inventory_name="urn:perseus:prose")

        @dispatcher.inventory("urn:perseus:poetry")
        def dispatchPoetry(collection, **kwargs):
            # True as soon as one readable descendant has a citation named "line"
            return any(
                citation.name == "line"
                for readable in collection.readableDescendants
                for citation in readable.citation
            )

        resolver = CtsCapitainsLocalResolver(
            ["./tests/testing_data/latinLit2"],
            dispatcher=dispatcher
        )

        # All exports are produced before the graph is emptied below
        master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
        poetry_xml = resolver.getMetadata("urn:perseus:poetry").export(Mimetypes.XML.CTS)
        prose_xml = resolver.getMetadata("urn:perseus:prose").export(Mimetypes.XML.CTS)

        get_graph().remove((None, None, None))
        del poetry, prose
        poetry_inventory = XmlCtsTextInventoryMetadata.parse(poetry_xml)
        prose_inventory = XmlCtsTextInventoryMetadata.parse(prose_xml)

        self.assertEqual(
            len(poetry_inventory.textgroups), 3,
            "There should be 3 textgroups in Poetry (Martial, Ovid and Juvenal)"
        )
        self.assertIsInstance(poetry_inventory, CtsTextInventoryMetadata, "should be textinventory")
        self.assertEqual(
            len(prose_inventory.textgroups), 1,
            "There should be one textgroup in Prose (Greek texts)"
        )

        get_graph().remove((None, None, None))
        del poetry_inventory, prose_inventory
        master_inventory = XmlCtsTextInventoryMetadata.parse(master_xml)
        self.assertEqual(
            len(master_inventory.readableDescendants), 26,
            "There should be all 26 readable descendants in the master collection"
        )
Пример #5
0
    def __init__(self, identifier: str=None, metadata: Metadata=None):
        """ Create the object on a fresh blank node and record what it implements

        :param identifier: Identifier stored on the object and used as the URIRef
            object of the DTS "implements" statement
        :param metadata: Metadata to attach; a new Metadata on this object's node
            is created when the value is falsy
        """
        self._graph = get_graph()
        self._identifier = identifier

        self._node = BNode()
        if metadata:
            self._metadata = metadata
        else:
            self._metadata = Metadata(node=self.asNode())

        implements_quad = (
            self._node,
            RDF_NAMESPACES.DTS.implements,
            URIRef(identifier),
            self._graph,
        )
        self._graph.addN([implements_quad])
Пример #6
0
    def __init__(self, identifier="", *args, **kwargs):
        """ Initialize the collection and declare its type and model in the graph

        :param identifier: Identifier of the collection, used as its URIRef node
        :param args: Positional arguments forwarded to the parent initializer
        :param kwargs: Keyword arguments forwarded to the parent initializer
        """
        super(Collection, self).__init__(identifier, *args, **kwargs)
        self._graph = get_graph()

        self._node = URIRef(identifier)
        self._metadata = Metadata(node=self.asNode())

        # Each of these predicates gets a single value on the collection node
        declarations = (
            (RDF.type, self.TYPE_URI),
            (RDF_NAMESPACES.DTS.model, self.MODEL_URI),
        )
        for predicate, value in declarations:
            self.graph.set((self.asNode(), predicate, value))

        self._parent = None
        self._children = {}
Пример #7
0
    def __init__(self, identifier="", *args, **kwargs):
        """ Initialize the collection, its metadata and its capabilities node

        :param identifier: Identifier of the collection, used as its URIRef node
        :param args: Positional arguments forwarded to the parent initializer
        :param kwargs: Keyword arguments forwarded to the parent initializer
        """
        super(Collection, self).__init__(identifier, *args, **kwargs)
        self.__graph__ = get_graph()

        self.__node__ = URIRef(identifier)
        self.__metadata__ = Metadata(node=self.asNode())
        self.__capabilities__ = Metadata.getOr(
            self.asNode(), RDF_NAMESPACES.DTS.capabilities)

        # Each of these predicates gets a single value on the collection node
        declarations = (
            (RDF.type, self.TYPE_URI),
            (RDF_NAMESPACES.DTS.model, self.MODEL_URI),
        )
        for predicate, value in declarations:
            self.graph.set((self.asNode(), predicate, value))

        capabilities_quad = (
            self.asNode(),
            RDF_NAMESPACES.DTS.capabilities,
            self.capabilities.asNode(),
            self.graph,
        )
        self.graph.addN([capabilities_quad])

        self.__parent__ = None
        self.__children__ = {}
Пример #8
0
    def __export__(self, output=None, **kwargs):
        """ Export the citation in the requested XML mimetype

        :param output: Mimetype to export to (Mimetypes.XML.CTS or Mimetypes.XML.TEI)
        :param kwargs: Accepted but not used by this method
        :return: XML string; an empty string when the values needed by the
            requested format are None; None for any other mimetype
        """
        if output == Mimetypes.XML.CTS:
            if self.xpath is None and self.scope is None and self.refsDecl is None:
                return ""

            child = ""
            if isinstance(self.child, Citation):
                child = self.child.export(output=output)

            label = "" if self.name is None else self.name

            # NOTE(review): the guard above only returns early when xpath, scope
            # and refsDecl are ALL None; re.sub would raise TypeError if just one
            # of xpath/scope is None - confirm whether that state can occur.
            return make_xml_node(get_graph(),
                                 RDF_NAMESPACES.CTS.citation,
                                 attributes={
                                     "xpath":
                                     re.sub(Citation.escape, "'", self.xpath),
                                     "scope":
                                     re.sub(Citation.escape, "'", self.scope),
                                     "label":
                                     label
                                 },
                                 innerXML=child,
                                 complete=True)
        elif output == Mimetypes.XML.TEI:
            if self.refsDecl is None:
                return ""

            label = "" if self.name is None else self.name

            # One "(\w+)" group per "$" placeholder in refsDecl, joined by an
            # escaped dot. Raw strings keep the backslashes without relying on
            # invalid escape sequences in a normal string literal.
            regexp = r"\.".join([r"(\w+)"] * self.refsDecl.count("$"))

            return \
                "<tei:cRefPattern n=\"{label}\" matchPattern=\"{regexp}\" replacementPattern=\"#xpath({refsDecl})\">" \
                "<tei:p>This pointer pattern extracts {label}</tei:p></tei:cRefPattern>".format(
                    refsDecl=self.refsDecl,
                    label=label,
                    regexp=regexp
                )
Пример #9
0
 def setUp(self):
     """ Reset the graph from get_graph() and create the resolver used by the test """
     # The all-None pattern is passed to remove(); presumably this drops every triple
     get_graph().remove((None, None, None))
     self.resolver = NautilusCTSResolver(["./tests/testing_data/latinLit2"])
Пример #10
0
 def setUp(self):
     """ Reset the graph from get_graph() and create the resolver used by the test """
     # The all-None pattern is passed to remove(); presumably this drops every triple
     get_graph().remove((None, None, None))
     self.resolver = CtsCapitainsLocalResolver(["./tests/testing_data/latinLit2"])
Пример #11
0
 def graph(self):
     """ Return the graph provided by get_graph() """
     return get_graph()
Пример #12
0
    def __export__(self, output=None, domain=""):
        """ Serialise the collection item into the requested mimetype.

        ..note:: If current implementation does not have special mimetypes, reuses default_export method

        :param output: Mimetype to export to (Uses MyCapytain.common.utils.Mimetypes)
        :type output: str
        :param domain: Prefix prepended to member ids to build their URL
        :type domain: str
        :return: Dictionary for the DTS JSON mimetype, serialised graph for
            JSON-LD / RDF XML, None otherwise
        """

        if output == Mimetypes.JSON.DTS.Std:
            nm = self.graph.namespace_manager

            # Prefix -> namespace URI for every predicate found in the graph
            bindings = {}
            for predicate in set(self.graph.predicates()):
                prefix, namespace, _ = nm.compute_qname(predicate)
                bindings[prefix] = str(URIRef(namespace))

            label_key = self.graph.qname(RDFS.label)
            type_key = self.graph.qname(RDF.type)

            def summarize(item):
                """ Build the id / label / url entry for a related collection """
                return {
                    "@id": item.id,
                    label_key: LiteralToDict(item.get_label()) or item.id,
                    self.graph.qname(RDF_NAMESPACES.DTS.url): domain + item.id
                }

            store = Subgraph(get_graph().namespace_manager)
            store.graphiter(self.graph,
                            self.metadata,
                            ascendants=0,
                            descendants=1)

            # Repeated predicates are accumulated in a list
            metadata = {}
            for _, predicate, obj in store.graph:
                key = self.graph.qname(predicate)
                value = LiteralToDict(obj)
                if key not in metadata:
                    metadata[key] = value
                elif isinstance(metadata[key], list):
                    metadata[key].append(value)
                else:
                    metadata[key] = [metadata[key], value]

            graph_body = {
                "@id": self.id,
                type_key: str(self.type),
                label_key: LiteralToDict(self.get_label()) or self.id,
                self.graph.qname(RDF_NAMESPACES.DTS.size): len(self.members),
                self.graph.qname(RDF_NAMESPACES.DTS.metadata): metadata
            }
            o = {"@context": bindings, "@graph": graph_body}

            version = self.version
            if version is not None:
                graph_body["version"] = str(version)

            if len(self.members):
                related = [summarize(member) for member in self.members]
                graph_body[self.graph.qname(RDF_NAMESPACES.DTS.members)] = related

            if self.parent:
                related = [summarize(member) for member in self.parents]
                graph_body[self.graph.qname(RDF_NAMESPACES.DTS.parents)] = related

            del store
            return o
        elif (output == Mimetypes.JSON.LD
                or output == Mimetypes.XML.RDF):

            # Temporary graph built around this collection's node
            store = Subgraph(get_graph().namespace_manager)
            store.graphiter(self.graph,
                            self.asNode(),
                            ascendants=1,
                            descendants=-1)

            serialized = store.serialize(format=RDFLIB_MAPPING[output],
                                         auto_compact=True,
                                         indent="")
            del store
            return serialized
Пример #13
0
    def __export__(self, output=None, namespace_manager=None):
        """ Export the collection item in the Mimetype required.

        ..note:: If current implementation does not have special mimetypes, reuses default_export method

        :param output: Mimetype to export to (Uses MyCapytain.common.utils.Mimetypes)
        :type output: str
        :param namespace_manager: Optional namespace manager whose namespaces()
            are handed to the export Subgraph; when falsy, a prefix mapping is
            derived from the namespace manager of self.graph
        :return: Object using a different representation (a dictionary for the
            DTS JSON mimetype, the serialised graph for JSON-LD / RDF XML);
            None when the mimetype matches neither branch
        """

        if output == Mimetypes.JSON.DTS.Std:

            # Set-up a derived Namespace Manager
            if not namespace_manager:
                # Keep every existing binding except the ones re-declared below
                nsm = {
                    prefix: ns
                    for prefix, ns in self.graph.namespace_manager.namespaces()
                    if str(ns) not in [_ns_cap_str, _ns_cts_str, _ns_dts_str, _ns_dct_str, _ns_hydra_str]
                }
                # Hydra is bound to the empty prefix
                nsm[""] = RDF_NAMESPACES.HYDRA
                nsm["cts"] = RDF_NAMESPACES.CTS
                nsm["dts"] = RDF_NAMESPACES.DTS
                nsm["dct"] = DCTERMS

            else:
                # NOTE(review): this branch passes the result of namespaces()
                # while the branch above passes a dict - confirm Subgraph
                # accepts both
                nsm = namespace_manager.namespaces()

            # Set-up a derived graph
            store = Subgraph(nsm)
            store.graphiter(self.graph, self.asNode(), ascendants=0, descendants=1)
            graph = store.graph
            # From here on nsm is the namespace manager of the derived graph
            nsm = store.graph.namespace_manager

            # Build the JSON-LD @context

            ignore_ns_for_bindings = [_ns_cap_str, _ns_hydra_str, _ns_rdf_str, _ns_rdfs_str]
            bindings = {}
            for predicate in set(graph.predicates()):
                prefix, namespace, name = nsm.compute_qname(predicate)
                # First binding seen for a prefix wins
                if prefix not in bindings and str(namespace) not in ignore_ns_for_bindings:
                    bindings[prefix] = str(URIRef(namespace))

            # Builds the specific Store data
            extensions = {}
            dublincore = {}
            ignore_ns = [_ns_cap_str, _ns_hydra_str, _ns_rdf_str, _ns_rdfs_str, _ns_dts_str]

            # Builds the .dublincore and .extensions graphs
            for _, predicate, obj in store.graph:
                k = graph.qname(predicate)
                prefix, namespace, name = nsm.compute_qname(predicate)
                namespace = str(namespace)

                # Ignore namespaces that are part of the root DTS object
                if namespace in ignore_ns:
                    continue

                # Switch to the correct container depending on namespaces
                if namespace == str(DCTERMS):
                    metadata = dublincore
                else:
                    metadata = extensions

                # Repeated predicates are accumulated in a list
                if k in metadata:
                    if isinstance(metadata[k], list):
                        metadata[k].append(literal_to_dict(obj))
                    else:
                        metadata[k] = [metadata[k], literal_to_dict(obj)]
                else:
                    metadata[k] = literal_to_dict(obj)
                    # A dict value is wrapped in a list even when it is the only one
                    if isinstance(metadata[k], dict):
                        metadata[k] = [metadata[k]]

            o = {"@context": bindings}
            o.update(self.export_base_dts(graph, self, nsm))
            # Set after update() so the "@vocab" entry lands on whatever
            # "@context" value o holds at that point
            o["@context"]["@vocab"] = _ns_hydra_str

            if extensions:
                o[graph.qname(RDF_NAMESPACES.DTS.extensions)] = extensions

            if dublincore:
                o[graph.qname(RDF_NAMESPACES.DTS.dublincore)] = dublincore

            if self.size:
                o[graph.qname(RDF_NAMESPACES.HYDRA.member)] = [
                    self.export_base_dts(self.graph, member, nsm)
                    for member in self.members
                ]

            # If the system handles citation structure
            if hasattr(self, "citation") and \
                    isinstance(self.citation, BaseCitationSet):
                if self.citation.depth:
                    o[graph.qname(RDF_NAMESPACES.DTS.term("citeDepth"))] = self.citation.depth

                if not self.citation.is_empty():
                    o[graph.qname(RDF_NAMESPACES.DTS.term("citeStructure"))] = self.citation.export(
                        Mimetypes.JSON.DTS.Std,
                        context=False,
                        namespace_manager=nsm
                    )

            del store
            return o
        elif output == Mimetypes.JSON.LD\
                or output == Mimetypes.XML.RDF:

            # We create a temp graph
            store = Subgraph(get_graph().namespace_manager)
            store.graphiter(self.graph, self.asNode(), ascendants=1, descendants=-1)

            o = store.serialize(format=RDFLIB_MAPPING[output], auto_compact=True, indent="")
            del store
            return o
Пример #14
0
 def setUp(self):
     """ Reset the graph from get_graph() before each test """
     # The all-None pattern is passed to remove(); presumably this drops every triple
     get_graph().remove((None, None, None))
from MyCapytain.common.reference import URN
from rdflib.namespace import DC, DCTERMS, Namespace
from lxml import etree
import re
from chetc import Epigraph2Markup
from jinja2 import Template
from os import makedirs, path

# Load the two Jinja templates once, at import time
with open('templates/template.jinja.xml') as f:
    template = Template(f.read())
with open('templates/template.textgroup.xml') as f:
    tgtemplate = Template(f.read())

# Namespace used for SAWS ontology terms
SAWS = Namespace("http://purl.org/saws/ontology#")

# Register the prefixes on the graph returned by get_graph()
get_graph().bind("dc", DC)
get_graph().bind("dct", DCTERMS)
get_graph().bind("saws", SAWS)

# Regular expressions used to pull fields out of the source markup.
# All patterns are raw strings so the backslashes reach the regex engine
# unchanged, without relying on invalid escape sequences in normal literals.

# Text between the "province:" and "place:" labels
PLACE = re.compile(r"<b>province:<\/b>(.*)<b>place:<\/b>")
# Identifier following the "EDCS-" prefix
EDCS = re.compile(r"<b>EDCS-ID:</b> EDCS-(\w+)")
# Reference following "CIL 04,", with an optional leading "p"
CIL = re.compile(r"\s*CIL\s*04,\s*(p?\s*[\*a-zA-Z0-9]+)")
# Leading digits of a reference that has at most one trailing word character
CIL_NUMBER = re.compile(r"^(\d+)\w?$")
# "T..." token closing the param value of a partner.php link; single-quoted so
# the closing double quote needs no backslash
TRISMEGISTOS = re.compile(
    r'http:\/\/db\.edcs\.eu\/epigr\/partner\.php\?param=.*(T\w+)"')
# Identifier at the end of a trismegistos.org place URL
TRISMEGISTOS_PLACE = re.compile(
    r"http:\/\/www\.trismegistos\.org\/place\/(\w+)")
# Text after the "publication:" label, up to the next <a or <b tag
PUBLICATION = re.compile(r"publication:<\/b>([a-z,\-\+\/\*A-Z =\(\)0-9]+)<[ab]")
# Place name, latitude and longitude from an HTML-escaped query string.
# The literal "[" inside the character class is escaped: it matches the same
# text as before but no longer looks like a nested set.
ORT = re.compile(
    r"ort='([\[a-zA-Z\s\/\-]+)'&amp;latitude='(\d+\.\d+)'&amp;longitude='(\d+\.\d+)'&amp;"
)
Пример #16
0
    def test_dispatching_output(self):
        """ Inventories filled through the dispatcher can be exported and parsed back """
        tic = CtsTextInventoryCollection()
        latin = CtsTextInventoryMetadata("urn:perseus:latinLit", parent=tic)
        latin.set_label("Classical Latin", "eng")
        farsi = CtsTextInventoryMetadata("urn:perseus:farsiLit", parent=tic)
        farsi.set_label("Farsi", "eng")
        gc = CtsTextInventoryMetadata("urn:perseus:greekLit", parent=tic)
        gc.set_label("Ancient Greek", "eng")
        gc.set_label("Grec Ancien", "fre")

        dispatcher = CollectionDispatcher(tic)

        # Each inventory accepts the collections whose id carries its URN prefix
        @dispatcher.inventory("urn:perseus:latinLit")
        def dispatchLatinLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:latinLit:")

        @dispatcher.inventory("urn:perseus:farsiLit")
        def dispatchfFarsiLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:farsiLit:")

        @dispatcher.inventory("urn:perseus:greekLit")
        def dispatchGreekLit(collection, path=None, **kwargs):
            return collection.id.startswith("urn:cts:greekLit:")

        resolver = NautilusCTSResolver(
            ["./tests/testing_data/latinLit2"],
            dispatcher=dispatcher
        )
        resolver.logger.disabled = True
        resolver.REMOVE_EMPTY = False
        resolver.parse()

        # All exports are produced before the graph is emptied below
        master_xml = resolver.getMetadata().export(Mimetypes.XML.CTS)
        latin_xml = resolver.getMetadata("urn:perseus:latinLit").export(Mimetypes.XML.CTS)
        greek_xml = resolver.getMetadata("urn:perseus:greekLit").export(Mimetypes.XML.CTS)
        farsi_xml = resolver.getMetadata("urn:perseus:farsiLit").export(Mimetypes.XML.CTS)

        get_graph().remove((None, None, None))
        latin_inventory = XmlCtsTextInventoryMetadata.parse(latin_xml)
        greek_inventory = XmlCtsTextInventoryMetadata.parse(greek_xml)
        farsi_inventory = XmlCtsTextInventoryMetadata.parse(farsi_xml)

        self.assertEqual(
            len(latin_inventory.readableDescendants), 19,
            "There should be 19 readable descendants in Latin"
        )
        self.assertIsInstance(
            latin_inventory, CtsTextInventoryMetadata,
            "should be textinventory"
        )
        self.assertEqual(
            len(greek_inventory.readableDescendants), 6,
            "There should be 6 readable descendants in Greek [6 only in __cts__.xml]"
        )
        self.assertEqual(
            len(farsi_inventory.descendants), 0,
            "There should be nothing in FarsiLit"
        )
        self.assertEqual(
            greek_inventory.get_label("fre"),
            None,  # Text inventory have no label in CTS
            "Label should be correct"
        )

        get_graph().remove((None, None, None))
        master_inventory = XmlCtsTextInventoryMetadata.parse(master_xml)
        self.assertEqual(
            len(master_inventory.readableDescendants), 25,
            "There should be all 25 readable descendants in the master collection"
        )
Пример #17
0
    def __export__(self,
                   output=Mimetypes.JSON.Std,
                   only=None,
                   exclude=None,
                   **kwargs):
        """ Export a set of Metadata

        :param output: Mimetype to export to
        :param only: Keeps only the listed predicates, plus any predicate whose
            namespace is listed. The iterable passed in is not modified.
        :param exclude: Drops the listed predicates and any predicate whose
            namespace is listed
        :param kwargs: Accepted but not used by this method
        :return: Formatted Export; None when the mimetype is not handled

        .. warning:: only and exclude cannot be used together: when only is
            given, exclude is ignored
        """
        node = self.asNode()

        # Temporary graph holding just the statements selected for export
        graph = Graph()
        graph.namespace_manager = get_graph().namespace_manager

        if only is not None:
            # Work on a copy so matches are never appended to the caller's list
            kept = list(only)
            only_terms = [str(term) for term in kept]
            for predicate in set(self.graph.predicates(subject=node)):
                if str(predicate) not in only_terms:
                    _, namespace, _ = self.graph.compute_qname(predicate)
                    # A listed namespace keeps every predicate it contains
                    if str(namespace) in only_terms:
                        kept.append(predicate)
            for predicate, obj in self.graph[node]:
                if predicate in kept:
                    graph.add((node, predicate, obj))
        elif exclude is not None:
            kept = []
            excluded_terms = [str(term) for term in exclude]
            for predicate in set(self.graph.predicates(subject=node)):
                _, namespace, _ = self.graph.compute_qname(predicate)
                if str(predicate) not in excluded_terms \
                        and str(namespace) not in excluded_terms:
                    kept.append(predicate)
            for predicate, obj in self.graph[node]:
                if predicate in kept:
                    graph.add((node, predicate, obj))
        else:
            for predicate, obj in self.graph[node]:
                graph.add((node, predicate, obj))

        if output == Mimetypes.JSON.Std:
            # {predicate: {language: value or list of values}}
            out = {}
            for _, predicate, obj in graph:
                predicate = str(predicate)
                if predicate not in out:
                    out[predicate] = {}
                if isinstance(obj, Literal):
                    by_language = out[predicate]
                    # NOTE(review): title() looks like str.title(), which would
                    # also change letter case - confirm this is intended
                    if obj.language in by_language:
                        # A second value for a language turns the entry into a list
                        if isinstance(by_language[obj.language], str):
                            by_language[obj.language] = [
                                by_language[obj.language]
                            ]
                        by_language[obj.language].append(obj.title())
                    else:
                        by_language[obj.language] = obj.title()
            return out

        elif output == Mimetypes.JSON.LD:
            return graph.serialize(format="json-ld", context={})

        elif output == Mimetypes.XML.RDF:
            return graph.serialize(format="xml")

        elif output == Mimetypes.XML.CapiTainS.CTS:
            strings = []
            for pred, obj in graph.predicate_objects(node):
                # Language and datatype become XML attributes when present
                attributes = {}
                if hasattr(obj, "language") and obj.language is not None:
                    attributes["xml:lang"] = obj.language
                if hasattr(obj, "datatype") and obj.datatype is not None:
                    attributes["rdf:type"] = obj.datatype
                strings.append(
                    make_xml_node(graph,
                                  pred,
                                  text=obj,
                                  attributes=attributes,
                                  complete=True))
            return "\n".join(strings)