Пример #1
0
    def prevnext(resource):
        """ Extract the previous and next passage references from a reply

        :param resource: XML Resource, possibly holding a ``ti:prevnext`` node
        :type resource: etree._Element
        :return: ``(previous, next)``. Both are ``False`` when the resource has no ``ti:prevnext`` node;
            otherwise each one is the text after the last colon of its urn, or ``None`` when that urn is absent
        :rtype: (str, str)
        """
        document = xmlparser(resource)
        containers = document.xpath("//ti:prevnext", namespaces=XPATH_NAMESPACES)

        # A missing prevnext node is signalled with False rather than None
        if len(containers) == 0:
            return False, False

        node = containers[0]

        def _passage_of(expression):
            """ Passage part of the first urn matched by ``expression``, None without a match """
            urns = node.xpath(expression,
                              namespaces=XPATH_NAMESPACES,
                              smart_strings=False)
            if len(urns):
                return urns[0].split(":")[-1]
            return None

        following = _passage_of("ti:next/ti:urn/text()")
        preceding = _passage_of("ti:prev/ti:urn/text()")
        return preceding, following
Пример #2
0
    def getPassage(self, reference=None):
        """ Fetch a passage through the retriever and wrap it in a Passage

        :param reference: Reference of the passage. A URN is used as is, a string holding a colon is taken as a
            complete urn, a list is joined with dots; any other type falls back to the urn of the text itself
        :type reference: Reference, or URN, or str or list(str)
        :rtype: Passage
        :returns: Object representing the passage
        """
        template = "{0}:{1}"

        if isinstance(reference, URN):
            urn = str(reference)
        elif isinstance(reference, Reference):
            urn = template.format(self.urn, str(reference))
        elif isinstance(reference, str):
            # A colon means the caller already gave a full urn
            urn = reference if ":" in reference else template.format(self.urn, reference)
        elif isinstance(reference, list):
            urn = template.format(self.urn, ".".join(reference))
        else:
            urn = str(self.urn)

        raw_reply = self.retriever.getPassage(urn=urn)
        reply = xmlparser(raw_reply)

        # The request node of the reply feeds the object before the passage is built
        request_node = reply.xpath("//ti:request", namespaces=NS)[0]
        self.__parse_request__(request_node)
        return Passage(urn=urn, resource=reply, retriever=self.retriever)
Пример #3
0
    def getTextualNode(self, subreference=None):
        """ Fetch a passage through the retriever and wrap it in a Passage

        :param subreference: Reference of the passage. A URN is used as is, a string holding a colon is taken as
            a complete urn, a list should be a list of strings and is joined with dots; any other type falls back
            to the urn of the text itself
        :type subreference: Union[Reference, URN, str, list]
        :rtype: Passage
        :returns: Object representing the passage
        """
        template = "{0}:{1}"

        if isinstance(subreference, URN):
            urn = str(subreference)
        elif isinstance(subreference, Reference):
            urn = template.format(self.urn, str(subreference))
        elif isinstance(subreference, str):
            if ":" not in subreference:
                # Bare passage string: attach it to the urn of the text cut at URN.NO_PASSAGE
                urn = template.format(self.urn.upTo(URN.NO_PASSAGE), subreference)
            else:
                urn = subreference
        elif isinstance(subreference, list):
            urn = template.format(self.urn, ".".join(subreference))
        else:
            urn = str(self.urn)

        raw_reply = self.retriever.getPassage(urn=urn)
        reply = xmlparser(raw_reply)

        request_node = reply.xpath("//ti:request", namespaces=NS)[0]
        self.__parse_request__(request_node)
        return Passage(urn=urn, resource=reply, retriever=self.retriever)
Пример #4
0
    def getValidReff(self, level=1, reference=None):
        """ Ask the retriever for the valid references of the text

        :param level: Depth required (1 based); -1 is replaced by the number of citation levels of the text
        :type level: Int
        :param reference: Passage reference appended to the urn of the text when given
        :type reference: Reference
        :rtype: list(str)
        :returns: Text after the last colon of every urn found in the reply
        """
        urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)
        depth = len(self.citation) if level == -1 else level

        reply = xmlparser(self.retriever.getValidReff(level=depth, urn=urn))
        request_node = reply.xpath("//ti:request", namespaces=NS)[0]
        self.__parse_request__(request_node)

        found = reply.xpath("//ti:reply//ti:urn/text()", namespaces=NS)
        return [full_urn.split(":")[-1] for full_urn in found]
Пример #5
0
 def test_text(self):
     """ Plain text export, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(resource=xmlparser(source))

     # Nothing excluded: the text of the note is part of the output
     full_text = passage.export(output=Mimetypes.PLAINTEXT)
     self.assertEqual(full_text, "Ibis hello b ab excusso missus in astra sago. ")

     # Excluding "note" removes the note text from the output
     filtered_text = passage.export(output=Mimetypes.PLAINTEXT, exclude=["note"])
     self.assertEqual(filtered_text, "Ibis ab excusso missus in astra sago. ")
Пример #6
0
    def prevnext(resource):
        """ Extract the previous and next passage references from a reply

        :param resource: XML Resource, possibly holding a ``ti:prevnext`` node
        :type resource: etree._Element
        :return: ``(previous, next)``. Both are ``False`` when the resource has no ``ti:prevnext`` node;
            otherwise each one is the text after the last colon of its urn, or ``None`` when that urn is absent
        :rtype: (str, str)
        """
        document = xmlparser(resource)
        containers = document.xpath("//ti:prevnext", namespaces=NS)

        # A missing prevnext node is signalled with False rather than None
        if len(containers) == 0:
            return False, False

        node = containers[0]

        def _passage_of(expression):
            """ Passage part of the first urn matched by ``expression``, None without a match """
            urns = node.xpath(expression, namespaces=NS, smart_strings=False)
            if len(urns):
                return urns[0].split(":")[-1]
            return None

        following = _passage_of("ti:next/ti:urn/text()")
        preceding = _passage_of("ti:prev/ti:urn/text()")
        return preceding, following
Пример #7
0
    def __init__(self, urn=None, citation=None, resource=None):
        """ Initialise the text and, when a resource is given, parse it

        :param urn: Accepted but not stored by this constructor
        :param citation: Citation scheme; only assigned when it is not None
        :param resource: XML resource; when not None it is parsed, its cRefPattern is looked up and the node
            matched by the main citation scope is wrapped in a Passage
        :raises RefsDeclError: when the main citation scope matches nothing in the resource
        """
        # NOTE(review): ``urn`` is never assigned here although ``self.urn`` is read below - confirm that it is
        # provided elsewhere (property, parent class or a later setter).

        # Represents real full passages / reffs informations. Only way to set it up is getValidReff without passage ?
        self._passages = OrderedDict()
        # Represents passage we got without asking for all. Storing convenience ?
        self._orphan = defaultdict(Reference)

        self._cRefPattern = MyCapytain.resources.texts.tei.Citation()
        self.resource = None
        self.xml = None
        self._URN = None

        if citation is not None:
            self.citation = citation
        if resource is not None:
            self.resource = resource
            self.xml = xmlparser(resource)

            self.__findCRefPattern(self.xml)

            # Only an empty xpath result is translated; any other exception propagates untouched
            try:
                xml = self.xml.xpath(self.citation.scope, namespaces=NS)[0]
            except IndexError:
                msg = "Main citation scope does not result in any result ({0})".format(self.citation.scope)
                raise RefsDeclError(msg)

            self._passages = Passage(resource=xml, citation=self.citation, urn=self.urn, id=None)
Пример #8
0
    def getValidReff(self, level=1, reference=None):
        """ Ask the retriever for the valid references of the text

        :param level: Depth required (1 based); -1 is replaced by the number of citation levels of the text
        :type level: Int
        :param reference: Passage reference appended to the urn of the text when given
        :type reference: Reference
        :rtype: list(str)
        :returns: Text after the last colon of every urn found in the reply
        """
        urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)
        depth = len(self.citation) if level == -1 else level

        reply = xmlparser(self.retriever.getValidReff(level=depth, urn=urn))
        request_node = reply.xpath("//ti:request", namespaces=XPATH_NAMESPACES)[0]
        self.__parse_request__(request_node)

        found = reply.xpath("//ti:reply//ti:urn/text()", namespaces=XPATH_NAMESPACES)
        return [full_urn.split(":")[-1] for full_urn in found]
Пример #9
0
    def parse(self, resource):
        """ Parse a work resource and collect its editions and translations

        :param resource: Element representing a work
        :type resource: basestring, etree._Element
        :returns: Mapping of the texts of the work, filled with the editions first and the translations second
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"
        self.xml = xmlparser(resource)

        work_lang = self.xml.get(xml_lang)
        if work_lang is not None:
            self.lang = work_lang

        # Titles are stored per language; a title without xml:lang is skipped
        for title in self.xml.xpath("ti:title", namespaces=NS):
            title_lang = title.get(xml_lang)
            if title_lang is None:
                continue
            self.metadata["title"][title_lang] = title.text

        # Children receive this work followed by the parents of the work
        self.__editions = xpathDict(
            xml=self.xml, xpath="ti:edition", children=Edition, parents=(self,) + self.parents
        )
        self.__translations = xpathDict(
            xml=self.xml, xpath="ti:translation", children=Translation, parents=(self,) + self.parents
        )

        self.texts = collections.defaultdict(Text)
        for group in (self.__editions, self.__translations):
            for urn in group:
                self.texts[urn] = group[urn]

        return self.texts
Пример #10
0
    def getTextualNode(self, subreference=None):
        """ Fetch a passage through the retriever and wrap it in a CtsPassage

        :param subreference: Reference of the passage. A URN is used as is, a string holding a colon is taken as
            a complete urn, a list should be a list of strings and is joined with dots; any other type falls back
            to the urn of the text itself
        :type subreference: Union[Reference, URN, str, list]
        :rtype: CtsPassage
        :returns: Object representing the passage
        """
        template = "{0}:{1}"

        if isinstance(subreference, URN):
            urn = str(subreference)
        elif isinstance(subreference, Reference):
            urn = template.format(self.urn, str(subreference))
        elif isinstance(subreference, str):
            if ":" not in subreference:
                # Bare passage string: attach it to the urn of the text cut at URN.NO_PASSAGE
                urn = template.format(self.urn.upTo(URN.NO_PASSAGE), subreference)
            else:
                urn = subreference
        elif isinstance(subreference, list):
            urn = template.format(self.urn, ".".join(subreference))
        else:
            urn = str(self.urn)

        raw_reply = self.retriever.getPassage(urn=urn)
        reply = xmlparser(raw_reply)

        request_node = reply.xpath("//ti:request", namespaces=XPATH_NAMESPACES)[0]
        self.__parse_request__(request_node)
        return CtsPassage(urn=urn, resource=reply, retriever=self.retriever)
Пример #11
0
 def test_text(self):
     """ Text of a passage, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = Passage(resource=xmlparser(source))

     # Nothing excluded: the text of the note is part of the output
     full_text = passage.text()
     self.assertEqual(full_text, "Ibis hello b ab excusso missus in astra sago. ")

     # Excluding "note" removes the note text from the output
     filtered_text = passage.text(exclude=["note"])
     self.assertEqual(filtered_text, "Ibis ab excusso missus in astra sago. ")
Пример #12
0
 def export(self, output="xml"):
     """ Parse the string form of the TextInventory back into XML

     :param output: Requested output; not consulted by this method
     :type output: basestring
     :rtype: lxml.etree._Element
     :returns: XML representation of the object
     """
     serialized = str(self)
     return xmlparser(serialized)
Пример #13
0
    def parse(resource, parent=None):
        """ Build a commentary metadata object from a resource

        :param resource: Resource handed to ``xmlparser``; its ``urn`` attribute identifies the commentary
        :param parent: Parent given to the new metadata object
        :returns: The new XmlCtsCommentaryMetadata, with its lang set when the node carries xml:lang
        """
        xml = xmlparser(resource)
        commentary = XmlCtsCommentaryMetadata(urn=xml.get("urn"), parent=parent)

        declared_lang = xml.get("{http://www.w3.org/XML/1998/namespace}lang")
        if declared_lang is not None:
            commentary.lang = declared_lang

        XmlCtsCommentaryMetadata.parse_metadata(commentary, xml)
        return commentary
Пример #14
0
 def test_str(self):
     """ String conversion of a passage gives back its source XML """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = Passage(resource=xmlparser(source))
     self.assertEqual(str(passage), source)
Пример #15
0
    def parse(self, resource):
        """ Parse a text inventory and collect its textgroups

        :param resource: Element representing the text inventory
        :type resource: basestring, etree._Element
        :returns: The textgroups built from every ``ti:textgroup`` node of the resource
        """
        self.xml = xmlparser(resource)

        # The inventory itself is handed over as the parent of each textgroup
        self.textgroups = xpathDict(
            xml=self.xml,
            xpath="//ti:textgroup",
            children=TextGroup,
            parents=self
        )
        return self.textgroups
Пример #16
0
    def test_changing_space(self):
        """ Test when user change default value of export joining char """
        X = xmlparser(
            """<root>in- genium<note place="unspecified">ingenium <hi rend="italic">ll.v</hi>(<hi rend="italic">G</hi>). -nio
<hi rend="italic">B.</hi> in ganea <hi rend="italic">J</hi></note><add>n</add>a<add>t</add>us</root>"""
        )
        P = TEIResource(identifier="dummy", resource=X)
        P.plaintext_string_join = ""
        self.assertEqual(P.export(Mimetypes.PLAINTEXT, exclude=["note"]),
                         "in- geniumnatus")
Пример #17
0
 def test_str(self):
     """ Standard XML export gives back the source XML """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(identifier="dummy", resource=xmlparser(source))
     exported = passage.export(Mimetypes.XML.Std)
     self.assertEqual(exported, source)
Пример #18
0
 def test_text(self):
     """ Text of a passage, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = Passage(resource=xmlparser(source))

     # Nothing excluded: the text of the note is part of the output
     full_text = passage.text()
     self.assertEqual(full_text, "Ibis hello b ab excusso missus in astra sago. ")

     # Excluding "note" removes the note text from the output
     filtered_text = passage.text(exclude=["note"])
     self.assertEqual(filtered_text, "Ibis ab excusso missus in astra sago. ")
Пример #19
0
 def test_exportable_capacities(self):
     """ The resource advertises the expected export mimetypes, in order """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(identifier="dummy", resource=xmlparser(source))
     expected = [
         Mimetypes.PYTHON.ETREE,
         Mimetypes.XML.Std,
         Mimetypes.PYTHON.NestedDict,
         Mimetypes.PLAINTEXT,
         Mimetypes.XML.TEI,
     ]
     self.assertEqual(
         passage.export_capacities,
         expected,
         "CapitainsCtsPassage should be able to export to given resources"
     )
Пример #20
0
    def getLabel(self):
        """ Retrieve metadata about the text

        The label node of the retriever's reply is handed to ``__parse_request__`` before the metadata
        of the object is returned.

        :rtype: Metadata
        :returns: Dictionary with label informations
        """
        raw_reply = self.retriever.getLabel(urn=str(self.urn))
        reply = xmlparser(raw_reply)

        label_node = reply.xpath("//ti:reply/ti:label", namespaces=XPATH_NAMESPACES)[0]
        self.__parse_request__(label_node)

        return self.metadata
Пример #21
0
 def test_text(self):
     """ Plain text export, with and without excluded nodes """
     source = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     passage = TEIResource(identifier="dummy", resource=xmlparser(source))

     # Nothing excluded: the text of the note is part of the output
     full_text = passage.export(output=Mimetypes.PLAINTEXT)
     self.assertEqual(full_text, "Ibis hello b ab excusso missus in astra sago. ")

     # Excluding "note" removes the note text from the output
     filtered_text = passage.export(output=Mimetypes.PLAINTEXT, exclude=["note"])
     self.assertEqual(filtered_text, "Ibis ab excusso missus in astra sago. ")
Пример #22
0
    def firstUrn(resource):
        """ Parse a resource to get the first URN

        :param resource: XML Resource
        :type resource: etree._Element
        :return: Text after the last colon of the first ``ti:reply/ti:urn`` of the resource, ``None`` when
            the resource holds no such urn
        :rtype: str
        """
        resource = xmlparser(resource)
        # ``smart_strings`` is the lxml option controlling string results; lxml binds any unknown keyword
        # as an XPath variable, so a misspelt option would be silently ignored.
        urns = resource.xpath("//ti:reply/ti:urn/text()", namespaces=NS, smart_strings=False)

        if not urns:
            return None
        return str(urns[0]).split(":")[-1]
Пример #23
0
    def __export__(self, output=Mimetypes.PYTHON.ETREE, domain=""):
        """ Export the object as a parsed XML tree or as its CTS XML string

        :param output: ``Mimetypes.PYTHON.ETREE`` for a parsed tree, ``Mimetypes.XML.CTS`` for the string form;
            any other value yields ``None``
        :type output: basestring
        :param domain: Domain to prefix IDs (not consulted by this method)
        :type domain: str
        :rtype: lxml.etree._Element
        :returns: XML representation of the object
        """
        if output == Mimetypes.PYTHON.ETREE:
            as_tree = True
        elif output == Mimetypes.XML.CTS:
            as_tree = False
        else:
            return None

        serialized = str(self)
        return xmlparser(serialized) if as_tree else serialized
Пример #24
0
    def parse(self, resource):
        """ Parse a textgroup resource and collect its works

        :param resource: Element representing the textgroup
        :type resource: basestring, etree._Element
        :returns: The works built from the ``ti:work`` children of the resource
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"
        self.xml = xmlparser(resource)

        # Group names are stored per language; a name without xml:lang is skipped
        for groupname in self.xml.xpath("ti:groupname", namespaces=NS):
            name_lang = groupname.get(xml_lang)
            if name_lang is None:
                continue
            self.metadata["groupname"][name_lang] = groupname.text

        # Works receive this textgroup followed by whatever the textgroup holds as parents
        lineage = (self, self.parents)
        self.works = xpathDict(xml=self.xml, xpath="ti:work", children=Work, parents=lineage)
        return self.works
Пример #25
0
    def test_ingest_single_and(self):
        text = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
    <tei:cRefPattern n="section" matchPattern="(.+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n='$1' and @type='section'])" />
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        citation = Citation.ingest(text)
        self.maxDiff = None
        self.assertEqual(
            str(citation),
            """<tei:cRefPattern n="section" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'$1\' and @type='section'])"><tei:p>This pointer pattern extracts section</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(citation.scope, "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']")
        self.assertEqual(citation.xpath, "/tei:div[@n='?' and @type='section']")
        self.assertEqual(citation.fill("1"), "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'1\' and @type='section']")
Пример #26
0
    def getLabel(self):
        """ Retrieve metadata about the text

        The label node of the retriever's reply is handed to ``__parse_request__`` before the metadata
        of the object is returned.

        :rtype: Metadata
        :returns: Dictionary with label informations
        """
        raw_reply = self.retriever.getLabel(urn=str(self.urn))
        reply = xmlparser(raw_reply)

        label_node = reply.xpath("//ti:reply/ti:label", namespaces=NS)[0]
        self.__parse_request__(label_node)

        return self.metadata
Пример #27
0
 def export(self, output="xml", **kwargs):
     """ Export the object as XML or as an instance of a text class

     :param output: ``"xml"`` for a parsed XML tree, or a subclass of ``text.Text`` to instantiate with the urn,
         the citation and the merged metadata of the object; ``issubclass`` is applied to any other value
     :type output: basestring, citation
     :param kwargs: Extra keyword arguments forwarded to ``output`` when it is a text class
     :rtype: lxml.etree._Element
     :returns: XML representation of the object, an instance of ``output``, or ``None`` for a class that is
         not a ``text.Text`` subclass
     """
     if output == "xml":
         return xmlparser(str(self))

     if not issubclass(output, text.Text):
         return None

     # Metadata of every parent that is an inventory resource is added to the object's own
     merged_metadata = self.metadata
     for ancestor in self.parents:
         if isinstance(ancestor, inventory.Resource):
             merged_metadata = merged_metadata + ancestor.metadata
     return output(urn=self.urn, citation=self.citation, metadata=merged_metadata, **kwargs)
Пример #28
0
    def test_ingest_single(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts book and poem and line</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
Пример #29
0
    def test_ingest_single(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts book and poem and line</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
Пример #30
0
 def test_get_passage_plus_formatted(self):
     """ GetPassagePlus reply can be parsed and holds the previous and next urns """
     urn = "urn:cts:farsiLit:hafez.divan:1.1.1.2"
     response = self.endpoint.getPassagePlus(urn, output=XML)
     parsed_response = Passage(resource=xmlparser(response), urn=urn)

     passage_text = parsed_response.text().strip()
     self.assertEqual(
         passage_text,
         "که عشق آسان نمود اول ولی افتاد مشکل‌ها ***",
         "API Response should be parsable by MyCapytain Library"
     )

     # Both neighbours are checked on the raw reply
     previous_node = "<prev><urn>urn:cts:farsiLit:hafez.divan.perseus-far1:1.1.1.1</urn></prev>"
     self.assertIn(previous_node, response, "Previous URN should be found")

     next_node = "<next><urn>urn:cts:farsiLit:hafez.divan.perseus-far1:1.1.2.1</urn></next>"
     self.assertIn(next_node, response, "Next URN should be found")
Пример #31
0
    def firstUrn(resource):
        """ Parse a resource to get the first URN

        :param resource: XML Resource
        :type resource: etree._Element
        :return: Text after the last colon of the first ``ti:reply/ti:urn`` of the resource, ``None`` when
            the resource holds no such urn
        :rtype: str
        """
        resource = xmlparser(resource)
        # ``smart_strings`` is the lxml option controlling string results; lxml binds any unknown keyword
        # as an XPath variable, so a misspelt option would be silently ignored.
        urns = resource.xpath("//ti:reply/ti:urn/text()",
                              namespaces=XPATH_NAMESPACES,
                              smart_strings=False)

        if not urns:
            return None
        return str(urns[0]).split(":")[-1]
Пример #32
0
    def parse(resource):
        """ Build a text inventory metadata object from a resource

        :param resource: Element representing the text inventory
        :type resource: basestring, etree._Element
        :returns: The new XmlCtsTextInventoryMetadata, named after the ``tiid`` attribute of the first
            ``ti:TextInventory`` node (empty string when the attribute is missing or empty)
        """
        xml = xmlparser(resource)

        inventory_node = xml.xpath("//ti:TextInventory", namespaces=XPATH_NAMESPACES)[0]
        inventory_name = inventory_node.get("tiid") or ""
        o = XmlCtsTextInventoryMetadata(name=inventory_name)

        # Parse textgroups, with the inventory as their parent
        xpathDict(xml=xml, xpath='//ti:textgroup', cls=XmlCtsTextgroupMetadata, parent=o)
        return o
Пример #33
0
    def parse(resource, parent=None):
        """ Build a textgroup metadata object from a resource

        :param resource: Element representing the textgroup
        :param parent: Parent of the textgroup
        :returns: The new XmlCtsTextgroupMetadata
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"
        xml = xmlparser(resource)
        textgroup = XmlCtsTextgroupMetadata(urn=xml.get("urn"), parent=parent)

        # Group names are registered per language; a name without xml:lang is skipped
        for groupname in xml.xpath("ti:groupname", namespaces=XPATH_NAMESPACES):
            name_lang = groupname.get(xml_lang)
            if name_lang is None:
                continue
            textgroup.set_cts_property("groupname", groupname.text, name_lang)

        # Parse Works, with the textgroup as their parent
        xpathDict(xml=xml, xpath='ti:work', cls=XmlCtsWorkMetadata, parent=textgroup)

        __parse_structured_metadata__(textgroup, xml)
        return textgroup
Пример #34
0
 def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
     """ Export the object as a parsed tree, a CitableText or its CTS XML string

     :param output: ``Mimetypes.PYTHON.ETREE`` for a parsed tree, ``Mimetypes.PYTHON.MyCapytain.ReadableText``
         for a ``text.CitableText``, ``Mimetypes.XML.CTS`` for the string form; any other value yields ``None``
     :type output: basestring, citation
     :param domain: Domain to prefix IDs (not consulted by this method)
     :type domain: str
     :param kwargs: Extra keyword arguments forwarded to ``text.CitableText``
     :rtype: lxml.etree._Element
     :returns: XML representation of the object
     """
     if output == Mimetypes.PYTHON.ETREE:
         return xmlparser(str(self))

     if output == Mimetypes.PYTHON.MyCapytain.ReadableText:
         # Metadata of every CTS collection parent exposing ``metadata`` is added to the object's own
         merged_metadata = self.metadata
         for ancestor in self.parents:
             if isinstance(ancestor, cts.CTSCollection) and hasattr(ancestor, "metadata"):
                 merged_metadata = merged_metadata + ancestor.metadata
         return text.CitableText(urn=self.urn, citation=self.citation, metadata=merged_metadata, **kwargs)

     if output == Mimetypes.XML.CTS:
         return str(self)

     return None
Пример #35
0
    def getPassagePlus(self, reference=None):
        """ Fetch a passage with its surrounding information and wrap it in a Passage

        The label node of the reply is handed to ``__parse_request__``; the returned passage then shares the
        metadata and the citation of this object.

        :param reference: Reference of the passage, appended to the urn of the text when given
        :type reference: Reference or List of text_type
        :rtype: Passage
        :returns: Object representing the passage
        """
        urn = "{0}:{1}".format(self.urn, reference) if reference else str(self.urn)

        raw_reply = self.retriever.getPassagePlus(urn=urn)
        reply = xmlparser(raw_reply)

        label_node = reply.xpath("//ti:reply/ti:label", namespaces=NS)[0]
        self.__parse_request__(label_node)

        passage = Passage(urn=urn, resource=reply, retriever=self.retriever)
        shared_metadata, shared_citation = self.metadata, self.citation
        passage.metadata = shared_metadata
        passage.citation = shared_citation
        return passage
Пример #36
0
    def parse(self, resource):
        """ Parse a resource to feed the object

        Sets the urn and id, the language (translations only), the descriptions and labels per language, the
        citations, and - when a ``ti:online`` node exists - the docname, validation schema and namespace
        mappings.

        :param resource: An xml representation object
        :type resource: basestring or lxml.etree._Element
        :returns: None
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

        self.xml = xmlparser(resource)
        self.urn = URN(self.xml.get("urn"))
        self.id = str(self.urn)

        # The language is only read from the node for translations
        if self.subtype == "Translation":
            own_lang = self.xml.get(xml_lang)
            if own_lang is not None:
                self.lang = own_lang

        # Descriptions first, labels second; nodes without xml:lang are skipped
        for key, expression in (("description", "ti:description"), ("label", "ti:label")):
            for node in self.xml.xpath(expression, namespaces=NS):
                node_lang = node.get(xml_lang)
                if node_lang is not None:
                    self.metadata[key][node_lang] = node.text

        self.__findCitations(xml=self.xml, xpath="ti:online/ti:citationMapping/ti:citation")

        online_nodes = self.xml.xpath("ti:online", namespaces=NS)
        if len(online_nodes) > 0:
            online = online_nodes[0]
            self.docname = online.get("docname")
            # With several validate nodes, the schema of the last one is kept
            for validate in online.xpath("ti:validate", namespaces=NS):
                self.validate = validate.get("schema")
            for mapping in online.xpath("ti:namespaceMapping", namespaces=NS):
                self.metadata["namespaceMapping"][mapping.get("abbreviation")] = mapping.get("nsURI")

        return None
Пример #37
0
    def test_ingest_single_and(self):
        text = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
    <tei:cRefPattern n="section" matchPattern="(.+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n='$1' and @type='section'])" />
</tei:tei>
""".replace("\n", "").replace("\s+", " "))
        citation = Citation.ingest(text)
        self.maxDiff = None
        self.assertEqual(
            str(citation),
            """<tei:cRefPattern n="section" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'$1\' and @type='section'])"><tei:p>This pointer pattern extracts section</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            citation.scope,
            "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']")
        self.assertEqual(citation.xpath,
                         "/tei:div[@n='?' and @type='section']")
        self.assertEqual(
            citation.fill("1"),
            "/tei:TEI/tei:text/tei:body/tei:div[@type='edition']/tei:div[@n=\'1\' and @type='section']"
        )
Пример #38
0
 def test_get_label(self):
     """Check get Label"""
     # Need to parse with Citation and parse individually or simply check for some equality
     data = self.app.get("/cts?request=GetLabel&urn=urn:cts:latinLit:phi1294.phi002.perseus-lat2")\
         .data.decode("utf-8").replace("\n", "")
     parsed = xmlparser(data)
     label = parsed.xpath(".//ti:label", namespaces=XPATH_NAMESPACES)
     # Raw string for the regex: "\s" in a plain literal is an invalid escape sequence
     label_str = re.sub(r"\s+", " ", tostring(label[0], encoding=str)).replace("\n", "")
     self.assertIn(
         '<groupname xml:lang="eng">Martial</groupname>',
         label_str,
         "groupname should be exported correctly"
     )
     self.assertIn(
         '<title xml:lang="eng">Epigrammata</title>',
         label_str,
         "title should be exported correctly"
     )
     self.assertIn(
         '<description xml:lang="eng"> M. Valerii Martialis Epigrammaton libri / recognovit W. Heraeus </description>',
         label_str,
         "description should be exported correctly"
     )
     self.assertIn(
         '<label xml:lang="eng">Epigrammata</label>',
         label_str,
         "label should be exported correctly"
     )
     citation = Citation.ingest(label[0])
     self.assertEqual(
         len(citation), 3, "There should be three level of citation"
     )
     self.assertEqual(
         citation.name, "book", "First level is book"
     )
     # The cache is only inspected when the test case was set up with one
     if self.cache is not None:
         self.assertGreater(
             len(self.cache.cache._cache), 0,
             "There should be something cached"
         )
Пример #39
0
    def test_ingest_multiple(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1' and @type='section']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts line</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="poem"
             matchPattern="(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2'])">
    <tei:p>This pointer pattern extracts poem</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="book"
             matchPattern="(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1'])">
    <tei:p>This pointer pattern extracts book</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))

        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child),
            """<tei:cRefPattern n="poem" matchPattern="(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child.child),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\' and @type=\'section\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            a.child.child.fill(Reference("1.2.3")),
            "/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'1\' and @type=\'section\']/tei:div[@n=\'2\']/tei:l[@n=\'3\']"
        )
Пример #40
0
    def test_ingest_multiple(self):
        b = xmlparser("""
<tei:tei xmlns:tei="http://www.tei-c.org/ns/1.0">
<tei:cRefPattern n="line"
             matchPattern="(\\w+).(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1' and @type='section']/tei:div[@n='$2']/tei:l[@n='$3'])">
    <tei:p>This pointer pattern extracts line</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="poem"
             matchPattern="(\\w+).(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1']/tei:div[@n='$2'])">
    <tei:p>This pointer pattern extracts poem</tei:p>
</tei:cRefPattern>
<tei:cRefPattern n="book"
             matchPattern="(\\w+)"
             replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n='$1'])">
    <tei:p>This pointer pattern extracts book</tei:p>
</tei:cRefPattern>
</tei:tei>
""".replace("\n", "").replace("\s+", " "))

        a = Citation.ingest(b)

        self.assertEqual(
            str(a),
            """<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child),
            """<tei:cRefPattern n="poem" matchPattern="(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\']/tei:div[@n=\'$2\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            str(a.child.child),
            """<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'$1\' and @type=\'section\']/tei:div[@n=\'$2\']/tei:l[@n=\'$3\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>"""
        )
        self.assertEqual(
            a.child.child.fill(Reference("1.2.3")),
            "/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\'1\' and @type=\'section\']/tei:div[@n=\'2\']/tei:l[@n=\'3\']"
        )
Пример #41
0
    def parse(resource, parent=None):
        """ Build a work metadata object out of an XML resource

        :param resource: Element representing a work
        :type resource: basestring, etree._Element
        :param parent: Parent of the object
        :type parent: XmlCtsTextgroupMetadata
        :return: The XmlCtsWorkMetadata created from the resource
        """
        xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"

        xml = xmlparser(resource)
        o = XmlCtsWorkMetadata(urn=xml.get("urn"), parent=parent)

        # The work language is only set when the root carries @xml:lang
        work_lang = xml.get(xml_lang)
        if work_lang is not None:
            o.lang = work_lang

        # Titles without @xml:lang are skipped
        for title in xml.xpath("ti:title", namespaces=XPATH_NAMESPACES):
            title_lang = title.get(xml_lang)
            if title_lang is None:
                continue
            o.set_cts_property("title", title.text, title_lang)

        # Parse children, in this order: editions, translations, commentaries
        children = (
            ("ti:edition", XmlCtsEditionMetadata),
            ("ti:translation", XmlCtsTranslationMetadata),
            ("ti:commentary", XmlCtsCommentaryMetadata),
        )
        for child_xpath, child_cls in children:
            xpathDict(xml=xml, xpath=child_xpath, cls=child_cls, parent=o)

        __parse_structured_metadata__(o, xml)

        return o
Пример #42
0
    def getPassagePlus(self, reference=None):
        """ Retrieve a passage and informations around it

        The citation of the retrieved passage is stored on this object as
        ``self.citation``.

        :param reference: Reference of the passage. A list (or tuple) of levels
            is joined with "." before being appended to the text URN. When the
            reference is empty or None, the URN of the text itself is requested.
        :type reference: Reference or List of text_type
        :rtype: CtsPassage
        :returns: Object representing the passage
        """
        if isinstance(reference, (list, tuple)):
            # Formatting a list directly would put its repr ("['1', '2']") in
            # the URN; build the dotted passage identifier instead.
            reference = ".".join(reference)

        if reference:
            urn = "{0}:{1}".format(self.urn, reference)
        else:
            urn = str(self.urn)

        response = xmlparser(self.retriever.getPassagePlus(urn=urn))

        passage = CtsPassage(urn=urn,
                             resource=response,
                             retriever=self.retriever)
        # The first ti:label found under ti:reply is handed to the passage;
        # indexing with [0] means a reply without a label is not tolerated.
        passage.__parse_request__(
            response.xpath("//ti:reply/ti:label",
                           namespaces=XPATH_NAMESPACES)[0])
        self.citation = passage.citation
        return passage
Пример #43
0
    def __init__(self, urn=None, citation=None, resource=None, autoreffs=False):
        """ Initialize the text and, when a resource is given, parse it

        :param urn: URN forwarded to the parent constructor
        :param citation: Citation forwarded to the parent constructor and, when \
        not None, assigned to ``self.citation``
        :param resource: Optional resource; when given it is stored, parsed with \
        xmlparser into ``self.xml`` and searched for its cRefPattern
        :param autoreffs: When exactly True (and a resource is given), \
        ``self.parse()`` is called at the end of the initialization
        """
        super(Text, self).__init__(urn=urn, citation=citation)
        self._passages = Passage()
        # Represents passages we got without asking for all of them
        self._orphan = defaultdict(Reference)

        self._cRefPattern = MyCapytain.resources.texts.tei.Citation()
        self.xml = None

        if citation is not None:
            self.citation = citation

        # Nothing left to do without a resource
        if resource is None:
            return

        self.resource = resource
        self.xml = xmlparser(resource)
        self.__findCRefPattern(self.xml)

        if autoreffs is True:
            self.parse()
Пример #44
0
from __future__ import unicode_literals

import unittest
from six import text_type as str
from io import open

from MyCapytain.common.utils import xmlparser, NS
from MyCapytain.resources.texts.api import *
from MyCapytain.resources.texts.tei import Citation
from MyCapytain.retrievers.cts5 import CTS
from MyCapytain.common.reference import Reference, URN
from lxml import etree
import mock

# Fixtures: each sample file below is opened and handed to xmlparser once, when
# this module is imported; the result is kept in a module-level constant.
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrn.xml") as f:
    Get_FIRST = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrnEmpty.xml") as f:
    Get_FIRST_EMPTY = xmlparser(f)
with open("tests/testing_data/cts/getlabel.xml") as f:
    GET_LABEL = xmlparser(f)


class TestAPIText(unittest.TestCase):
Пример #45
0
 def test_xml(self):
     """ The element given as resource must be the very object exposed as .xml """
     node = xmlparser(
         '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     )
     tei = TEIResource(resource=node)
     self.assertIs(node, tei.xml)
Пример #46
0
 def test_str(self):
     """ Converting the resource to a string must give back the markup it was built from """
     markup = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     tei = TEIResource(resource=xmlparser(markup))
     self.assertEqual(str(tei), markup)
Пример #47
0
 def __export__(self, output=None, domain=""):
     """ Export the object as a parsed tree when the etree mimetype is requested

     :param output: Requested mimetype; only Mimetypes.PYTHON.ETREE is handled here
     :param domain: Not used by this method
     :return: Result of xmlparser applied to the CTS XML export, or None for \
     any other mimetype
     """
     if output == Mimetypes.PYTHON.ETREE:
         # Go through the CTS XML export first, then parse that export
         cts_xml = self.export(output=Mimetypes.XML.CTS)
         return xmlparser(cts_xml)
Пример #48
0
from MyCapytain.resolvers.cts.api import HttpCtsResolver
from MyCapytain.retrievers.cts5 import HttpCtsRetriever
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes
from MyCapytain.resources.prototypes.text import Passage
from MyCapytain.resources.collections.cts import XmlCtsTextInventoryMetadata, XmlCtsTextgroupMetadata, XmlCtsWorkMetadata, XmlCtsTextMetadata
from MyCapytain.resources.prototypes.metadata import Collection

from unittest import TestCase
from mock import MagicMock

# Fixtures: each sample file below is opened and handed to xmlparser once, when
# this module is imported; the result is kept in a module-level constant.
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.nextonly.xml") as f:
    NEXT = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.prevonly.xml") as f:
    PREV = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF_FULL = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.1.1.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities.xml") as f:
    GET_CAPABILITIES = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities1294002.xml") as f:
    GET_CAPABILITIES_FILTERED = xmlparser(f)
# Unlike the fixtures above, this one is kept as the raw file content and is
# not passed through xmlparser.
with open("tests/testing_data/cts/getPassageOtherTest.xml") as f:
    GET_PASSAGE_CITATION_FAILURE = f.read()
Пример #49
0
 def test_get_passage_formatted(self):
     response = self.endpoint.getPassage("urn:cts:farsiLit:hafez.divan:1.1.1.1", output=XML)
     p = Passage(resource=xmlparser(response), urn="urn:cts:farsiLit:hafez.divan:1.1.1.1")
     """
Пример #50
0
 def test_xml(self):
     """ The element given as resource must be the very object exposed as .xml """
     markup = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     node = xmlparser(markup)
     passage = Passage(resource=node)
     self.assertIs(node, passage.xml)
Пример #51
0
    def parse(resource, parent=None):
        """ Create an edition metadata object from an XML resource

        :param resource: Resource handed to xmlparser; the "urn" attribute of \
        the parsed node is used as the URN of the edition
        :param parent: Parent passed on to the XmlCtsEditionMetadata constructor
        :return: The XmlCtsEditionMetadata instance, after \
        XmlCtsEditionMetadata.parse_metadata has processed it
        """
        node = xmlparser(resource)
        edition = XmlCtsEditionMetadata(parent=parent, urn=node.get("urn"))
        XmlCtsEditionMetadata.parse_metadata(edition, node)
        return edition
Пример #52
0
 def __init__(self, resource, **kwargs):
     """ Set up the object from an XML resource

     :param resource: XML resource, run through xmlparser before being stored
     :param kwargs: Forwarded unchanged to the parent constructor
     """
     super(TEIResource, self).__init__(**kwargs)
     parsed = xmlparser(resource)
     self.resource = parsed
     # NOTE(review): the "" + prefix is kept as-is; it looks like a way to
     # derive the per-instance value from the PLAINTEXT_STRING_JOIN default
     # (and requires it to be a string) - confirm the intent.
     joiner = "" + self.PLAINTEXT_STRING_JOIN
     self.__plaintext_string_join__ = joiner
Пример #53
0
 def xmlparse(self, file):
     """ Parse an XML file by delegating to xmlparser

     :param file: Opened File
     :return: Tree
     """
     tree = xmlparser(file)
     return tree
Пример #54
0
from MyCapytain.resolvers.cts.api import HttpCTSResolver
from MyCapytain.retrievers.cts5 import CTS
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.constants import NS, Mimetypes
from MyCapytain.common.metadata import Metadatum
from MyCapytain.resources.prototypes.text import Passage
from MyCapytain.resources.collections.cts import TextInventory, TextGroup, Work, Text
from MyCapytain.resources.prototypes.metadata import Collection

from unittest import TestCase
from mock import MagicMock


# Fixtures: each sample file below is opened and handed to xmlparser once, when
# this module is imported; the result is kept in a module-level constant.
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.nextonly.xml") as f:
    NEXT = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.prevonly.xml") as f:
    PREV = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF_FULL = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.1.1.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities.xml") as f:
    GET_CAPABILITIES = xmlparser(f)
with open("tests/testing_data/cts/getCapabilities1294002.xml") as f:
    GET_CAPABILITIES_FILTERED = xmlparser(f)
Пример #55
0
 def xmlparse(self, file):
     """ Parse an XML file by delegating to xmlparser

     :param file: Opened File
     :return: Tree
     """
     tree = xmlparser(file)
     return tree
Пример #56
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import unittest
from six import text_type as str
from io import open

from MyCapytain.resources.texts.api.cts import Passage, Text
from MyCapytain.retrievers.cts5 import CTS
from MyCapytain.common.reference import Reference, Citation, URN
from MyCapytain.common.metadata import Metadata, Metadatum
from MyCapytain.common.utils import xmlparser, NS
import mock

# Fixtures: each sample file below is opened and handed to xmlparser once, when
# this module is imported; the result is kept in a module-level constant.
with open("tests/testing_data/cts/getValidReff.xml") as f:
    GET_VALID_REFF = xmlparser(f)
with open("tests/testing_data/cts/getpassage.xml") as f:
    GET_PASSAGE = xmlparser(f)
with open("tests/testing_data/cts/getpassageplus.xml") as f:
    GET_PASSAGE_PLUS = xmlparser(f)
with open("tests/testing_data/cts/getprevnexturn.xml") as f:
    NEXT_PREV = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrn.xml") as f:
    Get_FIRST = xmlparser(f)
with open("tests/testing_data/cts/getFirstUrnEmpty.xml") as f:
    Get_FIRST_EMPTY = xmlparser(f)
with open("tests/testing_data/cts/getlabel.xml") as f:
    GET_LABEL = xmlparser(f)
with open("tests/testing_data/cts/getValidReff.1.1.xml") as f:
    GET_VALID_REFF_1_1 = xmlparser(f)
Пример #57
0
 def test_xml(self):
     """ The element given as resource must be the very object exposed as .xml """
     markup = '<l n="8">Ibis <note>hello<a>b</a></note> ab excusso missus in astra <hi>sago.</hi> </l>'
     node = xmlparser(markup)
     tei = TEIResource(identifier="dummy", resource=node)
     self.assertIs(node, tei.xml)
import unittest
from io import open

import xmlunittest
from lxml import etree

from MyCapytain.common.utils import xmlparser
import MyCapytain.common.reference
import MyCapytain.errors
import MyCapytain.resources.texts.encodings
import MyCapytain.resources.texts.locals.tei
from tests.resources.commonTests import CapitainsXmlTextTest, CapitainsXmlPassageTests, CapitainsXMLRangePassageTests


def objectifiedParser(x):
    """ Parse ``x`` with xmlparser, always passing ``objectify=False``

    Note that, despite its name, this helper turns the ``objectify`` option off.

    :param x: Resource forwarded untouched to xmlparser
    :return: Whatever xmlparser returns for this resource
    """
    return xmlparser(x, objectify=False)


class TestLocalXMLTextImplementation(CapitainsXmlTextTest, unittest.TestCase, xmlunittest.XmlTestMixin):
    """ Test XML Implementation of resources found in local file """

    def setUp(self):
        self.text = open("tests/testing_data/texts/sample.xml", "rb")
        self.TEI = MyCapytain.resources.texts.locals.tei.Text(
            resource=objectifiedParser(self.text),
            urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2"
        )
        self.treeroot = etree._ElementTree()

        with open("tests/testing_data/texts/text_or_xpath.xml") as f:
            self.text_complex = MyCapytain.resources.texts.locals.tei.Text(
Пример #59
0
         "TEI fails with urn and xml lang on @xml:base/div-{epidoc}"),

        ("tei", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False,
         "TEI fails with urn and without xml lang on @n/div-{epidoc}"),

        ("tei", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", False,
         "TEI fails with urn and without xml lang on @xml:base/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}' xml:lang='{lang}'>", True,
         "Epidoc works with urn and xml lang on @n/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}' xml:lang='{lang}'>", False,
         "Epidoc fails with urn and xml lang on @xml:base/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' xml:base='{urn}'>", False,
         "Epidoc fails with urn and without xml lang on @n/div-{epidoc}"),

        ("epidoc", "<div>", "<div type='{epidoc}' n='{urn}'>", False,
         "Epidoc fails with urn and without xml lang on @xml:base/div-{epidoc}")
    ]
    for type_epidoc in ["edition", "translation", "commentary"]
]
# Expand every (scheme, source, replacement, boolean, msg) entry of the previous
# XMLLANG_DOCUMENTS value into a concrete document tuple and rebind the name:
#   - `source` is swapped for `replacement` inside TEMPLATES, the result is
#     formatted with URN and LANG, parsed with xmlparser and serialized back
#     with tostring(..., encoding=str);
#   - `scheme` and `boolean` are carried over unchanged;
#   - the formatted replacement is appended to the message, in parentheses.
XMLLANG_DOCUMENTS = [
    (
        scheme,
        tostring(xmlparser(TEMPLATES.replace(source, replacement).format(urn=URN, lang=LANG)), encoding=str),
        boolean,
        msg + " ("+replacement.format(urn=URN, lang=LANG)+")"
    )
    for scheme, source, replacement, boolean, msg in XMLLANG_DOCUMENTS
]
Пример #60
0
 def __init__(self, resource, **kwargs):
     super(TEIResource, self).__init__(**kwargs)
     self.resource = xmlparser(resource)