Exemplo n.º 1
0
    def getTextualNode(self, subreference=None, simple=False):
        """ Retrieve the passage of the current text matching a reference

        :param subreference: Reference of the passage to retrieve. A ``str`` is wrapped in a CtsReference, a ``list`` \
        is joined with "." and wrapped in a CtsReference; ``None`` delegates to ``_getSimplePassage()`` with no argument.
        :type subreference: Union[str, list, CtsReference]
        :param simple: When exactly ``True``, the passage is built by ``_getSimplePassage(reference)`` instead of \
        being extracted from a copy of the XML tree.
        :type simple: boolean
        :rtype: CapitainsCtsPassage, ContextPassage
        :returns: Asked passage
        :raises CitationDepthError: If the start of the reference has more levels than the citation scheme
        """
        # No reference at all: let the simple-passage builder decide what to return.
        if subreference is None:
            return self._getSimplePassage()

        # Normalise the supported input shapes to a CtsReference; anything
        # else is passed through untouched.
        already_cts = isinstance(subreference, CtsReference)
        if not already_cts and isinstance(subreference, str):
            reference = CtsReference(subreference)
        elif not already_cts and isinstance(subreference, list):
            reference = CtsReference(".".join(subreference))
        else:
            reference = subreference

        if len(reference.start) > self.citation.root.depth:
            raise CitationDepthError("URN is deeper than citation scheme")

        if simple is True:
            return self._getSimplePassage(reference)

        # A single passage is handled as a range whose two ends are the same.
        start_parts = reference.start.list
        end_parts = reference.end.list if reference.is_range() else start_parts

        # Citation levels matching the depth of each end of the reference.
        citation_start = self.citation.root[len(start_parts) - 1]
        citation_end = self.citation.root[len(end_parts) - 1]

        # Fill the citation XPath for each end, then drop its first two
        # "/"-separated segments before normalising.
        start_filled = citation_start.fill(passage=start_parts)
        end_filled = citation_end.fill(passage=end_parts)
        start_xpath = normalizeXpath(start_filled.split("/")[2:])
        end_xpath = normalizeXpath(end_filled.split("/")[2:])

        xml = self.textObject.xml

        # The resource may be a bare element or an object exposing getroot().
        source_root = xml if isinstance(xml, etree._Element) else xml.getroot()
        root = passageLoop(xml, copyNode(source_root), start_xpath, end_xpath)

        urn = URN("{}:{}".format(self.urn, reference)) if self.urn else None

        return CapitainsCtsPassage(
            urn=urn,
            resource=root,
            text=self,
            citation=citation_start,
            reference=reference
        )
Exemplo n.º 2
0
    def getValidReff(self,
                     level: int = 1,
                     reference: CtsReference = None,
                     _debug: bool = False) -> CtsReferenceSet:
        """ Retrieve valid passages directly

        :param level: Depth required (1 based). With a range reference, 0 resolves to the depth of the start of \
        the range and 1 to that depth plus one. In every case where a reference is given, a level not deeper than \
        the reference (or than the prefix shared by both ends of a range) is replaced by that depth plus one.
        :type level: int
        :param reference: Reference restricting the search (single passage or range)
        :type reference: CtsReference
        :param _debug: Emit warnings about duplicate and empty references
        :type _debug: bool
        :returns: Set of the references found at the resolved level
        :rtype: CtsReferenceSet
        :raises TypeError: If a reference is given that is not a CtsReference
        :raises CitationDepthError: If the resolved level is deeper than the citation scheme
        :raises KeyError: If no passage matches at one of the traversed levels

        .. note:: GetValidReff currently works as a loop over the citation levels, running one XPath query per \
        known parent passage. Something more powerful may be possible.
        """

        depth = 0
        xml = self.textObject.xml

        if reference:
            if not isinstance(reference, CtsReference):
                raise TypeError("reference must be a CtsReference instance")

            if not reference.is_range():
                passages = [reference.start.list]
                depth = len(passages[0])
                if level == 0:
                    # Resolved to 1 by the "level is None" fallback below.
                    level = None
                    if _debug:
                        warnings.warn(
                            "Using level=0 with a Non-range Reference is invalid. Autocorrected to 1"
                        )
            else:
                # For a range, only search inside the extracted passage.
                xml = self.getTextualNode(subreference=reference)

                # Longest prefix shared by both ends of the range. zip()
                # stops at the shorter end, so a start deeper than the end
                # can no longer index past the end's parts.
                common = []
                for start_part, end_part in zip(reference.start.list,
                                                reference.end.list):
                    if start_part == end_part:
                        common.append(start_part)
                    else:
                        break

                passages = [common]
                depth = len(common)

                if level is None:
                    level = reference.start.depth + depth
                elif level == 1:
                    level = reference.start.depth + 1
                elif level == 0:
                    level = reference.start.depth
        else:
            passages = [[]]

        if level is None:
            level = 1

        # A level that is not below the known prefix would yield nothing new:
        # move one step under it.
        if level <= len(passages[0]) and reference is not None:
            level = len(passages[0]) + 1
        if level > len(self.citation.root):
            raise CitationDepthError("The required level is too deep")

        citations = list(self.citation.root)
        xml_namespace = "{http://www.w3.org/XML/1998/namespace}"

        # Each iteration extends every known passage by one citation level.
        for _ in range(level - depth):
            children = []
            for refs in passages:
                # Citation level directly below the current passage.
                current_citation = citations[len(refs)]
                found = xml.xpath(current_citation.fill(refs + [None]),
                                  namespaces=XPATH_NAMESPACES)
                if not found:
                    continue
                # lxml expects the Clark notation rather than the "xml:" prefix.
                attribute = current_citation.attribute.replace(
                    "xml:", xml_namespace)
                for node in found:
                    children.append(refs + [node.get(attribute)])
            passages = children

            if len(passages) == 0:
                msg = "Unknown reference {}".format(reference)
                raise KeyError(msg)

        passages = [".".join(passage) for passage in passages]

        if _debug:
            duplicates = set()
            seen = set()
            for n in passages:
                if n in seen:
                    duplicates.add(n)
                else:
                    seen.add(n)
            if len(duplicates) > 0:
                message = ", ".join(duplicates)
                warnings.warn(message, DuplicateReference)

            # A trailing "." or an empty string reveals an empty identifier.
            empties = [n for n in passages if n.rstrip('.') != n or n == '']
            if len(empties) > 0:
                message = '{} empty reference(s) at citation level {}'.format(
                    len(empties), level)
                warnings.warn(message, EmptyReference)

        references = CtsReferenceSet([CtsReference(reff) for reff in passages],
                                     citation=self.citation.root[level - 1],
                                     level=level)
        return references