Exemplo n.º 1
0
    def get_char(self, cp_or_sequence):
        """
        Return the stored char object matching a code point or sequence.

        The input is converted with ``CharBase.from_cp_or_sequence`` and its
        ``as_index()`` value selects the candidate list in the internal
        character dictionary; the stored entry equal to the converted
        object is returned.

        :param cp_or_sequence: Code point or sequence of the character
                              to get. Must have a non-zero length.
        :return: The char object held by the repertoire (the very same
                 object that was stored, not a copy).
        :raises NotInLGR: If no entry exists for the index, or if the list
                          stored for that index holds no matching char.

        >>> cd = Repertoire()
        >>> char = cd.add_char([0x002A])
        >>> c = cd.get_char([0x002A])
        >>> c is char
        True
        """
        assert len(cp_or_sequence), "there should be at least one char"

        wanted = CharBase.from_cp_or_sequence(cp_or_sequence)
        key = wanted.as_index()

        # Nothing at all is registered under this index.
        if key not in self._chardict:
            raise NotInLGR(cp_or_sequence)

        candidates = self._chardict[key]
        try:
            position = candidates.index(wanted)
        except ValueError:
            # The index exists, but none of its entries equals the
            # requested char.
            logger.error("Code point '%s' does not exist",
                         format_cp(cp_or_sequence))
            raise NotInLGR(cp_or_sequence)
        else:
            # Hand back the stored instance rather than the temporary
            # lookup object built above.
            return candidates[position]
Exemplo n.º 2
0
    def del_char(self, cp_or_sequence):
        """
        Remove a character from the LGR.

        The input is converted with ``CharBase.from_cp_or_sequence`` and
        handed to ``self._del_char``; a falsy result from that call is
        reported as a missing code point.

        :param cp_or_sequence: code point or code point sequence to delete.
                               Must have a non-zero length.
        :raises NotInLGR: If the code point does not exist.

        >>> cd = Repertoire()
        >>> _ = cd.add_char([0x002A])
        >>> 0x002A in cd
        True
        >>> cd.del_char([0x002A])
        >>> 0x002A in cd
        False
        >>> cd.del_char([0x002B]) # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        NotInLGR:
        """
        assert len(cp_or_sequence), "there should be at least one char"

        target = CharBase.from_cp_or_sequence(cp_or_sequence)
        if self._del_char(target):
            # Deletion succeeded, nothing more to do.
            return

        logger.error("Code point '%s' does not exist",
                     format_cp(cp_or_sequence))
        raise NotInLGR(cp_or_sequence)
Exemplo n.º 3
0
    def del_range(self, first_cp, last_cp):
        """
        Remove a previously added range of characters from the LGR.

        Note: The ``(first_cp, last_cp)`` pair MUST match a registered
        range exactly; deleting a partial sub-range is not supported.

        :param first_cp: First code point of the range.
        :param last_cp: Last code point of the range (inclusive); must be
                        strictly greater than ``first_cp``.
        :raises NotInLGR: If the range is not registered, or if one of its
                          code points cannot be deleted.

        >>> cd = Repertoire()
        >>> cd.add_range(0x002A, 0x0030)
        >>> cd.del_range(0x002A, 0x0030)
        >>> 0x002A in cd
        False
        >>> cd.del_range(0x002A, 0x0030) # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        NotInLGR:
        """
        assert first_cp < last_cp, "range must be defined in order"

        bounds = (first_cp, last_cp)
        if bounds not in self.ranges:
            logger.error("Range '%s - %s' does not exist", format_cp(first_cp),
                         format_cp(last_cp))
            raise NotInLGR(first_cp)

        # Delete every code point of the range, both ends included.
        for code_point in range(first_cp, last_cp + 1):
            if self._del_char(RangeChar(code_point, first_cp, last_cp)):
                continue
            # TODO: clean-up range on error
            # This should only happen if range insertion failed
            # -> inconsistent state for now: code points deleted so far
            # stay deleted and the range itself remains registered.
            logger.critical("Range '%s - %s' is missing code point %s",
                            format_cp(first_cp), format_cp(last_cp),
                            format_cp(code_point))
            raise NotInLGR(code_point)

        # All code points are gone: unregister the range itself.
        self.ranges.remove(bounds)
Exemplo n.º 4
0
    def get_chars_from_prefix(self, cp, only_variants=False):
        """
        Return the characters stored under the first code point ``cp``.

        :param cp: The first codepoint of the characters.
        :param only_variants: When true, keep only the chars whose
                              ``has_variant()`` is truthy.
        :return: A new list of characters, ordered by decreasing length
                 (entries of equal length keep their stored order).
        :raises NotInLGR: If the code point does not exist.
        """
        if cp not in self._chardict:
            raise NotInLGR(cp)

        candidates = self._chardict[cp]
        if only_variants:
            candidates = [char for char in candidates if char.has_variant()]

        # sorted() always builds a fresh list, so the stored list is never
        # handed out or reordered.
        return sorted(candidates, key=len, reverse=True)