def get_char(self, cp_or_sequence):
    """
    Return the stored char object matching a code point or sequence.

    The argument is converted with ``CharBase.from_cp_or_sequence`` and
    the resulting object's index selects a bucket in ``self._chardict``;
    the bucket entry that compares equal to it is returned.

    :param cp_or_sequence: Code point or sequence of the character to get.
                           ``len()`` is applied to it and it must be
                           non-empty (enforced by an assertion).
    :return: The char object held by the repertoire (not a copy).
    :raises NotInLGR: If the code point does not exist.

    >>> cd = Repertoire()
    >>> char = cd.add_char([0x002A])
    >>> c = cd.get_char([0x002A])
    >>> c is char
    True
    """
    assert len(cp_or_sequence), "there should be at least one char"

    wanted = CharBase.from_cp_or_sequence(cp_or_sequence)
    key = wanted.as_index()
    if key not in self._chardict:
        # No bucket at all for this index: nothing to search.
        raise NotInLGR(cp_or_sequence)

    bucket = self._chardict[key]
    try:
        position = bucket.index(wanted)
    except ValueError:
        # The bucket exists but holds no entry equal to the requested one.
        logger.error("Code point '%s' does not exist",
                     format_cp(cp_or_sequence))
        raise NotInLGR(cp_or_sequence)
    else:
        return bucket[position]
def del_char(self, cp_or_sequence):
    """
    Remove a character from the LGR.

    The argument is converted with ``CharBase.from_cp_or_sequence`` and
    handed to ``self._del_char``; a falsy result from that call is
    reported as a missing code point.

    :param cp_or_sequence: code point or code point sequence to delete.
                           ``len()`` is applied to it and it must be
                           non-empty (enforced by an assertion).
    :raises NotInLGR: If the code point does not exist.

    >>> cd = Repertoire()
    >>> _ = cd.add_char([0x002A])
    >>> 0x002A in cd
    True
    >>> cd.del_char([0x002A])
    >>> 0x002A in cd
    False
    >>> cd.del_char([0x002B]) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    NotInLGR:
    """
    assert len(cp_or_sequence), "there should be at least one char"

    target = CharBase.from_cp_or_sequence(cp_or_sequence)
    if self._del_char(target):
        return

    # Deletion reported failure: log it and signal the missing code point.
    logger.error("Code point '%s' does not exist",
                 format_cp(cp_or_sequence))
    raise NotInLGR(cp_or_sequence)
def del_range(self, first_cp, last_cp):
    """
    Remove a previously added range of characters from the LGR.

    Note: the bounds MUST be exactly those the range was added with;
    deleting a partial sub-range is not supported.

    :param first_cp: First code point of the range.
    :param last_cp: Last code point of the range (inclusive); must be
                    strictly greater than ``first_cp`` (enforced by an
                    assertion).
    :raises NotInLGR: If the range does not exist, or if one of its
                      code points cannot be deleted.

    >>> cd = Repertoire()
    >>> cd.add_range(0x002A, 0x0030)
    >>> cd.del_range(0x002A, 0x0030)
    >>> 0x002A in cd
    False
    >>> cd.del_range(0x002A, 0x0030) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    NotInLGR:
    """
    assert first_cp < last_cp, "range must be defined in order"

    bounds = (first_cp, last_cp)
    if bounds not in self.ranges:
        logger.error("Range '%s - %s' does not exist",
                     format_cp(first_cp), format_cp(last_cp))
        raise NotInLGR(first_cp)

    for code_point in range(first_cp, last_cp + 1):
        member = RangeChar(code_point, first_cp, last_cp)
        if self._del_char(member):
            continue
        # TODO: clean-up range on error
        # This should only happen if range insertion failed
        # -> inconsistent state for now: code points deleted so far stay
        # deleted while the range itself remains listed in self.ranges.
        logger.critical("Range '%s - %s' is missing code point %s",
                        format_cp(first_cp),
                        format_cp(last_cp),
                        format_cp(code_point))
        raise NotInLGR(code_point)

    # Every code point was deleted; drop the range record itself.
    self.ranges.remove(bounds)
def get_chars_from_prefix(self, cp, only_variants=False):
    """
    List the characters whose first code point is ``cp``.

    :param cp: The first codepoint of the characters.
    :param only_variants: Only return chars with variants, i.e. those
                          whose ``has_variant()`` is truthy.
    :return: New list of characters, ordered by decreasing length.
             Characters of equal length keep their stored order.
    :raises NotInLGR: If the code point does not exist.
    """
    if cp not in self._chardict:
        raise NotInLGR(cp)

    candidates = self._chardict[cp]
    if only_variants:
        candidates = (char for char in candidates if char.has_variant())

    # sorted() builds a fresh list, so the stored bucket is left untouched.
    return sorted(candidates, key=len, reverse=True)