def transform_char(self, element: Element, /):
    """Replace a ``char`` element with a ``<span class="character">`` element.

    The new span is created through ``element.makeelement``, inherits the
    tail text of *element* and takes its position in the parent. The text of
    *element* is then matched against the placeholder grammar::

        [[U+](XXXX | <4-6 hex digits>)] [ "[X]"] [ CHARACTER NAME]

    Every part is optional; the glyph placeholder and the name must each be
    preceded by one whitespace character. When a name is present it is
    resolved with ``unicodedata2.lookup`` and its code point is collected.

    Raises:
        KeyError: presumably, when the name is unknown to ``unicodedata2``
            (mirrors ``unicodedata.lookup`` -- TODO confirm).
    """
    # Keep one construct per line: under re.VERBOSE a `#` comment runs to the
    # end of the line, so joining lines would comment out the rest of the
    # pattern and leave an unbalanced group.
    char_placeholder_pattern = re.compile(
        r"""
        (
            (?P<u_plus> U\+ )?  # optional U+ prefix
            (
                (?P<code_point_placeholder> X{4} )  # XXXX as code point placeholder
                |
                (?P<code_point> 1?[0-9A-F]?[0-9A-F]{4} )  # or actual code point
            )
        )?
        (
            \s (?P<glyph_placeholder> \[X\] )  # [X] as glyph placeholder
        )?
        (
            \s (?P<name> [A-Z0-9 -]+ )  # actual name
        )?
        """,
        flags=re.VERBOSE,
    )

    transformed: Element = element.makeelement("span", {"class": "character"}, None)
    transformed.tail = element.tail  # type: ignore
    # An element without text reports None; normalise so fullmatch() does not
    # fail with TypeError (the empty string matches, with every group unset).
    text: str = element.text or ""
    element.getparent().replace(element, transformed)

    if parsed := char_placeholder_pattern.fullmatch(text):
        cps = set[int]()
        if name := parsed.group("name"):
            cps.add(ord(unicodedata2.lookup(name)))
        # NOTE(review): `cps` is not consumed after this point -- confirm
        # whether follow-up logic (e.g. using the code point) belongs here.
def transform_element(self, element: Element, /):
    """Dispatch *element* to the transformation that matches its tag.

    ``char`` elements are handed to ``transform_char``. ``h1``,
    ``figcaption`` and ``a`` elements that carry a ``numbering`` attribute
    are passed to ``expand_placeholder_in_element`` with the mode
    ``"numbering"``; an ``a`` element is afterwards wrapped in a new ``cite``
    element that takes over its position in the parent and its tail text.
    Any other element is left untouched.
    """
    tag = element.tag
    if tag == "char":
        self.transform_char(element)
    elif tag in ("h1", "figcaption", "a") and "numbering" in element.keys():
        self.expand_placeholder_in_element(element, "numbering")
        if tag == "a":
            cite = element.makeelement("cite", {}, None)
            # Hand the tail over to the wrapper before re-parenting so the
            # trailing text follows the wrapper instead of ending up inside it.
            cite.tail = element.tail  # type: ignore
            element.tail = None  # type: ignore
            element.getparent().replace(element, cite)
            cite.append(element)