Exemplo n.º 1
0
    def transform_char(self, element: Element, /):
        """Replace *element* with a ``<span class="character">`` and parse its text.

        The element's text is matched against a placeholder grammar made of
        three optional parts, in this order:

        1. an optional ``U+`` prefix followed by either ``XXXX`` (a code point
           placeholder) or 4 to 6 uppercase hexadecimal digits;
        2. whitespace followed by ``[X]`` (a glyph placeholder);
        3. whitespace followed by a name built from ``A-Z``, ``0-9``, spaces
           and hyphens.

        Args:
            element: The element to transform. It is replaced in its parent by
                the newly created span, which inherits the element's tail.

        NOTE(review): only the beginning of this method is visible here; what
        is done with ``transformed`` and ``cps`` afterwards cannot be told
        from this excerpt.
        """

        # Compiled with re.VERBOSE: whitespace and ``#`` comments in the
        # pattern are ignored, except inside a character class, so the space
        # in ``[A-Z0-9 -]`` is significant. Every top-level group is optional,
        # which means the empty string is also a full match.
        char_placeholder_pattern = re.compile(
            r"""
                (
                    (?P<u_plus> U\+ )?  # optional U+ prefix
                    (
                        (?P<code_point_placeholder> X{4} )  # XXXX as code point placeholder
                        | (?P<code_point> 1?[0-9A-F]?[0-9A-F]{4} )  # or actual code point
                    )
                )?
                (
                    \s
                    (?P<glyph_placeholder> \[X\] )  # [X] as glyph placeholder
                )?
                (
                    \s
                    (?P<name> [A-Z0-9 -]+ )  # actual name
                )?
            """,
            flags=re.VERBOSE,
        )

        # Build the replacement node and carry over the trailing text so the
        # content following the original element is preserved.
        transformed: Element = element.makeelement("span", {"class": "character"}, None)
        transformed.tail = element.tail  # type: ignore

        # Read the text before swapping the element out of the tree.
        # NOTE(review): if ``element.text`` can be None (e.g. an empty
        # element), ``fullmatch`` below raises TypeError -- confirm callers
        # never pass such an element.
        text: str = element.text
        element.getparent().replace(element, transformed)

        if match := char_placeholder_pattern.fullmatch(text):

            # Code points collected from the parts of the text that matched.
            cps = set[int]()
            if name := match.group("name"):
                # Resolve the character name to its code point.
                # NOTE(review): presumably ``unicodedata2.lookup`` raises
                # KeyError for an unknown name, like the stdlib
                # ``unicodedata.lookup`` -- verify; it is not handled here.
                cps.add(ord(unicodedata2.lookup(name)))
Exemplo n.º 2
0
    def transform_element(self, element: Element, /):
        """Dispatch on ``element.tag`` and apply the matching transformation.

        * ``char`` elements are handed to ``transform_char``.
        * ``h1``, ``figcaption`` and ``a`` elements whose keys include
          ``"numbering"`` are passed to ``expand_placeholder_in_element``
          with the mode ``"numbering"``; an ``a`` element is additionally
          wrapped in a new ``cite`` element.

        Args:
            element: The element to inspect and transform in place.

        NOTE(review): the excerpt ends after the second case; further cases
        may follow outside the visible part.
        """

        match element.tag:

            case "char":
                self.transform_char(element)

            # The guard binds ``mode`` to the constant "numbering" and tests
            # whether that name is among ``element.keys()`` (presumably the
            # element's attribute names -- confirm against the Element type).
            case ("h1" | "figcaption" | "a") as tag if (mode := "numbering") in element.keys():

                self.expand_placeholder_in_element(element, mode)

                if tag == "a":

                    # Wrap the link in a <cite>. The tail text moves to the
                    # wrapper and is cleared on the link, so it stays after
                    # the wrapper instead of ending up inside it.
                    wrap = element.makeelement("cite", {}, None)
                    wrap.tail, element.tail = element.tail, None  # type: ignore

                    # Put the wrapper in the link's position first, then
                    # re-attach the link as the wrapper's child.
                    element.getparent().replace(element, wrap)
                    wrap.append(element)