示例#1
0
    def __init__(self, deterministic: bool = True):
        """Build the e-mail address graph (tagger composed with verbalizer).

        The tagger splits ``user@domain`` input into ``username: "..."`` and
        ``domain: "..."`` fields.  The verbalizer consumes exactly that
        tagged form and spells it out.  Both are composed into
        ``self.final_graph``, which is then wrapped into ``self.fst``.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="electronic",
                         kind="classify",
                         deterministic=deterministic)

        # tagger
        accepted_symbols = []
        # Explicit encoding so that reading the table does not depend on the
        # platform's default locale.
        with open(get_abs_path("data/electronic/symbols.tsv"),
                  'r',
                  encoding="utf-8") as f:
            for line in f:
                # Only the first column (the written symbol) is accepted in
                # usernames; the second column is ignored here.
                symbol, _ = line.split('\t')
                accepted_symbols.append(pynini.accep(symbol))
        # A username starts with a letter and ends at '@', which is rewritten
        # to the space separating the two tagged fields.
        username = (pynutil.insert("username: \"") + NEMO_ALPHA +
                    pynini.closure(NEMO_ALPHA | NEMO_DIGIT
                                   | pynini.union(*accepted_symbols)) +
                    pynutil.insert("\"") + pynini.cross('@', ' '))
        # A domain starts with a letter, may contain '-' and '.', and ends
        # with a letter or digit.
        domain_graph = (
            NEMO_ALPHA +
            (pynini.closure(NEMO_ALPHA | NEMO_DIGIT | pynini.accep('-')
                            | pynini.accep('.'))) + (NEMO_ALPHA | NEMO_DIGIT))
        domain_graph = pynutil.insert(
            "domain: \"") + domain_graph + pynutil.insert("\"")
        tagger_graph = (username + domain_graph).optimize()

        # verbalizer
        graph_digit = pynini.string_file(
            get_abs_path(
                "data/numbers/digits_nominative_case.tsv")).optimize()
        graph_symbols = pynini.string_file(
            get_abs_path("data/electronic/symbols.tsv")).optimize()
        # Strip the `username: "` prefix emitted by the tagger (same shape as
        # the `domain:` handling below).  Digit and symbol readings carry
        # weight 1.09, the pass-through of any other character 1.1.
        user_name = (
            pynutil.delete("username:") + delete_space + pynutil.delete("\"") +
            (pynini.closure(
                pynutil.add_weight(graph_digit + insert_space, 1.09)
                | pynutil.add_weight(
                    pynini.closure(graph_symbols + pynutil.insert(" ")), 1.09)
                | pynutil.add_weight(NEMO_NOT_QUOTE + insert_space, 1.1))) +
            pynutil.delete("\""))

        # Fallback top-level domain: characters spelled one by one around a
        # verbalized dot.
        domain_default = (pynini.closure(NEMO_NOT_QUOTE + insert_space) +
                          pynini.cross(".", "точка ") + NEMO_NOT_QUOTE +
                          pynini.closure(insert_space + NEMO_NOT_QUOTE))

        # Fallback server name: letters/digits, optional symbols, then
        # letters/digits again, each followed by an inserted space.
        server_default = (pynini.closure(
            (graph_digit | NEMO_ALPHA) + insert_space, 1) +
                          pynini.closure(graph_symbols + insert_space) +
                          pynini.closure(
                              (graph_digit | NEMO_ALPHA) + insert_space, 1))
        server_common = pynini.string_file(
            get_abs_path("data/electronic/server_name.tsv")) + insert_space
        domain_common = pynini.cross(".", "точка ") + pynini.string_file(
            get_abs_path("data/electronic/domain.tsv"))
        # Table-driven readings (weight 1.09) are weighted differently from
        # the generic fallbacks (weight 1.1).
        domain = (pynutil.delete("domain:") + delete_space +
                  pynutil.delete("\"") +
                  (pynutil.add_weight(server_common, 1.09)
                   | pynutil.add_weight(server_default, 1.1)) +
                  (pynutil.add_weight(domain_common, 1.09)
                   | pynutil.add_weight(domain_default, 1.1)) + delete_space +
                  pynutil.delete("\""))

        graph = user_name + delete_space + pynutil.insert(
            "собака ") + delete_space + domain + delete_space
        # replace all latin letters with their Ru verbalization
        verbalizer_graph = (graph.optimize() @ (pynini.closure(
            TO_LATIN | RU_ALPHA | pynini.accep(" ")))).optimize()

        self.final_graph = (tagger_graph @ verbalizer_graph).optimize()
        self.fst = self.add_tokens(
            pynutil.insert("username: \"") + self.final_graph +
            pynutil.insert("\"")).optimize()
示例#2
0
 def __construct_inflection(self):
     '''
     Assemble the inflection transducer.

     For every inflection class, the class tag is emitted on the output side
     (empty input crossed to the tag) and concatenated with that class's
     FST.  All alternatives are unioned and the result is optimized.
     '''
     with pynini.default_token_type(self.__syms.alphabet):
         # (tag, class FST) pairs, in the order they are unioned.
         tagged_classes = (
             ("<Adj0>", self.__adj0),
             ("<Adj0-Up>", self.__adj0_up),
             ("<Adj+>", self.__adj_plus),
             ("<Adj+e>", self.__adj_plus_e),
             ("<NMasc_es_e>", self.__nmasc_es_e),
             ("<NMasc_es_$e>", self.__nmasc_es_e_ul),
             ("<NMasc_es_en>", self.__nmasc_es_en),
             ("<NFem-Deriv>", self.__nfem_deriv),
             ("<NFem_0_n>", self.__nfem_0_n),
             ("<NNeut-Dimin>", self.__nneut_dimin),
             ("<NNeut/Sg_s>", self.__nneut_sg_s),
             ("<VVReg>", self.__vv_reg),
         )
         branches = [
             pynini.concat(pynini.cross("", tag), inflection)
             for tag, inflection in tagged_classes
         ]
         return pynini.union(*branches).optimize()
示例#3
0
    # Raw string constants: the non-breaking space (U+00A0) and a plain space.
    NEMO_NON_BREAKING_SPACE = u"\u00A0"
    NEMO_SPACE = " "
    # Any single whitespace symbol: space, tab, newline, carriage return or
    # non-breaking space.
    NEMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r",
                                    u"\u00A0").optimize()
    # NEMO_CHAR minus whitespace, and NEMO_CHAR minus the double quote.
    NEMO_NOT_SPACE = pynini.difference(NEMO_CHAR, NEMO_WHITE_SPACE).optimize()
    NEMO_NOT_QUOTE = pynini.difference(NEMO_CHAR, r'"').optimize()

    # One escaped acceptor per character of string.punctuation.
    NEMO_PUNCT = pynini.union(
        *map(pynini.escape, string.punctuation)).optimize()
    # Alphanumerics plus punctuation.
    NEMO_GRAPH = pynini.union(NEMO_ALNUM, NEMO_PUNCT).optimize()

    # Zero or more NEMO_CHAR symbols.
    NEMO_SIGMA = pynini.closure(NEMO_CHAR)

    # delete_space removes a (possibly empty) run of whitespace;
    # insert_space emits one space; delete_extra_space rewrites one or more
    # whitespace symbols to exactly one space.
    delete_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE))
    insert_space = pynutil.insert(" ")
    delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ")

    # French frequently compounds numbers with hyphen.
    # delete_hyphen removes at most one "-"; insert_hyphen emits one.
    delete_hyphen = pynutil.delete(pynini.closure("-", 0, 1))
    insert_hyphen = pynutil.insert("-")

    # Per-letter ASCII case mapping (uppercase -> lowercase) and its inverse.
    TO_LOWER = pynini.union(*[
        pynini.cross(x, y)
        for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)
    ])
    TO_UPPER = pynini.invert(TO_LOWER)

    PYNINI_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
    # Create placeholders
    NEMO_CHAR = None
示例#4
0
文件: time.py 项目: quuhua911/NeMo
    def __init__(self):
        """Build the Spanish time classifier (spoken form -> tagged fields).

        The resulting ``self.fst`` emits ``hours``, ``minutes`` and an
        optional ``suffix`` field for patterns such as "las nueve y
        veinticinco", "un cuarto para las cinco" and "las diez menos diez".
        """
        super().__init__(name="time", kind="classify")

        suffix_graph = pynini.string_file(
            get_abs_path("data/time/time_suffix.tsv"))
        time_to_graph = pynini.string_file(
            get_abs_path("data/time/time_to.tsv"))

        graph_digit = pynini.string_file(
            get_abs_path("data/numbers/digit.tsv"))
        graph_ties = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
        graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
        graph_twenties = pynini.string_file(
            get_abs_path("data/numbers/twenties.tsv"))

        # Spoken numbers built from the digit/teen/twenties/ties tables; the
        # result is filtered below to the valid hour and minute ranges.
        graph_1_to_100 = pynini.union(
            graph_digit,
            graph_twenties,
            graph_teen,
            (graph_ties + pynutil.insert("0")),
            (graph_ties + pynutil.delete(" y ") + graph_digit),
        )

        # note that graph_hour will start from 2 hours
        # "1 o'clock" will be treated differently because it
        # is singular
        digits_2_to_23 = [str(digits) for digits in range(2, 24)]
        digits_1_to_59 = [str(digits) for digits in range(1, 60)]

        graph_1oclock = pynini.cross("la una", "la 1")
        graph_hour = pynini.cross(
            "las ", "las ") + graph_1_to_100 @ pynini.union(*digits_2_to_23)
        graph_minute = graph_1_to_100 @ pynini.union(*digits_1_to_59)
        graph_minute_verbose = pynini.cross("media", "30") | pynini.cross(
            "cuarto", "15")

        final_graph_hour = pynutil.insert("hours: \"") + (
            graph_1oclock | graph_hour) + pynutil.insert("\"")

        # The connective "y" / "con" before the minutes is optional.
        final_graph_minute = (pynutil.insert("minutes: \"") + pynini.closure(
            (pynutil.delete("y") | pynutil.delete("con")) + delete_space, 0,
            1) + (graph_minute | graph_minute_verbose) + pynutil.insert("\""))

        final_suffix = pynutil.insert("suffix: \"") + convert_space(
            suffix_graph) + pynutil.insert("\"")
        final_suffix_optional = pynini.closure(
            delete_space + insert_space + final_suffix, 0, 1)

        # las nueve a eme (only convert on-the-hour times if they are followed by a suffix)
        graph_hsuffix = (final_graph_hour + delete_extra_space +
                         pynutil.insert("minutes: \"00\"") + insert_space +
                         final_suffix)

        # las nueve y veinticinco
        graph_hm = final_graph_hour + delete_extra_space + final_graph_minute

        # un cuarto para las cinco
        graph_mh = (pynutil.insert("minutes: \"") + pynini.union(
            pynini.cross("un cuarto para", "45"),
            pynini.cross("cuarto para", "45"),
        ) + pynutil.insert("\"") + delete_extra_space +
                    pynutil.insert("hours: \"") + time_to_graph +
                    pynutil.insert("\""))

        # las diez menos diez
        # "menos N" is rendered as minute 60 - N, so "veinticinco" is 35
        # (it was previously mapped to 30, breaking the 60 - N pattern).
        graph_time_to = (pynutil.insert("hours: \"") + time_to_graph +
                         pynutil.insert("\"") + delete_extra_space +
                         pynutil.insert("minutes: \"") + delete_space +
                         pynutil.delete("menos") + delete_space + pynini.union(
                             pynini.cross("cinco", "55"),
                             pynini.cross("diez", "50"),
                             pynini.cross("cuarto", "45"),
                             pynini.cross("veinte", "40"),
                             pynini.cross("veinticinco", "35"),
                         ) + pynutil.insert("\""))
        final_graph = pynini.union(
            (graph_hm | graph_mh | graph_time_to) + final_suffix_optional,
            graph_hsuffix).optimize()

        final_graph = self.add_tokens(final_graph)

        self.fst = final_graph.optimize()
示例#5
0
    def __init__(self):
        """Build the money classifier (spoken amount -> tagged fields).

        Emits ``integer_part``, ``fractional_part`` and ``currency`` fields
        for amounts such as "twelve dollars (and) fifty cents", "twelve
        dollars fifty" and stand-alone cent amounts.
        """
        super().__init__(name="money", kind="classify")
        # quantity, integer_part, fractional_part, currency, style(depr)
        number_graph = CardinalFst().graph_no_exception
        decimal_number_graph = DecimalFst().final_graph_wo_negative

        currency_table = pynini.string_file(get_abs_path("data/currency.tsv"))
        singular_units = pynini.invert(currency_table)
        plural_units = get_singulars(singular_units)

        currency_singular = (
            pynutil.insert("currency: \"") + convert_space(singular_units) + pynutil.insert("\"")
        )
        currency_plural = (
            pynutil.insert("currency: \"") + convert_space(plural_units) + pynutil.insert("\"")
        )

        # Two digits pass through; a single digit gets a leading "0".
        pad_to_two_digits = (NEMO_DIGIT + NEMO_DIGIT) | (pynutil.insert("0") + NEMO_DIGIT)
        # Any cardinal except the literal word "one" (handled separately
        # because it selects the singular unit).
        cardinal_except_one = (NEMO_SIGMA - "one") @ number_graph

        # twelve dollars (and) fifty cents, zero cents
        several_cents = (
            pynutil.add_weight(cardinal_except_one, -0.7) @ pad_to_two_digits
            + delete_space
            + pynutil.delete("cents")
        )
        single_cent = pynini.cross("one", "01") + delete_space + pynutil.delete("cent")
        cents_standalone = (
            pynutil.insert("fractional_part: \"")
            + pynini.union(several_cents, single_cent)
            + pynutil.insert("\"")
        )

        optional_and = pynini.closure(pynutil.delete("and") + delete_space, 0, 1)
        optional_cents_standalone = pynini.closure(
            delete_space + optional_and + insert_space + cents_standalone, 0, 1
        )
        # twelve dollars fifty, only after integer
        optional_cents_suffix = pynini.closure(
            delete_extra_space
            + pynutil.insert("fractional_part: \"")
            + pynutil.add_weight(number_graph @ pad_to_two_digits, -0.7)
            + pynutil.insert("\""),
            0,
            1,
        )
        optional_cents = optional_cents_standalone | optional_cents_suffix

        def integer_amount(amount, currency):
            # integer_part field, then the unit, then optional cents.
            return (
                pynutil.insert("integer_part: \"")
                + amount
                + pynutil.insert("\"")
                + delete_extra_space
                + currency
                + optional_cents
            )

        graph_integer = integer_amount(cardinal_except_one, currency_plural)
        graph_integer |= integer_amount(pynini.cross("one", "1"), currency_singular)

        graph_decimal = decimal_number_graph + delete_extra_space + currency_plural
        # A bare cent amount is tagged as zero dollars plus the cents.
        graph_decimal |= pynutil.insert("currency: \"$\" integer_part: \"0\" ") + cents_standalone

        tagged = self.add_tokens(graph_integer | graph_decimal)
        self.fst = tagged.optimize()
示例#6
0
    def __init__(self,
                 cardinal: GraphFst,
                 decimal: GraphFst,
                 fraction: GraphFst,
                 deterministic: bool = True):
        """Assemble the measure classifier from number graphs and unit tables.

        Args:
            cardinal: supplies ``graph`` (and ``range_graph`` when not
                deterministic); also passed to ``get_address_graph``.
            decimal: supplies ``final_graph_wo_negative``.
            fraction: supplies ``graph``.
            deterministic: forwarded to the parent constructor; when False
                the cardinal graph is extended with ``cardinal.range_graph``.
        """
        super().__init__(name="measure",
                         kind="classify",
                         deterministic=deterministic)
        numbers = cardinal.graph

        if not deterministic:
            numbers |= cardinal.range_graph

        decimals = decimal.final_graph_wo_negative

        # Unit table, extended so that input containing uppercase letters is
        # lowercased before the lookup.
        unit_table = pynini.string_file(get_abs_path("data/measurements.tsv"))
        unit_table |= pynini.compose(
            pynini.closure(TO_LOWER, 1) + pynini.closure(NEMO_ALPHA),
            unit_table)

        plural_units = convert_space(unit_table @ SINGULAR_TO_PLURAL)
        singular_units = convert_space(unit_table)
        maybe_negative = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0,
            1)

        # "/unit" is read as "per unit", joined with a non-breaking space.
        per_unit = (pynini.cross("/", "per") + delete_space +
                    pynutil.insert(NEMO_NON_BREAKING_SPACE) + singular_units)
        maybe_per_unit = pynini.closure(
            delete_space + pynutil.insert(NEMO_NON_BREAKING_SPACE) + per_unit,
            0,
            1,
        )

        unit_plural = (pynutil.insert("units: \"") +
                       (plural_units + maybe_per_unit | per_unit) +
                       pynutil.insert("\""))
        unit_singular = (pynutil.insert("units: \"") +
                         (singular_units + maybe_per_unit | per_unit) +
                         pynutil.insert("\""))

        subgraph_decimal = (pynutil.insert("decimal { ") + maybe_negative +
                            decimals + delete_space +
                            pynutil.insert(" } ") + unit_plural)

        # Every cardinal except "1" takes the plural unit ...
        subgraph_cardinal = (pynutil.insert("cardinal { ") + maybe_negative +
                             pynutil.insert("integer: \"") +
                             ((NEMO_SIGMA - "1") @ numbers) + delete_space +
                             pynutil.insert("\"") + pynutil.insert(" } ") +
                             unit_plural)
        # ... while "1" takes the singular one.
        subgraph_cardinal |= (pynutil.insert("cardinal { ") + maybe_negative +
                              pynutil.insert("integer: \"") +
                              pynini.cross("1", "one") + delete_space +
                              pynutil.insert("\"") + pynutil.insert(" } ") +
                              unit_singular)

        # Number-dash-word and word-dash-number forms.
        cardinal_dash_alpha = (pynutil.insert("cardinal { integer: \"") +
                               numbers + pynini.accep('-') +
                               pynutil.insert("\" } units: \"") +
                               pynini.closure(NEMO_ALPHA, 1) +
                               pynutil.insert("\""))

        alpha_dash_cardinal = (pynutil.insert("units: \"") +
                               pynini.closure(NEMO_ALPHA, 1) +
                               pynini.accep('-') + pynutil.insert("\"") +
                               pynutil.insert(" cardinal { integer: \"") +
                               numbers +
                               pynutil.insert("\" } preserve_order: true"))

        decimal_dash_alpha = (pynutil.insert("decimal { ") + decimals +
                              pynini.cross('-', '') +
                              pynutil.insert(" } units: \"") +
                              pynini.closure(NEMO_ALPHA, 1) +
                              pynutil.insert("\""))

        # A decimal followed by "x" or "X" (normalized to lowercase "x").
        decimal_times = (pynutil.insert("decimal { ") + decimals +
                         pynutil.insert(" } units: \"") +
                         pynini.cross(pynini.union('x', "X"), 'x') +
                         pynutil.insert("\""))

        alpha_dash_decimal = (pynutil.insert("units: \"") +
                              pynini.closure(NEMO_ALPHA, 1) +
                              pynini.accep('-') + pynutil.insert("\"") +
                              pynutil.insert(" decimal { ") + decimals +
                              pynutil.insert(" } preserve_order: true"))

        subgraph_fraction = (pynutil.insert("fraction { ") + fraction.graph +
                             delete_space + pynutil.insert(" } ") +
                             unit_plural)

        address = (
            pynutil.insert("units: \"address\" cardinal { integer: \"") +
            self.get_address_graph(cardinal) +
            pynutil.insert("\" } preserve_order: true"))

        # number <operator> number = number, with spaces kept or inserted.
        operators = pynini.string_file(
            get_abs_path("data/math_operations.tsv"))
        gap = pynini.accep(" ") | pynutil.insert(" ")
        equation = (numbers + gap + operators + gap + numbers + gap +
                    pynini.cross("=", "equals") + gap + numbers)
        math = (pynutil.insert("units: \"math\" cardinal { integer: \"") +
                equation + pynutil.insert("\" } preserve_order: true"))

        all_measures = (subgraph_decimal
                        | subgraph_cardinal
                        | cardinal_dash_alpha
                        | alpha_dash_cardinal
                        | decimal_dash_alpha
                        | decimal_times
                        | alpha_dash_decimal
                        | subgraph_fraction
                        | address
                        | math)
        self.fst = self.add_tokens(all_measures).optimize()
示例#7
0
    def __init__(self,
                 cardinal: GraphFst,
                 ordinal: GraphFst,
                 deterministic: bool = True):
        """Build the fraction classifier.

        Produces ``self.graph`` (optional ``integer_part``, then ``numerator``
        and ``denominator`` fields) and ``self.fst`` (the same graph wrapped
        by ``add_tokens`` and optimized).

        Args:
            cardinal: supplies ``graph``.
            ordinal: supplies ``graph``, ``multiples_of_thousand`` and
                ``one_to_one_thousand``.
            deterministic: forwarded to the parent constructor; also controls
                whether multiples of ten are accepted as ordinal denominators
                and whether cardinal denominators are limited to 1-2 digits.

        NOTE(review): ``ordinal_exceptions`` and ``higher_powers_of_ten`` are
        not defined in this method; presumably module-level names - confirm.
        """
        super().__init__(name="fraction",
                         kind="classify",
                         deterministic=deterministic)
        cardinal_graph = cardinal.graph
        ordinal_graph = ordinal.graph

        # 2-10 are all ordinals
        three_to_ten = pynini.string_map([
            "2",
            "3",
            "4",
            "5",
            "6",
            "7",
            "8",
            "9",
            "10",
        ])
        # Captured here, before the optional extension below, so the blocker
        # only ever covers "2".."10".
        block_three_to_ten = pynutil.delete(
            three_to_ten)  # To block cardinal productions
        if not deterministic:  # Multiples of tens are sometimes rendered as ordinals
            three_to_ten |= pynini.string_map([
                "20",
                "30",
                "40",
                "50",
                "60",
                "70",
                "80",
                "90",
            ])
        graph_three_to_ten = three_to_ten @ ordinal_graph
        # Post-process the ordinal output with the exception rewrites.
        graph_three_to_ten @= pynini.cdrewrite(ordinal_exceptions, "", "",
                                               NEMO_SIGMA)

        # Higher powers of tens (and multiples) are converted to ordinals.
        hundreds = pynini.string_map([
            "100",
            "200",
            "300",
            "400",
            "500",
            "600",
            "700",
            "800",
            "900",
        ])
        graph_hundreds = hundreds @ ordinal_graph

        multiples_of_thousand = ordinal.multiples_of_thousand  # So we can have X milésimos

        graph_higher_powers_of_ten = (
            pynini.closure(ordinal.one_to_one_thousand + NEMO_SPACE, 0, 1) +
            pynini.closure("mil ", 0, 1) +
            pynini.closure(ordinal.one_to_one_thousand + NEMO_SPACE, 0, 1)
        )  # x millones / x mil millones / x mil z millones
        graph_higher_powers_of_ten += higher_powers_of_ten
        # Restrict to what the cardinal graph can actually produce.
        graph_higher_powers_of_ten = cardinal_graph @ graph_higher_powers_of_ten
        graph_higher_powers_of_ten @= pynini.cdrewrite(
            pynutil.delete("un "), pynini.accep("[BOS]"),
            pynini.project(higher_powers_of_ten, "output"), NEMO_SIGMA
        )  # we drop 'un' from these ordinals (millionths, not one-millionths)

        graph_higher_powers_of_ten = multiples_of_thousand | graph_hundreds | graph_higher_powers_of_ten
        # Input side of the ordinal-style denominators, turned into a deleter.
        block_higher_powers_of_ten = pynutil.delete(
            pynini.project(graph_higher_powers_of_ten,
                           "input"))  # For cardinal graph

        graph_fractions_ordinals = graph_higher_powers_of_ten | graph_three_to_ten
        graph_fractions_ordinals += pynutil.insert(
            "\" morphosyntactic_features: \"ordinal\""
        )  # We note the root for processing later

        # Blocking the digits and hundreds from Cardinal graph
        # The rewrite only fires when the blocked string spans the whole
        # input ([BOS] ... [EOS]).
        graph_fractions_cardinals = pynini.cdrewrite(
            block_three_to_ten | block_higher_powers_of_ten,
            pynini.accep("[BOS]"), pynini.accep("[EOS]"), NEMO_SIGMA)
        graph_fractions_cardinals @= NEMO_CHAR.plus @ pynini.cdrewrite(
            pynutil.delete("0"), pynini.accep("[BOS]"), pynini.accep("[EOS]"),
            NEMO_SIGMA
        )  # Empty characters become '0' for NEMO_CHAR fst, so need to block
        graph_fractions_cardinals @= cardinal_graph
        graph_fractions_cardinals += pynutil.insert(
            "\" morphosyntactic_features: \"add_root\""
        )  # blocking these entries to reduce erroneous possibilities in debugging

        if deterministic:
            graph_fractions_cardinals = (
                pynini.closure(NEMO_DIGIT, 1, 2) @ graph_fractions_cardinals
            )  # Past hundreds the conventional scheme can be hard to read. For determinism we stop here

        graph_denominator = pynini.union(
            graph_fractions_ordinals,
            graph_fractions_cardinals,
            pynutil.add_weight(cardinal_graph + pynutil.insert("\""), 0.001),
        )  # Last form is simply recording the cardinal. Weighting so last resort

        # Optional whole-number part, followed by a literal space.
        integer = pynutil.insert(
            "integer_part: \"") + cardinal_graph + pynutil.insert(
                "\"") + NEMO_SPACE
        # The slash (with or without surrounding spaces) closes the numerator
        # field.
        numerator = (pynutil.insert("numerator: \"") + cardinal_graph +
                     (pynini.cross("/", "\" ") | pynini.cross(" / ", "\" ")))
        # The closing quote of the denominator comes from graph_denominator.
        denominator = pynutil.insert("denominator: \"") + graph_denominator

        self.graph = pynini.closure(integer, 0, 1) + numerator + denominator

        final_graph = self.add_tokens(self.graph)
        self.fst = final_graph.optimize()
    def __init__(self):
        """Build the electronic classifier for spoken e-mail addresses and URLs.

        E-mail input ("a b c at g mail dot com") is tagged with ``username``
        and ``domain`` fields; URL input ("w w w dot ...") is tagged with a
        single ``protocol`` field.
        """
        super().__init__(name="electronic", kind="classify")

        # Spelled-out input separates tokens with single spaces.
        drop_space = pynutil.delete(" ")
        digit_words = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
        zero_words = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
        alpha_num = NEMO_ALPHA | digit_words | zero_words

        symbols = pynini.string_file(get_abs_path("data/electronic/symbols.tsv")).invert()

        username_char = alpha_num | symbols
        spoken_dot = pynini.cross("dot", ".")

        # ----- e-mail: <username> at <server> dot <domain> -----
        username = (
            pynutil.insert("username: \"")
            + alpha_num
            + pynini.closure(drop_space + username_char)
            + pynutil.insert("\"")
        )
        spelled_alphanum = pynini.closure(alpha_num + drop_space) + alpha_num
        server = spelled_alphanum | pynini.string_file(get_abs_path("data/electronic/server_name.tsv"))
        domain = spelled_alphanum | pynini.string_file(get_abs_path("data/electronic/domain.tsv"))
        domain_graph = (
            pynutil.insert("domain: \"")
            + server
            + drop_space
            + spoken_dot
            + drop_space
            + domain
            + pynutil.insert("\"")
        )
        graph = username + drop_space + pynutil.delete("at") + insert_space + drop_space + domain_graph

        # ----- url: [http(s)://]www.<name><ending>+ -----
        www = pynini.cross(pynini.union("w w w", "www"), "www")
        scheme = (pynini.cross("h t t p", "http") | pynini.cross("h t t p s", "https")) + pynini.cross(
            " colon slash slash ", "://"
        )
        # .com,
        spelled_username_chars = pynini.closure(username_char + drop_space) + username_char
        ending = drop_space + symbols + drop_space + (domain | spelled_username_chars)

        protocol = (
            pynini.closure(scheme, 0, 1)
            + www
            + drop_space
            + spoken_dot
            + pynini.closure(drop_space + username_char, 1)
            + pynini.closure(ending, 1)
        )
        protocol = pynutil.insert("protocol: \"") + protocol + pynutil.insert("\"")
        graph |= protocol

        self.fst = self.add_tokens(graph).optimize()
示例#9
0
    def __init__(self):
        """Build the English time classifier (spoken form -> tagged fields).

        Emits ``hours`` and ``minutes`` fields plus optional ``suffix`` and
        ``zone`` fields.
        """
        super().__init__(name="time", kind="classify")
        # hours, minutes, seconds, suffix, zone, style, speak_period

        suffix_table = pynini.string_file(get_abs_path("data/time/time_suffix.tsv"))
        zone_table = pynini.invert(pynini.string_file(get_abs_path("data/time/time_zone.tsv")))
        hour_before_table = pynini.string_file(get_abs_path("data/time/time_to.tsv"))

        # Cardinal graph with a weight of -0.7 applied.
        weighted_cardinal = pynutil.add_weight(CardinalFst().graph_no_exception, weight=-0.7)

        def spoken_numbers(start, stop):
            # Spoken forms of start..stop-1, converted through the cardinal graph.
            words = [num_to_word(value) for value in range(start, stop)]
            return pynini.union(*words) @ weighted_cardinal

        graph_hour = spoken_numbers(0, 24)
        minute_single = spoken_numbers(1, 10)
        minute_double = spoken_numbers(10, 60)
        minute_verbose = pynini.cross("half", "30") | pynini.cross("quarter", "15")
        oclock = pynini.cross(pynini.union("o' clock", "o clock", "o'clock", "oclock"), "")

        hour_field = pynutil.insert("hours: \"") + graph_hour + pynutil.insert("\"")
        # "o'clock" -> 00, "o eight" -> 8, or a two-digit minute value.
        graph_minute = (
            oclock + pynutil.insert("00")
            | pynutil.delete("o") + delete_space + minute_single
            | minute_double
        )
        suffix_field = pynutil.insert("suffix: \"") + convert_space(suffix_table) + pynutil.insert("\"")
        optional_suffix = pynini.closure(delete_space + insert_space + suffix_field, 0, 1)
        optional_zone = pynini.closure(
            delete_space
            + insert_space
            + pynutil.insert("zone: \"")
            + convert_space(zone_table)
            + pynutil.insert("\""),
            0,
            1,
        )

        # five o' clock
        # two o eight, two thirty five (am/pm)
        # two pm/am
        graph_hm = (
            hour_field + delete_extra_space + pynutil.insert("minutes: \"") + graph_minute + pynutil.insert("\"")
        )

        # 10 past four, quarter past four, half past four
        graph_mh = (
            pynutil.insert("minutes: \"")
            + pynini.union(minute_single, minute_double, minute_verbose)
            + pynutil.insert("\"")
            + delete_space
            + pynutil.delete("past")
            + delete_extra_space
            + hour_field
        )

        # quarter to / till <hour>: minutes 45, hour taken from the
        # time_to table.
        graph_quarter_time = (
            pynutil.insert("minutes: \"")
            + pynini.cross("quarter", "45")
            + pynutil.insert("\"")
            + delete_space
            + pynutil.delete(pynini.union("to", "till"))
            + delete_extra_space
            + pynutil.insert("hours: \"")
            + hour_before_table
            + pynutil.insert("\"")
        )

        # <hour> [<minute>] <suffix> [<zone>]: the suffix is mandatory here,
        # and the minutes default to 00 when absent.
        graph_h = (
            hour_field
            + delete_extra_space
            + pynutil.insert("minutes: \"")
            + (pynutil.insert("00") | graph_minute)
            + pynutil.insert("\"")
            + delete_space
            + insert_space
            + suffix_field
            + optional_zone
        )

        final_graph = (graph_hm | graph_mh | graph_quarter_time) + optional_suffix + optional_zone
        final_graph |= graph_h

        final_graph = self.add_tokens(final_graph)

        self.fst = final_graph.optimize()
示例#10
0
    # Any single whitespace symbol: space, tab, newline, carriage return or
    # non-breaking space (U+00A0).
    NEMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r",
                                    u"\u00A0").optimize()
    # NEMO_CHAR minus whitespace, and NEMO_CHAR minus the double quote.
    NEMO_NOT_SPACE = pynini.difference(NEMO_CHAR, NEMO_WHITE_SPACE).optimize()
    NEMO_NOT_QUOTE = pynini.difference(NEMO_CHAR, r'"').optimize()

    # One escaped acceptor per character of string.punctuation.
    NEMO_PUNCT = pynini.union(
        *map(pynini.escape, string.punctuation)).optimize()
    # Alphanumerics plus punctuation.
    NEMO_GRAPH = pynini.union(NEMO_ALNUM, NEMO_PUNCT).optimize()

    # Zero or more NEMO_CHAR symbols.
    NEMO_SIGMA = pynini.closure(NEMO_CHAR)

    # Whitespace helpers: delete any run (possibly empty), delete at most one
    # symbol, insert one space, or squeeze one-or-more symbols to one space.
    delete_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE))
    delete_zero_or_one_space = pynutil.delete(
        pynini.closure(NEMO_WHITE_SPACE, 0, 1))
    insert_space = pynutil.insert(" ")
    delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ")
    # Removes any number of ` preserve_order: true` / ` field_order: "..."`
    # markers.
    # NOTE(review): the field_order branch matches exactly one NEMO_NOT_QUOTE
    # symbol between the quotes (no closure) and passes it through - confirm
    # that is intended.
    delete_preserve_order = pynini.closure(
        pynutil.delete(" preserve_order: true")
        | (pynutil.delete(" field_order: \"") + NEMO_NOT_QUOTE +
           pynutil.delete("\"")))

    # Table-driven irregular forms (presumably irregular plurals, given the
    # pluralization rules below - verify against the data file).
    suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv"))
    # _v = pynini.union("a", "e", "i", "o", "u")
    # Consonant letters.
    _c = pynini.union("b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n",
                      "p", "q", "r", "s", "t", "v", "w", "x", "y", "z")
    # Suffix rules: consonant + "y" -> "ies"; after s/sh/ch/x/z append "es";
    # otherwise append "s".
    _ies = NEMO_SIGMA + _c + pynini.cross("y", "ies")
    _es = NEMO_SIGMA + pynini.union("s", "sh", "ch", "x",
                                    "z") + pynutil.insert("es")
    _s = NEMO_SIGMA + pynutil.insert("s")

    graph_plural = plurals._priority_union(
示例#11
0
    def __init__(self,
                 cardinal: GraphFst,
                 decimal: GraphFst,
                 deterministic: bool = True):
        """Build the money graph (tagger composed with verbalizer).

        Args:
            cardinal: supplies ``cardinal_numbers``.
            decimal: supplies ``final_graph``.
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="money",
                         kind="classify",
                         deterministic=deterministic)
        numbers = cardinal.cardinal_numbers
        decimals = decimal.final_graph

        singular_names = pynini.string_file(
            get_abs_path("data/currency/currency_singular.tsv"))
        plural_names = pynini.string_file(
            get_abs_path("data/currency/currency_plural.tsv"))

        # ---------------- tagger ----------------
        # adding weight to make sure the space is preserved for ITN
        optional_delimiter = pynini.closure(
            pynutil.add_weight(pynini.cross(NEMO_SPACE, ""), -100), 0, 1)
        currency_singular = (optional_delimiter +
                             pynutil.insert(" currency: \"") +
                             singular_names + pynutil.insert("\""))
        currency_plural = (optional_delimiter +
                           pynutil.insert(" currency: \"") + plural_names +
                           pynutil.insert("\""))

        # "1" takes the singular currency name, every other integer and all
        # decimals take the plural one.
        one = pynini.compose(pynini.accep("1"), numbers).optimize()
        amount_of_one = (pynutil.insert("integer_part: \"") + one +
                         pynutil.insert("\"") + currency_singular)

        decimal_amount = decimals + currency_plural

        integer_amount = (pynutil.insert("integer_part: \"") +
                          ((NEMO_SIGMA - "1") @ numbers) +
                          pynutil.insert("\"") + currency_plural)
        integer_amount |= amount_of_one

        tagger_graph = (integer_amount.optimize()
                        | decimal_amount.optimize()).optimize()

        # ---------------- verbalizer ----------------
        # A non-empty quoted value with the quotes stripped.
        quoted_value = pynutil.delete("\"") + pynini.closure(
            NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
        integer_part = pynutil.delete("integer_part: ") + quoted_value

        currency = (pynutil.delete("currency: ") + pynutil.delete("\"") +
                    pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\""))
        currency = pynini.accep(NEMO_SPACE) + currency

        verbalize_cardinal = (integer_part + currency).optimize()

        fractional_part = pynutil.delete("fractional_part: ") + quoted_value
        optional_quantity = pynini.closure(
            pynini.accep(NEMO_SPACE) + pynutil.delete("quantity: ") +
            quoted_value, 0, 1)

        verbalize_decimal = (pynutil.delete('decimal { ') + integer_part +
                             pynini.accep(" ") + fractional_part +
                             optional_quantity + pynutil.delete(" }") +
                             currency)

        verbalizer_graph = (verbalize_cardinal | verbalize_decimal).optimize()

        self.final_graph = (tagger_graph @ verbalizer_graph).optimize()
        self.fst = self.add_tokens(self.final_graph).optimize()
示例#12
0
    def __init__(self):
        """Build the cardinal classifier from the number-word tables.

        Words listed in the zero/digit/ties/teen TSV files are rewritten when
        they stand between a start-of-string or space and a space or
        end-of-string. A standalone "and" between spaces is deleted first.
        ``self.graph`` additionally rejects inputs equal to
        ``num_to_word(x)`` for ``x`` in 0..12, while
        ``self.graph_no_exception`` keeps them. The final FST optionally
        consumes a leading "minus" and wraps the result as
        ``integer: "..."`` before adding tokens.

        Compared with the previous revision this removes a bare reference to
        the unassigned name ``tr_one_to_two_digit`` and two reads of the
        unassigned name ``rw_number``; each would raise ``NameError``. It
        also drops the duplicated exception/graph setup whose results were
        overwritten further down.
        """
        super().__init__(name="cardinal", kind="classify")

        left_context = pynini.union("[BOS]", NEMO_SPACE)
        right_context = pynini.union(NEMO_SPACE, "[EOS]")

        tr_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
        tr_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
        tr_ties = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
        tr_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))

        # TODO: the "hundred" component is built but not yet part of
        # tr_number, so hundreds are not rewritten by this grammar.
        tr_remove_hundred = pynini.cross("hundred", "")
        tr_hundred = tr_digit + delete_space + tr_remove_hundred

        tr_number = pynini.union(tr_zero, tr_digit, tr_ties, tr_teen)

        graph = pynini.cdrewrite(tr_number, left_context, right_context, NEMO_SIGMA)

        # NOTE(review): placeholder string, not an FST -- confirm that no
        # caller composes with this attribute yet.
        self.graph_hundred_component_at_least_one_none_zero_digit = ""

        # Number words for 0..12 are excluded from self.graph below.
        labels_exception = [num_to_word(x) for x in range(0, 13)]
        graph_exception = pynini.union(*labels_exception)

        # Delete "and" between spaces, require the input to start with a
        # letter, then apply the number rewrite.
        graph = (
            pynini.cdrewrite(pynutil.delete("and"), NEMO_SPACE, NEMO_SPACE, NEMO_SIGMA)
            @ (NEMO_ALPHA + NEMO_SIGMA)
            @ graph
        )

        self.graph_no_exception = graph

        self.graph = (pynini.project(graph, "input") - graph_exception.arcsort()) @ graph

        optional_minus_graph = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("minus", "\"-\"") + NEMO_SPACE, 0, 1
        )

        final_graph = optional_minus_graph + pynutil.insert("integer: \"") + self.graph + pynutil.insert("\"")

        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()
    def __init__(self, deterministic: bool = True):
        """Assemble the cardinal classification grammar.

        Publishes ``self.graph`` (digit groups with optional thousands
        commas composed with the number-name FAR), ``self.single_digits_graph``
        (digit-by-digit reading) and, in non-deterministic mode only,
        ``self.range_graph``. ``self.fst`` is the token-wrapped union of the
        readings selected by ``deterministic``.

        Args:
            deterministic: when True only the main, serial, long-number and
                leading-zero readings are unioned; when False the range,
                digit-by-digit, hundreds and comma-grouped readings are
                offered as well.
        """
        super().__init__(name="cardinal",
                         kind="classify",
                         deterministic=deterministic)

        def spell_out(per_digit):
            """Chain ``per_digit`` one or more times, space-separated."""
            return per_digit + pynini.closure(insert_space + per_digit)

        # TODO replace to have "oh" as a default for "0"
        number_names = pynini.Far(
            get_abs_path("data/numbers/cardinal_number_name.far")).get_fst()

        non_zero_digit = pynini.difference(NEMO_DIGIT, pynini.accep("0"))
        self.graph_hundred_component_at_least_one_none_zero_digit = (
            pynini.closure(NEMO_DIGIT, 2, 3) | non_zero_digit) @ number_names

        # One to three leading digits, then any number of three-digit groups,
        # each optionally preceded by a comma that is deleted.
        thousands_group = (pynini.closure(pynutil.delete(","), 0, 1) +
                           NEMO_DIGIT + NEMO_DIGIT + NEMO_DIGIT)
        self.graph = (pynini.closure(NEMO_DIGIT, 1, 3) +
                      pynini.closure(thousands_group)) @ number_names

        digit_names = pynini.string_file(
            get_abs_path("data/numbers/digit.tsv"))
        zero_names = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))

        any_digit = pynini.invert(digit_names | zero_names)
        self.single_digits_graph = spell_out(any_digit)

        if not deterministic:
            # for a single token allow only the same normalization
            # "007" -> {"oh oh seven", "zero zero seven"} not {"oh zero seven"}
            digit_with_zero = pynini.invert(digit_names | zero_names)
            digit_with_oh = pynini.invert(digit_names) | pynini.cross(
                "0", "oh")

            self.single_digits_graph = spell_out(digit_with_zero)
            self.single_digits_graph |= spell_out(digit_with_oh)

            leading_digits = pynini.closure(
                self.single_digits_graph + insert_space, 1, 3)
            comma_triplet = (pynutil.delete(",") + any_digit + insert_space +
                             any_digit + insert_space + any_digit)
            digits_with_commas = leading_digits + pynini.closure(
                comma_triplet, 1)

            dash_as_to = pynini.cross("-", " to ")
            x_as_by = pynini.cross("x", " by ") | pynini.cross(" x ", " by ")

            self.range_graph = (pynutil.insert("from ") + self.graph +
                                dash_as_to + self.graph)
            self.range_graph |= self.graph + x_as_by + self.graph
            self.range_graph |= (pynutil.insert("from ") +
                                 get_hundreds_graph() + dash_as_to +
                                 get_hundreds_graph())
            self.range_graph = self.range_graph.optimize()

        serial = self.get_serial_graph()
        maybe_negative = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0,
            1)

        if deterministic:
            # Inputs of five or more digits also get a digit-by-digit reading
            # with a slightly negative weight.
            five_plus_digits = pynini.compose(
                NEMO_DIGIT**(5, ...), self.single_digits_graph).optimize()
            readings = self.graph | serial | pynutil.add_weight(
                five_plus_digits, -0.001)
            readings |= pynini.compose(
                pynini.accep("0") + pynini.closure(NEMO_DIGIT),
                self.single_digits_graph)
        else:
            zero_run = pynini.compose(
                pynini.closure(pynini.accep("0"), 1), self.single_digits_graph)
            zero_prefixed = (
                zero_run + pynutil.insert(" ") +
                pynini.compose(pynini.closure(NEMO_DIGIT), self.graph))

            readings = (self.graph
                        | serial
                        | self.range_graph
                        | self.single_digits_graph
                        | get_hundreds_graph()
                        | pynutil.add_weight(digits_with_commas, 0.001)
                        | zero_prefixed)

        tagged = (maybe_negative + pynutil.insert("integer: \"") + readings +
                  pynutil.insert("\""))
        self.fst = self.add_tokens(tagged).optimize()
示例#14
0
    def __init__(self, deterministic: bool = True, lm: bool = False):
        """Assemble the cardinal classification grammar.

        Publishes ``self.graph`` (digit groups, with or without thousands
        commas, composed with the number-name FAR), ``self.graph_with_and``
        (the result of ``self.add_optional_and`` on that graph) and
        ``self.single_digits_graph`` (digit-by-digit reading). ``self.fst``
        is the token-wrapped union of the readings selected by
        ``deterministic``.

        Args:
            deterministic: when True the long-number reading takes priority
                over ``graph_with_and``; when False several weighted
                alternatives are unioned instead.
            lm: stored on the instance as ``self.lm``; not otherwise read in
                this constructor.
        """
        super().__init__(name="cardinal",
                         kind="classify",
                         deterministic=deterministic)
        self.lm = lm
        self.deterministic = deterministic

        def read_out(per_digit):
            """Chain ``per_digit`` one or more times, space-separated."""
            return per_digit + pynini.closure(insert_space + per_digit)

        # TODO replace to have "oh" as a default for "0"
        number_names = pynini.Far(
            get_abs_path("data/number/cardinal_number_name.far")).get_fst()

        non_zero_digit = pynini.difference(NEMO_DIGIT, pynini.accep("0"))
        self.graph_hundred_component_at_least_one_none_zero_digit = (
            pynini.closure(NEMO_DIGIT, 2, 3) | non_zero_digit) @ number_names

        digit_names = pynini.string_file(get_abs_path("data/number/digit.tsv"))
        zero_names = pynini.string_file(get_abs_path("data/number/zero.tsv"))

        any_digit = pynini.invert(digit_names | zero_names)
        self.single_digits_graph = read_out(any_digit)

        if not deterministic:
            # for a single token allow only the same normalization
            # "007" -> {"oh oh seven", "zero zero seven"} not {"oh zero seven"}
            digit_with_zero = pynini.invert(digit_names | zero_names)
            digit_with_oh = pynini.invert(digit_names) | pynini.cross(
                "0", "oh")

            self.single_digits_graph = read_out(digit_with_zero)
            self.single_digits_graph |= read_out(digit_with_oh)

            leading_digits = pynini.closure(
                self.single_digits_graph + insert_space, 1, 3)
            comma_triplet = (pynutil.delete(",") + any_digit + insert_space +
                             any_digit + insert_space + any_digit)
            digits_with_commas = leading_digits + pynini.closure(
                comma_triplet, 1)

        maybe_negative = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0,
            1)

        # One to three leading digits followed by three-digit groups that are
        # either all comma-prefixed (comma deleted) or all bare.
        groups_with_commas = pynini.closure(
            pynutil.delete(",") + NEMO_DIGIT**3)
        groups_plain = pynini.closure(NEMO_DIGIT**3)
        self.graph = (pynini.closure(NEMO_DIGIT, 1, 3) +
                      (groups_with_commas | groups_plain)) @ number_names
        self.graph_with_and = self.add_optional_and(self.graph)

        if deterministic:
            five_plus_digits = pynini.compose(
                NEMO_DIGIT**(5, ...), self.single_digits_graph).optimize()
            readings = plurals._priority_union(five_plus_digits,
                                               self.graph_with_and,
                                               NEMO_SIGMA).optimize()
            readings |= pynini.compose(
                pynini.accep("0") + pynini.closure(NEMO_DIGIT),
                self.single_digits_graph)
        else:
            zero_run = pynini.compose(
                pynini.closure(pynini.accep("0"), 1), self.single_digits_graph)
            zero_prefixed = (
                zero_run + pynutil.insert(" ") + pynini.compose(
                    pynini.closure(NEMO_DIGIT), self.graph_with_and))

            # add small weight to non-default graphs to make sure the
            # deterministic option is listed first
            readings = (
                self.graph_with_and
                | pynutil.add_weight(self.single_digits_graph, 0.0001)
                # allows e.g. 4567 be pronounced as forty five sixty seven
                | get_four_digit_year_graph()
                | pynutil.add_weight(digits_with_commas, 0.0001)
                | zero_prefixed)

        tagged = (maybe_negative + pynutil.insert("integer: \"") + readings +
                  pynutil.insert("\""))
        self.fst = self.add_tokens(tagged).optimize()
示例#15
0
文件: date.py 项目: noetits/NeMo
 def _get_digits_graph():
     """Return the optimized graph for a spoken zero followed by a digit.

     "oh" or "o" is rewritten to "0", then ``delete_space`` and
     ``graph_digit`` (both taken from the enclosing scope) are appended.
     """
     spoken_zero = pynini.accep("oh") | pynini.accep("o")
     zero_then_digit = pynini.cross(spoken_zero, "0") + delete_space + graph_digit
     return zero_then_digit.optimize()
示例#16
0
    def __init__(self, syms):
        """Compile the two mapping transducers kept as ``__map1`` and ``__map2``.

        Everything is built inside ``pynini.default_token_type(syms.alphabet)``
        so every string literal below is parsed with that token type.

        Args:
            syms: symbol inventory; this constructor reads its ``alphabet``
                and its acceptors for characters, categories, stem types,
                the prefix/suffix marker, the various feature sets and the
                inflection classes.
        """
        with pynini.default_token_type(syms.alphabet):

            def from_empty(output_side):
                """Transducer pairing the empty input with ``output_side``."""
                return pynini.cross("", output_side)

            def to_empty(input_side):
                """Transducer pairing ``input_side`` with the empty output."""
                return pynini.cross(input_side, "")

            def one_of(symbols):
                """Acceptor for any single entry of ``symbols``."""
                return pynini.string_map(symbols).project("input")

            # store alphabet
            self.__syms = syms

            # Empty-input pairings for each symbol class.
            eps_initial_feats = from_empty(syms.initial_features)
            eps_category = from_empty(
                pynini.union(syms.categories, syms.disjunctive_categories))
            eps_stem_type = from_empty(syms.stem_types)
            # Built as in the original; not referenced further down.
            eps_affix_marker = from_empty(syms.prefix_suffix_marker)
            eps_stem_type_feats = from_empty(syms.stem_type_features)
            eps_origin_feats = from_empty(syms.origin_features)
            eps_complexity_agreement = from_empty(
                syms.complexity_agreement_features)
            eps_complexity_entry = from_empty(syms.complexity_entry_features)
            eps_infl_class = from_empty(syms.inflection_classes)

            # Empty-output pairings.
            affix_marker_to_eps = to_empty(syms.prefix_suffix_marker)
            complexity_entry_to_eps = to_empty(syms.complexity_entry_features)

            # disjunctive features
            disjunctive_feat_list = [
                "<CARD,DIGCARD,NE>", "<ADJ,CARD>", "<ADJ,NN>", "<CARD,NN>",
                "<CARD,NE>", "<ABK,ADJ,NE,NN>", "<ADJ,NE,NN>", "<ABK,NE,NN>",
                "<NE,NN>", "<ABK,CARD,NN>", "<ABK,NN>", "<ADJ,CARD,NN,V>",
                "<ADJ,NN,V>", "<ABK,ADJ,NE,NN,V>", "<ADJ,NE,NN,V>",
                "<ADV,NE,NN,V>", "<ABK,NE,NN,V>", "<NE,NN,V>", "<ABK,NN,V>",
                "<NN,V>", "<frei,fremd,gebunden>",
                "<frei,fremd,gebunden,kurz>", "<frei,fremd,gebunden,lang>",
                "<fremd,gebunden,lang>", "<frei,fremd,kurz>",
                "<frei,fremd,lang>", "<frei,gebunden>",
                "<frei,gebunden,kurz,lang>", "<frei,gebunden,lang>",
                "<frei,lang>", "<klassisch,nativ>", "<fremd,klassisch,nativ>",
                "<fremd,klassisch>", "<frei,nativ>", "<frei,fremd,nativ>",
                "<fremd,nativ>", "<nativ,prefnativ>", "<frei,nativ,prefnativ>",
                "<komposit,prefderiv,simplex,suffderiv>",
                "<prefderiv,suffderiv>", "<komposit,prefderiv,simplex>",
                "<komposit,simplex,suffderiv>", "<komposit,simplex>",
                "<prefderiv,simplex,suffderiv>", "<prefderiv,simplex>",
                "<simplex,suffderiv>"
            ]
            disjunctive_feats = one_of(disjunctive_feat_list).optimize()
            eps_disjunctive_feats = from_empty(disjunctive_feats)

            # short cut: map_helper1 -- any sequence of these alternatives
            helper1_alternatives = [
                syms.characters,
                pynini.accep("<FB>"),
                pynini.accep("<SS>"),
                pynini.cross("e", "<Ge-Nom>"),
                pynini.cross("n", "<n>"),
                pynini.cross("e", "<e>"),
                pynini.cross("d", "<d>"),
                from_empty("<~n>"),
                from_empty("<UL>"),
                eps_stem_type,
                syms.prefix_suffix_marker,
                eps_stem_type_feats,
                from_empty("<ge>"),
                eps_origin_feats,
                eps_complexity_agreement,
                eps_complexity_entry,
                eps_infl_class,
                eps_disjunctive_feats,
            ]
            map_helper1 = pynini.union(
                *helper1_alternatives).closure().optimize()

            # short cut: map_helper2 -- helper1, an optional character-led
            # run of characters/<SUFF>/<CONV>/categories, helper1 again
            run_symbol = pynini.union(
                pynini.union(syms.characters, pynini.accep("<SUFF>"),
                             pynini.accep("<CONV>")), syms.categories)
            optional_run = (syms.characters + run_symbol.closure()).closure(
                0, 1)
            map_helper2 = (map_helper1 +
                           (optional_run + map_helper1)).optimize()

            # Alternatives of the first map, one per stem-class tag.
            base_or_pref = (
                from_empty(one_of(["<Base_Stems>", "<Pref_Stems>"])) +
                (map_helper2 + eps_category))
            deriv_or_kompos = (
                from_empty(one_of(["<Deriv_Stems>", "<Kompos_Stems>"])) +
                (map_helper2 + syms.categories))
            pref_only = (from_empty("<Pref_Stems>") + map_helper1 +
                         eps_category)
            suff_head = (from_empty("<Suff_Stems>") + map_helper1 +
                         eps_category)
            suff_base_single = (suff_head + map_helper1 + syms.categories +
                                from_empty("<base>"))
            suff_base_chain = (
                suff_head +
                pynini.concat(map_helper1, eps_category +
                              pynini.accep("<SUFF>")).closure(1) +
                from_empty("<base>"))
            suff_deriv_chain = (
                suff_head +
                pynini.concat(map_helper1, syms.categories +
                              pynini.accep("<SUFF>")).closure(1) +
                from_empty(one_of(["<deriv>", "<kompos>"])))

            stem_alternatives = pynini.union(
                base_or_pref, deriv_or_kompos, pref_only, suff_base_single,
                suff_base_chain, suff_deriv_chain)
            self.__map1 = (eps_initial_feats.closure() +
                           (stem_alternatives + map_helper1)).optimize()

            # Each combined origin tag is paired with any one of the three
            # listed individual tags.
            origin_splits = [
                ("<NGeo-0-$er-$er>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-$er-NMasc_s_0>",
                    "<NGeo-$er-Adj0-Up>"]),
                ("<NGeo-0-$er-$isch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-$er-NMasc_s_0>",
                    "<NGeo-$isch-Adj+>"]),
                ("<NGeo-0-aner-aner>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-aner-NMasc_s_0>",
                    "<NGeo-aner-Adj0-Up>"]),
                ("<NGeo-0-aner-anisch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-aner-NMasc_s_0>",
                    "<NGeo-anisch-Adj+>"]),
                ("<NGeo-0-e-isch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-0-er-er>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-er-Adj0-Up>"]),
                ("<NGeo-0-0-0>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-0-NMasc_s_0>",
                    "<NGeo-0-Adj0-Up>"]),
                ("<NGeo-0-er-erisch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-erisch-Adj+>"]),
                ("<NGeo-0-er-isch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-0-ese-esisch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-ese-NMasc_n_n>",
                    "<NGeo-esisch-Adj+>"]),
                ("<NGeo-0-ianer-ianisch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-ianer-NMasc_s_0>",
                    "<NGeo-ianisch-Adj+>"]),
                ("<NGeo-0-ner-isch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-ner-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-0-ner-nisch>", [
                    "<NGeo-0-Name-Neut_s>", "<NGeo-ner-NMasc_s_0>",
                    "<NGeo-nisch-Adj+>"]),
                ("<NGeo-0fem-er-erisch>", [
                    "<NGeo-0-Name-Fem_0>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-erisch-Adj+>"]),
                ("<NGeo-0masc-er-isch>", [
                    "<NGeo-0-Name-Masc_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-0masc-ese-esisch>", [
                    "<NGeo-0-Name-Masc_s>", "<NGeo-ese-NMasc_n_n>",
                    "<NGeo-esisch-Adj+>"]),
                ("<NGeo-a-er-isch>", [
                    "<NGeo-a-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-a-ese-esisch>", [
                    "<NGeo-a-Name-Neut_s>", "<NGeo-ese-NMasc_n_n>",
                    "<NGeo-esisch-Adj+>"]),
                ("<NGeo-afem-er-isch>", [
                    "<NGeo-a-Name-Fem_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-e-er-er>", [
                    "<NGeo-e-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-er-Adj0-Up>"]),
                ("<NGeo-e-er-isch>", [
                    "<NGeo-e-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-efem-er-isch>", [
                    "<NGeo-e-Name-Fem_0>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-ei-e-isch>", [
                    "<NGeo-ei-Name-Fem_0>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-en-aner-anisch>", [
                    "<NGeo-en-Name-Neut_s>", "<NGeo-aner-NMasc_s_0>",
                    "<NGeo-anisch-Adj+>"]),
                ("<NGeo-en-e-$isch>", [
                    "<NGeo-en-Name-Neut_s>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-$isch-Adj+>"]),
                ("<NGeo-en-e-isch>", [
                    "<NGeo-en-Name-Neut_s>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-en-er-er>", [
                    "<NGeo-en-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-er-Adj0-Up>"]),
                ("<NGeo-en-er-isch>", [
                    "<NGeo-en-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-ien-e-isch>", [
                    "<NGeo-ien-Name-Neut_s>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-ien-er-isch>", [
                    "<NGeo-ien-Name-Neut_s>", "<NGeo-er-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-ien-ese-esisch>", [
                    "<NGeo-ien-Name-Neut_s>", "<NGeo-ese-NMasc_n_n>",
                    "<NGeo-esisch-Adj+>"]),
                ("<NGeo-ien-ianer-ianisch>", [
                    "<NGeo-ien-Name-Neut_s>", "<NGeo-ianer-NMasc_s_0>",
                    "<NGeo-ianisch-Adj+>"]),
                ("<NGeo-ien-ier-isch>", [
                    "<NGeo-ien-Name-Neut_s>", "<NGeo-ier-NMasc_s_0>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-istan-e-isch>", [
                    "<NGeo-istan-Name-Neut_s>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-land-$er-$er>", [
                    "<NGeo-land-Name-Neut_s>", "<NGeo-$er-NMasc_s_0>",
                    "<NGeo-$er-Adj0-Up>"]),
                ("<NGeo-land-e-isch>", [
                    "<NGeo-land-Name-Neut_s>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-isch-Adj+>"]),
                ("<NGeo-land-e-nisch>", [
                    "<NGeo-land-Name-Neut_s>", "<NGeo-e-NMasc_n_n>",
                    "<NGeo-nisch-Adj+>"]),
            ]
            split_origin_features = pynini.union(*[
                pynini.cross(combined, one_of(parts))
                for combined, parts in origin_splits
            ]).optimize()

            map_helper3 = pynini.union(
                syms.characters,
                syms.circumfix_features,
                syms.initial_features,
                syms.stem_types,
                syms.categories,
                affix_marker_to_eps,
                syms.stem_type_features,
                syms.origin_features,
                syms.complexity_agreement_features,
                complexity_entry_to_eps,
                syms.inflection_classes,
                self.__split_disjunctive_feats(disjunctive_feat_list),
                split_origin_features,
            ).optimize()

            # Optional tail: e:<e>, "l" or "r", optional category, <V>,
            # optional <SUFF>/<CONV>, "<base> <nativ>", optional complexity
            # entry, <VVReg-el/er>.
            # The in-place closure on complexity_entry_to_eps happens only
            # here, after map_helper3 has already been built from it.
            el_er_tail = (
                pynini.cross("e", "<e>") +
                one_of(["l", "r"]) +
                one_of(["<ADJ>", "<NE>", "<NN>", "<V>"]).closure(0, 1) +
                pynini.accep("<V>") +
                one_of(["<SUFF>", "<CONV>"]).closure(0, 1) +
                pynini.accep("<base> <nativ>") +
                complexity_entry_to_eps.closure(0, 1) +
                pynini.accep("<VVReg-el/er>"))
            self.__map2 = (map_helper3.closure() +
                           el_er_tail.closure(0, 1)).optimize()
示例#17
0
文件: telephone.py 项目: NVIDIA/NeMo
    def __init__(self, deterministic: bool = True):
        """Build the telephone classifier covering phone numbers, IPs and SSNs.

        Args:
            deterministic: forwarded to the base class; when False the digit
                "0" may additionally be read as "o" or "oh".
        """
        super().__init__(name="telephone", kind="classify", deterministic=deterministic)

        component_sep = pynutil.insert(", ")  # pause between number components

        # single digits are read through the inverted digit.tsv table; "0" is
        # handled separately so extra readings can be added on demand
        zero_word = pynini.cross("0", "zero")
        if not deterministic:
            zero_word |= pynini.cross("0", pynini.union("o", "oh"))
        digit_table = pynini.string_file(get_abs_path("data/number/digit.tsv"))
        spoken_digit = pynini.invert(digit_table).optimize() | zero_word

        # country code: optional prompt, optional "+", one to three digits
        prompts = pynini.string_file(get_abs_path("data/telephone/telephone_prompt.tsv"))
        optional_prompt = pynini.closure(prompts + delete_extra_space, 0, 1)
        optional_plus = pynini.closure(pynini.cross("+", "plus "), 0, 1)
        code_digits = pynini.closure(spoken_digit + insert_space, 0, 2) + spoken_digit
        country_code = optional_prompt + optional_plus + code_digits + pynutil.insert(",")
        country_code |= prompts
        country_code = pynutil.insert("country_code: \"") + country_code + pynutil.insert("\"")
        after_country_code = pynini.closure(pynutil.delete("-"), 0, 1) + delete_space + insert_space
        country_code = country_code + after_country_code

        # area code: three digits read one by one, except "800"
        three_digits = pynini.closure(spoken_digit + insert_space, 2, 2) + spoken_digit
        anything_but_800 = pynini.difference(NEMO_SIGMA, "800")
        area_code = pynini.cross("800", "eight hundred") | pynini.compose(anything_but_800, three_digits)
        plain_area = area_code + (pynutil.delete("-") | pynutil.delete("."))
        closing_bracket = (pynutil.delete(")") + pynini.closure(pynutil.delete(" "), 0, 1)) | pynutil.delete(")-")
        bracketed_area = pynutil.delete("(") + area_code + closing_bracket
        area_part = (plain_area | bracketed_area) + component_sep

        # remaining seven characters (digits or letters), each optionally
        # followed by one separator
        optional_separator = pynini.closure(pynini.union("-", " ", "."), 0, 1)
        seven_symbols = ((NEMO_DIGIT + optional_separator) | (NEMO_ALPHA + optional_separator)) ** 7

        def words_split_on(separator):
            # reading of the subscriber number when `separator` splits its groups
            digit_reading = (NEMO_DIGIT @ spoken_digit) + (insert_space | pynini.cross(separator, ', '))
            letter_with_separator = NEMO_ALPHA + pynini.cross(separator, ' ')
            return pynini.closure(digit_reading | NEMO_ALPHA | letter_with_separator)

        number_words = words_split_on("-") | words_split_on(".")
        number_words = pynini.compose(seven_symbols, number_words)

        number_part = pynutil.insert("number_part: \"") + (area_part + number_words) + pynutil.insert("\"")

        # optional extension of one to four digits
        extension_digits = pynini.closure(spoken_digit + insert_space, 0, 3) + spoken_digit
        extension = pynutil.insert("extension: \"") + extension_digits + pynutil.insert("\"")
        extension = pynini.closure(insert_space + extension, 0, 1)

        # stack the variants with priority union, starting from the bare number
        graph = number_part
        prioritized_variants = (
            country_code + number_part,
            country_code + number_part + extension,
            number_part + extension,
        )
        for variant in prioritized_variants:
            graph = plurals._priority_union(variant, graph, NEMO_SIGMA).optimize()

        def with_optional_prompt(prompt_fst, body):
            # tag an optional prompt as country_code and `body` as number_part
            tagged_prompt = pynini.closure(
                pynutil.insert("country_code: \"") + prompt_fst + pynutil.insert("\"") + delete_extra_space, 0, 1
            )
            return tagged_prompt + pynutil.insert("number_part: \"") + body.optimize() + pynutil.insert("\"")

        # ip: four groups of one to three digits joined by " dot "
        ip_prompts = pynini.string_file(get_abs_path("data/telephone/ip_prompt.tsv"))
        ip_group = spoken_digit + pynini.closure(pynutil.insert(" ") + spoken_digit, 0, 2)
        ip_graph = ip_group + (pynini.cross(".", " dot ") + ip_group) ** 3
        graph |= with_optional_prompt(ip_prompts, ip_graph)

        # ssn: 3-2-4 digits, dashes rewritten to ", "
        ssn_prompts = pynini.string_file(get_abs_path("data/telephone/ssn_prompt.tsv"))
        next_digit = pynutil.insert(" ") + spoken_digit
        ssn_first = spoken_digit + next_digit ** 2
        ssn_second = spoken_digit + pynutil.insert(" ") + spoken_digit
        ssn_third = spoken_digit + next_digit ** 3
        dash_to_comma = pynini.cross("-", ", ")
        ssn_graph = ssn_first + dash_to_comma + ssn_second + dash_to_comma + ssn_third
        graph |= with_optional_prompt(ssn_prompts, ssn_graph)

        self.fst = self.add_tokens(graph).optimize()
示例#18
0
文件: decimal.py 项目: sycomix/NeMo
    def __init__(self, cardinal: GraphFst, deterministic: bool):
        """Assemble the decimal classifier on top of a cardinal grammar.

        Args:
            cardinal: cardinal grammar; its ``graph_with_and``,
                ``graph_hundred_component_at_least_one_none_zero_digit`` and
                ``single_digits_graph`` attributes are read here.
            deterministic: forwarded to the base class; when False additional
                readings are produced (fractional part read as a cardinal,
                "oh" for a "zero" integer part).
        """
        super().__init__(name="decimal", kind="classify", deterministic=deterministic)

        integer_reading = cardinal.graph_with_and
        hundred_component = cardinal.graph_hundred_component_at_least_one_none_zero_digit

        # fractional part: digit by digit, plus the cardinal reading when
        # several outputs are allowed
        digit_by_digit = cardinal.single_digits_graph.optimize()
        self.graph = digit_by_digit if deterministic else digit_by_digit | integer_reading

        self.graph_fractional = (
            pynutil.insert("fractional_part: \"") + self.graph + pynutil.insert("\""))
        self.graph_integer = (
            pynutil.insert("integer_part: \"") + integer_reading + pynutil.insert("\""))

        # [integer] "." fractional, with the point itself removed
        optional_integer = pynini.closure(self.graph_integer + pynutil.insert(" "), 0, 1)
        unsigned_decimal = (
            optional_integer + pynutil.delete(".") + pynutil.insert(" ") + self.graph_fractional)

        with_abbr = get_quantity(unsigned_decimal, hundred_component, include_abbr=True)
        without_abbr = get_quantity(unsigned_decimal, hundred_component, include_abbr=False)
        self.final_graph_wo_negative_w_abbr = unsigned_decimal | with_abbr
        self.final_graph_wo_negative = unsigned_decimal | without_abbr

        # reduce options for non_deterministic and allow either "oh" or "zero", but not combination
        if not deterministic:
            oh_before_zero = NEMO_SIGMA + "oh" + NEMO_SIGMA + "zero" + NEMO_SIGMA
            zero_before_oh = NEMO_SIGMA + "zero" + NEMO_SIGMA + "oh" + NEMO_SIGMA
            no_oh_zero = pynini.difference(NEMO_SIGMA, oh_before_zero | zero_before_oh).optimize()
            no_zero_oh = pynini.difference(
                NEMO_SIGMA,
                NEMO_SIGMA + pynini.accep("zero") + NEMO_SIGMA + pynini.accep("oh") + NEMO_SIGMA,
            ).optimize()

            # add the variant whose integer part "zero" is read as "oh"
            zero_integer_to_oh = pynini.cdrewrite(
                pynini.cross("integer_part: \"zero\"", "integer_part: \"oh\""),
                NEMO_SIGMA, NEMO_SIGMA, NEMO_SIGMA)
            self.final_graph_wo_negative |= pynini.compose(
                self.final_graph_wo_negative, zero_integer_to_oh)

            # then drop every output that mixes the two readings
            for mixed_reading_filter in (no_oh_zero, no_zero_oh):
                self.final_graph_wo_negative = pynini.compose(
                    self.final_graph_wo_negative, mixed_reading_filter).optimize()

        optional_sign = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0, 1)

        tagged = self.add_tokens(optional_sign + self.final_graph_wo_negative)
        self.fst = tagged.optimize()
示例#19
0
文件: cardinal.py 项目: vadam5/NeMo
    def __init__(self):
        """Build the cardinal classifier that turns spelled-out numbers into digits.

        Exposes ``graph_hundred_component_at_least_one_none_zero_digit``,
        ``graph_no_exception``, ``graph`` and the tokenized ``fst``.
        """
        super().__init__(name="cardinal", kind="classify")
        # integer, negative

        zero_word = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
        unit_word = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
        tens_word = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
        teen_word = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))

        drop_hundred = pynini.cross("hundred", "")

        # three-place component: a missing place is filled with "0"
        hundreds_place = pynini.union(
            unit_word + delete_space + drop_hundred, pynutil.insert("0"))
        last_two_places = pynini.union(
            teen_word | pynutil.insert("00"),
            (tens_word | pynutil.insert("0")) + delete_space +
            (unit_word | pynutil.insert("0")),
        )
        three_places = hundreds_place + delete_space + last_two_places

        contains_nonzero_digit = (pynini.closure(NEMO_DIGIT) +
                                  (NEMO_DIGIT - "0") +
                                  pynini.closure(NEMO_DIGIT))
        nonzero_three_places = three_places @ contains_nonzero_digit
        self.graph_hundred_component_at_least_one_none_zero_digit = nonzero_three_places

        def scale_block(scale_word):
            # either "<1-999> <scale_word>" or an inserted, lightly penalized "000"
            return pynini.union(
                nonzero_three_places + delete_space + pynutil.delete(scale_word),
                pynutil.insert("000", weight=0.1),
            )

        # chain the scale blocks from the largest down to "thousand"
        whole_number = None
        for scale_word in ("sextillion", "quintillion", "quadrillion",
                           "trillion", "billion", "million", "thousand"):
            block = scale_block(scale_word)
            if whole_number is None:
                whole_number = block
            else:
                whole_number = whole_number + delete_space + block
        whole_number = whole_number + delete_space + three_places

        graph = pynini.union(whole_number, zero_word)

        # strip the padding zeros in front, keeping a lone "0"
        strip_leading_zeros = pynini.union(
            pynutil.delete(pynini.closure("0")) +
            pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT),
            "0")
        graph = graph @ strip_leading_zeros

        # spellings of 0..12 are excluded from self.graph below
        small_number_words = pynini.union(*map(num_to_word, range(0, 13)))

        # "and" between spaces is dropped before the number is parsed
        drop_and = pynini.cdrewrite(pynutil.delete("and"), NEMO_SPACE,
                                    NEMO_SPACE, NEMO_SIGMA)
        graph = drop_and @ graph

        self.graph_no_exception = graph

        allowed_inputs = pynini.project(graph, "input") - small_number_words.arcsort()
        self.graph = allowed_inputs @ graph

        optional_minus = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("minus", "\"-\"") +
            NEMO_SPACE, 0, 1)

        tagged = (optional_minus + pynutil.insert("integer: \"") +
                  self.graph + pynutil.insert("\""))
        self.fst = self.add_tokens(tagged).optimize()
示例#20
0
    def __init__(self,
                 cardinal: GraphFst,
                 decimal: GraphFst,
                 deterministic: bool = True):
        """Build the money classifier from cardinal and decimal grammars.

        Args:
            cardinal: cardinal grammar; ``graph`` and
                ``graph_hundred_component_at_least_one_none_zero_digit`` are
                read from it.
            decimal: decimal grammar; ``final_graph_wo_negative`` is read
                from it.
            deterministic: forwarded to the base class; when False extra
                reordered variants are added to the final graph.

        NOTE(review): ``maj_singular``, ``min_singular``, ``min_plural`` and
        ``SINGULAR_TO_PLURAL`` are not defined in this method; presumably
        they are module-level names -- confirm.
        """
        super().__init__(name="money",
                         kind="classify",
                         deterministic=deterministic)
        cardinal_graph = cardinal.graph
        graph_decimal_final = decimal.final_graph_wo_negative

        # (symbol, name) label pairs; only the symbol is used, in the loop below
        maj_singular_labels = load_labels(
            get_abs_path("data/currency/currency.tsv"))
        maj_unit_plural = convert_space(maj_singular @ SINGULAR_TO_PLURAL)
        maj_unit_singular = convert_space(maj_singular)

        # major currency unit wrapped into the currency_maj field
        graph_maj_singular = pynutil.insert(
            "currency_maj: \"") + maj_unit_singular + pynutil.insert("\"")
        graph_maj_plural = pynutil.insert(
            "currency_maj: \"") + maj_unit_plural + pynutil.insert("\"")

        # optionally removes "." followed by one or more "0"
        optional_delete_fractional_zeros = pynini.closure(
            pynutil.delete(".") + pynini.closure(pynutil.delete("0"), 1), 0, 1)

        # an integer part of exactly "1", paired with the singular unit below
        graph_integer_one = pynutil.insert("integer_part: \"") + pynini.cross(
            "1", "one") + pynutil.insert("\"")
        # only for decimals where third decimal after comma is non-zero or with quantity
        decimal_delete_last_zeros = (
            pynini.closure(NEMO_DIGIT | pynutil.delete(",")) +
            pynini.accep(".") + pynini.closure(NEMO_DIGIT, 2) +
            (NEMO_DIGIT - "0") + pynini.closure(pynutil.delete("0")))
        # any input that ends in a letter
        decimal_with_quantity = NEMO_SIGMA + NEMO_ALPHA

        # `@` binds tighter than `+`: the filter is composed with the decimal
        # graph first, then appended to the plural unit
        graph_decimal = (graph_maj_plural + insert_space +
                         (decimal_delete_last_zeros | decimal_with_quantity)
                         @ graph_decimal_final)

        # any integer except the literal "1", read through the cardinal graph
        graph_integer = (pynutil.insert("integer_part: \"") +
                         ((NEMO_SIGMA - "1") @ cardinal_graph) +
                         pynutil.insert("\""))

        # singular unit with "1", plural unit with every other integer
        graph_integer_only = graph_maj_singular + insert_space + graph_integer_one
        graph_integer_only |= graph_maj_plural + insert_space + graph_integer

        final_graph = (graph_integer_only +
                       optional_delete_fractional_zeros) | graph_decimal

        # remove trailing zeros of non zero number in the first 2 digits and fill up to 2 digits
        # e.g. 2000 -> 20, 0200->02, 01 -> 01, 10 -> 10
        # not accepted: 002, 00, 0,
        two_digits_fractional_part = (
            pynini.closure(NEMO_DIGIT) +
            (NEMO_DIGIT - "0") + pynini.closure(pynutil.delete("0"))) @ (
                (pynutil.delete("0") + (NEMO_DIGIT - "0"))
                | ((NEMO_DIGIT - "0") + pynutil.insert("0"))
                | ((NEMO_DIGIT - "0") + NEMO_DIGIT))

        # minor currency unit wrapped into the currency_min field
        graph_min_singular = pynutil.insert(
            " currency_min: \"") + min_singular + pynutil.insert("\"")
        graph_min_plural = pynutil.insert(
            " currency_min: \"") + min_plural + pynutil.insert("\"")
        # format ** dollars ** cent
        # accumulators: each stays None until the loop body assigns it
        decimal_graph_with_minor = None
        integer_graph_reordered = None
        decimal_default_reordered = None
        for curr_symbol, _ in maj_singular_labels:
            preserve_order = pynutil.insert(" preserve_order: true")
            # integer part followed by this currency's major unit; the symbol
            # is inserted and immediately mapped through the unit graph
            integer_plus_maj = graph_integer + insert_space + pynutil.insert(
                curr_symbol) @ graph_maj_plural
            integer_plus_maj |= graph_integer_one + insert_space + pynutil.insert(
                curr_symbol) @ graph_maj_singular

            # inputs starting with a non-zero digit; "," separators are dropped
            integer_plus_maj_with_comma = pynini.compose(
                NEMO_DIGIT - "0" +
                pynini.closure(NEMO_DIGIT | pynutil.delete(",")),
                integer_plus_maj)
            # plain digit strings, excluding the single string "0"
            integer_plus_maj = pynini.compose(
                pynini.closure(NEMO_DIGIT) - "0", integer_plus_maj)
            integer_plus_maj |= integer_plus_maj_with_comma

            # fractional part that normalizes to "1" -> "one" (singular minor unit)
            graph_fractional_one = two_digits_fractional_part @ pynini.cross(
                "1", "one")
            graph_fractional_one = pynutil.insert(
                "fractional_part: \"") + graph_fractional_one + pynutil.insert(
                    "\"")
            # every other one- or two-digit fractional part (plural minor unit)
            graph_fractional = (two_digits_fractional_part @ (
                pynini.closure(NEMO_DIGIT, 1, 2) - "1"
            ) @ cardinal.graph_hundred_component_at_least_one_none_zero_digit)
            graph_fractional = pynutil.insert(
                "fractional_part: \"") + graph_fractional + pynutil.insert(
                    "\"")

            fractional_plus_min = graph_fractional + insert_space + pynutil.insert(
                curr_symbol) @ graph_min_plural
            fractional_plus_min |= (
                graph_fractional_one + insert_space +
                pynutil.insert(curr_symbol) @ graph_min_singular)

            # <integer><major> "." <fraction><minor>; the point becomes a space
            decimal_graph_with_minor_curr = integer_plus_maj + pynini.cross(
                ".", " ") + fractional_plus_min

            if not deterministic:
                # extra variant: fractional part without a minor unit,
                # with a small weight
                decimal_graph_with_minor_curr |= pynutil.add_weight(
                    integer_plus_maj + pynini.cross(".", " ") +
                    pynutil.insert("fractional_part: \"") +
                    two_digits_fractional_part @ cardinal.
                    graph_hundred_component_at_least_one_none_zero_digit +
                    pynutil.insert("\""),
                    weight=0.0001,
                )
                # only defined (and only used) in non-deterministic mode
                default_fraction_graph = (
                    decimal_delete_last_zeros
                    | decimal_with_quantity) @ graph_decimal_final
            # amounts without an integer part: an optional "0", then "." and the fraction
            decimal_graph_with_minor_curr |= (
                pynini.closure(pynutil.delete("0"), 0, 1) +
                pynutil.delete(".") + fractional_plus_min)
            # the currency symbol is consumed at the start of the input
            decimal_graph_with_minor_curr = (pynutil.delete(curr_symbol) +
                                             decimal_graph_with_minor_curr +
                                             preserve_order)

            # accumulate the union over all currencies
            decimal_graph_with_minor = (
                decimal_graph_with_minor_curr
                if decimal_graph_with_minor is None else pynini.union(
                    decimal_graph_with_minor,
                    decimal_graph_with_minor_curr).optimize())

            if not deterministic:
                # integer amount with preserve_order set
                integer_graph_reordered_curr = (pynutil.delete(curr_symbol) +
                                                integer_plus_maj +
                                                preserve_order).optimize()

                integer_graph_reordered = (
                    integer_graph_reordered_curr
                    if integer_graph_reordered is None else pynini.union(
                        integer_graph_reordered,
                        integer_graph_reordered_curr).optimize())
                # default decimal reading followed by the plural major unit
                decimal_default_reordered_curr = (
                    pynutil.delete(curr_symbol) + default_fraction_graph +
                    insert_space +
                    pynutil.insert(curr_symbol) @ graph_maj_plural)

                decimal_default_reordered = (
                    decimal_default_reordered_curr
                    if decimal_default_reordered is None else pynini.union(
                        decimal_default_reordered,
                        decimal_default_reordered_curr)).optimize()

        # weight for SH
        # NOTE(review): decimal_graph_with_minor is still None here if
        # maj_singular_labels is empty -- confirm the label file is never empty
        final_graph |= pynutil.add_weight(decimal_graph_with_minor, -0.001)

        if not deterministic:
            final_graph |= integer_graph_reordered | decimal_default_reordered
            # to handle "$2.00" cases
            final_graph |= pynini.compose(
                NEMO_SIGMA + pynutil.delete(".") +
                pynini.closure(pynutil.delete("0"), 1),
                integer_graph_reordered)
        final_graph = self.add_tokens(final_graph.optimize())
        self.fst = final_graph.optimize()
示例#21
0
文件: time.py 项目: quuhua911/NeMo
    def __init__(self, number_names: dict, deterministic: bool = True):
        """Build the Russian time classifier for inputs such as "17:15".

        Three readings are combined into ``self.final_graph``: hours then
        minutes in written order, minutes of the next hour, and minutes
        remaining to the next hour.

        Args:
            number_names: mapping that must contain the key
                ``'cardinal_names_nominative'``, the graph used to spell
                hour and minute numbers.
            deterministic: forwarded to the base class.
        """
        super().__init__(name="time",
                         kind="classify",
                         deterministic=deterministic)

        increment_hour_ordinal = pynini.string_file(
            get_abs_path("data/time/increment_hour_ordinal.tsv"))
        increment_hour_cardinal = pynini.string_file(
            get_abs_path("data/time/increment_hour_cardinal.tsv"))
        convert_hour = pynini.string_file(
            get_abs_path("data/time/time_convert.tsv"))

        # number reading with an optional leading "0" removed
        number = pynini.closure(pynini.cross("0", ""), 0,
                                1) + number_names['cardinal_names_nominative']
        hour_options = pynini.project(increment_hour_ordinal, "input")
        hour_options = hour_options | pynini.project(convert_hour, "output")

        # the noun form after the hour depends on the number
        hour_exception_ends_with_one = pynini.union(*["01", "21"])
        hour_exception_ends_rest = pynini.union(
            *["02", "03", "04", "22", "23"])
        hour_other = (pynini.difference(
            hour_options,
            pynini.union(hour_exception_ends_with_one,
                         hour_exception_ends_rest))).optimize()

        hour = hour_exception_ends_with_one @ number + pynutil.insert(" час")
        hour |= hour_exception_ends_rest @ number + pynutil.insert(" часа")
        hour |= hour_other @ number + pynutil.insert(" часов")

        optional_and = pynini.closure(pynutil.insert("и "), 0, 1)
        digits = pynini.union(*[str(x) for x in range(10)])
        mins_start = pynini.union(*"012345")
        mins_options = mins_start + digits
        # Minutes ending in "1" take " минута" -- except 11, which (like
        # 12-14 below) takes " минут". Without the exclusion "11" would be
        # tagged with the singular noun form.
        mins_exception_ends_with_one = pynini.difference(
            mins_start + pynini.accep("1"), pynini.accep("11"))
        mins_exception_ends_rest = pynini.difference(
            mins_start + pynini.union(*"234"),
            pynini.union(*["12", "13", "14"]))
        # everything else, which now includes 11
        mins_other = pynini.difference(
            mins_options,
            pynini.union(mins_exception_ends_with_one,
                         mins_exception_ends_rest))

        minutes = mins_exception_ends_with_one @ number + pynutil.insert(
            " минута")
        minutes |= mins_exception_ends_rest @ number + pynutil.insert(
            " минуты")
        minutes |= mins_other @ number + pynutil.insert(" минут")
        self.minutes = minutes.optimize()
        # 17:15 -> "семнадцать часов и пятнадцать минут"
        hm = (pynutil.insert("hours: \"") + hour.optimize() +
              pynutil.insert("\"") +
              (pynini.cross(":", " ") + pynutil.insert("minutes: \"") +
               optional_and + minutes.optimize()) + pynutil.insert("\"") +
              pynutil.insert(" preserve_order: true"))
        # full hours: the ":00" suffix is dropped and only hours are tagged
        h = pynutil.insert("hours: \"") + hour + pynutil.insert(
            "\"") + pynutil.delete(":00")
        self.graph_preserve_order = (hm | h).optimize()

        # 17:15 -> "пятнадцать минут шестого"
        # Requires permutations for the correct verbalization
        self.increment_hour_ordinal = pynini.compose(
            hour_options, increment_hour_ordinal).optimize()
        m_next_h = (pynutil.insert("hours: \"") + self.increment_hour_ordinal +
                    pynutil.insert("\"") + pynini.cross(":", " ") +
                    pynutil.insert("minutes: \"") + minutes +
                    pynutil.insert("\""))

        # 17:45 -> "без пятнадцати минут шесть"
        # Requires permutations for the correct verbalization
        self.mins_to_h = pynini.string_file(
            get_abs_path("data/time/minutes_to_hour.tsv")).optimize()
        self.increment_hour_cardinal = pynini.compose(
            hour_options, increment_hour_cardinal).optimize()
        m_to_h = (pynutil.insert("hours: \"") + self.increment_hour_cardinal +
                  pynutil.insert("\"") + pynini.cross(":", " ") +
                  pynutil.insert("minutes: \"без ") + self.mins_to_h +
                  pynutil.insert("\""))

        self.final_graph = m_next_h | self.graph_preserve_order | m_to_h
        self.fst = self.add_tokens(self.final_graph)
        self.fst = self.fst.optimize()
示例#22
0
文件: cardinal.py 项目: blisc/NeMo
    def __init__(self, deterministic: bool = True):
        """Build the cardinal classifier that verbalizes digit strings.

        Args:
            deterministic: forwarded to the base class; when False extra
                readings are added ("oh"/"o" for zero, digit-by-digit and
                hundreds readings, ranges, numbers with leading zeros).
        """
        super().__init__(name="cardinal",
                         kind="classify",
                         deterministic=deterministic)

        number_names = pynini.Far(
            get_abs_path("data/numbers/cardinal_number_name.far")).get_fst()

        # two or three digits, or one non-zero digit
        short_number = (pynini.closure(NEMO_DIGIT, 2, 3)
                        | pynini.difference(NEMO_DIGIT, pynini.accep("0")))
        self.graph_hundred_component_at_least_one_none_zero_digit = (
            short_number @ number_names)

        # one to three digits followed by groups of three; "," between groups is optional
        group_of_three = (pynini.closure(pynutil.delete(","), 0, 1) +
                          NEMO_DIGIT + NEMO_DIGIT + NEMO_DIGIT)
        grouped_digits = (pynini.closure(NEMO_DIGIT, 1, 3) +
                          pynini.closure(group_of_three))
        self.graph = grouped_digits @ number_names

        digit_table = pynini.string_file(
            get_abs_path("data/numbers/digit.tsv"))
        zero_table = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))

        # reading of a single digit; zero gains "oh"/"o" when several
        # outputs are allowed
        one_digit = pynini.invert(digit_table | zero_table)
        if not deterministic:
            one_digit = (one_digit
                         | pynini.cross("0", "oh")
                         | pynini.cross("0", "o"))
        self.single_digits_graph = one_digit + pynini.closure(
            pynutil.insert(" ") + one_digit)

        if not deterministic:
            # digit-by-digit reading of comma-grouped numbers
            comma_group = (pynutil.delete(",") + one_digit +
                           pynutil.insert(" ") + one_digit +
                           pynutil.insert(" ") + one_digit)
            digits_with_commas = pynini.closure(
                self.single_digits_graph + pynutil.insert(" "), 1,
                3) + pynini.closure(comma_group, 1)
            self.graph = (self.graph
                          | self.single_digits_graph
                          | get_hundreds_graph()
                          | pynutil.add_weight(digits_with_commas, 0.001))

            # "a-b" -> "[from ]a to b" / "a b", and "a x b" -> "a by b"
            dash_range = (
                pynini.closure(pynutil.insert("from "), 0, 1) + self.graph +
                (pynini.cross("-", " to ") | pynini.cross("-", " ")) +
                self.graph)
            by_range = self.graph + (pynini.cross(
                "x", " by ") | pynini.cross(" x ", " by ")) + self.graph
            self.range_graph = (dash_range | by_range).optimize()

        # numbers of five or more digits may be read digit by digit (slightly preferred)
        long_numbers = pynini.compose(NEMO_DIGIT**(5, ...),
                                      self.single_digits_graph).optimize()
        final_graph = (self.graph
                       | self.get_serial_graph()
                       | pynutil.add_weight(long_numbers, -0.001))

        if not deterministic:
            final_graph |= self.range_graph
            # leading zeros are dropped before the regular reading
            final_graph |= pynini.closure(
                pynutil.delete("0"), 1) + pynini.compose(
                    pynini.closure(NEMO_DIGIT, 1), self.graph)

        optional_minus = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0,
            1)
        tagged = (optional_minus + pynutil.insert("integer: \"") +
                  final_graph + pynutil.insert("\""))
        self.fst = self.add_tokens(tagged).optimize()
示例#23
0
"""Portuguese g2p rules."""

import pynini

from pynini.lib import rewrite

# Alphabet accepted by the rewrite rules: ASCII letters A-Z and a-z plus the
# Portuguese letters, digraphs and phone symbols used by the rules below.
# Both ranges are inclusive, so "Z" and "z" are part of the alphabet.
chars = ([chr(i) for i in range(ord("A"), ord("Z") + 1)] +
         [chr(i) for i in range(ord("a"), ord("z") + 1)] +
         ["ʎ", "ʃ", "ɲ", "ç", "á", "ʁ", "ɾ", "ʒ", "ch", "lh", "nh", "ss"])
SIGMA_STAR = pynini.string_map(chars).closure()
# Portuguese rule set
G2P = (pynini.cdrewrite(
    pynini.union(
        pynini.cross("ch", "ʃ"),
        pynini.cross("lh", "ʎ"),
        pynini.cross("nh", "ɲ"),
    ),
    "",
    "",
    SIGMA_STAR,
) @ pynini.cdrewrite(pynini.cross(
    "h", ""), "", "", SIGMA_STAR) @ pynini.cdrewrite(
        pynini.cross("o", "u"),
        "",
        pynini.union("[EOS]", pynini.accep("s[EOS]"), "r"),
        SIGMA_STAR,
    ) @ pynini.cdrewrite(
        pynini.cross("e", "i"),
        "",
        pynini.union("[EOS]", pynini.accep("s[EOS]")),
        SIGMA_STAR,
示例#24
0
文件: measure.py 项目: sycomix/NeMo
    def get_address_graph(self, cardinal):
        """
        Finite state transducer for classifying street addresses.
            An address is a house number followed by an optional direction,
            street words and optionally city, state and zip code, e.g.:
            2788 San Tomas Expy, Santa Clara, CA 95051 ->
                units: "address" cardinal
                { integer: "two seven eight eight San Tomas Expressway Santa Clara California nine five zero five one" }
                 preserve_order: true

        Args:
            cardinal: cardinal grammar; its
                ``graph_hundred_component_at_least_one_none_zero_digit``,
                ``graph`` and ``single_digits_graph`` attributes are used.

        Returns:
            the address graph (without field tags).
        """
        # ordinal street names: tag the number, then verbalize it right away
        verbalize_ordinal = OrdinalVerbalizer().graph
        tag_ordinal = OrdinalTagger(cardinal=cardinal).graph
        spoken_ordinal = pynini.compose(
            pynutil.insert("integer: \"") + tag_ordinal +
            pynutil.insert("\""), verbalize_ordinal)

        # house number of three or four digits: leading one or two digits,
        # then the last two (a leading "0" is read as "zero")
        hundred_component = cardinal.graph_hundred_component_at_least_one_none_zero_digit
        leading_digits = NEMO_DIGIT**(1, 2) @ hundred_component
        last_two_digits = NEMO_DIGIT**2 @ (
            pynini.closure(pynini.cross("0", "zero "), 0, 1) +
            cardinal.graph_hundred_component_at_least_one_none_zero_digit)
        house_number = leading_digits + (insert_space + last_two_digits)
        house_number = pynini.compose(NEMO_DIGIT**(3, 4), house_number)
        # to handle the rest of the numbers
        house_number = plurals._priority_union(house_number, cardinal.graph,
                                               NEMO_SIGMA)

        # optional compass direction, with an optional trailing "."
        compass = pynini.union(
            pynini.cross("E", "East"),
            pynini.cross("S", "South"),
            pynini.cross("W", "West"),
            pynini.cross("N", "North"),
        ) + pynini.closure(pynutil.delete("."), 0, 1)
        optional_direction = pynini.closure(
            pynini.accep(NEMO_SPACE) + compass, 0, 1)

        # street: first word (ordinal or capitalized), further capitalized
        # words, then a street-type word from address_word.tsv
        street_type = get_formats(
            get_abs_path("data/address/address_word.tsv"))
        first_word = (pynini.closure(spoken_ordinal, 0, 1)
                      | NEMO_UPPER + pynini.closure(NEMO_ALPHA, 1))
        more_words = pynini.closure(NEMO_UPPER + pynini.closure(NEMO_ALPHA) +
                                    NEMO_SPACE)
        street = (pynini.accep(NEMO_SPACE) + first_word + NEMO_SPACE +
                  more_words + street_type)

        # optional ", <city>"
        city_name = pynini.closure(NEMO_ALPHA | pynini.accep(NEMO_SPACE), 1)
        optional_city = pynini.closure(
            pynini.accep(",") + pynini.accep(NEMO_SPACE) + city_name, 0, 1)

        # optional ", <state>"; a dotted variant of every label is accepted too
        state_labels = load_labels(get_abs_path("data/address/state.tsv"))
        state_labels.extend([(x, f"{y[0]}.{y[1:]}") for x, y in state_labels])
        state_name = pynini.invert(pynini.string_map(state_labels))
        optional_state = pynini.closure(
            pynini.accep(",") + pynini.accep(NEMO_SPACE) + state_name, 0, 1)

        # optional five-digit zip code, read digit by digit
        zip_digits = pynini.compose(NEMO_DIGIT**5, cardinal.single_digits_graph)
        optional_zip = pynini.closure(
            pynini.closure(pynini.accep(","), 0, 1) +
            pynini.accep(NEMO_SPACE) + zip_digits,
            0,
            1,
        )

        street_address = house_number + optional_direction + street

        address = street_address + pynini.closure(
            optional_city + optional_state + optional_zip, 0, 1)
        # alternatively the street may end with a "." that is removed
        address |= street_address + pynini.closure(
            pynini.cross(".", ""), 0, 1)

        return address
示例#25
0
    def __init__(self, syms):

        #
        # store alphabet
        #
        self.__syms = syms

        with pynini.default_token_type(self.__syms.alphabet):

            #
            # case markers
            #
            fix = pynini.cross("", "<Fix#>")
            adj = pynini.cross("", "<Low#>")
            adj_up = pynini.cross("", "<Up#>")
            n = pynini.cross("", "<Up#>")
            n_low = pynini.cross("", "<Low#>")
            v = pynini.cross("", "<Low#>")
            closed = pynini.cross("", "<Low#>")
            closed_up = pynini.cross("", "<Up#>")

            #
            # inflection classes
            #

            #
            # abbreviations
            abk_ADJ = pynini.concat(pynini.cross("<^ABK> <+ADJ>", ""), adj)
            abk_ADV = pynini.concat(pynini.cross("<^ABK> <+ADV>", ""), closed)
            abk_ART = pynini.concat(pynini.cross("<^ABK> <+ART>", ""), closed)
            abk_DPRO = pynini.concat(pynini.cross("<^ABK> <+DEMPRO>", ""),
                                     closed)
            abk_KONJ = pynini.concat(pynini.cross("<^ABK> <+KONJ>", ""),
                                     closed)
            abk_NE = pynini.concat(pynini.cross("<^ABK> <+NE>", ""), n)
            abk_NE_Low = pynini.concat(pynini.cross("<^ABK> <+NE>", ""), n_low)
            abk_NN = pynini.concat(pynini.cross("<^ABK> <+NN>", ""), n)
            abk_NN_Low = pynini.concat(pynini.cross("<^ABK> <+NN>", ""), n_low)
            abk_PREP = pynini.concat(pynini.cross("<^ABK> <+PREP>", ""),
                                     closed)
            abk_VPPAST = pynini.concat(
                pynini.cross("<^ABK> <^VPPAST> <+ADJ>", ""), adj)
            abk_VPPRES = pynini.concat(
                pynini.cross("<^ABK> <^VPPRES> <+ADJ>", ""), adj)

            #
            # adjectives

            # invariant adjectives
            self.__adj0 = pynini.concat(pynini.cross("<+ADJ> <Invar>", ""),
                                        adj)
            self.__adj0_up = pynini.concat(pynini.cross("<+ADJ> <Invar>", ""),
                                           adj_up)

            # inflectional endings
            adj_flex_suff = pynini.union(
                pynini.concat(
                    pynini.cross("<Masc> <Nom> <Sg> <St/Mix>", "e r"), adj),
                pynini.concat(pynini.cross("<Masc> <Nom> <Sg> <Sw>", "e"),
                              adj),
                pynini.concat(pynini.cross("<Masc> <Gen> <Sg>", "e n"), adj),
                pynini.concat(pynini.cross("<Masc> <Dat> <Sg> <St>", "e m"),
                              adj),
                pynini.concat(
                    pynini.cross("<Masc> <Dat> <Sg> <Sw/Mix>", "e n"), adj),
                pynini.concat(pynini.cross("<Masc> <Akk> <Sg>", "e n"), adj),
                pynini.concat(pynini.cross("<Fem> <Nom> <Sg>", "e"), adj),
                pynini.concat(pynini.cross("<Fem> <Gen> <Sg> <St>", "e r"),
                              adj),
                pynini.concat(pynini.cross("<Fem> <Gen> <Sg> <Sw/Mix>", "e n"),
                              adj),
                pynini.concat(pynini.cross("<Fem> <Dat> <Sg> <St>", "e r"),
                              adj),
                pynini.concat(pynini.cross("<Fem> <Dat> <Sg> <Sw/Mix>", "e n"),
                              adj),
                pynini.concat(pynini.cross("<Fem> <Akk> <Sg>", "e"), adj),
                pynini.concat(
                    pynini.cross("<Neut> <Nom> <Sg> <St/Mix>", "e s"), adj),
                pynini.concat(pynini.cross("<Neut> <Nom> <Sg> <Sw>", "e"),
                              adj),
                pynini.concat(pynini.cross("<Neut> <Gen> <Sg>", "e n"), adj),
                pynini.concat(pynini.cross("<Neut> <Dat> <Sg> <St>", "e m"),
                              adj),
                pynini.concat(
                    pynini.cross("<Neut> <Dat> <Sg> <Sw/Mix>", "e n"), adj),
                pynini.concat(
                    pynini.cross("<Neut> <Akk> <Sg> <St/Mix>", "e s"), adj),
                pynini.concat(pynini.cross("<Neut> <Akk> <Sg> <Sw>", "e"),
                              adj),
                pynini.concat(pynini.cross("<NoGend> <Nom> <Pl> <St>", "e"),
                              adj),
                pynini.concat(
                    pynini.cross("<NoGend> <Nom> <Pl> <Sw/Mix>", "e n"), adj),
                pynini.concat(
                    pynini.cross("<NoGend> <Gen> <Pl> <Sw/Mix>", "e n"), adj),
                pynini.concat(pynini.cross("<NoGend> <Gen> <Pl> <St>", "e r"),
                              adj),
                pynini.concat(pynini.cross("<NoGend> <Dat> <Pl>", "e n"), adj),
                pynini.concat(
                    pynini.cross("<NoGend> <Akk> <Pl> <Sw/Mix>", "e n"), adj),
                pynini.concat(pynini.cross("<NoGend> <Akk> <Pl> <St>", "e"),
                              adj)).optimize()

            # inflectional endings for nominalization
            adj_nn_suff = pynini.union(
                pynini.concat(
                    pynini.cross("<+NN> <Masc> <Nom> <Sg> <St/Mix>", "e r"),
                    n),
                pynini.concat(
                    pynini.cross("<+NN> <Masc> <Nom> <Sg> <Sw>", "e"), n),
                pynini.concat(pynini.cross("<+NN> <Masc> <Gen> <Sg>", "e n"),
                              n),
                pynini.concat(
                    pynini.cross("<+NN> <Masc> <Dat> <Sg> <St>", "e m"), n),
                pynini.concat(
                    pynini.cross("<+NN> <Masc> <Dat> <Sg> <Sw/Mix>", "e n"),
                    n),
                pynini.concat(pynini.cross("<+NN> <Masc> <Akk> <Sg>", "e n"),
                              n),
                pynini.concat(pynini.cross("<+NN> <Fem> <Nom> <Sg>", "e"), n),
                pynini.concat(
                    pynini.cross("<+NN> <Fem> <Gen> <Sg> <St>", "e r"), n),
                pynini.concat(
                    pynini.cross("<+NN> <Fem> <Gen> <Sg> <Sw/Mix>", "e n"), n),
                pynini.concat(
                    pynini.cross("<+NN> <Fem> <Dat> <Sg> <St>", "e r"), n),
                pynini.concat(
                    pynini.cross("<+NN> <Fem> <Dat> <Sg> <Sw/Mix>", "e n"), n),
                pynini.concat(pynini.cross("<+NN> <Fem> <Akk> <Sg>", "e"), n),
                pynini.concat(
                    pynini.cross("<+NN> <Neut> <Nom> <Sg> <St/Mix>", "e s"),
                    n),
                pynini.concat(
                    pynini.cross("<+NN> <Neut> <Nom> <Sg> <Sw>", "e"), n),
                pynini.concat(pynini.cross("<+NN> <Neut> <Gen> <Sg>", "e n"),
                              n),
                pynini.concat(
                    pynini.cross("<+NN> <Neut> <Dat> <Sg> <St>", "e m"), n),
                pynini.concat(
                    pynini.cross("<+NN> <Neut> <Dat> <Sg> <Sw/Mix>", "e n"),
                    n),
                pynini.concat(
                    pynini.cross("<+NN> <Neut> <Akk> <Sg> <St/Mix>", "e s"),
                    n),
                pynini.concat(
                    pynini.cross("<+NN> <Neut> <Akk> <Sg> <Sw>", "e"), n),
                pynini.concat(
                    pynini.cross("<+NN> <NoGend> <Nom> <Pl> <St>", "e"), n),
                pynini.concat(
                    pynini.cross("<+NN> <NoGend> <Nom> <Pl> <Sw/Mix>", "e n"),
                    n),
                pynini.concat(
                    pynini.cross("<+NN> <NoGend> <Gen> <Pl> <Sw/Mix>", "e n"),
                    n),
                pynini.concat(
                    pynini.cross("<+NN> <NoGend> <Gen> <Pl> <St>", "e r"), n),
                pynini.concat(pynini.cross("<+NN> <NoGend> <Dat> <Pl>", "e n"),
                              n),
                pynini.concat(
                    pynini.cross("<+NN> <NoGend> <Akk> <Pl> <Sw/Mix>", "e n"),
                    n),
                pynini.concat(
                    pynini.cross("<+NN> <NoGend> <Akk> <Pl> <St>", "e"),
                    n)).optimize()

            # positive
            adj_pos = pynini.union(
                pynini.concat(pynini.cross("<+ADJ> <Pos> <Pred>", ""), adj),
                pynini.concat(pynini.cross("<+ADJ> <Pos> <Adv>", ""), adj),
                pynini.concat(pynini.cross("<+ADJ> <Pos>", ""), adj_flex_suff),
            ).optimize()

            adj_pos_attr = pynini.concat(pynini.cross("<+ADJ> <Pos>", "<FB>"),
                                         adj_flex_suff).optimize()

            adj_pos_pred = pynini.concat(
                pynini.cross("<+ADJ> <Pos> <Pred>", ""), adj)

            # superlative
            adj_sup = pynini.union(
                pynini.concat(pynini.cross("<+ADJ> <Sup> <Pred>", "s t e n"),
                              adj),
                pynini.concat(pynini.cross("<+ADJ> <Sup> <Pred>", "s t"), adj),
                pynini.concat(pynini.cross("<+ADJ> <Sup> <Adv>", "s t e n"),
                              adj),
                pynini.concat(pynini.cross("<+ADJ> <Sup>", "s t"),
                              adj_flex_suff),
            ).optimize()

            # superlative with e
            adj_sup_e = pynini.union(
                pynini.concat(pynini.cross("<+ADJ> <Sup> <Pred>", "e s t e n"),
                              adj),
                pynini.concat(pynini.cross("<+ADJ> <Sup> <Pred>", "e s t"),
                              adj),
                pynini.concat(pynini.cross("<+ADJ> <Sup> <Adv>", "e s t e n"),
                              adj),
                pynini.concat(pynini.cross("<+ADJ> <Sup>", "e s t"),
                              adj_flex_suff),
            ).optimize()

            # comparative
            adj_comp = pynini.union(
                pynini.concat(pynini.cross("<+ADJ> <Comp> <Pred>", "e r"),
                              adj),
                pynini.concat(pynini.cross("<+ADJ> <Comp> <Adv>", "e r"), adj),
                pynini.concat(pynini.cross("<+ADJ> <Comp>", "e r"),
                              adj_flex_suff),
            ).optimize()

            # inflection classes (?)
            adj_nn = adj_pos_pred

            self.__adj_plus = pynini.union(
                pynini.concat(pynini.cross("", "<FB>"), adj_pos),
                pynini.concat(pynini.cross("", "<FB>"), adj_comp),
                pynini.concat(pynini.cross("", "<FB>"), adj_sup)).optimize()

            self.__adj_plus_e = pynini.union(
                pynini.concat(pynini.cross("", "<FB>"), adj_pos),
                pynini.concat(pynini.cross("", "<FB>"), adj_comp),
                pynini.concat(pynini.cross("", "<FB>"), adj_sup_e)).optimize()

            adj_pos_sup = pynini.union(
                pynini.concat(pynini.cross("", "<FB>"), adj_pos_attr),
                pynini.concat(pynini.cross("", "<FB>"), adj_sup)).optimize()

            adj_umlaut = pynini.union(
                pynini.concat(pynini.cross("", "<FB>"), adj_pos),
                pynini.concat(pynini.cross("", "<UL>"), adj_comp),
                pynini.concat(pynini.cross("", "<UL>"), adj_sup)).optimize()

            adj_umlaut_e = pynini.union(
                pynini.concat(pynini.cross("", "<FB>"), adj_pos),
                pynini.concat(pynini.cross("", "<UL>"), adj_comp),
                pynini.concat(pynini.cross("", "<UL> e"), adj_sup)).optimize()

            adj_ss_e = pynini.union(
                pynini.concat(pynini.cross("", "<SS> <FB>"), adj_pos),
                pynini.concat(pynini.cross("", "<SS> <FB>"), adj_comp),
                pynini.concat(pynini.cross("", "<SS> <FB> e"),
                              adj_sup)).optimize()

            #
            # nouns
            #

            #
            # inflection classes

            #
            # inflection endings: atomic

            # Frau; Mythos; Chaos
            n_sg_0 = pynini.union(
                pynini.concat(pynini.cross("<Nom> <Sg>", "<FB>"), n),
                pynini.concat(pynini.cross("<Gen> <Sg>", "<FB>"), n),
                pynini.concat(pynini.cross("<Dat> <Sg>", "<FB>"), n),
                pynini.concat(pynini.cross("<Akk> <Sg>", "<FB>"),
                              n)).optimize()

            # Opa-s, Klima-s
            n_sg_s = pynini.union(
                pynini.concat(pynini.cross("<Nom> <Sg>", "<FB>"), n),
                pynini.concat(pynini.cross("<Gen> <Sg>", "<FB> s"), n),
                pynini.concat(pynini.cross("<Dat> <Sg>", "<FB>"), n),
                pynini.concat(pynini.cross("<Akk> <Sg>", "<FB>"),
                              n)).optimize()

            # Haus-es, Geist-(e)s
            n_sg_es = pynini.union(
                pynini.concat(pynini.cross("<Nom> <Sg>", "<FB>"), n),
                pynini.concat(pynini.cross("<Gen> <Sg>", "<FB> e s <^Gen>"),
                              n),
                pynini.concat(pynini.cross("<Dat> <Sg>", "<FB>"), n),
                pynini.concat(pynini.cross("<Dat> <Sg>", "<FB> e"), n),
                pynini.concat(pynini.cross("<Akk> <Sg>", "<FB>"),
                              n)).optimize()

            n_pl_0 = pynini.union(
                pynini.concat(pynini.cross("<Nom> <Pl>", ""), n),
                pynini.concat(pynini.cross("<Gen> <Pl>", ""), n),
                pynini.concat(pynini.cross("<Dat> <Pl>", "n"), n),
                pynini.concat(pynini.cross("<Akk> <Pl>", ""), n)).optimize()

            n_pl_x = pynini.union(
                pynini.concat(pynini.cross("<Nom> <Pl>", ""), n),
                pynini.concat(pynini.cross("<Gen> <Pl>", ""), n),
                pynini.concat(pynini.cross("<Dat> <Pl>", ""), n),
                pynini.concat(pynini.cross("<Akk> <Pl>", ""), n)).optimize()

            #
            # inflection endings: meta
            n_es_e = pynini.union(
                n_sg_es, pynini.concat(pynini.cross("", "<FB> e"), n_pl_0))
            n_es_e_ul = pynini.union(
                n_sg_es, pynini.concat(pynini.cross("", "<UL> e"), n_pl_0))
            n_es_en = pynini.union(
                n_sg_es, pynini.concat(pynini.cross("", "<FB> e n"), n_pl_x))
            n_0_en = pynini.union(
                n_sg_0, pynini.concat(pynini.cross("", "<FB> e n"), n_pl_x))
            n_0_n = pynini.union(
                n_sg_0, pynini.concat(pynini.cross("", "<FB> n"), n_pl_x))
            n_s_x = pynini.union(n_sg_s, n_pl_x)

            # NMasc_es_e: Tag-(e)s/Tage
            self.__nmasc_es_e = pynini.concat(pynini.cross("<+NN> <Masc>", ""),
                                              n_es_e).optimize()

            # NMasc_es_e$: Arzt-(e)s/Ärzte
            self.__nmasc_es_e_ul = pynini.concat(
                pynini.cross("<+NN> <Masc>", ""), n_es_e_ul).optimize()

            # NMasc_es_en: Fleck-(e)s/Flecken
            self.__nmasc_es_en = pynini.concat(
                pynini.cross("<+NN> <Masc>", ""), n_es_en).optimize()

            # NFem-Deriv
            self.__nfem_deriv = pynini.concat(pynini.cross("<+NN> <Fem>", ""),
                                              n_0_en).optimize()

            # NFem_0_n: Kammer/Kammern
            self.__nfem_0_n = pynini.concat(pynini.cross("<+NN> <Fem>", ""),
                                            n_0_n).optimize()

            # NNeut-Dimin: Mäuschen-s/Mäuschen
            self.__nneut_dimin = pynini.concat(
                pynini.cross("<+NN> <Neut>", ""), n_s_x).optimize()

            # NNeut/Sg_s: Abitur-s/--
            self.__nneut_sg_s = pynini.concat(pynini.cross("<+NN> <Neut>", ""),
                                              n_sg_s).optimize()

            #
            # verbs
            #

            #
            # inflection endings: atomic

            # bin's
            v_plus_es = pynini.cross("/ \' s", "\' s").closure(0, 1) + v

            # (ich) lerne
            v_pres_reg_1 = pynini.concat(
                pynini.cross("<+V> <1> <Sg> <Pres> <Ind>", "<FB> e"),
                v_plus_es).optimize()

            # (du) lernst
            v_pres_reg_2 = pynini.concat(
                pynini.cross("<+V> <2> <Sg> <Pres> <Ind>", "<DEL-S> s t"),
                v_plus_es).optimize()

            # (er/sie/es) lernt
            v_pres_reg_3 = pynini.concat(
                pynini.cross("<+V> <3> <Sg> <Pres> <Ind>", "<DEL-S> t"),
                v_plus_es).optimize()

            # (wir/ihr/sie) lernen
            v_pres_pl_ind = pynini.concat(
                pynini.union(
                    pynini.cross("<+V> <1> <Pl> <Pres> <Ind>", "<FB> e n"),
                    pynini.cross("<+V> <2> <Pl> <Pres> <Ind>", "<DEL-S> t"),
                    pynini.cross("<+V> <3> <Pl> <Pres> <Ind>", "<FB> e n")),
                v_plus_es).optimize()

            # (ich/du/sie/wir/ihr/sie) lernen
            v_pres_subj = pynini.concat(
                pynini.union(
                    pynini.cross("<+V> <1> <Sg> <Pres> <Konj>", "<FB> e"),
                    pynini.cross("<+V> <2> <Sg> <Pres> <Konj>", "<FB> e s t"),
                    pynini.cross("<+V> <3> <Sg> <Pres> <Konj>", "<FB> e"),
                    pynini.cross("<+V> <1> <Pl> <Pres> <Konj>", "<FB> e n"),
                    pynini.cross("<+V> <2> <Pl> <Pres> <Konj>", "<FB> e t"),
                    pynini.cross("<+V> <3> <Pl> <Pres> <Konj>", "<FB> e n")),
                v_plus_es).optimize()

            # (ich/du/sie/wir/ihr/sie) lernten
            v_past_ind_reg = pynini.concat(
                pynini.union(
                    pynini.cross("<+V> <1> <Sg> <Past> <Ind>", "<DEL-S> t e"),
                    pynini.cross("<+V> <2> <Sg> <Past> <Ind>",
                                 "<DEL-S> t e s t"),
                    pynini.cross("<+V> <3> <Sg> <Past> <Ind>", "<DEL-S> t e"),
                    pynini.cross("<+V> <1> <Pl> <Past> <Ind>",
                                 "<DEL-S> t e n"),
                    pynini.cross("<+V> <2> <Pl> <Past> <Ind>",
                                 "<DEL-S> t e t"),
                    pynini.cross("<+V> <3> <Pl> <Past> <Ind>",
                                 "<DEL-S> t e n")), v_plus_es).optimize()

            # (wir/ihr/sie) lernten
            v_past_subj_reg = pynini.concat(
                pynini.union(
                    pynini.cross("<+V> <1> <Sg> <Past> <Konj>", "<DEL-S> t e"),
                    pynini.cross("<+V> <2> <Sg> <Past> <Konj>",
                                 "<DEL-S> t e s t"),
                    pynini.cross("<+V> <3> <Sg> <Past> <Konj>", "<DEL-S> t e"),
                    pynini.cross("<+V> <1> <Pl> <Past> <Konj>",
                                 "<DEL-S> t e n"),
                    pynini.cross("<+V> <2> <Pl> <Past> <Konj>",
                                 "<DEL-S> t e t"),
                    pynini.cross("<+V> <3> <Pl> <Past> <Konj>",
                                 "<DEL-S> t e n")), v_plus_es).optimize()

            # kommt, schaut!
            v_imp_pl = pynini.concat(
                pynini.cross("<+V> <Imp> <Pl>", "<DEL-S> t <^imp>"),
                v_plus_es).optimize()

            # kommt, schaut!
            v_imp_sg = pynini.concat(
                pynini.cross("<+V> <Imp> <Sg>", "<DEL-S> <^imp>"),
                v_plus_es).optimize()

            # SMOR: investigate Lernen<+NN>
            v_inf = pynini.union(
                pynini.union(pynini.cross("<+V> <Inf>", ""),
                             pynini.cross("<+V> <Inf> <zu>", "<^zz>")) + v,
                pynini.cross("<V> <CONV>", "") + self.__nneut_sg_s,
            )

            # SMOR: investigate lernendes<+ADJ>
            v_ppres = pynini.union(pynini.cross("<+V> <PPres>", ""),
                                   pynini.cross("<+V> <PPres> <zu>",
                                                "<^zz>")) + v

            # SMOR: investigate gelerntes<+ADJ>
            v_ppast = pynini.cross("<+V> <PPast>", "<^pp>") + v

            # lernend
            v_inf_plus_ppres = pynini.union(
                v_inf, pynini.concat(pynini.cross("", "d"),
                                     v_ppres)).optimize()

            # lernen
            v_inf_stem = pynini.concat(pynini.cross("", "<FB> e n"),
                                       v_inf_plus_ppres).optimize()

            # gelernt
            v_pp_t = pynini.concat(pynini.cross("", "<DEL-S> t"),
                                   v_ppast).optimize()

            #
            # inflection endings: meta
            v_flex_pres_1 = pynini.union(v_pres_reg_1, v_pres_pl_ind,
                                         v_pres_subj, v_imp_pl,
                                         v_inf_stem).optimize()

            v_flex_pres_reg = pynini.union(v_flex_pres_1, v_pres_reg_2,
                                           v_pres_reg_3, v_imp_sg).optimize()

            v_flex_reg = pynini.union(v_flex_pres_reg, v_past_ind_reg,
                                      v_past_subj_reg, v_pp_t).optimize()

            #
            # inflection classes

            # VVReg: lernen
            self.__vv_reg = pynini.concat(pynini.cross("e n", ""),
                                          v_flex_reg).optimize()

            #
            # building the inflection cross
            #
            self.__inflection = self.__construct_inflection()

            #
            # definition of a filter which enforces the correct inflection
            #
            self.__inflection_filter = self.__construct_inflection_filter()
示例#26
0
    def __init__(self):
        """Build the "telephone" classify grammar and store it in ``self.fst``.

        Of the components country code / number part / extension, only the
        number part is produced here.  It is emitted as the field
        ``number_part: "..."`` and covers two layouts:

        * ``DDD-DDD-DDDD``
        * ``(DDD)`` followed by an optional ``-`` and a space, then
          ``DDD-DDDD``

        The pattern is first written with the punctuated form on the input
        side (punctuation deleted, spaces inserted between digits) and is then
        inverted, so in the final graph the punctuation is on the output side.
        """
        super().__init__(name="telephone", kind="classify")

        # One digit: the inverted digit.tsv table, with "0" additionally
        # mapped to any of "o", "oh" or "zero".
        digit_table = pynini.invert(
            pynini.string_file(
                get_abs_path("data/numbers/digit.tsv"))).optimize()
        zero_variants = pynini.cross("0", pynini.union("o", "oh", "zero"))
        digit = digit_table | zero_variants

        # Small deleting / inserting helpers.
        drop_dash = pynutil.delete("-")
        drop_space = pynutil.delete(' ')
        component_gap = pynutil.insert(" ")  # between components

        # Fixed-length digit runs; a space is inserted after every digit
        # except the last one of the run.
        spaced_digit = digit + insert_space
        three_digits = pynini.closure(spaced_digit, 2, 2) + digit
        four_digits = pynini.closure(spaced_digit, 3, 3) + digit

        # First group: either "DDD-" or "(DDD)" + optional "-" + " ".
        dashed_prefix = three_digits + drop_dash
        bracketed_prefix = (pynutil.delete("(") + three_digits +
                            pynutil.delete(")") +
                            pynini.closure(drop_dash, 0, 1) + drop_space)

        punctuated_side = ((dashed_prefix | bracketed_prefix) +
                           component_gap + three_digits + drop_dash +
                           component_gap + four_digits)

        # Flip the direction and wrap the result in the number_part field.
        number_part = (pynutil.insert("number_part: \"") +
                       pynini.invert(punctuated_side) + pynutil.insert("\""))

        self.fst = self.add_tokens(number_part).optimize()
示例#27
0
    def __construct_inflection_filter(self):
        '''
        Define a filter which enforces the correct inflection.

        The returned transducer deletes two consecutive occurrences of the
        same inflection-class marker (e.g. "<Adj0>" "<Adj0>") and then accepts
        any sequence made of the symbol table's characters and the boundary /
        trigger markers listed below.
        '''
        # Inflection-class markers; each one has to appear twice in a row.
        inflection_classes = (
            "<Adj0>",
            "<Adj0-Up>",
            "<Adj+>",
            "<Adj+e>",
            "<NMasc_es_e>",
            "<NMasc_es_$e>",
            "<NMasc_es_en>",
            "<NFem-Deriv>",
            "<NFem_0_n>",
            "<NNeut-Dimin>",
            "<NNeut/Sg_s>",
            "<VVReg>",
        )

        # Non-character symbols that may follow the marker pair.
        trigger_symbols = [
            "<n>", "<e>", "<d>", "<~n>", "<Ge-Nom>", "<UL>", "<SS>", "<FB>",
            "<DEL-S>", "<ge>", "<no-ge>", "<^imp>", "<^zz>", "<^pp>", "<^Ax>",
            "<^pl>", "<^Gen>", "<^Del>", "<Fix#>", "<Low#>", "<Up#>"
        ]

        with pynini.default_token_type(self.__syms.alphabet):
            # Any number of permitted symbols (input side only).
            single_symbol = pynini.union(self.__syms.characters,
                                         pynini.string_map(trigger_symbols))
            alphabet = single_symbol.project("input").closure()

            # One alternative per class: the marker deleted twice.
            doubled_markers = [
                pynini.concat(pynini.cross(marker, ""),
                              pynini.cross(marker, ""))
                for marker in inflection_classes
            ]

            return pynini.concat(pynini.union(*doubled_markers),
                                 alphabet).optimize()
示例#28
0
文件: cardinal.py 项目: NVIDIA/NeMo
    def __init__(self):
        """Build the "cardinal" classify grammar and store it in ``self.fst``.

        Attributes set besides ``self.fst``:

        * ``self.graph_hundred_component_at_least_one_none_zero_digit`` -
          three-digit component whose output has at least one non-zero digit.
        * ``self.graph_no_exception`` - the full number graph.
        * ``self.graph`` - the same graph with every input accepted by
          ``graph_digit`` or ``graph_zero`` removed.

        ``delete_space``, ``NEMO_DIGIT`` and ``NEMO_SPACE`` are defined
        outside this block.
        """
        super().__init__(name="cardinal", kind="classify")
        # Lookup tables loaded from TSV data files.
        graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
        graph_digit = pynini.string_file(
            get_abs_path("data/numbers/digit.tsv"))
        graph_ties = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
        graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))

        # Special word forms mapped directly to a digit.
        graph_one = pynini.cross("mốt", "1")
        graph_four = pynini.cross("tư", "4")
        graph_five = pynini.cross("lăm", "5")
        graph_half = pynini.cross("rưỡi", "5")
        # "trăm" and "mươi" are consumed without producing any output.
        graph_hundred = pynini.cross("trăm", "")
        graph_ten = pynini.cross("mươi", "")
        # "linh" / "lẻ" produce a "0".
        zero = pynini.cross(pynini.union("linh", "lẻ"), "0")

        # "mươi" (with the space before it) may appear zero or one time.
        optional_ten = pynini.closure(delete_space + graph_ten, 0, 1)
        # Final digit after a ties word: graph_digit with the input "năm"
        # removed, plus the special forms "mốt", "tư" and "lăm".
        last_digit_exception = pynini.project(pynini.cross("năm", "5"),
                                              "input")
        last_digit = pynini.union(
            (pynini.project(graph_digit, "input") -
             last_digit_exception.arcsort()) @ graph_digit,
            graph_one,
            graph_four,
            graph_five,
        )

        # Hundreds digit (or zero) followed by "trăm", then one of the
        # tens/units alternatives below.
        graph_hundred_ties_component = (
            graph_digit | graph_zero) + delete_space + graph_hundred
        graph_hundred_ties_component += delete_space
        graph_hundred_ties_component += pynini.union(
            # a teen word
            graph_teen,
            # "rưỡi" / "tư" / "mốt" with a "0" appended
            (graph_half | graph_four | graph_one) + pynutil.insert("0"),
            # ties word, optional "mươi", then a final digit or an inserted "0"
            graph_ties + optional_ten +
            ((delete_space + last_digit) | pynutil.insert("0")),
            # "linh" / "lẻ" followed by a digit (or "tư")
            zero + delete_space + (graph_digit | graph_four),
            # nothing after the hundreds: pad with "00"
            pynutil.insert("00"),
        )
        # Alternative without a hundreds part: a leading "0" is inserted.
        graph_hundred_ties_component |= (
            pynutil.insert("0") + delete_space + pynini.union(
                graph_teen,
                graph_ties + optional_ten + delete_space + last_digit,
                graph_ties + delete_space + graph_ten + pynutil.insert("0"),
                zero + delete_space + (graph_digit | graph_four),
            ))
        # A lone digit is padded with "00" in front.
        graph_hundred_component = graph_hundred_ties_component | (
            pynutil.insert("00") + delete_space + graph_digit)

        # Keep only outputs that contain at least one digit other than "0".
        graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
            pynini.closure(NEMO_DIGIT) +
            (NEMO_DIGIT - "0") + pynini.closure(NEMO_DIGIT))
        self.graph_hundred_component_at_least_one_none_zero_digit = (
            graph_hundred_component_at_least_one_none_zero_digit)
        # Lowest group: the hundreds/ties component, or "000" when absent.
        graph_hundred_ties_zero = graph_hundred_ties_component | pynutil.insert(
            "000")

        # Each scale group below is either a non-zero component followed by
        # its (deleted) scale word, or inserted zero padding carrying a
        # weight of 0.1.
        graph_thousands = pynini.union(
            graph_hundred_component_at_least_one_none_zero_digit +
            delete_space + pynutil.delete(pynini.union("nghìn", "ngàn")),
            pynutil.insert("000", weight=0.1),
        )

        # Scale word "vạn"; padded with four zeros when absent.
        graph_ten_thousand = pynini.union(
            graph_hundred_component_at_least_one_none_zero_digit +
            delete_space + pynutil.delete("vạn"),
            pynutil.insert("0000", weight=0.1),
        )

        # Single digit followed by "nghìn" / "ngàn", used after the "vạn"
        # group; padded with one zero when absent.
        graph_ten_thousand_suffix = pynini.union(
            graph_digit + delete_space +
            pynutil.delete(pynini.union("nghìn", "ngàn")),
            pynutil.insert("0", weight=0.1),
        )

        # Scale word "triệu".
        graph_million = pynini.union(
            graph_hundred_component_at_least_one_none_zero_digit +
            delete_space + pynutil.delete("triệu"),
            pynutil.insert("000", weight=0.1),
        )
        # Scale word "tỉ" / "tỷ".
        graph_billion = pynini.union(
            graph_hundred_component_at_least_one_none_zero_digit +
            delete_space + pynutil.delete(pynini.union("tỉ", "tỷ")),
            pynutil.insert("000", weight=0.1),
        )

        # Full number: one of
        #   - billion + million + thousands + lowest group,
        #   - "vạn" group + its single-digit thousands suffix + lowest group,
        #   - a thousands group followed by a short form (final digit or
        #     "rưỡi", padded with "00") or by the lowest group,
        #   - a single digit,
        #   - zero.
        graph = pynini.union(
            graph_billion + delete_space + graph_million + delete_space +
            graph_thousands + delete_space + graph_hundred_ties_zero,
            graph_ten_thousand + delete_space + graph_ten_thousand_suffix +
            delete_space + graph_hundred_ties_zero,
            graph_hundred_component_at_least_one_none_zero_digit +
            delete_space + pynutil.delete(pynini.union("nghìn", "ngàn")) +
            delete_space + (((last_digit | graph_half) + pynutil.insert("00"))
                            | graph_hundred_ties_zero),
            graph_digit,
            graph_zero,
        )

        # Strip leading zeros from the digit string; a bare "0" is kept.
        graph = graph @ pynini.union(
            pynutil.delete(pynini.closure("0")) +
            pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT),
            "0",
        )

        # don't convert cardinals from zero to nine inclusive
        graph_exception = pynini.project(pynini.union(graph_digit, graph_zero),
                                         "input")

        # Unrestricted graph, kept as a separate attribute.
        self.graph_no_exception = graph

        # Restrict the input side to everything except the exception inputs.
        self.graph = (pynini.project(graph, "input") -
                      graph_exception.arcsort()) @ graph

        # Optional leading "âm" / "trừ" plus a space, emitted as
        # negative: "-".
        optional_minus_graph = pynini.closure(
            pynutil.insert("negative: ") +
            pynini.cross(pynini.union("âm", "trừ"), '"-"') + NEMO_SPACE,
            0,
            1,
        )

        # Wrap the digit string in the integer field.
        final_graph = optional_minus_graph + pynutil.insert(
            'integer: "') + self.graph + pynutil.insert('"')

        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()
示例#29
0
 def _get_digits_graph():
     """Return an optimized graph for two-token digit sequences.

     Two alternatives are accepted, each with the separating space deleted:
     "linh" / "lẻ" (emitted as "0") followed by ``graph_digit`` or "tư"
     (emitted as "4"), and ``graph_zero`` followed by ``graph_digit``.
     ``graph_digit``, ``graph_zero`` and ``delete_space`` come from the
     enclosing scope.
     """
     filler_word = pynini.accep("linh") | pynini.accep("lẻ")
     filler_as_zero = pynini.cross(filler_word, "0")
     four_variant = pynini.cross("tư", "4")

     after_filler = filler_as_zero + delete_space + (graph_digit | four_variant)
     after_zero = graph_zero + delete_space + graph_digit

     return pynini.union(after_filler, after_zero).optimize()
示例#30
0
    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        super().__init__(name="time",
                         kind="classify",
                         deterministic=deterministic)

        delete_time_delimiter = pynutil.delete(pynini.union(".", ":"))

        one = pynini.string_map([("un", "una"), ("ún", "una")])
        change_one = pynini.cdrewrite(one, "", "", NEMO_SIGMA)
        cardinal_graph = cardinal.graph @ change_one

        day_suffix = pynutil.insert("suffix: \"") + suffix + pynutil.insert(
            "\"")
        day_suffix = delete_space + insert_space + day_suffix

        delete_hora_suffix = delete_space + insert_space + pynutil.delete("h")
        delete_minute_suffix = delete_space + insert_space + pynutil.delete(
            "min")
        delete_second_suffix = delete_space + insert_space + pynutil.delete(
            "s")

        labels_hour_24 = [
            str(x) for x in range(0, 25)
        ]  # Can see both systems. Twelve hour requires am/pm for ambiguity resolution
        labels_hour_12 = [str(x) for x in range(1, 13)]
        labels_minute_single = [str(x) for x in range(1, 10)]
        labels_minute_double = [str(x) for x in range(10, 60)]

        delete_leading_zero_to_double_digit = (
            pynini.closure(pynutil.delete("0") |
                           (NEMO_DIGIT - "0"), 0, 1) + NEMO_DIGIT)

        graph_24 = (pynini.closure(NEMO_DIGIT, 1,
                                   2) @ delete_leading_zero_to_double_digit
                    @ pynini.union(*labels_hour_24))
        graph_12 = (pynini.closure(NEMO_DIGIT, 1,
                                   2) @ delete_leading_zero_to_double_digit
                    @ pynini.union(*labels_hour_12))

        graph_hour_24 = graph_24 @ cardinal_graph
        graph_hour_12 = graph_12 @ cardinal_graph

        graph_minute_single = delete_leading_zero_to_double_digit @ pynini.union(
            *labels_minute_single)
        graph_minute_double = pynini.union(*labels_minute_double)

        graph_minute = pynini.union(graph_minute_single,
                                    graph_minute_double) @ cardinal_graph

        final_graph_hour_only_24 = (pynutil.insert("hours: \"") +
                                    graph_hour_24 + pynutil.insert("\"") +
                                    delete_hora_suffix)
        final_graph_hour_only_12 = pynutil.insert(
            "hours: \"") + graph_hour_12 + pynutil.insert("\"") + day_suffix

        final_graph_hour_24 = pynutil.insert(
            "hours: \"") + graph_hour_24 + pynutil.insert("\"")
        final_graph_hour_12 = pynutil.insert(
            "hours: \"") + graph_hour_12 + pynutil.insert("\"")

        final_graph_minute = pynutil.insert(
            "minutes: \"") + graph_minute + pynutil.insert("\"")
        final_graph_second = pynutil.insert(
            "seconds: \"") + graph_minute + pynutil.insert("\"")
        final_time_zone_optional = pynini.closure(
            delete_space + insert_space + pynutil.insert("zone: \"") +
            time_zone_graph + pynutil.insert("\""),
            0,
            1,
        )

        # 02.30 h
        graph_hm = (
            final_graph_hour_24 + delete_time_delimiter +
            (pynutil.delete("00") |
             (insert_space + final_graph_minute)) + pynini.closure(
                 delete_time_delimiter +
                 (pynini.cross("00", " seconds: \"0\"") |
                  (insert_space + final_graph_second)),
                 0,
                 1,
             )  # For seconds 2.30.35 h
            + pynini.closure(delete_hora_suffix, 0,
                             1)  # 2.30 is valid if unambiguous
            + final_time_zone_optional)

        # 2 h 30 min
        graph_hm |= (
            final_graph_hour_24 + delete_hora_suffix + delete_space +
            (pynutil.delete("00") | (insert_space + final_graph_minute)) +
            delete_minute_suffix + pynini.closure(
                delete_space +
                (pynini.cross("00", " seconds: \"0\"") |
                 (insert_space + final_graph_second)) + delete_second_suffix,
                0,
                1,
            )  # For seconds
            + final_time_zone_optional)

        # 2.30 a. m. (Only for 12 hour clock)
        graph_hm |= (
            final_graph_hour_12 + delete_time_delimiter +
            (pynutil.delete("00") |
             (insert_space + final_graph_minute)) + pynini.closure(
                 delete_time_delimiter +
                 (pynini.cross("00", " seconds: \"0\"") |
                  (insert_space + final_graph_second)),
                 0,
                 1,
             )  # For seconds 2.30.35 a. m.
            + day_suffix + final_time_zone_optional)

        graph_h = (
            pynini.union(final_graph_hour_only_24, final_graph_hour_only_12) +
            final_time_zone_optional
        )  # Should always have a time indicator, else we'll pass to cardinals

        if not deterministic:
            # Non-deterministic mode also covers the alternate vocalizations
            # ("hour menos min", "min para hour"). For these, the hour and minute
            # are shifted and a `style` tag is inserted to mark the alternate form.
            hour_shift_24 = pynini.invert(
                pynini.string_file(get_abs_path("data/time/hour_to_24.tsv")))
            hour_shift_12 = pynini.invert(
                pynini.string_file(get_abs_path("data/time/hour_to_12.tsv")))
            minute_shift = pynini.string_file(
                get_abs_path("data/time/minute_to.tsv"))

            graph_hour_to_24 = graph_24 @ hour_shift_24 @ cardinal_graph
            graph_hour_to_12 = graph_12 @ hour_shift_12 @ cardinal_graph

            graph_minute_to = pynini.union(
                graph_minute_single,
                graph_minute_double) @ minute_shift @ cardinal_graph

            final_graph_hour_to_24 = pynutil.insert(
                "hours: \"") + graph_hour_to_24 + pynutil.insert("\"")
            final_graph_hour_to_12 = pynutil.insert(
                "hours: \"") + graph_hour_to_12 + pynutil.insert("\"")

            final_graph_minute_to = pynutil.insert(
                "minutes: \"") + graph_minute_to + pynutil.insert("\"")

            graph_menos = pynutil.insert(" style: \"1\"")
            graph_para = pynutil.insert(" style: \"2\"")

            final_graph_style = graph_menos | graph_para

            # 02.30 h (omitting seconds since a bit awkward)
            graph_hm |= (
                final_graph_hour_to_24 + delete_time_delimiter +
                insert_space + final_graph_minute_to + pynini.closure(
                    delete_hora_suffix, 0, 1)  # 2.30 is valid if unambiguous
                + final_time_zone_optional + final_graph_style)

            # 2 h 30 min
            graph_hm |= (final_graph_hour_to_24 + delete_hora_suffix +
                         delete_space + insert_space + final_graph_minute_to +
                         delete_minute_suffix + final_time_zone_optional +
                         final_graph_style)

            # 2.30 a. m. (Only for 12 hour clock)
            graph_hm |= (final_graph_hour_to_12 + delete_time_delimiter +
                         insert_space + final_graph_minute_to + day_suffix +
                         final_time_zone_optional + final_graph_style)

        final_graph = graph_hm | graph_h
        if deterministic:
            final_graph = final_graph + pynutil.insert(" preserve_order: true")
        final_graph = final_graph.optimize()
        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()