示例#1
0
    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
        """Build the "measure" classification FST and store it in ``self.fst``.

        Three spoken shapes are accepted, each followed by a unit:
        a decimal number, a cardinal number, or a cardinal number whose unit
        is followed by one more fractional token.

        Args:
            cardinal: tagger providing ``graph_no_exception`` (the integer graph).
            decimal: tagger providing ``final_graph_wo_negative`` (the unsigned
                decimal graph).
        """
        super().__init__(name="measure", kind="classify")

        integer_fst = cardinal.graph_no_exception

        # Tokens allowed as the fractional part that trails the unit:
        # a digit from the data table, or one of three special words.
        digit_fst = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
        half_fst = pynini.cross("rưỡi", "5")
        one_fst = pynini.cross("mốt", "1")
        four_fst = pynini.cross("tư", "4")
        trailing_fraction = (
            delete_extra_space
            + pynutil.insert('fractional_part: "')
            + (digit_fst | half_fst | one_fst | four_fst)
            + pynutil.insert('"')
        )

        # The measurement table is inverted so it is read in the opposite
        # direction from how it is stored on disk.
        unit_table = pynini.invert(
            pynini.string_file(get_abs_path("data/measurements.tsv"))
        )

        # Optional sign: "âm" / "trừ" becomes the field negative: "true".
        negative_field = (
            pynutil.insert("negative: ")
            + pynini.cross(pynini.union("âm", "trừ"), '"true"')
            + delete_extra_space
        )
        maybe_negative = pynini.closure(negative_field, 0, 1)

        # Unit alternatives: a plain unit, a "trên <unit>" form rewritten as
        # "/<unit>", or both in sequence (given a small extra weight).
        plain_unit = convert_space(unit_table)
        per_unit = (
            pynutil.insert("/")
            + pynutil.delete("trên")
            + delete_space
            + convert_space(unit_table)
        )
        compound_unit = pynutil.add_weight(
            plain_unit + delete_space + per_unit, 0.01
        )
        units_field = (
            pynutil.insert('units: "')
            + (plain_unit | per_unit | compound_unit)
            + pynutil.insert('"')
        )

        # decimal { ... } units: "..."
        decimal_measure = (
            pynutil.insert("decimal { ")
            + maybe_negative
            + decimal.final_graph_wo_negative
            + pynutil.insert(" }")
            + delete_extra_space
            + units_field
        )

        # Both cardinal variants start the same way, up to the integer value.
        cardinal_open = (
            pynutil.insert("cardinal { ")
            + maybe_negative
            + pynutil.insert('integer: "')
            + integer_fst
        )

        # cardinal { ... } units: "..."
        cardinal_measure = (
            cardinal_open
            + pynutil.insert('"')
            + pynutil.insert(" }")
            + delete_extra_space
            + units_field
        )

        # cardinal { ... } units: "..." fractional_part: "..."
        cardinal_measure_with_fraction = (
            cardinal_open
            + pynutil.insert('" }')
            + delete_extra_space
            + units_field
            + trailing_fraction
        )
        cardinal_measure = cardinal_measure | cardinal_measure_with_fraction

        tagged = self.add_tokens(decimal_measure | cardinal_measure)
        self.fst = tagged.optimize()
示例#2
0
文件: money.py 项目: quuhua911/NeMo
    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
        """Build the "money" classification FST and store it in ``self.fst``.

        An amount is either a cardinal number or a decimal number, followed
        by a currency name and, optionally, one trailing fractional amount.

        Args:
            cardinal: tagger providing ``graph_no_exception`` (the integer graph).
            decimal: tagger providing ``final_graph_wo_negative`` (the unsigned
                decimal graph).
        """
        super().__init__(name="money", kind="classify")
        # Fields inserted in this method: integer_part, fractional_part, currency.

        integer_fst = cardinal.graph_no_exception

        # The currency table is inverted so it is read in the opposite
        # direction from how it is stored on disk.
        currency_table = pynini.invert(
            pynini.string_file(get_abs_path("data/currency.tsv"))
        )
        currency_field = (
            pynutil.insert("currency: \"")
            + convert_space(currency_table)
            + pynutil.insert("\"")
        )

        # Two digits pass through unchanged; a single digit gets a leading "0".
        two_digit_form = (NEMO_DIGIT + NEMO_DIGIT) | (
            pynutil.insert("0") + NEMO_DIGIT
        )
        padded_amount = pynutil.add_weight(integer_fst @ two_digit_form, -0.7)
        half_amount = pynini.cross("rưỡi", "5")

        # Trailing amount spoken after the currency ("twelve dollars fifty").
        # NOTE(review): the original comment says "only after integer", but the
        # suffix is appended to the decimal branch below as well - confirm intent.
        trailing_amount = (
            delete_extra_space
            + pynutil.insert("fractional_part: \"")
            + (padded_amount | half_amount)
            + pynutil.insert("\"")
        )
        maybe_trailing_amount = pynini.closure(trailing_amount, 0, 1)

        integer_money = (
            pynutil.insert("integer_part: \"")
            + integer_fst
            + pynutil.insert("\"")
            + delete_extra_space
            + currency_field
            + maybe_trailing_amount
        )
        decimal_money = (
            decimal.final_graph_wo_negative
            + delete_extra_space
            + currency_field
            + maybe_trailing_amount
        )

        tagged = self.add_tokens(integer_money | decimal_money)
        self.fst = tagged.optimize()
示例#3
0
文件: whitelist.py 项目: NVIDIA/NeMo
    def __init__(self):
        """Build the "whitelist" classification FST and store it in ``self.fst``.

        The whitelist table is loaded from ``data/whitelist.tsv``, inverted,
        and its output is wrapped in a ``name: "..."`` field.
        """
        super().__init__(name="whitelist", kind="classify")

        table = pynini.string_file(get_abs_path("data/whitelist.tsv"))
        inverted_table = pynini.invert(table)

        name_field = (
            pynutil.insert('name: "')
            + convert_space(inverted_table)
            + pynutil.insert('"')
        )
        self.fst = name_field.optimize()
示例#4
0
    def __init__(self):
        """Build the "time" classification FST and store it in ``self.fst``.

        Accepted shapes (the hour word is "giờ", minute "phút", second "giây"):

        * hours only, hours + minutes, hours + minutes + seconds, each
          optionally followed by a time zone;
        * minutes + seconds;
        * "<hour> giờ kém <minutes>" (time-to form), rewritten through the
          ``hours_to`` / ``minutes_to`` tables.

        "rưỡi" is accepted as the value 30 in the minutes slot of the
        hours + minutes shape and in the seconds slot of minutes + seconds.
        """
        super().__init__(name="time", kind="classify")
        # hours, minutes, seconds, suffix, zone, style, speak_period

        graph_hours_to = pynini.string_file(
            get_abs_path("data/time/hours_to.tsv"))
        graph_minutes_to = pynini.string_file(
            get_abs_path("data/time/minutes_to.tsv"))
        graph_hours = pynini.string_file(get_abs_path("data/time/hours.tsv"))
        graph_minutes = pynini.string_file(
            get_abs_path("data/time/minutes.tsv"))
        time_zone_graph = pynini.invert(
            pynini.string_file(get_abs_path("data/time/time_zone.tsv")))

        graph_half = pynini.cross("rưỡi", "30")
        # Unit words are consumed and produce no output.
        oclock = pynini.cross("giờ", "")
        minute = pynini.cross("phút", "")
        optional_minute = pynini.closure(delete_space + minute, 0, 1)
        second = pynini.cross("giây", "")

        final_graph_hour = pynutil.insert(
            "hours: \"") + graph_hours + pynutil.insert(
                "\"") + delete_space + oclock
        # Minute value with an optional trailing "phút".
        graph_minute = graph_minutes + optional_minute
        # Second value: a bare number (taken from the minutes table) followed
        # by "giây". It is built from graph_minutes, not graph_minute, so that
        # the optional "phút" is not accepted between the number and "giây".
        graph_second = graph_minutes + delete_space + second
        final_time_zone_optional = pynini.closure(
            delete_space + insert_space + pynutil.insert("zone: \"") +
            convert_space(time_zone_graph) + pynutil.insert("\""),
            0,
            1,
        )

        # hours + minutes
        graph_hm = (final_graph_hour + delete_extra_space +
                    pynutil.insert("minutes: \"") +
                    (graph_minute | graph_half) + pynutil.insert("\""))

        # hours + minutes + seconds
        graph_hms = graph_hm + delete_extra_space + pynutil.insert(
            "seconds: \"") + graph_second + pynutil.insert("\"")

        # minutes + seconds (no hour)
        graph_ms = (pynutil.insert("minutes: \"") + graph_minute +
                    pynutil.insert("\"") + delete_extra_space +
                    pynutil.insert("seconds: \"") +
                    (graph_second | graph_half) + pynutil.insert("\""))

        # Time-to form: the spoken hour and minute are first converted by the
        # regular tables and then remapped through the *_to tables.
        # NOTE(review): presumably hours_to yields the preceding hour and
        # minutes_to yields 60 - minutes; the table contents are not visible here.
        graph_hours_to_component = graph_hours @ graph_hours_to
        graph_minutes_to_component = graph_minutes @ graph_minutes_to

        graph_time_to = (pynutil.insert("hours: \"") +
                         graph_hours_to_component + pynutil.insert("\"") +
                         delete_space + oclock + delete_space +
                         pynutil.delete("kém") + delete_extra_space +
                         pynutil.insert("minutes: \"") +
                         graph_minutes_to_component + optional_minute +
                         pynutil.insert("\""))

        # Only the hour-based shapes may carry a time zone.
        final_graph = (final_graph_hour | graph_hm
                       | graph_hms) + final_time_zone_optional
        final_graph |= graph_ms
        final_graph |= graph_time_to

        final_graph = self.add_tokens(final_graph)

        self.fst = final_graph.optimize()