示例#1
0
    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
        """Assemble the "measure" classifier graph and store it in ``self.fst``.

        A measure is a decimal or cardinal quantity followed by a unit read
        from ``data/measurements.tsv``. The quantity "one" is paired with the
        singular unit graph; every other cardinal and every decimal is paired
        with the plural unit graph. A leading "minus" is optional and is
        emitted as ``negative: "true"``.

        Args:
            cardinal: grammar whose ``graph_no_exception`` supplies the
                integer quantity.
            decimal: grammar whose ``final_graph_wo_negative`` supplies the
                decimal quantity.
        """
        super().__init__(name="measure", kind="classify")
        # fields: decimal, fraction, cardinal, units, style(depr)

        number_graph = cardinal.graph_no_exception

        unit_table = pynini.string_file(get_abs_path("data/measurements.tsv"))
        singular_to_abbr = pynini.invert(unit_table)  # singular -> abbr
        plural_to_abbr = get_singulars(singular_to_abbr)  # plural -> abbr

        minus_sign = pynutil.insert("negative: ") + pynini.cross("minus", "\"true\"") + delete_extra_space
        maybe_negative = pynini.closure(minus_sign, 0, 1)

        # "per <unit>" is rewritten with a leading "/" in front of the unit.
        per_unit = pynutil.insert("/") + pynutil.delete("per") + delete_space + convert_space(singular_to_abbr)

        def units_field(unit):
            # A plain unit, a bare "per <unit>", or (slightly penalised) a unit
            # followed by "per <unit>", wrapped as units: "...".
            compound = pynutil.add_weight(unit + delete_space + per_unit, 0.01)
            return pynutil.insert("units: \"") + (unit | per_unit | compound) + pynutil.insert("\"")

        def cardinal_field(integer):
            # cardinal { [negative: "true" ]integer: "<integer>" }
            return (
                pynutil.insert("cardinal { ")
                + maybe_negative
                + pynutil.insert("integer: \"")
                + integer
                + pynutil.insert("\"")
                + pynutil.insert(" }")
            )

        units_singular = units_field(convert_space(singular_to_abbr))
        units_plural = units_field(convert_space(plural_to_abbr))

        decimal_measure = (
            pynutil.insert("decimal { ")
            + maybe_negative
            + decimal.final_graph_wo_negative
            + pynutil.insert(" }")
            + delete_extra_space
            + units_plural
        )

        # Anything but "one" takes the plural unit; "one" takes the singular unit.
        many_measure = cardinal_field((NEMO_SIGMA - "one") @ number_graph) + delete_extra_space + units_plural
        single_measure = cardinal_field(pynini.cross("one", "1")) + delete_extra_space + units_singular
        cardinal_measure = many_measure | single_measure

        tagged = self.add_tokens(decimal_measure | cardinal_measure)
        self.fst = tagged.optimize()
示例#2
0
文件: money.py 项目: noetits/NeMo
    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
        """Assemble the "money" classifier graph and store it in ``self.fst``.

        Accepted shapes, as built below:
          * integer amount + currency, optionally followed by a cents part
            (either "[and] <n> cents" / "one cent", or a bare trailing number);
          * decimal amount + plural currency;
          * a standalone cents phrase, emitted with currency "$" and
            integer_part "0".

        Args:
            cardinal: grammar whose ``graph_no_exception`` supplies integers.
            decimal: grammar whose ``final_graph_wo_negative`` supplies the
                decimal amount.
        """
        super().__init__(name="money", kind="classify")
        # fields: quantity, integer_part, fractional_part, currency

        number_graph = cardinal.graph_no_exception
        decimal_amount = decimal.final_graph_wo_negative
        # Any cardinal except the literal word "one", which needs singular handling.
        not_one = (NEMO_SIGMA - "one") @ number_graph

        currency_table = pynini.string_file(get_abs_path("data/currency.tsv"))
        singular_currency = pynini.invert(currency_table)
        plural_currency = get_singulars(singular_currency)

        def currency_field(unit):
            # currency: "<unit>"
            return pynutil.insert("currency: \"") + convert_space(unit) + pynutil.insert("\"")

        currency_singular = currency_field(singular_currency)
        currency_plural = currency_field(plural_currency)

        # Two digits pass through unchanged; a single digit gets a "0" in front.
        two_digits = (NEMO_DIGIT + NEMO_DIGIT) | (pynutil.insert("0") + NEMO_DIGIT)

        # twelve dollars (and) fifty cents, zero cents
        many_cents = pynutil.add_weight(not_one, -0.7) @ two_digits + delete_space + pynutil.delete("cents")
        one_cent = pynini.cross("one", "01") + delete_space + pynutil.delete("cent")
        cents_phrase = (
            pynutil.insert("fractional_part: \"")
            + pynini.union(many_cents, one_cent)
            + pynutil.insert("\"")
        )

        maybe_and = pynini.closure(pynutil.delete("and") + delete_space, 0, 1)
        maybe_cents_phrase = pynini.closure(delete_space + maybe_and + insert_space + cents_phrase, 0, 1)

        # twelve dollars fifty, only after integer
        bare_cents = pynutil.add_weight(number_graph @ two_digits, -0.7)
        maybe_bare_cents = pynini.closure(
            delete_extra_space + pynutil.insert("fractional_part: \"") + bare_cents + pynutil.insert("\""),
            0,
            1,
        )

        maybe_cents = maybe_cents_phrase | maybe_bare_cents

        def integer_amount(integer, currency):
            # integer_part: "<integer>" <currency field> [fractional_part: "..."]
            return (
                pynutil.insert("integer_part: \"")
                + integer
                + pynutil.insert("\"")
                + delete_extra_space
                + currency
                + maybe_cents
            )

        integer_money = integer_amount(not_one, currency_plural) | integer_amount(
            pynini.cross("one", "1"), currency_singular
        )

        decimal_money = decimal_amount + delete_extra_space + currency_plural
        # Cents with no preceding amount are tagged under "$" with integer_part "0".
        cents_only = pynutil.insert("currency: \"$\" integer_part: \"0\" ") + cents_phrase
        decimal_money = decimal_money | cents_only

        tagged = self.add_tokens(integer_money | decimal_money)
        self.fst = tagged.optimize()