def __init__(self, cardinal: GraphFst, decimal: GraphFst):
    """Assemble the "measure" classification grammar.

    Two number readings are accepted, each followed by a unit:

    * ``decimal { ... } units: "..."`` built on
      ``decimal.final_graph_wo_negative``;
    * ``cardinal { integer: "..." } units: "..."`` built on
      ``cardinal.graph_no_exception``, optionally followed by a
      ``fractional_part: "..."`` field placed after the unit.

    Either reading may start with "âm" or "trừ", which is emitted as
    ``negative: "true"`` inside the number block.

    Args:
        cardinal: cardinal grammar providing ``graph_no_exception``.
        decimal: decimal grammar providing ``final_graph_wo_negative``.
    """
    super().__init__(name="measure", kind="classify")

    def quoted(field: str, body):
        """Surround ``body`` with an inserted ``field: "`` prefix and ``"`` suffix."""
        return pynutil.insert(f'{field}: "') + body + pynutil.insert('"')

    number = cardinal.graph_no_exception

    # Tokens allowed as the trailing fraction after the unit: an entry of
    # the digit table or one of the three special words.
    digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
    four = pynini.cross("tư", "4")
    one = pynini.cross("mốt", "1")
    half = pynini.cross("rưỡi", "5")
    trailing_fraction = delete_extra_space + quoted(
        "fractional_part", digit | half | one | four
    )

    # The measurement table is inverted before use (singular -> abbr).
    unit_table = pynini.invert(
        pynini.string_file(get_abs_path("data/measurements.tsv"))
    )
    bare_unit = convert_space(unit_table)
    # "trên <unit>" is rewritten to "/<unit>".
    per_unit = (
        pynutil.insert("/")
        + pynutil.delete("trên")
        + delete_space
        + convert_space(unit_table)
    )
    # "<unit> trên <unit>" is also accepted, with a small extra weight.
    compound_unit = pynutil.add_weight(bare_unit + delete_space + per_unit, 0.01)
    units = quoted("units", bare_unit | per_unit | compound_unit)

    # Optional leading sign word.
    sign = (
        pynutil.insert("negative: ")
        + pynini.cross(pynini.union("âm", "trừ"), '"true"')
        + delete_extra_space
    )
    optional_sign = pynini.closure(sign, 0, 1)

    decimal_measure = (
        pynutil.insert("decimal { ")
        + optional_sign
        + decimal.final_graph_wo_negative
        + pynutil.insert(" }")
        + delete_extra_space
        + units
    )

    cardinal_measure = (
        pynutil.insert("cardinal { ")
        + optional_sign
        + quoted("integer", number)
        + pynutil.insert(" }")
        + delete_extra_space
        + units
    )
    # Same cardinal reading, extended with the fraction that follows the unit.
    cardinal_measure_with_fraction = cardinal_measure + trailing_fraction

    graph = decimal_measure | cardinal_measure | cardinal_measure_with_fraction
    self.fst = self.add_tokens(graph).optimize()
def __init__(self, cardinal: GraphFst, decimal: GraphFst):
    """Assemble the "money" classification grammar.

    An amount is either ``integer_part: "..."`` built on
    ``cardinal.graph_no_exception`` or the output of
    ``decimal.final_graph_wo_negative``. It is followed by a
    ``currency: "..."`` field taken from the inverted currency table and
    an optional ``fractional_part: "..."`` field.

    Args:
        cardinal: cardinal grammar providing ``graph_no_exception``.
        decimal: decimal grammar providing ``final_graph_wo_negative``.
    """
    super().__init__(name="money", kind="classify")
    # quantity, integer_part, fractional_part, currency

    amount = cardinal.graph_no_exception
    decimal_amount = decimal.final_graph_wo_negative
    half = pynini.cross("rưỡi", "5")

    # The currency table is inverted before use.
    currency_table = pynini.invert(
        pynini.string_file(get_abs_path("data/currency.tsv"))
    )
    currency_field = (
        pynutil.insert('currency: "')
        + convert_space(currency_table)
        + pynutil.insert('"')
    )

    # Two digits pass through unchanged; a lone digit gets a leading "0".
    two_digit_form = (NEMO_DIGIT + NEMO_DIGIT) | (pynutil.insert("0") + NEMO_DIGIT)

    # Optional amount spoken after the currency (e.g. twelve dollars fifty):
    # a cardinal normalised to two digits (favoured by the negative weight)
    # or the word "rưỡi".
    minor_amount = pynutil.add_weight(amount @ two_digit_form, -0.7) | half
    optional_minor = pynini.closure(
        delete_extra_space
        + pynutil.insert('fractional_part: "')
        + minor_amount
        + pynutil.insert('"'),
        0,
        1,
    )

    # Both amount readings share the same continuation.
    currency_tail = delete_extra_space + currency_field + optional_minor

    integer_reading = (
        pynutil.insert('integer_part: "')
        + amount
        + pynutil.insert('"')
        + currency_tail
    )
    decimal_reading = decimal_amount + currency_tail

    graph = integer_reading | decimal_reading
    self.fst = self.add_tokens(graph).optimize()
def __init__(self):
    """Assemble the "whitelist" classification grammar.

    The entries of ``data/whitelist.tsv`` are inverted and each match is
    emitted as a ``name: "..."`` field. Unlike the other classifiers in
    this file, the result is not passed through ``add_tokens``.
    """
    super().__init__(name="whitelist", kind="classify")

    table = pynini.string_file(get_abs_path("data/whitelist.tsv"))
    entries = pynini.invert(table)

    open_field = pynutil.insert('name: "')
    close_field = pynutil.insert('"')
    tagged = open_field + convert_space(entries) + close_field

    self.fst = tagged.optimize()
def __init__(self):
    """Assemble the "time" classification grammar.

    Accepted shapes (fields emitted: hours, minutes, seconds, zone):

    * hour only, hour + minutes, hour + minutes + seconds, each optionally
      followed by a time zone from the inverted time-zone table;
    * minutes + seconds, without a time zone;
    * "<hour> giờ kém <minutes>", where the hour and minute values are
      additionally mapped through ``hours_to.tsv`` and ``minutes_to.tsv``,
      without a time zone.

    NOTE(review): the "kém" mappings presumably yield the previous hour
    and the complementary minutes; confirm against the data files.
    """
    super().__init__(name="time", kind="classify")
    # hours, minutes, seconds, suffix, zone, style, speak_period

    def quoted(field: str, body):
        """Surround ``body`` with an inserted ``field: "`` prefix and ``"`` suffix."""
        return pynutil.insert(f'{field}: "') + body + pynutil.insert('"')

    # Data tables, loaded in the same order as before.
    hours_to = pynini.string_file(get_abs_path("data/time/hours_to.tsv"))
    minutes_to = pynini.string_file(get_abs_path("data/time/minutes_to.tsv"))
    hours = pynini.string_file(get_abs_path("data/time/hours.tsv"))
    minutes = pynini.string_file(get_abs_path("data/time/minutes.tsv"))
    time_zone = pynini.invert(
        pynini.string_file(get_abs_path("data/time/time_zone.tsv"))
    )

    # Words that are consumed without producing output.
    half = pynini.cross("rưỡi", "30")
    hour_word = pynini.cross("giờ", "")
    minute_word = pynini.cross("phút", "")
    second_word = pynini.cross("giây", "")
    optional_minute_word = pynini.closure(delete_space + minute_word, 0, 1)

    # Value graphs. The seconds value reuses the minute graph, including
    # its optional "phút", followed by a mandatory "giây".
    minute_value = minutes + optional_minute_word
    second_value = minute_value + delete_space + second_word

    optional_zone = pynini.closure(
        delete_space + insert_space + quoted("zone", convert_space(time_zone)),
        0,
        1,
    )

    # <hour> giờ
    hour_only = quoted("hours", hours) + delete_space + hour_word
    # <hour> giờ <minutes | rưỡi>
    hour_minute = (
        hour_only + delete_extra_space + quoted("minutes", minute_value | half)
    )
    # <hour> giờ <minutes | rưỡi> <seconds> giây
    hour_minute_second = (
        hour_minute + delete_extra_space + quoted("seconds", second_value)
    )
    # <minutes> <seconds giây | rưỡi>
    minute_second = (
        quoted("minutes", minute_value)
        + delete_extra_space
        + quoted("seconds", second_value | half)
    )
    # <hour> giờ kém <minutes>
    time_to = (
        quoted("hours", hours @ hours_to)
        + delete_space
        + hour_word
        + delete_space
        + pynutil.delete("kém")
        + delete_extra_space
        + quoted("minutes", (minutes @ minutes_to) + optional_minute_word)
    )

    # Only the hour-based shapes may carry a time zone.
    graph = (hour_only | hour_minute | hour_minute_second) + optional_zone
    graph |= minute_second
    graph |= time_to

    self.fst = self.add_tokens(graph).optimize()