def __init__(self, cardinal: GraphFst, deterministic: bool = True):
    """Assemble the "decimal" classify grammar and store it in ``self.fst``.

    Args:
        cardinal: grammar object whose ``graph``, ``tens`` and ``hundreds``
            attributes are reused for the integer and fractional parts.
        deterministic: selects which set of fractional/integer readings is built
            (see the two branches below).

    Besides ``self.fst`` this sets ``self.graph``, ``self.graph_fractional``,
    ``self.graph_integer`` and ``self.final_graph_wo_negative``.
    """
    super().__init__(name="decimal", kind="classify", deterministic=deterministic)

    single_digit = digit | zero

    if deterministic:
        # General pattern seems to be 1-3 digits: map as cardinal, default to digits otherwise
        fractional = pynini.union(
            single_digit,
            cardinal.tens,
            cardinal.hundreds,
            single_digit + pynini.closure(insert_space + single_digit, 3),
            # For cases such as "1,010"
            zero + pynini.closure(insert_space + zero) + pynini.closure(insert_space + single_digit),
        )
    else:
        # Any sequence of digit / tens / hundreds groups, separated by inserted spaces
        chunk = pynini.union(single_digit, cardinal.hundreds, cardinal.tens)
        fractional = chunk + pynini.closure(insert_space + chunk)

    # Apocope ("un"/"ún") is rewritten to "uno" wherever a space follows,
    # i.e. everywhere BUT at the end of the string
    undo_apocope = pynini.cdrewrite(
        pynini.string_map([("un", "uno"), ("ún", "uno")]), "", NEMO_SPACE, NEMO_SIGMA
    )
    fractional = fractional @ undo_apocope

    # Technically decimals should be space delineated groups of three, e.g. (1,333 333).
    # Any such spaces are deleted before the digits are read
    drop_spaces = pynini.cdrewrite(delete_space, "", "", NEMO_SIGMA)
    self.graph = (drop_spaces @ fractional).optimize()

    self.graph_fractional = pynutil.insert("fractional_part: \"") + self.graph + pynutil.insert("\"")

    # Integer graph: deterministic mode uses only the apocope-stripped form,
    # otherwise both the original and the stripped forms are accepted
    stripped_integer = strip_cardinal_apocope(cardinal.graph)
    if deterministic:
        integer_words = stripped_integer
    else:
        integer_words = pynini.union(cardinal.graph, stripped_integer)
    self.graph_integer = pynutil.insert("integer_part: \"") + integer_words + pynutil.insert("\"")

    unsigned = (
        self.graph_integer
        + pynutil.delete(decimal_separator)
        + insert_space
        + self.graph_fractional
    )
    self.final_graph_wo_negative = unsigned | get_quantity(unsigned, cardinal.graph).optimize()

    # A leading "-" becomes the field negative: "true"
    optional_minus = pynini.closure(
        pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0, 1
    )
    tagged = optional_minus + self.final_graph_wo_negative + pynutil.insert(" preserve_order: true")

    self.fst = self.add_tokens(tagged).optimize()
def __init__(self, deterministic: bool = True):
    """Assemble the "cardinal" verbalize grammar and store it in ``self.fst``.

    Args:
        deterministic: when False, two extra alternatives are added to the
            grammar (apocope-stripped masculine, apocope-added feminine).

    Also exposes ``self.optional_sign``, ``self.integer``, ``self.graph_masc``
    and ``self.graph_fem``.
    """
    super().__init__(name="cardinal", kind="verbalize", deterministic=deterministic)

    # negative: "true"  ->  "menos "
    self.optional_sign = pynini.closure(pynini.cross("negative: \"true\" ", "menos "), 0, 1)

    # Quoted field value with the surrounding ` "` and `"` removed
    self.integer = pynutil.delete(" \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")

    masculine = self.optional_sign + (pynutil.delete("integer:") + self.integer)
    feminine = shift_cardinal_gender(masculine)

    self.graph_masc = pynini.optimize(masculine)
    self.graph_fem = pynini.optimize(feminine)

    # Adding adjustment for fem gender (choice of gender will be random)
    alternatives = [masculine, feminine]
    if not deterministic:
        # For alternate renderings when apocope is omitted (i.e. cardinal stands alone)
        alternatives.append(strip_cardinal_apocope(masculine))
        # "una" will drop to "un" in unique contexts
        alternatives.append(add_cardinal_apocope_fem(feminine))

    self.fst = self.delete_tokens(pynini.union(*alternatives)).optimize()
def __init__(self, deterministic: bool = True):
    """Assemble the "decimal" verbalize grammar and store it in ``self.fst``.

    The grammar consumes the tagged fields ``negative``, ``integer_part``,
    ``fractional_part`` and ``quantity`` and emits the spoken form.

    Args:
        deterministic: when False, extra conjunctions (" con ", " y ") and
            additional apocope variants are accepted.

    Also exposes ``self.numbers_only_quantity``, ``self.graph_masc`` and
    ``self.graph_fem`` for reuse by other grammars.
    """
    # This grammar strips token wrappers (delete_tokens) and consumes tagged fields,
    # so it is registered as a verbalizer, consistent with the other verbalizers.
    super().__init__(name="decimal", kind="verbalize", deterministic=deterministic)

    # negative: "true"  ->  "menos "
    optional_sign = pynini.closure(
        pynini.cross("negative: \"true\"", "menos ") + delete_space, 0, 1)

    integer = pynutil.delete("integer_part: \"") + pynini.closure(
        NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")

    fractional_default = (pynutil.delete("fractional_part: \"") +
                          pynini.closure(NEMO_NOT_QUOTE, 1) +
                          pynutil.delete("\""))

    # Word inserted between integer and fractional part depends on LOCALIZATION
    conjunction = pynutil.insert(
        " punto ") if LOCALIZATION == "am" else pynutil.insert(" coma ")
    if not deterministic:
        conjunction |= pynutil.insert(pynini.union(" con ", " y "))
        fractional_default |= strip_cardinal_apocope(fractional_default)
    fractional = conjunction + fractional_default

    quantity = (delete_space + insert_space +
                pynutil.delete("quantity: \"") +
                pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\""))
    optional_quantity = pynini.closure(quantity, 0, 1)

    # Either "integer quantity" or "integer <conjunction> fractional [quantity]"
    graph_masc = optional_sign + pynini.union(
        (integer + quantity),
        (integer + delete_space + fractional + optional_quantity))

    # Allowing permutation for fem gender, don't include quantity since "million","billion", etc.. are masculine
    graph_fem = optional_sign + (shift_cardinal_gender(integer) +
                                 delete_space +
                                 shift_number_gender(fractional))
    if not deterministic:  # "una" will drop to "un" in certain cases
        graph_fem |= add_cardinal_apocope_fem(graph_fem)

    # Same as graph_masc but with the quantity field mandatory
    self.numbers_only_quantity = (optional_sign + pynini.union(
        (integer + quantity),
        (integer + delete_space + fractional + quantity)).optimize())

    self.graph_masc = (graph_masc + delete_preserve_order).optimize()
    self.graph_fem = (graph_fem + delete_preserve_order).optimize()

    graph = graph_masc | graph_fem
    graph += delete_preserve_order

    delete_tokens = self.delete_tokens(graph)
    self.fst = delete_tokens.optimize()
def __init__(self, deterministic: bool = True):
    """Assemble the "date" verbalize grammar and store it in ``self.fst``.

    Two field orders are accepted: day-month-year and month-day-year.
    ``self.graph`` holds their union before ``preserve_order`` handling.

    Args:
        deterministic: when True a lone day "uno" is always rendered "primero"
            and the month-day-year order requires an explicit
            `` preserve_order: true``; when False both day renderings are kept.
    """
    super().__init__(name="date", kind="verbalize", deterministic=deterministic)

    field_text = pynini.closure(NEMO_NOT_QUOTE, 1)
    end_quote = pynutil.delete("\"")

    # Day: unwrap the field, strip apocope, then optionally map a lone "uno" to "primero"
    day_words = strip_cardinal_apocope(pynutil.delete("day: \"") + field_text + end_quote)
    to_primero = pynini.cdrewrite(pynini.cross("uno", "primero"), "[BOS]", "[EOS]", NEMO_SIGMA)
    day_primero = day_words @ to_primero
    # Primero for first day is traditional, but will vary depending on region
    if deterministic:
        day = day_primero
    else:
        day = pynini.union(day_words, day_primero)

    month = pynutil.delete("month: \"") + field_text + end_quote

    # Year that already starts with an article is slightly preferred (negative weight);
    # otherwise the preposition "de " is inserted
    year_with_article = pynutil.add_weight(
        pynutil.delete("year: \"") + articles + NEMO_SPACE + field_text + end_quote,
        -0.001,
    )
    year_plain = pynutil.delete("year: \"") + pynutil.insert("de ") + field_text + end_quote
    year = year_with_article | year_plain

    # day month year
    day_month_year = (
        day
        + pynini.cross(NEMO_SPACE, " de ")
        + month
        + pynini.closure(pynini.accep(" ") + year, 0, 1)
    )

    # month day year
    month_day_year = month + NEMO_SPACE + day + pynini.closure(NEMO_SPACE + year, 0, 1)
    if deterministic:
        # Only accepts this if was explicitly passed
        month_day_year = month_day_year + pynutil.delete(" preserve_order: true")

    self.graph = day_month_year | month_day_year

    self.fst = self.delete_tokens(self.graph + delete_preserve_order).optimize()
def __init__(self, deterministic: bool = True):
    """Assemble the "fraction" verbalize grammar and store it in ``self.fst``.

    The grammar consumes the tagged fields ``integer_part``, ``numerator``,
    ``denominator`` and ``morphosyntactic_features`` ("add_root" / "ordinal").

    Args:
        deterministic: when False, additional renderings are built (exceptional
            denominators, feminine "parte(s)" forms, unmerged stems).

    Also exposes ``self.graph_masc``, ``self.graph_fem`` and ``self.graph``.
    """
    super().__init__(name="fraction", kind="verbalize", deterministic=deterministic)

    # Derivational strings append 'avo' as a suffix. Adding space for processing aid
    fraction_stem = pynutil.insert(" avo")
    plural = pynutil.insert("s")
    conjunction = pynutil.insert(" y ")

    # Field extractors: each deletes the field name and quotes, keeping the value
    integer = (pynutil.delete("integer_part: \"") +
               strip_cardinal_apocope(pynini.closure(NEMO_NOT_QUOTE)) +
               pynutil.delete("\""))

    # Numerator is split into the literal "un" and everything else
    numerator_one = pynutil.delete("numerator: \"") + pynini.accep(
        "un") + pynutil.delete("\" ")
    numerator = (pynutil.delete("numerator: \"") +
                 pynini.difference(pynini.closure(NEMO_NOT_QUOTE), "un") +
                 pynutil.delete("\" "))

    # Denominator variants, selected by the trailing morphosyntactic_features field
    denominator_add_stem = pynutil.delete("denominator: \"") + (
        pynini.closure(NEMO_NOT_QUOTE) + fraction_stem +
        pynutil.delete("\" morphosyntactic_features: \"add_root\""))
    denominator_ordinal = pynutil.delete("denominator: \"") + (
        pynini.closure(NEMO_NOT_QUOTE) +
        pynutil.delete("\" morphosyntactic_features: \"ordinal\""))
    denominator_cardinal = pynutil.delete("denominator: \"") + (
        pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\""))

    denominator_singular = pynini.union(denominator_add_stem,
                                        denominator_ordinal)
    if not deterministic:
        # Occasional exceptions
        denominator_singular |= denominator_add_stem @ pynini.string_map(
            [("once avo", "undécimo"), ("doce avo", "duodécimo")])
    denominator_plural = denominator_singular + plural

    # Merging operations
    merge = pynini.cdrewrite(
        pynini.cross(" y ", "i"), "", "", NEMO_SIGMA
    )  # The denominator must be a single word, with the conjunction "y" replaced by i
    merge @= pynini.cdrewrite(delete_space, "", pynini.difference(NEMO_CHAR, "parte"), NEMO_SIGMA)

    # The merger can produce duplicate vowels. This is not allowed in orthography
    delete_duplicates = pynini.string_map([("aa", "a"), ("oo", "o")])  # Collapses doubled vowels
    delete_duplicates = pynini.cdrewrite(delete_duplicates, "", "", NEMO_SIGMA)

    # Applies the `accents` mapping inside a word that ends in one of the listed suffixes
    remove_accents = pynini.cdrewrite(
        accents,
        pynini.union(NEMO_SPACE, pynini.accep("[BOS]")) + pynini.closure(NEMO_NOT_SPACE),
        pynini.closure(NEMO_NOT_SPACE) + pynini.union("avo", "ava", "ésimo", "ésima"),
        NEMO_SIGMA,
    )
    merge_into_single_word = merge @ remove_accents @ delete_duplicates

    # "<numerator> <plural denominator>" and "un <singular denominator>"
    fraction_default = numerator + delete_space + insert_space + (
        denominator_plural @ merge_into_single_word)
    fraction_with_one = (numerator_one + delete_space + insert_space +
                         (denominator_singular @ merge_into_single_word))

    # "<numerator> sobre <denominator>" when the denominator carries no features field
    fraction_with_cardinal = strip_cardinal_apocope(numerator | numerator_one)
    fraction_with_cardinal += (
        delete_space + pynutil.insert(" sobre ") + strip_cardinal_apocope(denominator_cardinal))

    if not deterministic:
        # There is an alternative rendering where ordinals act as adjectives for 'parte'. This requires use of the feminine
        # Other rules will manage use of "un" at end, so just worry about endings
        exceptions = pynini.string_map([("tercia", "tercera")])
        apply_exceptions = pynini.cdrewrite(exceptions, "", "", NEMO_SIGMA)
        # Final "o" -> "a"
        vowel_change = pynini.cdrewrite(pynini.cross("o", "a"), "", pynini.accep("[EOS]"), NEMO_SIGMA)

        denominator_singular_fem = shift_cardinal_gender(
            denominator_singular) @ vowel_change @ apply_exceptions
        denominator_plural_fem = denominator_singular_fem + plural

        numerator_one_fem = shift_cardinal_gender(numerator_one)
        numerator_fem = shift_cardinal_gender(numerator)

        fraction_with_cardinal |= (
            (numerator_one_fem | numerator_fem) + delete_space +
            pynutil.insert(" sobre ") + shift_cardinal_gender(denominator_cardinal))

        # Still need to manage stems
        merge_stem = pynini.cdrewrite(
            delete_space, "", pynini.union("avo", "ava", "avos", "avas"),
            NEMO_SIGMA)  # For managing alternative spacing
        merge_stem @= remove_accents @ delete_duplicates

        fraction_with_one_fem = numerator_one_fem + delete_space + insert_space
        fraction_with_one_fem += pynini.union(
            denominator_singular_fem @ merge_stem,
            denominator_singular_fem @ merge_into_single_word)  # Both forms exist
        fraction_with_one_fem += pynutil.insert(" parte")
        fraction_with_one_fem @= pynini.cdrewrite(
            pynini.cross("una media", "media"), "", "", NEMO_SIGMA)  # "media" not "una media"

        fraction_default_fem = numerator_fem + delete_space + insert_space
        fraction_default_fem += pynini.union(
            denominator_plural_fem @ merge_stem,
            denominator_plural_fem @ merge_into_single_word)
        fraction_default_fem += pynutil.insert(" partes")

        fraction_default |= (numerator + delete_space + insert_space +
                             denominator_plural @ merge_stem
                             )  # Case of no merger
        fraction_default |= fraction_default_fem

        fraction_with_one |= numerator_one + delete_space + insert_space + denominator_singular @ merge_stem
        fraction_with_one |= fraction_with_one_fem

    fraction_with_one @= pynini.cdrewrite(pynini.cross(
        "un medio", "medio"), "", "", NEMO_SIGMA)  # "medio" not "un medio"

    # Masculine graph: optional "<integer> y " followed by any fraction rendering
    fraction = fraction_with_one | fraction_default | fraction_with_cardinal
    graph_masc = pynini.closure(integer + delete_space + conjunction, 0, 1) + fraction

    # Manage cases of fem gender (only shows on integer except for "medio")
    integer_fem = shift_cardinal_gender(integer)
    fraction_default |= (
        shift_cardinal_gender(numerator) + delete_space + insert_space +
        (denominator_plural @ pynini.cross("medios", "medias")))
    fraction_with_one |= (
        pynutil.delete(numerator_one) + delete_space +
        (denominator_singular @ pynini.cross("medio", "media")))

    # Feminine graph is built from the accumulators after the feminine additions above
    fraction_fem = fraction_with_one | fraction_default | fraction_with_cardinal
    graph_fem = pynini.closure(integer_fem + delete_space + conjunction, 0, 1) + fraction_fem

    self.graph_masc = pynini.optimize(graph_masc)
    self.graph_fem = pynini.optimize(graph_fem)

    self.graph = graph_masc | graph_fem

    delete_tokens = self.delete_tokens(self.graph)
    self.fst = delete_tokens.optimize()
def __init__(self, cardinal: GraphFst, decimal: GraphFst, fraction: GraphFst, deterministic: bool = True):
    """Assemble the "measure" classify grammar and store it in ``self.fst``.

    Args:
        cardinal: grammar providing ``graph`` and ``fst`` for whole numbers.
        decimal: grammar providing ``fst`` and ``final_graph_wo_negative``.
        fraction: grammar providing ``fst``.
        deterministic: forwarded to the base class constructor.
    """
    super().__init__(name="measure", kind="classify", deterministic=deterministic)

    def units_field(body):
        # Wrap `body` as: units: "<body>"
        return pynutil.insert("units: \"") + body + pynutil.insert("\"")

    def cardinal_open():
        return pynutil.insert("cardinal { integer: \"")

    def decimal_open():
        return pynutil.insert("decimal { ")

    cardinal_words = cardinal.graph

    # Unit names: singular as listed, plural via the fem/masc pluralisation maps
    singular_units = convert_space(unit)
    plural_units = convert_space(unit @ (unit_plural_fem | unit_plural_masc))

    optional_minus = pynini.closure("-", 0, 1)
    optional_space_removed = pynini.closure(delete_space, 0, 1)
    times_sign = pynini.union('x', 'X')
    alpha_word = pynini.closure(NEMO_ALPHA, 1)
    dash_removed = pynutil.delete('-')

    # "/unit" is read as "por unit", joined with non-breaking spaces
    per_unit = (
        pynini.cross("/", "por")
        + pynutil.insert(NEMO_NON_BREAKING_SPACE)
        + singular_units
    )
    optional_per_unit = pynini.closure(
        pynutil.insert(NEMO_NON_BREAKING_SPACE) + per_unit, 0, 1
    )

    plural_field = units_field((plural_units + optional_per_unit) | per_unit)
    singular_field = units_field((singular_units + optional_per_unit) | per_unit)

    # <decimal> [space] <plural unit>
    decimal_with_unit = decimal.fst + insert_space + pynini.closure(NEMO_SPACE, 0, 1) + plural_field

    # Whole numbers: anything other than "1" takes the plural unit, "1" takes the singular
    cardinal_with_unit = (
        (optional_minus + (NEMO_SIGMA - "1")) @ cardinal.fst
        + insert_space
        + optional_space_removed
        + plural_field
    ) | (
        (optional_minus + pynini.accep("1")) @ cardinal.fst
        + insert_space
        + optional_space_removed
        + singular_field
    )

    fraction_with_unit = fraction.fst + insert_space + optional_space_removed + singular_field

    # <number>x / <number>X
    decimal_times = (
        decimal_open()
        + decimal.final_graph_wo_negative
        + pynutil.insert(" } units: \"")
        + times_sign
        + pynutil.insert("\"")
    )
    cardinal_times = (
        cardinal_open()
        + strip_cardinal_apocope(cardinal_words)
        + pynutil.insert("\" } units: \"")
        + times_sign
        + pynutil.insert("\"")
    )

    # <number>-<letters>
    cardinal_dash_alpha = (
        cardinal_open()
        + strip_cardinal_apocope(cardinal_words)
        + dash_removed
        + pynutil.insert("\" } units: \"")
        + alpha_word
        + pynutil.insert("\"")
    )
    decimal_dash_alpha = (
        decimal_open()
        + decimal.final_graph_wo_negative
        + dash_removed
        + pynutil.insert(" } units: \"")
        + alpha_word
        + pynutil.insert("\"")
    )

    # <letters>-<number>
    alpha_dash_cardinal = (
        pynutil.insert("units: \"")
        + alpha_word
        + dash_removed
        + pynutil.insert("\"")
        + pynutil.insert(" cardinal { integer: \"")
        + cardinal_words
        + pynutil.insert("\" } preserve_order: true")
    )
    alpha_dash_decimal = (
        pynutil.insert("units: \"")
        + alpha_word
        + dash_removed
        + pynutil.insert("\"")
        + pynutil.insert(" decimal { ")
        + decimal.final_graph_wo_negative
        + pynutil.insert(" } preserve_order: true")
    )

    branches = (
        decimal_with_unit,
        cardinal_with_unit,
        cardinal_dash_alpha,
        alpha_dash_cardinal,
        decimal_dash_alpha,
        fraction_with_unit,
        decimal_times,
        cardinal_times,
        alpha_dash_decimal,
    )
    # NOTE(review): the alpha_dash_* branches already insert "preserve_order: true",
    # so they receive it a second time here - confirm this duplication is intended.
    tagged = pynini.union(*branches) + pynutil.insert(" preserve_order: true")

    self.fst = self.add_tokens(tagged).optimize()
def __init__(self, decimal: GraphFst, deterministic: bool = True):
    """Assemble the "money" verbalize grammar and store it in ``self.fst``.

    Args:
        decimal: decimal verbalizer; its ``graph_masc``, ``graph_fem`` and
            ``numbers_only_quantity`` graphs are reused for decimal amounts.
        deterministic: forwarded to the base class constructor.
    """
    super().__init__(name="money", kind="verbalize", deterministic=deterministic)

    def currency_field(prefix, morphology):
        # Unwrap a quoted currency field and pass its value through `morphology`
        return (
            pynutil.delete(prefix)
            + (pynini.closure(NEMO_NOT_QUOTE, 1) @ morphology)
            + pynutil.delete("\"")
        )

    # Major / minor currency names, plural or singular, per gender
    # (original note: tagger kept quantity resolution stable)
    maj_masc = currency_field("currency_maj: \"", masc_plural) | currency_field("currency_maj: \"", masc_singular)
    maj_fem = currency_field("currency_maj: \"", fem_plural) | currency_field("currency_maj: \"", fem_singular)
    min_masc = currency_field("currency_min: \"", masc_plural) | currency_field("currency_min: \"", masc_singular)
    min_fem = currency_field("currency_min: \"", fem_plural) | currency_field("currency_min: \"", fem_singular)

    fractional_part = (
        pynutil.delete("fractional_part: \"")
        + pynini.closure(NEMO_NOT_QUOTE, 1)
        + pynutil.delete("\"")
    )
    integer_part = (
        pynutil.delete("integer_part: \"")
        + pynini.closure(NEMO_NOT_QUOTE, 1)
        + pynutil.delete("\"")
    )
    fractional_part_fem = shift_cardinal_gender(fractional_part)

    # Optional "con " / "y " before the minor amount
    optional_add_and = pynini.closure(pynutil.insert(pynini.union("con ", "y ")), 0, 1)

    # *** currency_maj
    integer_masc = integer_part + NEMO_SPACE + maj_masc
    integer_fem = shift_cardinal_gender(integer_part) + NEMO_SPACE + maj_fem
    graph_integer = integer_fem | integer_masc

    # *** currency_maj + (***) | ((con) *** currency_min)
    # The minor currency may differ in gender from the major one, so both
    # minor variants are offered after either major form
    minor_with_masc_name = optional_add_and + fractional_part + NEMO_SPACE + min_masc
    minor_with_fem_name = optional_add_and + fractional_part_fem + NEMO_SPACE + min_fem

    integer_with_minor_masc = (
        integer_masc
        + NEMO_SPACE
        + pynini.union(
            optional_add_and + strip_cardinal_apocope(fractional_part),
            minor_with_masc_name,
            minor_with_fem_name,
        )
        + delete_preserve_order
    )
    integer_with_minor_fem = (
        integer_fem
        + NEMO_SPACE
        + pynini.union(
            optional_add_and + fractional_part_fem,
            minor_with_masc_name,
            minor_with_fem_name,
        )
        + delete_preserve_order
    )
    graph_integer_with_minor = integer_with_minor_fem | integer_with_minor_masc

    # *** coma *** currency_maj
    graph_decimal_masc = decimal.graph_masc + NEMO_SPACE + maj_masc

    # NOTE(review): graph_decimal_fem starts as an alias of decimal.graph_fem; confirm that
    # "|=" / "+=" rebind rather than mutate the shared FST. Statements kept exactly as written.
    graph_decimal_fem = decimal.graph_fem
    graph_decimal_fem |= decimal.numbers_only_quantity  # can still have "x billions" with fem currency
    graph_decimal_fem += NEMO_SPACE + maj_fem

    # formally it's millones/billones de ***
    insert_de_after_quantity = pynini.cdrewrite(
        pynutil.insert(" de"),
        "quantity: \"" + pynini.closure(NEMO_NOT_QUOTE, 1),
        "\"",
        NEMO_SIGMA,
    )
    graph_decimal = insert_de_after_quantity @ (graph_decimal_fem | graph_decimal_masc)

    # *** currency_min
    minor_masc = fractional_part + NEMO_SPACE + min_masc + delete_preserve_order
    minor_fem = fractional_part_fem + NEMO_SPACE + min_fem + delete_preserve_order
    graph_minor = minor_fem | minor_masc

    graph = graph_integer | graph_integer_with_minor | graph_decimal | graph_minor

    self.fst = self.delete_tokens(graph).optimize()