Пример #1
0
         ),
     ),
 ),
 (
     "dollars",
     (
         re.compile(r"\$[0-9]{1,}\.?[0-9]{0,}[mbkMBK]?"),
         lambda m: dollars_to_string(m.group()),
     ),
 ),
 ("percent", (re.compile(r"\%"), lambda m: " percent")),
 (
     "fractions",
     (
         re.compile(r"\b[0-9]\s?\/\s?[0-9]\b"),
         lambda m: fraction_to_string(m.group()),
     ),
 ),
 (
     "plural_numbers",
     (
         re.compile(r"\b[0-9]{1,}s\b"),
         lambda m: plural_numbers_to_string(m.group()),
     ),
 ),
 (
     "numbers",
     (
         re.compile(r"[0-9\.]{1,}"),
         lambda m: " " + digits_to_string(m.group()) + " ",
     ),
Пример #2
0
      )
    ),
    ("acronyms", (re.compile(r"\b(([A-Z]){1,}[.]?){2,}\b"), lambda m: " ".join(m.group().lower().replace(".", "")))),
    ("dashes", (re.compile(r"\-[0-9]\b"), lambda m: "negative " + m.group()[1:])),
    ("negatives", (re.compile(r" \- "), lambda m: "")),
    ("positives", (re.compile(r"\+"), lambda m: " plus ")),
    ("ordinals", (re.compile(r"[0-9]{1,}(st|nd|rd|th)"), lambda m: ordinal_to_string(m.group()))),
    (
      "many_dollars", (
        re.compile(r"\$([0-9]{1,}\.?[0-9]{0,})\s(billion|million|trillion)"),
        lambda m: " ".join([digits_to_string(m.groups()[0]), m.groups()[1], "dollars"])
      )
    ),
    ("dollars", (re.compile(r"\$[0-9]{1,}\.?[0-9]{0,}[mbMB]?"), lambda m: dollars_to_string(m.group()))),
    ("percent", (re.compile(r"\%"), lambda m: " percent")),
    ("fractions", (re.compile(r"\b[0-9]\s?\/\s[0-9]\b"), lambda m: fraction_to_string(m.group()))),
    ("plural_numbers", (re.compile(r"\b[0-9]{1,}s\b"), lambda m: plural_numbers_to_string(m.group()))),
    ("numbers", (re.compile(r"[0-9\.]{1,}"), lambda m: " " + digits_to_string(m.group()) + " ")),
    ("apostrophes", (re.compile(r"\'"), lambda m: " \'")),
  ]
)


def remove_special_chars(line, chars_to_replace):
  """Replace every occurrence of each given substring in ``line`` with a space.

  Args:
    line: Text to clean.
    chars_to_replace: Iterable of strings to blank out (the name suggests
      single characters, but any substring works). Entries are applied in
      iteration order. Empty entries are ignored.

  Returns:
    The text with each occurrence of every non-empty entry replaced by a
    single space.
  """
  for char_to_replace in chars_to_replace:
    # str.replace('', ' ') matches between every pair of characters and would
    # pad the whole line with spaces, so an empty entry is treated as a no-op.
    if not char_to_replace:
      continue
    line = line.replace(char_to_replace, ' ')
  return line


def remove_double_spaces(line):