Пример #1
0
def normalize(text, lang=None, remove_articles=True):
    """Normalize a string by delegating to a language-specific routine.

    The primary language code is derived from ``lang`` and used to pick
    the matching ``normalize_<code>`` function, which performs the actual
    work (making numbers consistent, expanding contractions, etc.).

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses default
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string. When the language has no normalizer,
               the unsupported language is logged and ``text`` is returned
               unchanged.
    """
    primary = get_primary_lang_code(lang)

    # Ordered (language code, normalizer) pairs; the order also defines the
    # supported-language list reported for an unsupported code.
    normalizers = (
        ("en", normalize_en),
        ("es", normalize_es),
        ("pt", normalize_pt),
        ("it", normalize_it),
        ("fr", normalize_fr),
        ("sv", normalize_sv),
        ("de", normalize_de),
        ("da", normalize_da),
        ("nl", normalize_nl),
    )

    for code, normalizer in normalizers:
        if primary == code:
            return normalizer(text, remove_articles)

    # TODO: Normalization for other languages
    supported = [code for code, _ in normalizers]
    _log_unsupported_language(primary, supported)
    return text
Пример #2
0
def normalize(text, lang=None, remove_articles=True):
    """Get a string ready for parsing.

    Resolves the primary language code for ``lang`` and hands the text to
    the normalizer for that language (number consistency, contraction
    removal, etc. happen there).

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses default
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string; the input ``text`` itself when no
               normalizer exists for the language (the unsupported code
               is logged first).
    """
    resolved_code = get_primary_lang_code(lang)

    # Parallel sequences: supported_codes[i] is handled by handlers[i].
    supported_codes = ['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da']
    handlers = (
        normalize_en,
        normalize_es,
        normalize_pt,
        normalize_it,
        normalize_fr,
        normalize_sv,
        normalize_de,
        normalize_da,
    )

    for candidate, handler in zip(supported_codes, handlers):
        if resolved_code == candidate:
            return handler(text, remove_articles)

    # TODO: Normalization for other languages
    _log_unsupported_language(resolved_code, supported_codes)
    return text