Пример #1
0
def normalize(text, lang="en-us", remove_articles=True):
    """Prepare a string for parsing.

    Hands the text to the normalizer of the requested language, which
    makes numbers consistent, gets rid of contractions, etc. The language
    is chosen case-insensitively from the start of ``lang``.

    Args:
        text (str): the string to normalize
        lang (str): the code for the language text is in
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'); forwarded to the language normalizer
    Returns:
        (str): The normalized string, or ``text`` unchanged when there is
               no normalizer for the language.
    """
    # Every supported prefix is exactly two characters long, so comparing
    # the first two characters equals a startswith() test on the full code.
    primary = str(lang).lower()[:2]

    if primary == "en":
        return normalize_en(text, remove_articles)
    if primary == "es":
        return normalize_es(text, remove_articles)
    if primary == "pt":
        return normalize_pt(text, remove_articles)
    if primary == "it":
        return normalize_it(text, remove_articles)
    if primary == "fr":
        return normalize_fr(text, remove_articles)
    if primary == "sv":
        return normalize_sv(text, remove_articles)
    if primary == "de":
        return normalize_de(text, remove_articles)

    # TODO: Normalization for other languages
    return text
Пример #2
0
def normalize(text, lang="en-us", remove_articles=True):
    """Prepare a string for parsing.

    Hands the text to the normalizer of the requested language, which
    makes numbers consistent, gets rid of contractions, etc. The language
    is chosen case-insensitively from the start of ``lang``.

    Args:
        text (str): the string to normalize
        lang (str): the code for the language text is in
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.
    Returns:
        (str): The normalized string, or ``text`` unchanged (after a
               warning is logged) when the language is not recognized.
    """
    code = str(lang).lower()

    # (language prefix, normalizer) pairs, tried in this order.
    normalizers = (
        ("en", normalize_en),
        ("es", normalize_es),
        ("pt", normalize_pt),
        ("it", normalize_it),
        ("fr", normalize_fr),
        ("sv", normalize_sv),
        ("de", normalize_de),
    )
    for prefix, handler in normalizers:
        if code.startswith(prefix):
            return handler(text, remove_articles)

    # TODO: Normalization for other languages
    LOG.warning('Language "{}" not recognized! Please make sure your '
                'language is one of the following: '
                'en, es, pt, it, fr, sv, de.'.format(code))
    return text
Пример #3
0
def normalize(text, lang="en-us", remove_articles=True):
    """Prepare a string for parsing.

    Looks up the normalizer of the requested language and lets it make
    numbers consistent, get rid of contractions, etc. The language is
    chosen case-insensitively from the start of ``lang``.

    Args:
        text (str): the string to normalize
        lang (str): the code for the language text is in
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.
    Returns:
        (str): The normalized string, or ``text`` unchanged (after a
               warning is logged) when the language is not recognized.
    """
    code = str(lang).lower()

    # All supported prefixes are two characters long, so a lookup on the
    # first two characters matches the same codes as a startswith() test.
    handler = {
        "en": normalize_en,
        "es": normalize_es,
        "pt": normalize_pt,
        "it": normalize_it,
        "fr": normalize_fr,
        "sv": normalize_sv,
        "de": normalize_de,
    }.get(code[:2])
    if handler is not None:
        return handler(text, remove_articles)

    # TODO: Normalization for other languages
    LOG.warning('Language "{}" not recognized! Please make sure your '
                'language is one of the following: '
                'en, es, pt, it, fr, sv, de.'.format(code))
    return text
Пример #4
0
def normalize(text, lang=None, remove_articles=True):
    """Prepare a string for parsing.

    Hands the text to the normalizer of the requested language, which
    makes numbers consistent, gets rid of contractions, etc.

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses default
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string, or ``text`` unchanged when the
               language is unsupported (which is reported through
               _log_unsupported_language).
    """
    lang_code = get_primary_lang_code(lang)

    # One ordered table drives both the dispatch and the list of supported
    # codes that is reported for an unsupported language.
    supported = (
        ("en", normalize_en),
        ("es", normalize_es),
        ("pt", normalize_pt),
        ("it", normalize_it),
        ("fr", normalize_fr),
        ("sv", normalize_sv),
        ("de", normalize_de),
        ("da", normalize_da),
        ("nl", normalize_nl),
    )
    for code, handler in supported:
        if lang_code == code:
            return handler(text, remove_articles)

    # TODO: Normalization for other languages
    _log_unsupported_language(lang_code, [code for code, _ in supported])
    return text
Пример #5
0
def normalize(text, lang=None, remove_articles=True):
    """Prepare a string for parsing.

    Hands the text to the normalizer of the requested language, which
    makes numbers consistent, gets rid of contractions, etc.

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses default
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string, or ``text`` unchanged when the
               language is unsupported (which is reported through
               _log_unsupported_language).
    """
    primary = get_primary_lang_code(lang)

    if primary == "en":
        return normalize_en(text, remove_articles)
    if primary == "es":
        return normalize_es(text, remove_articles)
    if primary == "pt":
        return normalize_pt(text, remove_articles)
    if primary == "it":
        return normalize_it(text, remove_articles)
    if primary == "fr":
        return normalize_fr(text, remove_articles)
    if primary == "sv":
        return normalize_sv(text, remove_articles)
    if primary == "de":
        return normalize_de(text, remove_articles)
    if primary == "da":
        return normalize_da(text, remove_articles)

    # TODO: Normalization for other languages
    supported = ['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da']
    _log_unsupported_language(primary, supported)
    return text