Python is_text示例

编程语言: Python

命名空间/包名称: normality.util

方法/功能: is_text

hotexamples.com的示例: 9

Python is_text - 共找到 9 个示例。以下是从开源项目中提取的、评分最高的 normality.util.is_text 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： cleaning.py 项目： JonoYang/normality

def decompose_nfkd(text: Any) -> Optional[str]:
    """Apply Unicode NFKD (compatibility decomposition) to ``text``.

    Compatibility decomposition rewrites non-standard representations of a
    value into their normalised form and splits characters from their
    diacritics, so that each ends up as its own codepoint.

    Returns ``None`` when ``is_text`` rejects the input; otherwise returns
    the result of ``unicodedata.normalize('NFKD', text)``.
    """
    if is_text(text):
        return unicodedata.normalize('NFKD', text)
    return None

示例#2

显示文件

def latinize_text(text: Optional[str], ascii: bool = False) -> Optional[str]:
    """Convert ``text`` to the latin script by transliteration.

    Each character is mapped to its closest latin-script counterpart. With
    ``ascii=True`` the transliterator built from ``ASCII_SCRIPT`` is used
    instead of the plain "Any-Latin" one.

    ``None``, values rejected by ``is_text`` and zero-length input are
    returned unchanged. Transliterators are created by ``make_trans`` on
    first use and cached as attributes on this function, so each one is
    built at most once.
    """
    if text is None or not is_text(text) or len(text) == 0:
        return text

    # Choose which cached transliterator to use; the conditional expression
    # only evaluates the selected pair.
    cache_attr, script = ("_ascii", ASCII_SCRIPT) if ascii else ("_tr", "Any-Latin")

    if not hasattr(latinize_text, cache_attr):
        setattr(latinize_text, cache_attr, make_trans(script))
    transliterate = getattr(latinize_text, cache_attr)
    return transliterate(text)

示例#3

显示文件

文件： cleaning.py 项目： JonoYang/normality

def category_replace(
        text: Any,
        replacements: Categories = UNICODE_CATEGORIES) -> Optional[str]:
    """Replace or drop characters according to their unicode category.

    Non-text characters (punctuation, whitespace, marks, diacritics, ...)
    can be handled by class instead of being listed one by one.

    The input is first passed through ``decompose_nfkd``. Each character's
    category is then looked up in ``replacements``: a category that is not
    present keeps the character as-is, a category mapped to ``None`` drops
    the character, and any other mapped value is substituted for it.

    Returns ``None`` when the decomposed value is rejected by ``is_text``.
    """
    decomposed = decompose_nfkd(text)
    if not is_text(decomposed):
        return None
    # Look up each character's substitute, defaulting to the character itself.
    substituted = (
        replacements.get(unicodedata.category(char), char)
        for char in decomposed
    )
    # A ``None`` substitute means "remove this character".
    return ''.join(piece for piece in substituted if piece is not None)

示例#4

显示文件

def ascii_text(text: Optional[str]) -> Optional[str]:
    """Transliterate ``text`` and guarantee an ASCII-only result.

    The value goes through ``latinize_text(..., ascii=True)``; any character
    that still cannot be encoded as ASCII afterwards is silently dropped.
    Returns ``None`` if the transliterated value is ``None`` or is rejected
    by ``is_text``.
    """
    latinized = latinize_text(text, ascii=True)
    if latinized is not None and is_text(latinized):
        # Round-trip through ASCII with "ignore" to discard leftovers.
        ascii_bytes = latinized.encode("ascii", "ignore")
        return ascii_bytes.decode("ascii")
    return None

示例#5

显示文件

文件： cleaning.py 项目： JonoYang/normality

def collapse_spaces(text: Any) -> Optional[str]:
    """Collapse newlines, tabs and repeated spaces into single spaces.

    Every match of ``COLLAPSE_RE`` is replaced by ``WS``, and ``WS``
    characters are then stripped from both ends of the result. Returns
    ``None`` when ``is_text`` rejects the input.
    """
    if is_text(text):
        collapsed = COLLAPSE_RE.sub(WS, text)
        return collapsed.strip(WS)
    return None

示例#6

显示文件

文件： cleaning.py 项目： JonoYang/normality

def remove_byte_order_mark(text) -> Optional[str]:
    """Strip a byte order mark (BOM) from the start of ``text``.

    Every match of ``BOM_RE`` is replaced with the empty string. Returns
    ``None`` when ``is_text`` rejects the input.
    """
    return BOM_RE.sub('', text) if is_text(text) else None

示例#7

显示文件

文件： cleaning.py 项目： JonoYang/normality

def remove_unsafe_chars(text) -> Optional[str]:
    """Delete unsafe unicode characters from ``text``.

    Every match of ``UNSAFE_RE`` is replaced with the empty string. Returns
    ``None`` when ``is_text`` rejects the input.
    """
    if is_text(text):
        return UNSAFE_RE.sub('', text)
    return None

示例#8

显示文件

文件： cleaning.py 项目： JonoYang/normality

def strip_quotes(text: Any) -> Optional[str]:
    """Drop single or double quotes that surround a string.

    Each match of ``QUOTES_RE`` is replaced by its first capture group.
    Returns ``None`` when ``is_text`` rejects the input.
    """
    return QUOTES_RE.sub('\\1', text) if is_text(text) else None

示例#9

显示文件

文件： cleaning.py 项目： JonoYang/normality

def compose_nfkc(text: Any) -> Optional[str]:
    """Apply Unicode NFKC normalisation (composition) to ``text``.

    Returns ``None`` when ``is_text`` rejects the input; otherwise returns
    the result of ``unicodedata.normalize('NFKC', text)``.
    """
    return unicodedata.normalize('NFKC', text) if is_text(text) else None