Python sub示例

编程语言: Python

命名空间/包名称: newslynx.lib.regex.re_whitespace

方法/功能: sub

hotexamples.com的示例: 6

Python sub - 共找到 6 个示例。以下是从开源项目中提取的、评分最高的 newslynx.lib.regex.re_whitespace.sub 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

文件： search.py 项目： jjelosua/newslynx-core

    def _process_text(self, text, **kw):
        """
        Preprocess text.

        The text is always lower-cased, decoded with ``.decode('utf-8')``
        and passed through ``unidecode``.  The remaining steps are optional
        and each one is enabled unless its switch is passed as falsy.

        :param text: the raw text (presumably a UTF-8 byte string, since it
            is decoded here -- TODO confirm against callers).
        :param kw: optional switches, all defaulting to ``True``:
            ``remove_html``, ``remove_punct``, ``remove_digits`` and
            ``remove_whitespace``.
        :returns: the processed text.
        """
        # always lower case + unidecode
        text = unicode(
            unidecode(text.lower().decode('utf-8')), errors='ignore')

        # optionally remove html. This has to happen before punctuation is
        # blanked out: once '<', '>' and '/' have been replaced by spaces
        # (assuming `punct` contains them) there are no tags left for
        # strip_tags to recognise and the tag names would leak into the text.
        if kw.get('remove_html', True):
            text = html.strip_tags(text)

        # optionally replace every punctuation character with a space
        if kw.get('remove_punct', True):
            text = "".join(" " if ch in punct else ch for ch in text)

        # optionally replace every digit with a space
        if kw.get('remove_digits', True):
            text = "".join(" " if ch in digits else ch for ch in text)

        # optionally replace each whitespace match with a single space
        # and trim both ends
        if kw.get('remove_whitespace', True):
            text = re_whitespace.sub(" ", text).strip()

        return text

示例#2

显示文件

文件： search.py 项目： jjelosua/newslynx-core

    def _process_text(self, text, **kw):
        """
        Preprocess text.

        The text is always lower-cased, decoded with ``.decode('utf-8')``
        and passed through ``unidecode``.  The remaining steps are optional
        and each one is enabled unless its switch is passed as falsy.

        :param text: the raw text (presumably a UTF-8 byte string, since it
            is decoded here -- TODO confirm against callers).
        :param kw: optional switches, all defaulting to ``True``:
            ``remove_html``, ``remove_punct``, ``remove_digits`` and
            ``remove_whitespace``.
        :returns: the processed text.
        """
        # always lower case + unidecode
        text = unicode(unidecode(text.lower().decode('utf-8')),
                       errors='ignore')

        # optionally remove html. Tags are stripped first because the
        # punctuation step below replaces characters found in `punct` with
        # spaces; if that includes '<', '>' and '/' (assumed), running it
        # first would leave nothing for strip_tags to match.
        if kw.get('remove_html', True):
            text = html.strip_tags(text)

        # optionally replace every punctuation character with a space
        if kw.get('remove_punct', True):
            text = "".join(" " if ch in punct else ch for ch in text)

        # optionally replace every digit with a space
        if kw.get('remove_digits', True):
            text = "".join(" " if ch in digits else ch for ch in text)

        # optionally replace each whitespace match with a single space
        # and trim both ends
        if kw.get('remove_whitespace', True):
            text = re_whitespace.sub(" ", text).strip()

        return text

示例#3

显示文件

文件： text.py 项目： jjelosua/newslynx-core

def prepare(s):
    """
    Prepare text.

    Runs ``s`` through ``unicode_symbols``, replaces every
    ``re_whitespace`` match with a single space, trims both ends and
    returns the result of ``unidecode`` on that string.
    """
    with_symbols = unicode_symbols(s)
    single_spaced = re_whitespace.sub(' ', with_symbols)
    return unidecode(single_spaced.strip())

示例#4

显示文件

文件： text.py 项目： newslynx/newslynx-core

def prepare(s):
    """
    Prepare text.

    Runs ``s`` through ``unicode_symbols``, replaces every
    ``re_whitespace`` match with a single space and trims both ends.
    The result is then passed to ``unidecode``; if that raises a
    ``Warning`` the trimmed text is returned as it is.
    """
    cleaned = re_whitespace.sub(' ', unicode_symbols(s)).strip()
    try:
        return unidecode(cleaned)
    except Warning:
        # best effort: fall back to the text without unidecode applied
        return cleaned

示例#5

显示文件

文件： search.py 项目： lexifdev/newslynx-core

    def _process_text(self, text, **kw):
        """
        Preprocess text.
        """

        # optionally remove punctuation
        if kw.get('rm_punct', True):
            text = "".join(map(lambda x: x if x not in punct else " ", text))

        # optionally remove digits
        if kw.get('rm_digits', True):
            text = "".join(map(lambda x: x if x not in digits else " ", text))

        # optionally remove whitespace
        if kw.get('rm_html', True):
            text = html.strip_tags(text)

        # optionally remove whitespace
        if kw.get('rm_whitespace', True):
            text = re_whitespace.sub(" ", text).strip()

        return text

示例#6

显示文件

文件： search.py 项目： abelsonlive/newslynx-core

    def _process_text(self, text, **kw):
        """
        Preprocess text.
        """

        # optionally remove punctuation
        if kw.get("rm_punct", True):
            text = "".join(map(lambda x: x if x not in punct else " ", text))

        # optionally remove digits
        if kw.get("rm_digits", True):
            text = "".join(map(lambda x: x if x not in digits else " ", text))

        # optionally remove whitespace
        if kw.get("rm_html", True):
            text = html.strip_tags(text)

        # optionally remove whitespace
        if kw.get("rm_whitespace", True):
            text = re_whitespace.sub(" ", text).strip()

        return text