def bigram_encode(title):
    """encode a title in bigram form"""
    global bigram

    result = ''
    title = SearchKey.strip_accents(title)

    while len(title) >= 2:
        if SearchKey.is_valid_character(title[0]):
            b = title[0:2]
            if b in bigram:
                result += bigram[b]
                title = title[2:]
            else:
                result += chr(ord(title[0:1]))
                title = title[1:]
        else:
            #result += '?'
            title = title[1:]

    if len(title) == 1:
        if SearchKey.is_valid_character(title[0]):
            result += chr(ord(title[0]))
        #else:
        #    result += '?'

    return SearchKey.compact_spaces(result)
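
# A sketch of the assumed context (not taken from the original file): `bigram` is a
# module-level dict built elsewhere that maps frequent two-character sequences to
# one-byte codes, e.g. {'th': '\x80', 'he': '\x81'}.  bigram_encode() walks the
# accent-stripped title two characters at a time, emitting the table code for a
# known pair and the raw character otherwise, so a typical call is simply:
#
#     key = bigram_encode('An example title')
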
def write_article(language_links):
    global compress
    global verbose
    global output, f_out, i_out
    global article_count
    global g_this_article_title
    global file_number
    global start_time
    global article_writer

    article_count += 1
    if verbose:
        PrintLog.message(u'[MWR {0:d}] {1:s}'.format(article_count, g_this_article_title))
    elif article_count % 1000 == 0:
        now_time = time.time()
        PrintLog.message(u'Render[{0:d}]: {1:7.2f}s {2:10d}'.format(file_number, now_time - start_time, article_count))
        start_time = now_time

    # create links
    links_stream = io.BytesIO('')

    for i in g_links:
        (x0, y0, x1, y1, url) = g_links[i]
        links_stream.write(struct.pack('<3I', (y0 << 8) | x0, (y1 << 8) | x1, link_number(url)))

    links_stream.flush()
    links = links_stream.getvalue()
    links_stream.close()

    # create language links
    links_stream = io.BytesIO('')

    japanese_convert = LanguageTranslation.LanguageJapanese().translate
    normal_convert = LanguageTranslation.LanguageNormal().translate

    for l in language_links:
        language, link = l.split(':', 1)
        language = language.strip()
        link = link.strip()
        # only need the first pronunciation for the link
        # as this must always be present
        if link is not None and '' != link:
            if 'ja' == language:
                stripped = japanese_convert(link)[0]
            else:
                stripped = normal_convert(link)[0]
            stripped = SearchKey.strip_accents(stripped)
            if link == stripped:
                links_stream.write(l.encode('utf-8') + '\0')
            else:
                links_stream.write((language + '#' + stripped).encode('utf-8') + '\1' + link.encode('utf-8') + '\0')

    links_stream.flush()
    langs = links_stream.getvalue()
    links_stream.close()

    # create the header (header size = 8)
    header = struct.pack('<I2H', 8 + len(links) + len(langs), g_link_cnt, 0)
    body = output.fetch()

    # combine the data
    whole_article = header + links + langs + body

    if compress:
        try:
            (article_number, fnd_offset, restricted) = article_index(g_this_article_title)
            restricted = bool(int(restricted))  # bool('0') is True, so convert via int first
            article_writer.add_article(article_number, whole_article, fnd_offset, restricted)
        except KeyError:
            PrintLog.message(u'Error in: write_article, Title not found')
            PrintLog.message(u'Title: {0:s}'.format(g_this_article_title))
            PrintLog.message(u'Count: {0:d}'.format(article_count))
    else:
        f_out.write(whole_article)
        f_out.flush()
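
# Layout of the record written above (read off the struct.pack calls; the field
# interpretation is an assumption, not confirmed against other project files):
#
#     header  '<I2H': uint32 = 8 + len(links) + len(langs) (presumably the byte
#             offset from the start of the record to the body), uint16 link
#             count (g_link_cnt), uint16 reserved/zero
#     links   one '<3I' record per link: (y0 << 8) | x0, (y1 << 8) | x1,
#             link_number(url)
#     langs   language links, each '\0'-terminated; entries whose target needed
#             accent stripping are stored as 'lang#stripped' + '\1' + original
#     body    the rendered article bytes from output.fetch()
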