def get_exclusive_tokens(samples, squash=True):
    """For each sample, collect the tokens that appear in no other sample.

    If squash is True, the per-sample lists are merged into a single set.
    """
    exclusives = []
    for s in samples:
        tokens = word_tokenize(s)
        # every sample equal to s is dropped, duplicates included
        others = [v for v in samples if v != s]
        other_tokens = flatten([word_tokenize(_) for _ in others])
        exclusives.append([t for t in tokens if t not in other_tokens])
    if squash:
        return set(flatten(exclusives))
    return exclusives
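
# A minimal sketch of expected behaviour, assuming word_tokenize splits
# roughly on whitespace (it is defined elsewhere, alongside flatten and
# chunk_list):
#
#   samples = ["the quick brown spotted fox",
#              "the quick red fox",
#              "the slow red fox"]
#   get_exclusive_tokens(samples)
#   # -> {"brown", "spotted", "slow"}
#   # "quick" and "red" are absent: each occurs in two samples, so
#   # neither is exclusive to any single one.
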
def get_common_tokens(samples, squash=True):
    """For each sample, collect the tokens that appear in every other sample.

    If squash is True, the per-sample lists are merged into a single set.
    """
    common_toks = []
    for s in samples:
        tokens = word_tokenize(s)
        others = [v for v in samples if v != s]
        # keep one token list per other sample so that membership can be
        # checked against each sample individually
        other_tokens = [word_tokenize(_) for _ in others]
        common_toks.append(
            [t for t in tokens if all(t in toks for toks in other_tokens)])
    if squash:
        return set(flatten(common_toks))
    return common_toks
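
# With the samples from the sketch above, only the tokens present in
# all three strings survive:
#
#   get_common_tokens(samples)
#   # -> {"the", "fox"}
#   get_common_tokens(samples, squash=False)
#   # -> [["the", "fox"], ["the", "fox"], ["the", "fox"]]
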
def get_uncommon_chunks(samples, squash=True):
    """Split each sample on the tokens shared by all samples and return
    the chunks in between, i.e. the stretches of text where the samples
    differ. If squash is True, the per-sample lists are merged into a set.
    """
    toks = get_common_tokens(samples)
    # break each token list wherever a common token occurs
    chunks = [chunk_list(word_tokenize(s), toks) for s in samples]
    chunks = [[" ".join(_) for _ in s] for s in chunks]
    if squash:
        return set(flatten(chunks))
    return chunks
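
# Assuming chunk_list splits a token list on the given delimiter tokens
# and drops the delimiters, the samples from the sketch above yield the
# stretches of text around the shared words:
#
#   get_uncommon_chunks(samples)
#   # -> {"quick brown spotted", "quick red", "slow red"}
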
def get_exclusive_chunks(samples, squash=True):
    """Split each sample on every token that occurs in more than one
    sample, so each remaining chunk is a run of tokens exclusive to that
    sample. If squash is True, the per-sample lists are merged into a set.
    """
    # common + uncommon covers every token; removing the exclusive ones
    # leaves exactly the tokens that appear in at least two samples
    exclusive = get_exclusive_tokens(samples)
    toks = list(get_common_tokens(samples)) + \
        list(get_uncommon_tokens(samples))
    toks = [t for t in toks if t not in exclusive]
    chunks = [chunk_list(word_tokenize(s), toks) for s in samples]
    chunks = [[" ".join(_) for _ in s] for s in chunks]
    if squash:
        return set(flatten(chunks))
    return chunks
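
# Assuming get_uncommon_tokens is the counterpart of get_common_tokens
# (tokens not shared by all samples), the samples from the sketch above
# give:
#
#   get_exclusive_chunks(samples)
#   # -> {"brown spotted", "slow"}
#   # Unlike get_exclusive_tokens, adjacent exclusive tokens such as
#   # "brown spotted" stay together as one chunk.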