示例#1
0
def get_exclusive_tokens(samples, squash=True):
    """Return the tokens that occur in only one of the sample texts.

    A token of a sample is "exclusive" when no sample with a *different*
    text contains it. Samples with identical text are not counted as
    "other" samples, so a token shared only between duplicates is still
    exclusive.

    Args:
        samples: iterable of texts accepted by ``word_tokenize``.
            Assumes the texts are hashable (e.g. ``str``) and that
            ``word_tokenize`` returns hashable tokens and is deterministic
            for a given text -- TODO confirm against the tokenizer in use.
        squash: when true, merge the per-sample results into one set.

    Returns:
        ``set(flatten(per_sample))`` when ``squash`` is true; otherwise a
        list with one list of exclusive tokens per sample, in sample order
        and preserving each sample's token order (repeats included).
    """
    # Materialize once: the input is walked more than one time below.
    samples = list(samples)

    # Tokenize every distinct text exactly once instead of re-tokenizing
    # all the other samples on each outer iteration.
    tokens_by_sample = {}
    for s in samples:
        if s not in tokens_by_sample:
            tokens_by_sample[s] = word_tokenize(s)

    # doc_freq[t] = number of distinct sample texts that contain token t.
    doc_freq = {}
    for tokens in tokens_by_sample.values():
        for t in set(tokens):
            doc_freq[t] = doc_freq.get(t, 0) + 1

    # A token of ``s`` is absent from every other distinct text exactly when
    # the only text contributing to its document frequency is ``s`` itself.
    exclusives = [
        [t for t in tokens_by_sample[s] if doc_freq[t] == 1]
        for s in samples
    ]
    if squash:
        return set(flatten(exclusives))
    return exclusives
示例#2
0
def get_common_tokens(samples, squash=True):
    """Return the tokens that occur in every sample text.

    A token of a sample is "common" when every sample with a *different*
    text also contains it. Samples with identical text are not counted as
    "other" samples. When there are no other samples (a single sample, or
    all samples identical) every token qualifies.

    Args:
        samples: iterable of texts accepted by ``word_tokenize``.
            Assumes the texts are hashable (e.g. ``str``) and that
            ``word_tokenize`` returns hashable tokens and is deterministic
            for a given text -- TODO confirm against the tokenizer in use.
        squash: when true, merge the per-sample results into one set.

    Returns:
        ``set(flatten(per_sample))`` when ``squash`` is true; otherwise a
        list with one list of common tokens per sample, in sample order
        and preserving each sample's token order (repeats included).
    """
    # Materialize once: the input is walked more than one time below.
    samples = list(samples)

    # Tokenize every distinct text exactly once instead of re-tokenizing
    # all the other samples on each outer iteration.
    tokens_by_sample = {}
    for s in samples:
        if s not in tokens_by_sample:
            tokens_by_sample[s] = word_tokenize(s)
    n_distinct = len(tokens_by_sample)

    # doc_freq[t] = number of distinct sample texts that contain token t.
    doc_freq = {}
    for tokens in tokens_by_sample.values():
        for t in set(tokens):
            doc_freq[t] = doc_freq.get(t, 0) + 1

    # A token of ``s`` appears in all other distinct texts exactly when it
    # appears in all distinct texts (``s`` itself always contributes one).
    common_toks = [
        [t for t in tokens_by_sample[s] if doc_freq[t] == n_distinct]
        for s in samples
    ]
    if squash:
        return set(flatten(common_toks))
    return common_toks
示例#3
0
def get_uncommon_chunks(samples, squash=True):
    """Chunk each sample around the tokens common to all samples.

    Each sample is tokenized with ``word_tokenize`` and passed to
    ``chunk_list`` together with the result of ``get_common_tokens``;
    every chunk it yields is then joined back into a space-separated
    string.
    NOTE(review): ``chunk_list`` is defined elsewhere -- presumably it splits
    the token list at the given tokens; confirm against its definition.

    Args:
        samples: iterable of texts accepted by ``word_tokenize``.
        squash: when true, merge the per-sample results into one set.

    Returns:
        ``set(flatten(per_sample))`` when ``squash`` is true; otherwise a
        list with one list of chunk strings per sample, in sample order.
    """
    common = get_common_tokens(samples)

    joined_per_sample = []
    for text in samples:
        pieces = chunk_list(word_tokenize(text), common)
        joined_per_sample.append([" ".join(piece) for piece in pieces])

    if not squash:
        return joined_per_sample
    return set(flatten(joined_per_sample))
示例#4
0
def get_exclusive_chunks(samples, squash=True):
    """Chunk each sample around its non-exclusive tokens.

    The token list handed to ``chunk_list`` is the common tokens followed by
    the uncommon tokens, minus anything reported by
    ``get_exclusive_tokens``. Each sample is tokenized with
    ``word_tokenize``, chunked against that list, and every chunk is joined
    back into a space-separated string.
    NOTE(review): ``chunk_list`` and ``get_uncommon_tokens`` are defined
    elsewhere -- presumably ``chunk_list`` splits the token list at the given
    tokens; confirm against their definitions.

    Args:
        samples: iterable of texts accepted by ``word_tokenize``.
        squash: when true, merge the per-sample results into one set.

    Returns:
        ``set(flatten(per_sample))`` when ``squash`` is true; otherwise a
        list with one list of chunk strings per sample, in sample order.
    """
    candidates = list(get_common_tokens(samples)) + \
        list(get_uncommon_tokens(samples))
    # Compute the exclusive tokens once; the original re-ran this full
    # corpus pass for every candidate token inside the filter.
    exclusive = get_exclusive_tokens(samples)
    toks = [t for t in candidates if t not in exclusive]

    chunks = [chunk_list(word_tokenize(s), toks) for s in samples]
    chunks = [[" ".join(_) for _ in s] for s in chunks]
    if squash:
        return set(flatten(chunks))
    return chunks