Exemplo n.º 1
0
def parse(lang_sample):
    """Count occurrences of every word read from ``lang_sample``.

    The word sequence is obtained from ``words_from_archive`` called with
    ``include_dups=True`` (presumably so repeated words stay in the
    sequence -- confirm against that helper).

    Returns a ``(vocabulary, tally)`` pair: the set of distinct words and
    the mapping created by ``zero_default_dict()`` holding one count per
    word.
    """
    tokens = words_from_archive(lang_sample, include_dups=True)
    tally = zero_default_dict()
    for token in tokens:
        tally[token] = tally[token] + 1
    vocabulary = set(tokens)
    return vocabulary, tally
Exemplo n.º 2
0
def parse(lang_sample):
    """Build word-frequency data for a language sample.

    Words are read via ``words_from_archive(lang_sample, include_dups=True)``
    and each one bumps its entry in the mapping returned by
    ``zero_default_dict()`` (assumed to start missing keys at zero --
    verify against that helper).

    Returns a 2-tuple: the set of words seen, and the per-word counts.
    """
    sample_words = words_from_archive(lang_sample, include_dups=True)
    frequency = zero_default_dict()
    for entry in sample_words:
        frequency[entry] += 1
    result = (set(sample_words), frequency)
    return result
Exemplo n.º 3
0
def parse(lang_sample, file_format='bz'):
    """Tally word popularity using novel extracts, etc.

    Args:
        lang_sample: Source identifier passed unchanged to the word reader
            selected by ``file_format``.
        file_format: ``'bz'`` (default) reads through
            ``words_from_archive`` with ``include_dups=True``; ``'txt'``
            reads through ``words_from_txt``.

    Returns:
        A ``(words, counts)`` tuple: the set of distinct words and the
        mapping created by ``zero_default_dict()`` holding one count per
        word.

    Raises:
        ValueError: If ``file_format`` is neither ``'bz'`` nor ``'txt'``.
    """
    # Function-scope import, kept where the original had it.
    from autocorrect.utils import (
        words_from_archive,
        words_from_txt,
        zero_default_dict,
    )

    if file_format == 'bz':
        words = words_from_archive(lang_sample, include_dups=True)
    elif file_format == 'txt':
        words = words_from_txt(lang_sample)
    else:
        # Without this branch ``words`` stays unbound and the loop below
        # fails with an UnboundLocalError that hides the real cause.
        raise ValueError(
            "unsupported file_format {!r}; expected 'bz' or 'txt'".format(
                file_format
            )
        )

    counts = zero_default_dict()
    for word in words:
        counts[word] += 1
    return set(words), counts