Exemplo n.º 1
0
def quantify_variant(analysis, variant):
    """Tally n-grams over the POS tags of ``analysis``.

    The n-gram order is ``variant + 1``.

    Args:
        analysis: Object whose ``pos_tags()`` yields two-element pairs; only
            the second element of each pair (the tag) is used here.
        variant: Number to which 1 is added to obtain the n-gram order.

    Returns:
        A dict mapping ``output_filter_ngram(ngram)`` to the value that
        ``sparse_dict_increment`` accumulated for that n-gram (presumably an
        occurrence count -- confirm against the helper).
    """
    order = variant + 1

    tally = {}

    # Drop the first element of every pair and keep the tag sequence.
    tags = []
    for _, tag in analysis.pos_tags():
        tags.append(tag)

    for gram in ingrams(tags, order):
        sparse_dict_increment(tally, gram)

    # Re-key the tally through the output filter.
    filtered = {}
    for gram, count in tally.items():
        filtered[output_filter_ngram(gram)] = count
    return filtered
def quantify(analysis):
    """Compute normalized tallies of functional trigrams in ``analysis``.

    Each (token, tag) pair from ``analysis.pos_tags()`` is mapped through
    ``function_word_or_POS``; trigrams of the resulting stream that satisfy
    ``trigram_is_functional`` are tallied with ``sparse_dict_increment``.

    Args:
        analysis: Object whose ``pos_tags()`` returns a sized collection of
            (token, tag) pairs.

    Returns:
        A dict mapping ``output_filter_ngram(trigram)`` to the trigram's
        tally divided by the number of tagged tokens.
    """
    tally = {}

    # The stream stays lazy; the trigram iterator below consumes it.
    stream = (function_word_or_POS(tok, pos)
              for tok, pos in analysis.pos_tags())
    total = float(len(analysis.pos_tags()))

    for gram in nltk.util.itrigrams(stream):
        if not trigram_is_functional(gram):
            continue
        sparse_dict_increment(tally, gram)

    # Normalize every tally by the token total and re-key through the filter.
    normalized = {}
    for gram, count in tally.items():
        key = output_filter_ngram(gram)
        normalized[key] = count / total
    return normalized
def quantify(analysis):
    """Return per-token frequencies of functional trigrams.

    The (token, tag) pairs from ``analysis.pos_tags()`` are converted with
    ``function_word_or_POS``, grouped into trigrams, and every trigram
    accepted by ``trigram_is_functional`` is tallied.

    Args:
        analysis: Object whose ``pos_tags()`` returns a sized collection of
            (token, tag) pairs.

    Returns:
        A dict keyed by ``output_filter_ngram(trigram)`` whose values are the
        trigram tallies divided by the number of tagged tokens.
    """
    seen = {}

    # Lazily converted stream; it is consumed by the trigram iterator.
    converted = (function_word_or_POS(word, label)
                 for word, label in analysis.pos_tags())
    token_total = float(len(analysis.pos_tags()))

    for triple in nltk.util.itrigrams(converted):
        is_functional = trigram_is_functional(triple)
        if is_functional:
            sparse_dict_increment(seen, triple)

    # Divide each tally by the token total while re-keying the result.
    return dict(
        (output_filter_ngram(triple), tally / token_total)
        for triple, tally in seen.items()
    )
Exemplo n.º 4
0
def quantify_variant(analysis, variant):
    """Count word n-grams of order ``variant + 1``.

    Args:
        analysis: Object whose ``pos_tags()`` yields (word, tag) pairs; only
            the word of each pair is used here.
        variant: Number to which 1 is added to obtain the n-gram order.

    Returns:
        A dict mapping ``output_filter_ngram(ngram)`` to the unnormalized
        count accumulated for that n-gram.
    """
    order = variant + 1

    tally = {}

    # Keep only the word of every (word, tag) pair.
    tokens = []
    for token, _ in analysis.pos_tags():
        tokens.append(token)

    for gram in ngrams(tokens, order):
        sparse_dict_increment(tally, gram)

    # Counts are returned as-is (unnormalized), re-keyed by the output filter.
    return dict(
        (output_filter_ngram(gram), count) for gram, count in tally.items()
    )
Exemplo n.º 5
0
def quantify_variant(analysis, variant):
    """Quantify POS n-grams.

    * ``variant <= 2``: count POS n-grams of order ``variant + 1``.
    * ``variant == 3``: count POS trigrams, keeping only those whose
      filtered form (``output_filter_ngram``) is a member of ``MY_POS``.
    * any other variant: nothing is computed and ``None`` is returned.

    Args:
        analysis: Object whose ``pos_tags()`` yields two-element pairs; only
            the second element of each pair (the POS tag) is used here.
        variant: Selector for the n-gram order / filtering described above.

    Returns:
        A dict mapping ``output_filter_ngram(ngram)`` to its unnormalized
        count, or ``None`` for an unsupported variant.
    """
    if variant <= 2:
        n = variant + 1
        restrict_to_my_pos = False
    elif variant == 3:
        n = 3
        restrict_to_my_pos = True
    else:
        # Unsupported variants used to fall off the end of the function;
        # keep returning None so existing callers see no change.
        return None

    d = {}
    all_pos_tags = [pos for (_, pos) in analysis.pos_tags()]
    for ngram in ngrams(all_pos_tags, n):
        sparse_dict_increment(d, ngram)

    result = {}
    for k, v in d.items():
        # Filter the key once and reuse it for both the membership test and
        # the output dict.
        key = output_filter_ngram(k)
        if restrict_to_my_pos and key not in MY_POS:
            continue
        result[key] = v  # unnormalized counts
    return result
def quantify(analysis):
    """Quantify contextual function words.

    Each (token, tag) pair from ``analysis.pos_tags()`` is mapped through
    ``function_word_or_POS`` using the function-word list selected by
    ``analysis.lang``. Trigrams of that stream accepted by
    ``trigram_is_functional`` are tallied with ``sparse_dict_increment``.

    Args:
        analysis: Object exposing ``lang`` (``'en'`` or ``'zh'``) and
            ``pos_tags()``, which returns a sized collection of
            (token, tag) pairs.

    Returns:
        A dict mapping ``output_filter_ngram(trigram)`` to the trigram's
        tally divided by the number of tagged tokens.

    Raises:
        SystemExit: If ``analysis.lang`` is neither ``'en'`` nor ``'zh'``.
            The exception carries the explanatory message, so an uncaught
            exit prints it to stderr and ends with a non-zero status.
    """
    if analysis.lang == 'en':
        from translationese.function_words import FUNCTION_WORDS
    elif analysis.lang == 'zh':
        from translationese.function_words import FUNCTION_WORDS_ZH as FUNCTION_WORDS
    else:
        # Raise SystemExit directly instead of calling the site-injected
        # exit() helper: that helper is not guaranteed to exist and, called
        # without arguments, reports success (status 0) on this error path.
        raise SystemExit(
            'language "{}" not implemented yet for contextual_function_words'.format(analysis.lang))

    d = {}

    # Lazy stream; it is consumed by nltk.trigrams below.
    word_stream = (function_word_or_POS(token, tag, FUNCTION_WORDS) for (token, tag)
                   in analysis.pos_tags())
    num_tokens = float(len(analysis.pos_tags()))

    for trigram in nltk.trigrams(word_stream):
        if trigram_is_functional(trigram, FUNCTION_WORDS):
            sparse_dict_increment(d, trigram)

    # Normalize each tally by the total number of tagged tokens.
    return {output_filter_ngram(k): (v / num_tokens) for (k, v) in d.items()}