def quantify_variant(analysis, variant):
    """Collect POS-tag n-grams of length ``variant + 1`` from `analysis`.

    The tag of every ``(token, tag)`` pair returned by
    ``analysis.pos_tags()`` is gathered in order, and each n-gram produced
    by ``ingrams`` is handed to ``sparse_dict_increment`` (presumably a
    per-key counter increment; helper is defined elsewhere).

    Returns a dict mapping ``output_filter_ngram(ngram)`` to the value
    stored for that n-gram; no normalization is applied.
    """
    ngram_size = variant + 1

    tags = []
    for _, tag in analysis.pos_tags():
        tags.append(tag)

    counts = {}
    for gram in ingrams(tags, ngram_size):
        sparse_dict_increment(counts, gram)

    result = {}
    for gram, count in counts.items():
        result[output_filter_ngram(gram)] = count
    return result
def quantify(analysis):
    """Quantify functional trigrams, normalized by the number of tokens.

    Every ``(token, tag)`` pair from ``analysis.pos_tags()`` is mapped
    through ``function_word_or_POS``; trigrams over that stream which pass
    ``trigram_is_functional`` are recorded with ``sparse_dict_increment``.

    Returns a dict mapping ``output_filter_ngram(trigram)`` to the stored
    value divided by ``len(analysis.pos_tags())`` (as a float).
    """
    mapped_stream = (
        function_word_or_POS(token, tag)
        for (token, tag) in analysis.pos_tags()
    )
    total_tokens = float(len(analysis.pos_tags()))

    counts = {}
    for gram in nltk.util.itrigrams(mapped_stream):
        if not trigram_is_functional(gram):
            continue
        sparse_dict_increment(counts, gram)

    result = {}
    for gram, count in counts.items():
        result[output_filter_ngram(gram)] = count / total_tokens
    return result
def quantify_variant(analysis, variant):
    """Quantify word n-grams of length ``variant + 1``.

    Words are the first element of each ``(word, tag)`` pair returned by
    ``analysis.pos_tags()``. Each n-gram produced by ``ngrams`` is passed
    to ``sparse_dict_increment`` (helper defined elsewhere).

    Returns a dict keyed by ``output_filter_ngram(ngram)``; the values are
    unnormalized counts.
    """
    ngram_size = variant + 1
    words = [word for (word, _) in analysis.pos_tags()]

    counts = {}
    for gram in ngrams(words, ngram_size):
        sparse_dict_increment(counts, gram)

    # Unnormalized counts, re-keyed through the output filter.
    return dict(
        (output_filter_ngram(gram), count) for (gram, count) in counts.items()
    )
def quantify_variant(analysis, variant):
    """Quantify POS n-grams.

    Supported variants:

    * ``variant <= 2``: n-grams of length ``variant + 1`` over the POS tags
      from ``analysis.pos_tags()``.
    * ``variant == 3``: trigrams over the POS tags, keeping only entries
      whose ``output_filter_ngram`` key is in ``MY_POS``.

    Returns a dict mapping ``output_filter_ngram(ngram)`` to its
    unnormalized count, or ``None`` for any other `variant` (in which case
    `analysis` is not touched).
    """
    restrict_to_my_pos = variant == 3
    if not (variant <= 2 or restrict_to_my_pos):
        # No feature set is defined for this variant.
        return None

    n = 3 if restrict_to_my_pos else variant + 1
    all_pos_tags = [pos for (_, pos) in analysis.pos_tags()]

    d = {}
    for ngram in ngrams(all_pos_tags, n):
        sparse_dict_increment(d, ngram)

    result = {}
    for ngram, count in d.items():
        # Filter each key once and reuse it for both the test and the output.
        key = output_filter_ngram(ngram)
        if restrict_to_my_pos and key not in MY_POS:
            continue
        result[key] = count
    return result  # unnormalized counts
def quantify(analysis):
    """Quantify contextual function words.

    The function-word list is chosen from ``analysis.lang`` ('en' or 'zh').
    Every ``(token, tag)`` pair from ``analysis.pos_tags()`` is mapped
    through ``function_word_or_POS``; trigrams over that stream which pass
    ``trigram_is_functional`` are recorded with ``sparse_dict_increment``.

    Returns a dict mapping ``output_filter_ngram(trigram)`` to the stored
    value divided by the number of tagged tokens (as a float).

    Raises:
        SystemExit: if ``analysis.lang`` is not a supported language. The
            exit code carries the explanatory message, so an uncaught exit
            prints it to stderr and terminates with a non-zero status.
    """
    if analysis.lang == 'en':
        from translationese.function_words import FUNCTION_WORDS
    elif analysis.lang == 'zh':
        from translationese.function_words import FUNCTION_WORDS_ZH as FUNCTION_WORDS
    else:
        # Raise SystemExit directly instead of calling the site-provided
        # exit(), which is intended for the interactive shell and would
        # report success (status 0) for this error.
        raise SystemExit(
            'language "{}" not implemented yet for contextual_function_words'
            .format(analysis.lang)
        )

    tagged = analysis.pos_tags()
    num_tokens = float(len(tagged))
    word_stream = (
        function_word_or_POS(token, tag, FUNCTION_WORDS)
        for (token, tag) in tagged
    )

    d = {}
    for trigram in nltk.trigrams(word_stream):
        if trigram_is_functional(trigram, FUNCTION_WORDS):
            sparse_dict_increment(d, trigram)

    return {output_filter_ngram(k): (v / num_tokens) for (k, v) in d.items()}