def print_sentence(sentence):
    """Print a tagged sentence as a single line of text.

    Args:
        sentence: iterable of ``(word, tag)`` pairs.

    Every token is followed by one space. A token whose tag is found in
    ``penn_punct`` or ``brown_punct`` is glued onto the preceding token
    (no space in between). The printed line keeps the trailing space after
    the last token; an empty sentence prints an empty line.
    """
    pieces = []
    for word, tag in sentence:
        is_punct = tag in penn_punct or tag in brown_punct
        if is_punct and pieces:
            # Attach punctuation directly to the token before it.
            pieces[-1] += str(word)
        else:
            # Regular word, or punctuation with nothing in front of it.
            pieces.append(str(word))
    # The parenthesised single-argument form behaves the same under
    # Python 2 (print statement) and Python 3 (print function).
    print("".join(piece + " " for piece in pieces))
def single_sent_word_length(sentence):
    """Return the average word length, in characters, of one tagged sentence.

    Args:
        sentence: iterable of ``(word, tag)`` pairs.

    Returns:
        float: total characters divided by number of words, where tokens
        whose tag is in ``penn_punct`` or ``brown_punct`` are ignored.
        Returns 0.0 when the sentence contains no countable words (empty or
        punctuation only) instead of dividing by zero.
    """
    letter_total = 0
    word_count = 0
    for word, tag in sentence:
        if tag in penn_punct or tag in brown_punct:
            continue
        word_count += 1
        letter_total += len(word)
    if word_count == 0:
        return 0.0
    # float() keeps true division under Python 2 as well as Python 3.
    return float(letter_total) / word_count
def word_length_average(tagged_corpus):
    """Return the average word length, in characters, over a tagged corpus.

    Args:
        tagged_corpus: iterable of sentences, each an iterable of
            ``(word, tag)`` pairs.

    Returns:
        float: total characters divided by number of words across all
        sentences, where tokens whose tag is in ``penn_punct`` or
        ``brown_punct`` are ignored. Returns 0.0 when the corpus contains no
        countable words instead of dividing by zero.
    """
    letter_total = 0
    word_count = 0
    for sent in tagged_corpus:
        for word, tag in sent:
            if tag in penn_punct or tag in brown_punct:
                continue
            word_count += 1
            letter_total += len(word)
    if word_count == 0:
        return 0.0
    # float() keeps true division under Python 2 as well as Python 3.
    return float(letter_total) / word_count
def count_pronouns_per_words(tagged_corpus):
    """Return the ratio of words to pronouns in a tagged corpus.

    Args:
        tagged_corpus: iterable of sentences, each an iterable of
            ``(word, tag)`` pairs.

    Returns:
        float: number of non-punctuation tokens divided by the number of
        tokens whose tag is in ``brown_pron_tags`` or ``penn_pron_tags``;
        0.0 when no pronoun was seen.

    Note:
        Despite the function name, the value is words / pronouns (words per
        pronoun), not pronouns / words.
    """
    word_total = 0
    pronoun_total = 0
    for sentence in tagged_corpus:
        for _word, tag in sentence:
            # Punctuation tokens are not counted as words.
            if tag in penn_punct or tag in brown_punct:
                continue
            word_total += 1
            # NOTE(review): assumes pronoun tags never overlap with the
            # punctuation tags, so checking only non-punctuation tokens is
            # equivalent to checking every token -- confirm.
            if tag in brown_pron_tags or tag in penn_pron_tags:
                pronoun_total += 1
    if pronoun_total == 0:
        return 0.0
    # float() keeps true division under Python 2 as well as Python 3.
    return float(word_total) / pronoun_total