def correct_verbs(chunk):
    '''Correct plural/singular verb mistakes.

    The first token whose tag starts with 'VB' is made to agree with a
    noun (tag starting with 'NN'): the noun search starts just right of
    the verb and, if that finds nothing, continues leftwards from just
    before the verb.  If the noun tag ends in 'S' the verb is looked up in
    ``plural_verb_forms``, otherwise in ``singular_verb_forms``; both are
    keyed by ``(word, tag)`` pairs and a missing key leaves the verb as is.

    ``chunk`` is modified in place and also returned.  If no verb or no
    noun is found it is returned untouched.

    >>> correct_verbs([('is', 'VBZ'), ('our', 'PRP$'), ('children', 'NNS'), ('learning', 'VBG')])
    [('are', 'VBP'), ('our', 'PRP$'), ('children', 'NNS'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('children', 'NNS'), ('is', 'VBZ'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('children', 'NNS'), ('are', 'VBP'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('child', 'NN'), ('were', 'VBD'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('child', 'NN'), ('was', 'VBD'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('child', 'NN'), ('is', 'VBZ'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('child', 'NN'), ('is', 'VBZ'), ('learning', 'VBG')]
    '''
    # Predicates unpack inside the body: tuple parameters such as
    # ``lambda (word, tag): ...`` are a syntax error on Python 3.
    def vbpred(wt):
        word, tag = wt
        return tag.startswith('VB')

    def nnpred(wt):
        word, tag = wt
        return tag.startswith('NN')

    vbidx = first_chunk_index(chunk, vbpred)
    # if no verb found, do nothing
    if vbidx is None:
        return chunk

    verb, vbtag = chunk[vbidx]
    # find nearest noun to the right of verb
    nnidx = first_chunk_index(chunk, nnpred, start=vbidx + 1)
    # if no noun found to right, look to the left
    if nnidx is None:
        nnidx = first_chunk_index(chunk, nnpred, start=vbidx - 1, step=-1)
    # if no noun found, do nothing
    if nnidx is None:
        return chunk

    _, nntag = chunk[nnidx]
    # get correct verb form and insert into chunk; an unknown
    # (verb, tag) pair falls back to itself, i.e. stays unchanged
    current = (verb, vbtag)
    if nntag.endswith('S'):
        chunk[vbidx] = plural_verb_forms.get(current, current)
    else:
        chunk[vbidx] = singular_verb_forms.get(current, current)

    return chunk
def correct_verbs(chunk):
    '''Correct plural/singular verb mistakes.

    The first token whose tag starts with 'VB' is made to agree with a
    noun (tag starting with 'NN'): the noun search starts just right of
    the verb and, if that finds nothing, continues leftwards from just
    before the verb.  If the noun tag ends in 'S' the verb is looked up in
    ``plural_verb_forms``, otherwise in ``singular_verb_forms``; both are
    keyed by ``(word, tag)`` pairs and a missing key leaves the verb as is.

    ``chunk`` is modified in place and also returned.  If no verb or no
    noun is found it is returned untouched.

    >>> correct_verbs([('is', 'VBZ'), ('our', 'PRP$'), ('children', 'NNS'), ('learning', 'VBG')])
    [('are', 'VBP'), ('our', 'PRP$'), ('children', 'NNS'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('children', 'NNS'), ('is', 'VBZ'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('children', 'NNS'), ('are', 'VBP'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('child', 'NN'), ('were', 'VBD'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('child', 'NN'), ('was', 'VBD'), ('learning', 'VBG')]
    >>> correct_verbs([('our', 'PRP$'), ('child', 'NN'), ('is', 'VBZ'), ('learning', 'VBG')])
    [('our', 'PRP$'), ('child', 'NN'), ('is', 'VBZ'), ('learning', 'VBG')]
    '''
    # Predicates unpack inside the body: tuple parameters such as
    # ``lambda (word, tag): ...`` are a syntax error on Python 3.
    def is_verb(wt):
        word, tag = wt
        return tag.startswith('VB')

    def is_noun(wt):
        word, tag = wt
        return tag.startswith('NN')

    vbidx = first_chunk_index(chunk, is_verb)
    # if no verb found, do nothing
    if vbidx is None:
        return chunk

    verb, vbtag = chunk[vbidx]
    # find nearest noun to the right of verb
    nnidx = first_chunk_index(chunk, is_noun, start=vbidx + 1)
    # if no noun found to right, look to the left
    if nnidx is None:
        nnidx = first_chunk_index(chunk, is_noun, start=vbidx - 1, step=-1)
    # if no noun found, do nothing
    if nnidx is None:
        return chunk

    _, nntag = chunk[nnidx]
    # get correct verb form and insert into chunk; an unknown
    # (verb, tag) pair falls back to itself, i.e. stays unchanged
    current = (verb, vbtag)
    if nntag.endswith('S'):
        chunk[vbidx] = plural_verb_forms.get(current, current)
    else:
        chunk[vbidx] = singular_verb_forms.get(current, current)

    return chunk
def word_tag(tag):
    """Map a POS tag to the corresponding ``wordnet`` part-of-speech constant.

    The decision is made on the tag's leading letter: 'J' -> ``wordnet.ADJ``,
    'V' -> ``wordnet.VERB``, 'N' -> ``wordnet.NOUN``, 'R' -> ``wordnet.ADV``.
    Any other tag yields the empty string.
    """
    # (leading letter, name of the wordnet attribute); the attribute is only
    # looked up once a tag actually matches.
    initial_to_attr = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for initial, attr_name in initial_to_attr:
        if tag.startswith(initial):
            return getattr(wordnet, attr_name)
    return ''
def filter_pos(text, tagger):
    """Return the stemmed nouns and verbs found in *text*.

    *text* is tokenized with ``nltk.word_tokenize`` and tagged with
    ``tagger.tag``.  Words whose tag starts with 'N' are collected as nouns,
    those whose tag starts with 'V' as verbs; everything else is dropped.

    Returns a ``(nouns, verbs)`` pair of lists of stems, in text order.

    NOTE(review): the stemmer ``st`` is not created here; it has to exist
    at module level -- confirm where it is defined.
    """
    tagged_tokens = tagger.tag(nltk.word_tokenize(text))

    noun_stems, verb_stems = [], []
    for token, pos in tagged_tokens:
        if pos.startswith('N'):
            bucket = noun_stems
        elif pos.startswith('V'):
            bucket = verb_stems
        else:
            continue
        bucket.append(st.stem(token))

    return noun_stems, verb_stems
def question_3():
    """Print a table of the five tags that most often precede a noun tag.

    Bigrams over ``BROWN_TAGS`` are inverted so that the conditional
    frequency distribution maps each tag to the counts of its predecessors.
    The predecessor counts of every tag in ``BROWN_TAGSET`` starting with
    'N' are summed, and the five largest are printed with their count,
    percentage and human-readable part of speech.
    """
    # build inverse tag bigrams and build a conditional frequency distribution
    inv_tag_bigrams = [(b, a) for (a, b) in nltk.bigrams(BROWN_TAGS)]
    tag_cfdist = nltk.probability.ConditionalFreqDist(inv_tag_bigrams)

    # accumulate the counts of all tags occurring before nouns
    tags_before_noun_dict = collections.defaultdict(int)
    for tag in BROWN_TAGSET:
        if not tag.startswith('N'):
            continue
        for predecessor, count in tag_cfdist[tag].items():
            tags_before_noun_dict[predecessor] += count

    # pick the most common predecessors
    predecessors = sorted(tags_before_noun_dict,
                          key=tags_before_noun_dict.get, reverse=True)[:5]
    predecessors_counts = [tags_before_noun_dict[elem]
                           for elem in predecessors]
    predecessors_pos = [tag_to_pos(elem) for elem in predecessors]
    # NOTE: percentages are relative to the total of the five tags shown,
    # not to the total of all predecessors.
    shown_total = sum(predecessors_counts)
    predecessors_percentages = ['%.2f' % (100.0 * c / shown_total)
                                for c in predecessors_counts]

    # single-argument print(...) behaves the same on Python 2 and Python 3
    print('Five most common tags before nouns')
    print(table(['Tag'] + predecessors,
                ['Count'] + predecessors_counts,
                ['%'] + predecessors_percentages,
                ['Part of Speech'] + predecessors_pos))
    print('Note that this result is very much in keeping with linguistic '
          'intuitions: one would expect articles, prepositions, adjectives, '
          'and other nouns to precede nouns.')
def pos_tags_to_wordnet_form(tagged_sent):
    '''
    Build a token -> wordnet POS letter mapping from (token, tag) tuples.

    Tags starting with 'V' map to 'v', 'N' to 'n', 'J' to 'a' and 'RB'
    to 'r'.  Tokens with any other tag are left out of the result.  When a
    token occurs more than once, the last occurrence with a recognized tag
    wins.
    '''
    prefix_to_letter = (('V', 'v'), ('N', 'n'), ('J', 'a'), ('RB', 'r'))

    wordnet_tags = {}
    for pair in tagged_sent:
        word, pos = pair[0], pair[1]
        for prefix, letter in prefix_to_letter:
            if pos.startswith(prefix):
                wordnet_tags[word] = letter
                break
    return wordnet_tags
def swap_verb_phrase(chunk):
    '''Move modifier phrase after verb to front of chunk and drop the verb.

    The verb is the first token whose tag starts with 'VB', is longer than
    two characters and is not 'VBG' (so bare 'VB' and gerunds are skipped).
    When no such token exists, ``chunk`` itself is returned; otherwise a new
    list is built and ``chunk`` is left unmodified.

    >>> swap_verb_phrase([('the', 'DT'), ('book', 'NN'), ('was', 'VBD'), ('great', 'JJ')])
    [('great', 'JJ'), ('the', 'DT'), ('book', 'NN')]
    >>> swap_verb_phrase([('this', 'DT'), ('gripping', 'VBG'), ('book', 'NN'), ('is', 'VBZ'), ('fantastic', 'JJ')])
    [('fantastic', 'JJ'), ('this', 'DT'), ('gripping', 'VBG'), ('book', 'NN')]
    '''
    # Unpack inside the body: tuple parameters such as
    # ``lambda (word, tag): ...`` are a syntax error on Python 3.
    def vbpred(wt):
        word, tag = wt
        return tag != 'VBG' and tag.startswith('VB') and len(tag) > 2

    # find location of verb
    vbidx = first_chunk_index(chunk, vbpred)

    if vbidx is None:
        return chunk

    # everything after the verb, then everything before it
    return chunk[vbidx + 1:] + chunk[:vbidx]
def swap_verb_phrase(chunk):
    '''Move modifier phrase after verb to front of chunk and drop the verb.

    The verb is the first token whose tag starts with 'VB', is longer than
    two characters and is not 'VBG' (so bare 'VB' and gerunds are skipped).
    When no such token exists, ``chunk`` itself is returned; otherwise a new
    list is built and ``chunk`` is left unmodified.

    >>> swap_verb_phrase([('the', 'DT'), ('book', 'NN'), ('was', 'VBD'), ('great', 'JJ')])
    [('great', 'JJ'), ('the', 'DT'), ('book', 'NN')]
    >>> swap_verb_phrase([('this', 'DT'), ('gripping', 'VBG'), ('book', 'NN'), ('is', 'VBZ'), ('fantastic', 'JJ')])
    [('fantastic', 'JJ'), ('this', 'DT'), ('gripping', 'VBG'), ('book', 'NN')]
    '''
    # Unpack inside the body: tuple parameters such as
    # ``lambda (word, tag): ...`` are a syntax error on Python 3.
    def is_swappable_verb(wt):
        word, tag = wt
        return tag != 'VBG' and tag.startswith('VB') and len(tag) > 2

    # find location of verb
    vbidx = first_chunk_index(chunk, is_swappable_verb)

    if vbidx is None:
        return chunk

    # everything after the verb, then everything before it
    return chunk[vbidx + 1:] + chunk[:vbidx]
def swap_infinitive_phrase(chunk):
    '''Move subject to before the noun preceding the infinitive.

    The pivot is the first token tagged 'IN' whose word is not 'like'; it
    is dropped from the result.  The tokens after the pivot are inserted in
    front of the noun found by searching leftwards from the pivot (index 0
    is used when that search finds no noun).  When there is no pivot,
    ``chunk`` itself is returned; otherwise a new list is built.

    >>> swap_infinitive_phrase([('book', 'NN'), ('of', 'IN'), ('recipes', 'NNS')])
    [('recipes', 'NNS'), ('book', 'NN')]
    >>> swap_infinitive_phrase([('tastes', 'VBZ'), ('like', 'IN'), ('chicken', 'NN')])
    [('tastes', 'VBZ'), ('like', 'IN'), ('chicken', 'NN')]
    >>> swap_infinitive_phrase([('delicious', 'JJ'), ('book', 'NN'), ('of', 'IN'), ('recipes', 'NNS')])
    [('delicious', 'JJ'), ('recipes', 'NNS'), ('book', 'NN')]
    '''
    # Predicates unpack inside the body: tuple parameters such as
    # ``lambda (word, tag): ...`` are a syntax error on Python 3.
    def inpred(wt):
        word, tag = wt
        return tag == 'IN' and word != 'like'

    def nnpred(wt):
        word, tag = wt
        return tag.startswith('NN')

    inidx = first_chunk_index(chunk, inpred)

    if inidx is None:
        return chunk

    # search backwards from the pivot; ``or 0`` turns "no noun found"
    # (None) into the start of the chunk
    nnidx = first_chunk_index(chunk, nnpred, start=inidx, step=-1) or 0

    # head | phrase after the pivot | noun phrase that preceded the pivot
    return chunk[:nnidx] + chunk[inidx + 1:] + chunk[nnidx:inidx]
def swap_infinitive_phrase(chunk):
    '''Move subject to before the noun preceding the infinitive.

    The pivot is the first token tagged 'IN' whose word is not 'like'; it
    is dropped from the result.  The tokens after the pivot are inserted in
    front of the noun found by searching leftwards from the pivot (index 0
    is used when that search finds no noun).  When there is no pivot,
    ``chunk`` itself is returned; otherwise a new list is built.

    >>> swap_infinitive_phrase([('book', 'NN'), ('of', 'IN'), ('recipes', 'NNS')])
    [('recipes', 'NNS'), ('book', 'NN')]
    >>> swap_infinitive_phrase([('tastes', 'VBZ'), ('like', 'IN'), ('chicken', 'NN')])
    [('tastes', 'VBZ'), ('like', 'IN'), ('chicken', 'NN')]
    >>> swap_infinitive_phrase([('delicious', 'JJ'), ('book', 'NN'), ('of', 'IN'), ('recipes', 'NNS')])
    [('delicious', 'JJ'), ('recipes', 'NNS'), ('book', 'NN')]
    '''
    # Predicates unpack inside the body: tuple parameters such as
    # ``lambda (word, tag): ...`` are a syntax error on Python 3.
    def is_pivot(wt):
        word, tag = wt
        return tag == 'IN' and word != 'like'

    def is_noun(wt):
        word, tag = wt
        return tag.startswith('NN')

    inidx = first_chunk_index(chunk, is_pivot)

    if inidx is None:
        return chunk

    # search backwards from the pivot; ``or 0`` turns "no noun found"
    # (None) into the start of the chunk
    nnidx = first_chunk_index(chunk, is_noun, start=inidx, step=-1) or 0

    # head | phrase after the pivot | noun phrase that preceded the pivot
    return chunk[:nnidx] + chunk[inidx + 1:] + chunk[nnidx:inidx]
def vbpred(wt):
    """Tell whether the (word, tag) pair *wt* carries an inflected verb tag.

    True for tags that start with 'VB' and are longer than two characters,
    except 'VBG'; False for everything else (including bare 'VB').
    """
    _, pos = wt
    if pos == 'VBG':
        return False
    return pos.startswith('VB') and len(pos) > 2
def tag_to_pos(tag):
    """Make |tag| in the Brown Corpus Tagset more human-readable.

    |tag| is upper-cased and then classified by prefix ('NN' -> 'common
    noun', 'NP' -> 'proper noun', 'VB' -> 'verb', ...).  A non-empty tag
    that consists only of punctuation characters (e.g. '.', '--', "''")
    is reported as 'punctuation'.  Anything unrecognized, including the
    empty string, yields 'OTHER'.
    """
    import string

    tag = tag.upper()

    # Test every character instead of ``tag in string.punctuation``: that
    # substring test accepted the empty tag and rejected multi-character
    # punctuation tags such as '--'.
    if tag and all(char in string.punctuation for char in tag):
        return 'punctuation'

    # (prefixes, label) pairs, checked in order; str.startswith accepts a
    # tuple of alternatives.
    prefix_labels = (
        (('NN',), 'common noun'),
        (('NP',), 'proper noun'),
        (('VB',), 'verb'),
        (('JJ',), 'adjective'),
        (('PP',), 'pronoun'),
        (('RB',), 'adverb'),
        (('CC', 'CS'), 'conjunction'),
        (('CD',), 'numeral'),
        (('IN',), 'preposition'),
        (('AT',), 'article'),
        (('TO',), 'infinitival to'),
    )
    for prefixes, label in prefix_labels:
        if tag.startswith(prefixes):
            return label
    return 'OTHER'