from knock30 import conll_read
from knock31 import extract_postag

if __name__ == "__main__":
    # Read the corpus, then print the 'lemma' field of every token whose
    # 'pos' field equals 'VB'.
    # NOTE(review): conll_read() is presumed to return sentences of token
    # dicts with 'pos' and 'lemma' keys -- confirm against knock30.
    sentences = conll_read()
    verb_lemmas = extract_postag(sentences, 'lemma', 'VB')
    print(verb_lemmas)
import matplotlib.pyplot as plt
import operator
from knock35 import word_freqs
from knock30 import conll_read


def freqs_chart(count_freq, top):
    """Show a bar chart of the most frequent entries of ``count_freq``.

    Args:
        count_freq: Mapping from a word to its occurrence count.
        top: Maximum number of entries to plot, taken in descending order
            of count.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    ranked = sorted(
        count_freq.items(), key=operator.itemgetter(1), reverse=True
    )[:top]
    words = [str(word) for word, _ in ranked]
    freqs = [int(freq) for _, freq in ranked]

    # Size the x positions from the entries actually selected: the mapping
    # may hold fewer than `top` items, and plt.bar() needs positions and
    # heights of matching length.
    positions = range(1, len(freqs) + 1)

    plt.bar(positions, freqs)
    plt.xticks(positions, words)
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.show()


if __name__ == "__main__":
    texts = conll_read()
    counts = word_freqs(texts)
    freqs_chart(counts, 10)
from knock30 import conll_read


def aofb_long(sentence):
    """Collect every run of two or more consecutive 'NN' tokens.

    Args:
        sentence: Iterable of sentences, each an iterable of token dicts
            providing at least the keys ``'pos'`` and ``'text'``.

    Returns:
        A list of runs, each run being the list of ``'text'`` values of
        adjacent tokens whose ``'pos'`` is ``'NN'``. Runs never cross a
        sentence boundary, and a run that ends a sentence is included.
    """
    seqs = []
    for sent in sentence:
        run = []
        for token in sent:
            if token['pos'] == 'NN':
                run.append(token['text'])
                continue
            # A non-noun token closes the current run.
            if len(run) > 1:
                seqs.append(run)
            run = []
        # Flush a run that reaches the end of the sentence; otherwise it
        # would either be lost or merged into the next sentence's nouns.
        if len(run) > 1:
            seqs.append(run)
    return seqs


if __name__ == "__main__":
    phrase = aofb_long(conll_read())
    for w in phrase:
        print(' '.join(w) + '\n')
from knock30 import conll_read


def extract_postag(sentence, type, pos_tag):
    """Return one field of every token carrying a given POS tag.

    Args:
        sentence: Iterable of sentences, each an iterable of token dicts
            with a ``'pos'`` key.
        type: Key of the token field to collect (e.g. ``'text'``).
        pos_tag: Value that a token's ``'pos'`` must equal to be selected.

    Returns:
        A flat list of ``token[type]`` values, in corpus order.
    """
    return [
        tok[type]
        for tokens in sentence
        for tok in tokens
        if tok['pos'] == pos_tag
    ]


if __name__ == "__main__":
    corpus = conll_read()
    print(extract_postag(corpus, 'text', 'VB'))
from knock30 import conll_read
from knock31 import extract_postag


def extract_aofb(sentence):
    """Find every three-token phrase of the form noun, "of", noun.

    Args:
        sentence: Iterable of sentences, each a sliceable sequence (such
            as a list) of token dicts with ``'pos'`` and ``'text'`` keys.

    Returns:
        A list of matches, each a list of the three tokens' ``'text'``
        values. A match requires the first and third token to have
        ``'pos' == 'NN'`` and the middle token's text to be ``'of'``.
    """
    res = []
    for sent in sentence:
        # Slide a three-token window over the sentence. zip() stops at the
        # shortest slice, so the final window is included and sentences
        # shorter than three tokens yield nothing.
        for first, middle, last in zip(sent, sent[1:], sent[2:]):
            if (first['pos'] == 'NN'
                    and middle['text'] == 'of'
                    and last['pos'] == 'NN'):
                # Store a concrete list so callers can iterate it more
                # than once.
                res.append([first['text'], middle['text'], last['text']])
    return res


if __name__ == "__main__":
    phrase = extract_aofb(conll_read())
    for w in phrase:
        print(' '.join(w) + '\n')
from knock30 import conll_read
import operator


def word_freqs(sentence):
    """Count how often each token text occurs.

    Args:
        sentence: Iterable of sentences, each an iterable of token dicts
            with a ``'text'`` key.

    Returns:
        A dict mapping each distinct ``'text'`` value to its number of
        occurrences, keyed in order of first appearance.
    """
    tally = {}
    for tokens in sentence:
        for token in tokens:
            surface = token['text']
            tally[surface] = tally.get(surface, 0) + 1
    return tally


if __name__ == "__main__":
    corpus = conll_read()
    by_count = operator.itemgetter(1)
    # Print the words from most to least frequent.
    ranking = sorted(word_freqs(corpus).items(), key=by_count, reverse=True)
    for word, freq in ranking:
        print('%s: %r' % (word, freq))