Пример #1
0
from knock30 import conll_read
from knock31 import extract_postag

if __name__ == "__main__":
    # Read the corpus, then print the 'lemma' values that extract_postag
    # returns for the 'VB' tag.
    sentences = conll_read()
    verb_lemmas = extract_postag(sentences, 'lemma', 'VB')
    print(verb_lemmas)
Пример #2
0
import matplotlib.pyplot as plt
import operator
from knock35 import word_freqs
from knock30 import conll_read


def freqs_chart(count_freq, top):
    """Show a bar chart of the most frequent entries of ``count_freq``.

    Args:
        count_freq: Mapping from a word to its occurrence count.
        top: Maximum number of bars to draw. When the mapping holds fewer
            entries than this, only the available entries are drawn.

    Returns:
        None. The chart is displayed with ``plt.show()``.
    """
    ranked = sorted(count_freq.items(),
                    key=operator.itemgetter(1),
                    reverse=True)[:top]
    words = [str(word) for word, _ in ranked]
    freqs = [int(freq) for _, freq in ranked]

    # Size the x axis from the bars actually selected rather than from
    # ``top``: bar() needs as many x positions as heights, and the mapping
    # may contain fewer than ``top`` entries.
    positions = range(1, len(freqs) + 1)

    plt.bar(positions, freqs)
    plt.xticks(positions, words)
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.show()


if __name__ == "__main__":
    # Count word frequencies over the corpus and chart the ten most frequent.
    freqs_chart(word_freqs(conll_read()), 10)
Пример #3
0
from knock30 import conll_read


def aofb_long(sentence):
    """Collect runs of two or more consecutive tokens tagged 'NN'.

    Args:
        sentence: Iterable of sentences, each an iterable of token dicts
            that provide at least the keys ``'pos'`` and ``'text'``.

    Returns:
        A list of runs in order of appearance. Each run is a list holding
        the ``'text'`` values of adjacent 'NN' tokens from one sentence.
        Runs of a single token are not reported.
    """
    seqs = []
    for sent in sentence:
        # Start each sentence with an empty run so that nouns at the end of
        # one sentence are never glued to nouns at the start of the next.
        seq = []
        for w in sent:
            if w['pos'] == 'NN':
                seq.append(w['text'])
                continue
            if len(seq) > 1:
                seqs.append(seq)
            seq = []
        # A run that reaches the end of the sentence has no following
        # non-noun token to trigger the flush above, so flush it here.
        if len(seq) > 1:
            seqs.append(seq)
    return seqs


if __name__ == "__main__":
    # Print each noun run on its own line, followed by a blank line.
    for nouns in aofb_long(conll_read()):
        print(' '.join(nouns) + '\n')
Пример #4
0
from knock30 import conll_read


def extract_postag(sentence, type, pos_tag):
    """Return one field of every token whose 'pos' equals ``pos_tag``.

    Args:
        sentence: Iterable of sentences, each an iterable of token dicts.
        type: Key of the token field to collect (e.g. ``'text'``).
        pos_tag: Value that a token's ``'pos'`` entry must equal.

    Returns:
        A flat list of ``token[type]`` values, in order of appearance.
    """
    return [
        token[type]
        for sent in sentence
        for token in sent
        if token['pos'] == pos_tag
    ]


if __name__ == "__main__":
    # Print the 'text' field of every token tagged 'VB' in the corpus.
    sentences = conll_read()
    verbs = extract_postag(sentences, 'text', 'VB')
    print(verbs)
Пример #5
0
from knock30 import conll_read
from knock31 import extract_postag


def extract_aofb(sentence):
    """Find three-token phrases of the form ``NN of NN``.

    Args:
        sentence: Iterable of sentences, each a list of token dicts that
            provide at least the keys ``'pos'`` and ``'text'``.

    Returns:
        A list of phrases in order of appearance. Each phrase is a list of
        the three ``'text'`` values: a token tagged 'NN', the literal word
        ``'of'``, and another token tagged 'NN'.
    """
    res = []
    for sent in sentence:
        # Zipping the sentence with itself shifted by one and two yields
        # every window of three adjacent tokens, including the last one.
        for first, middle, last in zip(sent, sent[1:], sent[2:]):
            if (first['pos'] == 'NN'
                    and middle['text'] == 'of'
                    and last['pos'] == 'NN'):
                # Store a real list, not a one-shot generator, so callers
                # can read the phrase more than once.
                res.append([first['text'], middle['text'], last['text']])
    return res


if __name__ == "__main__":
    # Print each extracted phrase on its own line, followed by a blank line.
    for words in extract_aofb(conll_read()):
        print(' '.join(words) + '\n')
Пример #6
0
from knock30 import conll_read
import operator


def word_freqs(sentence):
    """Count how often each token text occurs.

    Args:
        sentence: Iterable of sentences, each an iterable of token dicts
            that provide at least the key ``'text'``.

    Returns:
        A dict mapping each distinct ``'text'`` value to its number of
        occurrences, with keys in order of first appearance.
    """
    word_count = {}
    for sent in sentence:
        for token in sent:
            text = token['text']
            # A missing key counts as zero, so first sightings start at 1.
            word_count[text] = word_count.get(text, 0) + 1
    return word_count


if __name__ == "__main__":
    # Print every word with its count, most frequent first.
    counts = word_freqs(conll_read())
    ranked = list(counts.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    for word, freq in ranked:
        print('%s: %r' % (word, freq))