def complex_test_2():
    """Print the result of a map-then-reduce pipeline over nested lists.

    Two stages are built: a doubling function tagged ``modifier.map`` and a
    Counter-updating function tagged ``modifier.reduce``, the latter paired
    with a fresh ``Counter()`` as the third tuple element. Whatever
    ``apply_pipeline`` returns for the four sample lists is printed.

    NOTE(review): the third tuple element is presumably the initial
    accumulator for the reduce stage -- confirm against ``apply_pipeline``.
    """
    from collections import Counter

    def tally_items(tally, items):
        # Counter.update mutates in place and returns None, so hand the
        # same Counter back explicitly.
        tally.update(items)
        return tally

    double_stage = (lambda x: x * 2, modifier.map)
    count_stage = (tally_items, modifier.reduce, Counter())
    stages = [double_stage, count_stage]

    samples = [[1, 2, 3, 4], [1, 2, 3], [1, 2], [1]]
    print(apply_pipeline(samples, stages))
def simple_test_1_with_print():
    """Print the result of a map/filter/map/reduce pipeline over 0..9.

    Every stage function prints a label before returning, so the console
    output shows the order in which ``apply_pipeline`` invokes the stages.
    """

    def double(value):
        print('Map 1')
        return value * 2

    def exceeds_five(value):
        print('Filter 1')
        return value > 5

    def decrement(value):
        print('Map 2')
        return value - 1

    def add(left, right):
        print('Reduce 1')
        return left + right

    stages = [
        (double, modifier.map),
        (exceeds_five, modifier.filter),
        (decrement, modifier.map),
        (add, modifier.reduce),
    ]

    numbers = iter(range(10))
    print(apply_pipeline(numbers, stages))
Пример #3
0
    counter = 0
    with open('input/all_questions_corpus.txt', 'r',
              encoding="ISO-8859-1") as input_file:
        for line in input_file:
            counter += 1
            if counter % 1000000 == 0:
                print(counter)
            yield line.strip()


def get_lemma_set(question_document):
    """Return the distinct ``(lemma_, pos_)`` pairs of the content words.

    Args:
        question_document: An iterable of word objects exposing ``lemma_``,
            ``pos_``, ``is_stop`` and ``is_punct`` attributes.
            NOTE(review): presumably a spaCy ``Doc`` -- confirm with caller.

    Returns:
        A ``set`` of ``(lemma_, pos_)`` tuples, one per distinct pair among
        the words whose ``is_stop`` and ``is_punct`` are both falsy.
    """
    # A set comprehension avoids building an intermediate list first.
    return {
        (word.lemma_, word.pos_)
        for word in question_document
        if not word.is_stop and not word.is_punct
    }


def update_counter(counter, iterable):
    """Feed ``iterable`` into ``counter.update`` and return ``counter``.

    ``counter`` is mutated in place; the same object is returned so the
    function can be used as a two-argument accumulator.
    """
    counter.update(iterable)  # in-place; update() itself returns None
    return counter


# Stages: run `nlp` on every question, collapse each result to its set of
# (lemma, pos) pairs, then fold all of the sets into a single Counter.
# Each question contributes a *set*, so a pair is counted at most once per
# question (assuming apply_pipeline feeds one set per reduce step -- TODO
# confirm against apply_pipeline).
pipeline = [
    (nlp, modifier.map),
    (get_lemma_set, modifier.map),
    (update_counter, modifier.reduce, Counter()),
]

word_counter = apply_pipeline(all_questions(), pipeline)

# Quick sanity output: number of distinct pairs and the 20 most frequent.
print(len(word_counter))
print(word_counter.most_common(20))

# Persist the counts for later use.
with open('input/document_frequencies.pickle', 'wb') as output_file:
    pickle.dump(word_counter, output_file)
def create_features(input_file_path):
    """Yield one reduced dict per entry produced by the NLP pipeline.

    The dataset read from ``input_file_path`` is passed through
    ``nlp_pipeline`` via ``apply_pipeline``. From each resulting entry only
    the items whose key ends with ``'feature'`` or equals ``'id'`` are kept.

    Args:
        input_file_path: Forwarded unchanged to ``read_dataset``.

    Yields:
        A new dict holding the ``id`` and ``*feature`` items of each entry.
    """

    def is_output_key(key):
        return key.endswith('feature') or key == 'id'

    entries = apply_pipeline(read_dataset(input_file_path), nlp_pipeline)
    for entry in entries:
        selected = {}
        for key, value in entry.items():
            if is_output_key(key):
                selected[key] = value
        yield selected
def complex_test_1():
    """Print the result of a pipeline that mixes window stages with
    map/filter/reduce over the integers 0..9.

    Stage order: double each value, apply ``windowify(2)`` as a window
    stage, filter on ``sum(x[0]) > 4``, apply ``dewindowify`` as a window
    stage, then reduce by addition.
    """
    stages = [
        (lambda x: x * 2, modifier.map),
        (windowify(2), modifier.window),
        (lambda x: sum(x[0]) > 4, modifier.filter),
        (dewindowify, modifier.window),
        (lambda x, y: x + y, modifier.reduce),
    ]

    numbers = iter(range(10))
    print(apply_pipeline(numbers, stages))
def simple_test_1():
    """Print the result of a map/filter/map/reduce pipeline over 0..9.

    Stages, in order: double the value, keep values greater than 5,
    subtract one, and reduce by addition.
    """
    double_stage = (lambda x: x * 2, modifier.map)
    keep_stage = (lambda x: x > 5, modifier.filter)
    decrement_stage = (lambda x: x - 1, modifier.map)
    sum_stage = (lambda x, y: x + y, modifier.reduce)
    stages = [double_stage, keep_stage, decrement_stage, sum_stage]

    numbers = iter(range(10))
    print(apply_pipeline(numbers, stages))