示例#1
0
def test_order_computation():
    """A graph fed by a chain of graphs lists its upstream graphs in order."""
    root_input = Input(input=[])
    root_graph = Graph(
        input_node=root_input,
        output_node=root_input,
        name="first",
    )

    middle_input = Input(input=root_graph)
    middle_graph = Graph(
        input_node=middle_input,
        output_node=middle_input,
        name="second",
    )

    leaf_input = Input(input=middle_graph)
    leaf_graph = Graph(
        input_node=leaf_input,
        output_node=leaf_input,
        name="third",
    )

    # Both upstream graphs must appear, the earliest one first.
    expected_order = [root_graph, middle_graph]
    assert leaf_graph.order == expected_order
示例#2
0
def test_input_node_to_graph_in():
    """An Input built from a Graph stores it as input_graph, not as input."""
    source_node = Input(input=get_persons(), name="main_input")
    source_graph = Graph(
        input_node=source_node,
        output_node=source_node,
        name="main_graph",
    )

    chained_node = Input(input=source_graph)
    assert chained_node.input_graph == source_graph
    assert chained_node.input is None

    # The graph built on top of the chained input depends on source_graph.
    chained_graph = Graph(input_node=chained_node, output_node=chained_node)
    expected_order = [source_graph]
    assert chained_graph.order == expected_order
示例#3
0
def test_simple_sort():
    """Sorting the reversed numbers fixture by 'a' restores the fixture."""
    reversed_numbers = get_numbers()[::-1]
    source = Input(input=reversed_numbers)
    sorted_node = Sort(by='a')(source)

    pipeline = Graph(input_node=source, output_node=sorted_node)
    outcome = pipeline.run()
    assert outcome == get_numbers()
示例#4
0
def test_outer_without_key():
    """Run an outer join with an empty key list and print the joined rows.

    This test only exercises the pipeline and dumps its output; it makes
    no assertions about the result.
    """
    persons_input = Input(input=get_advanced_persons())
    persons_graph = Graph(input_node=persons_input, output_node=persons_input)

    cities_input = Input(input=get_advanced_cities())
    joined = Join(persons_graph, [], "outer")(cities_input)

    pipeline = Graph(input_node=cities_input, output_node=joined)
    rows = pipeline.run()

    # Dump the rows between banner lines for manual inspection.
    print("***** RESULT *****")
    for row in rows:
        print(row)

    print()
    print("******************")
示例#5
0
def test_input_node_init():
    """A fresh Input keeps its name and data and has no output yet."""
    node = Input(input=get_persons(), name="main_input")

    assert node.name == "main_input"
    assert node.input == get_persons()
    assert node.output is None
示例#6
0
def test_square_mapper():
    """Mapping square_mapper over the numbers fixture yields squares of 1..5."""
    source = Input(input=get_numbers())
    squared = Map(square_mapper)(source)
    pipeline = Graph(input_node=source, output_node=squared)
    outcome = pipeline.run()

    expected = [{'a': n * n} for n in (1, 2, 3, 4, 5)]
    assert outcome == expected
示例#7
0
def test_simple_mapper():
    """simple_mapper leaves the numbers fixture unchanged."""
    source = Input(input=get_numbers())
    mapped = Map(simple_mapper)(source)

    # Calling the Map node on an input must wire that input in.
    assert mapped.input == source

    pipeline = Graph(input_node=source, output_node=mapped)
    outcome = pipeline.run()
    assert outcome == get_numbers()
示例#8
0
def test_simple_fold():
    """Folding the numbers fixture with simple_folder yields a single row."""
    source = Input(input=get_numbers())
    initial_state = {"a": []}
    folded = Fold(simple_folder, initial_state)(source)

    pipeline = Graph(input_node=source, output_node=folded)
    outcome = pipeline.run()

    expected = [{'a': [1, -2, 3, -4, 5]}]
    assert outcome == expected
示例#9
0
def test_input_node_to_graph():
    """A graph made of one Input node has no dependencies and echoes its data."""
    source = Input(input=get_persons(), name="main_input")
    pipeline = Graph(
        input_node=source,
        output_node=source,
        name="main_graph",
    )

    # Structural checks on the freshly built graph.
    assert pipeline.name == "main_graph"
    assert pipeline.nodes == [source]
    assert pipeline._dependencies == []
    assert pipeline.order == []

    # Running it returns the input rows as they were given.
    outcome = pipeline.run()
    assert outcome == get_persons()
示例#10
0
def test_city_fold():
    """city_folder folds the cities fixture into one row of joined names."""
    source = Input(input=get_cities())
    initial_state = {"id": -1, "name": ""}
    folded = Fold(city_folder, initial_state)(source)

    pipeline = Graph(input_node=source, output_node=folded)
    outcome = pipeline.run()

    # Dump the rows between banner lines for manual inspection.
    print("***** RESULT *****")
    for row in outcome:
        print(row)

    print()
    print("******************")

    # The expected name is every city name concatenated in fixture order.
    expected_name = "".join([city["name"] for city in get_cities()])

    assert outcome == [{"id": -1, "name": expected_name}]
示例#11
0
def test_person_mapper():
    """person_mapper applied to the persons fixture yields name/id rows."""
    source = Input(input=get_persons())
    mapped = Map(person_mapper)(source)
    pipeline = Graph(input_node=source, output_node=mapped)
    outcome = pipeline.run()

    expected = [
        {"name": "Andrey", "id": 1},
        {"name": "Leonid", "id": 2},
        {"name": "Sergey", "id": 1},
    ]
    assert outcome == expected
示例#12
0
def test_persons_sort_name():
    """Sorting the advanced persons fixture by 'name' orders rows by name."""
    source = Input(input=get_advanced_persons())
    by_name = Sort(by='name')(source)
    pipeline = Graph(input_node=source, output_node=by_name)
    outcome = pipeline.run()

    # Dump the rows between banner lines for manual inspection.
    print("***** RESULT *****")
    for row in outcome:
        print(row)

    print()
    print("******************")

    expected = [
        {'name': 'Andrey', 'id': 1, 'age': 38},
        {'name': 'Grigoroy', 'id': 4, 'age': 64},
        {'name': 'Leonid', 'id': 2, 'age': 20},
        {'name': 'Maxim', 'id': 5, 'age': 28},
        {'name': 'Misha', 'id': 1, 'age': 5},
        {'name': 'Rishat', 'id': 2, 'age': 17},
        {'name': 'Roma', 'id': 1, 'age': 10},
        {'name': 'Sergey', 'id': 1, 'age': 25},
        {'name': 'Stepan', 'id': 10, 'age': 14},
    ]
    assert outcome == expected
示例#13
0
def test_inner_simple_join():
    """Inner join of persons and cities on 'id' keeps only matching ids."""
    persons_input = Input(input=get_advanced_persons())
    persons_graph = Graph(input_node=persons_input, output_node=persons_input)

    cities_input = Input(input=get_advanced_cities())
    joined = Join(persons_graph, 'id', "inner")(cities_input)

    pipeline = Graph(input_node=cities_input, output_node=joined)
    outcome = pipeline.run()

    # Dump the rows between banner lines for manual inspection.
    print("***** RESULT *****")
    for row in outcome:
        print(row)

    print()
    print("******************")

    expected = [
        {'age': 38, 'city': 'Mocsow', 'id': 1, 'name': 'Andrey'},
        {'age': 25, 'city': 'Mocsow', 'id': 1, 'name': 'Sergey'},
        {'age': 5, 'city': 'Mocsow', 'id': 1, 'name': 'Misha'},
        {'age': 10, 'city': 'Mocsow', 'id': 1, 'name': 'Roma'},
        {'age': 20, 'city': 'SPb', 'id': 2, 'name': 'Leonid'},
        {'age': 17, 'city': 'SPb', 'id': 2, 'name': 'Rishat'},
        {'age': 14, 'city': 'Kaluga', 'id': 10, 'name': 'Stepan'},
    ]
    assert outcome == expected
示例#14
0
def test_outer_join():
    """Outer join on 'id' keeps unmatched rows from both sides, None-filled."""
    persons_input = Input(input=get_advanced_persons())
    persons_graph = Graph(input_node=persons_input, output_node=persons_input)

    cities_input = Input(input=get_advanced_cities())
    joined = Join(persons_graph, 'id', "outer")(cities_input)

    pipeline = Graph(input_node=cities_input, output_node=joined)
    outcome = pipeline.run()

    # Dump the rows between banner lines for manual inspection.
    print("***** RESULT *****")
    for row in outcome:
        print(row)

    print()
    print("******************")

    expected = [
        {'age': 38, 'city': 'Mocsow', 'id': 1, 'name': 'Andrey'},
        {'age': 25, 'city': 'Mocsow', 'id': 1, 'name': 'Sergey'},
        {'age': 5, 'city': 'Mocsow', 'id': 1, 'name': 'Misha'},
        {'age': 10, 'city': 'Mocsow', 'id': 1, 'name': 'Roma'},
        {'age': 20, 'city': 'SPb', 'id': 2, 'name': 'Leonid'},
        {'age': 17, 'city': 'SPb', 'id': 2, 'name': 'Rishat'},
        # City without any person: person columns are None.
        {'age': None, 'city': 'Kazan', 'id': 3, 'name': None},
        # Persons without a city: city column is None.
        {'age': 64, 'city': None, 'id': 4, 'name': 'Grigoroy'},
        {'age': 28, 'city': None, 'id': 5, 'name': 'Maxim'},
        {'age': None, 'city': 'Novgorod', 'id': 7, 'name': None},
        {'age': 14, 'city': 'Kaluga', 'id': 10, 'name': 'Stepan'},
        {'age': None, 'city': 'Tula', 'id': 12, 'name': None},
    ]
    assert outcome == expected
示例#15
0
def test_empty_mapper():
    """Mapping over an empty input produces an empty result."""
    source = Input(input=[])
    mapped = Map(simple_mapper)(source)
    pipeline = Graph(input_node=source, output_node=mapped)

    outcome = pipeline.run()
    assert outcome == []
示例#16
0
        row["tf_idf"] = row["tf"] * \
                        math.log(row['docs_count'] / row['count_idf'])

    records = sorted(records, key=itemgetter("tf_idf"), reverse=True)

    yield {
        "word":
        row["word"],
        "index": [(records[i]["doc_id"], records[i]["tf_idf"])
                  for i in range(0, min(3, len(records)))]
    }


# Script entry point: wires up the graphs of the pipeline.
# NOTE(review): the visible portion ends after count_idf_reducer; the rest of
# the wiring (and the actual run call) is presumably further down — confirm.
if __name__ == "__main__":

    # "split_words" graph: applies the split_text mapper to its input node.
    split_input_node = Input()
    split_mapper = Map(split_text)(split_input_node)
    split_words = Graph(input_node=split_input_node,
                        output_node=split_mapper,
                        name="split_words")

    # count_docs graph: folds its input with docs_count, starting from the
    # state {"docs_count": 0}.
    # NOTE(review): the meaning of the third Fold argument ("doc_number") is
    # not visible here — confirm against the Fold signature.
    fold_input = Input()
    folder = Fold(docs_count, {"docs_count": 0}, "doc_number")(fold_input)
    count_docs = Graph(input_node=fold_input, output_node=folder)

    # IDF chain, fed by the split_words graph:
    #   sort by (doc_id, word) -> reduce with `unique` on the same keys
    #   -> "outer" join with count_docs on an empty key list
    #   -> sort by word -> reduce with calc_idf per word.
    count_idf_input = Input(split_words)
    sort_node = Sort(["doc_id", "word"])(count_idf_input)
    reducer = Reduce(unique, ["doc_id", "word"])(sort_node)
    join = Join(count_docs, [], "outer")(reducer)
    sort_by_word = Sort("word")(join)
    count_idf_reducer = Reduce(calc_idf, ["word"])(sort_by_word)
示例#17
0
def split_text(record):
    """
    Split a row with a 'text' field into one row per word occurrence.

    Every run of characters other than ASCII letters is treated as a
    separator. Each yielded row carries the source row's 'doc_id' and the
    lower-cased word under the 'word' key.
    """
    # Collapse every non-letter run into a single space, then split on
    # whitespace so that only the words remain.
    cleaned = re.sub('[^A-Za-z]+', ' ', record['text'])
    yield from (
        {'doc_id': record['doc_id'], 'word': piece.lower()}
        for piece in cleaned.split()
    )


def word_counter(rows):
    """Yield one row with the word of the first row and the number of rows."""
    first_row = rows[0]
    summary = {'word': first_row['word'], 'number': len(rows)}
    yield summary


# Script entry point: word-count pipeline.
#   split_text mapper -> sort by "word" -> word_counter reducer per "word".
if __name__ == "__main__":
    input_node = Input()
    mapper = Map(split_text)(input_node)
    sort = Sort("word")(mapper)
    reduce = Reduce(word_counter, "word")(sort)

    graph = Graph(input_node=input_node, output_node=reduce)

    # Open the output through a context manager so the handle is flushed and
    # closed once the run finishes (or fails); the original passed a bare
    # open(...) result that was never closed.
    with open("word_count.txt", "w") as output_file:
        graph.run(input_file="data/text_corpus.txt",
                  output_file=output_file)
示例#18
0
def test_input_node_empty_run():
    """Running an Input node built from an empty list yields nothing."""
    node = Input(input=[])
    produced = [*node.run()]
    assert produced == []
示例#19
0
def test_input_node_run():
    """Running an Input node yields exactly the rows it was built from."""
    node = Input(input=get_persons())
    produced = [*node.run()]
    assert produced == get_persons()
示例#20
0
def test_left_common_cols_join():
    """Left join on 'id' prefixes the clashing 'name' column per side.

    Both inputs carry a 'name' column; the expected rows expose them as
    'left_name' and 'right_name', with None where no city matches.
    """
    persons_input = Input(input=get_advanced_persons())
    persons_graph = Graph(input_node=persons_input, output_node=persons_input)

    cities_input = Input(input=get_cities())
    joined = Join(persons_graph, 'id', "left")(cities_input)

    pipeline = Graph(input_node=cities_input, output_node=joined)
    outcome = pipeline.run()

    # Dump the rows between banner lines for manual inspection.
    print("***** RESULT *****")
    for row in outcome:
        print(row)

    print()
    print("******************")

    expected = [
        {'age': 38, 'id': 1, 'left_name': 'Andrey', 'right_name': 'Mocsow'},
        {'age': 25, 'id': 1, 'left_name': 'Sergey', 'right_name': 'Mocsow'},
        {'age': 5, 'id': 1, 'left_name': 'Misha', 'right_name': 'Mocsow'},
        {'age': 10, 'id': 1, 'left_name': 'Roma', 'right_name': 'Mocsow'},
        {'age': 20, 'id': 2, 'left_name': 'Leonid', 'right_name': 'SPb'},
        {'age': 17, 'id': 2, 'left_name': 'Rishat', 'right_name': 'SPb'},
        {'age': 64, 'id': 4, 'left_name': 'Grigoroy', 'right_name': None},
        {'age': 28, 'id': 5, 'left_name': 'Maxim', 'right_name': None},
        {'age': 14, 'id': 10, 'left_name': 'Stepan', 'right_name': 'Kaluga'},
    ]
    assert outcome == expected