Пример #1
0
def etl():
    """Build the pipeline returned by ``sequential``.

    The stages handed to ``sequential`` are, in order: ``csv_to_map`` with the
    eleven column names below, ``keep`` restricted to ``"msg"``, and ``append``
    on ``"msg"`` using a wrapped function that lower-cases its argument and
    returns the ``Counter`` tally of it as a plain ``dict``.
    """
    columns = [
        'date', 'file', 'date2', 'log', 'app', 'beat', 'front', 'is_log',
        'msg', 'offset', 'arch'
    ]

    def count_lowercased(x):
        # Counter is converted to a plain dict before being returned.
        return dict(Counter(x.lower()))

    parse_stage = csv_to_map(columns)
    keep_stage = keep(["msg"])
    count_stage = append(['msg'], wrap(count_lowercased))
    return sequential(parse_stage, keep_stage, count_stage)
def test_some_working_remove_non_existing_columns():
    """Parse a compact date, explode it, then call ``remove_columns``.

    ``remove_columns`` is given "hora" and "dia"; the assertions expect all
    six exploded date/time parts to still be present and no error reported.
    """
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S",
        "%m%d", "%Y-%m-%d", "%Y%m%d"
    ]
    inp = {"date": "20171010"}

    operation = append(
        fields=["date"],
        etl_func=compose(
            date_parser(date_formats),
            explode_date,
            remove_columns("hora", "dia"),
        ),
    )
    res, err = operation(inp)

    # The input record must still hold its original value.
    assert inp is not None
    assert inp["date"] == "20171010"

    assert res is not None
    expected = {
        "second": 0,
        "minute": 0,
        "hour": 0,
        "day": 10,
        "month": 10,
        "year": 2017,
    }
    for key in expected:
        assert key in res
    for key, value in expected.items():
        assert res[key] == value
    assert err is None
Пример #3
0
def test_field_not_found():
    """Exploding a field absent from the input gives no result and an error."""
    explode_offer = append(["offer"], explode("offer"))
    record = {"one": 1}

    res, err = explode_offer(record)

    assert res is None
    assert err is not None
    # The error mapping is keyed by the missing field name.
    assert "offer" in err
    assert err["offer"] == "offer not found"
Пример #4
0
def test_empty():
    """A ``None`` input to the time-parsing operation yields only an error."""
    parse_time = time_parser(["%H%M%S"])
    operation = append(fields=["time"], etl_func=parse_time)

    res, err = operation(None)

    expected_err = {'time': "Time can't be None: None"}
    assert res is None
    assert err == expected_err
Пример #5
0
def test_multiple_working():
    """Two appended fields both contribute entries to the result."""
    record = {"hello": "world", "goodbye": "sadness"}
    operation = append(["hello", "goodbye"], wrap(head_field_and_tail))

    res, err = operation(record)

    expected = {"w": "orld", "s": "adness"}
    assert err is None
    assert res == expected
Пример #6
0
def test_some_working():
    """A single appended field is turned into "head" and "tail" entries."""
    record = {"hello": "world"}
    operation = append(["hello"], wrap(head_and_tail))

    res, err = operation(record)

    expected = {"head": "w", "tail": "orld"}
    assert res == expected
    assert err is None
Пример #7
0
def test_empty():
    """A ``None`` input is reported as the requested field not being found."""
    operation = append(["hello"], wrap(head_and_tail))

    res, err = operation(None)

    expected_err = {"hello": "hello not found"}
    assert res is None
    assert err == expected_err
Пример #8
0
def test_empty():
    """A ``None`` input to the date-parsing operation yields only an error."""
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    parse_date = date_parser(date_formats)
    operation = append(fields=["date"], etl_func=parse_date)

    res, err = operation(None)

    expected_err = {'date': "Date can't be None: None"}
    assert res is None
    assert err == expected_err
Пример #9
0
def test_date_format_incorrect():
    """A date string that the assertions expect to be unparseable is reported."""
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    raw_date = "2017,10,10"
    inp = {"date": raw_date}
    operation = append(fields=["date"], etl_func=date_parser(date_formats))

    res, err = operation(inp)

    # The input record must still hold its original value.
    assert inp is not None
    assert inp["date"] == raw_date
    assert res is None
    assert err is not None
    assert err["date"] == "Can not parse date 2017,10,10"
Пример #10
0
def test_empty_time_formats():
    """Passing ``None`` as the time formats is reported as an error."""
    raw_time = "202020"
    inp = {"time": raw_time}
    operation = append(fields=["time"], etl_func=time_parser(None))

    res, err = operation(inp)

    # The input record must still hold its original value.
    assert inp is not None
    assert inp["time"] == raw_time
    assert res is None
    assert err is not None
    assert err["time"] == "Time formats can't be None"
Пример #11
0
def test_time_format_incorrect():
    """A time string that the assertions expect to be unparseable is reported."""
    raw_time = "20,20"
    inp = {"time": raw_time}
    operation = append(fields=["time"], etl_func=time_parser(["%H%M%S"]))

    res, err = operation(inp)

    # The input record must still hold its original value.
    assert inp is not None
    assert inp["time"] == raw_time
    assert res is None
    assert err is not None
    assert err["time"] == "Can not parse time 20,20"
Пример #12
0
def test_empty_date_formats():
    """Passing ``None`` as the date formats is reported as an error."""
    raw_date = "20171010"
    inp = {"date": raw_date}
    operation = append(fields=["date"], etl_func=date_parser(None))

    res, err = operation(inp)

    # The input record must still hold its original value.
    assert inp is not None
    assert inp["date"] == raw_date
    assert res is None
    assert err is not None
    assert err["date"] == "Date formats can't be None"
Пример #13
0
def test_some_working():
    """A "%H%M%S" time is parsed and exploded into hour/minute/second."""
    raw_time = "202020"
    inp = {"time": raw_time}
    pipeline = compose(time_parser(["%H%M%S"]), explode_time)
    operation = append(fields=["time"], etl_func=pipeline)

    res, err = operation(inp)

    # The input record must still hold its original value.
    assert inp is not None
    assert inp["time"] == raw_time
    assert res is not None
    for part in ("second", "minute", "hour"):
        assert res[part] == 20
    assert err is None
Пример #14
0
def test_some_working():
    """A compact "YYYYMMDD" date is parsed and exploded into its parts."""
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    raw_date = "20171010"
    inp = {"date": raw_date}
    pipeline = compose(date_parser(date_formats), explode_date)
    operation = append(fields=["date"], etl_func=pipeline)

    res, err = operation(inp)

    # The input record must still hold its original value.
    assert inp is not None
    assert inp["date"] == raw_date
    assert res is not None
    expected_parts = (
        ("second", 0),
        ("minute", 0),
        ("hour", 0),
        ("day", 10),
        ("month", 10),
        ("year", 2017),
    )
    for key, value in expected_parts:
        assert res[key] == value
    assert err is None
Пример #15
0
def test_field_list_two_rows_two_different_fields():
    """Explode a two-row list whose rows share only one key.

    The expected mapping uses unsuffixed ``nested_<key>`` names for the first
    row and ``nested_<key>_1`` names for the second.
    """
    first_row = {"one": 1, "two": 2}
    second_row = {"one": 1, "three": 3}
    operation = append(["nested"], explode("nested"))

    res, err = operation({"nested": [first_row, second_row]})

    expected = {
        "nested_one": 1,
        "nested_one_1": 1,
        "nested_two": 2,
        "nested_three_1": 3,
    }
    check_dict_by_field(res, expected)
Пример #16
0
def test_sequential_use():
    """Reuse one operation for a good, a bad, and another good input.

    The middle call is expected to fail to parse; the final call checks that
    the same operation object still produces the full result afterwards.
    """
    good_date = "2017-10-10"
    operation = append(
        fields=["date"],
        etl_func=compose(date_parser(["%Y-%m-%d"]), explode_date),
    )

    def assert_parsed(inp, res, err):
        # Shared checks for the two successful calls.
        assert inp is not None
        assert inp["date"] == good_date
        assert res is not None
        assert res["second"] == 0
        assert res["minute"] == 0
        assert res["hour"] == 0
        assert res["day"] == 10
        assert res["month"] == 10
        assert res["year"] == 2017
        assert err is None

    inp = {"date": good_date}
    res, err = operation(inp)
    assert_parsed(inp, res, err)

    inp = {"date": "0"}
    res, err = operation(inp)
    assert inp is not None
    assert res is None
    assert err == {'date': 'Can not parse date 0'}

    inp = {"date": good_date}
    res, err = operation(inp)
    assert_parsed(inp, res, err)
Пример #17
0
def test_field_list_two_rows_one_field():
    """Explode a two-row list where both rows carry the same single key."""
    rows = [{"one": 1}, {"one": 1}]
    operation = append(["nested"], explode("nested"))

    res, err = operation({"nested": rows})

    # The second row's key is expected under a "_1"-suffixed name.
    check_dict_by_field(res, {"nested_one": 1, "nested_one_1": 1})
Пример #18
0
def test_field_object_two_fields():
    """Explode a nested dict into ``nested_<key>`` entries."""
    nested = {"one": 1, "two": 2}
    operation = append(["nested"], explode("nested"))

    res, err = operation({"nested": nested})

    check_dict_by_field(res, {"nested_one": 1, "nested_two": 2})
Пример #19
0
def test_empty():
    """A ``None`` input to the explode operation produces no result."""
    explode_a = append(["a"], explode("a"))

    # Only the result half of the returned pair is checked here.
    res, err = explode_a(None)

    assert res is None