Exemplo n.º 1
0
def test_step_table_join_hash_is_true():
    """Check ``table_join`` on ``id`` when ``use_hash=True`` is passed.

    The joined resource is expected to gain a ``note`` column for the two
    ids listed in the inline lookup table.
    """
    base = Resource("data/transform.csv")
    notes = Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]])
    pipeline = [
        steps.table_normalize(),
        steps.table_join(resource=notes, field_name="id", use_hash=True),
    ]
    joined = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "note", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83, "note": "beer"},
        {"id": 2, "name": "france", "population": 66, "note": "vine"},
    ]
    assert joined.schema == expected_schema
    assert joined.read_rows() == expected_rows
Exemplo n.º 2
0
def test_step_table_join_mode_right():
    """Check ``table_join`` on ``id`` with ``mode="right"``.

    The expected rows keep every id of the inline lookup table; the id with
    no match in the source is expected to have ``None`` for source columns.
    """
    base = Resource(path="data/transform.csv")
    notes = Resource(data=[["id", "note"], [1, "beer"], [4, "rum"]])
    pipeline = [
        steps.table_normalize(),
        steps.table_join(resource=notes, field_name="id", mode="right"),
    ]
    joined = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "note", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83, "note": "beer"},
        {"id": 4, "name": None, "population": None, "note": "rum"},
    ]
    assert joined.schema == expected_schema
    assert joined.read_rows() == expected_rows
Exemplo n.º 3
0
def test_step_row_filter_petl_selectisinstance():
    """Check ``row_filter`` with a callable keeping rows whose ``id`` is an int.

    The schema is expected to be unchanged and all three rows to pass.
    """

    def id_is_int(row):
        return isinstance(row["id"], int)

    original = Resource(path="data/transform.csv")
    original.infer()
    pipeline = [steps.table_normalize(), steps.row_filter(function=id_is_int)]
    filtered = transform(original, steps=pipeline)

    columns = ("id", "name", "population")
    records = [(1, "germany", 83), (2, "france", 66), (3, "spain", 47)]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == [dict(zip(columns, rec)) for rec in records]
Exemplo n.º 4
0
def test_step_row_filter_petl_selectrangeopen():
    """Check ``row_filter`` with the formula ``1 <= id <= 3``.

    Both bounds are inclusive, so ids 1, 2 and 3 are all expected to pass.
    """
    original = Resource(path="data/transform.csv")
    original.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(formula="1 <= id <= 3"),
    ]
    filtered = transform(original, steps=pipeline)

    columns = ("id", "name", "population")
    records = [(1, "germany", 83), (2, "france", 66), (3, "spain", 47)]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == [dict(zip(columns, rec)) for rec in records]
Exemplo n.º 5
0
def test_step_table_intersect_with_use_hash():
    """Check ``table_intersect`` against an inline table with ``use_hash=True``.

    The inline table differs from the expected output only in the ``france``
    row (population 50), which is expected to be dropped.
    """
    base = Resource("data/transform.csv")
    other = Resource(
        data=[
            ["id", "name", "population"],
            [1, "germany", 83],
            [2, "france", 50],
            [3, "spain", 47],
        ]
    )
    pipeline = [
        steps.table_normalize(),
        steps.table_intersect(resource=other, use_hash=True),
    ]
    common = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert common.schema == expected_schema
    assert common.read_rows() == expected_rows
Exemplo n.º 6
0
def test_step_row_filter_petl_selectgt():
    """Check ``row_filter`` with the predicate ``<formula>id > 2``.

    Only the row with id 3 is expected to remain; the schema must be unchanged.
    """
    original = Resource(path="data/transform.csv")
    original.infer(only_sample=True)
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(predicat="<formula>id > 2"),
    ]
    filtered = transform(original, steps=pipeline)

    expected_rows = [{"id": 3, "name": "spain", "population": 47}]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == expected_rows
Exemplo n.º 7
0
def test_step_row_filter_petl_selectrangeclosed():
    """Check ``row_filter`` with the predicate ``<formula>1 < id < 3``.

    Both bounds are exclusive, so only the row with id 2 is expected to remain.
    """
    original = Resource(path="data/transform.csv")
    original.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(predicat="<formula>1 < id < 3"),
    ]
    filtered = transform(original, steps=pipeline)

    expected_rows = [{"id": 2, "name": "france", "population": 66}]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == expected_rows
Exemplo n.º 8
0
def test_step_row_filter_petl_selecteq():
    """Check ``row_filter`` with the predicate ``<formula>id == 1``.

    Only the row with id 1 is expected to remain; the schema must be unchanged.
    """
    original = Resource(path="data/transform.csv")
    original.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(predicat="<formula>id == 1"),
    ]
    filtered = transform(original, steps=pipeline)

    expected_rows = [{"id": 1, "name": "germany", "population": 83}]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == expected_rows
Exemplo n.º 9
0
def test_step_table_recast():
    """Check that ``table_melt`` followed by ``table_recast`` on ``id`` round-trips.

    The result is expected to have the three-column schema and the three
    rows asserted below.
    """
    base = Resource("data/transform.csv")
    pipeline = [
        steps.table_normalize(),
        steps.table_melt(field_name="id"),
        steps.table_recast(field_name="id"),
    ]
    recast = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert recast.schema == expected_schema
    assert recast.read_rows() == expected_rows
Exemplo n.º 10
0
def test_step_table_melt_with_to_field_names():
    """Check ``table_melt`` on ``name`` with custom output field names.

    ``population`` is the only melted variable and the output columns are
    named ``key`` and ``val``; those two fields are expected to have no type.
    """
    base = Resource("data/transform.csv")
    melt_step = steps.table_melt(
        field_name="name",
        variables=["population"],
        to_field_names=["key", "val"],
    )
    melted = transform(base, steps=[steps.table_normalize(), melt_step])

    expected_schema = {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "key"},
            {"name": "val"},
        ]
    }
    expected_rows = [
        {"name": "germany", "key": "population", "val": 83},
        {"name": "france", "key": "population", "val": 66},
        {"name": "spain", "key": "population", "val": 47},
    ]
    assert melted.schema == expected_schema
    assert melted.read_rows() == expected_rows
Exemplo n.º 11
0
def test_step_row_filter_petl_selectisinstance():
    """Check ``row_filter`` with a callable keeping rows whose ``id`` is an int.

    The result is compared against an explicit schema and all three rows.
    """

    def id_is_int(row):
        return isinstance(row["id"], int)

    base = Resource("data/transform.csv")
    pipeline = [steps.table_normalize(), steps.row_filter(function=id_is_int)]
    filtered = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert filtered.schema == expected_schema
    assert filtered.read_rows() == expected_rows
Exemplo n.º 12
0
def test_step_table_join_mode_anti():
    """Check ``table_join`` with ``mode="anti"`` and no explicit ``field_name``.

    The expected rows are the ids that do not appear in the inline lookup
    table, and the schema is expected to have no ``note`` column.
    """
    base = Resource(path="data/transform.csv")
    notes = Resource(data=[["id", "note"], [1, "beer"], [4, "rum"]])
    pipeline = [
        steps.table_normalize(),
        steps.table_join(resource=notes, mode="anti"),
    ]
    unmatched = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert unmatched.schema == expected_schema
    assert unmatched.read_rows() == expected_rows
def test_step_table_pivot():
    """Check ``table_pivot`` with ``f1="region"``, ``f2="gender"``, ``f3="units"``.

    ``sum`` is passed as the aggregation function. The result is expected to
    have one row per region, with integer ``boy`` and ``girl`` columns.
    """
    source = Resource("data/transform-pivot.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.table_pivot(f1="region", f2="gender", f3="units", aggfun=sum),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "region", "type": "string"},
            {"name": "boy", "type": "integer"},
            {"name": "girl", "type": "integer"},
        ]
    }
    assert target.read_rows() == [
        {"region": "east", "boy": 33, "girl": 29},
        {"region": "west", "boy": 35, "girl": 23},
    ]
Exemplo n.º 14
0
def test_step_field_add_with_formula():
    """Check ``field_add`` creating ``calc`` from ``id * 100 + population``.

    The new field is expected to appear last in the schema without a type.
    """
    base = Resource(path="data/transform.csv")
    pipeline = [
        steps.table_normalize(),
        steps.field_add(name="calc", formula="id * 100 + population"),
    ]
    extended = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "calc"},
        ]
    }
    columns = ("id", "name", "population", "calc")
    records = [
        (1, "germany", 83, 183),
        (2, "france", 66, 266),
        (3, "spain", 47, 347),
    ]
    assert extended.schema == expected_schema
    assert extended.read_rows() == [dict(zip(columns, rec)) for rec in records]
Exemplo n.º 15
0
def test_step_row_filter_petl_selectrangeopenleft():
    """Check ``row_filter`` with the predicate ``<formula>1 <= id < 3``.

    The lower bound is inclusive and the upper bound exclusive, so ids 1 and
    2 are expected to remain.
    """
    original = Resource(path="data/transform.csv")
    original.infer(only_sample=True)
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(predicat="<formula>1 <= id < 3"),
    ]
    filtered = transform(original, steps=pipeline)

    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
    ]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == expected_rows
Exemplo n.º 16
0
def test_step_table_diff():
    """Check ``table_diff`` against an inline table.

    The inline table differs from the expected output only in the ``france``
    row (population 50 vs 66), so that source row is expected to remain.
    """
    original = Resource(path="data/transform.csv")
    original.infer()
    other = Resource(
        data=[
            ["id", "name", "population"],
            [1, "germany", 83],
            [2, "france", 50],
            [3, "spain", 47],
        ]
    )
    pipeline = [steps.table_normalize(), steps.table_diff(resource=other)]
    difference = transform(original, steps=pipeline)

    expected_rows = [{"id": 2, "name": "france", "population": 66}]
    assert difference.schema == original.schema
    assert difference.read_rows() == expected_rows
Exemplo n.º 17
0
def test_step_row_filter():
    """Check ``row_filter`` with the predicate ``<formula>id > 1``.

    Rows with ids 2 and 3 are expected to remain; the schema must be unchanged.
    """
    original = Resource(path="data/transform.csv")
    original.infer()
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(predicat="<formula>id > 1"),
    ]
    filtered = transform(original, steps=pipeline)

    expected_rows = [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == expected_rows
Exemplo n.º 18
0
def test_step_row_filter_with_callable_predicat():
    """Check ``row_filter`` when ``predicat`` is a callable rather than a string.

    The callable keeps rows whose ``id`` is greater than 1, so ids 2 and 3
    are expected to remain.
    """

    def id_above_one(row):
        return row["id"] > 1

    original = Resource(path="data/transform.csv")
    original.infer(only_sample=True)
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(predicat=id_above_one),
    ]
    filtered = transform(original, steps=pipeline)

    expected_rows = [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert filtered.schema == original.schema
    assert filtered.read_rows() == expected_rows
Exemplo n.º 19
0
def test_step_table_diff():
    """Check ``table_diff`` against an inline table, with an explicit schema.

    The inline table differs from the expected output only in the ``france``
    row (population 50 vs 66), so that source row is expected to remain.
    """
    base = Resource("data/transform.csv")
    other = Resource(
        data=[
            ["id", "name", "population"],
            [1, "germany", 83],
            [2, "france", 50],
            [3, "spain", 47],
        ]
    )
    pipeline = [steps.table_normalize(), steps.table_diff(resource=other)]
    difference = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [{"id": 2, "name": "france", "population": 66}]
    assert difference.schema == expected_schema
    assert difference.read_rows() == expected_rows
Exemplo n.º 20
0
def test_step_row_filter_petl_selectisnot():
    """Check ``row_filter`` with the formula ``id is not 1``.

    Rows with ids 2 and 3 are expected to remain.
    """
    base = Resource("data/transform.csv")
    # NOTE(review): the formula uses `is not` against an int literal, which is
    # an identity check in plain Python; presumably intentional given the test
    # name -- confirm how the formula evaluator treats it.
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(formula="id is not 1"),
    ]
    filtered = transform(base, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert filtered.schema == expected_schema
    assert filtered.read_rows() == expected_rows