def test_step_row_subset_unique_with_name_explicit_schema():
    """Check the "unique" row subset against a literal expected schema.

    Every ``id`` is overwritten with 1 before the subset is taken, and the
    test expects no rows to remain.

    Renamed from ``test_step_row_subset_unique_with_name``: a later test in
    this module reuses that name, and the later definition rebinds it at
    import time, so this variant was never collected.
    """
    source = Resource("data/transform.csv")
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="unique", field_name="id"),
    ]
    target = transform(source, steps=pipeline)
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert target.schema == expected_schema
    assert target.read_rows() == []
def test_transform_rename_move_field_issue_953():
    """Regression test for issue 953: rename a field, then move it by its new name."""
    records = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    pipeline = [
        steps.table_normalize(),
        steps.field_update(name="name", new_name="country"),
        steps.field_move(name="country", position=3),
    ]
    target = transform(data=records, steps=pipeline)

    # The renamed field is expected in the last position of the schema.
    expected_fields = [
        {"name": "id", "type": "integer"},
        {"name": "population", "type": "integer"},
        {"name": "country", "type": "string"},
    ]
    expected_rows = [
        {"id": ident, "population": population, "country": country}
        for ident, population, country in (
            (1, 83, "germany"),
            (2, 66, "france"),
            (3, 47, "spain"),
        )
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_row_subset_distinct_with_duplicates_explicit_schema():
    """Check the "distinct" row subset against a literal expected schema.

    Every ``id`` is overwritten with 1 before the subset is taken, and the
    test expects only the first row to remain.

    Renamed from ``test_step_row_subset_distinct_with_duplicates``: a later
    test in this module reuses that name, and the later definition rebinds it
    at import time, so this variant was never collected.
    """
    source = Resource("data/transform.csv")
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="distinct", field_name="id"),
    ]
    target = transform(source, steps=pipeline)
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert target.schema == expected_schema
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83},
    ]
def test_step_row_subset_duplicates_with_name():
    """Check the "duplicates" row subset when every ``id`` has been set to 1.

    All three rows are expected back, and the schema is expected to equal the
    inferred source schema.
    """
    source = Resource(path="data/transform.csv")
    source.infer()
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="duplicates", field_name="id"),
    ]
    target = transform(source, steps=pipeline)
    expected_rows = [
        {"id": 1, "name": name, "population": population}
        for name, population in (("germany", 83), ("france", 66), ("spain", 47))
    ]
    assert target.schema == source.schema
    assert target.read_rows() == expected_rows
def test_step_cell_fill_direction_right():
    """Check ``cell_fill(direction="right")`` on a row with an emptied cell.

    ``id`` and ``population`` are retyped to string and the "france" cell is
    replaced with ``None``. The test expects that cell to come back as "2",
    the value of the ``id`` cell in the same row.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="string"),
        steps.field_update(name="population", type="string"),
        steps.cell_replace(pattern="france", replace=None),
        steps.cell_fill(direction="right"),
    ]
    target = transform(source, steps=pipeline)
    # A leftover debug print of the rows used to sit here. It was dropped
    # because it only added stdout noise and an extra read of the target.
    expected_schema = {
        "fields": [
            {"name": "id", "type": "string"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "string"},
        ]
    }
    assert target.schema == expected_schema
    assert target.read_rows() == [
        {"id": "1", "name": "germany", "population": "83"},
        {"id": "2", "name": "2", "population": "66"},
        {"id": "3", "name": "spain", "population": "47"},
    ]
def test_step_field_unpack_with_preserve():
    """Check ``field_unpack`` with ``preserve=True`` on an array field.

    ``id`` is turned into the array ``[1, 1]`` and unpacked into ``id2`` and
    ``id3``. The test expects ``id`` to stay in the schema and the two new
    fields to be appended without a type.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="array", value=[1, 1]),
        steps.field_unpack(name="id", to_names=["id2", "id3"], preserve=True),
    ]
    target = transform(source, steps=pipeline)
    expected_fields = [
        {"name": "id", "type": "array"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
        {"name": "id2"},
        {"name": "id3"},
    ]
    expected_rows = [
        {"id": [1, 1], "name": name, "population": population, "id2": 1, "id3": 1}
        for name, population in (("germany", 83), ("france", 66), ("spain", 47))
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_cell_interpolate():
    """Check ``cell_interpolate`` with a ``%s`` template on all-string fields.

    The test expects every cell to come back wrapped in the template.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="string"),
        steps.field_update(name="population", type="string"),
        steps.cell_interpolate(template="Prefix: %s"),
    ]
    target = transform(source, steps=pipeline)
    expected_fields = [
        {"name": column, "type": "string"}
        for column in ("id", "name", "population")
    ]
    expected_rows = [
        {"id": "Prefix: 1", "name": "Prefix: germany", "population": "Prefix: 83"},
        {"id": "Prefix: 2", "name": "Prefix: france", "population": "Prefix: 66"},
        {"id": "Prefix: 3", "name": "Prefix: spain", "population": "Prefix: 47"},
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_cell_convert():
    """Check ``cell_convert`` with a constant value on all-string fields.

    The test expects every cell of every row to equal the constant.
    """
    columns = ("id", "name", "population")
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="string"),
        steps.field_update(name="population", type="string"),
        steps.cell_convert(value="n/a"),
    ]
    target = transform(source, steps=pipeline)
    expected_fields = [{"name": column, "type": "string"} for column in columns]
    # Three identical rows, one per row of the source file.
    expected_rows = [dict.fromkeys(columns, "n/a") for _ in range(3)]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_row_subset_unique_with_name():
    """Check the "unique" row subset when every ``id`` has been set to 1.

    No rows are expected back, and the schema is expected to equal the
    inferred source schema.
    """
    source = Resource(path="data/transform.csv")
    source.infer()
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="unique", field_name="id"),
    ]
    target = transform(source, steps=pipeline)
    expected_rows = []
    assert target.schema == source.schema
    assert target.read_rows() == expected_rows
def test_step_row_subset_distinct_with_duplicates():
    """Check the "distinct" row subset when every ``id`` has been set to 1.

    Only the first row is expected back, and the schema is expected to equal
    the inferred source schema.
    """
    source = Resource(path="data/transform.csv")
    source.infer(only_sample=True)
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="distinct", field_name="id"),
    ]
    target = transform(source, steps=pipeline)
    expected_rows = [{"id": 1, "name": "germany", "population": 83}]
    assert target.schema == source.schema
    assert target.read_rows() == expected_rows
def test_step_field_update_new_name():
    """Check that ``field_update(new_name=...)`` renames a field.

    The test expects the new name in both the schema and the row keys.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [steps.field_update(name="id", new_name="new-name")]
    target = transform(source, steps=pipeline)
    expected_fields = [
        {"name": "new-name", "type": "integer"},
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
    ]
    expected_rows = [
        {"new-name": ident, "name": name, "population": population}
        for ident, name, population in (
            (1, "germany", 83),
            (2, "france", 66),
            (3, "spain", 47),
        )
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows
def test_step_field_unpack_source_is_object():
    """Check ``field_unpack`` on an object-typed field.

    ``id`` is turned into the object ``{"note": "eu"}`` and unpacked into
    ``note``. The test expects ``id`` to be gone and ``note`` to be appended
    without a type.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="object", value={"note": "eu"}),
        steps.field_unpack(name="id", to_names=["note"]),
    ]
    target = transform(source, steps=pipeline)
    expected_fields = [
        {"name": "name", "type": "string"},
        {"name": "population", "type": "integer"},
        {"name": "note"},
    ]
    expected_rows = [
        {"name": name, "population": population, "note": "eu"}
        for name, population in (("germany", 83), ("france", 66), ("spain", 47))
    ]
    assert target.schema == {"fields": expected_fields}
    assert target.read_rows() == expected_rows