def test_step_table_join_mode_outer():
    """Join ``data/transform.csv`` with an inline notes table using ``mode="outer"``.

    The expected rows include ids found on only one side of the join, with
    the columns missing for such a row set to ``None``.
    """
    countries = Resource("data/transform.csv")
    normalize_step = steps.table_normalize()
    notes = Resource(data=[["id", "note"], [1, "beer"], [4, "rum"]])
    join_step = steps.table_join(resource=notes, field_name="id", mode="outer")
    joined = transform(countries, steps=[normalize_step, join_step])

    # Expected schema, written as (name, type) pairs in column order.
    field_specs = [
        ("id", "integer"),
        ("name", "string"),
        ("population", "integer"),
        ("note", "string"),
    ]
    assert joined.schema == {
        "fields": [{"name": name, "type": kind} for name, kind in field_specs]
    }

    # Expected rows, written as value tuples in the same column order.
    header = ("id", "name", "population", "note")
    expected_values = [
        (1, "germany", 83, "beer"),
        (2, "france", 66, None),
        (3, "spain", 47, None),
        (4, None, None, "rum"),
    ]
    assert joined.read_rows() == [
        dict(zip(header, values)) for values in expected_values
    ]
def test_step_table_join_mode_left_from_descriptor_issue_996():
    """Configure a left join through a descriptor dict (issue 996).

    ``fieldName`` and ``mode`` are supplied in a positional descriptor while
    the lookup resource is still passed as a keyword argument. The expected
    rows keep every source row, with ``note`` set to ``None`` for id 3, which
    has no entry in the lookup table.
    """
    countries = Resource("data/transform.csv")
    normalize_step = steps.table_normalize()
    descriptor = {"fieldName": "id", "mode": "left"}
    notes = Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]])
    join_step = steps.table_join(descriptor, resource=notes)
    joined = transform(countries, steps=[normalize_step, join_step])

    # Expected schema, written as (name, type) pairs in column order.
    field_specs = [
        ("id", "integer"),
        ("name", "string"),
        ("population", "integer"),
        ("note", "string"),
    ]
    assert joined.schema == {
        "fields": [{"name": name, "type": kind} for name, kind in field_specs]
    }

    # Expected rows, written as value tuples in the same column order.
    header = ("id", "name", "population", "note")
    expected_values = [
        (1, "germany", 83, "beer"),
        (2, "france", 66, "vine"),
        (3, "spain", 47, None),
    ]
    assert joined.read_rows() == [
        dict(zip(header, values)) for values in expected_values
    ]
def test_step_table_join_hash_is_true():
    """Join on ``id`` with ``use_hash=True`` and no explicit ``mode``.

    The expected rows contain only ids 1 and 2, the ids present in both the
    source file and the inline notes table.
    """
    countries = Resource("data/transform.csv")
    normalize_step = steps.table_normalize()
    notes = Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]])
    join_step = steps.table_join(resource=notes, field_name="id", use_hash=True)
    joined = transform(countries, steps=[normalize_step, join_step])

    # Expected schema, written as (name, type) pairs in column order.
    field_specs = [
        ("id", "integer"),
        ("name", "string"),
        ("population", "integer"),
        ("note", "string"),
    ]
    assert joined.schema == {
        "fields": [{"name": name, "type": kind} for name, kind in field_specs]
    }

    # Expected rows, written as value tuples in the same column order.
    header = ("id", "name", "population", "note")
    expected_values = [
        (1, "germany", 83, "beer"),
        (2, "france", 66, "vine"),
    ]
    assert joined.read_rows() == [
        dict(zip(header, values)) for values in expected_values
    ]
def test_step_table_join_with_name_is_not_first_field():
    """Join on ``name``, a key that is not the first column of the source.

    No normalize step is applied here. The joined schema is expected to keep
    the source column order (``id`` first), and the rows are expected with
    france before germany.
    NOTE(review): that row order presumably reflects ordering by the join
    key -- confirm against the join implementation.
    """
    countries = Resource(path="data/transform.csv")
    notes = Resource(
        data=[["name", "note"], ["germany", "beer"], ["france", "vine"]]
    )
    join_step = steps.table_join(resource=notes, field_name="name")
    joined = transform(countries, steps=[join_step])

    # Expected schema, written as (name, type) pairs in column order.
    field_specs = [
        ("id", "integer"),
        ("name", "string"),
        ("population", "integer"),
        ("note", "string"),
    ]
    assert joined.schema == {
        "fields": [{"name": name, "type": kind} for name, kind in field_specs]
    }

    # Expected rows, written as value tuples in the same column order.
    header = ("id", "name", "population", "note")
    expected_values = [
        (2, "france", 66, "vine"),
        (1, "germany", 83, "beer"),
    ]
    assert joined.read_rows() == [
        dict(zip(header, values)) for values in expected_values
    ]
def test_step_table_join_mode_anti():
    """Join with ``mode="anti"`` against an inline notes table.

    The expected result has only the source columns (no ``note`` field) and
    only ids 2 and 3, the source ids absent from the notes table.
    NOTE(review): unlike the sibling join tests, no ``field_name`` is passed
    here, so the key is whatever ``table_join`` uses by default -- confirm
    this is intentional.
    """
    countries = Resource(path="data/transform.csv")
    normalize_step = steps.table_normalize()
    notes = Resource(data=[["id", "note"], [1, "beer"], [4, "rum"]])
    join_step = steps.table_join(resource=notes, mode="anti")
    joined = transform(countries, steps=[normalize_step, join_step])

    # Expected schema, written as (name, type) pairs in column order.
    field_specs = [
        ("id", "integer"),
        ("name", "string"),
        ("population", "integer"),
    ]
    assert joined.schema == {
        "fields": [{"name": name, "type": kind} for name, kind in field_specs]
    }

    # Expected rows, written as value tuples in the same column order.
    header = ("id", "name", "population")
    expected_values = [
        (2, "france", 66),
        (3, "spain", 47),
    ]
    assert joined.read_rows() == [
        dict(zip(header, values)) for values in expected_values
    ]