def test_source_without_identifer_raises_if_data_changes(cases):
    """Stacking new data on a source without id mappings must fail.

    A ``Source`` built with no arguments is stacked once for the first case,
    then stacked again for the same case with different rows.  The second
    call is expected to raise ``CannotStackStaticSourceError`` whose first
    message line names the case.
    """
    source = Source()
    case = cases[0]

    initial_seasons = markdown_to_df("""
        | date | season |
        | - | - |
        | 2001-09-08 | Fall 2001 |
        | 2002-01-09 | Spring 2002 |
        """)
    source.stack(case, initial_seasons)

    changed_seasons = markdown_to_df("""
        | date | season |
        | - | - |
        | 2002-06-01 | Summer 2002 |
        | 2002-09-07 | Fall 2002 |
        """)
    with pytest.raises(dtspec.core.CannotStackStaticSourceError) as raised:
        source.stack(case, changed_seasons)

    # Error message contains a readable case name
    first_line, _, _ = str(raised.value).partition("\n")
    assert "TestCase1" in first_line
def sources(identifiers):
    """Return the "students" and "organizations" sources, keyed by name.

    Each source maps its ``id`` column to the ``id`` attribute of its own
    identifier; students additionally map ``organization_id`` to the
    organization identifier's ``id`` attribute.
    """

    def _id_attribute_of(identifier_name):
        # A fresh dict per use so no mapping entry is shared between sources.
        return {"identifier": identifiers[identifier_name], "attribute": "id"}

    students = Source(
        id_mapping={
            "id": _id_attribute_of("student"),
            "organization_id": _id_attribute_of("organization"),
        }
    )
    organizations = Source(
        id_mapping={"id": _id_attribute_of("organization")}
    )
    return {"students": students, "organizations": organizations}
def test_identifiers_generate_defaults(identifiers, cases):
    """Identifier columns missing from stacked data get generated ids.

    When a column is mapped to an identifier but the stacked data does not
    supply a named id for it, "anonymous" named ids are generated at stack
    time.  The test reads those ids back from the identifier's per-case
    cache and expects them in the source's ``id`` column.
    """
    student_identifier = identifiers["student"]
    id_mapping = {
        "id": {"identifier": student_identifier, "attribute": "id"},
    }
    source = Source(id_mapping=id_mapping)

    stacked = markdown_to_df("""
        | first_name |
        | - |
        | Bob |
        | Nancy |
        """)
    source.stack(cases[0], stacked)

    # The identifier cache is keyed by the id() of the case object.
    case_cache = student_identifier.cached_ids[id(cases[0])]
    anonymous_ids = [entry["id"] for entry in case_cache.named_ids.values()]

    actual = source.data
    expected_template = """
        | first_name | id |
        | - | - |
        | Bob | {s1} |
        | Nancy | {s2} |
        """
    expected = markdown_to_df(
        expected_template.format(s1=anonymous_ids[0], s2=anonymous_ids[1])
    )
    assert_frame_equal(actual, expected)
def test_setting_defaults_and_values(identifiers, cases):
    """Stack-time ``values`` and source ``defaults`` both fill columns.

    The source declares defaults for ``last_name`` and ``gender``; the stack
    call passes a ``values`` entry for ``last_name`` only.  The expected frame
    carries the stacked value for ``last_name``, the default for ``gender``,
    and the generated ids for the named ids ``s1`` and ``s2``.
    """
    student_identifier = identifiers["student"]
    source = Source(
        defaults={"last_name": "Jones", "gender": "X"},
        id_mapping={
            "id": {"identifier": student_identifier, "attribute": "id"},
        },
    )

    stacked = markdown_to_df("""
        | id | first_name |
        | - | - |
        | s1 | Bob |
        | s2 | Nancy |
        """)
    source.stack(cases[0], stacked, values={"last_name": "Summers"})

    actual = source.data

    # Resolve each named id to the concrete id generated for this case.
    generated = {
        name: student_identifier.generate(case=cases[0], named_id=name)["id"]
        for name in ("s1", "s2")
    }
    expected = markdown_to_df("""
        | id | first_name | last_name | gender |
        | - | - | - | - |
        | {s1} | Bob | Summers | X |
        | {s2} | Nancy | Summers | X |
        """.format(**generated))

    assert_frame_equal(actual, expected)
def source_w_multiple_ids(identifiers):
    """Return a source whose columns map to several identifier attributes.

    ``id`` and ``uuid`` map to the student identifier's ``id`` and ``uuid``
    attributes; ``organization_id`` maps to the organization identifier's
    ``id`` attribute.
    """
    mapping = {}
    mapping["id"] = {
        "identifier": identifiers["student"],
        "attribute": "id",
    }
    mapping["uuid"] = {
        "identifier": identifiers["student"],
        "attribute": "uuid",
    }
    mapping["organization_id"] = {
        "identifier": identifiers["organization"],
        "attribute": "id",
    }
    return Source(id_mapping=mapping)