def test_transform_dataset_shape_strips_whitespace(self):
    """Check that surrounding whitespace is removed from cell values.

    The input holds values padded on the left, the right, and both sides;
    the expected frame holds the bare values plus the ``MemberOrganization``
    and ``ForceOverWrite`` columns.
    """
    transformer = dataset_shape.DatasetShapeTransformer(
        MEMBER_ORGANIZATION_ID,
        TEST_SCHEMA,
        TEST_COLUMN_MAPPING,
        row_format=True,
        multiple_val_delimiter=",",
    )
    padded = pd.DataFrame(data={"field1": [" 1", "2 ", " 3 "]})

    actual = transformer.transform_dataset_shape(padded)

    expected = pd.DataFrame(
        data={
            "field1": ["1", "2", "3"],
            "MemberOrganization": ["sample_id", "sample_id", "sample_id"],
            "ForceOverWrite": ["1", "1", "1"],
        }
    )
    # pd.testing is the public home of assert_frame_equal; the old
    # pd.util.testing alias was deprecated in pandas 1.0 and later removed.
    pd.testing.assert_frame_equal(expected, actual)
def test_transform_dataset_shape_multiple_values_blank_values(self):
    """Check how a blank cell is shaped when a multi-value delimiter is set.

    The expected frame has ``field3`` as single-element lists (``[""]`` for
    the blank input cell), while ``field1`` and ``field2`` stay plain
    strings.
    """
    transformer = dataset_shape.DatasetShapeTransformer(
        MEMBER_ORGANIZATION_ID,
        TEST_SCHEMA,
        TEST_COLUMN_MAPPING,
        row_format=True,
        multiple_val_delimiter=",",
    )
    # NOTE(review): the internal_column_name* -> field* renaming is presumably
    # driven by TEST_COLUMN_MAPPING, which is defined outside this test.
    raw = pd.DataFrame(
        data={
            "internal_column_name1": ["1", "2"],
            "internal_column_name2": ["3", "4"],
            "internal_column_name3": ["", "4"],
        }
    )

    actual = transformer.transform_dataset_shape(raw)

    expected = pd.DataFrame(
        data={
            "field1": ["1", "2"],
            "field2": ["3", "4"],
            "field3": [[""], ["4"]],
            "MemberOrganization": ["sample_id", "sample_id"],
            "ForceOverWrite": ["1", "1"],
        }
    )
    # pd.testing is the public home of assert_frame_equal; the old
    # pd.util.testing alias was deprecated in pandas 1.0 and later removed.
    pd.testing.assert_frame_equal(expected, actual)
def test_transform_dataset_shape_row_format_some_columns_present(self):
    """Check the output column names when only two input columns are given.

    Only the column labels of the shaped frame are asserted, not its values.
    """
    transformer = dataset_shape.DatasetShapeTransformer(
        MEMBER_ORGANIZATION_ID, TEST_SCHEMA, TEST_COLUMN_MAPPING, row_format=True
    )
    partial = pd.DataFrame(
        data={"internal_column_name1": [1, 2], "internal_column_name2": [3, 4]}
    )

    actual = transformer.transform_dataset_shape(partial)

    expected_column_names = [
        "field1",
        "field2",
        "MemberOrganization",
        "ForceOverWrite",
    ]
    # Compare the complete column list. A zip()-based pairwise comparison
    # stops at the shorter sequence, so it would pass vacuously if the result
    # had fewer columns than expected (or none at all).
    self.assertListEqual(list(actual.columns), expected_column_names)
def test_transform_dataset_shape_col_format(self):
    """Check shaping of a column-format dataset (``row_format=False``).

    The two-row input carries ``Intake``/``Exit``-prefixed columns plus
    ``field1`` and ``actual_field4``. The expected frame has six rows: the
    two input rows repeated under each ``MilestoneFlag`` value (``Intake``,
    ``Exit``, ``NinetyDays``), with ``""`` wherever a field has no value for
    that milestone.
    """
    # NOTE(review): how each input column maps to a milestone and field
    # (e.g. "actual_field4" -> NinetyDays/field4) is presumably defined by
    # TEST_SCHEMA_COL, which is not visible here.
    transformer = dataset_shape.DatasetShapeTransformer(
        MEMBER_ORGANIZATION_ID, TEST_SCHEMA_COL, {}, row_format=False
    )
    wide = pd.DataFrame(
        data={
            "field1": ["field1_1", "field1_2"],
            "Intakefield2": ["field2_1", "field2_2"],
            "Intakefield3": ["field3_1", "field3_2"],
            "Exitfield3": ["field3_3", "field3_4"],
            "actual_field4": ["field4_1", "field4_1"],
        }
    )

    actual = transformer.transform_dataset_shape(wide)

    expected = pd.DataFrame(
        data={
            "MilestoneFlag": [
                "Intake",
                "Intake",
                "Exit",
                "Exit",
                "NinetyDays",
                "NinetyDays",
            ],
            "field1": [
                "field1_1",
                "field1_2",
                "field1_1",
                "field1_2",
                "field1_1",
                "field1_2",
            ],
            "field2": ["field2_1", "field2_2", "", "", "", ""],
            "field3": ["field3_1", "field3_2", "field3_3", "field3_4", "", ""],
            "field4": ["", "", "", "", "field4_1", "field4_1"],
            "MemberOrganization": [
                "sample_id",
                "sample_id",
                "sample_id",
                "sample_id",
                "sample_id",
                "sample_id",
            ],
            "ForceOverWrite": ["1", "1", "1", "1", "1", "1"],
        }
    )
    # pd.testing is the public home of assert_frame_equal; the old
    # pd.util.testing alias was deprecated in pandas 1.0 and later removed.
    pd.testing.assert_frame_equal(expected, actual)
def test_transform_dataset_shape_datset_is_empty(self):
    """Check that an empty, column-less frame is returned as an empty frame."""
    transformer = dataset_shape.DatasetShapeTransformer(
        MEMBER_ORGANIZATION_ID, TEST_SCHEMA, TEST_COLUMN_MAPPING, row_format=True
    )
    empty_dataset = pd.DataFrame(columns=[])

    actual = transformer.transform_dataset_shape(empty_dataset)

    expected = pd.DataFrame(columns=[])
    # pd.testing is the public home of assert_frame_equal; the old
    # pd.util.testing alias was deprecated in pandas 1.0 and later removed.
    pd.testing.assert_frame_equal(expected, actual)