def test_transform_before_fit_produces_error(self):
    """Calling transform() on a MapToInt that was never fitted raises NotFittedError."""
    unfitted_mapper = preprocessing.MapToInt("one")
    frame = pd.DataFrame(
        {"one": ["one", "two", "one", "four", "six", "two", "one", "one"]}
    )
    with pytest.raises(NotFittedError):
        unfitted_mapper.transform(frame)
def test_mapping_with_nans(self):
    """A NaN in the fitted column must not show up as a key in the learned mapping."""
    column_values = ["one", "two", "one", "four", "six", np.nan, "one", "one"]
    frame = pd.DataFrame({"one": column_values})
    mapper = preprocessing.MapToInt("one")
    mapper.fit(frame)
    # Only the four string values are expected to receive an integer code.
    assert mapper.mapping == {"one": 0, "two": 1, "four": 2, "six": 3}
def test_fit_bad_colname_produces_error(self):
    """Fitting against a column name that is absent from the frame raises KeyError."""
    frame = pd.DataFrame(
        {"one": ["one", "two", "one", "four", "six", "two", "one", "one"]}
    )
    missing_column_mapper = preprocessing.MapToInt("blahblahblah")
    with pytest.raises(KeyError):
        missing_column_mapper.fit(frame)
def test_transform_copy(self):
    """With copy=True, fit() and transform() must leave the input frame untouched."""
    input_df = pd.DataFrame(
        {"one": ["one", "two", "one", "four", "six", "two", "one", "one"]}
    )
    # Snapshot taken before fit/transform so any in-place mutation is detected.
    expected_df = input_df.copy()
    mti = preprocessing.MapToInt("one", copy=True)
    mti.fit(input_df)
    # The return value is deliberately ignored: only the input is under test.
    mti.transform(input_df)
    # pd.testing is the public home of assert_frame_equal; pd.util.testing
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    pd.testing.assert_frame_equal(input_df, expected_df)
def test_transform_inplace(self):
    """With copy=False, transform() must overwrite the input column with integer codes."""
    input_df = pd.DataFrame(
        {"one": ["one", "two", "one", "four", "six", "two", "one", "one"]}
    )
    mti = preprocessing.MapToInt("one", copy=False)
    mti.fit(input_df)
    # Return value is not used: the assertion checks the mutated input itself.
    mti.transform(input_df)
    expected_df = pd.DataFrame({"one": [0, 1, 0, 2, 3, 1, 0, 0]})
    # pd.testing is the public home of assert_frame_equal; pd.util.testing
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    pd.testing.assert_frame_equal(input_df, expected_df)
def test_transform_with_nans(self):
    """A NaN in the column must stay NaN in the transformed output."""
    input_df = pd.DataFrame(
        {"one": ["one", "two", "one", "four", "six", "two", np.nan, "one"]}
    )
    mti = preprocessing.MapToInt("one")
    mti.fit(input_df)
    transformed_df = mti.transform(input_df)
    # The NaN makes pandas build the expected column as floats.
    expected_df = pd.DataFrame({"one": [0, 1, 0, 2, 3, 1, np.nan, 0]})
    # pd.testing is the public home of assert_frame_equal; pd.util.testing
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    pd.testing.assert_frame_equal(transformed_df, expected_df)
def test_map_to_int_to_onehot(self):
    """MapToInt chained into OneHotEncoderFromDataFrame via a Pipeline yields one-hot columns."""
    fit_df = pd.DataFrame({"quarter": ["Q1", "Q1", "Q1", "Q2", "Q2"]})
    # Transform a separate copy so the fitted frame and the transformed frame
    # are distinct objects.
    transform_df = fit_df.copy()
    mti = preprocessing.MapToInt("quarter", copy=True)
    ohe = preprocessing.OneHotEncoderFromDataFrame(
        categorical_feature_names=["quarter"], copy=True
    )
    pipe = Pipeline(steps=[("one", mti), ("two", ohe)])
    pipe.fit(fit_df)
    output_df = pipe.transform(transform_df)
    # The leading float literal makes each expected column float-typed.
    expected_df = pd.DataFrame(
        {
            "onehot_col1": [1.0, 1, 1, 0, 0],
            "onehot_col2": [0.0, 0, 0, 1, 1],
        }
    )
    # pd.testing is the public home of assert_frame_equal; pd.util.testing
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    pd.testing.assert_frame_equal(output_df, expected_df)