def build(X_df=None, y_df=None):
    """Build features and target.

    Any argument left as ``None`` is filled in from ``load_data()``. The
    loader is invoked at most once, even when both arguments are missing.

    Args:
        X_df (DataFrame): raw variables
        y_df (DataFrame): raw target

    Returns:
        dict with keys X_df, features, mapper_X, X, y_df, encoder_y, y
    """
    if X_df is None or y_df is None:
        # Load a single time and keep only the pieces the caller did not
        # supply, instead of calling load_data() once per missing argument.
        loaded_X_df, loaded_y_df = load_data()
        if X_df is None:
            X_df = loaded_X_df
        if y_df is None:
            y_df = loaded_y_df

    # Fit the feature pipeline on the raw variables.
    features = collect_contrib_features()
    mapper_X = FeatureEngineeringPipeline(features)
    X = mapper_X.fit_transform(X_df)

    # Fit the target encoder on the raw target.
    encoder_y = get_target_encoder()
    y = encoder_y.fit_transform(y_df)

    return {
        'X_df': X_df,
        'features': features,
        'mapper_X': mapper_X,
        'X': X,
        'y_df': y_df,
        'encoder_y': encoder_y,
        'y': y,
    }
def test_transform(input, transformer):
    """Fit a single-feature pipeline on a 5x2 frame and check output shape."""
    feature = Feature(input, transformer)
    mapper = FeatureEngineeringPipeline(feature)

    # Build the fixture with the public DataFrame constructor rather than
    # ``pd.util.testing.makeCustomDataframe``: that private helper module was
    # deprecated in pandas 1.0 and is not available in current releases.
    # Cells stay string-valued ('R0C0', 'R0C1', ...) as before.
    n_rows, n_cols = 5, 2
    df = pd.DataFrame(
        [
            ['R{}C{}'.format(r, c) for c in range(n_cols)]
            for r in range(n_rows)
        ],
        index=pd.Index(
            ['R_l0_g{}'.format(r) for r in range(n_rows)], name='R0'),
        columns=['foo', 'bar'],
    )

    mapper.fit(df)
    X = mapper.transform(df)
    assert np.shape(X) == (n_rows, n_cols)
def test_transform(self):
    """Fit a single-feature pipeline on a 5x2 frame and expect a 5x1 result."""
    feature = Feature(self.input, self.transformer)
    mapper = FeatureEngineeringPipeline(feature)

    # Build the fixture with the public DataFrame constructor rather than
    # ``pd.util.testing.makeCustomDataframe``: that private helper module was
    # deprecated in pandas 1.0 and is not available in current releases.
    # Cells stay string-valued ('R0C0', 'R0C1', ...) as before.
    n_rows, n_cols = 5, 2
    df = pd.DataFrame(
        [
            ['R{}C{}'.format(r, c) for c in range(n_cols)]
            for r in range(n_rows)
        ],
        index=pd.Index(
            ['R_l0_g{}'.format(r) for r in range(n_rows)], name='R0'),
        columns=['foo', 'bar'],
    )

    mapper.fit(df)
    X = mapper.transform(df)
    self.assertEqual(np.shape(X), (5, 1))
def test_df_colnames(input, transformer, output):
    """Check that the pipeline's transformed column names are all strings.

    The feature matrix produced by ``fit_transform`` is wrapped in a
    DataFrame whose columns are ``mapper.transformed_names_``; every
    resulting column label must be a ``str``.
    """
    feature = Feature(input, transformer, output=output)
    mapper = FeatureEngineeringPipeline(feature)

    # Build the fixture with the public DataFrame constructor rather than
    # ``pd.util.testing.makeCustomDataframe``: that private helper module was
    # deprecated in pandas 1.0 and is not available in current releases.
    # Cells stay string-valued ('R0C0', 'R0C1', ...) as before.
    n_rows, n_cols = 5, 2
    entities_df = pd.DataFrame(
        [
            ['R{}C{}'.format(r, c) for c in range(n_cols)]
            for r in range(n_rows)
        ],
        index=pd.Index(
            ['R_l0_g{}'.format(r) for r in range(n_rows)], name='R0'),
        columns=['foo', 'bar'],
    )

    feature_matrix = mapper.fit_transform(entities_df)
    feature_frame = pd.DataFrame(
        feature_matrix,
        columns=mapper.transformed_names_,
        index=entities_df.index,
    )
    assert fy.all(fy.isa(str), feature_frame.columns)
def test_can_deepcopy():
    """A deep-copied pipeline must still carry ``_ballet_features``."""
    # see GH 90
    original = FeatureEngineeringPipeline(
        Feature('size', IdentityTransformer()))
    assert hasattr(original, '_ballet_features')

    clone = deepcopy(original)
    assert hasattr(clone, '_ballet_features')
def pipeline(self) -> FeatureEngineeringPipeline:
    """Build a feature engineering pipeline from ``self.features``.

    Returns:
        A ``FeatureEngineeringPipeline`` constructed from the features
        currently held on this object.
    """
    current_features = self.features
    return FeatureEngineeringPipeline(current_features)
def test_init(input, transformer):
    """Constructing a pipeline from one feature yields a pipeline instance."""
    single_feature = Feature(input, transformer)
    built = FeatureEngineeringPipeline(single_feature)
    assert isinstance(built, FeatureEngineeringPipeline)
def test_fit(self):
    """Fitting a single-feature pipeline on a 5x2 frame must not raise."""
    feature = Feature(self.input, self.transformer)
    mapper = FeatureEngineeringPipeline(feature)

    # Build the fixture with the public DataFrame constructor rather than
    # ``pd.util.testing.makeCustomDataframe``: that private helper module was
    # deprecated in pandas 1.0 and is not available in current releases.
    # Cells stay string-valued ('R0C0', 'R0C1', ...) as before.
    n_rows, n_cols = 5, 2
    df = pd.DataFrame(
        [
            ['R{}C{}'.format(r, c) for c in range(n_cols)]
            for r in range(n_rows)
        ],
        index=pd.Index(
            ['R_l0_g{}'.format(r) for r in range(n_rows)], name='R0'),
        columns=['foo', 'bar'],
    )

    mapper.fit(df)
def test_init_scalar(self):
    """A pipeline can be constructed from a single (non-list) feature."""
    scalar_feature = Feature(self.input, self.transformer)
    built = FeatureEngineeringPipeline(scalar_feature)
    self.assertIsInstance(built, FeatureEngineeringPipeline)
def test_init_seqcont(self):
    """A pipeline can be constructed from a list holding one feature."""
    feature_list = [Feature(self.input, self.transformer)]
    built = FeatureEngineeringPipeline(feature_list)
    self.assertIsInstance(built, FeatureEngineeringPipeline)