# Feature lists consumed by the pipeline below. The remaining names used here
# (Pipeline, pp, MinMaxScaler, Lasso, CATEGORICAL_VARS_WITH_NA, TEMPORAL_VARS,
# DROP_FEATURES) are expected to be in scope from elsewhere in this module.

# variables to log transform
NUMERICALS_LOG_VARS = ['LotFrontage', '1stFlrSF', 'GrLivArea']

# numerical variables with NA in train set
NUMERICAL_VARS_WITH_NA = ['LotFrontage']

# categorical variables to encode
CATEGORICAL_VARS = [
    'MSZoning', 'Neighborhood', 'RoofStyle', 'MasVnrType', 'BsmtQual',
    'BsmtExposure', 'HeatingQC', 'CentralAir', 'KitchenQual', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'PavedDrive',
]

# End-to-end pipeline: project transformers from `pp`, then min-max scaling,
# then a Lasso regressor as the final estimator. Steps run in listed order.
price_pipe = Pipeline([
    ('categorical_imputer',
     pp.CategoricalImputer(variables=CATEGORICAL_VARS_WITH_NA)),
    ('numerical_inputer',
     pp.NumericalImputer(variables=NUMERICAL_VARS_WITH_NA)),
    # The reference column must differ from the temporal variables themselves:
    # measuring a column against itself carries no information. The reference
    # was previously TEMPORAL_VARS (identical to `variables`).
    # NOTE(review): assumes DROP_FEATURES names the reference-year column that
    # the 'drop_features' step removes afterwards -- confirm against its
    # definition.
    ('temporal_variable',
     pp.TemporalVariableEstimator(
         variables=TEMPORAL_VARS,
         reference_variable=DROP_FEATURES)),
    ('rare_label_encoder',
     pp.RareLabelCategoricalEncoder(tol=0.01, variables=CATEGORICAL_VARS)),
    ('categorical_encoder',
     pp.CategoricalEncoder(variables=CATEGORICAL_VARS)),
    ('log_transformer',
     pp.LogTransformer(variables=NUMERICALS_LOG_VARS)),
    ('drop_features',
     pp.DropUnecessaryFeatures(variables_to_drop=DROP_FEATURES)),
    ('scaler', MinMaxScaler()),
    # random_state pinned so repeated fits are reproducible.
    ('Linear_model', Lasso(alpha=0.005, random_state=0)),
])
# Assembles the price-prediction pipeline from the project's custom
# transformers (`preprocessors`) and the feature lists held in `config`.
from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

import preprocessors as pp
import config

# Steps are applied in the order listed; the last step is the estimator.
price_pipe = Pipeline(steps=[
    (
        'categorical_imputer',
        pp.CategoricalImputer(variables=config.CATEGORICAL_VARS_WITH_NA),
    ),
    (
        'numerical_inputer',
        pp.NumericalImputer(variables=config.NUMERICAL_VARS_WITH_NA),
    ),
    (
        'temporal_variable',
        # NOTE(review): the reference column comes from config.DROP_FEATURES;
        # presumably the reference-year column is also the one dropped by the
        # 'drop_features' step below -- confirm in config.
        pp.TemporalVariableEstimator(
            variables=config.TEMPORAL_VARS,
            reference_variable=config.DROP_FEATURES,
        ),
    ),
    (
        'rare_label_encoder',
        pp.RareLabelCategoricalEncoder(
            tol=0.01,
            variables=config.CATEGORICAL_VARS,
        ),
    ),
    (
        'categorical_encoder',
        pp.CategoricalEncoder(variables=config.CATEGORICAL_VARS),
    ),
    (
        'log_transformer',
        pp.LogTransformer(variables=config.NUMERICALS_LOG_VARS),
    ),
    (
        'drop_features',
        pp.DropUnecessaryFeatures(variables_to_drop=config.DROP_FEATURES),
    ),
    ('scaler', MinMaxScaler()),
    # Fixed random_state keeps repeated fits reproducible.
    ('Linear_model', Lasso(alpha=0.005, random_state=0)),
])