def test_column_transformer_get_set_params_with_remainder():
    """Check get_params/set_params when ``remainder`` is an estimator."""
    ct = ColumnTransformer(
        [('trans1', StandardScaler(), [0])],
        remainder=StandardScaler(),
    )

    # Both the named transformer and the remainder estimator are expected to
    # show up with their nested ``<name>__<param>`` entries.
    expected = {
        'n_jobs': 1,
        'remainder': ct.remainder,
        'remainder__copy': True,
        'remainder__with_mean': True,
        'remainder__with_std': True,
        'trans1': ct.transformers[0][1],
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == expected

    # A nested parameter of the remainder estimator can be set directly.
    ct.set_params(remainder__with_std=False)
    assert not ct.get_params()['remainder__with_std']

    # Once 'trans1' is swapped for the 'passthrough' string, the expected
    # parameters no longer contain any ``trans1__*`` entries.
    ct.set_params(trans1='passthrough')
    expected = {
        'n_jobs': 1,
        'remainder': ct.remainder,
        'remainder__copy': True,
        'remainder__with_mean': True,
        'remainder__with_std': False,
        'trans1': 'passthrough',
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == expected
def test_column_transformer_get_set_params_with_remainder():
    """Check get_params/set_params when ``remainder`` is an estimator."""
    ct = ColumnTransformer(
        [('trans1', StandardScaler(), [0])],
        remainder=StandardScaler(),
    )

    # Both the named transformer and the remainder estimator are expected to
    # show up with their nested ``<name>__<param>`` entries.
    expected = {
        'n_jobs': None,
        'remainder': ct.remainder,
        'remainder__copy': True,
        'remainder__with_mean': True,
        'remainder__with_std': True,
        'sparse_threshold': 0.3,
        'trans1': ct.transformers[0][1],
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == expected

    # A nested parameter of the remainder estimator can be set directly.
    ct.set_params(remainder__with_std=False)
    assert not ct.get_params()['remainder__with_std']

    # Once 'trans1' is swapped for the 'passthrough' string, the expected
    # parameters no longer contain any ``trans1__*`` entries.
    ct.set_params(trans1='passthrough')
    expected = {
        'n_jobs': None,
        'remainder': ct.remainder,
        'remainder__copy': True,
        'remainder__with_mean': True,
        'remainder__with_std': False,
        'sparse_threshold': 0.3,
        'trans1': 'passthrough',
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == expected
def test_column_transformer_get_set_params():
    """Check get_params/set_params on a two-transformer ColumnTransformer.

    Uses plain ``assert`` statements (instead of the nose-style
    ``assert_dict_equal`` / ``assert_false`` helpers) so the test matches the
    style of the other get/set-params tests and does not depend on helper
    functions being importable.
    """
    ct = ColumnTransformer([
        ('trans1', StandardScaler(), [0]),
        ('trans2', StandardScaler(), [1]),
    ])

    # Each named transformer is expected to expose its nested
    # ``<name>__<param>`` entries.
    exp = {
        'n_jobs': 1,
        'remainder': 'drop',
        'trans1': ct.transformers[0][1],
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
        'trans2': ct.transformers[1][1],
        'trans2__copy': True,
        'trans2__with_mean': True,
        'trans2__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == exp

    # A nested parameter of a named transformer can be set directly.
    ct.set_params(trans1__with_mean=False)
    assert not ct.get_params()['trans1__with_mean']

    # Once 'trans1' is swapped for the 'passthrough' string, the expected
    # parameters no longer contain any ``trans1__*`` entries.
    ct.set_params(trans1='passthrough')
    exp = {
        'n_jobs': 1,
        'remainder': 'drop',
        'trans1': 'passthrough',
        'trans2': ct.transformers[1][1],
        'trans2__copy': True,
        'trans2__with_mean': True,
        'trans2__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == exp
def test_column_transformer_get_set_params():
    """Check get_params/set_params on a two-transformer ColumnTransformer.

    This variant expects ``remainder`` to report ``'passthrough'`` for a
    transformer built without an explicit ``remainder`` argument.

    Uses plain ``assert`` statements (instead of the nose-style
    ``assert_dict_equal`` / ``assert_false`` helpers) so the test matches the
    style of the other get/set-params tests and does not depend on helper
    functions being importable.
    """
    ct = ColumnTransformer([
        ('trans1', StandardScaler(), [0]),
        ('trans2', StandardScaler(), [1]),
    ])

    # Each named transformer is expected to expose its nested
    # ``<name>__<param>`` entries.
    exp = {
        'n_jobs': 1,
        'remainder': 'passthrough',
        'trans1': ct.transformers[0][1],
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
        'trans2': ct.transformers[1][1],
        'trans2__copy': True,
        'trans2__with_mean': True,
        'trans2__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == exp

    # A nested parameter of a named transformer can be set directly.
    ct.set_params(trans1__with_mean=False)
    assert not ct.get_params()['trans1__with_mean']

    # Once 'trans1' is swapped for the 'passthrough' string, the expected
    # parameters no longer contain any ``trans1__*`` entries.
    ct.set_params(trans1='passthrough')
    exp = {
        'n_jobs': 1,
        'remainder': 'passthrough',
        'trans1': 'passthrough',
        'trans2': ct.transformers[1][1],
        'trans2__copy': True,
        'trans2__with_mean': True,
        'trans2__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
    }
    assert ct.get_params() == exp
def test_column_transformer_get_set_params():
    """Check get_params/set_params on a two-transformer ColumnTransformer."""
    ct = ColumnTransformer(
        [
            ('trans1', StandardScaler(), [0]),
            ('trans2', StandardScaler(), [1]),
        ]
    )

    # Each named transformer is expected to expose its nested
    # ``<name>__<param>`` entries next to the top-level parameters.
    expected = {
        'n_jobs': None,
        'remainder': 'drop',
        'sparse_threshold': 0.3,
        'trans1': ct.transformers[0][1],
        'trans1__copy': True,
        'trans1__with_mean': True,
        'trans1__with_std': True,
        'trans2': ct.transformers[1][1],
        'trans2__copy': True,
        'trans2__with_mean': True,
        'trans2__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
        'verbose': False,
    }
    assert ct.get_params() == expected

    # A nested parameter of a named transformer can be set directly.
    ct.set_params(trans1__with_mean=False)
    assert not ct.get_params()['trans1__with_mean']

    # Once 'trans1' is swapped for the 'passthrough' string, the expected
    # parameters no longer contain any ``trans1__*`` entries.
    ct.set_params(trans1='passthrough')
    expected = {
        'n_jobs': None,
        'remainder': 'drop',
        'sparse_threshold': 0.3,
        'trans1': 'passthrough',
        'trans2': ct.transformers[1][1],
        'trans2__copy': True,
        'trans2__with_mean': True,
        'trans2__with_std': True,
        'transformers': ct.transformers,
        'transformer_weights': None,
        'verbose': False,
    }
    assert ct.get_params() == expected
class DFColumnTransformer(BaseEstimator, TransformerMixin):
    """ColumnTransformer wrapper whose ``transform`` returns a DataFrame.

    Parameters
    ----------
    transformers : list of tuples
        List of tuples in the form: (name, transformer, columns), where
        ``columns`` is a list-like of column labels of the input DataFrame.
    remainder : str, default='drop'
        Strategy for the features that were not selected.
        Possible values are ['drop', 'passthrough'].

    Attributes
    ----------
    ct : ColumnTransformer
        The wrapped transformer that is fitted and applied.
    """

    def __init__(self, transformers, remainder='drop'):
        self.transformers = transformers
        self.ct = ColumnTransformer(self.transformers)
        self.remainder = remainder

    def fit(self, X, y=None):
        """Fit the wrapped ColumnTransformer on ``X``.

        Parameters
        ----------
        X : DataFrame
            Input data; ``X.columns`` is read when ``remainder`` is
            'passthrough'.
        y : optional
            Forwarded unchanged to the wrapped transformer's ``fit``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``remainder`` is neither 'drop' nor 'passthrough'.
        """
        if self.remainder not in ('drop', 'passthrough'):
            # Fail loudly instead of silently returning an unfitted estimator.
            raise ValueError(
                "remainder must be 'drop' or 'passthrough', got %r"
                % (self.remainder,))

        # Work on a copy: the constructor parameter (and the caller's list)
        # must not be modified by fit, otherwise every additional fit would
        # stack one more 'passthrough' step with a duplicate name.
        transformers = list(self.transformers)

        if self.remainder == 'passthrough':
            selected = set(
                np.concatenate([tpl[-1] for tpl in transformers]).tolist())
            # Keep the DataFrame's own column order so that the output
            # layout is deterministic (a set difference is unordered).
            passed_features = [col for col in X.columns
                               if col not in selected]
            if passed_features:
                transformers.append(
                    ('passthrough', PassThrough(), passed_features))

        # Always push the effective list into the wrapped transformer so that
        # a ``transformers`` value changed after construction is honoured.
        self.ct.set_params(transformers=transformers).fit(X, y)
        return self

    def transform(self, X, y=None):
        """Transform ``X`` and wrap the result in a DataFrame.

        Column labels are the concatenation of the ``columns`` entries of the
        wrapped transformer's steps, and the index is taken from ``X``.

        NOTE(review): this labelling presumes every step emits exactly one
        output column per input column -- confirm for the transformers used.
        """
        col_names = np.concatenate([tpl[-1] for tpl in self.ct.transformers])
        return pd.DataFrame(data=self.ct.transform(X),
                            index=X.index,
                            columns=col_names)
# categorical_features = [3, 4, 7, 8]

# Categorical branch: impute with the most frequent value, then one-hot
# encode into a dense float64 array.
categorical_imputer = impute.SimpleImputer(strategy="most_frequent")
categorical_encoder = preprocessing.OneHotEncoder(
    categories=categories,
    dtype=np.float64,
    handle_unknown="error",
    sparse=False,
)
categorical_transformer = Pipeline(
    steps=[
        ('imp', categorical_imputer),
        ('enc', categorical_encoder),
    ]
)

# Time building the preprocessor plus ten clone-and-fit rounds.
start = time.time()
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)
# presumably the numeric pipeline has an 'imp' step -- verify where
# numeric_transformer is defined
preprocessor.set_params(num__imp__strategy='median')
for i in range(10):
    sklearn.base.clone(preprocessor).fit(x_pipe, y_pipe)
end = time.time()
duration = end - start
print("scale", duration)  # .4 Of a second to scale

# NOTE(review): the fit_transform result is discarded and the data is
# transformed again on the next line -- confirm whether that is intended.
preprocessor.fit_transform(x_pipe, y_pipe)
x_pipe = preprocessor.transform(x_pipe)

# Selector LVT_threshold Reducer PCA_iterated_power
# LVT, 0, PCA, auto,