def fit(self, pipeline_config, X, Y, dataset_info):
    """One-hot encode the categorical columns of X and transform Y.

    Args:
        pipeline_config: Not used by this method.
        X: Feature data. It is passed through the column encoder only when
            at least one entry of ``dataset_info.categorical_features`` is
            truthy and ``dataset_info.is_sparse`` is falsy; otherwise it is
            returned unchanged.
        Y: Targets, handed to ``self.complete_y_tranformation``.
        dataset_info: Object providing ``categorical_features`` and
            ``is_sparse``. Its ``categorical_features`` attribute is set to
            ``None`` before this method returns.

    Returns:
        dict with the keys ``'X'``, ``'one_hot_encoder'``, ``'Y'``,
        ``'y_one_hot_encoder'`` and ``'dataset_info'``.
    """
    cat_mask = dataset_info.categorical_features

    ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")
    # Positions of the columns flagged as categorical in the mask.
    cat_columns = [col for col, is_cat in enumerate(cat_mask) if is_cat]
    encoder = ColumnTransformer(
        transformers=[("ohe", ohe, cat_columns)],
        remainder="passthrough",
    )
    # Empty placeholder so the attribute exists even when the encoder is
    # never fitted; it is replaced below once fitting has happened.
    encoder.categories_ = np.array([])
    encoder.categorical_features = cat_mask

    has_categorical = any(cat_mask)
    if has_categorical and not dataset_info.is_sparse:
        # Encode X; the first entry of transformers_ is the "ohe" step
        # declared above.
        X = encoder.fit_transform(X)
        fitted_ohe = encoder.transformers_[0][1]
        encoder.categories_ = fitted_ohe.categories_

    # Y to matrix (delegated to a helper defined outside this block).
    Y, y_encoder = self.complete_y_tranformation(Y)

    # Clear the mask on the shared dataset_info object; this happens
    # whether or not X was actually encoded.
    dataset_info.categorical_features = None

    result = {
        'X': X,
        'one_hot_encoder': encoder,
        'Y': Y,
        'y_one_hot_encoder': y_encoder,
        'dataset_info': dataset_info,
    }
    return result
def fit(self, pipeline_config, X_train, X_valid, Y_train, Y_valid, categorical_features):
    """One-hot encode categorical feature columns and, optionally, the targets.

    Args:
        pipeline_config: Not used by this method.
        X_train: Training features. Encoded only when at least one entry of
            ``categorical_features`` is truthy and ``X_train`` is not a
            scipy sparse matrix.
        X_valid: Validation features or ``None``. Transformed with the
            encoder fitted on ``X_train`` when encoding takes place.
        Y_train: Training targets. Cast to ``np.float32`` and reshaped to a
            single column when one-dimensional.
        Y_valid: Validation targets or ``None``. Reshaped to a single column
            when one-dimensional.
        categorical_features: Per-column flags; truthy entries mark the
            columns to one-hot encode.

    Returns:
        dict with the keys ``'X_train'``, ``'X_valid'``,
        ``'one_hot_encoder'``, ``'Y_train'``, ``'Y_valid'``,
        ``'y_one_hot_encoder'`` and ``'categorical_features'`` (always
        ``None``).
    """
    ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")
    # Positions of the columns flagged as categorical.
    cat_columns = [col for col, is_cat in enumerate(categorical_features) if is_cat]
    encoder = ColumnTransformer(
        transformers=[("ohe", ohe, cat_columns)],
        remainder="passthrough",
    )
    # Empty placeholder so the attribute exists even when the encoder is
    # never fitted; it is replaced below once fitting has happened.
    encoder.categories_ = np.array([])
    encoder.categorical_features = categorical_features

    encode_x = any(categorical_features) and not scipy.sparse.issparse(X_train)
    if encode_x:
        # Fit on the training split only, then reuse the fitted encoder for
        # the validation split.
        X_train = encoder.fit_transform(X_train)
        if X_valid is not None:
            X_valid = encoder.transform(X_valid)
        fitted_ohe = encoder.transformers_[0][1]
        encoder.categories_ = fitted_ohe.categories_

    # Y to matrix.
    # NOTE(review): only Y_train is cast to float32; Y_valid keeps its
    # original dtype -- confirm this asymmetry is intended.
    Y_train = Y_train.astype(np.float32)
    if len(Y_train.shape) == 1:
        Y_train = Y_train.reshape(-1, 1)
    if Y_valid is not None and len(Y_valid.shape) == 1:
        Y_valid = Y_valid.reshape(-1, 1)

    # Encode Y only when requested and when Y_train is not sparse.
    y_encoder = None
    encode_y = self.encode_Y and not scipy.sparse.issparse(Y_train)
    if encode_y:
        y_encoder = OneHotEncoder(sparse=False, categories="auto", handle_unknown='ignore')
        y_encoder.categories_ = np.array([])
        Y_train = y_encoder.fit_transform(Y_train)
        if Y_valid is not None:
            Y_valid = y_encoder.transform(Y_valid)

    result = {
        'X_train': X_train,
        'X_valid': X_valid,
        'one_hot_encoder': encoder,
        'Y_train': Y_train,
        'Y_valid': Y_valid,
        'y_one_hot_encoder': y_encoder,
        'categorical_features': None,
    }
    return result