def _transform(self, X: DataFrameContainer, y: Optional[NdArrayContainer]):
    """Run ``_transform_proc`` on the raw data and return updated copies.

    The payloads of ``X`` and ``y`` are handed to ``self._transform_proc``;
    its two results are stored on copies obtained from ``X.copy()`` and
    ``y.copy()``, and those copies are returned.

    NOTE(review): ``y`` is annotated as optional, but ``y.data`` is read
    unconditionally, so passing ``None`` raises ``AttributeError``.

    :param X: feature container; ``X.data`` is passed to ``_transform_proc``.
    :param y: target container; ``y.data`` is passed to ``_transform_proc``.
    :return: ``(X_copy, y_copy)`` carrying the processed data.
    """
    raw_target = y.data
    processed = self._transform_proc(X.data, raw_target)
    new_features, new_target = processed

    X_result = X.copy()
    y_result = y.copy()

    # Re-wrap the processed features as a DataFrame labelled with the
    # column names of the copied container.
    X_result.data = pd.DataFrame(new_features, columns=X_result.columns)
    y_result.data = new_target
    return X_result, y_result
def process_X(self, X: DataFrameContainer, X_origin):
    """Align the columns of ``X`` with ``self.columns`` and set feature groups.

    What happens depends on the type of ``X_origin``:

    * ``np.ndarray``: ``X.columns`` is overwritten with ``self.columns``.
    * ``pd.DataFrame``: the column sets must be equal; if the order differs,
      a warning is logged and ``X.data`` is re-selected by ``self.columns``.
    * ``DataFrameContainer``: nothing is changed.
    * anything else: ``NotImplementedError`` is raised.

    :param X: container to process; ``None`` is returned unchanged.
    :param X_origin: the object whose type selects the alignment strategy.
    :return: ``X`` after ``set_feature_groups(self.feature_groups)``, or
        ``None`` when ``X`` is ``None``.
    :raises AssertionError: if the column count (or, for a DataFrame origin,
        the set of column names) differs from ``self.columns``.
    :raises NotImplementedError: for an unsupported ``X_origin`` type.
    """
    if X is None:
        return None

    n_columns = X.shape[1]
    assert n_columns == len(self.columns)

    if isinstance(X_origin, np.ndarray):
        X.columns = self.columns
    elif isinstance(X_origin, pd.DataFrame):
        assert set(X.columns) == set(self.columns)
        same_order = np.all(X.columns == self.columns)
        if not same_order:
            message = f"{X.dataset_source}'s columns do not match the TrainSet's columns by position!"
            self.logger.warning(message)
            # Re-select the data so the columns follow the order of self.columns.
            X.data = X.data[self.columns]
    elif not isinstance(X_origin, DataFrameContainer):
        # A DataFrameContainer origin needs no alignment; any other type
        # is unsupported.
        raise NotImplementedError

    X.set_feature_groups(self.feature_groups)
    return X