def karunru_analyze_columns(input_df: XDataFrame) -> Tuple[List[str], List[str]]:
    """Split the columns of a data frame into numerical and categorical names.

    Columns whose dtype is ``category`` are always reported as categorical
    and are listed first.  Every remaining column is numerical when
    ``pd.api.types.is_numeric_dtype`` accepts it and categorical otherwise.

    Args:
        input_df (XDataFrame) : Data frame whose columns are classified.

    Returns:
        Tuple[List[str], List[str]] : ``(numerical_cols, categorical_cols)``.
        Numerical names keep the frame's column order; categorical names are
        the ``category``-dtype columns followed by the other non-numeric
        columns, each group in column order.

    Example:
        ::
            >>> import pandas as pd
            >>> df = pd.DataFrame({"col1": [1, 2], "col2": [2, 3], "col3": ["a", "b"]})
            >>> karunru_analyze_columns(df)
            (['col1', 'col2'], ['col3'])
    """
    cat_names = input_df.select_dtypes("category").columns.tolist()
    # Snapshot of the category-dtype columns; they are already classified and
    # must not be visited again by the loop below.
    preclassified = set(cat_names)

    num_names = []
    for name in input_df.columns:
        if name in preclassified:
            continue
        is_numeric = pd.api.types.is_numeric_dtype(input_df[name])
        bucket = num_names if is_numeric else cat_names
        bucket.append(name)

    return num_names, cat_names
def predict(self, model: TabNetModel, features: XDataFrame) -> np.ndarray:
    """Preprocess ``features`` and return the model's predictions.

    Preprocessing, applied to a private copy of ``features``:

    * every ``category``-dtype column has its missing values replaced by an
      ``"Unknown"`` category and is then converted to integer category codes;
    * every column not listed in ``self.config["categorical_cols"]`` has its
      missing values replaced by that column's mean.

    Args:
        model (TabNetModel) : Fitted model exposing ``predict`` and, for the
            multiclass mode, ``predict_proba``.
        features (XDataFrame) : Feature frame. It is not modified.

    Returns:
        np.ndarray : When ``self.mode`` is not ``"multiclass"``, the flattened
        output of ``model.predict``.  Otherwise the class probabilities
        weighted by ``[0, 1, 2, 3] / 3``, i.e. one score per row.
    """
    # Work on a copy so the encoding/imputation below does not leak into the
    # caller's frame.
    features = features.copy()

    for col in features.select_dtypes(include="category").columns:
        column = features[col]
        # add_categories raises ValueError if the label is already present.
        if "Unknown" not in column.cat.categories:
            column = column.cat.add_categories("Unknown")
        features[col] = column.fillna("Unknown").cat.codes

    categorical_cols = set(self.config["categorical_cols"])
    for col in features.columns:
        if col in categorical_cols:
            continue
        features[col] = features[col].fillna(features[col].mean())

    inputs = features.values
    if self.mode != "multiclass":
        return model.predict(inputs).reshape(-1)

    # The previous call passed the DataFrame itself plus XGBoost's
    # ``ntree_limit``/``best_ntree_limit``; TabNet's predict_proba only takes
    # the feature matrix, so both branches now pass the same array.
    preds = model.predict_proba(inputs)
    # NOTE(review): hard-coded for exactly 4 classes, presumably ordinal
    # labels 0..3 scaled into [0, 1] -- confirm against the training target.
    return preds @ np.arange(4) / 3