예제 #1
0
def karunru_analyze_columns(
        input_df: XDataFrame) -> Tuple[List[str], List[str]]:
    """Split the columns of a data frame into numerical and categorical names.

    Columns whose dtype is ``category`` are always treated as categorical.
    Every remaining column is numerical when
    ``pd.api.types.is_numeric_dtype`` accepts it and categorical otherwise.

    Args:
        input_df (XDataFrame) : Input data frame.
    Returns:
        Tuple[List[str], List[str]] : ``(numerical, categorical)`` column
        names. The categorical list starts with the ``category``-dtype
        columns, followed by the other non-numeric columns; each group keeps
        the column order of ``input_df``.

    Example:
        ::
            >>> import pandas as pd
            >>> df = pd.DataFrame({"col1": [1, 2], "col2": [2, 3], "col3": ["a", "b"]})
            >>> karunru_analyze_columns(df)
            (['col1', 'col2'], ['col3'])
    """
    is_numeric = pd.api.types.is_numeric_dtype
    category_typed = input_df.select_dtypes("category").columns.tolist()

    numeric_names = []
    non_numeric_names = []
    for name in input_df.columns:
        # Already classified by dtype above; no numeric check needed.
        if name in category_typed:
            continue
        bucket = numeric_names if is_numeric(input_df[name]) else non_numeric_names
        bucket.append(name)

    return numeric_names, category_typed + non_numeric_names
예제 #2
0
    def predict(self, model: TabNetModel, features: XDataFrame) -> np.ndarray:
        """Preprocess ``features`` and return the model's predictions.

        Preprocessing (applied to a copy, so the caller's frame is untouched):

        * every ``category``-dtype column gets an extra ``"Unknown"``
          category, missing values are mapped to it, and the column is
          replaced by its integer category codes;
        * every column not listed in ``self.config["categorical_cols"]`` has
          its missing values replaced by that column's mean.

        Args:
            model: Fitted model. Must provide ``predict`` (used when
                ``self.mode`` is not ``"multiclass"``) or ``predict_proba``
                plus a ``best_ntree_limit`` attribute (multiclass mode).
            features: Feature frame to predict on.

        Returns:
            A flat array from ``model.predict`` for non-multiclass modes. In
            multiclass mode, the probability-weighted class index divided by
            3; this requires ``predict_proba`` to return exactly 4 columns.
        """
        # Work on a copy: the steps below overwrite columns, and a predict
        # call should not silently rewrite the caller's data frame.
        features = features.copy()

        for col in features.select_dtypes(include="category").columns:
            # "Unknown" must be registered as a category before fillna can use it.
            filled = features[col].cat.add_categories("Unknown").fillna("Unknown")
            features[col] = filled.cat.codes

        numerical_cols = [
            col
            for col in features.columns
            if col not in self.config["categorical_cols"]
        ]
        for col in numerical_cols:
            features[col] = features[col].fillna(features[col].mean())

        if self.mode != "multiclass":
            return model.predict(features.values).reshape(-1)

        # NOTE(review): `ntree_limit` / `best_ntree_limit` look like XGBoost
        # arguments, and this branch passes the frame itself rather than
        # `features.values` as the branch above does -- confirm that the
        # TabNet model's `predict_proba` really accepts this call.
        preds = model.predict_proba(features, ntree_limit=model.best_ntree_limit)
        # Expected class index over the hard-coded 4 classes (0..3), divided by 3.
        return preds @ np.arange(4) / 3