示例#1
0
def top_features(df, features, top_n, y_var):
    """
    Return the names of the top_n features ranked by ANOVA F-value.

    Parameters
    ----------
    df, features, y_var
        Forwarded unchanged to ``X_y_splitter``, which must return ``(X, y)``
        where ``X`` exposes ``.columns``. Presumably ``df`` is the source
        DataFrame, ``features`` the candidate column names and ``y_var`` the
        target column -- TODO confirm against ``X_y_splitter``.
    top_n : int
        Number of feature names to return; also passed as ``k`` to
        ``SelectKBest``.

    Returns
    -------
    list
        Labels taken from ``X.columns``, ordered from highest to lowest
        F-value and truncated to the first ``top_n`` entries.
    """
    # Get X and y from df
    X, y = X_y_splitter(df, features, y_var)

    # Only the per-feature scores are used; the fitted selector is discarded.
    scores = SelectKBest(f_classif, k=top_n).fit(X, y).scores_

    ranking = pd.DataFrame({'feature': X.columns, 'f_values': scores})
    ranking = ranking.sort_values('f_values', ascending=False)

    # After sorting, index labels no longer match row order, so select the
    # leading rows explicitly by position.
    return ranking['feature'].iloc[:top_n].tolist()