def top_features(df, features, top_n, y_var):
    """
    Returns top_n features based on F-values from ANOVA.

    Parameters
    ----------
    df :
        Input data; passed straight through to ``X_y_splitter``.
    features :
        Candidate feature specification; passed straight through to
        ``X_y_splitter``.
    top_n : int
        Number of highest-scoring features to return. If it exceeds the
        number of scored columns, every column is returned.
    y_var :
        Target specification; passed straight through to ``X_y_splitter``.

    Returns
    -------
    list
        Column labels of ``X`` ordered by descending ANOVA F-value, truncated
        to the first ``top_n`` entries.

    Notes
    -----
    ``X_y_splitter`` is defined elsewhere; this function assumes it returns
    an ``(X, y)`` pair where ``X`` exposes ``.columns`` (presumably a pandas
    DataFrame) -- TODO confirm against its definition.
    """
    # Get X and y from df
    X, y = X_y_splitter(df, features, y_var)

    # Only the raw per-feature scores are used here, never the selector's own
    # mask, so request k="all": the scores are identical and the selector's
    # check of k against the column count no longer depends on top_n.
    scores = SelectKBest(f_classif, k="all").fit(X, y).scores_

    ranked = pd.DataFrame({'feature': X.columns, 'f_values': scores})
    ranked = ranked.sort_values('f_values', ascending=False)

    # Sorting shuffles the integer index, so slice by position explicitly.
    return ranked['feature'].iloc[:top_n].tolist()