def stepwise_selection(data, target, SL_in=0.05, SL_out=0.05):
    """Select regressors by bidirectional (forward/backward) stepwise OLS.

    Each round has two phases:

    1. Forward: every column not yet selected is tried in turn by fitting
       ``OLS(target, sm.add_constant(data[selected + [candidate]]))`` and
       recording the candidate's p-value. The candidate with the smallest
       p-value is added if that p-value is strictly below ``SL_in``.
    2. Backward: the selected set is refitted repeatedly, and the feature
       with the largest p-value is dropped for as long as that p-value is
       greater than or equal to ``SL_out``.

    The search ends when nothing is left to try, when no candidate passes
    ``SL_in``, or when a round finishes on a feature set that an earlier
    round already produced (a warning is issued in that last case).

    NOTE(review): ``pd``, ``sm`` and ``OLS`` are resolved from the enclosing
    module; presumably pandas, ``statsmodels.api`` and statsmodels' ``OLS``
    -- confirm against the file's imports.

    Args:
        data: Object exposing ``.columns`` and list-of-labels indexing
            (used as ``data[[...]]``); presumably a pandas DataFrame of
            candidate regressors.
        target: Response variable, passed unchanged as the first argument
            of ``OLS``.
        SL_in: Entry threshold; a candidate is added only if its p-value is
            strictly less than this.
        SL_out: Removal threshold; a selected feature is dropped while its
            p-value is greater than or equal to this.

    Returns:
        list: Labels of the selected columns, in the order they were added.
        Empty if ``data`` has no columns or nothing passes ``SL_in``.
    """
    import warnings

    # Collapse duplicate labels (the candidate pool is a set of labels) while
    # keeping column order, so ties between p-values resolve reproducibly.
    initial_features = list(dict.fromkeys(data.columns.tolist()))
    best_features = []

    # Feature sets reached at the end of a round. Each round depends only on
    # the current set, so reaching the same set twice means the search would
    # repeat the same additions/removals forever (e.g. when SL_in > SL_out a
    # feature can be added and immediately dropped again).
    seen_states = {frozenset()}

    while True:
        selected = set(best_features)
        remaining_features = [
            column for column in initial_features if column not in selected
        ]
        if not remaining_features:
            break

        # --- forward step -------------------------------------------------
        # An explicit float dtype keeps min()/idxmin() numeric; without it
        # pandas falls back to object dtype (and warns on older versions).
        new_pval = pd.Series(index=remaining_features, dtype=float)
        for new_column in remaining_features:
            model = OLS(
                target, sm.add_constant(data[best_features + [new_column]])
            ).fit()
            new_pval[new_column] = model.pvalues[new_column]

        min_p_value = new_pval.min()
        # Written as "not <" so that a NaN minimum also stops the search.
        if not min_p_value < SL_in:
            break
        best_features.append(new_pval.idxmin())

        # --- backward step ------------------------------------------------
        while best_features:
            fitted = OLS(target, sm.add_constant(data[best_features])).fit()
            # Pick the feature p-values by label instead of slicing off the
            # first entry: if add_constant does not prepend an intercept
            # column, a positional [1:] would drop a real feature's p-value.
            p_values = fitted.pvalues.loc[best_features]
            max_p_value = p_values.max()
            if not max_p_value >= SL_out:
                break
            best_features.remove(p_values.idxmax())

        state = frozenset(best_features)
        if state in seen_states:
            warnings.warn(
                "stepwise_selection reached a feature set it had already "
                "visited; stopping to avoid an endless add/remove cycle",
                RuntimeWarning,
                stacklevel=2,
            )
            break
        seen_states.add(state)

    return best_features