示例#1
0
def stepwise_selection(data, target, SL_in=0.05, SL_out=0.05):
    """Select regression features by bidirectional stepwise elimination.

    Each round fits one OLS model per unselected column (together with
    the already selected columns and an intercept) and admits the column
    with the smallest p-value, provided it is below ``SL_in``.  After
    every admission, selected columns are dropped one at a time, largest
    p-value first, for as long as that p-value is at least ``SL_out``.

    Args:
        data: DataFrame whose columns are the candidate features.
        target: Response values passed to ``OLS`` as the dependent
            variable.
        SL_in: A candidate is admitted only if its p-value is strictly
            below this threshold.
        SL_out: A selected feature is removed when its p-value is
            greater than or equal to this threshold.

    Returns:
        List of selected column labels, in the order they were admitted.
        Empty when ``data`` has no columns or no candidate ever meets
        ``SL_in``.
    """
    candidate_features = data.columns.tolist()
    best_features = []
    # The procedure is deterministic, so reaching a feature set a second
    # time means it would cycle forever (e.g. SL_in > SL_out lets a
    # feature be admitted and immediately dropped again).  Remember every
    # set reached so the loop can stop instead of spinning.
    seen_states = {frozenset()}
    while candidate_features:
        # Preserve column order (rather than using a set difference) so
        # that ties between p-values resolve the same way on every run.
        already_selected = set(best_features)
        remaining_features = [
            column for column in candidate_features
            if column not in already_selected
        ]
        # An explicit float dtype keeps min()/idxmin() working; without it
        # recent pandas versions create an object-dtype Series.
        new_pval = pd.Series(index=remaining_features, dtype=float)
        for new_column in remaining_features:
            model = OLS(
                target,
                sm.add_constant(data[best_features + [new_column]]),
            ).fit()
            new_pval[new_column] = model.pvalues[new_column]

        # With no candidates left the minimum is NaN, the comparison is
        # False, and the search ends.
        min_p_value = new_pval.min()
        if not min_p_value < SL_in:
            break
        best_features.append(new_pval.idxmin())

        # Backward pass: drop the least significant feature until every
        # remaining one is below SL_out.
        while best_features:
            best_features_with_constant = sm.add_constant(data[best_features])
            fitted = OLS(target, best_features_with_constant).fit()
            # Select by label instead of slicing off position 0: the
            # intercept is not guaranteed to be the first entry when
            # add_constant finds an existing constant column.
            p_values = fitted.pvalues[best_features]
            max_p_value = p_values.max()
            if max_p_value >= SL_out:
                best_features.remove(p_values.idxmax())
            else:
                break

        state = frozenset(best_features)
        if state in seen_states:
            # No progress is possible from here; stop with what we have.
            break
        seen_states.add(state)
    return best_features