## Scaling and Encoding the data
# Integer-encode every categorical (object-dtype) column in place.
# FIX: the original did df[colum] = OneHotEncoder().fit_transform(df[colum]),
# which raises ValueError on 1-D input and would return a sparse matrix
# that cannot be assigned back as a single column. pd.factorize yields the
# intended per-column integer codes (equivalent to LabelEncoder) and keeps
# the DataFrame numeric for the scaler below.
for colum in df.columns:
    if df[colum].dtype == object:
        df[colum] = pd.factorize(df[colum])[0]

# Scale all features to [0, 1]. MinMaxScaler.fit_transform returns a bare
# ndarray, so save the column labels and rebuild the DataFrame afterwards.
columns = df.columns
df = pd.DataFrame(MinMaxScaler().fit_transform(df), columns=columns)

## Finding the Correlations between Features
# sns.heatmap(df.corr(), fmt='.1f', annot=True)
# plt.show()
# Correlation of every feature with the target; drop the target's
# self-correlation so only predictors remain.
correlations = df.corr()['SalePrice'].drop('SalePrice')


## Choosing the best threshold for improving the model
def get_features(correlation_threshold, corr_series=None):
    """Return names of features whose absolute correlation with the
    target exceeds ``correlation_threshold``.

    Parameters
    ----------
    correlation_threshold : float
        Minimum absolute correlation a feature must have to be kept.
    corr_series : pandas.Series, optional
        Feature -> correlation mapping to filter. Defaults to the
        module-level ``correlations`` computed above, so existing
        single-argument calls behave exactly as before.

    Returns
    -------
    list of str
        Names of the features passing the threshold.
    """
    if corr_series is None:
        corr_series = correlations
    abs_corrs = corr_series.abs()
    high_correlations = abs_corrs[
        abs_corrs > correlation_threshold].index.values.tolist()
    return high_correlations
'school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu',
    'Fedu', 'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime',
    'studytime', 'failures', 'schoolsup', 'famsup', 'paid',
    'activities', 'nursery', 'higher', 'internet', 'romantic',
    'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health',
    'absences', 'G1', 'G2', 'G3'
])
# NOTE(review): the list above is the tail of a statement whose opening is
# outside this chunk — presumably the names/columns argument of the call
# that builds ``df`` (student-performance dataset). Confirm against the
# full file.
# print(df.head())
# print(df.dtypes)

## Finding the Correlations between Features
# sns.heatmap(df.corr(), fmt = '.1f',annot = True)
# plt.show()
# Correlation of every feature with the target column 'G3'; drop the
# target's self-correlation so only predictors remain.
correlations = df.corr()['G3'].drop('G3')
#print(correlations)
# print(correlations.quantile(.25))
# print(correlations.quantile(.75))


## Choosing the best threshold for improving the model
def get_features(correlation_threshold):
    """Return names of features whose absolute correlation with 'G3'
    (taken from the module-level ``correlations`` Series) exceeds
    ``correlation_threshold``."""
    abs_corrs = correlations.abs()
    high_correlations = abs_corrs[
        abs_corrs > correlation_threshold].index.values.tolist()
    return high_correlations
# thresh = []