## Scaling and Encoding the data
# Integer-encode every categorical (object-dtype) column in place.
# FIX: the original did df[colum] = OneHotEncoder().fit_transform(df[colum]),
# which raises ValueError on 1-D input and would return a sparse matrix
# that cannot be assigned back as a single column. pd.factorize yields the
# intended per-column integer codes (equivalent to LabelEncoder) and keeps
# the DataFrame numeric for the scaler below.
for colum in df.columns:
    if df[colum].dtype == object:
        df[colum] = pd.factorize(df[colum])[0]

# Scale all features to [0, 1]. MinMaxScaler.fit_transform returns a bare
# ndarray, so save the column labels and rebuild the DataFrame afterwards.
columns = df.columns
df = pd.DataFrame(MinMaxScaler().fit_transform(df), columns=columns)

## Finding the Correlations between Features
# sns.heatmap(df.corr(), fmt='.1f', annot=True)
# plt.show()
# Correlation of every feature with the target; drop the target's
# self-correlation so only predictors remain.
correlations = df.corr()['SalePrice'].drop('SalePrice')


## Choosing the best threshold for improving the model
def get_features(correlation_threshold, corr_series=None):
    """Return names of features whose absolute correlation with the
    target exceeds ``correlation_threshold``.

    Parameters
    ----------
    correlation_threshold : float
        Minimum absolute correlation a feature must have to be kept.
    corr_series : pandas.Series, optional
        Feature -> correlation mapping to filter. Defaults to the
        module-level ``correlations`` computed above, so existing
        single-argument calls behave exactly as before.

    Returns
    -------
    list of str
        Names of the features passing the threshold.
    """
    if corr_series is None:
        corr_series = correlations
    abs_corrs = corr_series.abs()
    high_correlations = abs_corrs[
        abs_corrs > correlation_threshold].index.values.tolist()
    return high_correlations
'school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu',
    'Fedu', 'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime',
    'studytime', 'failures', 'schoolsup', 'famsup', 'paid',
    'activities', 'nursery', 'higher', 'internet', 'romantic',
    'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health',
    'absences', 'G1', 'G2', 'G3'
])
# NOTE(review): the list above is the tail of a statement whose opening is
# outside this chunk — presumably the names/columns argument of the call
# that builds ``df`` (student-performance dataset). Confirm against the
# full file.
# print(df.head())
# print(df.dtypes)

## Finding the Correlations between Features
# sns.heatmap(df.corr(), fmt = '.1f',annot = True)
# plt.show()
# Correlation of every feature with the target column 'G3'; drop the
# target's self-correlation so only predictors remain.
correlations = df.corr()['G3'].drop('G3')
#print(correlations)
# print(correlations.quantile(.25))
# print(correlations.quantile(.75))


## Choosing the best threshold for improving the model
def get_features(correlation_threshold):
    """Return names of features whose absolute correlation with 'G3'
    (taken from the module-level ``correlations`` Series) exceeds
    ``correlation_threshold``."""
    abs_corrs = correlations.abs()
    high_correlations = abs_corrs[
        abs_corrs > correlation_threshold].index.values.tolist()
    return high_correlations
# thresh = []