## Scaling and Encoding the data
# Integer-encode every categorical (object-dtype) column.
# FIX: the original used OneHotEncoder().fit_transform(df[colum]), which
# raises on a 1-D Series (sklearn transformers require 2-D input) and would
# in any case produce a multi-column sparse matrix that cannot be stored
# back into a single DataFrame column. pd.factorize yields one integer code
# per row, which is what the single-column assignment requires.
for colum in df.columns:
    if df[colum].dtype == object:
        # print(colum , df[colum].unique().tolist())
        df[colum] = pd.factorize(df[colum])[0]

# Scale every feature to [0, 1]; fit_transform returns a bare ndarray, so
# save the column labels first and rebuild the DataFrame afterwards.
columns = df.columns
df = MinMaxScaler().fit_transform(df)
df = pd.DataFrame(df, columns=columns)

## Finding the Correlations between Features
# sns.heatmap(df.corr(), fmt = '.1f',annot = True)
# plt.show()

# Correlation of every feature with the target; the target's trivial
# self-correlation (always 1.0) is excluded.
corr_matrix = df.corr()
correlations = corr_matrix['SalePrice'].drop('SalePrice')

# print(correlations)
# print(correlations.quantile(.25))
# print(correlations.quantile(.75))
# print(correlations.quantile(.50))

## Choosing the best threshold for improving the model
## Choosing the best threshold for improving the model
def get_features(correlation_threshold, corr=None):
    """Return the names of features whose absolute correlation with the
    target strictly exceeds *correlation_threshold*.

    Parameters
    ----------
    correlation_threshold : float
        Exclusive lower bound on the absolute correlation.
    corr : pandas.Series, optional
        Feature-name -> correlation mapping. Defaults to the module-level
        ``correlations`` computed above, so existing callers are unchanged;
        passing it explicitly removes the hidden-global dependency.

    Returns
    -------
    list of str
        Selected feature names, in the Series' index order.
    """
    if corr is None:
        corr = correlations  # module-level Series computed above
    abs_corrs = corr.abs()
    return abs_corrs[abs_corrs > correlation_threshold].index.values.tolist()

                      'school', 'sex', 'age', 'address', 'famsize', 'Pstatus',
                      'Medu', 'Fedu', 'Mjob', 'Fjob', 'reason', 'guardian',
                      'traveltime', 'studytime', 'failures', 'schoolsup',
                      'famsup', 'paid', 'activities', 'nursery', 'higher',
                      'internet', 'romantic', 'famrel', 'freetime', 'goout',
                      'Dalc', 'Walc', 'health', 'absences', 'G1', 'G2', 'G3'
                  ])

# print(df.head())
# print(df.dtypes)

## Finding the Correlations between Features
# sns.heatmap(df.corr(), fmt = '.1f',annot = True)
# plt.show()

# Per-feature correlation with the final grade G3; G3's trivial
# self-correlation (always 1.0) is dropped.
corr_matrix = df.corr()
correlations = corr_matrix['G3'].drop('G3')

#print(correlations)
# print(correlations.quantile(.25))
# print(correlations.quantile(.75))

## Choosing the best threshold for improving the model
def get_features(correlation_threshold, corr=None):
    """Return the names of features whose absolute correlation with the
    target strictly exceeds *correlation_threshold*.

    Parameters
    ----------
    correlation_threshold : float
        Exclusive lower bound on the absolute correlation.
    corr : pandas.Series, optional
        Feature-name -> correlation mapping. Defaults to the module-level
        ``correlations`` computed above, so existing callers are unchanged;
        passing it explicitly removes the hidden-global dependency.

    Returns
    -------
    list of str
        Selected feature names, in the Series' index order.
    """
    if corr is None:
        corr = correlations  # module-level Series computed above
    abs_corrs = corr.abs()
    return abs_corrs[abs_corrs > correlation_threshold].index.values.tolist()


# thresh = []