# Print the `summary2()` report of `results` (fitted before this section;
# its definition is not part of this part of the script).
print(results.summary2())

# Numbered lookup table of every column except the Attrition target,
# kept for reference against the output above. Numbering starts at 1.
predictor_names = df_full_data.drop('Attrition', axis='columns').columns.tolist()
df_column_name = pd.DataFrame(
    predictor_names,
    index=np.arange(1, len(predictor_names) + 1),
)
df_column_name

# Logistic regression of Attrition on the predictors listed below.
#
# Note: the new clustering dataset has slightly different columns. An earlier
# variant of this formula additionally used ClusterSegment, NumCompaniesWorked
# and the DistanceFromHomeRange_*, NumCompaniesWorkedRange_* and
# YearsAtCompanyRange_* dummy columns, and was fitted with method='lbfgs'.
#
# NOTE(review): if the three MaritalStatus_* dummies form an exhaustive
# one-hot set, including all of them alongside the intercept makes the design
# matrix collinear -- confirm, and consider dropping one as the reference level.
logit_terms = [
    "Age",
    "DailyRate",
    "EnvironmentSatisfaction",
    "JobInvolvement",
    "JobSatisfaction",
    "RelationshipSatisfaction",
    "TotalWorkingYears",
    "TrainingTimesLastYear",
    "WorkLifeBalance",
    "OverTime",
    "PerformanceRating",
    "MaritalStatus_Divorced",
    "MaritalStatus_Married",
    "MaritalStatus_Single",
]
# Joined form is exactly "Attrition~ Age + DailyRate + ... + MaritalStatus_Single".
logit_formula = "Attrition~ " + " + ".join(logit_terms)
model = smf.logit(formula=logit_formula, data=df_full_data).fit()
model.summary()

# ODDS RATIOS, Z-VALUES, P-VALUES, AND 95% CI
# The fitted coefficients and their confidence bounds are exponentiated
# to express them as odds ratios.
model_odds = pd.DataFrame(np.exp(model.params), columns=['OR'])
# Fix: the 'z-value' column used to be filled with `model.pvalues`, so p-values
# were displayed under a z-statistic label. statsmodels exposes the test
# statistics as `tvalues`; the p-values now get their own, correctly named column.
model_odds['z-value'] = model.tvalues
model_odds['p-value'] = model.pvalues
# conf_int() returns the lower/upper bounds (95% by default) of each coefficient.
model_odds[['2.5%', '97.5%']] = np.exp(model.conf_int())
model_odds

"""# Model 6: Neural Network"""

# Random seeds: apply one shared seed value to NumPy, `rn` and TensorFlow,
# in that order, so every generator starts from the same fixed state.
SEED = 123
for apply_seed in (np.random.seed, rn.seed, tf.set_random_seed):
    apply_seed(SEED)

# One-hot encode the Attrition labels of both splits (via to_categorical) so
# the neural network can read them.
# NOTE: this rebinds y_train / y_test; re-running it would pass the already
# encoded arrays through to_categorical again.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Neural Network Architecture