Exemplo n.º 1
0
    'seniority', 'company_id_12', 'company_id_9', 'company_id_10',
    'company_id_6', 'company_id_7', 'company_id_8', 'company_id_1',
    'dept_design', 'company_id_11', 'company_id_5'
],
                   axis=1)

# Preview the first rows of data3 (presumably a pandas DataFrame -- confirm).
# As a bare expression, the result is only displayed in an interactive session.
data3.head()
# Cast every column of data3 to int64 and keep the result as data4.
# NOTE(review): presumably done so the model receives purely numeric input;
# any non-integer values would be truncated by this cast -- confirm that is intended.
data4 = data3.astype('int64')
# Print the dtype / non-null summary to verify the cast.
data4.info()
# Show which columns remain in the frame that is passed to the model.
data4.columns

# Fit cph (presumably a lifelines CoxPHFitter created earlier -- confirm) on
# the all-integer frame: 'duration' is the duration column and 'observed' is
# the event column; show_progress=True asks the fitter to report progress
# while it runs.
cph.fit(
    data4,
    duration_col='duration',
    event_col='observed',
    show_progress=True,
)
# Print the fitted model's summary.
cph.print_summary()

# Finally! Dropping enough variables did the trick -- with too many features the model would not converge.

# Salary seems to be a big predictor of people leaving -- the lower the salary, the more likely they are to leave?
# Working for company 2 also appears to make leaving more likely,
# and engineers appear more likely to leave as well.

# Well, some aspects of this model suggest that this way of modeling is not the best choice -- I don't think
# we have a linear relationship here, so we probably shouldn't use it...
# Inspect the fitted model's score_ attribute. As a bare expression, the value
# is only displayed in an interactive session.
# NOTE(review): newer lifelines releases appear to expose this value as
# `concordance_index_` instead -- confirm against the installed version.
cph.score_
# Run the fitter's assumption checks against data4, the same frame used for fitting.
# NOTE(review): assuming cph is a lifelines CoxPHFitter, this tests the
# proportional hazards assumption -- confirm.
cph.check_assumptions(data4)
# Look up the version of the installed lifelines distribution via pkg_resources.
# NOTE(review): pkg_resources is deprecated by setuptools; on Python 3.8+
# `importlib.metadata.version("lifelines")` is the standard-library
# equivalent -- consider switching once the target Python version is confirmed.
import pkg_resources
import lifelines
# Bare expression: evaluates to the version of the "lifelines" distribution
# (only displayed in an interactive session).
pkg_resources.get_distribution("lifelines").version