def run_survival_curve(self, df):
    """Fit an Aalen additive model on ``df`` and show one prediction.

    Used for testing only. The design matrix is built with ``pt.dmatrix``
    from the covariates listed below, joined with the ``SRV_TIME_MON`` and
    ``CENSORED`` columns of ``df``, and handed to ``AalenAdditiveFitter``.
    The survival curve predicted for one hard-coded row is plotted and its
    predicted expectation is printed. Nothing is returned.
    """
    duration_col, event_col = 'SRV_TIME_MON', 'CENSORED'
    covariates = (
        'YR_BRTH', 'AGE_DX', 'RADIATN', 'HISTREC', 'ERSTATUS',
        'PRSTATUS', 'BEHANAL', 'HST_STGA', 'NUMPRIMS', 'RACE',
    )

    fitter = AalenAdditiveFitter()

    formula = ' + '.join(covariates)
    design = pt.dmatrix(formula, df, return_type='dataframe')
    training = design.join(df[[duration_col, event_col]])
    fitter.fit(training, duration_col, event_col)

    # INSERT VALUES TO TEST HERE
    # NOTE(review): this row holds 10 values; confirm it matches the number
    # of columns the fitted design matrix actually has.
    sample = np.array([[1., 1961., 52., 0, 0., 2., 1., 0., 4., 2.]])

    survival = fitter.predict_survival_function(sample)
    survival.plot()
    plt.show()

    print(fitter.predict_expectation(sample))
示例#2
0
    def run_survival_curve(self, df):
        """Testing aid: fit an Aalen additive model and inspect one prediction.

        Builds a design matrix from ``df`` with ``pt.dmatrix``, appends the
        ``SRV_TIME_MON`` and ``CENSORED`` columns, fits an
        ``AalenAdditiveFitter`` on the result, then plots the predicted
        survival function and prints the predicted expectation for a single
        hard-coded sample. Returns ``None``.
        """
        time_col = 'SRV_TIME_MON'
        flag_col = 'CENSORED'
        modelspec = (
            'YR_BRTH + AGE_DX + RADIATN + HISTREC + ERSTATUS'
            ' + PRSTATUS + BEHANAL + HST_STGA + NUMPRIMS + RACE'
        )

        aaf = AalenAdditiveFitter()
        design = pt.dmatrix(modelspec, df, return_type='dataframe')
        aaf.fit(design.join(df[[time_col, flag_col]]), time_col, flag_col)

        # INSERT VALUES TO TEST HERE
        probe = np.array([[1., 1961., 52., 0, 0., 2., 1., 0., 4., 2.]])

        curve = aaf.predict_survival_function(probe)
        curve.plot()
        plt.show()

        expectation = aaf.predict_expectation(probe)
        print(expectation)
示例#3
0
    def predict(self, R, Thetas=None, _type='cumulative_hazards', **kwargs):
        """
        Predict from the fitted regression, dispatching on ``_type``.

        Assuming that the type to refit is the first type of
         predictive_relationship

        Parameters:
            R: test data, forwarded to ``self._modify_test_data``.
            Thetas: optional dict forwarded to ``self._modify_test_data``;
                a fresh empty dict is used when omitted.
            _type: one of 'cumulative_hazards', 'survival_function',
                'percentile', 'median' or 'expectation'.
            **kwargs: 'id_col' is read for 'cumulative_hazards' (default
                None) and 'p' for 'percentile' (default 0).

        Returns:
            Whatever the matching ``AalenAdditiveFitter.predict_*`` method
            returns for the modified test data.

        Raises:
            Exception: if ``self.regression_`` is falsy (nothing fitted).
            ValueError: if ``_type`` is not one of the supported names.
        """

        if not self.regression_:
            raise Exception("No regression was fitted on the traning")

        # A dict() default would be created once and shared by every call,
        # so anything written into it would leak into later predictions.
        if Thetas is None:
            Thetas = {}

        X = self._modify_test_data(R, Thetas)
        if _type == 'cumulative_hazards':
            return AalenAdditiveFitter.predict_cumulative_hazard(
                self, X, id_col=kwargs.get('id_col'))
        elif _type == 'survival_function':
            return AalenAdditiveFitter.predict_survival_function(self, X)
        elif _type == 'percentile':
            # NOTE(review): p falls back to 0 here; confirm this is intended
            # rather than the library's own default percentile.
            return AalenAdditiveFitter.predict_percentile(
                self, X, kwargs.get('p', 0))
        elif _type == 'median':
            return AalenAdditiveFitter.predict_median(self, X)
        elif _type == 'expectation':
            return AalenAdditiveFitter.predict_expectation(self, X)
        else:
            raise ValueError("Not avaialble type of prediction")
# First we select 4 random couples married in 2017, each from a different state.
# We subset the dataframe 'data' to a smaller dataframe of only 4 couples and
# compare the prediction outputs.
# NOTE(review): the comment above says 2017 but Marriage_Date below is 2016 — confirm.
row = {
    'State[Alabama]': [0.0],
    'State[Maryland]': [1.0],
    'State[Mississippi]': [0.0],
    'State[New Hampshire]': [0.0],
    'Couple_Race[T.Same-Race]': [1.0],
    'Household_Income_Range[T.42,830$ - 44,765$]': [0.0],
    'Household_Income_Range[T.66,532$ - 70,303$]': [0.0],
    'Household_Income_Range[T.67,500$ - 75,000$]': [1.0],
    'Husband_Education[T.16+ years]': [1.0],
    'Husband_Education[T.Less than 12 years]': [0.0],
    'Husband_Race[T.Other Ethnic Groups]': [1.0],
    'Marriage_Date': [2016],
    'T': 1,
    'E': [0],
}
MD = pd.DataFrame(data=row)
print("MD couple's unique data point", MD)

## plotting the predicted value for this specific couple
ax = plt.subplot(2, 1, 1)
aaf.predict_cumulative_hazard(MD).plot(ax=ax, legend=False)
# The row above is the Maryland couple (State[Maryland] == 1.0) and the figure
# is saved as MarylandCouple.pdf, so the title must name Maryland as well.
plt.title('Maryland Couple predicted Hazard and Survival time')
ax = plt.subplot(2, 1, 2)
aaf.predict_survival_function(MD).plot(ax=ax, legend=False)
plt.savefig('/home/raed/Dropbox/INSE - 6320/Final Project/MarylandCouple.pdf')
plt.show()

# Same idea for the Alabama couple; the husband's race is kept the same as in
# the previous row so that the comparison stays valid.
row = {
    'State[Alabama]': [1.0],
    'State[Maryland]': [0.0],
    'State[Mississippi]': [0.0],
    'State[New Hampshire]': [0.0],
    'Couple_Race[T.Same-Race]': [1.0],
    'Household_Income_Range[T.42,830$ - 44,765$]': [1.0],
    'Household_Income_Range[T.66,532$ - 70,303$]': [0.0],
    'Household_Income_Range[T.67,500$ - 75,000$]': [0.0],
    'Husband_Education[T.16+ years]': [0.0],
    'Husband_Education[T.Less than 12 years]': [0.0],
    'Husband_Race[T.Other Ethnic Groups]': [1.0],
    'Marriage_Date': [2016],
    'T': 1,
    'E': [0],
}
AL = pd.DataFrame(row)
print("AL couple's unique data point", AL)

## Plot the predicted cumulative hazard for this specific couple (top panel).
ax = plt.subplot(2, 1, 1)
alabama_hazard = aaf.predict_cumulative_hazard(AL)
alabama_hazard.plot(ax=ax, legend=False)
示例#5
0
from lifelines.datasets import generate_regression_dataset
from lifelines import AalenAdditiveFitter, CoxPHFitter

# NOTE(review): a later snippet in this file loads the data with
# load_regression_dataset — confirm which loader the installed lifelines has.
regression_dataset = generate_regression_dataset()

# Fit both regression models on the same data; 'T' holds the durations and
# 'E' the event flags.
cf = CoxPHFitter()
cf.fit(regression_dataset, duration_col='T', event_col='E')
aaf = AalenAdditiveFitter(fit_intercept=False)
aaf.fit(regression_dataset, duration_col='T', event_col='E')

# Covariates only. `columns - [...]` (Index set difference) and `.ix` were
# removed from pandas; drop() and .loc are the supported replacements.
x = regression_dataset.drop(['E', 'T'], axis=1)
# .loc keeps the label-based, end-inclusive slice that .ix performed on an
# integer index, i.e. the rows labelled 10 through 12.
aaf.predict_survival_function(x.loc[10:12]).plot()
aaf.plot()
# Using Cox Proportional Hazards model
cf = CoxPHFitter()
cf.fit(regression_dataset, 'T', event_col='E')
cf.print_summary()

# Using Aalen's Additive model
aaf = AalenAdditiveFitter(fit_intercept=False)
aaf.fit(regression_dataset, 'T', event_col='E')

# Covariates only. `columns - [...]` (Index set difference) and `.ix` were
# removed from pandas; drop() and .loc are the supported replacements.
x = regression_dataset.drop(['E', 'T'], axis=1)
# Survival functions of the subjects labelled 10 through 12: .loc keeps the
# label-based, end-inclusive slice that .ix performed on an integer index.
aaf.predict_survival_function(x.loc[10:12]).plot()

aaf.plot()









示例#7
0
var3 0.2186     1.2443    0.0758 2.8836 0.0039      0.0700      0.3672  **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Concordance = 0.580
"""

# Plot the Cox model; `cph` is not defined in the visible part of this snippet.
cph.plot()

# Using Aalen's Additive model
aaf = AalenAdditiveFitter(fit_intercept=False)
aaf.fit(regression_dataset, duration_col='T', event_col='E')
aaf.plot()

# Covariates only: everything except the event ('E') and duration ('T') columns.
X = regression_dataset.drop(['E', 'T'], axis=1)
# Positional rows 10 and 11 -> the unique survival functions of two subjects.
two_subjects = X.iloc[10:12]
aaf.predict_survival_function(two_subjects).plot()

# 10-fold cross-validation of the Cox model on the same dataset.
cv_options = {'duration_col': 'T', 'event_col': 'E', 'k': 10}
scores = k_fold_cross_validation(cph, regression_dataset, **cv_options)
print(scores)
mean_score = np.mean(scores)
print(mean_score)
score_std = np.std(scores)
print(score_std)

plt.show()
#==============================================================================
#==============================================================================
# Fit a Nelson-Aalen estimator on the durations T and event flags E
# (both are defined outside the visible part of this snippet).
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)

# Here a cumulative_hazard_ is exposed instead of a survival_function_.

# Survival Regression

from lifelines.datasets import load_regression_dataset

regression_dataset = load_regression_dataset()

regression_dataset.head()

from lifelines import AalenAdditiveFitter, CoxPHFitter

# Using Cox Proportional Hazards model
cf = CoxPHFitter()
cf.fit(regression_dataset, 'T', event_col='E')
cf.print_summary()

# Using Aalen's Additive model
aaf = AalenAdditiveFitter(fit_intercept=False)
aaf.fit(regression_dataset, 'T', event_col='E')

# Covariates only. `columns - [...]` (Index set difference) and `.ix` were
# removed from pandas; drop() and .loc are the supported replacements.
x = regression_dataset.drop(['E', 'T'], axis=1)
# Survival functions of the subjects labelled 10 through 12: .loc keeps the
# label-based, end-inclusive slice that .ix performed on an integer index.
aaf.predict_survival_function(x.loc[10:12]).plot()

aaf.plot()
示例#9
0
def kfoldcv(data, k=5, m=10, penalizer=0.5, timeinterval=np.linspace(1,20,20), duration_col='ndays_act', event_col='observed'):
    """
    Trains data with AalenAdditiveFitter and (k-fold) cross validate it.
    Based on lifelines library for survival analysis in Python.

    data: Pandas dataframe. The covariates must come first: the last two
          columns are dropped by position when rows are compared and when
          predictions are made.
    k: number of folds
    m: number of time units to be included in the cross validation
    penalizer: argument of class AalenAdditiveFitter (lifelines library)
    timeinterval: argument of AalenAdditiveFitter().fit method. Time points that are fitted.
    duration_col: last column from data. It contains the lifetime of each case.
    event_col: second-to-last column from data. It contains the censorships. So far this function
                only works without censorships, that is, all death events must be observed.
                Therefore, it must be a column of ones.

    Prints: for every fold, the average relative error of the predicted probabilities.

    """

    aaf = AalenAdditiveFitter(penalizer=penalizer)
    n = data.shape[0]
    # Shuffle the rows, then order them by the event flag. reindex() and
    # sort_values() both return new frames, so the caller's data is untouched.
    # (DataFrame.sort was removed from pandas; sort_values replaces it.)
    data = data.reindex(np.random.permutation(data.index)).sort_values(event_col)
    scores = []
    # Deal the fold numbers 1..k round-robin over the rows.
    assignments = np.array((n // k + 1) * list(range(1, k + 1)))
    assignments = assignments[:n]

    for i in range(1, k + 1):

        ix = assignments == i
        training_data = data.loc[~ix]
        testing_data = data.loc[ix]

        aaf.fit(training_data, duration_col=duration_col, event_col=event_col, timeline=timeinterval)

        prec_sum = 0.0
        rel_sum = 0.0
        df = testing_data
        # ndays must be the last column, and observed the second-to-last
        features = df.iloc[:, :-2].values
        # Marks test rows already handled as part of an earlier group.
        seen = np.zeros(df.shape[0], dtype=bool)

        for pos, row_features in enumerate(features):

            if seen[pos]:
                continue

            # Group every test row whose covariates equal this row's.
            same = np.all(features == row_features, axis=1)
            seen |= same
            a = df[same]

            # NOTE(review): value_counts() orders by frequency, not by time,
            # so actual_rate[i] is the share of the i-th most common duration
            # while pred_rate[i] belongs to the i-th time point — confirm
            # this pairing is intended.
            actual_rate_series = a[duration_col].value_counts() / a.shape[0]
            mini = min(actual_rate_series.shape[0], m)
            actual_rate = np.array(actual_rate_series)[:mini]

            # One-row 2-D array with the covariates of this group.
            covariates = a.iloc[0, :-2].values[None, :]
            surv = np.asarray(aaf.predict_survival_function(covariates)).ravel()
            if surv.shape[0] < mini:
                raise IndexError("timeinterval has fewer points than the time units being compared")

            # Probability mass per step: 1 - S(t_0), then S(t_(i-1)) - S(t_i).
            pred_rate = -np.diff(np.concatenate(([1.0], surv[:mini])))

            maxi = np.maximum(pred_rate, actual_rate)
            rel_error = np.abs(pred_rate - actual_rate) / maxi
            # float() keeps this a true division under Python 2 as well.
            succes_rate = float(np.count_nonzero(rel_error <= 0.15)) / mini
            # Weight each group by the number of test rows it stands for.
            prec_sum += succes_rate * len(a)
            rel_sum += np.sum(rel_error) / mini * len(a)

        precision = prec_sum / df.shape[0]
        relative = rel_sum / df.shape[0]
        scores.append(precision)

        # A single pre-formatted argument prints identically on Python 2 and 3.
        print("Average relative error:  %s" % relative)