Пример #1
0
    def test_aalen_additive_median_predictions_split_data(self):
        """Median predictions should exceed the observed lifetime about half the time.

        Generates hazard rates and a survival data set from them, fits
        Aalen's additive model on that data, and asserts that the share of
        predicted medians greater than the actual durations is within 0.05
        of 0.5.
        """
        n_subjects, n_covariates = 2500, 5
        timeline = np.linspace(0, 70, 5000)

        # Generate hazard rates and a matching set of random lifetimes.
        hz, _, X = generate_hazard_rates(n_subjects, n_covariates, timeline)
        T = generate_random_lifetimes(hz, timeline)
        X['T'] = T

        # Fit Aalen's model, using 'T' as the duration column.
        aaf = AalenAdditiveFitter()
        aaf.fit(X, 'T')

        # Predict from the columns labelled 0 through 5 only (i.e. without 'T').
        covariate_cols = list(range(6))
        T_pred = aaf.predict_median(X[covariate_cols])
        share_above_actual = (T_pred.values > T).mean()
        assert abs(share_above_actual - 0.5) < 0.05
Пример #2
0
    def test_aalen_additive_median_predictions_split_data(self):
        """Verify that predicted medians overshoot the true lifetime roughly 1/2 the time.

        The data set is simulated: hazard rates are generated first, random
        lifetimes are drawn from them, and Aalen's additive model is fitted
        to the result. The test passes when the fraction of predictions
        greater than the actual lifetimes differs from 0.5 by less than 0.05.
        """
        n = 2500
        d = 5
        timeline = np.linspace(0, 70, 5000)

        # Simulated survival data: hazards -> lifetimes -> duration column 'T'.
        hz, _coef, X = generate_hazard_rates(n, d, timeline)
        T = generate_random_lifetimes(hz, timeline)
        X['T'] = T

        fitter = AalenAdditiveFitter()
        fitter.fit(X, 'T')

        # Only the columns labelled 0..5 are passed to the predictor.
        predictors = X[list(range(6))]
        T_pred = fitter.predict_median(predictors)

        fraction_over = (T_pred.values > T).mean()
        deviation = abs(fraction_over - 0.5)
        assert deviation < 0.05
Пример #3
0
    def test_swapping_order_of_columns_in_a_df_is_okay(self, rossi):
        """Median predictions must not depend on the column order of the input frame.

        For a default fitter and for one built with ``fit_intercept=False``,
        the model is fitted on ``rossi`` and the predictions for three
        orderings of the same covariate columns are compared.
        """
        outcome_cols = ['week', 'arrest']
        misorder = ['age', 'race', 'wexp', 'mar', 'paro', 'prio', 'fin']

        for init_kwargs in ({}, {'fit_intercept': False}):
            aaf = AalenAdditiveFitter(**init_kwargs)
            aaf.fit(rossi, event_col='arrest', duration_col='week')

            # Original frame order vs. the order produced by Index.difference.
            natural_order = rossi.columns.drop(outcome_cols)
            deleted_order = rossi.columns.difference(outcome_cols)

            for other_order in (misorder, deleted_order):
                assert_frame_equal(
                    aaf.predict_median(rossi[natural_order]),
                    aaf.predict_median(rossi[other_order]),
                )
Пример #4
0
    def test_swapping_order_of_columns_in_a_df_is_okay(self, rossi):
        """Median predictions must not depend on the column order of the input frame.

        The model is fitted on ``rossi`` twice (default settings, then
        ``fit_intercept=False``); each time the predictions for the
        covariates in their original order are compared against a
        hand-shuffled order and the order returned by ``Index.difference``.
        """
        aaf = AalenAdditiveFitter()
        aaf.fit(rossi, event_col='arrest', duration_col='week')

        misorder = ['age', 'race', 'wexp', 'mar', 'paro', 'prio', 'fin']
        natural_order = rossi.columns.drop(['week', 'arrest'])
        # Use the explicit set operation: `Index - list` is no longer a set
        # difference in pandas (it is element-wise and fails on string labels).
        deleted_order = rossi.columns.difference(['week', 'arrest'])
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[misorder]))
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[deleted_order]))

        # Repeat the same comparison for a model without an intercept term.
        aaf = AalenAdditiveFitter(fit_intercept=False)
        aaf.fit(rossi, event_col='arrest', duration_col='week')
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[misorder]))
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[deleted_order]))
                      clinical,
                      return_type='dataframe')
    X['T'] = clinical['OS_OS']
    X['C'] = clinical['OS_vital_status']

    trainX = X.ix[trainLabels[i], :].reset_index()
    testX = X.ix[testLabels[i], :].reset_index()

    #Build model and train
    aaf = AalenAdditiveFitter(penalizer=1., fit_intercept=True)
    aaf.fit(trainX.drop(['index'], axis=1),
            duration_col='T',
            event_col='C',
            show_progress=False)
    #Predict on testing data
    median = aaf.predict_median(testX.drop(['T', 'C', 'index'], axis=1))
    median.index = testX['index']
    predictions.append(median.replace([np.inf, -np.inf, np.nan], 0))

# ### Saving results to Synapse and asking Synapse to evaluate our predictions
# To document what we have done we will start by storing this code in Synapse as a file Entity.

# In[34]:

# Describe this script as a Synapse File entity under parent 'syn1720423',
# then hand it to `syn.store`; the value returned by the store call replaces
# the local, not-yet-stored entity.
# NOTE(review): `syn` is presumably a logged-in Synapse client created
# earlier in the script — confirm.
codeEntity = synapseclient.File('tcga_survival_analysis.py',
                                parentId='syn1720423')
codeEntity = syn.store(codeEntity)

# We then save the predictions we made to a file and create a file Entity for it.

# In[95]:
# Go through each training/testing Monte Carlo sampling and train/predict.
predictions = []

# The design matrix and the duration/event columns do not depend on the
# split, so they are built once instead of on every iteration.
X = patsy.dmatrix('age + grade + stage -1', clinical, return_type='dataframe')
X['T'] = clinical['OS_OS']
X['C'] = clinical['OS_vital_status']

for i in range(trainLabels.shape[1]):
    # `.ix` has been removed from pandas; `.loc` is its label-based replacement.
    # NOTE(review): assumes trainLabels[i] / testLabels[i] hold index labels
    # of X (or boolean masks). If they are integer positions against a
    # non-integer index, `.iloc` is the right accessor instead — confirm.
    trainX = X.loc[trainLabels[i], :].reset_index()
    testX = X.loc[testLabels[i], :].reset_index()

    # Build model and train; the 'index' column added by reset_index() is
    # not a covariate and is dropped before fitting.
    aaf = AalenAdditiveFitter(penalizer=1., fit_intercept=True)
    aaf.fit(trainX.drop(['index'], axis=1),
            duration_col='T',
            event_col='C',
            show_progress=False)

    # Predict on testing data, then restore the original row labels.
    median = aaf.predict_median(testX.drop(['T', 'C', 'index'], axis=1))
    median.index = testX['index']
    # Infinite or missing medians are recorded as 0.
    predictions.append(median.replace([np.inf, -np.inf, np.nan], 0))


# ### Saving results to Synapse and asking Synapse to evaluate our predictions
# To document what we have done we will start by storing this code in Synapse as a file Entity.

# In[34]:

# Describe this script as a Synapse File entity under parent 'syn1720423',
# then hand it to `syn.store`; the value returned by the store call replaces
# the local, not-yet-stored entity.
# NOTE(review): `syn` is presumably a logged-in Synapse client created
# earlier in the script — confirm.
codeEntity = synapseclient.File('tcga_survival_analysis.py', parentId='syn1720423')
codeEntity = syn.store(codeEntity)


# We then save the predictions we made to a file and create a file Entity for it.