Пример #1
0
def calculate_ipcw(dataset, time_of_censoring):
    """
    Calculate inverse-probability-of-censoring weights (IPCW).

    A Cox model is fitted to the censoring process (its event indicator is
    ``1 - y``) on every column of ``dataset["X"]`` except the last one. Each
    sample is weighted by the inverse of its predicted survival value taken
    at its own time ``t`` when ``y == 1`` and at ``time_of_censoring``
    otherwise.

    Args:
        dataset: mapping with "X" (2-D array), "t" (times) and "y"
            (1 = event observed).
        time_of_censoring: time at which samples with ``y != 1`` are
            evaluated.

    Returns:
        1-D numpy array with one weight per row of ``dataset["X"]``.
    """
    covariates = dataset["X"][:, :-1]
    df = pd.DataFrame(covariates)
    df["t"] = dataset["t"]
    df["e"] = 1 - dataset["y"]

    cph = CoxPHFitter()
    cph.fit(df, "t", "e")
    sf = cph.predict_survival_function(covariates)

    observed = np.asarray(dataset["y"]).reshape(-1) == 1
    own_times = np.asarray(dataset["t"]).reshape(-1)
    eval_times = np.where(observed, own_times, time_of_censoring)

    # searchsorted returns len(index) for a time beyond the last fitted time,
    # which would index past the end; clamp to the last available row.
    rows = np.minimum(np.searchsorted(sf.index, eval_times), len(sf.index) - 1)
    return 1 / sf.values[rows, np.arange(len(rows))]
Пример #2
0
def calculate_ipcw(dataset, time_of_censoring):
    """
    Calculate inverse propensity of censorship weights for the dataset.

    A Cox model is used to model the censoring distribution: it is fitted
    with ``1 - y`` as the event indicator on every column of
    ``dataset["X"]`` except the last one. The weight of a sample is the
    inverse of its predicted survival value at its own time ``t`` when
    ``y == 1`` and at ``time_of_censoring`` otherwise.

    Args:
        dataset: mapping with "X" (2-D array), "t" (times) and "y"
            (1 = event observed).
        time_of_censoring: evaluation time used for samples with ``y != 1``.

    Returns:
        1-D numpy array with one weight per row of ``dataset["X"]``.
    """
    covariates = dataset["X"][:, :-1]
    df = pd.DataFrame(covariates)
    df["t"] = dataset["t"]
    df["c"] = 1 - dataset["y"]

    cph = CoxPHFitter()
    cph.fit(df, "t", "c")
    sf = cph.predict_survival_function(covariates)

    observed = np.asarray(dataset["y"]).reshape(-1) == 1
    own_times = np.asarray(dataset["t"]).reshape(-1)
    eval_times = np.where(observed, own_times, time_of_censoring)

    # A time past the last fitted time makes searchsorted return len(index);
    # clamp so the lookup uses the last row instead of raising IndexError.
    rows = np.minimum(np.searchsorted(sf.index, eval_times), len(sf.index) - 1)
    return 1 / sf.values[rows, np.arange(len(rows))]
def run_baseline(runs=10):
    """
    Fit and evaluate a Cox proportional-hazards baseline ``runs`` times.

    Each run loads the data, transforms it with ``transform_data``, fits a
    ``CoxPHFitter`` on the transformed training columns and scores the test
    predictions with time-dependent concordance and integrated Brier score.
    Running averages are printed after every run; the model of the last run
    is passed to ``plot_survival``.

    Args:
        runs: number of load/fit/evaluate repetitions.
    """
    concordance = []
    ibs = []
    cph = None
    cox_train = None
    pc_col = []

    for _ in tqdm(range(runs)):
        df_train, df_test, df_val = load_data("./summaries/survival_data")

        x_mapper, labtrans, train, val, x_test, durations_test, events_test, pca = transform_data(
            df_train, df_test, df_val, 'LogisticHazard', "standard", cols_standardize, log_columns, num_durations=100)
        x_train, y_train = train

        pc_col = ['PC' + str(k) for k in range(x_train.shape[1])]
        cox_train = pd.DataFrame(x_train, columns=pc_col)
        cox_test = pd.DataFrame(x_test, columns=pc_col)

        cox_train["duration"] = y_train[0]
        cox_train["event"] = y_train[1]
        cox_train = cox_train.dropna()

        # Drop incomplete test rows together with their targets so that the
        # predictions stay aligned with durations_test / events_test.
        complete = cox_test.notna().all(axis=1).values
        cox_test = cox_test[complete]
        durations_test = durations_test[complete]
        events_test = events_test[complete]

        cph = CoxPHFitter().fit(cox_train, 'duration', 'event')
        surv = cph.predict_survival_function(cox_test)
        ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
        concordance.append(ev.concordance_td('antolini'))
        time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
        ibs.append(ev.integrated_brier_score(time_grid))

        print("Average concordance: %s" % np.mean(concordance))
        print("Average IBS: %s" % np.mean(ibs))

    # Nothing was fitted when runs == 0, so there is nothing to plot.
    if cph is not None:
        plot_survival(cox_train,
                      pc_col, cph, './survival/cox', baseline=True)
Пример #4
0
import sys

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from lifelines import CoxPHFitter
from lifelines.datasets import load_regression_dataset

sns.set()

# Avoid encoding problems on Python 2. `reload` is not a builtin and
# `sys.setdefaultencoding` does not exist on Python 3, so run this there only.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('gbk')

# load data
regression_dataset = load_regression_dataset()

# fit
cph = CoxPHFitter()
cph.fit(regression_dataset, 'T', event_col='E')
X = regression_dataset.drop(['E', 'T'], axis=1)

# draw the predicted survival curves of rows 1..4
cph.predict_survival_function(X.iloc[1:5]).plot()
plt.xlim(0, 10)
plt.ylim(0.2, 1)
plt.title('survival curves for events')
plt.show()
Пример #5
0
cph.fit(_X_train, duration_col='Survival', event_col='Event')
# cph.print_summary()  # access the results using cph.summary

# Validation: remove the target columns so only covariates are passed on.
_X_valid_1 = _X_valid_1.drop(["Survival", "Event"], axis=1)
_X_valid_2 = _X_valid_2.drop(["Survival", "Event"], axis=1)
_X_valid_3 = _X_valid_3.drop(["Survival", "Event"], axis=1)
_X_valid_4 = _X_valid_4.drop(["Survival", "Event"], axis=1)
_X_valid_5 = _X_valid_5.drop(["Survival", "Event"], axis=1)

# For validation set k take row k of the survival matrix evaluated at times
# 0..before_steps. `.values` replaces `DataFrame.as_matrix()`, which was
# removed in pandas 1.0.
_eval_times = np.arange(before_steps + 1)
_seq_pred_y_valid = np.array([
    cph.predict_survival_function(_X_valid_k, _eval_times).values[_k, :]
    for _k, _X_valid_k in enumerate(
        (_X_valid_1, _X_valid_2, _X_valid_3, _X_valid_4, _X_valid_5), start=1)
])

_seq_pred_y_valid = _seq_pred_y_valid.transpose()

thrld_score = dict()
for sur_thrld_valid in np.arange(1.0, 0.0, -0.01):
Пример #6
0
                                                    random_state=42)
train = pd.concat([a_train, b_train], axis=1).reset_index(drop=True)
test = pd.concat([a_test, b_test], axis=1).reset_index(drop=True)
test_X = test.drop(droplist, axis=1)
test_y = test['术后住院时间']
from lifelines import CoxPHFitter
cph = CoxPHFitter()
# Columns excluded from the model on both the training and the test side.
_excluded = ['神经系统-膈肌麻痹(可能膈神经损伤)', 'Id']
train = train.drop(_excluded, axis=1)
test_X = test_X.drop(_excluded, axis=1)
cph.fit(train, duration_col='术后住院时间', event_col='出院时状态', show_progress=True)
cph.predict_partial_hazard(test_X)
survival_result = cph.predict_survival_function(test_X)
# Keep only the part of each curve at or below 0.5 (everything else is NaN).
survival_result = survival_result[survival_result <= 0.5]
# Survival curves do not increase, so the first non-NaN time is the first
# time the curve reaches 0.5 (what idxmax picked before). It is None when a
# curve never gets that low. The frame is sized from the predictions instead
# of a hard-coded 677 rows.
LOSResult = pd.DataFrame({
    'Id': np.arange(survival_result.shape[1]),
    'LOS': [survival_result[c].first_valid_index()
            for c in survival_result.columns],
})
test['Id'] = test.index
test1 = pd.merge(test, LOSResult, on='Id')
fig, ax = plt.subplots(figsize=(12, 12))
from lifelines import KaplanMeierFitter
kmf_control = KaplanMeierFitter()
ax = kmf_control.fit(test1['术后住院时间'], label='Real').plot(ax=ax,
Пример #7
0
# In[ ]:

import matplotlib
from matplotlib import pyplot as plt
import lifelines
from lifelines import KaplanMeierFitter  # survival analysis library
from lifelines.statistics import logrank_test  # survival statistical testing
from lifelines import CoxPHFitter

df['churn'] = df1.fuga

cph = CoxPHFitter()
# duration_col / event_col take column *names* of the fitted frame, not the
# Series themselves; 'enddt' and 'FUGA' are the columns dropped from df below.
cph.fit(df,
        duration_col='enddt',
        event_col='FUGA',
        show_progress=True)
cph.print_summary()
cph.plot()

# In[ ]:

# Covariates only: the duration and event columns are removed for prediction.
df_2 = df.drop(['enddt', 'FUGA'], axis=1)
cph.predict_partial_hazard(df_2)
cph.predict_survival_function(df_2, times=[5., 25., 50.])
# NOTE(review): the original passed `X`, which is not defined in this
# snippet; the covariate frame built above is used instead.
cph.predict_median(df_2)

kmf = KaplanMeierFitter()
T = df['time_to_fuga']  # duration
C = df['churn']  # censorship - 1 if death/churn is seen, 0 if censored
Пример #8
0

# Create dummy columns for the score factor used in the survival analysis.
dummies0 = pd.get_dummies(df3['score_factor'])
df3 = pd.concat([df3, dummies0], axis=1)

# How good is the categorisation into high, medium and low?
# Only 'High' and 'Medium' enter the model; no other dummy column is used.
factor_df = df3[['duration', 'event', 'High', 'Medium']]
cph = CoxPHFitter()
cph.fit(df=factor_df, duration_col='duration', event_col='event')
cph.print_summary()
cph.plot()
# predict_survival_function requires the covariates as its first argument;
# the model is already fitted, so the identical second fit was dropped.
cph.predict_survival_function(factor_df[['High', 'Medium']])

# How good is the numeric decile_score?
df4 = df3[['duration', 'event', 'decile_score']]

cph.fit(df=df4, duration_col='duration', event_col='event')
cph.print_summary()
cph.predict_survival_function(X=df4)
Пример #9
0
def train_cox(x_train0, ix_in, y_per_pt, y_int, metric = 'auc', feature_grid = None):
    """
    Pick an L1 penalizer for a Cox model by inner cross-validation and refit.

    For every penalizer in ``feature_grid`` up to 10 randomly chosen folds
    from ``leave_one_out_cv`` are fitted on the training part of ``ix_in``.
    The concordance index of the partial hazards and the ROC AUC of
    ``1 - S(4.0)`` are stored per penalizer. The penalizer with the highest
    ``metric`` score is refitted on the whole training part and used to
    predict for the held-out rows.

    Args:
        x_train0: DataFrame with 'week' and 'outcome' columns. Index labels
            are split on '-'; the first piece is used as the key into
            ``y_per_pt``, the second is passed to ``get_class_weights``.
        ix_in: ``(train_index, test_index)`` positional indexers into
            ``x_train0``.
        y_per_pt: per-key label; the value 'Cleared' appends '+' to the
            reported actual time.
        y_int: indexable by the index labels; passed to
            ``get_class_weights``.
        metric: which score table selects the penalizer, 'auc' or 'ci'.
        feature_grid: penalizer values to try; defaults to
            ``np.logspace(7, 20, 14)``.

    Returns:
        dict with keys 'model', 'survival', 'survival_function',
        'prob_true', 'times_hazards_outcomes' and 'lambdas'. An empty dict
        is returned when no penalizer produced a score or the final fit
        fails.
    """
    if feature_grid is None:
        feature_grid = np.logspace(7, 20, 14)
    survival = {}
    train_index, test_index = ix_in
    x_train, x_test = x_train0.iloc[train_index, :], x_train0.iloc[test_index, :]

    lamb_dict = {'auc': {}, 'ci': {}}
    for il, lamb in enumerate(feature_grid):
        ix_inner2 = leave_one_out_cv(x_train, x_train['outcome'], ddtype='all_data')
        # Sample at most 10 folds; fewer are used when fewer exist.
        n_samp = min(10, len(ix_inner2))
        ix_rand_samp = np.random.choice(np.arange(len(ix_inner2)), n_samp, replace=False)
        ix_inner2_samp = np.array(ix_inner2, dtype='object')[ix_rand_samp]
        start = time.time()

        hazards = []
        event_times = []
        event_outcomes = []
        probs_in = []
        true = []

        model = CoxPHFitter(penalizer=lamb, l1_ratio=1.)
        for ix_in2 in ix_inner2_samp:
            train_ix, test_ix = ix_in2
            x_tr2, x_ts2 = x_train.iloc[train_ix, :], x_train.iloc[test_ix, :]
            tmpts_in = [xx.split('-')[1] for xx in x_tr2.index.values]
            samp_weights = get_class_weights(np.array(y_int[x_tr2.index.values]), tmpts_in)
            samp_weights[samp_weights <= 0] = 1
            x_tr2.insert(x_tr2.shape[1], 'weights', samp_weights)
            try:
                model.fit(x_tr2, duration_col='week', event_col='outcome',
                          weights_col='weights', robust=True, show_progress = False)
            except Exception:
                # A fold that cannot be fitted is skipped.
                continue
            pred_f = model.predict_survival_function(x_ts2.iloc[0, :])
            probs_in.append(1 - pred_f.loc[4.0].item())
            true.append(x_ts2['outcome'].iloc[-1])
            hazards.append(model.predict_partial_hazard(x_ts2))
            event_times.append(x_ts2['week'])
            event_outcomes.append(x_ts2['outcome'])

        try:
            score = concordance_index(pd.concat(event_times), pd.concat(hazards), pd.concat(event_outcomes))
            lamb_dict['ci'][lamb] = score
            end_t = time.time()
            print(str(il) + ' complete')
            print((end_t - start)/60)
        except Exception:
            # Without a concordance score the AUC for this penalizer is
            # skipped as well (unchanged from the original flow).
            print('No score available')
            continue
        try:
            score = sklearn.metrics.roc_auc_score(true, probs_in)
            lamb_dict['auc'][lamb] = score
        except Exception:
            continue

    scores = lamb_dict[metric]
    if not scores:
        # No penalizer could be scored, so there is nothing to select from.
        return {}
    lambdas, aucs_in = list(zip(*scores.items()))
    ix_max = np.argmax(aucs_in)
    best_lamb = lambdas[ix_max]

    model_out = CoxPHFitter(penalizer=best_lamb, l1_ratio=1.)
    tmpts_in = [xx.split('-')[1] for xx in x_train.index.values]
    samp_weights = get_class_weights(np.array(y_int[x_train.index.values]), tmpts_in)
    samp_weights[samp_weights<=0] = 1
    x_train.insert(x_train.shape[1], 'weights', samp_weights)
    try:
        model_out.fit(x_train, duration_col='week', event_col='outcome', weights_col='weights', robust=True)
    except Exception:
        return {}
    pred_f = model_out.predict_survival_function(x_test.iloc[0, :])
    pt = x_test.index.values[0].split('-')[0]

    hazard_out = model_out.predict_partial_hazard(x_test)

    # NOTE(review): `x` is not defined inside this function; it must exist at
    # module level for these two lines to work. Confirm which frame is meant.
    pts = [ii.split('-')[0] for ii in x.index.values]
    tmpts = [ii.split('-')[1] for ii in x.index.values]
    ixs = np.where(np.array(pts) == pt)[0]
    survival['actual'] = str(np.max([float(tmpt) for tmpt in np.array(tmpts)[ixs]]))
    if y_per_pt[pt] == 'Cleared':
        survival['actual'] = survival['actual'] + '+'

    probs_sm = 1 - pred_f.loc[4.0].item()

    y_pred_exp = model_out.predict_expectation(x_test.iloc[[0], :])
    survival['predicted'] = str(np.round(y_pred_exp.item(), 3))

    final_dict = {}
    final_dict['model'] = model_out
    final_dict['survival'] = survival
    final_dict['survival_function'] = pred_f
    final_dict['prob_true'] = (probs_sm, y_per_pt[pt])
    final_dict['times_hazards_outcomes'] = (x_test['week'], hazard_out, x_test['outcome'])
    final_dict['lambdas'] = lamb_dict
    return final_dict
Пример #10
0
def _coef_stats(cph, summary, name):
    """Return [coef, se, HR, CI lower, CI upper, p, z, norm mean] for covariate `name`."""
    coef = cph.params_[name]
    se = cph.standard_errors_[name]
    return [
        coef,
        se,
        np.exp(coef),
        np.exp(coef - 1.96 * se),
        np.exp(coef + 1.96 * se),
        summary.p[name],
        summary.z[name],
        cph._norm_mean[name],
    ]


def compute_coxhr(pair, df, lag, step_size, is_sex_specific, nindivs,
                  res_writer):
    """
    Fit a weighted Cox model and write one result row to ``res_writer``.

    The model is fitted on ``df`` with 'duration' as duration column,
    'outcome' as event column, 'weight' as weights column and robust errors.
    The absolute risk is ``1 - S(t)`` for a "mean individual" built from the
    MEAN_INDIV_* constants, where ``t`` is the upper bound of ``lag`` or, when
    ``lag`` is None, ``STUDY_ENDS - STUDY_STARTS``.

    Args:
        pair: ``(prior, outcome)`` identifiers, written to the row as-is.
        df: DataFrame holding the model columns.
        lag: ``None`` or a ``(min_lag, max_lag)`` pair.
        step_size: passed to ``CoxPHFitter.fit`` and written to the row.
        is_sex_specific: when true the 'female' covariate is dropped and its
            statistics are written as NaN.
        nindivs: written to the row as-is.
        res_writer: object with a ``writerow`` method.
    """
    logger.info("Running Cox regression")
    prior, outcome = pair
    # Handle sex-specific endpoints
    if is_sex_specific:
        df = df.drop(columns=["female"])

    # Fit Cox model
    cph = CoxPHFitter()
    cph.fit(
        df,
        duration_col="duration",
        event_col="outcome",
        step_size=step_size,
        # For the case-cohort study we need weights and robust errors:
        weights_col="weight",
        robust=True)

    # Compute absolute risk
    mean_indiv = pd.DataFrame({
        "BIRTH_TYEAR": [MEAN_INDIV_BIRTH_YEAR],
        "prior": [MEAN_INDIV_HAS_PRIOR_ENDPOINT],
        "female": [MEAN_INDIV_FEMALE_RATIO]
    })
    if is_sex_specific:
        mean_indiv.pop("female")

    if lag is None:
        predict_at = STUDY_ENDS - STUDY_STARTS
        lag_value = None
    else:
        _min_lag, max_lag = lag
        predict_at = max_lag
        lag_value = max_lag

    surv_probability = cph.predict_survival_function(
        mean_indiv, times=[predict_at]).values[0][0]
    absolute_risk = 1 - surv_probability

    # Get values out of the fitted model. The summary table is read once and
    # shared by all covariates instead of being rebuilt per access.
    summary = cph.summary
    prior_stats = _coef_stats(cph, summary, "prior")
    year_stats = _coef_stats(cph, summary, "BIRTH_TYEAR")
    if not is_sex_specific:
        sex_stats = _coef_stats(cph, summary, "female")
    else:
        sex_stats = [np.nan] * 8

    # Save the baseline cumulative hazard (bch)
    df_bch = cph.baseline_cumulative_hazard_
    baseline_cumulative_hazard = bch_at(df_bch, predict_at)
    bch_values = {timepoint: bch_at(df_bch, timepoint)
                  for timepoint in BCH_TIMEPOINTS}

    # Save values; the column order is part of the output format.
    res_writer.writerow(
        [prior, outcome, lag_value, step_size, nindivs, absolute_risk]
        + prior_stats
        + year_stats
        + sex_stats
        + [
            baseline_cumulative_hazard, bch_values[0], bch_values[2.5],
            bch_values[5], bch_values[7.5], bch_values[10],
            bch_values[12.5], bch_values[15], bch_values[17.5],
            bch_values[20], bch_values[21.99]
        ])
Пример #11
0
def _roc_summary(y_true, scores):
    """Return (AUC %, threshold, sensitivity %, specificity %) where tpr is closest to 1 - fpr."""
    fpr, tpr, thresholds = roc_curve(y_true, scores)
    auc_pct = round(auc(fpr, tpr) * 100, 1)
    best = np.abs(tpr - (1 - fpr)).argsort()[0]
    sens = round(tpr[best] * 100, 1)
    spec = round((1 - fpr[best]) * 100, 1)
    return auc_pct, thresholds[best], sens, spec


def roc_cut(df, vars, group, time=0, family='logistic', save_tab=False):
    """
    Build a table of ROC-based cut-offs, one row per predictor in ``vars``.

    For ``family='logistic'`` each predictor is scored with a binomial GLM
    (with constant); for ``family='cox'`` with a single-covariate Cox model
    whose survival prediction is taken at one time point. For every
    predictor the AUC and the threshold at which sensitivity and
    specificity are closest are computed, and ``roc_job`` converts the
    threshold into a cut-off.

    Args:
        df: source DataFrame.
        vars: list of predictor column names.
        group: name of the outcome / event column.
        time: name of the duration column (used by the 'cox' family only).
        family: 'logistic' or 'cox'; any other value prints 'Error' and
            yields an empty table.
        save_tab: when true the table is written to an Excel file and the
            result of ``to_excel`` (None) is returned.

    Returns:
        DataFrame with the columns 'Фактор', 'AUC, %', 'Порог',
        'Чувствительность, %', 'Специфичность, %', or None when
        ``save_tab`` is set.
    """
    rows = []

    if family == 'logistic':
        table = df[vars]
        y = df[group]
        for col in table:
            predictor = table[col]
            design = sma.add_constant(predictor)
            # One fit per predictor is enough; the former inner loop only
            # repeated the identical fit once per column.
            pred = sma.GLM(y, design,
                           family=sma.families.Binomial()).fit().predict(design)
            r, thres, sens, spec = _roc_summary(y, pred)
            cut = roc_job(pred, predictor=predictor, pos=thres).compute()
            rows.append({
                'Фактор': predictor.name,
                'AUC, %': r,
                'Порог': cut,
                'Чувствительность, %': sens,
                'Специфичность, %': spec
            })

    elif family == 'cox':
        cph = CoxPHFitter()
        # Copy so the helper columns are not written into a slice of df.
        table = df[vars].copy()
        table[time] = df[time]
        table[group] = df[group]
        # NOTE(review): 0.99 is the 0.99th percentile, not the 99th; kept as
        # in the original - confirm which was intended.
        eval_time = np.percentile(df[[time]], 0.99)
        for col in table.columns[:-2]:
            cph.fit(table[[col, group, time]],
                    duration_col=time,
                    event_col=group)
            # One survival value per row; selected by position because the
            # column of the transposed prediction is labelled by the time.
            pred = cph.predict_survival_function(table[[col]],
                                                 eval_time).T.iloc[:, 0]
            r, thres, sens, spec = _roc_summary(table[group], pred)
            cut = roc_job(pred, predictor=table[col], pos=thres).compute()
            rows.append({
                'Фактор': table[col].name,
                'AUC, %': r,
                'Порог': cut,
                'Чувствительность, %': sens,
                'Специфичность, %': spec
            })

    else:
        print('Error')

    # DataFrame.append was removed in pandas 2.0, so the rows are collected
    # in a list and the frame is built once.
    result = pd.DataFrame(rows).reindex(columns=[
        'Фактор', 'AUC, %', 'Порог', 'Чувствительность, %', 'Специфичность, %'
    ])

    if save_tab:
        return result.to_excel('Пороги по ROC-анализу.xlsx')
    return result
def create_model(temp_features, current_cluster, use_cluster_as_feature):
    """
    Train a penalised Cox model and print/plot its classification quality.

    The data is split 75/25, the training part is oversampled with SMOTE,
    columns without variance inside either outcome group are dropped, and a
    ``CoxPHFitter(penalizer=0.1)`` is fitted with 'tenure' as duration and
    'Longterm_TransplantOutcome' as event. The survival prediction at the
    first time point later than ``365 * 6`` is thresholded at 0.5 (at or
    below 0.5 counts as a predicted event) and compared with the test
    labels through confusion matrices, a classification report and a ROC
    curve.

    Args:
        temp_features: DataFrame containing 'TransplantationID',
            'PatientID', 'cluster', 'tenure' and
            'Longterm_TransplantOutcome' among its columns.
        current_cluster: cluster identifier, only used in a log message.
        use_cluster_as_feature: when true 'cluster' is one-hot encoded,
            otherwise it is dropped.

    Raises:
        ValueError: if no predicted time point lies beyond ``365 * 6``.
    """
    # Imports stay function-local, as in the rest of this function's style.
    import matplotlib.pyplot as plt
    import seaborn
    import sklearn.metrics as metrics
    from imblearn.over_sampling import SMOTE
    from sklearn.model_selection import train_test_split

    separator = '----------------------------------------------------------------------------------------------------------------------------'
    for _ in range(5):
        print(separator)

    # The labels are taken before the identifier columns are removed.
    temp_labels = np.array(temp_features['Longterm_TransplantOutcome'])
    temp_features = temp_features.drop(['TransplantationID', 'PatientID'], axis=1)
    if use_cluster_as_feature:
        temp_features = pd.get_dummies(data=temp_features, columns=['cluster'])
        print('Creating model for all clusters with cluster as feature')
    else:
        temp_features = temp_features.drop('cluster', axis=1)
        print('Creating model for cluster ' + str(current_cluster))

    # Split the dataset into train and test sets.
    train_features, test_features, train_labels, test_labels = train_test_split(
        temp_features, temp_labels, test_size=0.25, random_state=42)

    # SMOTE for upsampling.
    train_features, train_labels = SMOTE().fit_resample(train_features, train_labels)

    # Drop features with no variance inside either outcome group. The
    # columns are collected first and dropped once, without mutating the
    # split frames in place.
    events = train_labels.astype(bool)
    constant_cols = [
        col for col in train_features.columns
        if col != 'Longterm_TransplantOutcome'
        and (train_features.loc[events, col].var() == 0.0
             or train_features.loc[~events, col].var() == 0.0)
    ]
    train_features = train_features.drop(columns=constant_cols)
    test_features = test_features.drop(columns=constant_cols)

    # Cox regression model.
    cph = CoxPHFitter(penalizer=0.1)
    cph.fit(train_features, 'tenure', event_col='Longterm_TransplantOutcome',
            show_progress=False, step_size=0.1)

    print('concordance index: ' + str(cph.concordance_index_))

    tr_rows = test_features.loc[:, test_features.columns != 'Longterm_TransplantOutcome']
    # Transposed: one row per test sample, one column per time point.
    predictions = cph.predict_survival_function(tr_rows).transpose()

    # Error analysis at the first time point after six years (in days).
    col_use = next(
        (col for col in predictions.columns if float(col) > (365 * 6)), None)
    if col_use is None:
        raise ValueError('no predicted time point beyond 365*6 is available')
    print(col_use)

    # Survival at or below 0.5 counts as a predicted event (1).
    predictions = (predictions[col_use] <= 0.5).astype(int).to_frame(name='prediction')
    labels = pd.DataFrame(test_labels, columns=['label'])

    predictions.reset_index(drop=True, inplace=True)
    labels.reset_index(drop=True, inplace=True)

    # Confusion matrix.
    conf_mat = metrics.confusion_matrix(labels, predictions)
    print(conf_mat)
    seaborn.heatmap(conf_mat)

    labels_desc = [1, 0]
    # `labels` must be passed by keyword in current scikit-learn releases.
    cm = metrics.confusion_matrix(predictions, labels, labels=labels_desc)
    print_cm(cm, labels_desc)

    # Precision, recall, F1 score.
    print(metrics.classification_report(labels, predictions))

    # ROC curve.
    fpr, tpr, threshold = metrics.roc_curve(labels, predictions)
    roc_auc = metrics.auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()
Пример #13
0
'''

# 1. Kaplan Meier Survivor Function
kmf = KaplanMeierFitter()
T = data['dur']
C = data['evt']
kmf.fit(T, event_observed=C)
# The fitters' plot() returns Axes, which have no savefig(); draw on an
# explicit figure and save that figure instead.
fig1, ax1 = plt.subplots()
kmf.plot(ax=ax1)
ax1.set_title('Survivor Function, Drop Out')
fig1.savefig('fig1.png')

# 2. Nelson Aalen Cumulative Hazard Function
naf = NelsonAalenFitter()
naf.fit(T, event_observed=C)
fig2, ax2 = plt.subplots()
naf.plot(ax=ax2)
ax2.set_title('Cumulative Hazard Function, Drop Out')
fig2.savefig('fig2.png')

# 3. Cox Proportional Hazard Model
cph = CoxPHFitter()
# The second argument is the duration column: 'dur' (as used for T above),
# not the covariate 'sex'.
cph.fit(data, 'dur', event_col='evt')
fig3, ax3 = plt.subplots()
cph.predict_survival_function(data).plot(ax=ax3)
fig3.savefig('fig3.png')
'''
I couldn't make this one give me the result I wanted.
The functioning Stata code is:
stphplot, by(sex) nolntime
and the resulting visualization is...
'''
img = mpimg.imread('cph.png')
imgplot = plt.imshow(img)
plt.show()
Пример #14
0
        train_data_df = \
            pd.DataFrame(np.hstack((X_train_standardized, y_train)),
                         columns=feature_names + ['time', 'status'])

        surv_model = CoxPHFitter()
        surv_model.fit(train_data_df, duration_col='time', event_col='status',
                       show_progress=False, step_size=.1)

        sorted_y_test = np.sort(np.unique(y_test[:, 0]))
        if sorted_y_test[0] != 0:
            mesh_points = np.concatenate(([0.], sorted_y_test))
        else:
            mesh_points = sorted_y_test
        surv = \
            surv_model.predict_survival_function(X_test_standardized,
                                                 mesh_points)
        surv = surv.values.T

        # ---------------------------------------------------------------------
        # compute c-index
        #
        if cindex_method == 'cum_haz':
            cum_haz = \
                surv_model.predict_cumulative_hazard(X_test_standardized,
                                                     sorted_y_test)
            cum_haz = cum_haz.values.T
            cum_hazard_scores = cum_haz.sum(axis=1)
            test_cindex = concordance_index(y_test[:, 0],
                                            -cum_hazard_scores,
                                            y_test[:, 1])
        elif cindex_method == 'cum_haz_from_surv':
Пример #15
0
from lifelines.utils import median_survival_times, qth_survival_times

# Subjects for which no event has been observed.
censored_subjects = df.loc[df['DEAD'] == 0]
num_cs = len(censored_subjects)
print(num_cs)
# Add tailor made truck driven nnn km
d = [[1000, 10000, 0, 0, 0, 0, 2], [1000, 10000, 0, 1, 0, 0, 2],
     [1000, 10000, 0, 0, 1, 0, 2], [1000, 10000, 0, 1, 1, 0, 2]]
num_d = len(d)

dfn = pd.DataFrame(
    d, columns=["ID", "KM", "DEAD", "ENGINE", "MOUNTAIN", "CITY", "MONDAY"])
print(dfn)
# DataFrame.append was removed in pandas 2.0; concat gives the same result.
censored_subjects = pd.concat([censored_subjects, dfn], ignore_index=True)

print(censored_subjects)

unconditioned_sf = cph.predict_survival_function(censored_subjects)
print(unconditioned_sf)

predictions_75 = qth_survival_times(0.75, unconditioned_sf)
predictions_25 = qth_survival_times(0.25, unconditioned_sf)
predictions_50 = median_survival_times(unconditioned_sf)
print(predictions_50)

# One survival curve per subject.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
for f in unconditioned_sf:
    ax.plot(unconditioned_sf[f], alpha=.5, label=f)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
Пример #16
0
#print cancer['T'].unique()
#print cancer['E'].unique()
#cancer = cancer.dropna()


# the '-1' term
# refers to not adding an intercept column (a column of all 1s).
# It can be added to the Fitter class.

covMatrix = cancer.cov()

cf = CoxPHFitter()
# The model is fitted on the observations themselves. Fitting on the
# covariance matrix (as before) treats each covariance row as a subject.
cf.fit(cancer, 'T', event_col='E')  # extra parameter for categorical: strata=catVar
cf.print_summary()

curve = cf.predict_survival_function(cancer)
curve.plot()
plt.show()
# print() with one pre-formatted argument behaves the same on Python 2 and 3.
# Newer lifelines exposes the coefficients as params_, older ones as hazards_.
print("hazard coeff {}".format(
    cf.params_ if hasattr(cf, "params_") else cf.hazards_))
print("baseline  {}".format(cf.baseline_hazard_))

'''
scores = k_fold_cross_validation(cf, covMatrix, 'T', event_col='E', k=3)
print scores
print np.mean(scores)
print np.std(scores)

'''

Пример #17
0
 pred_surv = []
 for i in range(y_pred.shape[0]):
     pred_surv.append(np.interp(fu_time, times, y_pred[i, :]))
 pred_surv = np.array(pred_surv)
 (pred, actual) = calib_plot(fu_time,
                             n_bins,
                             pred_surv,
                             data_test.time.as_matrix(),
                             data_test.dead.as_matrix(),
                             CB_color_cycle[3],
                             'Deepsurv',
                             alpha=my_alpha,
                             markersize=my_markersize,
                             markertype='s')
 #mse_array[2, fu_time_i] = ((pred-actual)**2).mean()
 y_pred = cph.predict_survival_function(data_test)
 times = y_pred.index.values.astype('float64')
 y_pred = y_pred.as_matrix().transpose()
 pred_surv = []
 for i in range(y_pred.shape[0]):
     pred_surv.append(np.interp(fu_time, times, y_pred[i, :]))
 pred_surv = np.array(pred_surv)
 (pred, actual) = calib_plot(fu_time,
                             n_bins,
                             pred_surv,
                             data_test.time.as_matrix(),
                             data_test.dead.as_matrix(),
                             CB_color_cycle[2],
                             'Cox PH model',
                             alpha=my_alpha,
                             markersize=my_markersize,
Пример #18
0
    X_train_std = X_train_std[:, :-1]
    X_test_std = X_test_std[:, :-1]
    feature_names = feature_names[:-1]

# Training frame: standardized features followed by the two target columns.
train_data_df = pd.DataFrame(
    np.hstack((X_train_std, y_train)),
    columns=feature_names + ['time', 'status'],
)

# Fit the Cox model on the training frame.
surv_model = CoxPHFitter()
surv_model.fit(
    train_data_df,
    duration_col='time',
    event_col='status',
    show_progress=False,
    step_size=.1,
)

# Predicted survival at the times in sorted_y_test, transposed afterwards.
surv_df = surv_model.predict_survival_function(X_test_std, sorted_y_test)
surv = surv_df.values.T

print()
print('[Test data statistics]')
sorted_y_test_times = np.sort(y_test[:, 0])
print('Quartiles:')
print('- Min observed time:', np.min(y_test[:, 0]))
# Q1 and Q3 are read off the sorted times by position.
print('- Q1 observed time:',
      sorted_y_test_times[int(0.25 * len(sorted_y_test_times))])
print('- Median observed time:', np.median(y_test[:, 0]))
print('- Q3 observed time:',
      sorted_y_test_times[int(0.75 * len(sorted_y_test_times))])
print('- Max observed time:', np.max(y_test[:, 0]))
print('Mean observed time:', np.mean(y_test[:, 0]))
print('Fraction censored:', 1. - np.mean(y_test[:, 1]))
Пример #19
0
# Derive the 'dead' indicator from the status code (1 -> 0, 2 -> 1).
for status_code, dead_flag in ((1, 0), (2, 1)):
    data.loc[data.status == status_code, 'dead'] = dead_flag
data.head()

# Fit the Kaplan-Meier estimator on durations and the death indicator.
kmf.fit(durations=data["time"], event_observed=data["dead"])

# Event table of the fitted estimator (notebook-style inspection).
kmf.event_table

# Restrict the frame to the columns used by the Cox model.
model_columns = [
    'time', 'age', 'sex', 'ph.ecog', 'ph.karno', 'pat.karno', 'meal.cal',
    'wt.loss', 'dead'
]
data = data[model_columns]

# Fit the Cox model and print its summary.
cph = CoxPHFitter()
cph.fit(data, "time", event_col="dead")
cph.print_summary()

# Plot the fitted coefficients.
cph.plot()

# Rows 10..14 are inspected and then used for the survival curves.
data.iloc[10:15, :]
d_data = data.iloc[10:15, :]
cph.predict_survival_function(d_data).plot()
Пример #20
0
# Shift and scale the two remaining radiomics features.
c = (wavelet_HLL_glszm_LargeAreaHighGrayLevelEmphasis - 12543870000) \
    / 14860100000
d = (wavelet_LLL_gldm_LargeDependenceHighGrayLevelEmphasis - 27874.14) \
    / 11933.33

# rad_score: weighted sum of the four scaled features.
rad_score = a * (-1.3008) + b * 0.6083 + c * (-0.4295) + d * 0.3595

# One-row frame with the test patient's information.
test_patient = pd.DataFrame(
    [(rad_score, age, FVC, LDH_rate)],
    columns=['rad_score', 'age', 'FVC<50', 'LDH_rate'],
)

# Build the Cox model from the training table.
train = pd.read_csv('train.plus.rad_score_renew+HRCTscore.csv')
from lifelines import CoxPHFitter
cph = CoxPHFitter()
# Keep only the duration, event and covariate columns.
model_columns = [
    'Survival', 'CustomLabel', 'rad_score', 'age', 'FVC<50', 'LDH_rate'
]
feature_tr = train[model_columns]
cph.fit(feature_tr, duration_col='Survival', event_col='CustomLabel')

# Survival prediction for the test patient at time 24.
cph.predict_survival_function(test_patient, 24)

# Baseline functions of the fitted model (notebook-style inspection).
cph.baseline_hazard_
cph.baseline_cumulative_hazard_
cph.baseline_survival_
# 4. We get the concordance. Our model has a concordance of .929 out of 1, so it's a very good Cox model. We can use
#   this to compare between models, kind of like accuracy in Logistic Regression.

# Let's actually plot all of this to get a better picture.
cph.plot()
# NOTE(review): newer lifelines releases appear to expose this plot as
# `plot_partial_effects_on_outcome`; confirm against the installed version.
cph.plot_covariate_groups('TotalCharges', values=[0,4000], cmap='coolwarm')
# You can see in the survival curve plot that customers that have Total charges closer to 0 are at a higher risk of
# churning compared to those with charges closer to 4000.

# Now let's do some churn prediction now that we have some useful insights into what makes customers churn.
# Take all the non-churners, as we can't retain those who have already churned; these are called censored_subjects,
# sticking to Survival Analysis lingo.
censored_subjects = data.loc[data['Churn_Yes'] == 0]

# Now we can predict their unconditioned survival curves.
unconditioned_sf = cph.predict_survival_function(censored_subjects)
# These are unconditioned because we will predict some churn before the customer's current tenure time.

# Condition each curve on the customer's current tenure: divide the curve by
# its own value at that tenure and cap the result at 1.
# `Series.clip(upper=1)` is used because `Series.clip_upper` no longer exists
# in current pandas releases.
conditioned_sf = unconditioned_sf.apply(
    lambda c: (c / c.loc[data.loc[c.name, 'tenure']]).clip(upper=1))

# Now we can investigate customers to see how the conditioning has affected their survival over the baseline rate.
subject = 12
unconditioned_sf[subject].plot(ls="--", color="#A60628", label="unconditioned")
conditioned_sf[subject].plot(color="#A60628", label="conditioned on $T>58$")
plt.legend()
# We can see that cust 12 is still a customer after 58 months, which means cust 12's survival curve drops slower than
# the baseline for similar custs without that condition.

# predict_survival_function has created a matrix of survival probabilities for each remaining customer at each
# point in time. What we need to do now is use that to select a single value as a prediction for how long a customer
Пример #22
0
def get_surv_curv(data, player):
    """Build a Plotly figure comparing one player's survival curve to the baseline.

    A Cox proportional-hazards model is fitted on ``data`` with
    ``'NBA_Experience'`` as the duration column and ``'active'`` as the event
    column. The figure overlays the fitted baseline survival curve and the
    curve predicted for ``player``, and annotates the percentile rank of the
    player's predicted expectation among all rows of ``data``.

    Args:
        data: DataFrame indexed by player label; must contain the
            ``'NBA_Experience'`` and ``'active'`` columns plus the covariates.
        player: Index label of the row in ``data`` to highlight.

    Returns:
        A ``go.Figure`` with two scatter traces and one text annotation.
    """
    outcome_columns = ['NBA_Experience', 'active']
    ink = '#253046'
    paper = '#F8F3F1'

    model = CoxPHFitter()
    model.fit(data, 'NBA_Experience', event_col='active')

    # Curves: fitted baseline versus the selected player's covariates.
    player_covariates = data.loc[[player]].drop(outcome_columns, axis=1)
    baseline_curve = model.baseline_survival_
    player_curve = model.predict_survival_function(player_covariates)

    # Percentile rank of the player's predicted expectation across all rows.
    # `.values[0]` below expects the ranked row to be array-like, not a scalar.
    all_covariates = data.drop(outcome_columns, axis=1)
    ranked = model.predict_expectation(all_covariates).rank(pct=True)
    player_pct = ranked.loc[player]
    annotation_text = 'Career Length Prediction Percentile: ' + str(
        round(player_pct.values[0], 2))

    traces = [
        go.Scatter(name='League Average',
                   x=baseline_curve.index,
                   y=baseline_curve['baseline survival'].values,
                   marker={'color': ink}),
        go.Scatter(name=player,
                   x=player_curve.index,
                   y=player_curve[player].values,
                   marker={'color': '#B35E3B'}),
    ]

    layout = go.Layout({
        'xaxis': {'title': 'Years in the NBA', 'color': ink},
        'yaxis': {
            'title': 'Probability of remaining in the NBA',
            'color': ink,
        },
        'paper_bgcolor': paper,
        'plot_bgcolor': paper,
        'margin': {'t': 50, 'r': 30},
        # The annotation position is fixed in data coordinates.
        'annotations': [{
            'x': 13,
            'y': 0.78,
            'text': annotation_text,
            'showarrow': False,
            'font': {'size': 14, 'color': ink},
        }],
        'legend': {'x': .8, 'y': 1, 'traceorder': 'normal'},
    })

    return go.Figure(data=traces, layout=layout)
Пример #23
0
    print('Training Process finished')
    # Evaluate our test data: reshape x_test to
    # (n_samples, time_steps, num_input), run a softmax over `logits` to get
    # the predicted scores, then report the `accuracy` op on the same feed.
    x_test = x_test.reshape((x_test.shape[0], time_steps, num_input))
    predicted_y = sess.run(tf.nn.softmax(logits), feed_dict={X: x_test, Y: y_test})
    print("Test accuracy is:", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))

# Survival analysis using the deep learning output as features.
# Keep only the test rows whose second label column equals 1.
increase_indices = np.where(y_test[:, 1] == 1)[0]
n_increase = increase_indices.shape[0]

# Column 0: value taken from column 90 of xx_test (used as the duration).
# Columns 1-3: the three network outputs. Column 4: event flag, always 1.
pre_increase_day = np.reshape(xx_test[increase_indices, 90], (n_increase, 1))
pre_increase_x = np.reshape(predicted_y[increase_indices, :], (-1, 3))
event_col = [1] * pre_increase_x.shape[0]
sur_data = np.column_stack((pre_increase_day, pre_increase_x, event_col))

# Drop the first network output (column 1) before fitting.
df = pd.DataFrame(data=sur_data).drop([1], axis=1)

# Fit the Cox model: column 0 is the duration, column 4 the event indicator.
from lifelines import CoxPHFitter
cph = CoxPHFitter()
cph.fit(df, duration_col=0, event_col=4)
cph.print_summary()  # the same results are available as cph.summary
cph.plot()

# Predict from the covariates only (duration and event columns removed).
X = df.drop([0, 4], axis=1)
cph.predict_partial_hazard(X)
sur_pred = cph.predict_survival_function(X)



Пример #24
0
# Kaplan-Meier estimate of the overall survival function.
kmf = KaplanMeierFitter()
kmf.fit(duration, event_observed=not_censor)
kmf.survival_function_.plot()

# Cox-PH model regression on `data`.
from lifelines import CoxPHFitter
cf = CoxPHFitter()
cf.fit(data, duration_col='duration', event_col='event')
cf.print_summary()

## Get predictions from the model ##

# A single profile can be plotted on its own, e.g. a 24 year old college grad:
#college_24 = pd.DataFrame({'age':[24], 'college':[1]})
#cf.predict_survival_function(college_24).plot()

# ... or a 65 year old high school grad:
#hs_65 = pd.DataFrame({'age':[65], 'college':[0]})
#cf.predict_survival_function(hs_65).plot()

# Predicted survival for three labelled profiles; the 'index' column becomes
# the row labels passed to the prediction.
mixed = pd.DataFrame({
    'age': [24, 65, 42],
    'college': [1, 0, .4],
    'index': ['24yr old College Grad', '65yr old HS Grad', 'Average'],
}).set_index(['index'])
cf.predict_survival_function(mixed).plot()

# Label the axes of the plot that was just drawn.
pl.title('Probability of Survival at Time t')
pl.xlabel('Time t')
pl.ylabel('Probability of Survival')
"""
cf.predict_survival_function without the .plot() option will return a matrix-like
object that has the probability of survival at time t.
"""
#scores.to_csv(r'T:\tbase\feature_importances.csv', quoting=csv.QUOTE_NONNUMERIC)
    
# =============================================================================
# #Cox Regression Model
# =============================================================================
cph = CoxPHFitter(penalizer=0.1)   ## Instantiate the class to create a cph object
cph.fit(train_features, 'tenure', event_col='Longterm_TransplantOutcome', show_progress=True, step_size=0.1)   ## Fit the data to train the model

cph.summary.to_csv(r'T:\tbase\cph_summary.csv')

print('concordance index: ' + str(cph.concordance_index_))

# Split the test frame into everything except the outcome column, and the
# outcome column alone.
tr_rows = test_features.loc[:, test_features.columns != 'Longterm_TransplantOutcome'].iloc[:, :]
tr_rows_res = test_features.loc[:, test_features.columns == 'Longterm_TransplantOutcome'].iloc[:, :]

# Predict once and reuse the result for both the plot and the table below.
predictions = cph.predict_survival_function(tr_rows)
predictions.plot()
print(tr_rows_res)

# After transposing, the columns of `predictions` are what the prediction had
# as its index.
predictions = predictions.transpose()

#pd.DataFrame(predictions.columns).to_clipboard()

# =============================================================================
# #Qualitative error analysis
# First column label greater than 365*6; None when no column qualifies, so
# `col_use` is always defined afterwards.
col_use = next(
    (col for col in predictions.columns if float(col) > (365 * 6)), None)
if col_use is not None:
    print(col_use)
# Bare expression from a notebook cell: only shows output interactively.
df_dummy.shape

# In[79]:

# Proportional hazard model: 'finalTerm' is passed as the duration column and
# 'default' as the event column.
cph = CoxPHFitter()
cph.fit(df_dummy, 'finalTerm', event_col='default')
cph.print_summary()

# In[80]:

# First ten rows, used as a small sample for plotting.
tr_rows = df_dummy.iloc[0:10]

# In[81]:

cph.predict_survival_function(tr_rows).plot()

# In[82]:

# Survival predictions for every row, transposed.
preds = cph.predict_survival_function(df_dummy).T

# In[87]:

# Column-wise concatenation of `df` and the predictions.
# NOTE(review): this relies on `df` and `preds` sharing row labels -- confirm.
final = pd.concat([df, preds], axis=1)

# In[88]:

final

# In[105]:
Пример #27
0
def _covariate_stats(cph, summary, norm_mean, name):
    """Return the eight statistics written to the result row for ``name``.

    Order: coefficient, standard error, hazard ratio, lower and upper bound
    of the hazard-ratio interval (coefficient -/+ 1.96 standard errors,
    exponentiated), p-value, z-value, and the covariate's entry in
    ``norm_mean``.
    """
    coef = cph.params_[name]
    se = cph.standard_errors_[name]
    return [
        coef,
        se,
        np.exp(coef),
        np.exp(coef - 1.96 * se),
        np.exp(coef + 1.96 * se),
        summary.p[name],
        summary.z[name],
        norm_mean[name],
    ]


def compute_coxhr(endpoint, df, lag, nindivs, res_writer):
    """Fit a weighted Cox regression and write one result row.

    Args:
        endpoint: Object providing ``SEX`` (a non-missing value marks the
            endpoint as sex-specific) and ``NAME`` (written to the row).
        df: DataFrame with ``duration``, ``death``, ``weight``, ``endpoint``
            and ``BIRTH_TYEAR`` columns, plus ``female``. The ``female``
            column is dropped for sex-specific endpoints.
        lag: ``None``, or a ``(min_lag, max_lag)`` pair. With a pair, the
            survival prediction is made at ``max_lag``; otherwise at
            ``STUDY_ENDS - STUDY_STARTS``.
        nindivs: Value written unchanged to the result row.
        res_writer: Object with a ``writerow`` method that receives the row.

    Returns:
        None. The statistics are emitted through ``res_writer``.
    """
    logger.info("Running Cox regression")
    # Handle sex-specific endpoints
    is_sex_specific = pd.notna(endpoint.SEX)
    if is_sex_specific:
        df = df.drop(columns=["female"])

    # Fit Cox model
    cph = CoxPHFitter()
    cph.fit(
        df,
        duration_col="duration",
        event_col="death",
        # For the case-cohort study we need weights and robust errors:
        weights_col="weight",
        robust=True)

    # Compute absolute risk for a fixed reference individual.
    mean_indiv = {"BIRTH_TYEAR": [1959.0], "endpoint": [True], "female": [0.5]}
    if is_sex_specific:
        mean_indiv.pop("female")

    if lag is None:
        predict_at = STUDY_ENDS - STUDY_STARTS
        lag_value = None
    else:
        _min_lag, max_lag = lag
        predict_at = max_lag
        lag_value = max_lag

    surv_probability = cph.predict_survival_function(
        pd.DataFrame(mean_indiv), times=[predict_at]).values[0][0]
    absolute_risk = 1 - surv_probability

    # Get values out of the fitted model. The summary table is read once and
    # shared by every covariate lookup.
    norm_mean = cph._norm_mean
    summary = cph.summary
    endp_stats = _covariate_stats(cph, summary, norm_mean, "endpoint")
    year_stats = _covariate_stats(cph, summary, norm_mean, "BIRTH_TYEAR")
    if is_sex_specific:
        # "female" was not part of the model: fill its eight slots with NaN.
        sex_stats = [np.nan] * 8
    else:
        sex_stats = _covariate_stats(cph, summary, norm_mean, "female")

    # Save the baseline cumulative hazard (bch)
    df_bch = cph.baseline_cumulative_hazard_
    baseline_cumulative_hazard = bch_at(df_bch, predict_at)
    bch_values = {
        timepoint: bch_at(df_bch, timepoint) for timepoint in BCH_TIMEPOINTS
    }

    # Save values. The bch columns are listed explicitly, so BCH_TIMEPOINTS
    # must contain each of these keys.
    res_writer.writerow([
        endpoint.NAME, lag_value, nindivs, absolute_risk,
        *endp_stats,
        *year_stats,
        *sex_stats,
        baseline_cumulative_hazard, bch_values[0], bch_values[2.5],
        bch_values[5], bch_values[7.5], bch_values[10], bch_values[12.5],
        bch_values[15], bch_values[17.5], bch_values[20], bch_values[21.99]
    ])
    logger.info("done running Cox regression")
Пример #28
0
# cph.print_summary()  # access the results using cph.summary

# Validation
# _X_valid = _X_valid.drop(["Survival", "Event"], axis=1)  # Testing input after preprocessing.

# Remove the duration and event columns so only covariates are passed to the
# model.
_X_valid_1 = _X_valid_1.drop(["Survival", "Event"], axis=1)
_X_valid_2 = _X_valid_2.drop(["Survival", "Event"], axis=1)
_X_valid_3 = _X_valid_3.drop(["Survival", "Event"], axis=1)
_X_valid_4 = _X_valid_4.drop(["Survival", "Event"], axis=1)
_X_valid_5 = _X_valid_5.drop(["Survival", "Event"], axis=1)

# _seq_pred_y_valid_1 = cph.predict_survival_function(_X_valid_1, np.arange(before_steps)).values

# For the k-th validation frame (k = 1..5), predict survival at times
# 0..before_steps and keep row k of the result.
# `.values` replaces `DataFrame.as_matrix()`, which no longer exists in
# current pandas releases.
_pred_times = np.arange(before_steps + 1)
_seq_pred_y_valid = np.array([
    cph.predict_survival_function(_x_valid, _pred_times).values[_step, :]
    for _step, _x_valid in enumerate(
        (_X_valid_1, _X_valid_2, _X_valid_3, _X_valid_4, _X_valid_5),
        start=1)
])

# Transpose so the five frames become the columns.
_seq_pred_y_valid = _seq_pred_y_valid.transpose()

# Sweep the survival-probability threshold from 1.0 downwards in steps of
# 0.01 (np.arange excludes the 0.0 stop value).
thrld_score = dict()
for sur_thrld_valid in np.arange(1.0, 0.0, -0.01):

    # NOTE(review): yy / pp are not used in the visible part of this loop;
    # presumably they are filled further down -- confirm.
    yy = []
    pp = []

    # Binarise the predictions: 1 where the predicted value in the first
    # `before_steps` columns exceeds the threshold, 0 elsewhere.
    seq_pred_y_valid = np.zeros((_seq_pred_y_valid.shape[0], before_steps))
    seq_pred_y_valid[np.where(_seq_pred_y_valid[:, :before_steps] > sur_thrld_valid)] = 1

    # Per-column count of binarised predictions that equal batch_y_valid.
    early_correct = np.sum(seq_pred_y_valid == batch_y_valid, axis=0)
Пример #29
0
#loan_grade = grading("what is the loan grade? Choose from the following Grade 'A','B','C','D','E','F','G'" )
# NOTE(review): `loan_grade` is read below although the assignment above is
# commented out; it has to be defined earlier in the script -- confirm.
ownership = home(
    "Loanholder's house ownership. Is it 'Mortgage', 'Own', 'Rent' or 'Any'?")

# One-row feature frame for the loan being evaluated.
d = {
    'annual_inc': inc,
    'loan_amnt': loan,
    'delinq_2yrs': delinq_2yrs,
    'Grade': loan_grade
}

d.update(ownership)

Test = pd.DataFrame([d])

# Predict the survival curve once and derive all three frames from it.
survival_curve = cf.predict_survival_function(Test)
survivalProb = pd.DataFrame(np.array(survival_curve))
time = pd.DataFrame(survival_curve.index.values).rename(columns={0: 'Time'})
hazard = 1 - pd.DataFrame(survival_curve)

payment_term = get_non_negative_int('What is the payment term? ')
payments = get_non_negative_int('What are the series of loan payments? ')

# Multiply the raw arrays: `time` and `survivalProb` carry different column
# labels ('Time' vs 0), so multiplying the frames directly would align on the
# labels and yield only NaN, which cannot be cast to int.
ExpTerms = pd.DataFrame(
    time.values * survivalProb.values * payment_term).astype(int)


def npv(rate, cashflows):
    """Return the net present value of ``cashflows`` discounted at ``rate``.

    The cash flow at position ``i`` (starting at 0) is divided by
    ``(1 + rate) ** i``; the discounted amounts are added in order.

    Args:
        rate: Per-period discount rate.
        cashflows: Iterable of per-period amounts; the first is undiscounted.

    Returns:
        The discounted sum as a float (``0.0`` for no cash flows).
    """
    growth = 1 + rate
    present_value = 0.0
    for period, amount in enumerate(cashflows):
        present_value += amount / growth**period
    return present_value
Пример #30
0
"""

# print cancer['T'].unique()
# print cancer['E'].unique()
# cancer = cancer.dropna()


# the '-1' term
# refers to not adding an intercept column (a column of all 1s).
# It can be added to the Fitter class.

# NOTE(review): if `cancer` is a pandas DataFrame, `cancer.cov()` is the
# covariance matrix of its columns rather than the observations themselves,
# yet it is what gets passed to `fit` below while the prediction uses
# `cancer` -- confirm this is intended.
covMatrix = cancer.cov()

cf = CoxPHFitter()
cf.fit(covMatrix, "T", event_col="E")  # extra parameter for categorical, strata=catVar
cf.print_summary()

# Predict survival curves for the rows of `cancer` and show the plot.
curve = cf.predict_survival_function(cancer)
curve.plot()
plt.show()
# Print the fitted coefficients and the baseline hazard. Written as print()
# calls with a single %-formatted string so the statements are valid on
# Python 3 and produce the same text the Python 2 print statements did.
print("hazard coeff %s" % (cf.hazards_,))
print("baseline  %s" % (cf.baseline_hazard_,))

"""
scores = k_fold_cross_validation(cf, covMatrix, 'T', event_col='E', k=3)
print scores
print np.mean(scores)
print np.std(scores)

"""
Пример #31
0
lis = [50,140,380,500]
#plt.plot(cph.predict_survival_function(new_final.iloc[lis,:]),label = '1')
plt.plot(cph.predict_survival_function(new_final.iloc[50:51,:]),label = 'engine 1')
plt.plot(cph.predict_survival_function(new_final.iloc[140:141,:]),label = 'engine 2')
plt.plot(cph.predict_survival_function(new_final.iloc[380:381,:]),label = 'engine 3')
plt.plot(cph.predict_survival_function(new_final.iloc[500:501,:]),label = 'engine 4')
plt.xlabel('Remaining Life Cycle')
plt.ylabel('Probability')
plt.legend()
plt.axvline(x=150)
plt.grid()
'''

# randomize 10
# NOTE(review): the list below holds 20 row positions, not 10 -- confirm which
# is intended.
new_lis = [18,20,55,61,86,124,168,229,260,269,362,387,390,437,458,\
           519,530,618,656,667]
# Predicted survival curves for the selected rows, rounded to 4 decimals.
mat = cph.predict_survival_function(new_final.iloc[new_lis, :])
mat = mat.round(decimals=4)

# For each listed column label, print the entries of that column whose value
# lies strictly between 0.48 and 0.52.
# NOTE(review): these labels are looked up as columns of `mat`, and none of
# them appears in `new_lis` -- confirm they exist as column labels.
for xxx in [60, 71, 159, 197, 208]:
    print('%g \n' % xxx)
    print(mat[xxx].iloc[np.where(
        np.logical_and(mat[xxx] > 0.48, mat[xxx] < 0.52))])

# Print `time_in_cycle` of `final` at each selected row position.
for bbb in new_lis:
    print('%g' % bbb)
    print(final.iloc[bbb, :].time_in_cycle)
    print('\n')

# `a`, `b` and `c` are defined outside this fragment.
plt.scatter(a, b, label='real engine life cycle')
plt.scatter(a, c, label='Predicted to Breakdown Soon')
Пример #32
0
        # Fit the Cox model and record how long training took (`tic` is set
        # before this fragment).
        surv_model = CoxPHFitter()
        surv_model.fit(
            train_data_df,
            duration_col='time',
            event_col='status',
            show_progress=False,
            step_size=.1,
        )
        elapsed = time.time() - tic
        print('Time elapsed: %f second(s)' % elapsed)
        np.savetxt(time_elapsed_filename, np.array([[elapsed]]))

        # ---------------------------------------------------------------------
        # evaluation
        #

        # Column 0 of y_test is passed on as the durations and column 1 as
        # the event indicators.
        test_durations = y_test[:, 0]
        test_events = y_test[:, 1]

        # Survival curves evaluated at the unique observed test times.
        sorted_y_test = np.unique(test_durations)
        surv_df = surv_model.predict_survival_function(
            X_test_std, sorted_y_test)
        surv = surv_df.values.T

        # Time-dependent concordance.
        ev = EvalSurv(surv_df, test_durations, test_events, censor_surv='km')
        cindex_td = ev.concordance_td('antolini')
        print('c-index (td):', cindex_td)

        # Concordance on the negated log partial hazard.
        linear_predictors = surv_model.predict_log_partial_hazard(X_test_std)
        cindex = concordance_index(
            test_durations, -linear_predictors, test_events)
        print('c-index:', cindex)

        # Integrated Brier score on 100 evenly spaced times spanning the
        # observed range.
        time_grid = np.linspace(sorted_y_test[0], sorted_y_test[-1], 100)
        integrated_brier = ev.integrated_brier_score(time_grid)
        print('Integrated Brier score:', integrated_brier, flush=True)