def input_process2(self, training_list, clinical):
    """Match image file names to clinical records and build survival target arrays."""
    print("Data processing-II...")
    sample, t, f, age = [], [], [], []
    for fname in tqdm(training_list):  # renamed from 'list' to avoid shadowing the builtin
        for i in range(len(clinical)):
            if clinical.iloc[i]['sample'] + '.png' == str(fname):
                sample.append(clinical.iloc[i]['sample'])
                t.append(clinical.iloc[i]['os_time'])
                f.append(clinical.iloc[i]['vital_status'])
                age.append(clinical.iloc[i]['age'])
                break  # each file matches at most one clinical record
    t = np.asarray(t)
    f = np.asarray(f)
    sample = np.asarray(sample)
    age = np.asarray(age)
    # Discretize follow-up into quarterly intervals over 10 years.
    br = np.arange(0., 365. * 10, 365. / 4)
    nl = len(br) - 1
    y_t = nnet_survival.make_surv_array(t, f, br)
    ind = range(len(f))
    print('Done!')
    if self.omics == 'mrna':
        rand_range = [1, 2]
    elif self.omics == 'meth':
        rand_range = [3, 4]
    elif self.omics == 'mirna':
        rand_range = [4, 5]
    elif self.omics == 'mrna_meth':
        rand_range = [6, 7]
    elif self.omics == 'mrna_mirna':
        rand_range = [8, 9]
    elif self.omics == 'mrna_meth_mirna':
        rand_range = [10, 11]
    return t, f, sample, age, br, nl, y_t, ind, rand_range
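# A minimal sanity-check sketch (not part of the pipeline), assuming the
# nnet_survival module from gensheimer/nnet-survival is importable: it shows
# the shape of the target array built above. Each row has 2 * n_intervals
# entries; the first half flags intervals fully survived, the second half
# flags the interval containing an observed event.
import numpy as np
import nnet_survival

demo_br = np.arange(0., 365. * 10, 365. / 4)
demo_t = np.array([400., 500.])   # days: one death, one censored subject
demo_f = np.array([1, 0])         # 1 = event observed, 0 = censored
demo_y = nnet_survival.make_surv_array(demo_t, demo_f, demo_br)
print(demo_y.shape)  # (2, 2 * (len(demo_br) - 1))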
#Cox model discrimination, test set
prediction = cph.predict_partial_hazard(data_test)
print(concordance_index(data_test.time, -prediction, data_test.dead))  # 0.735

################################
#Nnet-survival / Our model (flexible version to
#allow non-proportional hazards)
halflife = 365. * 1.4
breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
#breaks=-np.log(1-np.arange(0.0,1,0.099))*halflife/np.log(2)
n_intervals = len(breaks) - 1
timegap = breaks[1:] - breaks[:-1]
y_train = nnet_survival.make_surv_array(data_train.time.values, data_train.dead.values, breaks)
y_test = nnet_survival.make_surv_array(data_test.time.values, data_test.dead.values, breaks)
hidden_layers_sizes = 7  #Using single hidden layer, with this many neurons

##############################################################
#Our model: cross-validation to pick L2 regularization strength
from sklearn.model_selection import KFold
n_folds = 10
kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
early_stopping = EarlyStopping(monitor='loss', patience=20)
#l2_array = np.concatenate(([0.],np.power(10.,np.arange(-6,-2))))
l2_array = np.power(10., np.arange(-4, 1))
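# Why these breaks (illustrative check, not from the original script): under
# an exponential survival model with the given half-life, S(t) = 2**(-t/halflife).
# Solving S(t) = 1 - q for q = 0.0, 0.05, ..., 0.95 gives
# t = -log(1 - q) * halflife / log(2), so each interval is expected to contain
# about 5% of events.
q = np.arange(0.0, 0.96, 0.05)
surv_at_breaks = np.exp(-np.log(2) * breaks / halflife)
assert np.allclose(surv_at_breaks, 1 - q)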
def binary_ANN_survival():
    """Two-group simulation: compare Nnet-survival to a Cox model."""
    breaks = np.arange(0, 5000, 50)
    n_intervals = len(breaks) - 1
    timegap = breaks[1:] - breaks[:-1]
    halflife1 = 200
    halflife2 = 400
    halflife_cens = 400
    n_samples = 5000
    np.random.seed(seed=0)
    # Exponential failure times for the two groups, exponential censoring.
    t1 = np.random.exponential(scale=1 / (np.log(2) / halflife1), size=int(n_samples / 2))
    t2 = np.random.exponential(scale=1 / (np.log(2) / halflife2), size=int(n_samples / 2))
    t = np.concatenate((t1, t2))
    censtime = np.random.exponential(scale=1 / (np.log(2) / halflife_cens), size=n_samples)
    f = t < censtime
    t[~f] = censtime[~f]

    y_train = nnet_survival.make_surv_array(t, f, breaks)
    x_train = np.zeros(n_samples)
    x_train[int(n_samples / 2):] = 1  # group indicator is the only covariate

    model = Sequential()
    # Hidden layers would go here. For this example, using simple linear model with no hidden layers.
    model.add(Dense(1, input_dim=1, use_bias=False, kernel_initializer='zeros'))
    model.add(nnet_survival.PropHazards(n_intervals))
    model.compile(loss=nnet_survival.surv_likelihood(n_intervals), optimizer=optimizers.RMSprop())
    # model.summary()
    early_stopping = EarlyStopping(monitor='loss', patience=2)
    history = model.fit(x_train, y_train, batch_size=32, epochs=1000, callbacks=[early_stopping])
    y_pred = model.predict(x_train, verbose=0)  # predict_proba was removed in newer Keras

    # Predicted survival curves vs. Kaplan-Meier estimates for each group.
    kmf = KaplanMeierFitter()
    kmf.fit(t[0:int(n_samples / 2)], event_observed=f[0:int(n_samples / 2)])
    plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[0, :]))), 'bo-')
    plt.plot(kmf.survival_function_.index.values, kmf.survival_function_.KM_estimate, color='k')
    kmf.fit(t[int(n_samples / 2):], event_observed=f[int(n_samples / 2):])
    plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[-1, :]))), 'ro-')
    plt.plot(kmf.survival_function_.index.values, kmf.survival_function_.KM_estimate, color='k')
    plt.xticks(np.arange(0, 2000.0001, 200))
    plt.yticks(np.arange(0, 1.0001, 0.125))
    plt.xlim([0, 2000])
    plt.ylim([0, 1])
    plt.xlabel('Follow-up time (days)')
    plt.ylabel('Proportion surviving')
    plt.title('One covariate. Actual=black, predicted=blue/red.')
    plt.show()

    myData = pd.DataFrame({'x_train': x_train, 't': t, 'f': f})
    cf = CoxPHFitter()
    cf.fit(myData, 't', event_col='f')
    # x_train = x_train.astype(np.float64)
    # cox_coef = cf.hazards_.x_train.values[0]
    cox_coef = cf.hazards_.x_train
    nn_coef = model.get_weights()[0][0][0]
    print('Cox model coefficient:')
    print(cox_coef)
    print('Cox model hazard ratio:')
    print(np.exp(cox_coef))
    print('Neural network coefficient:')
    print(nn_coef)
    print('Neural network hazard ratio:')
    print(np.exp(nn_coef))
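# Illustrative helper (an assumption, not part of nnet_survival): turn the
# per-interval conditional survival probabilities in y_pred into a survival
# probability at an arbitrary follow-up time by linear interpolation between
# the break points, mirroring the cumprod logic in the plots above.
import numpy as np

def surv_prob_at_time(y_pred_row, breaks, fu_time):
    """Estimated P(T > fu_time) for one subject's predicted row."""
    curve = np.concatenate(([1.], np.cumprod(y_pred_row)))  # S(t) at each break
    return np.interp(fu_time, breaks, curve)

# Example (hypothetical values): survival at 1 year for the first subject.
# print(surv_prob_at_time(y_pred[0, :], breaks, 365.))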
breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
n_intervals = len(breaks) - 1
timegap = breaks[1:] - breaks[:-1]

##################################################################
#Flexible model (non-proportional hazards).
#All pts with same exponential survival distribution, no censoring.
#Not described in paper.
halflife1 = 365.
n_samples = 1000
np.random.seed(seed=0)
t = np.random.exponential(scale=1 / (np.log(2) / halflife1), size=n_samples)
f = np.ones(n_samples)  #all patients failed (none censored)
#y_train=nnet_survival.make_surv_array(t,f)
y_train = nnet_survival.make_surv_array(t, f, breaks)
x_train = np.zeros(n_samples)

model = Sequential()
#Hidden layers would go here. For this example, using simple linear model with no hidden layers.
model.add(Dense(n_intervals, input_dim=1, kernel_initializer='zeros', bias_initializer='zeros'))
model.add(Activation('sigmoid'))
model.compile(loss=nnet_survival.surv_likelihood(n_intervals), optimizer=optimizers.RMSprop())
#model.summary()
early_stopping = EarlyStopping(monitor='loss', patience=2)
# The original fit call was truncated; batch size and epoch count below are
# assumed, following the other fit calls in this file.
history = model.fit(x_train, y_train, batch_size=256, epochs=1000, callbacks=[early_stopping])
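# A short check sketch (assumed, not from the original script): since every
# subject here follows the same exponential distribution with half-life
# halflife1 and there is no censoring, the predicted curve can be compared
# against the closed-form survival function S(t) = 2**(-t / halflife1).
y_pred = model.predict(x_train[:1], verbose=0)
pred_surv = np.concatenate(([1.], np.cumprod(y_pred[0, :])))
true_surv = np.power(2., -breaks / halflife1)
print(np.abs(pred_surv - true_surv).max())  # should be small after training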
def ANN_survival_model():
    #################################################################
    print('-------------------------------------------------------------')
    print('start cross-validation to pick L2 regularization strength for training')
    print('-------------------------------------------------------------')
    halflife = 365. * 2.8
    breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
    n_intervals = len(breaks) - 1
    timegap = breaks[1:] - breaks[:-1]
    # y_train = nnet_survival.make_surv_array(data_train.time.values, data_train.dead.values, breaks)
    # y_test = nnet_survival.make_surv_array(data_test.time.values, data_test.dead.values, breaks)
    y_train = nnet_survival.make_surv_array(data_train.duration_d.values, data_train.CVD.values, breaks)
    y_test = nnet_survival.make_surv_array(data_test.duration_d.values, data_test.CVD.values, breaks)
    # make_surv_array distinguishes uncensored from censored records:
    # for uncensored data, the second half of the row has 1 in the interval of
    # death; for censored data, the second half is all 0.
    hidden_layers_sizes = 7  # Using single hidden layer, with this many neurons

    from sklearn.model_selection import KFold
    n_folds = 10
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
    early_stopping = EarlyStopping(monitor='loss', patience=20)
    # l2_array = np.concatenate(([0.],np.power(10.,np.arange(-6,-2))))
    l2_array = np.power(10., np.arange(-4, 1))
    grid_search_train = np.zeros((len(l2_array), n_folds))
    grid_search_test = np.zeros((len(l2_array), n_folds))
    print('running 10-fold cross-validation for each L2 strength\n')
    for i in range(1):  # for i in range(len(l2_array)):
        print(str(i + 1) + ' / ' + str(len(l2_array)) + ' L2 values')
        j = 0
        cv_folds = kf.split(x_train)
        for traincv, testcv in cv_folds:
            x_train_cv = x_train[traincv]
            y_train_cv = y_train[traincv]
            x_test_cv = x_train[testcv]
            y_test_cv = y_train[testcv]
            # ReLU hidden layer, sigmoid output layer, 7-unit hidden layer.
            model = Sequential()
            # model.add(Dense(n_intervals,input_dim=x_train.shape[1],bias_initializer='zeros',kernel_regularizer=regularizers.l2(l2_array[i])))
            # Input dimension equals the number of covariates.
            model.add(Dense(hidden_layers_sizes,
                            input_dim=x_train.shape[1],
                            bias_initializer='zeros',
                            activation='relu',
                            kernel_regularizer=regularizers.l2(l2_array[i])))
            # model.add(Activation('relu'))
            model.add(Dense(n_intervals))
            model.add(Activation('sigmoid'))
            model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                          optimizer=optimizers.RMSprop())  # lr=0.0001))
            history = model.fit(x_train_cv,
                                y_train_cv,
                                batch_size=256,
                                epochs=100000,
                                callbacks=[early_stopping],
                                verbose=0)
            # model.summary()
            print(model.metrics_names)
            grid_search_train[i, j] = model.evaluate(x_train_cv, y_train_cv, verbose=0)
            print(grid_search_train[i, j])
            grid_search_test[i, j] = model.evaluate(x_test_cv, y_test_cv, verbose=0)
            print(grid_search_test[i, j])
            j = j + 1
    print(np.average(grid_search_train, axis=1))
    print(np.average(grid_search_test, axis=1))
    # Keep the L2 strength with the lowest average held-out loss.
    l2_final = l2_array[np.argmax(-np.average(grid_search_test, axis=1))]

    ############################### plot ######################################
    fig, loss_ax = plt.subplots()
    loss_ax.plot(history.history['loss'], 'y', label='train loss')
    # The curves below require fit(validation_data=...) and a compiled
    # 'accuracy' metric, neither of which is set above, so they are disabled:
    # acc_ax = loss_ax.twinx()
    # loss_ax.plot(history.history['val_loss'], 'r', label='test loss')
    # acc_ax.plot(history.history['acc'], 'b', label='train acc')
    # acc_ax.plot(history.history['val_acc'], 'g', label='test acc')
    # acc_ax.set_ylabel('accuracy')
    # acc_ax.legend(loc='lower left')
    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    loss_ax.legend(loc='upper left')
    plt.show()
    ###########################################################################
    score = model.evaluate(x_test, y_test, batch_size=2, verbose=1)
    # print('Test loss: ', score[0])
    # print('Test accuracy: ', score[1])

    # Discrimination performance: one-year survival probability is the
    # cumulative product of the conditional interval probabilities up to the
    # first break after day 365.
    y_pred = model.predict(x_train, verbose=1)  # predict_proba removed in newer Keras
    oneyr_surv = np.cumprod(y_pred[:, 0:np.nonzero(breaks > 365)[0][0]], axis=1)[:, -1]
    print('================================')
    print('Training data concordance index')
    print(concordance_index(data_train.duration_d, oneyr_surv, data_train.CVD))
    print('================================')
    y_pred = model.predict(x_test, verbose=1)
    oneyr_surv = np.cumprod(y_pred[:, 0:np.nonzero(breaks > 365)[0][0]], axis=1)[:, -1]
    print('================================')
    print('Test data concordance index')
    print(concordance_index(data_test.duration_d, oneyr_surv, data_test.CVD))
    print('================================')
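# A generalization sketch (assumed, not from the original script): the
# one-year computation above extends to any horizon by changing the cutoff
# used to slice the conditional interval probabilities.
import numpy as np

def surv_prob_at_horizon(y_pred, breaks, horizon_days):
    """P(T > horizon) per subject, from per-interval conditional survival."""
    cut = np.nonzero(breaks > horizon_days)[0][0]
    return np.cumprod(y_pred[:, 0:cut], axis=1)[:, -1]

# e.g. concordance at two years on the test set (illustrative):
# twoyr_surv = surv_prob_at_horizon(y_pred, breaks, 730)
# print(concordance_index(data_test.duration_d, twoyr_surv, data_test.CVD))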