def binary_ANN_surviavl():
    """Fit a one-covariate proportional-hazards network on simulated data.

    Simulates two equally sized groups with exponentially distributed
    event times (half-lives 200 and 400) and exponential censoring
    (half-life 400), trains a bias-free single-weight Keras model followed
    by ``nnet_survival.PropHazards``, and then:

    * plots the predicted survival curve of the first and last sample
      against the Kaplan-Meier curve of the corresponding group, and
    * prints the fitted network weight next to the coefficient of a
      lifelines ``CoxPHFitter`` trained on the same data.

    Takes no arguments and returns ``None``; results are shown via
    matplotlib and ``print``.
    """
    breaks = np.arange(0, 5000, 50)
    n_intervals = len(breaks) - 1

    halflife1 = 200
    halflife2 = 400
    halflife_cens = 400
    n_samples = 5000
    half = int(n_samples / 2)

    # Draw order (t1, t2, censtime) is kept fixed so that the seeded
    # random stream yields the same simulated data set on every run.
    np.random.seed(seed=0)
    t1 = np.random.exponential(scale=1 / (np.log(2) / halflife1), size=half)
    t2 = np.random.exponential(scale=1 / (np.log(2) / halflife2), size=half)
    t = np.concatenate((t1, t2))
    censtime = np.random.exponential(
        scale=1 / (np.log(2) / halflife_cens), size=n_samples)

    # f is True where the event happened before censoring; censored
    # samples have their follow-up time replaced by the censoring time.
    f = t < censtime
    t[~f] = censtime[~f]

    y_train = nnet_survival.make_surv_array(t, f, breaks)

    # Single binary covariate: 0 for the first half (t1), 1 for the second
    # half (t2).
    x_train = np.zeros(n_samples)
    x_train[half:] = 1

    model = Sequential()
    # Hidden layers would go here. For this example, using simple linear
    # model with no hidden layers.
    model.add(Dense(1, input_dim=1, use_bias=False,
                    kernel_initializer='zeros'))
    model.add(nnet_survival.PropHazards(n_intervals))
    model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                  optimizer=optimizers.RMSprop())

    early_stopping = EarlyStopping(monitor='loss', patience=2)
    model.fit(x_train, y_train, batch_size=32, epochs=1000,
              callbacks=[early_stopping])

    # ``predict`` replaces ``predict_proba``, which newer Keras releases no
    # longer provide on Sequential models; the returned array is the same.
    y_pred = model.predict(x_train, verbose=0)

    # For each group: (rows of that group, index of a representative
    # sample in y_pred, line style of the predicted curve). The second
    # slice starts at ``half`` so no sample of the second group is skipped.
    kmf = KaplanMeierFitter()
    groups = (
        (slice(0, half), 0, 'bo-'),
        (slice(half, None), -1, 'ro-'),
    )
    for rows, sample_idx, line_style in groups:
        kmf.fit(t[rows], event_observed=f[rows])
        # Predicted curve: cumulative product of the per-interval outputs,
        # prefixed with 1 so it has one value per break point.
        predicted_curve = np.concatenate(
            ([1], np.cumprod(y_pred[sample_idx, :])))
        plt.plot(breaks, predicted_curve, line_style)
        plt.plot(kmf.survival_function_.index.values,
                 kmf.survival_function_.KM_estimate, color='k')

    plt.xticks(np.arange(0, 2000.0001, 200))
    plt.yticks(np.arange(0, 1.0001, 0.125))
    plt.xlim([0, 2000])
    plt.ylim([0, 1])
    plt.xlabel('Follow-up time (days)')
    plt.ylabel('Proportion surviving')
    plt.title('One covariate. Actual=black, predicted=blue/red.')
    plt.show()

    # Reference fit: Cox proportional hazards on the same simulated data.
    myData = pd.DataFrame({'x_train': x_train, 't': t, 'f': f})
    cf = CoxPHFitter()
    cf.fit(myData, 't', event_col='f')
    # NOTE(review): ``hazards_`` is the attribute name used by older
    # lifelines releases; confirm it exists in the installed version.
    cox_coef = cf.hazards_.x_train
    nn_coef = model.get_weights()[0][0][0]

    print('Cox model coefficient:')
    print(cox_coef)
    print('Cox model hazard ratio:')
    print(np.exp(cox_coef))
    print('Neural network coefficient:')
    print(nn_coef)
    print('Neural network hazard ratio:')
    print(np.exp(nn_coef))
# NOTE(review): this loop is not inside any function in the visible source and
# repeats the inner cross-validation loop of ANN_survival_model(). Every name
# it reads (cv_folds, x_train, y_train, l2_array, hidden_layers_sizes,
# n_intervals, early_stopping, grid_search_train, grid_search_test, i, j) must
# already exist at this scope — confirm whether this is a stray paste.
#
# For each (train indices, test indices) pair in cv_folds: build a network
# with one ReLU hidden layer and a sigmoid output of n_intervals units, fit it
# on the training split, and store its train/test loss at [i, j].
for traincv, testcv in cv_folds:
    x_train_cv = x_train[traincv]
    y_train_cv = y_train[traincv]
    x_test_cv = x_train[testcv]
    y_test_cv = y_train[testcv]
    model = Sequential()
    #model.add(Dense(n_intervals,input_dim=x_train.shape[1],bias_initializer='zeros',kernel_regularizer=regularizers.l2(l2_array[i])))
    # Hidden layer; its kernel is L2-regularized with strength l2_array[i].
    model.add(
        Dense(hidden_layers_sizes,
              input_dim=x_train.shape[1],
              bias_initializer='zeros',
              activation='relu',
              kernel_regularizer=regularizers.l2(l2_array[i])))
    model.add(Dense(n_intervals))
    model.add(Activation('sigmoid'))
    model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                  optimizer=optimizers.RMSprop())  #lr=0.0001))
    # epochs is only an upper bound; the early_stopping callback is expected
    # to end training (its configuration is defined outside this fragment).
    history = model.fit(x_train_cv,
                        y_train_cv,
                        batch_size=256,
                        epochs=100000,
                        callbacks=[early_stopping],
                        verbose=0)
    grid_search_train[i, j] = model.evaluate(x_train_cv,
                                             y_train_cv,
                                             verbose=0)
    grid_search_test[i, j] = model.evaluate(x_test_cv, y_test_cv, verbose=0)
    # j is the fold column; it is advanced manually after each fold.
    j = j + 1
def ANN_survival_model():
    """Cross-validate, plot and score a discrete-time survival network.

    Reads ``data_train``, ``data_test``, ``x_train`` and ``x_test`` from
    the enclosing (module) scope; they are not defined in this function.

    Steps:

    1. Build interval break points and convert the ``duration_d`` / ``CVD``
       columns into ``nnet_survival`` target arrays.
    2. Run 10-fold cross-validation of a one-hidden-layer network
       (7 ReLU units, sigmoid output with one unit per interval) for the
       first ``n_l2_evaluated`` entries of ``l2_array`` and record the
       train/test loss of every fold.
    3. Plot the training history of the last fitted fold model.
    4. Evaluate that same model on the test set and print the concordance
       index of its one-year survival prediction for train and test data.

    Takes no arguments and returns ``None``; all results are printed or
    plotted.
    """
    from sklearn.model_selection import KFold

    print('-------------------------------------------------------------')
    print(
        'start cross-validation to pick L2 regularization strength for training'
    )
    print('-------------------------------------------------------------')

    # Break points are the times at which an exponential distribution with
    # this half-life reaches 0%, 5%, ..., 95% cumulative event probability.
    halflife = 365. * 2.8
    breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
    n_intervals = len(breaks) - 1

    y_train = nnet_survival.make_surv_array(data_train.duration_d.values,
                                            data_train.CVD.values, breaks)
    y_test = nnet_survival.make_surv_array(data_test.duration_d.values,
                                           data_test.CVD.values, breaks)
    # (Original author's note, translated.) The target array distinguishes
    # uncensored from censored samples: in its second array an uncensored
    # sample has a 1 at the interval of death, a censored sample has 0.

    hidden_layers_sizes = 7  # Using single hidden layer, with this many neurons

    n_folds = 10
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
    early_stopping = EarlyStopping(monitor='loss', patience=20)

    l2_array = np.power(10., np.arange(-4, 1))
    grid_search_train = np.zeros((len(l2_array), n_folds))
    grid_search_test = np.zeros((len(l2_array), n_folds))

    # Only this many leading entries of l2_array are cross-validated; the
    # remaining rows of the grids stay at their initial zeros. Raise it to
    # len(l2_array) to search the full grid.
    n_l2_evaluated = 1

    print('execution of 10-fold validation for five times\n')
    for i in range(n_l2_evaluated):
        print(str(i + 1) + ' / ' + str(len(l2_array)) + " times")
        for j, (traincv, testcv) in enumerate(kf.split(x_train)):
            x_train_cv = x_train[traincv]
            y_train_cv = y_train[traincv]
            x_test_cv = x_train[testcv]
            y_test_cv = y_train[testcv]

            # ReLU hidden layer of 7 units, sigmoid on the last layer.
            model = Sequential()
            # The number of input units equals the number of variables.
            model.add(
                Dense(hidden_layers_sizes,
                      input_dim=x_train.shape[1],
                      bias_initializer='zeros',
                      activation='relu',
                      kernel_regularizer=regularizers.l2(l2_array[i])))
            model.add(Dense(n_intervals))
            model.add(Activation('sigmoid'))
            model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                          optimizer=optimizers.RMSprop())

            # epochs is only an upper bound; training ends once the loss
            # has not improved for 20 epochs (early_stopping).
            history = model.fit(x_train_cv,
                                y_train_cv,
                                batch_size=256,
                                epochs=100000,
                                callbacks=[early_stopping],
                                verbose=0)

            print(model.metrics_names)
            grid_search_train[i, j] = model.evaluate(x_train_cv,
                                                     y_train_cv,
                                                     verbose=0)
            print(grid_search_train[i, j])
            grid_search_test[i, j] = model.evaluate(x_test_cv,
                                                    y_test_cv,
                                                    verbose=0)
            print(grid_search_test[i, j])

    print(np.average(grid_search_train, axis=1))
    print(np.average(grid_search_test, axis=1))

    # Choose the L2 strength with the lowest mean test loss, looking only
    # at rows that were actually evaluated: untouched rows are all zeros
    # and would otherwise be selected as if they had a perfect loss.
    mean_test_loss = np.average(grid_search_test[:n_l2_evaluated], axis=1)
    l2_final = l2_array[np.argmin(mean_test_loss)]
    print('Selected L2 regularization strength:', l2_final)

    ############################### plot ######################################
    # ``history`` belongs to the last fold fitted above. The model is
    # compiled without metrics and fitted without validation data, so the
    # history may hold only 'loss'; plot just the series that exist rather
    # than failing with KeyError on the missing ones.
    training_log = history.history
    fig, loss_ax = plt.subplots()
    acc_ax = loss_ax.twinx()

    panels = (
        (loss_ax, 'upper left', (('loss', 'y', 'train loss'),
                                 ('val_loss', 'r', 'test loss'))),
        (acc_ax, 'lower left', (('acc', 'b', 'train acc'),
                                ('val_acc', 'g', 'test acc'))),
    )
    for axis, legend_loc, series in panels:
        has_curve = False
        for key, line_style, label in series:
            if key in training_log:
                axis.plot(training_log[key], line_style, label=label)
                has_curve = True
        # A legend without any labelled curve only triggers a warning.
        if has_curve:
            axis.legend(loc=legend_loc)

    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    acc_ax.set_ylabel('accuray')

    plt.show()
    ###########################################################################

    # The result is not used further; the call is kept for its verbose
    # progress output on the held-out test set.
    score = model.evaluate(x_test, y_test, batch_size=2, verbose=1)

    # Discrimination performance.
    # Number of leading intervals to multiply together: index of the first
    # break point beyond 365 days.
    n_first_year = np.nonzero(breaks > 365)[0][0]

    # ``predict`` replaces ``predict_proba``, which newer Keras releases no
    # longer provide on Sequential models; the returned array is the same.
    y_pred = model.predict(x_train, verbose=1)
    oneyr_surv = np.cumprod(y_pred[:, 0:n_first_year], axis=1)[:, -1]
    print('================================')
    print('Training data with concordance_index ')
    print(concordance_index(data_train.duration_d, oneyr_surv,
                            data_train.CVD))
    print('================================')

    y_pred = model.predict(x_test, verbose=1)
    oneyr_surv = np.cumprod(y_pred[:, 0:n_first_year], axis=1)[:, -1]
    print('================================')
    print('Test data with concordance_index ')
    print(concordance_index(data_test.duration_d, oneyr_surv, data_test.CVD))
    print('================================')