Example #1
# Assumed imports for this snippet; nnet_survival is the module from
# Gensheimer & Narasimhan's nnet-survival package. Standalone Keras is
# assumed (use the tensorflow.keras equivalents on TF2).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
from keras.callbacks import EarlyStopping
from lifelines import KaplanMeierFitter, CoxPHFitter

import nnet_survival


def binary_ANN_survival():
    breaks = np.arange(0, 5000, 50)     # 100 interval edges at 50-day spacing
    n_intervals = len(breaks) - 1       # -> 99 discrete-time intervals
    timegap = breaks[1:] - breaks[:-1]

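    # Simulate two equal-sized groups with different median survival times
    # (200 vs. 400 days) and independent exponential censoring.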
    halflife1 = 200
    halflife2 = 400
    halflife_cens = 400
    n_samples = 5000
    np.random.seed(seed=0)
    t1 = np.random.exponential(scale=1 / (np.log(2) / halflife1),
                               size=int(n_samples / 2))
    t2 = np.random.exponential(scale=1 / (np.log(2) / halflife2),
                               size=int(n_samples / 2))
    t = np.concatenate((t1, t2))
    censtime = np.random.exponential(scale=1 / (np.log(2) / halflife_cens),
                                     size=n_samples)
    f = t < censtime      # event indicator: True if the failure was observed
    t[~f] = censtime[~f]  # censored subjects contribute their censoring time

    y_train = nnet_survival.make_surv_array(t, f, breaks)
    x_train = np.zeros(n_samples)     # single binary covariate:
    x_train[int(n_samples / 2):] = 1  # 0 = halflife1 group, 1 = halflife2 group

    model = Sequential()
    # Hidden layers would go here. For this example, using simple linear model with no hidden layers.
    model.add(Dense(1, input_dim=1, use_bias=False, kernel_initializer='zeros'))
    model.add(nnet_survival.PropHazards(n_intervals))
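    # nnet_survival's PropHazards layer turns the single linear predictor into
    # one conditional survival probability per time interval (a discrete-time
    # proportional hazards model), which is the output surv_likelihood expects.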
    model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                  optimizer=optimizers.RMSprop())
    # model.summary()
    early_stopping = EarlyStopping(monitor='loss', patience=2)
    history = model.fit(x_train,
                        y_train,
                        batch_size=32,
                        epochs=1000,
                        callbacks=[early_stopping])
    y_pred = model.predict(x_train, verbose=0)

    kmf = KaplanMeierFitter()
    kmf.fit(t[0:int(n_samples / 2)], event_observed=f[0:int(n_samples / 2)])
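    # Predicted survival curve: cumulative product of the per-interval
    # conditional survival probabilities, prepended with S(0) = 1.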
    plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[0, :]))), 'bo-')
    plt.plot(kmf.survival_function_.index.values,
             kmf.survival_function_.KM_estimate,
             color='k')
    kmf.fit(t[int(n_samples / 2):],
            event_observed=f[int(n_samples / 2):])
    plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[-1, :]))), 'ro-')
    plt.plot(kmf.survival_function_.index.values,
             kmf.survival_function_.KM_estimate,
             color='k')
    plt.xticks(np.arange(0, 2000.0001, 200))
    plt.yticks(np.arange(0, 1.0001, 0.125))
    plt.xlim([0, 2000])
    plt.ylim([0, 1])
    plt.xlabel('Follow-up time (days)')
    plt.ylabel('Proportion surviving')
    plt.title('One covariate. Actual=black, predicted=blue/red.')
    plt.show()

    myData = pd.DataFrame({'x_train': x_train, 't': t, 'f': f})
    cf = CoxPHFitter()
    cf.fit(myData, 't', event_col='f')
    # Recent lifelines versions expose the fitted coefficients as cf.params_
    # (older versions used cf.hazards_):
    cox_coef = cf.params_['x_train']
    nn_coef = model.get_weights()[0][0][0]
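    # With one binary covariate and no bias term, this single weight is the
    # network's log hazard ratio, directly comparable to the Cox coefficient.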
    print('Cox model coefficient:')
    print(cox_coef)
    print('Cox model hazard ratio:')
    print(np.exp(cox_coef))
    print('Neural network coefficient:')
    print(nn_coef)
    print('Neural network hazard ratio:')
    print(np.exp(nn_coef))
Example #2
    # Fragment: inner loop of the cross-validated L2 grid search. The
    # surrounding setup (cv_folds, l2_array, i, j, the grid_search_* arrays,
    # hidden_layers_sizes, n_intervals, early_stopping) appears in Example #3.
    for traincv, testcv in cv_folds:
        x_train_cv = x_train[traincv]
        y_train_cv = y_train[traincv]
        x_test_cv = x_train[testcv]
        y_test_cv = y_train[testcv]
        model = Sequential()
        #model.add(Dense(n_intervals,input_dim=x_train.shape[1],bias_initializer='zeros',kernel_regularizer=regularizers.l2(l2_array[i])))
        model.add(
            Dense(hidden_layers_sizes,
                  input_dim=x_train.shape[1],
                  bias_initializer='zeros',
                  activation='relu',
                  kernel_regularizer=regularizers.l2(l2_array[i])))
        model.add(Dense(n_intervals))
        model.add(Activation('sigmoid'))
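        # The sigmoid output layer yields one conditional survival probability
        # per interval, the format surv_likelihood expects (nnet_survival's
        # "flexible", non-proportional-hazards variant).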
        model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                      optimizer=optimizers.RMSprop())  # optionally RMSprop(lr=0.0001)
        history = model.fit(x_train_cv,
                            y_train_cv,
                            batch_size=256,
                            epochs=100000,
                            callbacks=[early_stopping],
                            verbose=0)
        grid_search_train[i, j] = model.evaluate(x_train_cv,
                                                 y_train_cv,
                                                 verbose=0)
        grid_search_test[i, j] = model.evaluate(x_test_cv,
                                                y_test_cv,
                                                verbose=0)
        j = j + 1
Example #3
def ANN_survival_model():
    # Assumes data_train / data_test DataFrames (with duration_d and CVD
    # columns) and x_train / x_test feature arrays exist in the enclosing
    # scope, along with the imports from Example #1 plus keras.regularizers,
    # keras Activation, and lifelines.utils.concordance_index.
    #################################################################
    print('-------------------------------------------------------------')
    print('starting cross-validation to pick the L2 regularization strength')
    print('-------------------------------------------------------------')
    halflife = 365. * 2.8

    breaks = -np.log(1 - np.arange(0.0, 0.96, 0.05)) * halflife / np.log(2)
    n_intervals = len(breaks) - 1
    timegap = breaks[1:] - breaks[:-1]
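    # These breaks are the 0%, 5%, ..., 95% quantiles of an exponential
    # distribution with median `halflife`: solving 1 - exp(-ln 2 * t / halflife) = q
    # gives t = -log(1 - q) * halflife / log(2), so each interval is expected
    # to contain about the same share of events.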

    # y_train = nnet_survival.make_surv_array(data_train.time.values, data_train.dead.values, breaks)
    # y_test = nnet_survival.make_surv_array(data_test.time.values, data_test.dead.values, breaks)

    y_train = nnet_survival.make_surv_array(data_train.duration_d.values,
                                            data_train.CVD.values, breaks)
    y_test = nnet_survival.make_surv_array(data_test.duration_d.values,
                                           data_test.CVD.values, breaks)

    # make_surv_array distinguishes uncensored from censored observations:
    # uncensored rows get a 1 in the second half of the array, in the interval
    # where the event ("dead") occurred; censored rows have zeros there.
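    # A minimal sketch of the encoding (assuming breaks = [0, 50, 100], i.e.
    # two intervals; censored subjects also get survival credit for an
    # interval if they live past its midpoint):
    #   event at t=60    -> [1, 0 | 0, 1]  survived interval 1, event in interval 2
    #   censored at t=60 -> [1, 0 | 0, 0]  survived interval 1, no event observed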
    hidden_layers_sizes = 7  # Using single hidden layer, with this many neurons

    from sklearn.model_selection import KFold

    n_folds = 10
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
    early_stopping = EarlyStopping(monitor='loss', patience=20)

    # l2_array = np.concatenate(([0.],np.power(10.,np.arange(-6,-2))))
    l2_array = np.power(10., np.arange(-4, 1))
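    # i.e. L2 strengths 1e-4, 1e-3, 1e-2, 1e-1, 1.0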
    grid_search_train = np.zeros((len(l2_array), n_folds))
    grid_search_test = np.zeros((len(l2_array), n_folds))
    print('running 10-fold cross-validation for each L2 value\n')
    for i in range(1):  # NOTE: only the first L2 strength is evaluated here
        # for i in range(len(l2_array)):  # full sweep over the whole grid
        print(str(i + 1) + ' / ' + str(len(l2_array)))
        j = 0
        cv_folds = kf.split(x_train)
        for traincv, testcv in cv_folds:
            x_train_cv = x_train[traincv]
            y_train_cv = y_train[traincv]
            x_test_cv = x_train[testcv]
            y_test_cv = y_train[testcv]

            # ReLU activation, sigmoid on the output layer, 7-neuron hidden layer
            model = Sequential()
            # model.add(Dense(n_intervals,input_dim=x_train.shape[1],bias_initializer='zeros',kernel_regularizer=regularizers.l2(l2_array[i])))

            # input dimension equals the number of features
            model.add(
                Dense(hidden_layers_sizes,
                      input_dim=x_train.shape[1],
                      bias_initializer='zeros',
                      activation='relu',
                      kernel_regularizer=regularizers.l2(l2_array[i])))
            # model.add(Activation('relu'))
            model.add(Dense(n_intervals))
            model.add(Activation('sigmoid'))

            model.compile(loss=nnet_survival.surv_likelihood(n_intervals),
                          optimizer=optimizers.RMSprop())  # optionally RMSprop(lr=0.0001)

            history = model.fit(x_train_cv,
                                y_train_cv,
                                batch_size=256,
                                epochs=100000,
                                callbacks=[early_stopping],
                                verbose=0)
            # model.summary()
            print(model.metrics_names)
            grid_search_train[i, j] = model.evaluate(x_train_cv,
                                                     y_train_cv,
                                                     verbose=0)
            print(grid_search_train[i, j])
            grid_search_test[i, j] = model.evaluate(x_test_cv,
                                                    y_test_cv,
                                                    verbose=0)
            print(grid_search_test[i, j])
            j = j + 1

    print(np.average(grid_search_train, axis=1))
    print(np.average(grid_search_test, axis=1))
    l2_final = l2_array[np.argmin(np.average(grid_search_test, axis=1))]  # lowest mean CV loss

    ############################### plot ######################################
    # Only 'loss' is guaranteed to be in history.history here: fit() received
    # no validation_data and compile() registered no extra metrics, so the
    # val_loss / acc / val_acc curves are plotted only when present.
    fig, loss_ax = plt.subplots()
    loss_ax.plot(history.history['loss'], 'y', label='train loss')
    if 'val_loss' in history.history:
        loss_ax.plot(history.history['val_loss'], 'r', label='test loss')
    if 'acc' in history.history:
        acc_ax = loss_ax.twinx()
        acc_ax.plot(history.history['acc'], 'b', label='train acc')
        if 'val_acc' in history.history:
            acc_ax.plot(history.history['val_acc'], 'g', label='test acc')
        acc_ax.set_ylabel('accuracy')
        acc_ax.legend(loc='lower left')
    loss_ax.set_xlabel('epoch')
    loss_ax.set_ylabel('loss')
    loss_ax.legend(loc='upper left')

    plt.show()
    ###########################################################################
    score = model.evaluate(x_test, y_test, batch_size=2, verbose=1)
    # score is the scalar test loss (no extra metrics were compiled)
    # print('Test loss: ', score)

    # Discrimination performance
    y_pred = model.predict(x_train, verbose=1)
    # Probability of surviving past ~1 year: cumulative product of the
    # conditional survival probabilities up to the first breakpoint after day 365.
    oneyr_surv = np.cumprod(y_pred[:, 0:np.nonzero(breaks > 365)[0][0]],
                            axis=1)[:, -1]

    print('================================')
    print('Concordance index, training data:')
    print(concordance_index(data_train.duration_d, oneyr_surv, data_train.CVD))
    print('================================')

    y_pred = model.predict(x_test, verbose=1)
    oneyr_surv = np.cumprod(y_pred[:, 0:np.nonzero(breaks > 365)[0][0]],
                            axis=1)[:, -1]

    print('================================')
    print('Concordance index, test data:')
    print(concordance_index(data_test.duration_d, oneyr_surv, data_test.CVD))
    print('================================')